Qt
Internal/Contributor docs for the Qt SDK. Note: These are NOT official API docs; those are found at https://doc.qt.io/
Loading...
Searching...
No Matches
qtimezonelocale.cpp
Go to the documentation of this file.
1// Copyright (C) 2024 The Qt Company Ltd.
2// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
3// Qt-Security score:critical reason:data-parser
4
5#include <private/qtimezonelocale_p.h>
6#include <private/qtimezoneprivate_p.h>
7
8#if !QT_CONFIG(icu)
9# include <QtCore/qspan.h>
10# include <private/qdatetime_p.h>
11# include <private/qtools_p.h>
12// Use data generated from CLDR:
13# include "qtimezonelocale_data_p.h"
14# include "qtimezoneprivate_data_p.h"
15# ifdef QT_CLDR_ZONE_DEBUG
16# include "../text/qlocale_data_p.h"
17QT_BEGIN_NAMESPACE
18static_assert(std::size(locale_data) == std::size(QtTimeZoneLocale::localeZoneData));
19// Size includes terminal rows: for now, they do match in tag IDs, but they needn't.
20static_assert([]() {
21 for (std::size_t i = 0; i < std::size(locale_data); ++i) {
22 const auto &loc = locale_data[i];
23 const auto &zone = QtTimeZoneLocale::localeZoneData[i];
24 if (loc.m_language_id != zone.m_language_id
25 || loc.m_script_id != zone.m_script_id
26 || loc.m_territory_id != zone.m_territory_id) {
27 return false;
28 }
29 }
30 return true;
31}());
32QT_END_NAMESPACE
33# endif
34#endif
35
36#include <algorithm>
37
38QT_BEGIN_NAMESPACE
39
40using namespace Qt::StringLiterals;
41
42#if QT_CONFIG(icu) // Get data from ICU:
43namespace {
44
45// Convert TimeType and NameType into ICU UCalendarDisplayNameType
46UCalendarDisplayNameType ucalDisplayNameType(QTimeZone::TimeType timeType,
47 QTimeZone::NameType nameType)
48{
49 // TODO ICU C UCalendarDisplayNameType does not support full set of C++ TimeZone::EDisplayType
50 // For now, treat Generic as Standard
51 switch (nameType) {
52 case QTimeZone::ShortName:
53 return timeType == QTimeZone::DaylightTime ? UCAL_SHORT_DST : UCAL_SHORT_STANDARD;
54 case QTimeZone::DefaultName:
55 case QTimeZone::LongName:
56 return timeType == QTimeZone::DaylightTime ? UCAL_DST : UCAL_STANDARD;
57 case QTimeZone::OffsetName:
58 Q_UNREACHABLE(); // Callers of ucalTimeZoneDisplayName() should take care of OffsetName.
59 }
60 Q_UNREACHABLE_RETURN(UCAL_STANDARD);
61}
62
63} // nameless namespace
64
65namespace QtTimeZoneLocale {
66
67// Qt wrapper around ucal_getTimeZoneDisplayName()
68// Used directly by ICU backend; indirectly by TZ (see below).
69QString ucalTimeZoneDisplayName(UCalendar *ucal,
70 QTimeZone::TimeType timeType,
71 QTimeZone::NameType nameType,
72 const QByteArray &localeCode)
73{
74 constexpr int32_t BigNameLength = 50;
75 int32_t size = BigNameLength;
76 QString result(size, Qt::Uninitialized);
77 auto dst = [&result]() { return reinterpret_cast<UChar *>(result.data()); };
78 UErrorCode status = U_ZERO_ERROR;
79 const UCalendarDisplayNameType utype = ucalDisplayNameType(timeType, nameType);
80
81 // size = ucal_getTimeZoneDisplayName(cal, type, locale, result, resultLength, status)
82 size = ucal_getTimeZoneDisplayName(ucal, utype, localeCode.constData(),
83 dst(), size, &status);
84
85 // If overflow, then resize and retry
86 if (size > BigNameLength || status == U_BUFFER_OVERFLOW_ERROR) {
87 result.resize(size);
88 status = U_ZERO_ERROR;
89 size = ucal_getTimeZoneDisplayName(ucal, utype, localeCode.constData(),
90 dst(), size, &status);
91 }
92
93 if (!U_SUCCESS(status))
94 return QString();
95
96 // Resize and return:
97 result.resize(size);
98 return result;
99}
100
101bool ucalKnownTimeZoneId(const QString &ianaStr)
102{
103 const UChar *const name = reinterpret_cast<const UChar *>(ianaStr.constData());
104 // We are not interested in the value, but we have to pass something.
105 // No known IANA zone name is (up to 2023) longer than 30 characters.
106 constexpr size_t size = 64;
107 UChar buffer[size];
108
109 // TODO: convert to ucal_getIanaTimeZoneID(), new draft in ICU 74, once we
110 // can rely on its availability, assuming it works the same once not draft.
111 UErrorCode status = U_ZERO_ERROR;
112 UBool isSys = false;
113 // Returns the length of the IANA zone name (but we don't care):
114 ucal_getCanonicalTimeZoneID(name, ianaStr.size(), buffer, size, &isSys, &status);
115 // We're only interested if the result is a "system" (i.e. IANA) ID:
116 return isSys;
117}
118
119} // QtTimeZoneLocale
120
121// Used by TZ backends when ICU is available:
122QString QTimeZonePrivate::localeName(qint64 atMSecsSinceEpoch, int offsetFromUtc,
123 QTimeZone::TimeType timeType,
124 QTimeZone::NameType nameType,
125 const QLocale &locale) const
126{
127 Q_UNUSED(atMSecsSinceEpoch);
128 // TODO: use CLDR data for the offset name.
129 // No ICU API for offset formats, so fall back to our ISO one, even if
130 // locale isn't C:
131 if (nameType == QTimeZone::OffsetName)
132 return isoOffsetFormat(offsetFromUtc);
133
134 const QString id = QString::fromUtf8(m_id);
135 // Need to check id is known to ICU, since ucal_open() will return a
136 // misleading "valid" GMT ucal when it doesn't recognise id.
137 if (!QtTimeZoneLocale::ucalKnownTimeZoneId(id))
138 return QString();
139
140 const QByteArray loc = locale.name().toUtf8();
141 UErrorCode status = U_ZERO_ERROR;
142 // TODO: QTBUG-124271 can we cache any of this ?
143 UCalendar *ucal = ucal_open(reinterpret_cast<const UChar *>(id.data()), id.size(),
144 loc.constData(), UCAL_DEFAULT, &status);
145 if (ucal && U_SUCCESS(status)) {
146 auto tidier = qScopeGuard([ucal]() { ucal_close(ucal); });
147 return QtTimeZoneLocale::ucalTimeZoneDisplayName(ucal, timeType, nameType, loc);
148 }
149 return QString();
150}
151#else // No ICU, use QTZ[LP]_data_p.h data for feature timezone_locale.
153// Inline methods promised in QTZL_p.h
154using namespace QtTimeZoneCldr; // QTZP_data_p.h
157} // QtTimeZoneLocale
158
159namespace {
160using namespace QtTimeZoneLocale; // QTZL_p.h QTZL_data_p.h
161using namespace QtTimeZoneCldr; // QTZP_data_p.h
162// Accessors for the QTZL_data_p.h
163
164template <typename Row, typename Sought, typename Condition>
165const Row *findTableEntryFor(const QSpan<Row> data, Sought value, Condition test)
166{
167 // We have the present locale's data (if any). Its rows are sorted on
168 // (localeIndex and) a field for which we want the Sought value. The test()
169 // compares that field.
170 auto begin = data.begin(), end = data.end();
171 Q_ASSERT(begin == end || end->localeIndex > begin->localeIndex);
172 Q_ASSERT(begin == end || end[-1].localeIndex == begin->localeIndex);
173 auto row = std::lower_bound(begin, end, value, test);
174 return row == end ? nullptr : row;
175}
176
177QString exemplarCityFor(const LocaleZoneData &locale, const LocaleZoneData &next,
178 QByteArrayView iana)
179{
180 auto xct = findTableEntryFor(
181 QSpan(localeZoneExemplarTable).first(next.m_exemplarTableStart
182 ).sliced(locale.m_exemplarTableStart),
183 iana, [](auto &row, QByteArrayView key) { return row.ianaId() < key; });
184 if (xct && xct->ianaId() == iana)
185 return xct->exemplarCity().getData(exemplarCityTable);
186 return {};
187}
188
189// Accessors for the QTZP_data_p.h
// Clip a count of minutes since the epoch to the quint32 range used by
// ZoneMetaHistory's begin and end fields.
//
// Returns epoch (0) for any instant before the epoch, the reserved
// "unrepresentably late" value (0xfffffffe) for any instant too late to
// represent, and epochMinute itself otherwise.
quint32 clipEpochMinute(qint64 epochMinute)
{
    // ZoneMetaHistory's quint32 UTC epoch minutes.
    // Dates from 1970-01-01 to 10136-02-16 (at 04:14) are representable.
    constexpr quint32 epoch = 0;
    // Since the .end value of an interval that does end is the first epoch
    // minutes *after* the interval, intervalEndsBefore() uses a <= test. The
    // value ~epoch (0xffffffff) is used as a sentinel value to mean "there is
    // no end", so we need a value strictly less than it for "epoch minutes too
    // big to represent" so that this value is less than "no end". So the value
    // 1 ^ ~epoch (0xfffffffe) is reserved as this "unrepresentably late time"
    // and the scripts to generate data assert that no actual interval ends then
    // or later.
    constexpr quint32 ragnarok = 1 ^ ~epoch;

    // Instants before the epoch are not representable: clamp them to the
    // epoch rather than letting the cast below wrap them round to huge
    // values (possibly even the "no end" sentinel).
    if (epochMinute < 0)
        return epoch;
    // Equivalent to epochMinute + 1 >= ragnarok, without risk of overflow:
    if (epochMinute >= qint64(ragnarok) - 1)
        return ragnarok;
    return quint32(epochMinute);
}
206
207constexpr bool intervalEndsBefore(const ZoneMetaHistory &record, quint32 dt) noexcept
208{
209 // See clipEpochMinute()'s explanation of ragnarok for why this is <=
210 return record.end <= dt;
211}
212
213/* The metaZoneKey of the ZoneMetaHistory entry whose ianaId() is equal to the
214 given zoneId, for which atMSecsSinceEpoch refers to an instant between its
215 begin and end. Returns zero if there is no such ZoneMetaHistory entry.
216*/
217quint16 metaZoneAt(QByteArrayView zoneId, qint64 atMSecsSinceEpoch)
218{
219 using namespace QtPrivate::DateTimeConstants;
220 auto it = std::lower_bound(std::begin(zoneHistoryTable), std::end(zoneHistoryTable), zoneId,
221 [](const ZoneMetaHistory &record, QByteArrayView id) {
222 return record.ianaId().compare(id, Qt::CaseInsensitive) < 0;
223 });
224 if (it == std::end(zoneHistoryTable) || it->ianaId().compare(zoneId, Qt::CaseInsensitive) > 0)
225 return 0;
226 const auto stop =
227 std::upper_bound(it, std::end(zoneHistoryTable), zoneId,
228 [](QByteArrayView id, const ZoneMetaHistory &record) {
229 return id.compare(record.ianaId(), Qt::CaseInsensitive) < 0;
230 });
231 const quint32 dt = clipEpochMinute(atMSecsSinceEpoch / MSECS_PER_MIN);
232 it = std::lower_bound(it, stop, dt, intervalEndsBefore);
233 return it != stop && it->begin <= dt ? it->metaZoneKey : 0;
234}
235
236// True if the named zone is ever part of the specified metazone:
237bool zoneEverInMeta(QByteArrayView zoneId, quint16 metaKey)
238{
239 for (auto it = std::lower_bound(std::begin(zoneHistoryTable), std::end(zoneHistoryTable),
240 zoneId,
241 [](const ZoneMetaHistory &record, QByteArrayView id) {
242 return record.ianaId().compare(id, Qt::CaseInsensitive) < 0;
243 });
244 it != std::end(zoneHistoryTable) && it->ianaId().compare(zoneId, Qt::CaseInsensitive) == 0;
245 ++it) {
246 if (it->metaZoneKey == metaKey)
247 return true;
248 }
249 return false;
250}
251
252constexpr bool dataBeforeMeta(const MetaZoneData &row, quint16 metaKey) noexcept
253{
254 return row.metaZoneKey < metaKey;
255}
256
257constexpr bool metaDataBeforeTerritory(const MetaZoneData &row, qint16 territory) noexcept
258{
259 return row.territory < territory;
260}
261
262const MetaZoneData *metaZoneStart(quint16 metaKey)
263{
264 const MetaZoneData *const from =
265 std::lower_bound(std::begin(metaZoneTable), std::end(metaZoneTable),
266 metaKey, dataBeforeMeta);
267 if (from == std::end(metaZoneTable) || from->metaZoneKey != metaKey) {
268 qWarning("No metazone data found for metazone key %d", metaKey);
269 return nullptr;
270 }
271 return from;
272}
273
274const MetaZoneData *metaZoneDataFor(const MetaZoneData *from, QLocale::Territory territory)
275{
276 const quint16 metaKey = from->metaZoneKey;
277 const MetaZoneData *const end =
278 std::lower_bound(from, std::end(metaZoneTable), metaKey + 1, dataBeforeMeta);
279 Q_ASSERT(end != from && end[-1].metaZoneKey == metaKey);
280 QLocale::Territory land = territory;
281 do {
282 const MetaZoneData *row =
283 std::lower_bound(from, end, qint16(land), metaDataBeforeTerritory);
284 if (row != end && QLocale::Territory(row->territory) == land) {
285 Q_ASSERT(row->metaZoneKey == metaKey);
286 return row;
287 }
288 // Fall back to World (if territory itself isn't World).
289 } while (std::exchange(land, QLocale::World) != QLocale::World);
290
291 qWarning("Metazone %s lacks World data for %ls",
292 from->metaZoneId().constData(),
293 qUtf16Printable(QLocale::territoryToString(territory)));
294 return nullptr;
295}
296
297QString addPadded(qsizetype width, const QString &zero, const QString &number, QString &&onto)
298{
299 // TODO (QTBUG-122834): QLocale::toString() should support zero-padding directly.
300 width -= number.size() / zero.size();
301 while (width > 0) {
302 onto += zero;
303 --width;
304 }
305 return std::move(onto) + number;
306}
307
308QString formatOffset(QStringView format, int offsetMinutes, const QLocale &locale,
309 QtTemporalPattern::TemporalFieldFlags flags)
310{
311 using Flag = QtTemporalPattern::TemporalFieldFlag;
312 Q_ASSERT(offsetMinutes >= 0);
313 const QString hour = locale.toString(offsetMinutes / 60);
314 const QString mins = locale.toString(offsetMinutes % 60);
315 // If zero.size() > 1, digits are surrogate pairs; each only counts one
316 // towards width of the field, even if it contributes more to result.size().
317 const QString zero = locale.zeroDigit();
318 QStringView tail = format;
319 QString result;
320 while (!tail.isEmpty()) {
321 if (tail.startsWith(u'\'')) {
322 qsizetype end = tail.indexOf(u'\'', 1);
323 if (end < 0) {
324 qWarning("Unbalanced quote in offset format string: %s",
325 format.toUtf8().constData());
326 return result + tail; // Include the quote; format is bogus.
327 } else if (end == 1) {
328 // Special case: adjacent quotes signify a simple quote.
329 result += u'\'';
330 tail = tail.sliced(2);
331 } else {
332 Q_ASSERT(end > 1); // We searched from index 1.
333 while (end + 1 < tail.size() && tail[end + 1] == u'\'') {
334 // Special case: adjacent quotes inside a quoted string also
335 // signify a simple quote.
336 result += tail.sliced(1, end); // Include a quote at the end
337 tail = tail.sliced(end + 1); // Still starts with a quote
338 end = tail.indexOf(u'\'', 1); // Where's the next ?
339 if (end < 0) {
340 qWarning("Unbalanced quoted quote in offset format string: %s",
341 format.toUtf8().constData());
342 return result + tail;
343 }
344 Q_ASSERT(end > 0);
345 }
346 // Skip leading and trailng quotes:
347 result += tail.sliced(1, end - 1);
348 tail = tail.sliced(end + 1);
349 }
350 } else if (tail.startsWith(u'H')) {
351 qsizetype width = 1;
352 while (width < tail.size() && tail[width] == u'H')
353 ++width;
354 tail = tail.sliced(width);
355 if (flags.testFlag(Flag::ZeroPad))
356 result = addPadded(width, zero, hour, std::move(result));
357 else
358 result += hour;
359 } else if (tail.startsWith(u'm')) {
360 qsizetype width = 1;
361 while (width < tail.size() && tail[width] == u'm')
362 ++width;
363 // (At CLDR v45, all locales use two-digit minutes.)
364 // (No known zone has single-digit non-zero minutes.)
365 tail = tail.sliced(width);
366 if (flags.testFlag(Flag::ZeroPad))
367 result = addPadded(width, zero, mins, std::move(result));
368 else if (offsetMinutes % 60)
369 result += mins;
370 else if (result.endsWith(u':') || result.endsWith(u'.'))
371 result.chop(1);
372 // (At CLDR v45, mm follows H either immediately or after a colon or dot.)
373 } else if (tail[0].isHighSurrogate() && tail.size() > 1
374 && tail[1].isLowSurrogate()) {
375 result += tail.first(2);
376 tail = tail.sliced(2);
377 } else {
378 result += tail.front();
379 tail = tail.sliced(1);
380 }
381 }
382 return result;
383}
384
385struct OffsetFormatMatch
386{
387 qsizetype size = 0;
388 int offset = 0;
389 operator bool() const { return size > 0; }
390};
391
// Try to match the start of text against one hour-format pattern.
//
// The format is expected to be a literal prefix, then H or HH, an optional
// separator and finally mm. On success, the result's size is how much of
// text was matched and its offset is the (non-negative) number of seconds
// the matched hours and minutes amount to; the caller applies the sign. On
// failure, the result has size zero.
OffsetFormatMatch matchOffsetText(QStringView text, QStringView format, const QLocale &locale,
                                  QtTemporalPattern::TemporalFieldFlags flags)
{
    using namespace QtTemporalPattern;
    using namespace FieldGroup;
    using Flag = TemporalFieldFlag;
    const bool zeroPad = flags.testFlag(Flag::ZeroPad);
    OffsetFormatMatch res;

    // At least at CLDR v48:
    // Amharic in Ethiopia has ±HHmm formats; all others use separators.
    // None have single m. All have H or HH before mm. None has anything after mm.
    // (If a user has configured a system locale to violate that, they get to
    // endure the resulting failure to parse.)
    // Some Balkan states have a space before the minus sign of the negative format.
    // Sign is taken care of by caller; it's part of the format's text, before
    // H, but here we just match it as a literal.
    // TODO: flexible matching of space, when present in format; ignoring Unicode
    // invisibles; matching variations on U+2212, dash and other forms of minus sign.

    // Everything before the first H is a literal prefix text must start with:
    qsizetype cut = format.indexOf(u'H');
    if (cut < 0 || !text.startsWith(format.first(cut)) || !format.endsWith(u"mm"))
        return res;
    QStringView sep = format.sliced(cut).chopped(2); // Prune prefix and "mm".
    int hlen = 1; // We already know we have one 'H' at the start of sep.
    while (hlen < sep.size() && sep[hlen] == u'H')
        ++hlen;
    Q_ASSERT(hlen <= 2);
    sep = sep.sliced(hlen); // What's left is the hour-minute separator, if any.

    const QLocaleData *const locDat = QLocalePrivate::get(locale)->m_data;
    using Digits = QLocaleData::DigitSequence;
    // The digits found in text, starting just after the literal prefix:
    const Digits early = locDat->digitSequence(text, {}, cut);
    Q_ASSERT(!early.sign);

    // The hour field takes at most two of those digits:
    Digits hrs = early;
    if (qsizetype maxLen = std::max(2, hlen); maxLen < hrs.digits.size())
        hrs = hrs.first(maxLen);
    if (hrs.digits.size() < 1) // There must be an hour field.
        return res;
    if (zeroPad && hrs.digits.size() < hlen) // ZeroPad requires full width
        return res;

    Digits mins = early.sliced(hrs.digits.size()); // Initial guess.
    if (!sep.isEmpty()) {
        const qsizetype sepAt = text.indexOf(sep, cut);
        if (sepAt == hrs.endIndex()) // Separator must immediately follow hour field:
            mins = locDat->digitSequence(text, {}, sepAt + sep.size());
        else // If missing or misplaced, we only have an hour field:
            mins = mins.first(0);
    }
    Q_ASSERT(!mins.sign);
    // A minutes field is exactly two digits; ignore any more than that:
    if (mins.digits.size() > 2)
        mins = mins.first(2);
    else if (!mins.isEmpty() && mins.digits.size() < 2) // Not long enough: ignore
        mins = mins.first(0);

    // Largest whole number of hours any supported UTC offset can have:
    constexpr int MaxOffsetHours
        = (std::max)(-QTimeZone::MinUtcOffsetSecs, QTimeZone::MaxUtcOffsetSecs) / 3600;

    bool ok = true;
    uint hour = hrs.digits.toUInt(&ok);
    if (!ok || hour > MaxOffsetHours || (zeroPad && mins.isEmpty())) {
        // MaxOffsetHours > 10, so hrs.digits().size() > 1.
        if (zeroPad) // Hour field must have full width.
            return res;
        // Truncated hour field at first digit and exclude minutes:
        hrs = hrs.first(1);
        mins = mins.first(0);
        hour = hrs.digits.toUInt(&ok);
    }
    if (ok) {
        // Prefer a match that includes minutes, when the flags allow it:
        if (!mins.isEmpty()
            && (!zeroPad || matchesFlagWithin(flags, Flag::Abbreviated, WidthMask))) {
            uint minute = mins.digits.toUInt(&ok);
            if (ok && minute < 60) {
                res.offset = (hour * 60 + minute) * 60;
                res.size = mins.endIndex();
                return res;
            }
        }
        // Otherwise settle for just the hours, when the flags allow that:
        if (!zeroPad || matchesFlagWithin(flags, Flag::Narrow, WidthMask)) {
            res.offset = hour * 60 * 60;
            res.size = hrs.endIndex();
        }
    }
    return res;
}
480
// Find the longest match, at the start of text, for an offset in the forms
// used by the locale whose row in localeZoneData has index locInd.
//
// Depending on flags, tries the locale's bare positive and negative hour
// formats, those formats embedded in the locale's GMT-offset format, or
// both. The result's offset is negated when the negative form is what
// matched. A match for the GMT-offset format with no offset text at all is
// reported with offset zero. If nothing matches, the result's size is zero.
OffsetFormatMatch matchOffsetFormat(QStringView text, const QLocale &locale, qsizetype locInd,
                                    QtTemporalPattern::TemporalFieldFlags flags)
{
    OffsetFormatMatch best;
    using Flag = QtTemporalPattern::TemporalFieldFlag;
    using namespace QtTemporalPattern::FieldGroup;
    const LocaleZoneData &locData = localeZoneData[locInd];
    const QStringView posHourForm = locData.posHourFormat().viewData(hourFormatTable);
    const QStringView negHourForm = locData.negHourFormat().viewData(hourFormatTable);
    // For the negative format, allow U+002d to match U+2212 or locale.negativeSign();
    const bool mapNeg = text.contains(u'-')
        && (negHourForm.contains(u'\u2212') || negHourForm.contains(locale.negativeSign()));
    // First the bare hour formats, with no GMT-offset wrapping:
    if (QtTemporalPattern::matchesFlagWithin(flags, Flag::NeedNoUtcPrefix, UtcPrefixMask)) {
        if (auto match = matchOffsetText(text, posHourForm, locale, flags))
            best = match;
        if (auto match = matchOffsetText(text, negHourForm, locale, flags); match.size > best.size)
            best = { match.size, -match.offset };
        if (mapNeg) {
            // Retry with the negative format's sign replaced by plain '-':
            const QString mapped = negHourForm.toString()
                .replace(u'\u2212', u'-').replace(locale.negativeSign(), "-"_L1);
            if (auto match = matchOffsetText(text, mapped, locale, flags); match.size > best.size)
                best = { match.size, -match.offset };
        }
    }
    // Then the hour formats embedded in the GMT-offset format:
    if (QtTemporalPattern::matchesFlagWithin(flags, Flag::AcceptUtcPrefix, UtcPrefixMask)) {
        const QStringView offsetFormat = locData.offsetGmtFormat().viewData(gmtFormatTable);
        if (const qsizetype cut = offsetFormat.indexOf(u"%0"); cut >= 0) { // Should be present
            const QStringView gmtPrefix = offsetFormat.first(cut);
            const QStringView gmtSuffix = offsetFormat.sliced(cut + 2); // After %0
            // Total length of the format's text outside the %0 placeholder:
            const qsizetype gmtSize = cut + gmtSuffix.size();
            // True if gmtPrefix starts with lhs and text matches gmtPrefix
            // aside from having rhs where gmtPrefix has lhs:
            const auto crossMatch = [gmtPrefix, text]
                (QLatin1StringView lhs, QLatin1StringView rhs) {
                    const qsizetype len = lhs.size();
                    Q_ASSERT(len == rhs.size());
                    if (!gmtPrefix.startsWith(lhs) || !text.startsWith(rhs))
                        return false;
                    if (gmtPrefix.size() == len)
                        return true;
                    return text.sliced(len).startsWith(gmtPrefix.sliced(len));
                };
            // Cheap pre-test: check suffix does appear after prefix, albeit we must
            // later check it actually appears right after the offset text:
            if ((gmtPrefix.isEmpty() || text.startsWith(gmtPrefix)
                 // Treat GMT and UTC as matches for one another to match
                 // QUtcTimeZonePrivate::displayName()'s kludges:
                 || crossMatch("GMT"_L1, "UTC"_L1) || crossMatch("UTC"_L1, "GMT"_L1))
                && (gmtSuffix.isEmpty() || text.sliced(cut).indexOf(gmtSuffix) >= 0)) {
                // Each candidate only counts if it beats the best so far and
                // the suffix immediately follows the offset text matched.
                if (auto match = matchOffsetText(text.sliced(cut), posHourForm, locale, flags);
                    gmtSize + match.size > best.size) {
                    if (text.sliced(cut + match.size).startsWith(gmtSuffix)) // too sliced ?
                        best = { gmtSize + match.size, match.offset };
                }
                if (auto match = matchOffsetText(text.sliced(cut), negHourForm, locale, flags)) {
                    if (gmtSize + match.size > best.size
                        && text.sliced(cut + match.size).startsWith(gmtSuffix)) {
                        best = { gmtSize + match.size, -match.offset };
                    }
                } else if (mapNeg) {
                    // Only if the negative format matched nothing: retry with
                    // its sign replaced by plain '-'.
                    const QString mapped = negHourForm.toString()
                        .replace(u'\u2212', u'-').replace(locale.negativeSign(), "-"_L1);
                    if (auto match = matchOffsetText(text.sliced(cut), mapped, locale, flags);
                        gmtSize + match.size > best.size) {
                        if (text.sliced(cut + match.size).startsWith(gmtSuffix))
                            best = { gmtSize + match.size, -match.offset };
                    }
                }
                // Match empty offset as UTC (unless that'd be an empty match):
                if (gmtSize > best.size && text.sliced(cut).startsWith(gmtSuffix))
                    return { gmtSize, 0 };
            }
        }
    }
    return best;
}
555
556} // nameless namespace
557
558namespace QtTimeZoneLocale {
559
560QList<QByteArrayView> ianaIdsForTerritory(QLocale::Territory territory)
561{
562 QList<QByteArrayView> result;
563 {
564 const TerritoryZone *row =
565 std::lower_bound(std::begin(territoryZoneMap), std::end(territoryZoneMap),
566 qint16(territory),
567 [](const TerritoryZone &row, qint16 territory) {
568 return row.territory < territory;
569 });
570 if (row != std::end(territoryZoneMap) && QLocale::Territory(row->territory) == territory)
571 result << row->ianaId();
572 }
573 for (const MetaZoneData &row : metaZoneTable) {
574 if (QLocale::Territory(row.territory) == territory)
575 result << row.ianaId();
576 }
577 return result;
578}
579
580#if QT_CONFIG(datestring)
581// The QDateTime is only needed by the fall-back implementation in qlocale.cpp;
582// the calls below don't need to pass a valid QDateTime (based on its
583// atMSecsSinceEpoch); an invalid QDateTime() will suffice and be ignored.
586 const QDateTime &, int offsetSeconds)
587{
588 // See the final "zone" section of the table
589 // https://www.unicode.org/reports/tr35/tr35-dates.html#table-date-field-symbol-table
590 // for the full range of LDML-specified formats.
593 using namespace QtTemporalPattern::FieldGroup;
594
598 // Sign is already handled by choice of the hourFormat:
600 // Offsets are only displayed in minutes - round seconds (if any) to nearest
601 // minute (rounding halves away from zero offset):
602 const int offsetMinutes = (offsetSeconds + 30) / 60;
603
606 return hourOffset;
607
611}
612#endif // datestring
613
614} // QtTimeZoneLocale
615
616QString QTimeZonePrivate::localeName(qint64 atMSecsSinceEpoch, int offsetFromUtc,
617 QTimeZone::TimeType timeType,
618 QTimeZone::NameType nameType,
619 const QLocale &locale) const
620{
621#if QT_CONFIG(datestring)
622 if (nameType == QTimeZone::OffsetName
623 // Use offset forms for QUtcTimeZonePrivate instances:
624 || QUtcTimeZonePrivate::offsetFromUtcString(m_id) != invalidSeconds()) {
625 using Flag = QtTemporalPattern::TemporalFieldFlag;
626 constexpr QtTemporalPattern::TemporalFieldFlags flags
627 = Flag::Numeric | Flag::Abbreviated | Flag::AcceptUtcPrefix | Flag::ZeroPad;
628 // Doesn't need fallbacks, since every locale has hour and offset formats.
629 return QtTimeZoneLocale::zoneOffsetFormat(locale, locale.d->m_index, flags,
630 QDateTime(), offsetFromUtc);
631 }
632#endif // datestring
633 // Handling of long names must stay in sync with findLongNamePrefix(), below.
634
635 // An IANA ID may give clues to fall back on for abbreviation or exemplar city:
636 QByteArray ianaAbbrev, ianaTail;
637 const auto scanIana = [&](QByteArrayView iana) {
638 // Scan the name of each zone whose data we consider using and, if the
639 // name gives us a clue to a fallback for which we have nothing better
640 // yet, remember it (and ignore later clues for that fallback).
641 if (!ianaAbbrev.isEmpty() && !ianaTail.isEmpty())
642 return;
643 qsizetype cut = iana.lastIndexOf('/');
644 QByteArrayView tail = cut < 0 ? iana : iana.sliced(cut + 1);
645 // Deal with a couple of special cases
646 if (tail == "McMurdo") { // Exceptional lowercase-uppercase sequence without space
647 if (ianaTail.isEmpty())
648 ianaTail = "McMurdo"_ba;
649 return;
650 } else if (tail == "DumontDUrville") { // Chopped to fit into IANA's 14-char limit
651 if (ianaTail.isEmpty())
652 ianaTail = "Dumont d'Urville"_ba;
653 return;
654 } else if (tail.isEmpty()) {
655 // Custom zone with perverse m_id ?
656 return;
657 }
658 const auto isMixedCaseAbbrev = [tail](char ch) {
659 // cv-RU and en-GU abbreviate Chamorro as ChST
660 // scn-IT abbreviates Cuba as CuT/CuST/CuDT
661 // blo-BJ abbreviates GMT as Gk
662 switch (tail.size()) {
663 case 2: return tail == "Gk";
664 case 3: return tail == "CuT";
665 case 4:
666 if (tail[0] == 'C' && tail[1] == ch && tail[3] == 'T') {
667 switch (ch) {
668 case 'h': return tail[2] == 'S';
669 case 'u': return tail[2] == 'S' || tail[2] == 'D';
670 default: break;
671 }
672 }
673 return false;
674 default:
675 break;
676 }
677 return false;
678 };
679
680 // Even if it is abbr or city name, we don't care if we've found one before.
681 bool maybeAbbr = ianaAbbrev.isEmpty(), maybeCityName = ianaTail.isEmpty(), inword = false;
682 char sign = '\0';
683 for (char ch : tail) {
684 if (ch == '+' || ch == '-') {
685 if (ch == '+' || !inword)
686 maybeCityName = false;
687 inword = false;
688 if (maybeAbbr) {
689 if (sign)
690 maybeAbbr = false; // two signs: no
691 else
692 sign = ch;
693 }
694 } else if (ch == '_') {
695 maybeAbbr = false;
696 if (!inword) // No double-underscore, or leading underscore
697 maybeCityName = false;
698 inword = false;
699 } else if (QChar::isLower(ch)) {
700 maybeAbbr = isMixedCaseAbbrev(ch);
701 // Dar_es_Salaam shows both cases as word starts
702 inword = true;
703 } else if (QChar::isUpper(ch)) {
704 if (sign)
705 maybeAbbr = false;
706 if (inword)
707 maybeCityName = false;
708 inword = true;
709 } else if (QChar::isDigit(ch)) {
710 if (!sign)
711 maybeAbbr = false;
712 maybeCityName = false;
713 inword = false;
714 }
715
716 if (!maybeAbbr && !maybeCityName)
717 break;
718 }
719 if (maybeAbbr && maybeCityName) // No real IANA ID matches both
720 return;
721
722 if (maybeAbbr) {
723 if (tail.endsWith("-0") || tail.endsWith("+0"))
724 tail = tail.chopped(2);
725 ianaAbbrev = tail.toByteArray();
726 if (sign && iana.startsWith("Etc/")) { // Reverse convention for offsets
727 if (sign == '-')
728 ianaAbbrev = ianaAbbrev.replace('-', '+');
729 else if (sign == '+')
730 ianaAbbrev = ianaAbbrev.replace('+', '-');
731 }
732 }
733 // See https://www.unicode.org/reports/tr35/tr35-dates.html#Time_Zone_Goals
734 // under "Composition", point 3:
735 if (maybeCityName)
736 ianaTail = tail.toByteArray().replace('_', ' ');
737 }; // end scanIana
738
739 scanIana(m_id);
740 if (QByteArrayView iana = aliasToIana(m_id); !iana.isEmpty() && iana != m_id)
741 scanIana(iana);
742
743 // Requires locData, nextData set suitably - save repetition of member:
744#define tableLookup(table, member, sought, test)
745 findTableEntryFor(QSpan(table).first(nextData.member).sliced(locData.member), sought, test)
746 // Note: any commas in test need to be within parentheses; but the only
747 // comma a comparison should need is in its (parenthesised) parameter list.
748
749 const QList<qsizetype> indices = fallbackLocalesFor(locale.d->m_index);
750 QString exemplarCity; // In case we need it.
751 const auto metaIdBefore = [](auto &row, quint16 key) { return row.metaIdIndex < key; };
752
753 // First try for an actual name:
754 for (const qsizetype locInd : indices) {
755 const LocaleZoneData &locData = localeZoneData[locInd];
756 // After the row for the last actual locale, there's a terminal row:
757 Q_ASSERT(std::size_t(locInd) < std::size(localeZoneData) - 1);
758 const LocaleZoneData &nextData = localeZoneData[locInd + 1];
759
760 QByteArrayView iana{m_id};
761 if (quint16 metaKey = metaZoneAt(iana, atMSecsSinceEpoch)) {
762 if (const MetaZoneData *metaFrom = metaZoneStart(metaKey)) {
763 quint16 metaIdIndex = metaFrom->metaIdIndex;
764 QLocaleData::DataRange range{0, 0};
765 const char16_t *strings = nullptr;
766 if (nameType == QTimeZone::ShortName) {
767 auto row = tableLookup(localeMetaZoneShortNameTable, m_metaShortTableStart,
768 metaIdIndex, metaIdBefore);
769 if (row && row->metaIdIndex == metaIdIndex) {
770 range = row->shortName(timeType);
771 strings = shortMetaZoneNameTable;
772 }
773 } else { // LongName or DefaultName
774 auto row = tableLookup(localeMetaZoneLongNameTable, m_metaLongTableStart,
775 metaIdIndex, metaIdBefore);
776 if (row && row->metaIdIndex == metaIdIndex) {
777 range = row->longName(timeType);
778 strings = longMetaZoneNameTable;
779 }
780 }
781 Q_ASSERT(strings || !range.size);
782
783 if (range.size)
784 return range.getData(strings);
785
786 if (const auto *metaRow = metaZoneDataFor(metaFrom, locale.territory()))
787 iana = metaRow->ianaId(); // Use IANA ID of zone in use at that time
788 }
789 }
790
791 // Use exemplar city from closest match to locale, m_id:
792 if (exemplarCity.isEmpty()) {
793 exemplarCity = exemplarCityFor(locData, nextData, m_id);
794 if (exemplarCity.isEmpty())
795 exemplarCity = exemplarCityFor(locData, nextData, iana);
796 }
797 if (iana != m_id) // Check for hints to abbreviation and exemplar city:
798 scanIana(iana);
799
800 // That may give us a revised IANA ID; if the first search fails, fall back
801 // to m_id, if different.
802 do {
803 auto row = tableLookup(
804 localeZoneNameTable, m_zoneTableStart,
805 iana, [](auto &row, QByteArrayView key) { return row.ianaId() < key; });
806 if (row && row->ianaId() == iana) {
807 QLocaleData::DataRange range = row->name(nameType, timeType);
808 if (range.size) {
809 auto table = nameType == QTimeZone::ShortName
810 ? shortZoneNameTable
811 : longZoneNameTable;
812 return range.getData(table);
813 }
814 }
815 } while (std::exchange(iana, QByteArrayView{m_id}) != m_id);
816 }
817 // Most zones should now have ianaAbbrev or ianaTail set, maybe even both.
818 // We've now tried all the candidates we'll see for those.
819 // If an IANA ID's last component looked like a city name, use it.
820 if (exemplarCity.isEmpty() && !ianaTail.isEmpty())
821 exemplarCity = QString::fromLatin1(ianaTail); // It's ASCII
822
823 switch (nameType) {
824 case QTimeZone::DefaultName:
825 case QTimeZone::LongName:
826 for (const qsizetype locInd : indices) {
827 const LocaleZoneData &locData = localeZoneData[locInd];
828 QStringView regionFormat
829 = locData.regionFormatRange(timeType).viewData(regionFormatTable);
830 if (!regionFormat.isEmpty()) {
831 QString where = exemplarCity;
832 // TODO: if empty, use territory name
833 if (!where.isEmpty())
834 return regionFormat.arg(where);
835 }
836 }
837#if 0 // See comment within.
838 for (const qsizetype locInd : indices) {
839 const LocaleZoneData &locData = localeZoneData[locInd];
840 QStringView fallbackFormat = locData.fallbackFormat().viewData(fallbackFormatTable);
841 // Use fallbackFormat - probably never needed, as regionFormat is
842 // never empty, and this also needs city or territory name (along
843 // with metazone name).
844 }
845#endif
846 break;
847
848 case QTimeZone::ShortName:
849 // If an IANA ID's last component looked like an abbreviation (UTC, EST, ...) use it.
850 if (!ianaAbbrev.isEmpty())
851 return QString::fromLatin1(ianaAbbrev); // It's ASCII
852 break;
853
854 case QTimeZone::OffsetName:
855 Q_UNREACHABLE_RETURN(QString());
856 }
857
858#undef tableLookup
859
860 // Final fall-back: ICU seems to use a compact form of offset time for
861 // short-forms it doesn't know. This seems to correspond to the short form
862 // of LDML's Localized GMT format.
863#if QT_CONFIG(datestring)
864 using Flag = QtTemporalPattern::TemporalFieldFlag;
865 constexpr QtTemporalPattern::TemporalFieldFlags compact
866 = Flag::Numeric | Flag::Abbreviated | Flag::AcceptUtcPrefix;
867 return QtTimeZoneLocale::zoneOffsetFormat(locale, locale.d->m_index, compact,
868 QDateTime(), offsetFromUtc);
869#else
870 return {};
871#endif // datestring
872}
873
874// Match what the above might return at the start of a text (usually a tail of a
875// datetime string).
876QTimeZonePrivate::NamePrefixMatch
877QTimeZonePrivate::findLongNamePrefix(QStringView text, const QLocale &locale,
878 std::optional<qint64> atEpochMillis)
879{
880 constexpr std::size_t invalidMetaId = std::size(metaIdData);
881 constexpr std::size_t invalidIanaId = std::size(ianaIdData);
882 constexpr QTimeZone::TimeType timeTypes[] = {
883 // In preference order, should more than one match:
884 QTimeZone::GenericTime,
885 QTimeZone::StandardTime,
886 QTimeZone::DaylightTime,
887 };
888 struct {
889 qsizetype nameLength = 0;
890 QTimeZone::TimeType timeType = QTimeZone::GenericTime;
891 quint16 ianaIdIndex = invalidIanaId;
892 quint16 metaIdIndex = invalidMetaId;
893 QLocale::Territory where = QLocale::AnyTerritory;
894 } best;
895#define localeRows(table, member) QSpan(table).first(nextData.member).sliced(locData.member)
896
897 const QList<qsizetype> indices = fallbackLocalesFor(locale.d->m_index);
898 for (const qsizetype locInd : indices) {
899 const LocaleZoneData &locData = localeZoneData[locInd];
900 // After the row for the last actual locale, there's a terminal row:
901 Q_ASSERT(std::size_t(locInd) < std::size(localeZoneData) - 1);
902 const LocaleZoneData &nextData = localeZoneData[locInd + 1];
903
904 // TODO: support for FlexSpace, IgnoreCase and maybe others.
905 const auto metaRows = localeRows(localeMetaZoneLongNameTable, m_metaLongTableStart);
906 for (const LocaleMetaZoneLongNames &row : metaRows) {
907 for (const QTimeZone::TimeType type : timeTypes) {
908 QLocaleData::DataRange range = row.longName(type);
909 if (range.size > best.nameLength) {
910 QStringView name = range.viewData(longMetaZoneNameTable);
911 if (text.startsWith(name)) {
912 best = { static_cast<qsizetype>(range.size), type,
913 invalidIanaId, row.metaIdIndex };
914 if (best.nameLength >= text.size())
915 break;
916 }
917 }
918 }
919 if (best.nameLength >= text.size())
920 break;
921 }
922
923 const auto ianaRows = localeRows(localeZoneNameTable, m_zoneTableStart);
924 for (const LocaleZoneNames &row : ianaRows) {
925 for (const QTimeZone::TimeType type : timeTypes) {
926 QLocaleData::DataRange range = row.longName(type);
927 if (range.size > best.nameLength) {
928 QStringView name = range.viewData(longZoneNameTable);
929 // Save potentially expensive "zone is supported" check when possible:
930 bool gotZone = row.ianaIdIndex == best.ianaIdIndex
931 || QTimeZone::isTimeZoneIdAvailable(row.ianaId());
932 if (text.startsWith(name) && gotZone)
933 best = { static_cast<qsizetype>(range.size), type, row.ianaIdIndex };
934 }
935 }
936 }
937 }
938 // That's found us our best match, possibly as a meta-zone
939 if (best.metaIdIndex != invalidMetaId) {
940 const auto metaIdBefore = [](auto &row, quint16 key) { return row.metaIdIndex < key; };
941 // Find the standard IANA ID for this meta-zone (or one for another
942 // supported zone using the meta-zone at the specified time).
943 const MetaZoneData *metaRow =
944 std::lower_bound(std::begin(metaZoneTable), std::end(metaZoneTable),
945 best.metaIdIndex, metaIdBefore);
946 // Table is sorted by metazone, then territory.
947 for (; metaRow < std::end(metaZoneTable)
948 && metaRow->metaIdIndex == best.metaIdIndex; ++metaRow) {
949 auto metaLand = QLocale::Territory(metaRow->territory);
950 // World entry is the "standard" zone for this metazone, so always
951 // prefer it over any territory-specific one (from an earlier row):
952 if ((best.where == QLocale::AnyTerritory || metaLand == QLocale::World)
953 && (atEpochMillis
954 ? metaRow->metaZoneKey == metaZoneAt(metaRow->ianaId(), *atEpochMillis)
955 : zoneEverInMeta(metaRow->ianaId(), metaRow->metaZoneKey))) {
956 if (metaRow->ianaIdIndex == best.ianaIdIndex
957 || QTimeZone::isTimeZoneIdAvailable(metaRow->ianaId())) {
958 best.ianaIdIndex = metaRow->ianaIdIndex;
959 best.where = metaLand;
960 if (best.where == QLocale::World)
961 break;
962 }
963 }
964 }
965 }
966 if (best.ianaIdIndex != invalidIanaId)
967 return { QByteArray(ianaIdData + best.ianaIdIndex), best.nameLength, best.timeType };
968
969 // Now try for a region format.
970 // Since we may get the IANA ID directly from a zone, we may not need an
971 // ianaIdIndex from CLDR-derived tables: and the active backend may know
972 // some zones newer than our latest CLDR.
973 NamePrefixMatch found;
974 for (const qsizetype locInd : indices) {
975 const LocaleZoneData &locData = localeZoneData[locInd];
976 const LocaleZoneData &nextData = localeZoneData[locInd + 1];
977 for (const QTimeZone::TimeType timeType : timeTypes) {
978 QStringView regionFormat
979 = locData.regionFormatRange(timeType).viewData(regionFormatTable);
980 // "%0 [Season] Time", "Time in %0 [during Season]" &c.
981 const qsizetype cut = regionFormat.indexOf(u"%0");
982 if (cut < 0) // Shouldn't happen unless empty.
983 continue;
984
985 QStringView prefix = regionFormat.first(cut);
986 // Any text before %0 must appear verbatim at the start of our text:
987 if (cut > 0 && !text.startsWith(prefix))
988 continue;
989 QStringView suffix = regionFormat.sliced(cut + 2); // after %0
990 // This must start with an exemplar city or territory, followed by suffix:
991 QStringView tail = text.sliced(cut);
992
993 // Cheap pretest - any text after %0 must appear *somewhere* in our text:
994 if (suffix.size() && tail.indexOf(suffix) < 0)
995 continue; // No match possible
996
997 // Of course, particularly if just punctuation, a copy of our suffix
998 // might appear within the city or territory name.
999 const auto textMatches = [tail, suffix](QStringView where) {
1000 return (where.isEmpty() || tail.startsWith(where))
1001 && (suffix.isEmpty() || tail.sliced(where.size()).startsWith(suffix));
1002 };
1003
1004 const auto cityRows = localeRows(localeZoneExemplarTable, m_exemplarTableStart);
1005 for (const LocaleZoneExemplar &row : cityRows) {
1006 QStringView city = row.exemplarCity().viewData(exemplarCityTable);
1007 if (textMatches(city)) {
1008 qsizetype length = cut + city.size() + suffix.size();
1009 if (length > found.nameLength) {
1010 bool gotZone = row.ianaId() == found.ianaId // (cheap pre-test)
1011 || QTimeZone::isTimeZoneIdAvailable(row.ianaId());
1012 if (gotZone)
1013 found = { row.ianaId().toByteArray(), length, timeType };
1014 }
1015 }
1016 }
1017 // In localeName() we fall back to the last part of the IANA ID:
1018 const QList<QByteArray> allZones = QTimeZone::availableTimeZoneIds();
1019 for (const auto &iana : allZones) {
1020 Q_ASSERT(!iana.isEmpty());
1021 qsizetype slash = iana.lastIndexOf('/');
1022 QByteArray local = slash > 0 ? iana.sliced(slash + 1) : iana;
1023 QString city = QString::fromLatin1(local.replace('_', ' '));
1024 if (textMatches(city)) {
1025 qsizetype length = cut + city.size() + suffix.size();
1026 if (length > found.nameLength)
1027 found = { iana, length, timeType };
1028 }
1029 }
1030 // TODO: similar for territories, at least once localeName() does so.
1031 }
1032 }
1033#undef localeRows
1034
1035 return found;
1036}
1037
1038QTimeZonePrivate::NamePrefixMatch
1039QTimeZonePrivate::findNarrowOffsetPrefix(QStringView text, const QLocale &locale)
1040{
1041 using Flag = QtTemporalPattern::TemporalFieldFlag;
1042 constexpr auto narrowOffset = Flag::Numeric | Flag::Abbreviated | Flag::AcceptUtcPrefix;
1043 if (const auto match = matchOffsetFormat(text, locale, locale.d->m_index, narrowOffset)) {
1044 // Check offset is sane:
1045 if (QTimeZone::MinUtcOffsetSecs <= match.offset
1046 && match.offset <= QTimeZone::MaxUtcOffsetSecs) {
1047
1048 // Although we don't have an IANA ID, the ISO offset format text
1049 // should match what the QLocale(ianaId) constructor accepts, which
1050 // is good enough for our purposes.
1051 return { isoOffsetFormat(match.offset, QTimeZone::OffsetName).toLatin1(),
1052 match.size, QTimeZone::GenericTime };
1053 }
1054 }
1055 return {};
1056}
1057
1058QTimeZonePrivate::NamePrefixMatch
1059QTimeZonePrivate::findOffsetPrefix(QStringView text, const QLocale &locale,
1060 QtTemporalPattern::TemporalFieldFlags flags)
1061{
1062 NamePrefixMatch best;
1063 if (text.isEmpty())
1064 return best;
1065
1066 const auto idForOffset = [](int offsetSeconds) -> QByteArray {
1067 if (!offsetSeconds)
1068 return "UTC";
1069 return isoOffsetFormat(offsetSeconds, QTimeZone::OffsetName).toLatin1();
1070 };
1071
1072 const auto match = matchOffsetFormat(text, locale, locale.d->m_index, flags);
1073 if (match && match.size > best.nameLength
1074 && QTimeZone::MinUtcOffsetSecs <= match.offset
1075 && match.offset <= QTimeZone::MaxUtcOffsetSecs) {
1076 best = { idForOffset(match.offset), match.size, QTimeZone::GenericTime };
1077 }
1078
1079 return best;
1080}
1081#endif // ICU or not
1082
1083QT_END_NAMESPACE
QList< QByteArrayView > ianaIdsForTerritory(QLocale::Territory territory)
#define tableLookup(table, member, sought, test)
#define localeRows(table, member)