Qt
Internal/Contributor docs for the Qt SDK. Note: These are NOT official API docs; those are found at https://doc.qt.io/
Loading...
Searching...
No Matches
qtimezonelocale.cpp
Go to the documentation of this file.
1// Copyright (C) 2024 The Qt Company Ltd.
2// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
3// Qt-Security score:critical reason:data-parser
4
5#include <private/qtimezonelocale_p.h>
6#include <private/qtimezoneprivate_p.h>
7
8#if !QT_CONFIG(icu)
9# include <QtCore/qspan.h>
10# include <private/qdatetime_p.h>
11# include <private/qtools_p.h>
12// Use data generated from CLDR:
13# include "qtimezonelocale_data_p.h"
14# include "qtimezoneprivate_data_p.h"
15# ifdef QT_CLDR_ZONE_DEBUG
16# include "../text/qlocale_data_p.h"
17QT_BEGIN_NAMESPACE
// Debug-only sanity check (QT_CLDR_ZONE_DEBUG): QLocale's locale_data and our
// localeZoneData must have the same number of rows.
static_assert(std::size(locale_data) == std::size(QtTimeZoneLocale::localeZoneData));
// Size includes terminal rows: for now, they do match in tag IDs, but they needn't.
// Compile-time, row-by-row check that both tables carry the same language,
// script and territory IDs at every index.
static_assert([]() {
    for (std::size_t i = 0; i < std::size(locale_data); ++i) {
        const auto &loc = locale_data[i];
        const auto &zone = QtTimeZoneLocale::localeZoneData[i];
        if (loc.m_language_id != zone.m_language_id
            || loc.m_script_id != zone.m_script_id
            || loc.m_territory_id != zone.m_territory_id) {
            return false;
        }
    }
    return true;
}());
32QT_END_NAMESPACE
33# endif
34#endif
35
36QT_BEGIN_NAMESPACE
37
38using namespace Qt::StringLiterals;
39
40#if QT_CONFIG(icu) // Get data from ICU:
41namespace {
42
43// Convert TimeType and NameType into ICU UCalendarDisplayNameType
44UCalendarDisplayNameType ucalDisplayNameType(QTimeZone::TimeType timeType,
45 QTimeZone::NameType nameType)
46{
47 // TODO ICU C UCalendarDisplayNameType does not support full set of C++ TimeZone::EDisplayType
48 // For now, treat Generic as Standard
49 switch (nameType) {
50 case QTimeZone::ShortName:
51 return timeType == QTimeZone::DaylightTime ? UCAL_SHORT_DST : UCAL_SHORT_STANDARD;
52 case QTimeZone::DefaultName:
53 case QTimeZone::LongName:
54 return timeType == QTimeZone::DaylightTime ? UCAL_DST : UCAL_STANDARD;
55 case QTimeZone::OffsetName:
56 Q_UNREACHABLE(); // Callers of ucalTimeZoneDisplayName() should take care of OffsetName.
57 }
58 Q_UNREACHABLE_RETURN(UCAL_STANDARD);
59}
60
61} // nameless namespace
62
63namespace QtTimeZoneLocale {
64
65// Qt wrapper around ucal_getTimeZoneDisplayName()
66// Used directly by ICU backend; indirectly by TZ (see below).
67QString ucalTimeZoneDisplayName(UCalendar *ucal,
68 QTimeZone::TimeType timeType,
69 QTimeZone::NameType nameType,
70 const QByteArray &localeCode)
71{
72 constexpr int32_t BigNameLength = 50;
73 int32_t size = BigNameLength;
74 QString result(size, Qt::Uninitialized);
75 auto dst = [&result]() { return reinterpret_cast<UChar *>(result.data()); };
76 UErrorCode status = U_ZERO_ERROR;
77 const UCalendarDisplayNameType utype = ucalDisplayNameType(timeType, nameType);
78
79 // size = ucal_getTimeZoneDisplayName(cal, type, locale, result, resultLength, status)
80 size = ucal_getTimeZoneDisplayName(ucal, utype, localeCode.constData(),
81 dst(), size, &status);
82
83 // If overflow, then resize and retry
84 if (size > BigNameLength || status == U_BUFFER_OVERFLOW_ERROR) {
85 result.resize(size);
86 status = U_ZERO_ERROR;
87 size = ucal_getTimeZoneDisplayName(ucal, utype, localeCode.constData(),
88 dst(), size, &status);
89 }
90
91 if (!U_SUCCESS(status))
92 return QString();
93
94 // Resize and return:
95 result.resize(size);
96 return result;
97}
98
99bool ucalKnownTimeZoneId(const QString &ianaStr)
100{
101 const UChar *const name = reinterpret_cast<const UChar *>(ianaStr.constData());
102 // We are not interested in the value, but we have to pass something.
103 // No known IANA zone name is (up to 2023) longer than 30 characters.
104 constexpr size_t size = 64;
105 UChar buffer[size];
106
107 // TODO: convert to ucal_getIanaTimeZoneID(), new draft in ICU 74, once we
108 // can rely on its availability, assuming it works the same once not draft.
109 UErrorCode status = U_ZERO_ERROR;
110 UBool isSys = false;
111 // Returns the length of the IANA zone name (but we don't care):
112 ucal_getCanonicalTimeZoneID(name, ianaStr.size(), buffer, size, &isSys, &status);
113 // We're only interested if the result is a "system" (i.e. IANA) ID:
114 return isSys;
115}
116
117} // QtTimeZoneLocale
118
119// Used by TZ backends when ICU is available:
120QString QTimeZonePrivate::localeName(qint64 atMSecsSinceEpoch, int offsetFromUtc,
121 QTimeZone::TimeType timeType,
122 QTimeZone::NameType nameType,
123 const QLocale &locale) const
124{
125 Q_UNUSED(atMSecsSinceEpoch);
126 // TODO: use CLDR data for the offset name.
127 // No ICU API for offset formats, so fall back to our ISO one, even if
128 // locale isn't C:
129 if (nameType == QTimeZone::OffsetName)
130 return isoOffsetFormat(offsetFromUtc);
131
132 const QString id = QString::fromUtf8(m_id);
133 // Need to check id is known to ICU, since ucal_open() will return a
134 // misleading "valid" GMT ucal when it doesn't recognise id.
135 if (!QtTimeZoneLocale::ucalKnownTimeZoneId(id))
136 return QString();
137
138 const QByteArray loc = locale.name().toUtf8();
139 UErrorCode status = U_ZERO_ERROR;
140 // TODO: QTBUG-124271 can we cache any of this ?
141 UCalendar *ucal = ucal_open(reinterpret_cast<const UChar *>(id.data()), id.size(),
142 loc.constData(), UCAL_DEFAULT, &status);
143 if (ucal && U_SUCCESS(status)) {
144 auto tidier = qScopeGuard([ucal]() { ucal_close(ucal); });
145 return QtTimeZoneLocale::ucalTimeZoneDisplayName(ucal, timeType, nameType, loc);
146 }
147 return QString();
148}
149#else // No ICU, use QTZ[LP]_data_p.h data for feature timezone_locale.
namespace QtTimeZoneLocale {
// Inline methods promised in QTZL_p.h
// Both accessors return a view of the IANA ID that starts at offset
// ianaIdIndex within the shared ianaIdData table.
using namespace QtTimeZoneCldr; // QTZP_data_p.h
constexpr QByteArrayView LocaleZoneExemplar::ianaId() const { return ianaIdData + ianaIdIndex; }
constexpr QByteArrayView LocaleZoneNames::ianaId() const { return ianaIdData + ianaIdIndex; }
} // QtTimeZoneLocale
156
157namespace {
158using namespace QtTimeZoneLocale; // QTZL_p.h QTZL_data_p.h
159using namespace QtTimeZoneCldr; // QTZP_data_p.h
160// Accessors for the QTZL_data_p.h
161
162template <typename Row, typename Sought, typename Condition>
163const Row *findTableEntryFor(const QSpan<Row> data, Sought value, Condition test)
164{
165 // We have the present locale's data (if any). Its rows are sorted on
166 // (localeIndex and) a field for which we want the Sought value. The test()
167 // compares that field.
168 auto begin = data.begin(), end = data.end();
169 Q_ASSERT(begin == end || end->localeIndex > begin->localeIndex);
170 Q_ASSERT(begin == end || end[-1].localeIndex == begin->localeIndex);
171 auto row = std::lower_bound(begin, end, value, test);
172 return row == end ? nullptr : row;
173}
174
175QString exemplarCityFor(const LocaleZoneData &locale, const LocaleZoneData &next,
176 QByteArrayView iana)
177{
178 auto xct = findTableEntryFor(
179 QSpan(localeZoneExemplarTable).first(next.m_exemplarTableStart
180 ).sliced(locale.m_exemplarTableStart),
181 iana, [](auto &row, QByteArrayView key) { return row.ianaId() < key; });
182 if (xct && xct->ianaId() == iana)
183 return xct->exemplarCity().getData(exemplarCityTable);
184 return {};
185}
186
187// Accessors for the QTZP_data_p.h
// Clip a count of minutes since the epoch to the quint32 range used by
// ZoneMetaHistory's begin and end fields.
// Times before the epoch map to epoch (0); times too late to represent map to
// the reserved "ragnarok" value (0xfffffffe); all others are returned unchanged.
quint32 clipEpochMinute(qint64 epochMinute)
{
    // ZoneMetaHistory's quint32 UTC epoch minutes.
    // Dates from 1970-01-01 to 10136-02-16 (at 04:14) are representable.
    constexpr quint32 epoch = 0;
    // Since the .end value of an interval that does end is the first epoch
    // minutes *after* the interval, intervalEndsBefore() uses a <= test. The
    // value ~epoch (0xffffffff) is used as a sentinel value to mean "there is
    // no end", so we need a value strictly less than it for "epoch minutes too
    // big to represent" so that this value is less than "no end". So the value
    // 1 ^ ~epoch (0xfffffffe) is reserved as this "unrepresentably late time"
    // and the scripts to generate data assert that no actual interval ends then
    // or later.
    constexpr quint32 ragnarok = 1 ^ ~epoch;
    // Times before the epoch are not representable: a plain cast would wrap
    // them round to huge values (-1 would even become the "no end" sentinel),
    // so clamp them to the earliest representable minute instead.
    if (epochMinute < 0)
        return epoch;
    // Same test as epochMinute + 1 >= ragnarok, without risk of overflow:
    if (epochMinute >= qint64(ragnarok) - 1)
        return ragnarok;
    return quint32(epochMinute);
}
204
205constexpr bool intervalEndsBefore(const ZoneMetaHistory &record, quint32 dt) noexcept
206{
207 // See clipEpochMinute()'s explanation of ragnarok for why this is <=
208 return record.end <= dt;
209}
210
211/* The metaZoneKey of the ZoneMetaHistory entry whose ianaId() is equal to the
212 given zoneId, for which atMSecsSinceEpoch refers to an instant between its
213 begin and end. Returns zero if there is no such ZoneMetaHistory entry.
214*/
215quint16 metaZoneAt(QByteArrayView zoneId, qint64 atMSecsSinceEpoch)
216{
217 using namespace QtPrivate::DateTimeConstants;
218 auto it = std::lower_bound(std::begin(zoneHistoryTable), std::end(zoneHistoryTable), zoneId,
219 [](const ZoneMetaHistory &record, QByteArrayView id) {
220 return record.ianaId().compare(id, Qt::CaseInsensitive) < 0;
221 });
222 if (it == std::end(zoneHistoryTable) || it->ianaId().compare(zoneId, Qt::CaseInsensitive) > 0)
223 return 0;
224 const auto stop =
225 std::upper_bound(it, std::end(zoneHistoryTable), zoneId,
226 [](QByteArrayView id, const ZoneMetaHistory &record) {
227 return id.compare(record.ianaId(), Qt::CaseInsensitive) < 0;
228 });
229 const quint32 dt = clipEpochMinute(atMSecsSinceEpoch / MSECS_PER_MIN);
230 it = std::lower_bound(it, stop, dt, intervalEndsBefore);
231 return it != stop && it->begin <= dt ? it->metaZoneKey : 0;
232}
233
234// True if the named zone is ever part of the specified metazone:
235bool zoneEverInMeta(QByteArrayView zoneId, quint16 metaKey)
236{
237 for (auto it = std::lower_bound(std::begin(zoneHistoryTable), std::end(zoneHistoryTable),
238 zoneId,
239 [](const ZoneMetaHistory &record, QByteArrayView id) {
240 return record.ianaId().compare(id, Qt::CaseInsensitive) < 0;
241 });
242 it != std::end(zoneHistoryTable) && it->ianaId().compare(zoneId, Qt::CaseInsensitive) == 0;
243 ++it) {
244 if (it->metaZoneKey == metaKey)
245 return true;
246 }
247 return false;
248}
249
250constexpr bool dataBeforeMeta(const MetaZoneData &row, quint16 metaKey) noexcept
251{
252 return row.metaZoneKey < metaKey;
253}
254
255constexpr bool metaDataBeforeTerritory(const MetaZoneData &row, qint16 territory) noexcept
256{
257 return row.territory < territory;
258}
259
260const MetaZoneData *metaZoneStart(quint16 metaKey)
261{
262 const MetaZoneData *const from =
263 std::lower_bound(std::begin(metaZoneTable), std::end(metaZoneTable),
264 metaKey, dataBeforeMeta);
265 if (from == std::end(metaZoneTable) || from->metaZoneKey != metaKey) {
266 qWarning("No metazone data found for metazone key %d", metaKey);
267 return nullptr;
268 }
269 return from;
270}
271
272const MetaZoneData *metaZoneDataFor(const MetaZoneData *from, QLocale::Territory territory)
273{
274 const quint16 metaKey = from->metaZoneKey;
275 const MetaZoneData *const end =
276 std::lower_bound(from, std::end(metaZoneTable), metaKey + 1, dataBeforeMeta);
277 Q_ASSERT(end != from && end[-1].metaZoneKey == metaKey);
278 QLocale::Territory land = territory;
279 do {
280 const MetaZoneData *row =
281 std::lower_bound(from, end, qint16(land), metaDataBeforeTerritory);
282 if (row != end && QLocale::Territory(row->territory) == land) {
283 Q_ASSERT(row->metaZoneKey == metaKey);
284 return row;
285 }
286 // Fall back to World (if territory itself isn't World).
287 } while (std::exchange(land, QLocale::World) != QLocale::World);
288
289 qWarning("Metazone %s lacks World data for %ls",
290 from->metaZoneId().constData(),
291 qUtf16Printable(QLocale::territoryToString(territory)));
292 return nullptr;
293}
294
295QString addPadded(qsizetype width, const QString &zero, const QString &number, QString &&onto)
296{
297 // TODO (QTBUG-122834): QLocale::toString() should support zero-padding directly.
298 width -= number.size() / zero.size();
299 while (width > 0) {
300 onto += zero;
301 --width;
302 }
303 return std::move(onto) + number;
304}
305
// Expand an hour-format pattern (as read from hourFormatTable by the caller)
// for a non-negative offset given in minutes.
//
// Runs of 'H' are replaced by the hours and runs of 'm' by the minutes, both
// rendered using locale. Text between single quotes is copied literally, with
// a doubled quote standing for one quote character. Everything else is copied
// unchanged. When form is QLocale::NarrowFormat the numbers are not
// zero-padded, and zero minutes are omitted along with a ':' or '.' that
// immediately precedes them in the result.
QString formatOffset(QStringView format, int offsetMinutes, const QLocale &locale,
                     QLocale::FormatType form)
{
    Q_ASSERT(offsetMinutes >= 0);
    const QString hour = locale.toString(offsetMinutes / 60);
    const QString mins = locale.toString(offsetMinutes % 60);
    // If zero.size() > 1, digits are surrogate pairs; each only counts one
    // towards width of the field, even if it contributes more to result.size().
    const QString zero = locale.zeroDigit();
    QStringView tail = format; // The part of format not yet processed.
    QString result;
    while (!tail.isEmpty()) {
        if (tail.startsWith(u'\'')) {
            // Quoted literal text: find the matching close-quote.
            qsizetype end = tail.indexOf(u'\'', 1);
            if (end < 0) {
                qWarning("Unbalanced quote in offset format string: %s",
                         format.toUtf8().constData());
                return result + tail; // Include the quote; format is bogus.
            } else if (end == 1) {
                // Special case: adjacent quotes signify a simple quote.
                result += u'\'';
                tail = tail.sliced(2);
            } else {
                Q_ASSERT(end > 1); // We searched from index 1.
                while (end + 1 < tail.size() && tail[end + 1] == u'\'') {
                    // Special case: adjacent quotes inside a quoted string also
                    // signify a simple quote.
                    result += tail.sliced(1, end); // Include a quote at the end
                    tail = tail.sliced(end + 1); // Still starts with a quote
                    end = tail.indexOf(u'\'', 1); // Where's the next ?
                    if (end < 0) {
                        qWarning("Unbalanced quoted quote in offset format string: %s",
                                 format.toUtf8().constData());
                        return result + tail;
                    }
                    Q_ASSERT(end > 0);
                }
                // Skip leading and trailing quotes:
                result += tail.sliced(1, end - 1);
                tail = tail.sliced(end + 1);
            }
        } else if (tail.startsWith(u'H')) {
            // Hour field: its width is the length of the run of 'H'.
            qsizetype width = 1;
            while (width < tail.size() && tail[width] == u'H')
                ++width;
            tail = tail.sliced(width);
            if (form != QLocale::NarrowFormat)
                result = addPadded(width, zero, hour, std::move(result));
            else
                result += hour;
        } else if (tail.startsWith(u'm')) {
            // Minute field: its width is the length of the run of 'm'.
            qsizetype width = 1;
            while (width < tail.size() && tail[width] == u'm')
                ++width;
            // (At CLDR v45, all locales use two-digit minutes.)
            // (No known zone has single-digit non-zero minutes.)
            tail = tail.sliced(width);
            if (form != QLocale::NarrowFormat)
                result = addPadded(width, zero, mins, std::move(result));
            else if (offsetMinutes % 60)
                result += mins;
            else if (result.endsWith(u':') || result.endsWith(u'.'))
                result.chop(1); // Narrow form, zero minutes: drop the separator too.
            // (At CLDR v45, mm follows H either immediately or after a colon or dot.)
        } else if (tail[0].isHighSurrogate() && tail.size() > 1
                   && tail[1].isLowSurrogate()) {
            // Copy a surrogate pair as a unit.
            result += tail.first(2);
            tail = tail.sliced(2);
        } else {
            // Any other character is copied through unchanged.
            result += tail.front();
            tail = tail.sliced(1);
        }
    }
    return result;
}
381
// Result of trying to match offset text at the start of a string.
struct OffsetFormatMatch
{
    qsizetype size = 0; // Number of UTF-16 code units matched; 0 means no match.
    int offset = 0; // Matched offset, in seconds (matchOffsetText() stores (hour * 60 + minute) * 60).
    // A match is considered successful only if it consumed some text:
    operator bool() const { return size > 0; }
};
388
// Try to match the start of text against an hour-format pattern (a literal
// prefix, a run of 'H', an optional separator and a final "mm").
//
// On success, the result's size is the number of UTF-16 code units of text
// consumed and its offset is the unsigned offset in seconds. Otherwise the
// result is a default OffsetFormatMatch (size 0), which tests false.
// See zoneOffsetFormat() for what each value of scale means.
OffsetFormatMatch matchOffsetText(QStringView text, QStringView format, const QLocale &locale,
                                  QLocale::FormatType scale)
{
    // Sign is taken care of by caller.
    // TODO (QTBUG-77948): rework in terms of text pattern matchers.
    // For now, don't try to be general, it gets too tricky.
    OffsetFormatMatch res;
    // At least at CLDR v46:
    // Amharic in Ethiopia has ±HHmm formats; all others use separators.
    // None have single m. All have H or HH before mm. (None has anything after mm.)
    // In narrow format, mm and its preceding separator are elided for 0
    // minutes; and hour may be single digit even if the format says HH.
    const QString zero = locale.zeroDigit();
    // Everything before the first 'H' is a literal prefix that text must start with:
    qsizetype cut = format.indexOf(u'H');
    if (cut < 0 || !text.startsWith(format.first(cut)) || !format.endsWith(u"mm"))
        return res;
    text = text.sliced(cut);
    QStringView sep = format.sliced(cut).chopped(2); // Prune prefix and "mm".
    int hlen = 1; // We already know we have one 'H' at the start of sep.
    while (hlen < sep.size() && sep[hlen] == u'H')
        ++hlen;
    sep = sep.sliced(hlen); // What remains is the hour-minute separator (maybe empty).

    // True if the index-th digit-sized slot of text holds a digit; a digit
    // takes as many code units as the locale's zero digit does.
    const auto hasDigitAt = [digitWidth = zero.size(), text](qsizetype index) {
        if (digitWidth == 1)
            return index < text.size() && text[index].isDigit();
        Q_ASSERT(digitWidth == 2);
        const qsizetype offset = index * 2;
        if (offset + 1 >= text.size())
            return false;
        if (!text[offset].isHighSurrogate() || !text[offset + 1].isLowSurrogate())
            return false;
        const char32_t ch = QChar::surrogateToUcs4(text[offset], text[offset + 1]);
        return QChar::isDigit(ch);
    };
    int digits = 0; // Count of digits: multiply by zero.size() for indexing.
    while (digits < 4 && hasDigitAt(digits))
        ++digits;

    // See zoneOffsetFormat() for the eccentric meaning of scale.
    QStringView minStr;
    if (sep.isEmpty()) {
        // No separator: hours and minutes (if any) form one run of digits.
        if (digits > hlen) {
            // Long and Short formats allow two-digit match when hlen < 2.
            if (scale == QLocale::NarrowFormat || (hlen < 2 && !text.startsWith(zero)))
                hlen = digits - 2;
            else if (digits < hlen + 2)
                return res;
            minStr = text.sliced(hlen * zero.size()).first(2 * zero.size());
        } else if (scale == QLocale::NarrowFormat) {
            hlen = digits;
        } else if (hlen != digits) {
            return res;
        }
    } else {
        const qsizetype sepAt = text.indexOf(sep); // May be -1; digits isn't < -1.
        if (digits * zero.size() < sepAt) // Separator doesn't immediately follow hour.
            return res;
        if (scale == QLocale::NarrowFormat || (hlen < 2 && !text.startsWith(zero)))
            hlen = digits;
        else if (digits != hlen)
            return res;
        if (sepAt >= 0 && text.size() >= sepAt + sep.size() + 2 * zero.size())
            minStr = text.sliced(sepAt + sep.size()).first(2 * zero.size());
        else if (scale != QLocale::NarrowFormat)
            return res;
        else if (sepAt >= 0) // Allow minutes without zero-padding in narrow format.
            minStr = text.sliced(sepAt + sep.size());
    }
    if (hlen < 1)
        return res;

    bool ok = true;
    uint minute = minStr.isEmpty() ? 0 : locale.toUInt(minStr, &ok);
    if (!ok && scale == QLocale::NarrowFormat) {
        // Fall back to matching hour-only form:
        minStr = {};
        ok = true;
    }
    if (ok && minute < 60) {
        uint hour = locale.toUInt(text.first(hlen * zero.size()), &ok);
        if (ok) {
            res.offset = (hour * 60 + minute) * 60;
            // Matched length: prefix and hour digits, plus separator and minutes if used.
            res.size = cut + hlen * zero.size();
            if (!minStr.isEmpty())
                res.size += sep.size() + minStr.size();
        }
    }
    return res;
}
479
// Try to match the start of text as a zone offset, using the formats that
// localeZoneData[locInd] supplies.
//
// For QLocale::ShortFormat only the bare hour formats (positive, then
// negative) are tried. Otherwise text must match the locale's GMT-offset
// format, whose "%0" placeholder stands for the hour-format text. The result's
// offset is negated when the negative hour format was the one that matched.
// Returns a default OffsetFormatMatch (which tests false) when nothing matches.
OffsetFormatMatch matchOffsetFormat(QStringView text, const QLocale &locale, qsizetype locInd,
                                    QLocale::FormatType scale)
{
    const LocaleZoneData &locData = localeZoneData[locInd];
    const QStringView posHourForm = locData.posHourFormat().viewData(hourFormatTable);
    const QStringView negHourForm = locData.negHourFormat().viewData(hourFormatTable);
    // For the negative format, allow U+002d to match U+2212 or locale.negativeSign();
    const bool mapNeg = text.contains(u'-')
        && (negHourForm.contains(u'\u2212') || negHourForm.contains(locale.negativeSign()));
    // See zoneOffsetFormat() for the eccentric meaning of scale.
    if (scale == QLocale::ShortFormat) {
        if (auto match = matchOffsetText(text, posHourForm, locale, scale))
            return match;
        if (auto match = matchOffsetText(text, negHourForm, locale, scale))
            return { match.size, -match.offset };
        if (mapNeg) {
            // Retry with the negative format's sign replaced by ASCII hyphen-minus:
            const QString mapped = negHourForm.toString()
                .replace(u'\u2212', u'-').replace(locale.negativeSign(), "-"_L1);
            if (auto match = matchOffsetText(text, mapped, locale, scale))
                return { match.size, -match.offset };
        }
    } else {
        const QStringView offsetFormat = locData.offsetGmtFormat().viewData(gmtFormatTable);
        if (const qsizetype cut = offsetFormat.indexOf(u"%0"); cut >= 0) { // Should be present
            const QStringView gmtPrefix = offsetFormat.first(cut);
            const QStringView gmtSuffix = offsetFormat.sliced(cut + 2); // After %0
            // Combined length of the prefix and suffix around the offset text:
            const qsizetype gmtSize = cut + gmtSuffix.size();
            // True if gmtPrefix starts with lhs while text starts with rhs
            // (of the same length) and their remainders agree.
            const auto crossMatch = [gmtPrefix, text]
                (QLatin1StringView lhs, QLatin1StringView rhs) {
                const qsizetype len = lhs.size();
                Q_ASSERT(len == rhs.size());
                if (!gmtPrefix.startsWith(lhs) || !text.startsWith(rhs))
                    return false;
                if (gmtPrefix.size() == len)
                    return true;
                return text.sliced(len).startsWith(gmtPrefix.sliced(len));
            };
            // Cheap pre-test: check suffix does appear after prefix, albeit we must
            // later check it actually appears right after the offset text:
            if ((gmtPrefix.isEmpty() || text.startsWith(gmtPrefix)
                 // Treat GMT and UTC as matches for one another to match
                 // QUtcTimeZonePrivate::displayName()'s kludges:
                 || crossMatch("GMT"_L1, "UTC"_L1) || crossMatch("UTC"_L1, "GMT"_L1))
                && (gmtSuffix.isEmpty() || text.sliced(cut).indexOf(gmtSuffix) >= 0)) {
                if (auto match = matchOffsetText(text.sliced(cut), posHourForm, locale, scale)) {
                    if (text.sliced(cut + match.size).startsWith(gmtSuffix)) // too sliced ?
                        return { gmtSize + match.size, match.offset };
                }
                if (auto match = matchOffsetText(text.sliced(cut), negHourForm, locale, scale)) {
                    if (text.sliced(cut + match.size).startsWith(gmtSuffix))
                        return { gmtSize + match.size, -match.offset };
                } else if (mapNeg) {
                    // Retry with the negative format's sign replaced by ASCII hyphen-minus:
                    const QString mapped = negHourForm.toString()
                        .replace(u'\u2212', u'-').replace(locale.negativeSign(), "-"_L1);
                    if (auto match = matchOffsetText(text.sliced(cut), mapped, locale, scale)) {
                        if (text.sliced(cut + match.size).startsWith(gmtSuffix))
                            return { gmtSize + match.size, -match.offset };
                    }
                }
                // Match empty offset as UTC (unless that'd be an empty match):
                if (gmtSize > 0 && text.sliced(cut).startsWith(gmtSuffix))
                    return { gmtSize, 0 };
            }
        }
    }
    return {};
}
547
548} // nameless namespace
549
550namespace QtTimeZoneLocale {
551
552QList<QByteArrayView> ianaIdsForTerritory(QLocale::Territory territory)
553{
554 QList<QByteArrayView> result;
555 {
556 const TerritoryZone *row =
557 std::lower_bound(std::begin(territoryZoneMap), std::end(territoryZoneMap),
558 qint16(territory),
559 [](const TerritoryZone &row, qint16 territory) {
560 return row.territory < territory;
561 });
562 if (row != std::end(territoryZoneMap) && QLocale::Territory(row->territory) == territory)
563 result << row->ianaId();
564 }
565 for (const MetaZoneData &row : metaZoneTable) {
566 if (QLocale::Territory(row.territory) == territory)
567 result << row.ianaId();
568 }
569 return result;
570}
571
572// The QDateTime is only needed by the fall-back implementation in qlocale.cpp;
573// the calls below don't need to pass a valid QDateTime (based on its
574// atMSecsSinceEpoch); an invalid QDateTime() will suffice and be ignored.
575QString zoneOffsetFormat(const QLocale &locale, qsizetype locInd, QLocale::FormatType width,
576 const QDateTime &, int offsetSeconds)
577{
578 // QLocale::LongFormat gets the full GMT-prefix plus hour offset.
579 // QLocale::ShortFormat gets just the hour offset (with full with).
580 // QLocale::NarrowFormat gets the GMT-prefix plus the pruned hour format.
581 // The last drops :00 for zero minutes and removes leading 0 from the hour.
582 // See the final "zone" section of the table
583 // https://www.unicode.org/reports/tr35/tr35-dates.html#table-date-field-symbol-table
584 // for the full range of LDML-specified formats.
585 const LocaleZoneData &locData = localeZoneData[locInd];
586
587 auto hourFormatR = offsetSeconds < 0 ? locData.negHourFormat() : locData.posHourFormat();
588 QStringView hourFormat = hourFormatR.viewData(hourFormatTable);
589 Q_ASSERT(!hourFormat.isEmpty());
590 // Sign is already handled by choice of the hourFormat:
591 offsetSeconds = qAbs(offsetSeconds);
592 // Offsets are only displayed in minutes - round seconds (if any) to nearest
593 // minute (prefering an even minute when rounding an exact half):
594 const int offsetMinutes = (offsetSeconds + 29 + (1 & (offsetSeconds / 60))) / 60;
595
596 const QString hourOffset = formatOffset(hourFormat, offsetMinutes, locale, width);
597 if (width == QLocale::ShortFormat)
598 return hourOffset;
599
600 QStringView offsetFormat = locData.offsetGmtFormat().viewData(gmtFormatTable);
601 Q_ASSERT(!offsetFormat.isEmpty());
602 return offsetFormat.arg(hourOffset);
603}
604
605} // QtTimeZoneLocale
606
607QString QTimeZonePrivate::localeName(qint64 atMSecsSinceEpoch, int offsetFromUtc,
608 QTimeZone::TimeType timeType,
609 QTimeZone::NameType nameType,
610 const QLocale &locale) const
611{
612 if (nameType == QTimeZone::OffsetName) {
613 // Doesn't need fallbacks, since every locale has hour and offset formats.
614 return QtTimeZoneLocale::zoneOffsetFormat(locale, locale.d->m_index, QLocale::LongFormat,
615 QDateTime(), offsetFromUtc);
616 }
617 // Handling of long names must stay in sync with findLongNamePrefix(), below.
618
619 // An IANA ID may give clues to fall back on for abbreviation or exemplar city:
620 QByteArray ianaAbbrev, ianaTail;
621 const auto scanIana = [&](QByteArrayView iana) {
622 // Scan the name of each zone whose data we consider using and, if the
623 // name gives us a clue to a fallback for which we have nothing better
624 // yet, remember it (and ignore later clues for that fallback).
625 if (!ianaAbbrev.isEmpty() && !ianaTail.isEmpty())
626 return;
627 qsizetype cut = iana.lastIndexOf('/');
628 QByteArrayView tail = cut < 0 ? iana : iana.sliced(cut + 1);
629 // Deal with a couple of special cases
630 if (tail == "McMurdo") { // Exceptional lowercase-uppercase sequence without space
631 if (ianaTail.isEmpty())
632 ianaTail = "McMurdo"_ba;
633 return;
634 } else if (tail == "DumontDUrville") { // Chopped to fit into IANA's 14-char limit
635 if (ianaTail.isEmpty())
636 ianaTail = "Dumont d'Urville"_ba;
637 return;
638 } else if (tail.isEmpty()) {
639 // Custom zone with perverse m_id ?
640 return;
641 }
642 const auto isMixedCaseAbbrev = [tail](char ch) {
643 // cv-RU and en-GU abbreviate Chamorro as ChST
644 // scn-IT abbreviates Cuba as CuT/CuST/CuDT
645 // blo-BJ abbreviates GMT as Gk
646 switch (tail.size()) {
647 case 2: return tail == "Gk";
648 case 3: return tail == "CuT";
649 case 4:
650 if (tail[0] == 'C' && tail[1] == ch && tail[3] == 'T') {
651 switch (ch) {
652 case 'h': return tail[2] == 'S';
653 case 'u': return tail[2] == 'S' || tail[2] == 'D';
654 default: break;
655 }
656 }
657 return false;
658 default:
659 break;
660 }
661 return false;
662 };
663
664 // Even if it is abbr or city name, we don't care if we've found one before.
665 bool maybeAbbr = ianaAbbrev.isEmpty(), maybeCityName = ianaTail.isEmpty(), inword = false;
666 char sign = '\0';
667 for (char ch : tail) {
668 if (ch == '+' || ch == '-') {
669 if (ch == '+' || !inword)
670 maybeCityName = false;
671 inword = false;
672 if (maybeAbbr) {
673 if (sign)
674 maybeAbbr = false; // two signs: no
675 else
676 sign = ch;
677 }
678 } else if (ch == '_') {
679 maybeAbbr = false;
680 if (!inword) // No double-underscore, or leading underscore
681 maybeCityName = false;
682 inword = false;
683 } else if (QChar::isLower(ch)) {
684 maybeAbbr = isMixedCaseAbbrev(ch);
685 // Dar_es_Salaam shows both cases as word starts
686 inword = true;
687 } else if (QChar::isUpper(ch)) {
688 if (sign)
689 maybeAbbr = false;
690 if (inword)
691 maybeCityName = false;
692 inword = true;
693 } else if (QChar::isDigit(ch)) {
694 if (!sign)
695 maybeAbbr = false;
696 maybeCityName = false;
697 inword = false;
698 }
699
700 if (!maybeAbbr && !maybeCityName)
701 break;
702 }
703 if (maybeAbbr && maybeCityName) // No real IANA ID matches both
704 return;
705
706 if (maybeAbbr) {
707 if (tail.endsWith("-0") || tail.endsWith("+0"))
708 tail = tail.chopped(2);
709 ianaAbbrev = tail.toByteArray();
710 if (sign && iana.startsWith("Etc/")) { // Reverse convention for offsets
711 if (sign == '-')
712 ianaAbbrev = ianaAbbrev.replace('-', '+');
713 else if (sign == '+')
714 ianaAbbrev = ianaAbbrev.replace('+', '-');
715 }
716 }
717 // See https://www.unicode.org/reports/tr35/tr35-dates.html#Time_Zone_Goals
718 // under "Composition", point 3:
719 if (maybeCityName)
720 ianaTail = tail.toByteArray().replace('_', ' ');
721 }; // end scanIana
722
723 scanIana(m_id);
724 if (QByteArray iana = aliasToIana(m_id); !iana.isEmpty() && iana != m_id)
725 scanIana(iana);
726
727 // Requires locData, nextData set suitably - save repetition of member:
728#define tableLookup(table, member, sought, test)
729 findTableEntryFor(QSpan(table).first(nextData.member).sliced(locData.member), sought, test)
730 // Note: any commas in test need to be within parentheses; but the only
731 // comma a comparison should need is in its (parenthesised) parameter list.
732
733 const QList<qsizetype> indices = fallbackLocalesFor(locale.d->m_index);
734 QString exemplarCity; // In case we need it.
735 const auto metaIdBefore = [](auto &row, quint16 key) { return row.metaIdIndex < key; };
736
737 // First try for an actual name:
738 for (const qsizetype locInd : indices) {
739 const LocaleZoneData &locData = localeZoneData[locInd];
740 // After the row for the last actual locale, there's a terminal row:
741 Q_ASSERT(std::size_t(locInd) < std::size(localeZoneData) - 1);
742 const LocaleZoneData &nextData = localeZoneData[locInd + 1];
743
744 QByteArrayView iana{m_id};
745 if (quint16 metaKey = metaZoneAt(iana, atMSecsSinceEpoch)) {
746 if (const MetaZoneData *metaFrom = metaZoneStart(metaKey)) {
747 quint16 metaIdIndex = metaFrom->metaIdIndex;
748 QLocaleData::DataRange range{0, 0};
749 const char16_t *strings = nullptr;
750 if (nameType == QTimeZone::ShortName) {
751 auto row = tableLookup(localeMetaZoneShortNameTable, m_metaShortTableStart,
752 metaIdIndex, metaIdBefore);
753 if (row && row->metaIdIndex == metaIdIndex) {
754 range = row->shortName(timeType);
755 strings = shortMetaZoneNameTable;
756 }
757 } else { // LongName or DefaultName
758 auto row = tableLookup(localeMetaZoneLongNameTable, m_metaLongTableStart,
759 metaIdIndex, metaIdBefore);
760 if (row && row->metaIdIndex == metaIdIndex) {
761 range = row->longName(timeType);
762 strings = longMetaZoneNameTable;
763 }
764 }
765 Q_ASSERT(strings || !range.size);
766
767 if (range.size)
768 return range.getData(strings);
769
770 if (const auto *metaRow = metaZoneDataFor(metaFrom, locale.territory()))
771 iana = metaRow->ianaId(); // Use IANA ID of zone in use at that time
772 }
773 }
774
775 // Use exemplar city from closest match to locale, m_id:
776 if (exemplarCity.isEmpty()) {
777 exemplarCity = exemplarCityFor(locData, nextData, m_id);
778 if (exemplarCity.isEmpty())
779 exemplarCity = exemplarCityFor(locData, nextData, iana);
780 }
781 if (iana != m_id) // Check for hints to abbreviation and exemplar city:
782 scanIana(iana);
783
784 // That may give us a revised IANA ID; if the first search fails, fall back
785 // to m_id, if different.
786 do {
787 auto row = tableLookup(
788 localeZoneNameTable, m_zoneTableStart,
789 iana, [](auto &row, QByteArrayView key) { return row.ianaId() < key; });
790 if (row && row->ianaId() == iana) {
791 QLocaleData::DataRange range = row->name(nameType, timeType);
792 if (range.size) {
793 auto table = nameType == QTimeZone::ShortName
794 ? shortZoneNameTable
795 : longZoneNameTable;
796 return range.getData(table);
797 }
798 }
799 } while (std::exchange(iana, QByteArrayView{m_id}) != m_id);
800 }
801 // Most zones should now have ianaAbbrev or ianaTail set, maybe even both.
802 // We've now tried all the candidates we'll see for those.
803 // If an IANA ID's last component looked like a city name, use it.
804 if (exemplarCity.isEmpty() && !ianaTail.isEmpty())
805 exemplarCity = QString::fromLatin1(ianaTail); // It's ASCII
806
807 switch (nameType) {
808 case QTimeZone::DefaultName:
809 case QTimeZone::LongName:
810 for (const qsizetype locInd : indices) {
811 const LocaleZoneData &locData = localeZoneData[locInd];
812 QStringView regionFormat
813 = locData.regionFormatRange(timeType).viewData(regionFormatTable);
814 if (!regionFormat.isEmpty()) {
815 QString where = exemplarCity;
816 // TODO: if empty, use territory name
817 if (!where.isEmpty())
818 return regionFormat.arg(where);
819 }
820 }
821#if 0 // See comment within.
822 for (const qsizetype locInd : indices) {
823 const LocaleZoneData &locData = localeZoneData[locInd];
824 QStringView fallbackFormat = locData.fallbackFormat().viewData(fallbackFormatTable);
825 // Use fallbackFormat - probably never needed, as regionFormat is
826 // never empty, and this also needs city or territory name (along
827 // with metazone name).
828 }
829#endif
830 break;
831
832 case QTimeZone::ShortName:
833 // If an IANA ID's last component looked like an abbreviation (UTC, EST, ...) use it.
834 if (!ianaAbbrev.isEmpty())
835 return QString::fromLatin1(ianaAbbrev); // It's ASCII
836 break;
837
838 case QTimeZone::OffsetName:
839 Q_UNREACHABLE_RETURN(QString());
840 }
841
842#undef tableLookup
843
844 // Final fall-back: ICU seems to use a compact form of offset time for
845 // short-forms it doesn't know. This seems to correspond to the short form
846 // of LDML's Localized GMT format.
847 return QtTimeZoneLocale::zoneOffsetFormat(locale, locale.d->m_index, QLocale::NarrowFormat,
848 QDateTime(), offsetFromUtc);
849}
850
851// Match what the above might return at the start of a text (usually a tail of a
852// datetime string).
853QTimeZonePrivate::NamePrefixMatch
854QTimeZonePrivate::findLongNamePrefix(QStringView text, const QLocale &locale,
855 std::optional<qint64> atEpochMillis)
856{
857 constexpr std::size_t invalidMetaId = std::size(metaIdData);
858 constexpr std::size_t invalidIanaId = std::size(ianaIdData);
859 constexpr QTimeZone::TimeType timeTypes[] = {
860 // In preference order, should more than one match:
861 QTimeZone::GenericTime,
862 QTimeZone::StandardTime,
863 QTimeZone::DaylightTime,
864 };
865 struct {
866 qsizetype nameLength = 0;
867 QTimeZone::TimeType timeType = QTimeZone::GenericTime;
868 quint16 ianaIdIndex = invalidIanaId;
869 quint16 metaIdIndex = invalidMetaId;
870 QLocale::Territory where = QLocale::AnyTerritory;
871 } best;
872#define localeRows(table, member) QSpan(table).first(nextData.member).sliced(locData.member)
873
874 const QList<qsizetype> indices = fallbackLocalesFor(locale.d->m_index);
875 for (const qsizetype locInd : indices) {
876 const LocaleZoneData &locData = localeZoneData[locInd];
877 // After the row for the last actual locale, there's a terminal row:
878 Q_ASSERT(std::size_t(locInd) < std::size(localeZoneData) - 1);
879 const LocaleZoneData &nextData = localeZoneData[locInd + 1];
880
881 const auto metaRows = localeRows(localeMetaZoneLongNameTable, m_metaLongTableStart);
882 for (const LocaleMetaZoneLongNames &row : metaRows) {
883 for (const QTimeZone::TimeType type : timeTypes) {
884 QLocaleData::DataRange range = row.longName(type);
885 if (range.size > best.nameLength) {
886 QStringView name = range.viewData(longMetaZoneNameTable);
887 if (text.startsWith(name)) {
888 best = { static_cast<qsizetype>(range.size), type,
889 invalidIanaId, row.metaIdIndex };
890 if (best.nameLength >= text.size())
891 break;
892 }
893 }
894 }
895 if (best.nameLength >= text.size())
896 break;
897 }
898
899 const auto ianaRows = localeRows(localeZoneNameTable, m_zoneTableStart);
900 for (const LocaleZoneNames &row : ianaRows) {
901 for (const QTimeZone::TimeType type : timeTypes) {
902 QLocaleData::DataRange range = row.longName(type);
903 if (range.size > best.nameLength) {
904 QStringView name = range.viewData(longZoneNameTable);
905 // Save potentially expensive "zone is supported" check when possible:
906 bool gotZone = row.ianaIdIndex == best.ianaIdIndex
907 || QTimeZone::isTimeZoneIdAvailable(row.ianaId().toByteArray());
908 if (text.startsWith(name) && gotZone)
909 best = { static_cast<qsizetype>(range.size), type, row.ianaIdIndex };
910 }
911 }
912 }
913 }
914 // That's found us our best match, possibly as a meta-zone
915 if (best.metaIdIndex != invalidMetaId) {
916 const auto metaIdBefore = [](auto &row, quint16 key) { return row.metaIdIndex < key; };
917 // Find the standard IANA ID for this meta-zone (or one for another
918 // supported zone using the meta-zone at the specified time).
919 const MetaZoneData *metaRow =
920 std::lower_bound(std::begin(metaZoneTable), std::end(metaZoneTable),
921 best.metaIdIndex, metaIdBefore);
922 // Table is sorted by metazone, then territory.
923 for (; metaRow < std::end(metaZoneTable)
924 && metaRow->metaIdIndex == best.metaIdIndex; ++metaRow) {
925 auto metaLand = QLocale::Territory(metaRow->territory);
926 // World entry is the "standard" zone for this metazone, so always
927 // prefer it over any territory-specific one (from an earlier row):
928 if ((best.where == QLocale::AnyTerritory || metaLand == QLocale::World)
929 && (atEpochMillis
930 ? metaRow->metaZoneKey == metaZoneAt(metaRow->ianaId(), *atEpochMillis)
931 : zoneEverInMeta(metaRow->ianaId(), metaRow->metaZoneKey))) {
932 if (metaRow->ianaIdIndex == best.ianaIdIndex
933 || QTimeZone::isTimeZoneIdAvailable(metaRow->ianaId().toByteArray())) {
934 best.ianaIdIndex = metaRow->ianaIdIndex;
935 best.where = metaLand;
936 if (best.where == QLocale::World)
937 break;
938 }
939 }
940 }
941 }
942 if (best.ianaIdIndex != invalidIanaId)
943 return { QByteArray(ianaIdData + best.ianaIdIndex), best.nameLength, best.timeType };
944
945 // Now try for a region format.
946 // Since we may get the IANA ID directly from a zone, we may not need an
947 // ianaIdIndex from CLDR-derived tables: and the active backend may know
948 // some zones newer than our latest CLDR.
949 NamePrefixMatch found;
950 for (const qsizetype locInd : indices) {
951 const LocaleZoneData &locData = localeZoneData[locInd];
952 const LocaleZoneData &nextData = localeZoneData[locInd + 1];
953 for (const QTimeZone::TimeType timeType : timeTypes) {
954 QStringView regionFormat
955 = locData.regionFormatRange(timeType).viewData(regionFormatTable);
956 // "%0 [Season] Time", "Time in %0 [during Season]" &c.
957 const qsizetype cut = regionFormat.indexOf(u"%0");
958 if (cut < 0) // Shouldn't happen unless empty.
959 continue;
960
961 QStringView prefix = regionFormat.first(cut);
962 // Any text before %0 must appear verbatim at the start of our text:
963 if (cut > 0 && !text.startsWith(prefix))
964 continue;
965 QStringView suffix = regionFormat.sliced(cut + 2); // after %0
966 // This must start with an exemplar city or territory, followed by suffix:
967 QStringView tail = text.sliced(cut);
968
969 // Cheap pretest - any text after %0 must appear *somewhere* in our text:
970 if (suffix.size() && tail.indexOf(suffix) < 0)
971 continue; // No match possible
972
973 // Of course, particularly if just punctuation, a copy of our suffix
974 // might appear within the city or territory name.
975 const auto textMatches = [tail, suffix](QStringView where) {
976 return (where.isEmpty() || tail.startsWith(where))
977 && (suffix.isEmpty() || tail.sliced(where.size()).startsWith(suffix));
978 };
979
980 const auto cityRows = localeRows(localeZoneExemplarTable, m_exemplarTableStart);
981 for (const LocaleZoneExemplar &row : cityRows) {
982 QStringView city = row.exemplarCity().viewData(exemplarCityTable);
983 if (textMatches(city)) {
984 qsizetype length = cut + city.size() + suffix.size();
985 if (length > found.nameLength) {
986 bool gotZone = row.ianaId() == found.ianaId // (cheap pre-test)
987 || QTimeZone::isTimeZoneIdAvailable(row.ianaId().toByteArray());
988 if (gotZone)
989 found = { row.ianaId().toByteArray(), length, timeType };
990 }
991 }
992 }
993 // In localeName() we fall back to the last part of the IANA ID:
994 const QList<QByteArray> allZones = QTimeZone::availableTimeZoneIds();
995 for (const auto &iana : allZones) {
996 Q_ASSERT(!iana.isEmpty());
997 qsizetype slash = iana.lastIndexOf('/');
998 QByteArray local = slash > 0 ? iana.sliced(slash + 1) : iana;
999 QString city = QString::fromLatin1(local.replace('_', ' '));
1000 if (textMatches(city)) {
1001 qsizetype length = cut + city.size() + suffix.size();
1002 if (length > found.nameLength)
1003 found = { iana, length, timeType };
1004 }
1005 }
1006 // TODO: similar for territories, at least once localeName() does so.
1007 }
1008 }
1009#undef localeRows
1010
1011 return found;
1012}
1013
1014QTimeZonePrivate::NamePrefixMatch
1015QTimeZonePrivate::findNarrowOffsetPrefix(QStringView text, const QLocale &locale)
1016{
1017 // NB: uses QLocale::FormatType with non-canonical meaning !
1018 if (const auto match = matchOffsetFormat(text, locale, locale.d->m_index,
1019 QLocale::NarrowFormat)) {
1020 // Check offset is sane:
1021 if (QTimeZone::MinUtcOffsetSecs <= match.offset
1022 && match.offset <= QTimeZone::MaxUtcOffsetSecs) {
1023
1024 // Although we don't have an IANA ID, the ISO offset format text
1025 // should match what the QLocale(ianaId) constructor accepts, which
1026 // is good enough for our purposes.
1027 return { isoOffsetFormat(match.offset, QTimeZone::OffsetName).toLatin1(),
1028 match.size, QTimeZone::GenericTime };
1029 }
1030 }
1031 return {};
1032}
1033#endif // ICU or not
1034
1035QT_END_NAMESPACE
QList< QByteArrayView > ianaIdsForTerritory(QLocale::Territory territory)
#define tableLookup(table, member, sought, test)
#define localeRows(table, member)