Qt
Internal/Contributor docs for the Qt SDK. Note: These are NOT official API docs; those are found at https://doc.qt.io/
Loading...
Searching...
No Matches
qtparsetemporal.cpp
Go to the documentation of this file.
1// Copyright (C) 2026 The Qt Company Ltd.
2// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
3// Qt-Security score:critical reason:data-parser
4#include "private/qtparsetemporal_p.h"
5
6#include "private/qcalendarmath_p.h"
7#include "private/qlocale_p.h"
8#include "private/qstringiterator_p.h"
9#include "private/qttemporalpattern_p.h"
10
11#include <algorithm> // sort, stable_sort
12#include <optional>
13#include <utility> // exchange, move, pair
14#include <vector>
15
16QT_BEGIN_NAMESPACE
17
18namespace {
19using namespace QtParseTemporal;
20using namespace QtTemporalPattern;
21
23{
25 int periodInDay = -1; // 0: am, 1: pm
26 int hourMod12 = 0; // 1 through 12
27 int yearWithinCentury = -1; // 0 through 99
28 static constexpr int UnknownAmHour = -12, UnknownPmHour = 36;
29 enum Flaw : quint16 {
30 // Flaws that justify preferring a shorter parse without the flaw over a
31 // longer one with it, in order of decreasing severity:
32 Irreconcilable = 1, // field values cannot be reconciled
33 // Ideally continuations() would catch irreconcilable issues, but if one
34 // is expensive to spot it can be left for resolve() to flag up.
35 LegacyResolves = 2, // field values can only be resolved by ignoring timeType
36 ResolutionChanges = 4, // resolved values don't match parsed values
37 ZeroPad = 8, // used zero-padded part of text where field didn't require it
38 Narrow = 0x10, // used fewer digits from text than field width, where allowed
39
40 // Flaws not worth giving up a longer parse over, in decreasing order of
41 // strength of preference among those of equal length:
42 SelfResolved = 0x100, // ambiguous field values resolve cleanly
43 };
44 Q_DECLARE_FLAGS(Flaws, Flaw)
46
47 // Constructor for initial empty parse:
48 PartialParse(qsizetype from) { results.startIndex = results.endIndex = from; }
49 // Constructors extending a parse with something more:
50 PartialParse(const PartialParse &base, const QtParseCommon::ParsedText &more)
51 : PartialParse(base)
52 {
53 Q_ASSERT(results.endIndex == more.startIndex);
54 results.endIndex = more.endIndex;
55 }
56 PartialParse(const PartialParse &base, const QtParseTimeZone::ParsedZone &more)
58 {
59 results.zone = more.zone;
60 results.timeType = more.timeType;
61 }
62
63 Qt::weak_ordering compare(const PartialParse &alt) const noexcept
64 {
65 // Measures of how this->wanton differs from alt.wanton: it's worse if
66 // it has a flaw that alt lacks, better if the opposite. Here, "less" is
67 // used to mean "this is better than alt" as we sort better entries
68 // earlier in our lists of partial parse candidates.
69 const auto order = [better = alt.wanton & ~wanton,
70 worse = wanton & ~alt.wanton](Flaw test) {
71 Q_ASSERT(!(worse & better)); // So at most one of these testFlag()s is true:
72 if (better.testFlag(test))
73 return Qt::weak_ordering::less;
74 if (worse.testFlag(test))
75 return Qt::weak_ordering::greater;
76 return Qt::weak_ordering::equivalent;
77 };
78
79 if (auto res = order(Flaw::Irreconcilable); res != 0)
80 return res;
81
82 // In decreasing order of severity
83 if (auto res = order(Flaw::LegacyResolves); res != 0)
84 return res;
85 if (auto res = order(Flaw::ResolutionChanges); res != 0)
86 return res;
87 if (auto res = order(Flaw::Narrow); res != 0)
88 return res;
89
90 // The above take precedence over size: a shorter parse without those
91 // flaws is better than a longer one with them.
92
93 // Longer is better, to be understood as "sorts before" i.e. less than.
94 if (auto res = Qt::compareThreeWay(alt.results.size(), results.size()); res != 0)
95 return res;
96
97 // The following remains as a preference only among matches of the same
98 // length: it's nice to avoid, but a longer parse is still better.
99
100 if (auto res = order(Flaw::ZeroPad); res != 0)
101 return res;
102 if (auto res = order(Flaw::SelfResolved); res != 0)
103 return res;
104
105 return Qt::weak_ordering::equivalent;
106 }
107};
108Q_DECLARE_OPERATORS_FOR_FLAGS(PartialParse::Flaws)
109
110QLocaleData::DigitSequence
111parseDigitSequence(QStringView text, qsizetype from, const QLocale &locale, bool allowSign)
112{
113 const auto *const data = QLocalePrivate::get(locale)->m_data;
114 using DS = QLocaleData::DigitSequence;
115 DS::Options flags;
116 if (allowSign)
117 flags.setFlag(DS::Option::AllowSign, true);
118 return data->digitSequence(text, flags, from);
119}
120
121std::vector<PartialParse> spacePadExtend(std::vector<PartialParse> &&matched, QStringView text)
122{
123 // Pass the whole text: the results.endIndex of the last entry in matched is
124 // an index into it that we shall use to add entries that extend that entry.
125 Q_ASSERT(!matched.empty());
126 // Assumes matched.back()'s last field is allowed to end in space-padding
127 // and inserts a partial parse resulting from accepting each subsequent
128 // space as extending the match. Each longer match is inserted before all
129 // shorter matches. Only extensions of matched.back() are added, so call
130 // after adding each entry to matched, if adding several.
131 const qsizetype position = matched.size() - 1;
132 PartialParse copy = matched.back();
133 QStringIterator iter(text, copy.results.endIndex);
134 while (iter.hasNext() && QChar::isSpace(iter.next())) {
135 Q_ASSERT(iter.index() > copy.results.endIndex);
136 copy.results.endIndex = iter.index();
137 matched.insert(matched.begin() + position, copy);
138 }
139 return matched;
140}
141
142QtParseCommon::ParsedText matchesAt(QStringView text, qsizetype from, const QString &sought,
143 TemporalFieldFlags flags)
144{
145 using F = TemporalFieldFlag;
146 const bool allowLeadingSpace = flags.testFlag(F::SpacePad);
147 Q_ASSERT(sought.size() > 0);
148 // Note: returns the first match within text[from:]. If sought is all space
149 // and SpacePad is set, there may be later matches if text[from:] starts
150 // with more space than (possibly some non-matching spaces, then) that.
151 // caller is expected to follow the match implied by the return from this
152 // with more generated by spacePadExtend().
153
154 const auto beginLength = [flex = flags.testFlag(F::FlexSpace)]
155 (QStringView view, QStringView target, Qt::CaseSensitivity cs = Qt::CaseSensitive) {
156 // Technical hitch: case-insensitive comparison may match a string of
157 // different length. Roll a brute-force length-determining version:
158 const auto matchFront = [cs](QStringView view, QStringView target) {
159 if (view.startsWith(target, cs)) {
160 qsizetype length = target.size();
161 while (view.first(length - 1).startsWith(target, cs))
162 --length;
163 while (!view.first(length).startsWith(target, cs))
164 ++length;
165 Q_ASSERT(length > 0);
166 return length;
167 }
168 return qsizetype(-1);
169 };
170 const auto spaceForward = [](QStringIterator &iter) {
171 // Steps iter past next non-space, returns index at which it appeared.
172 qsizetype used;
173 do {
174 used = iter.index();
175 } while (iter.hasNext() && QChar::isSpace(iter.next()));
176 return used;
177 };
178 constexpr qsizetype failed = 0;
179 qsizetype matched = 0;
180 if (flex) {
181 QStringIterator iter(target);
182 while (iter.hasNext()) {
183 qsizetype head = iter.index();
184 if (QChar::isSpace(iter.next())) {
185 qsizetype same = head > 0 ? matchFront(view, target.first(head)) : 0;
186 if (same < 0)
187 return failed;
188 QStringIterator viter(view, same);
189 // Require at least one spacing character in view to match those in target:
190 if (!viter.hasNext() || !QChar::isSpace(viter.next()))
191 return failed;
192 same = spaceForward(viter);
193 matched += same;
194 view = view.sliced(same);
195 target = target.sliced(spaceForward(iter));
196 iter = QStringIterator(target);
197 }
198 }
199 }
200 const qsizetype tail = target.isEmpty() ? 0 : matchFront(view, target);
201 if (tail < 0)
202 return failed;
203 return matched + tail;
204 };
205 // TODO: consider a comparison that ignores Unicode invisibles, like BiDi
206 // markers, when matching.
207 qsizetype offset = 0;
208 do {
209 QStringView view = text.sliced(from + offset);
210 if (flags.testFlag(F::IgnoreCase)) {
211 if (qsizetype match = beginLength(view, sought, Qt::CaseInsensitive))
212 return {from, from + offset + match};
213 } else if (flags.testAnyFlags(F::LowerCase | F::UpperCase)) {
214 // If either case is specified, only match specified cases.
215 // If both cases are specified, accept either (but not mixed).
216 if (flags.testFlag(F::LowerCase)) {
217 if (qsizetype match = beginLength(view, sought.toLower()))
218 return {from, from + offset + match};
219 }
220 if (flags.testFlag(F::UpperCase)) {
221 if (qsizetype match = beginLength(view, sought.toUpper()))
222 return {from, from + offset + match};
223 }
224 // Otherwise, only an exact match is accepted:
225 } else if (qsizetype match = beginLength(view, sought)) {
226 return {from, from + offset + match};
227 }
228
229 // No match at this position; maybe later if leading space is allowed:
230 if (!allowLeadingSpace) {
231 Q_ASSERT(!offset);
232 break;
233 }
234
235 // Consume one space at a time until we find a match:
236 QStringIterator iter(text.sliced(from), offset);
237 if (!iter.hasNext() || !QChar::isSpace(iter.next()))
238 break;
239
240 Q_ASSERT(iter.index() > offset);
241 offset = iter.index();
242 } while (text.size() >= offset + sought.size() / 2);
243 // Loop wants to test text.size() >= offset + sought.size(), but see beginLength().
244 return {};
245}
246
247bool longerEarlier(const PartialParse &left, const PartialParse &right)
248{
249 // True if we want left before right in our sorted lists.
250 // We want longer matches before shorter:
251 return left.results.endIndex > right.results.endIndex;
252}
253
254template <typename Action>
255void forEachLocaleFormat(TemporalFieldFlags flags, Action action)
256{
257 using Flag = TemporalFieldFlag;
258 constexpr auto Widths = FieldGroup::WidthMask;
259 if (matchesFlagWithin(flags, Flag::Wide, Widths))
260 action(QLocale::LongFormat);
261 if (matchesFlagsWithin(flags, Flag::Short | Flag::Abbreviated, Widths))
262 action(QLocale::ShortFormat);
263 if (matchesFlagWithin(flags, Flag::Narrow, Widths))
264 action(QLocale::NarrowFormat);
265}
266
268{
269 const QLocale locale;
270 const QCalendar calendar;
271 const std::optional<int> baseYear;
272
273 // Numeric
274 struct FieldConfig
275 {
276 // Where to write the int, once read:
277 int &(*target)(PartialParse &);
278 // Acceptable values:
279 int maxValue = 0; // 0 means unbounded
280 int unset; // Default value in ParsedTemporal, invalid for field.
281 // Form of the parsed text:
282 qsizetype width; // min digits
283 qsizetype maxDigits = 0; // <= 0 means unbounded
284 // If unbounded, beyond max(width, roundAfter, -maxDigits) prefer fewer digits to more.
285 qsizetype roundAfter = -1; // >= 0: is fractional part: round to this many digits
286 bool allowSign = false;
287 };
288 // For use as FieldConfig::target:
289 static int &millisTarget(PartialParse &grow) { return grow.results.millis; }
290 static int &secondTarget(PartialParse &grow) { return grow.results.second; }
291 static int &minuteTarget(PartialParse &grow) { return grow.results.minute; }
292 static int &hourTarget(PartialParse &grow) { return grow.results.hour; }
293 static int &hourMod12Target(PartialParse &grow) { return grow.hourMod12; }
294 static int &dayOfWeekTarget(PartialParse &grow) { return grow.results.dayOfWeek; }
295 static int &dayOfMonthTarget(PartialParse &grow) { return grow.results.dayOfMonth; }
296 static int &monthTarget(PartialParse &grow) { return grow.results.month; }
297 static int &yearTarget(PartialParse &grow)
298 {
299 if (!grow.results.year)
300 grow.results.year = 0;
301 return *grow.results.year;
302 }
303 static int &yearWithinCenturyTarget(PartialParse &grow) { return grow.yearWithinCentury; }
304
305 std::vector<PartialParse>
306 numericExtend(const PartialParse &base, QStringView text,
307 TemporalFieldFlags flags, FieldConfig &&config) const;
308
309 // Verbal, Standalone:
310 std::vector<PartialParse> monthNameExtend(const PartialParse &base, QStringView text,
311 TemporalFieldFlags flags) const;
312 std::vector<PartialParse> dayNameExtend(const PartialParse &base, QStringView text,
313 TemporalFieldFlags flags) const;
314 std::pair<qsizetype, int> dayPeriodPrefix(const PartialParse &base, QStringView text,
315 TemporalFieldFlags flags) const;
316public:
317 TemporalFieldMatcher(const QLocale &loc, QCalendar cal, std::optional<int> centuryStart)
319 {}
320
321 std::vector<PartialParse> continuations(const PartialParse &base, QStringView text,
322 const TemporalField &field) const;
323 bool isSelfConsistent(const PartialParse &parsed, TemporalFieldCategory category) const;
324 bool resolve(PartialParse &parsed) const;
325};
326
328 TemporalFieldCategory category) const
329{
330 // Take into account calendar, and potentially baseYear, but only do cheap
331 // checks. This will be run on *each* candidate parse after *each* field,
332 // need not check conditions the current field could not have affected.
333 using Cat = TemporalFieldCategory;
334 if (category == Cat::Literal) // Can't have introduced any inconsistency.
335 return true;
336
337 const bool newYear = category == Cat::Year || category == Cat::YearWithinCentury;
338 if (newYear && parse.yearWithinCentury >= 0 && parse.results.year
339 && (*parse.results.year - parse.yearWithinCentury) % 100) {
340 return false;
341 }
342
343 const bool newDate = (newYear || category == Cat::Month || category == Cat::DayOfMonth
344 || category == Cat::DayOfWeek);
345 if (newDate && parse.results.month && parse.results.dayOfMonth) {
346 // Calendrical calculations: somewhat expensive, but still arithmetic.
347 if (parse.results.year) {
348 if (!calendar.isDateValid(*parse.results.year, parse.results.month,
349 parse.results.dayOfMonth)) {
350 return false;
351 }
352 if (parse.results.dayOfWeek) {
353 QDate date = calendar.dateFromParts(*parse.results.year, parse.results.month,
354 parse.results.dayOfMonth);
355 if (calendar.dayOfWeek(date) != parse.results.dayOfWeek)
356 return false;
357 }
358 } else if (calendar.daysInMonth(parse.results.month) < parse.results.dayOfMonth) {
359 return false;
360 }
361 }
362
363 if ((category == Cat::PeriodInDay && parse.results.hour >= 0)
364 || (category == Cat::Hour && parse.periodInDay >= 0)) {
365 // 00, 01, ... 11 are 12, 1, ... 11 am; 12, 13, ... 23 are 12, 1, ..., 11 pm.
366 if (parse.periodInDay ? parse.results.hour < 12 : parse.results.hour >= 12)
367 return false;
368 }
369
370 if ((category == Cat::Hour && parse.hourMod12 > 0)
371 || (category == Cat::HourMod12 && parse.results.hour >= 0)) {
372 if ((parse.results.hour - parse.hourMod12) % 12)
373 return false;
374 }
375 return true;
376}
377
379{
380 // Final pass, modifying parsed as needed, true if parse.result has been
381 // given a value consistent with all fields of parse. Applies fully rigorous
382 // checks, given what isSelfConsistent() already checked. May record flaws
383 // in parse.wanton where relevant tests reveal them.
384 if (parse.yearWithinCentury >= 0) {
385 if (parse.results.year) {
386 // Previously checked by isSelfConsistent():
387 Q_ASSERT((*parse.results.year - parse.yearWithinCentury) % 100 == 0);
388 } else if (baseYear) {
389 const auto baseSplit =QRoundingDown::qDivMod<100>(*baseYear);
390 int year = baseSplit.quotient * 100 + parse.yearWithinCentury;
391 if (parse.yearWithinCentury < baseSplit.remainder)
392 year += 100;
393
394 if (parse.results.month) {
395 // Check the year has this month and, if given, enough days in
396 // it for dayOfMonth:
397 const auto enough = [dom = parse.results.dayOfMonth](int dim) {
398 return dim > 0 && (!dom || dom <= dim);
399 };
400 if (!enough(calendar.daysInMonth(parse.results.month, year))) {
401 // Search outwards for a better century:
402 bool fixed = false;
403 for (int off = 1; off < 10; ++off) {
404 int offset = off * 100;
405 if (enough(calendar.daysInMonth(parse.results.month, year + offset))) {
406 year += offset;
407 fixed = true;
408 break;
409 }
410 if (enough(calendar.daysInMonth(parse.results.month, year - offset))) {
411 year -= offset;
412 fixed = true;
413 break;
414 }
415 }
416 // No century within a millennium each way will do:
417 if (!fixed)
418 return false;
419 }
420
421 if (parse.results.dayOfMonth) {
422 if (parse.results.dayOfWeek) {
423 QCalendar::YearMonthDay ymd
424 = { year, parse.results.month, parse.results.dayOfMonth };
425 const QDate resolved
426 = calendar.matchCenturyToWeekday(ymd, parse.results.dayOfWeek);
427 if (!resolved.isValid())
428 return false;
429 year = resolved.year(calendar);
430 } else {
431 const QDate resolved(year, parse.results.month, parse.results.dayOfMonth);
432 if (!resolved.isValid())
433 return false;
434 }
435 }
436 }
437
438 parse.results.year = year;
439 }
440 }
441
442 if (parse.results.hour < 0 && parse.hourMod12 > 0) {
443 Q_ASSERT(parse.hourMod12 <= 12);
444 parse.results.hour = parse.hourMod12 < 12 || parse.periodInDay < 0 ? parse.hourMod12 : 0;
445 if (parse.periodInDay > 0)
446 parse.results.hour += 12;
447 }
448
449 if (parse.results.year && parse.results.month && parse.results.dayOfMonth
450 && parse.results.zone.isValid() && parse.results.hour >= 0) {
451 // Should be able to construct a datetime with this:
452 const QDate date(*parse.results.year, parse.results.month, parse.results.dayOfMonth,
453 calendar);
454 Q_ASSERT(date.isValid()); // Should be ensured by earlier checks.
455 const QTime time = parse.results.time(QTime());
456 Q_ASSERT(time.isValid()); // Should be ensured by earlier checks.
457
458 // Is the given time in a transition of the given zone, on the given date ?
459 if (!Q_LIKELY(QDateTime(date, time, parse.results.zone,
460 QDateTime::TransitionResolution::Reject).isValid())) {
461 // Ambiguity, gap or outright borkage.
462 using Flaw = PartialParse::Flaw;
463 QDateTime dt(date, time, parse.results.zone, parse.results.resolveType());
464 if (!dt.isValid()) {
465 // Fall back to default resolution (same as LegacyBehavior):
466 dt = QDateTime(date, time, parse.results.zone);
467 // If that succeeded, Abbreviated (bad); otherwise Narrow (worse).
468 parse.wanton |= dt.isValid() ? Flaw::LegacyResolves : Flaw::Irreconcilable;
469 }
470 if (dt.date() != date || dt.time() != time
471 || dt.timeRepresentation() != parse.results.zone) {
472 // OK, resolution *worked* but didn't get exactly what we asked
473 // for (presumably a spring-forward's gap):
474 parse.wanton |= Flaw::ResolutionChanges;
475 // ... but we don't change parse.results because they should
476 // reflect what parsing learned; the caller can rediscover this.
477 } else {
478 // We got what we asked for (presumably the expected branch of a
479 // fall-back):
480 parse.wanton |= Flaw::SelfResolved;
481 }
482 }
483 }
484
485 if (parse.results.hour < 0) {
486 // Leave ParsedTemporal::time() a clue to am/pm, if known:
487 if (parse.periodInDay > 0)
488 parse.results.hour = PartialParse::UnknownPmHour;
489 else if (parse.periodInDay == 0)
490 parse.results.hour = PartialParse::UnknownAmHour;
491 }
492 return true;
493}
494
495std::vector<PartialParse>
496TemporalFieldMatcher::numericExtend(const PartialParse &base, QStringView text,
497 TemporalFieldFlags flags, FieldConfig &&config) const
498{
499 using Flag = TemporalFieldFlag;
500 qsizetype leadingSpace = 0;
501 const bool spacePad = flags.testFlag(Flag::SpacePad);
502 if (spacePad) {
503 QStringIterator iter(text, base.results.endIndex);
504 while (iter.hasNext() && QChar::isSpace(iter.next()))
505 ++leadingSpace;
506 // If that's used up the string, the code below shall reject the field.
507 }
508
509 const auto parsed = parseDigitSequence(text, base.results.endIndex + leadingSpace,
510 locale, config.allowSign);
511 const bool zeroPad = flags.testFlag(Flag::ZeroPad);
512 // If !zeroPad, we allow < config.width but flag with Narrow in wanton fields.
513 const int width = zeroPad || spacePad ? qMax(1, config.width - leadingSpace) : 1;
514 // This is necessarily positive: the use of chop(1) below depends on that.
515
516 QByteArrayView digits{parsed.digits};
517 if (config.maxDigits > 0) {
518 // Allow config.width to override config.maxDigits:
519 const int maxWidth = qMax(config.maxDigits, config.width);
520 if (digits.size() > maxWidth)
521 digits = digits.first(maxWidth);
522 } else if (flags.testFlag(Flag::YearSignIso8601) && !parsed.sign) {
523 // Limit width because a field longer than width would need a sign.
524 const int maxWidth = config.width > 0 ? config.width : qMax(-config.maxDigits, 1);
525 if (digits.size() > maxWidth)
526 digits = digits.first(maxWidth);
527 }
528 // For unbounded, work out in advance when to switch from prepending to
529 // appending; otherwise, set a cut-off that'll be true already.
530 const qsizetype appendThreshold = config.maxDigits <= 0
531 ? qMax(-config.maxDigits, qMax(config.width, config.roundAfter)) - 1
532 : digits.size();
533
534 std::vector<PartialParse> matches;
535 for (; digits.size() >= width; digits.chop(1)) {
536 bool ok = false;
537 unsigned whole = digits.toUInt(&ok);
538 if (!ok)
539 continue;
540 if (config.maxValue > 0 && config.roundAfter < 0 && whole > unsigned(config.maxValue))
541 continue;
542
543 int value = whole;
544 if (value < 0 || value <= config.unset) // Overflow or too low
545 continue;
546 if (parsed.sign == '-')
547 value = -value;
548
549 if (config.roundAfter >= 0) {
550 // Fractional part
551 if (digits.size() < config.roundAfter) {
552 // Interpolate omitted zero-padding up to rounding size:
553 for (int i = int(digits.size()); i < config.roundAfter; ++i)
554 value *= 10;
555 } else if (digits.size() > config.roundAfter) {
556 double v = value;
557 for (int i = int(digits.size()); i > config.roundAfter; --i)
558 v /= 10.;
559 // A timestamp that's before the end of a specified second
560 // should be rounded to the last we can before that second,
561 // especially if it's the last second of its minute, in turn
562 // especially if that's the last second of its hour (and so on).
563 value = v > config.maxValue ? config.maxValue : qRound(v);
564 // There may of course be use-cases where rounding up to the
565 // next second is desired. If it turns out those are
566 // significant, we can perhaps add a field option for it.
567 }
568 // else: exact match to number of digits, nothing to frob.
569 }
570
571 PartialParse grow = base;
572 int &target = config.target(grow);
573 if (target <= config.unset) // If unset, store:
574 target = value;
575 else if (target != value) // Conflicts with earlier field: skip this reading.
576 continue;
577 grow.results.endIndex = parsed.digitStart + digits.size() * parsed.digitWidth;
578
579 if (!zeroPad && digits.size() > qMax(1, config.width)
580 && (config.roundAfter < 0 ? digits.startsWith('0') : digits.endsWith('0'))) {
581 grow.wanton |= PartialParse::Flaw::ZeroPad;
582 }
583 if (digits.size() + leadingSpace < config.width) // (can only happen if !zeroPad)
584 grow.wanton |= PartialParse::Flaw::Narrow;
585
586 // Entries in matches are all longer than this one, as we're reducing
587 // digits. Mostly we want shorter after longer, but (for example) we
588 // prefer 4-digit years over longer matches.
589 if (digits.size() > appendThreshold)
590 matches.insert(matches.begin(), std::move(grow));
591 else
592 matches.push_back(std::move(grow));
593 }
594 return matches;
595}
596
597/* Some month names may be prefixes of others.
598 For example, the English long forms of Islamic calendar month names include:
599 * Rabiʻ I, Rabiʻ II
600 * Jumada I, Jumada II
601 Their short-forms are likewise:
602 * Rab. I, Rab. II
603 * Jum. I, Jum. II
604 In each case, one month name is a prefix of the next month's name.
605
606 In any sane format, greedy parsing shall suffice but ill-considered formats
607 happen. So the initial parse recognizes every possible match and we sort out
608 any mistakes greed might make as we parse later fields.
609*/
610std::vector<PartialParse>
611TemporalFieldMatcher::monthNameExtend(const PartialParse &base, QStringView text,
612 TemporalFieldFlags flags) const
613{
614 std::vector<PartialParse> matches;
615 using Flag = TemporalFieldFlag;
616
617 auto addIfMatch = [&matches, base, text, flags](int month, QString &&name) {
618 // tryEachMonth() has ensured this:
619 Q_ASSERT(!base.results.month || base.results.month == month);
620 if (name.isEmpty()) // Locale doesn't know this month's name.
621 return;
622 // If matchesAt(), add to matches:
623 auto match = matchesAt(text, base.results.endIndex, name, flags);
624 if (match) {
625 PartialParse grow(base, match);
626 grow.results.month = month;
627 matches.push_back(grow);
628 if (flags.testFlag(Flag::SpacePad))
629 matches = spacePadExtend(std::move(matches), text);
630 }
631 };
632
633 constexpr auto Forms = FieldGroup::FormMask;
634 constexpr int noYear = QCalendar::Unspecified;
635 const bool verb = matchesFlagWithin(flags, Flag::Verbal, Forms);
636 const bool lone = matchesFlagWithin(flags, Flag::Standalone, Forms);
637 const int year = base.results.year ? *base.results.year : noYear;
638 // We could try to take account of baseYear, when yearWithinCentury is
639 // known, but that's susceptible to tweaks and perturbation from other
640 // fields, so stick with noYear and the usual naming of months if we don't
641 // know year. We can consider adding a QCalendar::parseMonthName() that can
642 // consult the internal lists of localized month names, both for efficiency
643 // and to ensure we try all names, including those that appear only in some
644 // years. If we do that, its return should package month number, whether the
645 // month appears in all years and whether it was standalone or plain, along
646 // with the start and end indices of the match within the text.
647 auto tryEachNameType = [this, verb, lone, year,
648 addIfMatch](QLocale::FormatType form, int month) {
649 if (lone)
650 addIfMatch(month, calendar.standaloneMonthName(locale, month, year, form));
651 if (verb)
652 addIfMatch(month, calendar.monthName(locale, month, year, form));
653 };
654 // This could in principle, for non-system locales, be done more efficiently
655 // by walking the internal ';'-joined list of month names QCalendarBackend
656 // can give us. The entanglement between QCalendarBackend and QLocale
657 // internals is, however, already quite untidy enough, so leave that for
658 // if/when we discover it's a significant bottle-neck and/or we've unpicked
659 // the existing entanglement a bit first.
660
661 auto tryEachMonth = [month = base.results.month, bound = calendar.maximumMonthsInYear(),
662 tryEachNameType](QLocale::FormatType form) {
663 if (month > 0) {
664 tryEachNameType(form, month);
665 } else {
666 for (int i = bound; i > 0; --i)
667 tryEachNameType(form, i);
668 }
669 };
670 forEachLocaleFormat(flags, tryEachMonth);
671
672 return matches;
673}
674
675std::vector<PartialParse>
676TemporalFieldMatcher::dayNameExtend(const PartialParse &base, QStringView text,
677 TemporalFieldFlags flags) const
678{
679 std::vector<PartialParse> matches;
680 using Flag = TemporalFieldFlag;
681
682 auto addIfMatch = [&matches, base, text, flags](int dow, QString &&name) {
683 // tryEachDayOfWeek() has ensured this:
684 Q_ASSERT(!base.results.dayOfWeek || base.results.dayOfWeek == dow);
685 if (name.isEmpty()) // Locale doesn't know this day of the week's name.
686 return;
687 // If matchesAt(), add to matches:
688 auto match = matchesAt(text, base.results.endIndex, name, flags);
689 if (match) {
690 PartialParse grow(base,match);
691 grow.results.dayOfWeek = dow;
692 matches.push_back(grow);
693 if (flags.testFlag(Flag::SpacePad))
694 matches = spacePadExtend(std::move(matches), text);
695 }
696 };
697
698 constexpr auto Forms = FieldGroup::FormMask;
699 const bool verb = matchesFlagWithin(flags, Flag::Verbal, Forms);
700 const bool lone = matchesFlagWithin(flags, Flag::Standalone, Forms);
701 auto tryEachNameType = [this, addIfMatch, verb, lone](QLocale::FormatType form, int dow) {
702 if (lone)
703 addIfMatch(dow, calendar.standaloneWeekDayName(locale, dow, form));
704 if (verb)
705 addIfMatch(dow, calendar.weekDayName(locale, dow, form));
706 };
707 // As for month names (see above), some collaboration with QCalendarBackend
708 // might make this more efficient for non-system locales, at the expense of
709 // adding to the existing tangle of complexity.
710
711 auto tryEachDayOfWeek = [dow = base.results.dayOfWeek,
712 tryEachNameType](QLocale::FormatType form) {
713 if (dow > 0) {
714 tryEachNameType(form, dow);
715 } else {
716 // Iterate possible day numbers. Issue: some calendars might have
717 // intercalary days with numbers > 7. When that happens, we may
718 // need to let this run past 7 until it's seen some empty answers.
719 for (int i = 1; i <= 7; ++i)
720 tryEachNameType(form, i);
721 }
722 };
723 forEachLocaleFormat(flags, tryEachDayOfWeek);
724
725 std::sort(matches.begin(), matches.end(), longerEarlier);
726 return matches;
727}
728
729std::pair<qsizetype, int>
730TemporalFieldMatcher::dayPeriodPrefix(const PartialParse &base, QStringView text,
731 TemporalFieldFlags flags) const
732{
733 std::pair<qsizetype, int> result = {0, -1};
734 for (int i = 0; i < 2; ++i) {
735 if (base.periodInDay >= 0 && base.periodInDay != i)
736 continue;
737 if (const QString token = i ? locale.pmText() : locale.amText(); !token.isEmpty()) {
738 if (auto match = matchesAt(text, base.results.endIndex, token, flags);
739 match.endIndex > result.first) {
740 result = { match.endIndex, i };
741 }
742 }
743 }
744 return result;
745}
746
747/*!
748 \internal
749 Find all matches to \a field, within \a text, that extend \a base.
750
751 Each match must begin at offset \c{base.results.endIndex} within \a text.
752 For each match, update a copy of \a base with the match's result, to include
753 in the returned list.
754
755 May use \c calendar to determine the range of values allowed for field.
756 Does not attempt to determine consistency between fields; see resolve() and
757 isSelfConsistent() for that. Updates the copy's member holding the value
758 described by \a field to reflect the match.
759
760 Ignores base.results.startIndex and base.results.bounds and updates each
761 copy's .endIndex to reflect the end of the match. (This leaves the caller to
762 decide whether to transfer that to .bounds.)
763
764 For fields that allow space padding, this consumes leading space as
765 necessary to make a match and includes a match for each end position at
766 which it could end; before any dangling space and after each space that
767 follows. Later calls to \c continuations() shall filter out any earlier
768 match that precludes later fields matching just after its end. Successive
769 space-padded fields surrounded by large amounts of space are apt to lead to
770 many matches, as are final space-padded fields followed by large amounts of
771 space. (TODO: we can almost certainly mitigate this with a trivial
772 heuristic, once everything is working.)
773
774 For those matches with various flaws, relative to the field specification
775 (such as using zero padding when not obliged to), the copy's .wanton records
776 that flaw.
777
778 Sort order of the returned list should put entries likely to represent more
779 suitable matches (ignoring .wanton complications) earlier. For most fields,
780 that means longer matches come first. (For full year field matches with > 4
781 digits, though, that reverses.)
782*/
784TemporalFieldMatcher::continuations(const PartialParse &base, QStringView text,
785 const TemporalField &field) const
786{
787 std::vector<PartialParse> matches;
788 const qsizetype textPos = base.results.endIndex;
789 switch (field.category) {
790 using Cat = TemporalFieldCategory;
791 using Flag = TemporalFieldFlag;
792 case Cat::Literal:
793 if (auto match = matchesAt(text, textPos, field.literal, field.options)) {
794 matches.push_back(PartialParse(base, match));
795 if (field.options.testFlag(Flag::SpacePad))
796 matches = spacePadExtend(std::move(matches), text);
797 }
798 break;
799 case Cat::TimeZone:
800 if (const auto zones = QtParseTimeZone::prefix(text, locale, textPos, field.options);
801 !zones.isEmpty()) {
802 for (const auto &match : zones) {
803 matches.push_back(PartialParse(base, match));
804 if (field.options.testFlag(Flag::SpacePad))
805 matches = spacePadExtend(std::move(matches), text);
806 }
807 }
808 break;
809
810 // case Cat::MillisecondInDay: break;
811 case Cat::SecondFraction:
812 matches = numericExtend(base, text, field.options,
813 {millisTarget, 999, -1, field.width, 0, 3});
814 break;
815 case Cat::Second:
816 matches = numericExtend(base, text, field.options, {secondTarget, 59, -1, field.width, 2});
817 break;
818 // case Cat::MinuteFraction: break;
819 case Cat::Minute:
820 matches = numericExtend(base, text, field.options, {minuteTarget, 59, -1, field.width, 2});
821 break;
822 // case Cat::HourFraction: break;
823 case Cat::HourMod12:
824 matches = numericExtend(base, text, field.options,
825 {hourMod12Target, 12, 0, field.width, 2});
826 break;
827 case Cat::Hour:
828 matches = numericExtend(base, text, field.options, {hourTarget, 23, -1, field.width, 2});
829 break;
830 case Cat::PeriodInDay: // am/pm; LDML also has noon, midnight, "at night" and others.
831 if (const auto match = dayPeriodPrefix(base, text, field.options); match.second >= 0) {
832 // Ensured by dayPeriodPrefix:
833 Q_ASSERT(base.periodInDay < 0 || base.periodInDay == match.second);
834 PartialParse grow = base;
835 grow.results.endIndex = match.first;
836 grow.periodInDay = match.second;
837 matches.push_back(grow);
838 if (field.options.testFlag(Flag::SpacePad))
839 matches = spacePadExtend(std::move(matches), text);
840 }
841 break;
842
843 case Cat::DayOfWeek:
844 matches = dayNameExtend(base, text, field.options);
845 break;
846 case Cat::DayOfMonth: {
847 const int maxDays = calendar.maximumDaysInMonth();
848 matches = numericExtend(base, text, field.options,
849 {dayOfMonthTarget, maxDays, 0, field.width,
850 maxDays < 10 ? 1 : maxDays < 100 ? 2 : 3});
851 }
852 break;
853 // case Cat::DayOfYear: break;
854 // case Cat::JulianDay: break;
855 // case Cat::WeekOfMonth: break;
856 // case Cat::WeekOfYear: break;
857 case Cat::Month:
858 // Verbal and Standalone, in so far as supported:
859 matches = monthNameExtend(base, text, field.options);
860 if (matchesFlagWithin(field.options, Flag::Numeric, FieldGroup::FormMask)) {
861 auto extend = numericExtend(base, text, field.options,
862 {monthTarget, calendar.maximumMonthsInYear(),
863 0, field.width, 2});
864 if (matches.empty())
865 matches = std::move(extend);
866 else
867 matches.insert(matches.end(), extend.begin(), extend.end());
868 }
869 std::sort(matches.begin(), matches.end(), longerEarlier);
870 break;
871 // case Cat::Quarter: break;
872 case Cat::YearWithinCentury:
873 matches = numericExtend(base, text, field.options,
874 {yearWithinCenturyTarget, 99, -1, field.width, 2});
875 break;
876 case Cat::Year:
877 matches = numericExtend(base, text, field.options,
878 {yearTarget, 0, 0, field.width, -4, -1, true});
879 break;
880 // case Cat::RelatedGregorianYear: break;
881 // case Cat::Century: break;
882 // case Cat::Era: break;
883 }
884 return matches;
885}
886
887} // nameless namespace
888
// Builds a QDate from the parsed year, month, dayOfMonth and dayOfWeek, using
// cal as calendar. When defaults is valid it supplies what was not parsed; the
// code below then adjusts a draft date to agree with the parsed day of the week.
// Without valid defaults, a date is only returned when year, month and
// dayOfMonth were all parsed; otherwise the result is a default-constructed
// (empty) return value.
// NOTE(review): this listing omits the signature and several statements of the
// definition (among them the declarations of draft and closer, and the steps
// after each search step), so the comments here describe only the visible code.
891{
892 if (defaults.isValid()) {
895
897 // Defaults conflict with parsed day of the week.
898 if (!dayOfMonth) {
// No day of the month was parsed, so the day may move freely to fix the
// day of the week.
899 // (Assumes no intercalary days.)
900 // Number of days to the nearest with the right day of the week:
// The + 10 keeps the left operand of % positive (assuming both day-of-week
// values lie in 1..7); subtracting 3 then gives a signed offset in -3..3
// that is congruent, modulo 7, to the difference in day of the week.
901 const int offset = (dayOfWeek + 10 - draft.dayOfWeek(cal)) % 7 - 3;
902 Q_ASSERT(offset != 0); // Otherwise, day of week matched, already.
903 Q_ASSERT(-4 < offset && offset < 4);
904 // Prefer closer unless nearby has more in common with what we asked for:
// nearby is the matching day on the other side of draft from offset: a
// week away from the one offset days from draft.
906 QDate nearby = draft.addDays(offset < 0 ? offset + 7 : offset - 7);
907 if (nearby.isValid()
908 && (!closer.isValid()
911 || (closer.month(cal) != draft.month(cal)
912 && nearby.month(cal) == draft.month(cal)))) {
913 // (We could also give year(cal) the same treatment, but
914 // different year, for dates within ten days of one another,
915 // implies different month, so check would be redundant.)
916 std::swap(nearby, draft);
917 } else if (closer.isValid() && closer.dayOfWeek(cal) == dayOfWeek) {
918 std::swap(closer, draft);
919 }
920
921 } else if (!month) {
// Day of the month was parsed but the month was not: look for a month in
// which that day of the month falls on the parsed day of the week.
// use() replaces draft with the given date and returns true if that date
// is valid and has the wanted day of the week; else it leaves draft alone
// and returns false.
923 auto use = [&draft, cal, dow=dayOfWeek](int yr, int mon, int day) {
924 QDate maybe(yr, mon, day, cal);
925 if (!maybe.isValid() || maybe.dayOfWeek(cal) != dow)
926 return false;
927 std::swap(maybe, draft);
928 return true;
929 };
930 // Find nearest month with the right dayOfMonth and dayOfWeek.
931 // If year was specified we're limited to it; otherwise,
932 // draft.year() is derived from defaults so the search can
933 // spread to nearby years.
// The search steps outwards from draft's month in both directions at
// once; maybeHi and maybeLo record whether each direction may still
// advance.
934 int loYear = draft.year(cal), hiYear = loYear;
935 int loMon = draft.month(cal), hiMon = loMon;
936 bool maybeLo = true, maybeHi = true;
937 while (maybeLo || maybeHi) {
938 if (maybeHi) {
939 if (hiMon < cal.monthsInYear(hiYear)) {
940 ++hiMon;
941 } else if (year) {
942 Q_ASSERT(hiYear == *year);
943 maybeHi = false;
// hiYear + 1 is non-zero unless hiYear is -1, so the next branch applies
// when the following year is not year zero or the calendar has one.
944 } else if (hiYear + 1 || cal.hasYearZero()) {
945 ++hiYear;
946 hiMon = 1;
// Otherwise hiYear is -1 in a calendar with no year zero: skip to year 1.
947 } else if (cal.isProleptic()) {
948 hiYear = +1;
949 hiMon = 1;
950 } else {
951 maybeHi = false;
952 }
953 }
// NOTE(review): the condition guarding this break is missing from the
// listing; presumably it tests the candidate just stepped to - confirm.
955 break;
956
957 if (maybeLo) {
958 if (loMon > 1) {
959 --loMon;
960 } else if (year) {
961 Q_ASSERT(loYear == *year);
962 maybeLo = false;
// loYear - 1 is non-zero unless loYear is 1; mirrors the upward case.
963 } else if (loYear - 1 || cal.hasYearZero()) {
964 --loYear;
966 } else if (cal.isProleptic()) {
967 loYear = -1;
969 } else {
970 maybeLo = false;
971 }
972 }
// NOTE(review): as above, the condition guarding this break is missing
// from the listing.
974 break;
975
976 // Avoid looping for ever: if we can't find a match within a
977 // 30 year window we probably never shall. If we haven't
978 // found a match by then, the likelihood that the input has
979 // a typo in it is fairly high, in any case.
980 if (hiYear - loYear > 30)
981 break;
982 }
983 } else if (!year) {
984 // As for resolve()'s handling of two-digit centuries:
// NOTE(review): the statements that compute maybe are missing from the
// listing; only the final adoption of maybe as draft is visible.
988 std::swap(maybe, draft);
989 }
991 return {};
992 }
993 return draft;
994 }
// No usable defaults: every component of the date must have been parsed.
995 if (year && month && dayOfMonth)
996 return QDate(*year, month, dayOfMonth, cal);
997 return {};
998}
999
// Builds a QTime from the parsed hour, minute, second and millis. When
// defaults is valid, each field that was not parsed (negative value) is taken
// from defaults. Without valid defaults, a known hour is required and the time
// is built from the leading fields that were parsed.
// NOTE(review): the signature line is missing from this listing; the comments
// describe only the visible body.
1001{
1002 if (defaults.isValid()) {
// Start from the default hour, then let whatever was parsed override or
// adjust it.
1003 int hr = defaults.hour();
1004 // hour: -1 means we have no information, less means unknown am, > 24 means unknown pm.
// Known to be am, hour itself unknown: fold the default hour into the
// first half of the day.
1005 if (hour < -1) // UnknownAmHour
1006 hr = hr % 12;
// Known to be pm, hour itself unknown: fold the default hour into the
// second half of the day.
1007 else if (hour > 24) // UnknownPmHour
1008 hr = hr % 12 + 12;
1009 else if (hour >= 0)
1010 hr = hour;
1011 // (Note: hour == 24 is currently unused but may be relevant for 24:00:00 in future.)
1012 return QTime(hr,
1013 minute < 0 ? defaults.minute() : minute,
1014 second < 0 ? defaults.second() : second,
1015 millis < 0 ? defaults.msec() : millis);
1016 }
1017
// No defaults to fall back on: an unknown hour (including the unknown
// am/pm markers) gives an empty result.
1018 if (hour < 0 || hour > 24)
1019 return {};
// Stop at the first field that was not parsed; later fields are then not
// consulted, even if they were parsed.
1020 if (minute < 0)
1021 return QTime(hour, 0);
1022 if (second < 0)
1023 return QTime(hour, minute);
1024 if (millis < 0)
1025 return QTime(hour, minute, second);
1026 return QTime(hour, minute, second, millis);
1027}
1028
1029ParsedTemporal prefix(QStringView text, QSpan<const QtTemporalPattern::TemporalField> fields,
1030 const QLocale &locale, QCalendar cal,
1031 std::optional<int> baseYear, qsizetype from)
1032{
1033 if (from < 0 || from >= text.size())
1034 return {};
1035
1036 const TemporalFieldMatcher matcher(locale, cal, baseYear);
1037 // Technically this is the correct (empty) result when fields.isEmpty():
1038 std::vector<PartialParse> maybe{PartialParse(from)};
1039
1040 qsizetype toCome = fields.size();
1041 for (const QtTemporalPattern::TemporalField &field : fields) {
1042 --toCome;
1043 const std::vector<PartialParse> prior = std::exchange(maybe, {});
1044 for (const PartialParse &base : prior) {
1045 std::vector<PartialParse> more
1046 = matcher.continuations(base, text, field);
1047 for (PartialParse &candidate : more) {
1048 // Consistency won't have been changed by a literal field:
1049 if ((field.category == TemporalFieldCategory::Literal
1050 || matcher.isSelfConsistent(candidate, field.category))) {
1051 if (toCome) // Earlier fields' ends go in bounds:
1052 candidate.results.bounds.push_back(candidate.results.endIndex);
1053 else if (!matcher.resolve(candidate)) // Last field: makes sense of it all.
1054 continue;
1055 maybe.push_back(std::move(candidate));
1056 }
1057 }
1058 }
1059 if (maybe.empty()) // No point continuing
1060 return {};
1061 }
1062 // Now select our most favourable entry from maybe.
1063
1064 // Although we've, thus far, prefered sensible-length matches over longer
1065 // ones in individual numeric fields, so that later numeric fields can take
1066 // up the slack and win, we still want to be greedy over-all, so prefer
1067 // overall longer matches to shorter ones. None the less, between matches of
1068 // equal length, preserve our preference, up to now, for sane lengths of
1069 // each field within that, as long as later fields are taking up the slack.
1070 // That preference can be fine-tuned via .wanton, see PartialParse::Flaw.
1071 PartialParse best = maybe.front();
1072 for (const PartialParse &match : QSpan{maybe}.sliced(1)) {
1073 if (match.compare(best) < 0)
1074 best = match;
1075 }
1076 return best.results;
1077}
1078
1079} // QtParseTemporal
1080
1081QT_END_NAMESPACE
bool isSelfConsistent(const PartialParse &parsed, TemporalFieldCategory category) const
bool resolve(PartialParse &parsed) const
std::vector< PartialParse > continuations(const PartialParse &base, QStringView text, const TemporalField &field) const
TemporalFieldMatcher(const QLocale &loc, QCalendar cal, std::optional< int > centuryStart)
bool longerEarlier(const PartialParse &left, const PartialParse &right)
void forEachLocaleFormat(TemporalFieldFlags flags, Action action)
std::vector< PartialParse > spacePadExtend(std::vector< PartialParse > &&matched, QStringView text)
QtParseCommon::ParsedText matchesAt(QStringView text, qsizetype from, const QString &sought, TemporalFieldFlags flags)
ParsedTemporal prefix(QStringView text, QSpan< const QtTemporalPattern::TemporalField > fields, const QLocale &locale, QCalendar cal, std::optional< int > baseYear, qsizetype from)
Qt::weak_ordering compare(const PartialParse &alt) const noexcept
PartialParse(const PartialParse &base, const QtParseCommon::ParsedText &more)