Qt
Internal/Contributor docs for the Qt SDK. Note: These are NOT official API docs; those are found at https://doc.qt.io/
Loading...
Searching...
No Matches
qtparsetemporal.cpp
Go to the documentation of this file.
1// Copyright (C) 2026 The Qt Company Ltd.
2// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
3// Qt-Security score:critical reason:data-parser
4#include "private/qtparsetemporal_p.h"
5
6#include "private/qcalendarmath_p.h"
7#include "private/qlocale_p.h"
8#include "private/qstringiterator_p.h"
9#include "private/qttemporalpattern_p.h"
10
11#include <algorithm> // sort, stable_sort
12#include <optional>
13#include <utility> // exchange, move, pair
14#include <vector>
15
16QT_BEGIN_NAMESPACE
17
18namespace {
19using namespace QtParseTemporal;
20using namespace QtTemporalPattern;
21
23{
25 int periodInDay = -1; // 0: am, 1: pm
26 int hourMod12 = 0; // 1 through 12
27 int yearWithinCentury = -1; // 0 through 99
28 static constexpr int UnknownAmHour = -12, UnknownPmHour = 36;
29 enum Flaw : quint16 {
30 // Flaws that justify preferring a shorter parse without the flaw over
31 // longer with it, in order of decreasing severity:
32 Irreconcilable = 1, // field values cannot be reconciled
33 // Ideally continuations() would catch irreconcilable issues, but if one
34 // is expensive to spot it can be left for resolve() to flag up.
35 LegacyResolves = 2, // field values can only be resolved by ignoring timeType
36 ResolutionChanges = 4, // resolved values don't match parsed values
37 ZeroPad = 8, // used zero-padded part of text where field didn't require it
38 Narrow = 0x10, // used fewer digits from text than field width, where allowed
39
40 // Flaws not worth giving up a longer parse over, in decreasing order of
41 // strength of preference among those of equal length:
42 SelfResolved = 0x100, // ambiguous field values resolve cleanly
43 };
44 Q_DECLARE_FLAGS(Flaws, Flaw)
46
47 // Constructor for initial empty parse:
48 PartialParse(qsizetype from) { results.startIndex = results.endIndex = from; }
49 // Constructors extending a parse with something more:
50 PartialParse(const PartialParse &base, const QtParseCommon::ParsedText &more)
51 : PartialParse(base)
52 {
53 Q_ASSERT(results.endIndex == more.startIndex);
54 results.endIndex = more.endIndex;
55 }
56 PartialParse(const PartialParse &base, const QtParseTimeZone::ParsedZone &more)
58 {
59 results.zone = more.zone;
60 results.timeType = more.timeType;
61 }
62
63 Qt::weak_ordering compare(const PartialParse &alt) const noexcept
64 {
65 // Measures of how this->wanton differs from alt.wanton: it's worse if
66 // it has a flaw that alt lacks, better if the opposite. Here, "less" is
67 // used to mean "this is better than alt" as we sort better entries
68 // earlier in our lists of partial parse candidates.
69 const auto order = [better = alt.wanton & ~wanton,
70 worse = wanton & ~alt.wanton](Flaw test) {
71 Q_ASSERT(!(worse & better)); // So at most one of these testFlag()s is true:
72 if (better.testFlag(test))
73 return Qt::weak_ordering::less;
74 if (worse.testFlag(test))
75 return Qt::weak_ordering::greater;
76 return Qt::weak_ordering::equivalent;
77 };
78
79 if (auto res = order(Flaw::Irreconcilable); res != 0)
80 return res;
81
82 // In decreasing order of severity
83 if (auto res = order(Flaw::LegacyResolves); res != 0)
84 return res;
85 if (auto res = order(Flaw::ResolutionChanges); res != 0)
86 return res;
87 if (auto res = order(Flaw::ZeroPad); res != 0)
88 return res;
89 if (auto res = order(Flaw::Narrow); res != 0)
90 return res;
91
92 // The above take precedence over size: a shorter parse without those
93 // flaws is better than a longer one with them.
94
95 // Longer is better, to be understood as "sorts before" i.e. less than.
96 if (auto res = Qt::compareThreeWay(alt.results.size(), results.size()); res != 0)
97 return res;
98
99 // The following remains as a preference only among matches of the same
100 // length: it's nice to avoid, but a longer parse is still better.
101
102 if (auto res = order(Flaw::SelfResolved); res != 0)
103 return res;
104
105 return Qt::weak_ordering::equivalent;
106 }
107};
108Q_DECLARE_OPERATORS_FOR_FLAGS(PartialParse::Flaws)
109
110QLocaleData::DigitSequence
111parseDigitSequence(QStringView text, qsizetype from, const QLocale &locale, bool allowSign)
112{
113 const auto *const data = QLocalePrivate::get(locale)->m_data;
114 using DS = QLocaleData::DigitSequence;
115 DS::Options flags;
116 if (allowSign)
117 flags.setFlag(DS::Option::AllowSign, true);
118 return data->digitSequence(text, flags, from);
119}
120
121std::vector<PartialParse> spacePadExtend(std::vector<PartialParse> &&matched, QStringView text)
122{
123 // Pass the whole text: the results.endIndex of the last entry in matched is
124 // an index into it that we shall use to add entries that extend that entry.
125 Q_ASSERT(!matched.empty());
126 // Assumes matched.back()'s last field is allowed to end in space-padding
127 // and inserts a partial parse resulting from accepting each subsequent
128 // space as extending the match. Each longer match is inserted before all
129 // shorter matches. Only extensions of matched.back() are added, so call
130 // after adding each entry to matched, if adding several.
131 const qsizetype position = matched.size() - 1;
132 PartialParse copy = matched.back();
133 QStringIterator iter(text, copy.results.endIndex);
134 while (iter.hasNext() && QChar::isSpace(iter.next())) {
135 Q_ASSERT(iter.index() > copy.results.endIndex);
136 copy.results.endIndex = iter.index();
137 matched.insert(matched.begin() + position, copy);
138 }
139 return matched;
140}
141
142QtParseCommon::ParsedText matchesAt(QStringView text, qsizetype from, const QString &sought,
143 TemporalFieldFlags flags)
144{
145 using F = TemporalFieldFlag;
146 const bool allowLeadingSpace = flags.testFlag(F::SpacePad);
147 Q_ASSERT(sought.size() > 0);
148 // Note: returns the first match within text[from:]. If sought is all space
149 // and SpacePad is set, there may be later matches if text[from:] starts
150 // with more space than (possibly some non-matching spaces, then) that.
151 // caller is expected to follow the match implied by the return from this
152 // with more generated by spacePadExtend().
153
154 const auto beginLength = [flex = flags.testFlag(F::FlexSpace)]
155 (QStringView view, QStringView target, Qt::CaseSensitivity cs = Qt::CaseSensitive) {
156 // Technical hitch: case-insensitive comparison may match a string of
157 // different length. Roll a brute-force length-determining version:
158 const auto matchFront = [cs](QStringView view, QStringView target) {
159 if (view.startsWith(target, cs)) {
160 qsizetype length = target.size();
161 while (view.first(length - 1).startsWith(target, cs))
162 --length;
163 while (!view.first(length).startsWith(target, cs))
164 ++length;
165 Q_ASSERT(length > 0);
166 return length;
167 }
168 return qsizetype(-1);
169 };
170 const auto spaceForward = [](QStringIterator &iter) {
171 // Steps iter past next non-space, returns index at which it appeared.
172 qsizetype used;
173 do {
174 used = iter.index();
175 } while (iter.hasNext() && QChar::isSpace(iter.next()));
176 return used;
177 };
178 constexpr qsizetype failed = 0;
179 qsizetype matched = 0;
180 if (flex) {
181 QStringIterator iter(target);
182 while (iter.hasNext()) {
183 qsizetype head = iter.index();
184 if (QChar::isSpace(iter.next())) {
185 qsizetype same = head > 0 ? matchFront(view, target.first(head)) : 0;
186 if (same < 0)
187 return failed;
188 QStringIterator viter(view, same);
189 // Require at least one spacing character in view to match those in target:
190 if (!viter.hasNext() || !QChar::isSpace(viter.next()))
191 return failed;
192 same = spaceForward(viter);
193 matched += same;
194 view = view.sliced(same);
195 target = target.sliced(spaceForward(iter));
196 iter = QStringIterator(target);
197 }
198 }
199 }
200 const qsizetype tail = target.isEmpty() ? 0 : matchFront(view, target);
201 if (tail < 0)
202 return failed;
203 return matched + tail;
204 };
205 // TODO: consider a comparison that ignores Unicode invisibles, like BiDi
206 // markers, when matching.
207 qsizetype offset = 0;
208 do {
209 QStringView view = text.sliced(from + offset);
210 if (flags.testFlag(F::IgnoreCase)) {
211 if (qsizetype match = beginLength(view, sought, Qt::CaseInsensitive))
212 return {from, from + offset + match};
213 } else if (flags.testAnyFlags(F::LowerCase | F::UpperCase)) {
214 // If either case is specified, only match specified cases.
215 // If both cases are specified, accept either (but not mixed).
216 if (flags.testFlag(F::LowerCase)) {
217 if (qsizetype match = beginLength(view, sought.toLower()))
218 return {from, from + offset + match};
219 }
220 if (flags.testFlag(F::UpperCase)) {
221 if (qsizetype match = beginLength(view, sought.toUpper()))
222 return {from, from + offset + match};
223 }
224 // Otherwise, only an exact match is accepted:
225 } else if (qsizetype match = beginLength(view, sought)) {
226 return {from, from + offset + match};
227 }
228
229 // No match at this position; maybe later if leading space is allowed:
230 if (!allowLeadingSpace) {
231 Q_ASSERT(!offset);
232 break;
233 }
234
235 // Consume one space at a time until we find a match:
236 QStringIterator iter(text.sliced(from), offset);
237 if (!iter.hasNext() || !QChar::isSpace(iter.next()))
238 break;
239
240 Q_ASSERT(iter.index() > offset);
241 offset = iter.index();
242 } while (text.size() >= offset + sought.size() / 2);
243 // Loop wants to test text.size() >= offset + sought.size(), but see beginLength().
244 return {};
245}
246
247bool longerEarlier(const PartialParse &left, const PartialParse &right)
248{
249 // True if we want left before right in our sorted lists.
250 // We want longer matches before shorter:
251 return left.results.endIndex > right.results.endIndex;
252}
253
254template <typename Action>
255void forEachLocaleFormat(TemporalFieldFlags flags, Action action)
256{
257 using Flag = TemporalFieldFlag;
258 constexpr auto Widths = FieldGroup::WidthMask;
259 if (matchesFlagWithin(flags, Flag::Wide, Widths))
260 action(QLocale::LongFormat);
261 if (matchesFlagsWithin(flags, Flag::Short | Flag::Abbreviated, Widths))
262 action(QLocale::ShortFormat);
263 if (matchesFlagWithin(flags, Flag::Narrow, Widths))
264 action(QLocale::NarrowFormat);
265}
266
268{
269 const QLocale locale;
270 const QCalendar calendar;
271 const std::optional<int> baseYear;
272
273 // Numeric
274 struct FieldConfig
275 {
276 // Where to write the int, once read:
277 int &(*target)(PartialParse &);
278 // Acceptable values:
279 int maxValue = 0; // 0 means unbounded
280 int unset; // Default value in ParsedTemporal, invalid for field.
281 // Form of the parsed text:
282 qsizetype width; // min digits
283 qsizetype maxDigits = 0; // <= 0 means unbounded
284 // If unbounded, beyond max(width, roundAfter, -maxDigits) prefer fewer digits to more.
285 qsizetype roundAfter = -1; // >= 0: is fractional part: round to this many digits
286 bool allowSign = false;
287 };
288 // For use as FieldConfig::target:
289 static int &millisTarget(PartialParse &grow) { return grow.results.millis; }
290 static int &secondTarget(PartialParse &grow) { return grow.results.second; }
291 static int &minuteTarget(PartialParse &grow) { return grow.results.minute; }
292 static int &hourTarget(PartialParse &grow) { return grow.results.hour; }
293 static int &hourMod12Target(PartialParse &grow) { return grow.hourMod12; }
294 static int &dayOfWeekTarget(PartialParse &grow) { return grow.results.dayOfWeek; }
295 static int &dayOfMonthTarget(PartialParse &grow) { return grow.results.dayOfMonth; }
296 static int &monthTarget(PartialParse &grow) { return grow.results.month; }
297 static int &yearTarget(PartialParse &grow)
298 {
299 if (!grow.results.year)
300 grow.results.year = 0;
301 return *grow.results.year;
302 }
303 static int &yearWithinCenturyTarget(PartialParse &grow) { return grow.yearWithinCentury; }
304
305 std::vector<PartialParse>
306 numericExtend(const PartialParse &base, QStringView text,
307 TemporalFieldFlags flags, FieldConfig &&config) const;
308
309 // Verbal, Standalone:
310 std::vector<PartialParse> monthNameExtend(const PartialParse &base, QStringView text,
311 TemporalFieldFlags flags) const;
312 std::vector<PartialParse> dayNameExtend(const PartialParse &base, QStringView text,
313 TemporalFieldFlags flags) const;
314 std::pair<qsizetype, int> dayPeriodPrefix(const PartialParse &base, QStringView text,
315 TemporalFieldFlags flags) const;
316public:
317 TemporalFieldMatcher(const QLocale &loc, QCalendar cal, std::optional<int> centuryStart)
319 {}
320
321 std::vector<PartialParse> continuations(const PartialParse &base, QStringView text,
322 const TemporalField &field) const;
323 bool isSelfConsistent(const PartialParse &parsed, TemporalFieldCategory category) const;
324 bool resolve(PartialParse &parsed) const;
325};
326
328 TemporalFieldCategory category) const
329{
330 // Take into account calendar, and potentially baseYear, but only do cheap
331 // checks. This will be run on *each* candidate parse after *each* field,
332 // need not check conditions the current field could not have affected.
333 using Cat = TemporalFieldCategory;
334 if (category == Cat::Literal) // Can't have introduced any inconsistency.
335 return true;
336
337 const bool newYear = category == Cat::Year || category == Cat::YearWithinCentury;
338 if (newYear && parse.yearWithinCentury >= 0 && parse.results.year
339 && (*parse.results.year - parse.yearWithinCentury) % 100) {
340 return false;
341 }
342
343 const bool newDate = (newYear || category == Cat::Month || category == Cat::DayOfMonth
344 || category == Cat::DayOfWeek);
345 if (newDate && parse.results.month && parse.results.dayOfMonth) {
346 // Calendrical calculations: somewhat expensive, but still arithmetic.
347 if (parse.results.year) {
348 if (!calendar.isDateValid(*parse.results.year, parse.results.month,
349 parse.results.dayOfMonth)) {
350 return false;
351 }
352 if (parse.results.dayOfWeek) {
353 QDate date = calendar.dateFromParts(*parse.results.year, parse.results.month,
354 parse.results.dayOfMonth);
355 if (calendar.dayOfWeek(date) != parse.results.dayOfWeek)
356 return false;
357 }
358 } else if (calendar.daysInMonth(parse.results.month) < parse.results.dayOfMonth) {
359 return false;
360 }
361 }
362
363 if ((category == Cat::PeriodInDay && parse.results.hour >= 0)
364 || (category == Cat::Hour && parse.periodInDay >= 0)) {
365 // 00, 01, ... 11 are 12, 1, ... 11 am; 12, 13, ... 23 are 12, 1, ..., 11 pm.
366 if (parse.periodInDay ? parse.results.hour < 12 : parse.results.hour >= 12)
367 return false;
368 }
369
370 if ((category == Cat::Hour && parse.hourMod12 > 0)
371 || (category == Cat::HourMod12 && parse.results.hour >= 0)) {
372 if ((parse.results.hour - parse.hourMod12) % 12)
373 return false;
374 }
375 return true;
376}
377
379{
380 // Final pass, modifying parsed as needed, true if parse.result has been
381 // given a value consistent with all fields of parse. Applies fully rigorous
382 // checks, given what isSelfConsistent() already checked. May record flaws
383 // in parse.wanton where relevant tests reveal them.
384 if (parse.yearWithinCentury >= 0) {
385 if (parse.results.year) {
386 // Previously checked by isSelfConsistent():
387 Q_ASSERT((*parse.results.year - parse.yearWithinCentury) % 100 == 0);
388 } else if (baseYear) {
389 const auto baseSplit =QRoundingDown::qDivMod<100>(*baseYear);
390 int year = baseSplit.quotient * 100 + parse.yearWithinCentury;
391 if (parse.yearWithinCentury < baseSplit.remainder)
392 year += 100;
393
394 if (parse.results.dayOfWeek && parse.results.month && parse.results.dayOfMonth) {
395 QCalendar::YearMonthDay ymd
396 = { year, parse.results.month, parse.results.dayOfMonth };
397 QDate resolved = calendar.matchCenturyToWeekday(ymd, parse.results.dayOfWeek);
398 if (!resolved.isValid())
399 return false;
400 year = resolved.year(calendar);
401 }
402
403 parse.results.year = year;
404 }
405 }
406
407 if (parse.results.hour < 0 && parse.hourMod12 > 0) {
408 Q_ASSERT(parse.hourMod12 <= 12);
409 parse.results.hour = parse.hourMod12 < 12 || parse.periodInDay < 0 ? parse.hourMod12 : 0;
410 if (parse.periodInDay > 0)
411 parse.results.hour += 12;
412 }
413
414 if (parse.results.year && parse.results.month && parse.results.dayOfMonth
415 && parse.results.zone.isValid() && parse.results.hour >= 0) {
416 // Should be able to construct a datetime with this:
417 const QDate date(*parse.results.year, parse.results.month, parse.results.dayOfMonth,
418 calendar);
419 Q_ASSERT(date.isValid()); // Should be ensured by earlier checks.
420 const QTime time = parse.results.time(QTime());
421 Q_ASSERT(time.isValid()); // Should be ensured by earlier checks.
422
423 // Is the given time in a transition of the given zone, on the given date ?
424 if (!Q_LIKELY(QDateTime(date, time, parse.results.zone,
425 QDateTime::TransitionResolution::Reject).isValid())) {
426 // Ambiguity, gap or outright borkage.
427 const auto res = [type = parse.results.timeType]() {
428 using Res = QDateTime::TransitionResolution;
429 switch (type) {
430 case QTimeZone::StandardTime: return Res::PreferStandard;
431 case QTimeZone::DaylightTime: return Res::PreferDaylightSaving;
432 case QTimeZone::GenericTime: return Res::LegacyBehavior;
433 }
434 Q_UNREACHABLE_RETURN(Res::LegacyBehavior);
435 }();
436 using Flaw = PartialParse::Flaw;
437 QDateTime dt(date, time, parse.results.zone, res);
438 if (!dt.isValid()) {
439 // Fall back to default resolution (same as LegacyBehavior):
440 dt = QDateTime(date, time, parse.results.zone);
441 // If that succeeded, LegacyResolves (bad); otherwise Irreconcilable (worse).
442 parse.wanton |= dt.isValid() ? Flaw::LegacyResolves : Flaw::Irreconcilable;
443 }
444 if (dt.date() != date || dt.time() != time
445 || dt.timeRepresentation() != parse.results.zone) {
446 // OK, resolution *worked* but didn't get exactly what we asked
447 // for (presumably a spring-forward's gap):
448 parse.wanton |= Flaw::ResolutionChanges;
449 // ... but we don't change parse.results because they should
450 // reflect what parsing learned; the caller can rediscover this.
451 } else {
452 // We got what we asked for (presumably the expected branch of a
453 // fall-back):
454 parse.wanton |= Flaw::SelfResolved;
455 }
456 }
457 }
458
459 if (parse.results.hour < 0) {
460 // Leave ParsedTemporal::time() a clue to am/pm, if known:
461 if (parse.periodInDay > 0)
462 parse.results.hour = PartialParse::UnknownPmHour;
463 else if (parse.periodInDay == 0)
464 parse.results.hour = PartialParse::UnknownAmHour;
465 }
466 return true;
467}
468
469std::vector<PartialParse>
470TemporalFieldMatcher::numericExtend(const PartialParse &base, QStringView text,
471 TemporalFieldFlags flags, FieldConfig &&config) const
472{
473 using Flag = TemporalFieldFlag;
474 qsizetype leadingSpace = 0;
475 const bool spacePad = flags.testFlag(Flag::SpacePad);
476 if (spacePad) {
477 QStringIterator iter(text, base.results.endIndex);
478 while (iter.hasNext() && QChar::isSpace(iter.next()))
479 ++leadingSpace;
480 // If that's used up the string, the code below shall reject the field.
481 }
482
483 const auto parsed = parseDigitSequence(text, base.results.endIndex + leadingSpace,
484 locale, config.allowSign);
485 const bool zeroPad = flags.testFlag(Flag::ZeroPad);
486 // If !zeroPad, we allow < config.width but flag with Narrow in wanton fields.
487 const int width = zeroPad || spacePad ? qMax(1, config.width - leadingSpace) : 1;
488 // This is necessarily positive: the use of chop(1) below depends on that.
489
490 QByteArrayView digits{parsed.digits};
491 if (config.maxDigits > 0) {
492 // Allow config.width to override config.maxDigits:
493 const int maxWidth = qMax(config.maxDigits, config.width);
494 if (digits.size() > maxWidth)
495 digits = digits.first(maxWidth);
496 } else if (flags.testFlag(Flag::YearSignIso8601) && !parsed.sign) {
497 // Limit width because a field longer than width would need a sign.
498 const int maxWidth = config.width > 0 ? config.width : qMax(-config.maxDigits, 1);
499 if (digits.size() > maxWidth)
500 digits = digits.first(maxWidth);
501 }
502 // For unbounded, work out in advance when to switch from prepending to
503 // appending; otherwise, set a cut-off that'll be true already.
504 const qsizetype appendThreshold = config.maxDigits <= 0
505 ? qMax(-config.maxDigits, qMax(config.width, config.roundAfter)) - 1
506 : digits.size();
507
508 std::vector<PartialParse> matches;
509 for (; digits.size() >= width; digits.chop(1)) {
510 bool ok = false;
511 unsigned whole = digits.toUInt(&ok);
512 if (!ok)
513 continue;
514 if (config.maxValue > 0 && config.roundAfter < 0 && whole > unsigned(config.maxValue))
515 continue;
516
517 int value = whole;
518 if (value < 0 || value <= config.unset) // Overflow or too low
519 continue;
520 if (parsed.sign == '-')
521 value = -value;
522
523 if (config.roundAfter >= 0) {
524 // Fractional part
525 if (digits.size() < config.roundAfter) {
526 // Interpolate omitted zero-padding up to rounding size:
527 for (int i = int(digits.size()); i < config.roundAfter; ++i)
528 value *= 10;
529 } else if (digits.size() > config.roundAfter) {
530 double v = value;
531 for (int i = int(digits.size()); i > config.roundAfter; --i)
532 v /= 10.;
533 // A timestamp that's before the end of a specified second
534 // should be rounded to the last we can before that second,
535 // especially if it's the last second of its minute, in turn
536 // especially if that's the last second of its hour (and so on).
537 value = v > config.maxValue ? config.maxValue : qRound(v);
538 // There may of course be use-cases where rounding up to the
539 // next second is desired. If it turns out those are
540 // significant, we can perhaps add a field option for it.
541 }
542 // else: exact match to number of digits, nothing to frob.
543 }
544
545 PartialParse grow = base;
546 int &target = config.target(grow);
547 if (target <= config.unset) // If unset, store:
548 target = value;
549 else if (target != value) // Conflicts with earlier field: skip this reading.
550 continue;
551 grow.results.endIndex = parsed.digitStart + digits.size() * parsed.digitWidth;
552
553 if (!zeroPad && digits.size() > qMax(1, config.width)
554 && (config.roundAfter < 0 ? digits.startsWith('0') : digits.endsWith('0'))) {
555 grow.wanton |= PartialParse::Flaw::ZeroPad;
556 }
557 if (digits.size() + leadingSpace < config.width) // (can only happen if !zeroPad)
558 grow.wanton |= PartialParse::Flaw::Narrow;
559
560 // Entries in matches are all longer than this one, as we're reducing
561 // digits. Mostly we want shorter after longer, but (for example) we
562 // prefer 4-digit years over longer matches.
563 if (digits.size() > appendThreshold)
564 matches.insert(matches.begin(), std::move(grow));
565 else
566 matches.push_back(std::move(grow));
567 }
568 return matches;
569}
570
571/* Some month names may be prefixes of others.
572 For example, the English long forms of Islamic calendar month names include:
573 * RabiÊ» I, RabiÊ» II
574 * Jumada I, Jumada II
575 Their short-forms are likewise:
576 * Rab. I, Rab. II
577 * Jum. I, Jum. II
578 In each case, one month name is a prefix of the next month's name.
579
580 In any sane format, greedy parsing shall suffice but ill-considered formats
581 happen. So the initial parse recognizes every possible match and we sort out
582 any mistakes greed might make as we parse later fields.
583*/
584std::vector<PartialParse>
585TemporalFieldMatcher::monthNameExtend(const PartialParse &base, QStringView text,
586 TemporalFieldFlags flags) const
587{
588 std::vector<PartialParse> matches;
589 using Flag = TemporalFieldFlag;
590
591 auto addIfMatch = [&matches, base, text, flags](int month, QString &&name) {
592 // tryEachMonth() has ensured this:
593 Q_ASSERT(!base.results.month || base.results.month == month);
594 if (name.isEmpty()) // Locale doesn't know this month's name.
595 return;
596 // If matchesAt(), add to matches:
597 auto match = matchesAt(text, base.results.endIndex, name, flags);
598 if (match) {
599 PartialParse grow(base, match);
600 grow.results.month = month;
601 matches.push_back(grow);
602 if (flags.testFlag(Flag::SpacePad))
603 matches = spacePadExtend(std::move(matches), text);
604 }
605 };
606
607 constexpr auto Forms = FieldGroup::FormMask;
608 constexpr int noYear = QCalendar::Unspecified;
609 const bool verb = matchesFlagWithin(flags, Flag::Verbal, Forms);
610 const bool lone = matchesFlagWithin(flags, Flag::Standalone, Forms);
611 const int year = base.results.year ? *base.results.year : noYear;
612 // We could try to take account of baseYear, when yearWithinCentury is
613 // known, but that's susceptible to tweaks and perturbation from other
614 // fields, so stick with noYear and the usual naming of months if we don't
615 // know year. We can consider adding a QCalendar::parseMonthName() that can
616 // consult the internal lists of localized month names, both for efficiency
617 // and to ensure we try all names, including those that appear only in some
618 // years. If we do that, its return should package month number, whether the
619 // month appears in all years and whether it was standalone or plain, along
620 // with the start and end indices of the match within the text.
621 auto tryEachNameType = [this, verb, lone, year,
622 addIfMatch](QLocale::FormatType form, int month) {
623 if (lone)
624 addIfMatch(month, calendar.standaloneMonthName(locale, month, year, form));
625 if (verb)
626 addIfMatch(month, calendar.monthName(locale, month, year, form));
627 };
628 // This could in principle, for non-system locales, be done more efficiently
629 // by walking the internal ';'-joined list of month names QCalendarBackend
630 // can give us. The entanglement between QCalendarBackend and QLocale
631 // internals is, however, already quite untidy enough, so leave that for
632 // if/when we discover it's a significant bottle-neck and/or we've unpicked
633 // the existing entanglement a bit first.
634
635 auto tryEachMonth = [month = base.results.month, bound = calendar.maximumMonthsInYear(),
636 tryEachNameType](QLocale::FormatType form) {
637 if (month > 0) {
638 tryEachNameType(form, month);
639 } else {
640 for (int i = bound; i > 0; --i)
641 tryEachNameType(form, i);
642 }
643 };
644 forEachLocaleFormat(flags, tryEachMonth);
645
646 return matches;
647}
648
649std::vector<PartialParse>
650TemporalFieldMatcher::dayNameExtend(const PartialParse &base, QStringView text,
651 TemporalFieldFlags flags) const
652{
653 std::vector<PartialParse> matches;
654 using Flag = TemporalFieldFlag;
655
656 auto addIfMatch = [&matches, base, text, flags](int dow, QString &&name) {
657 // tryEachDayOfWeek() has ensured this:
658 Q_ASSERT(!base.results.dayOfWeek || base.results.dayOfWeek == dow);
659 if (name.isEmpty()) // Locale doesn't know this day of the week's name.
660 return;
661 // If matchesAt(), add to matches:
662 auto match = matchesAt(text, base.results.endIndex, name, flags);
663 if (match) {
664 PartialParse grow(base,match);
665 grow.results.dayOfWeek = dow;
666 matches.push_back(grow);
667 if (flags.testFlag(Flag::SpacePad))
668 matches = spacePadExtend(std::move(matches), text);
669 }
670 };
671
672 constexpr auto Forms = FieldGroup::FormMask;
673 const bool verb = matchesFlagWithin(flags, Flag::Verbal, Forms);
674 const bool lone = matchesFlagWithin(flags, Flag::Standalone, Forms);
675 auto tryEachNameType = [this, addIfMatch, verb, lone](QLocale::FormatType form, int dow) {
676 if (lone)
677 addIfMatch(dow, calendar.standaloneWeekDayName(locale, dow, form));
678 if (verb)
679 addIfMatch(dow, calendar.weekDayName(locale, dow, form));
680 };
681 // As for month names (see above), some collaboration with QCalendarBackend
682 // might make this more efficient for non-system locales, at the expense of
683 // adding to the existing tangle of complexity.
684
685 auto tryEachDayOfWeek = [dow = base.results.dayOfWeek,
686 tryEachNameType](QLocale::FormatType form) {
687 if (dow > 0) {
688 tryEachNameType(form, dow);
689 } else {
690 // Iterate possible day numbers. Issue: some calendars might have
691 // intercalary days with numbers > 7. When that happens, we may
692 // need to let this run past 7 until it's seen some empty answers.
693 for (int i = 1; i <= 7; ++i)
694 tryEachNameType(form, i);
695 }
696 };
697 forEachLocaleFormat(flags, tryEachDayOfWeek);
698
699 std::sort(matches.begin(), matches.end(), longerEarlier);
700 return matches;
701}
702
703std::pair<qsizetype, int>
704TemporalFieldMatcher::dayPeriodPrefix(const PartialParse &base, QStringView text,
705 TemporalFieldFlags flags) const
706{
707 std::pair<qsizetype, int> result = {0, -1};
708 for (int i = 0; i < 2; ++i) {
709 if (base.periodInDay >= 0 && base.periodInDay != i)
710 continue;
711 if (const QString token = i ? locale.pmText() : locale.amText(); !token.isEmpty()) {
712 if (auto match = matchesAt(text, base.results.endIndex, token, flags);
713 match.endIndex > result.first) {
714 result = { match.endIndex, i };
715 }
716 }
717 }
718 return result;
719}
720
721/*!
722 \internal
723 Find all matches to \a field, within \a text, that extend \a base.
724
725 Each match must begin at offset \c{base.results.endIndex} within \a text.
726 For each match, update a copy of \a base with the match's result, to include
727 in the returned list.
728
729 May use \c calendar to determine the range of values allowed for field.
730 Does not attempt to determine consistency between fields; see resolve() and
731 isSelfConsistent() for that. Updates the copy's member holding the value
732 described by \a field to reflect the match.
733
734 Ignores base.results.startIndex and base.results.bounds and updates each
735 copy's .endIndex to reflect the end of the match. (This leaves the caller to
736 decide whether to transfer that to .bounds.)
737
738 For fields that allow space padding, this consumes leading space as
739 necessary to make a match and includes a match for each end position at
740 which it could end; before any dangling space and after each space that
741 follows. Later calls to \c continuations() shall filter out any earlier
742 matches that precludes later fields matching just after its end. Successive
743 space-padded fields surrounded by large amounts of space are apt to lead to
744 many matches, as are final space-padded fields followed by large amounts of
745 space. (TODO: we can almost certainly mitigate this with a trivial
746 heuristic, once everything is working.)
747
748 For those matches with various flaws, relative to the field specification
749 (such as using zero padding when not obliged to), the copy's .wanton records
750 that flaw.
751
752 Sort order of the returned list should put entries likely to represent more
753 suitable matches (ignoring .wanton complications) earlier. For most fields,
754 that means longer matches come first. (For full year field matches with > 4
755 digits, though, that reverses.)
756*/
758TemporalFieldMatcher::continuations(const PartialParse &base, QStringView text,
759 const TemporalField &field) const
760{
761 std::vector<PartialParse> matches;
762 const qsizetype textPos = base.results.endIndex;
763 switch (field.category) {
764 using Cat = TemporalFieldCategory;
765 using Flag = TemporalFieldFlag;
766 case Cat::Literal:
767 if (auto match = matchesAt(text, textPos, field.literal, field.options)) {
768 matches.push_back(PartialParse(base, match));
769 if (field.options.testFlag(Flag::SpacePad))
770 matches = spacePadExtend(std::move(matches), text);
771 }
772 break;
773 case Cat::TimeZone:
774 if (const auto zones = QtParseTimeZone::prefix(text, locale, textPos, field.options);
775 !zones.isEmpty()) {
776 for (const auto &match : zones) {
777 matches.push_back(PartialParse(base, match));
778 if (field.options.testFlag(Flag::SpacePad))
779 matches = spacePadExtend(std::move(matches), text);
780 }
781 }
782 break;
783
784 // case Cat::MillisecondInDay: break;
785 case Cat::SecondFraction:
786 matches = numericExtend(base, text, field.options,
787 {millisTarget, 999, -1, field.width, 0, 3});
788 break;
789 case Cat::Second:
790 matches = numericExtend(base, text, field.options, {secondTarget, 59, -1, field.width, 2});
791 break;
792 // case Cat::MinuteFraction: break;
793 case Cat::Minute:
794 matches = numericExtend(base, text, field.options, {minuteTarget, 59, -1, field.width, 2});
795 break;
796 // case Cat::HourFraction: break;
797 case Cat::HourMod12:
798 matches = numericExtend(base, text, field.options,
799 {hourMod12Target, 12, 0, field.width, 2});
800 break;
801 case Cat::Hour:
802 matches = numericExtend(base, text, field.options, {hourTarget, 23, -1, field.width, 2});
803 break;
804 case Cat::PeriodInDay: // am/pm; LDML also has noon, midnight, "at night" and others.
805 if (const auto match = dayPeriodPrefix(base, text, field.options); match.second >= 0) {
806 // Ensured by dayPeriodPrefix:
807 Q_ASSERT(base.periodInDay < 0 || base.periodInDay == match.second);
808 PartialParse grow = base;
809 grow.results.endIndex = match.first;
810 grow.periodInDay = match.second;
811 matches.push_back(grow);
812 if (field.options.testFlag(Flag::SpacePad))
813 matches = spacePadExtend(std::move(matches), text);
814 }
815 break;
816
817 case Cat::DayOfWeek:
818 matches = dayNameExtend(base, text, field.options);
819 break;
820 case Cat::DayOfMonth: {
821 const int maxDays = calendar.maximumDaysInMonth();
822 matches = numericExtend(base, text, field.options,
823 {dayOfMonthTarget, maxDays, 0, field.width,
824 maxDays < 10 ? 1 : maxDays < 100 ? 2 : 3});
825 }
826 break;
827 // case Cat::DayOfYear: break;
828 // case Cat::JulianDay: break;
829 // case Cat::WeekOfMonth: break;
830 // case Cat::WeekOfYear: break;
831 case Cat::Month:
832 // Verbal and Standalone, in so far as supported:
833 matches = monthNameExtend(base, text, field.options);
834 if (matchesFlagWithin(field.options, Flag::Numeric, FieldGroup::FormMask)) {
835 auto extend = numericExtend(base, text, field.options,
836 {monthTarget, calendar.maximumMonthsInYear(),
837 0, field.width, 2});
838 if (matches.empty())
839 matches = std::move(extend);
840 else
841 matches.insert(matches.end(), extend.begin(), extend.end());
842 }
843 std::sort(matches.begin(), matches.end(), longerEarlier);
844 break;
845 // case Cat::Quarter: break;
846 case Cat::YearWithinCentury:
847 matches = numericExtend(base, text, field.options,
848 {yearWithinCenturyTarget, 99, -1, field.width, 2});
849 break;
850 case Cat::Year:
851 matches = numericExtend(base, text, field.options,
852 {yearTarget, 0, 0, field.width, -4, -1, true});
853 break;
854 // case Cat::RelatedGregorianYear: break;
855 // case Cat::Century: break;
856 // case Cat::Era: break;
857 }
858 return matches;
859}
860
861} // nameless namespace
862
// Turns the parsed date fields (year, month, dayOfMonth, dayOfWeek) into a
// QDate in calendar cal. With valid defaults the result is draft, adjusted
// below where it disagrees with the parsed day of the week; without them only
// a complete year, month and day of month produce a date.
// NOTE(review): this listing lacks the function's signature line and several
// body lines (its embedded numbering skips them), so the set-up of draft, the
// declaration of closer and a few guarding conditions are not visible here.
// Notes on those parts are marked as assumptions; confirm against the full file.
865{
866 if (defaults.isValid()) {
// NOTE(review): the lines that build draft are missing from this listing;
// presumably it combines defaults with whichever fields were parsed.
869
871 // Defaults conflict with parsed day of the week.
// No day of month was parsed, so move draft by whole days to land on the
// parsed day of the week.
872 if (!dayOfMonth) {
873 // (Assumes no intercalary days.)
874 // Number of days to the nearest with the right day of the week:
875 const int offset = (dayOfWeek + 10 - draft.dayOfWeek(cal)) % 7 - 3;
876 Q_ASSERT(offset != 0); // Otherwise, day of week matched, already.
877 Q_ASSERT(-4 < offset && offset < 4);
878 // Prefer closer unless nearby has more in common with what we asked for:
// nearby is seven days from draft.addDays(offset), on the opposite side of draft.
// NOTE(review): closer's declaration is missing from this listing;
// presumably it is draft.addDays(offset).
880 QDate nearby = draft.addDays(offset < 0 ? offset + 7 : offset - 7);
881 if (nearby.isValid()
882 && (!closer.isValid()
885 || (closer.month(cal) != draft.month(cal)
886 && nearby.month(cal) == draft.month(cal)))) {
887 // (We could also give year(cal) the same treatment, but
888 // different year, for dates within ten days of one another,
889 // implies different month, so check would be redundant.)
890 std::swap(nearby, draft);
891 } else if (closer.isValid() && closer.dayOfWeek(cal) == dayOfWeek) {
892 std::swap(closer, draft);
893 }
894
// No month was parsed: search outwards from draft's month, a month at a time
// in each direction.
895 } else if (!month) {
// Adopts yr/mon/day as draft when it is a valid date on the parsed day of the
// week; returns whether it did so.
897 auto use = [&draft, cal, dow=dayOfWeek](int yr, int mon, int day) {
898 QDate maybe(yr, mon, day, cal);
899 if (!maybe.isValid() || maybe.dayOfWeek(cal) != dow)
900 return false;
901 std::swap(maybe, draft);
902 return true;
903 };
904 // Find nearest month with the right dayOfMonth and dayOfWeek.
905 // If year was specified we're limited to it; otherwise,
906 // draft.year() is derived from defaults so the search can
907 // spread to nearby years.
908 int loYear = draft.year(cal), hiYear = loYear;
909 int loMon = draft.month(cal), hiMon = loMon;
// Each flag is cleared once its direction has nowhere further to go.
910 bool maybeLo = true, maybeHi = true;
911 while (maybeLo || maybeHi) {
912 if (maybeHi) {
913 if (hiMon < cal.monthsInYear(hiYear)) {
914 ++hiMon;
915 } else if (year) {
916 Q_ASSERT(hiYear == *year);
917 maybeHi = false;
// hiYear + 1 is zero only when hiYear is -1: step into the next year unless
// that would be a year zero the calendar does not have.
918 } else if (hiYear + 1 || cal.hasYearZero()) {
919 ++hiYear;
920 hiMon = 1;
// Here hiYear is -1 and there is no year zero, so go straight on to year +1.
921 } else if (cal.isProleptic()) {
922 hiYear = +1;
923 hiMon = 1;
924 } else {
925 maybeHi = false;
926 }
927 }
// NOTE(review): the condition guarding this break is missing from this
// listing; presumably a successful use() of the later candidate month.
929 break;
930
931 if (maybeLo) {
932 if (loMon > 1) {
933 --loMon;
934 } else if (year) {
935 Q_ASSERT(loYear == *year);
936 maybeLo = false;
// loYear - 1 is zero only when loYear is 1: step back a year unless that
// would be a year zero the calendar does not have.
937 } else if (loYear - 1 || cal.hasYearZero()) {
938 --loYear;
// NOTE(review): a line is missing here and again after the loYear = -1
// below; presumably each resets loMon to the last month of the new year.
940 } else if (cal.isProleptic()) {
941 loYear = -1;
943 } else {
944 maybeLo = false;
945 }
946 }
// NOTE(review): the condition guarding this break is missing from this
// listing; presumably a successful use() of the earlier candidate month.
948 break;
949
950 // Avoid looping for ever: if we can't find a match within a
951 // 30 year window we probably never shall. If we haven't
952 // found a match by then, the likelihood that the input has
953 // a typo in it is fairly high, in any case.
954 if (hiYear - loYear > 30)
955 break;
956 }
// No year was parsed.
// NOTE(review): most of this branch is missing from this listing; only the
// final adoption of a candidate date as draft is visible.
957 } else if (!year) {
958 // As for resolve()'s handling of two-digit centuries:
962 std::swap(maybe, draft);
963 }
// NOTE(review): the condition guarding this return is missing from this
// listing; presumably it gives up when draft still conflicts with the parsed
// day of the week.
965 return {};
966 }
967 return draft;
968 }
// No usable defaults: only a fully specified date can be built.
969 if (year && month && dayOfMonth)
970 return QDate(*year, month, dayOfMonth, cal);
971 return {};
972}
973
// Turns the parsed time fields (hour, minute, second, millis) into a QTime.
// With valid defaults, every field that was not parsed is taken from them;
// without, an hour is required and the finer fields are simply left out of
// the QTime constructor call from the first missing one onwards.
// NOTE(review): the function's signature line is missing from this listing.
975{
976 if (defaults.isValid()) {
977 int hr = defaults.hour();
978 // hour: -1 means we have no information, less means unknown am, > 24 means unknown pm.
// Only the half of the day is known: keep the default's hour within its half
// day and place it in the morning (0 through 11) ...
979 if (hour < -1) // UnknownAmHour
980 hr = hr % 12;
// ... or in the afternoon (12 through 23).
981 else if (hour > 24) // UnknownPmHour
982 hr = hr % 12 + 12;
// A parsed hour replaces the default outright.
983 else if (hour >= 0)
984 hr = hour;
985 // (Note: hour == 24 is currently unused but may be relevant for 24:00:00 in future.)
// Negative minute, second or millis mean "not parsed": use the default's.
986 return QTime(hr,
987 minute < 0 ? defaults.minute() : minute,
988 second < 0 ? defaults.second() : second,
989 millis < 0 ? defaults.msec() : millis);
990 }
991
// No usable defaults: without a definite hour there is no time to return.
992 if (hour < 0 || hour > 24)
993 return {};
// Stop at the first field that was not parsed, passing only those before it.
994 if (minute < 0)
995 return QTime(hour, 0);
996 if (second < 0)
997 return QTime(hour, minute);
998 if (millis < 0)
999 return QTime(hour, minute, second);
1000 return QTime(hour, minute, second, millis);
1001}
1002
1003ParsedTemporal prefix(QStringView text, QSpan<const QtTemporalPattern::TemporalField> fields,
1004 const QLocale &locale, QCalendar cal,
1005 std::optional<int> baseYear, qsizetype from)
1006{
1007 if (from < 0 || from >= text.size())
1008 return {};
1009
1010 const TemporalFieldMatcher matcher(locale, cal, baseYear);
1011 // Technically this is the correct (empty) result when fields.isEmpty():
1012 std::vector<PartialParse> maybe{PartialParse(from)};
1013
1014 qsizetype toCome = fields.size();
1015 for (const QtTemporalPattern::TemporalField &field : fields) {
1016 --toCome;
1017 const std::vector<PartialParse> prior = std::exchange(maybe, {});
1018 for (const PartialParse &base : prior) {
1019 std::vector<PartialParse> more
1020 = matcher.continuations(base, text, field);
1021 for (PartialParse &candidate : more) {
1022 // Consistency won't have been changed by a literal field:
1023 if ((field.category == TemporalFieldCategory::Literal
1024 || matcher.isSelfConsistent(candidate, field.category))) {
1025 if (toCome) // Earlier fields' ends go in bounds:
1026 candidate.results.bounds.push_back(candidate.results.endIndex);
1027 else if (!matcher.resolve(candidate)) // Last field: makes sense of it all.
1028 continue;
1029 maybe.push_back(std::move(candidate));
1030 }
1031 }
1032 }
1033 if (maybe.empty()) // No point continuing
1034 return {};
1035 }
1036 // Now select our most favourable entry from maybe.
1037
1038 // Although we've, thus far, prefered sensible-length matches over longer
1039 // ones in individual numeric fields, so that later numeric fields can take
1040 // up the slack and win, we still want to be greedy over-all, so prefer
1041 // overall longer matches to shorter ones. None the less, between matches of
1042 // equal length, preserve our preference, up to now, for sane lengths of
1043 // each field within that, as long as later fields are taking up the slack.
1044 // That preference can be fine-tuned via .wanton, see PartialParse::Flaw.
1045 PartialParse best = maybe.front();
1046 for (const PartialParse &match : QSpan{maybe}.sliced(1)) {
1047 if (match.compare(best) < 0)
1048 best = match;
1049 }
1050 return best.results;
1051}
1052
1053} // QtParseTemporal
1054
1055QT_END_NAMESPACE
bool isSelfConsistent(const PartialParse &parsed, TemporalFieldCategory category) const
bool resolve(PartialParse &parsed) const
std::vector< PartialParse > continuations(const PartialParse &base, QStringView text, const TemporalField &field) const
TemporalFieldMatcher(const QLocale &loc, QCalendar cal, std::optional< int > centuryStart)
bool longerEarlier(const PartialParse &left, const PartialParse &right)
void forEachLocaleFormat(TemporalFieldFlags flags, Action action)
std::vector< PartialParse > spacePadExtend(std::vector< PartialParse > &&matched, QStringView text)
QtParseCommon::ParsedText matchesAt(QStringView text, qsizetype from, const QString &sought, TemporalFieldFlags flags)
ParsedTemporal prefix(QStringView text, QSpan< const QtTemporalPattern::TemporalField > fields, const QLocale &locale, QCalendar cal, std::optional< int > baseYear, qsizetype from)
Qt::weak_ordering compare(const PartialParse &alt) const noexcept
PartialParse(const PartialParse &base, const QtParseCommon::ParsedText &more)