Qt
Internal/Contributor docs for the Qt SDK. Note: These are NOT official API docs; those are found at https://doc.qt.io/
Loading...
Searching...
No Matches
qtparseqttemporalformat.cpp
Go to the documentation of this file.
1// Copyright (C) 2026 The Qt Company Ltd.
2// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
3// Qt-Security score:critical reason:data-parser
4#include "private/qtparseqttemporalformat_p.h"
5
6#include "private/qlocale_p.h"
7#include "private/qstringiterator_p.h"
8
9QT_BEGIN_NAMESPACE
10
11using namespace Qt::StringLiterals;
12
14
// ASCII letters that introduce a time field. The AM/PM markers ([aA][pP]?) are
// omitted deliberately: prefix() tests for 'A' and 'a' separately.
inline constexpr char timeFormats[] = "Hhmsz";
// ASCII letters that introduce a date field.
inline constexpr char dateFormats[] = "Mdy";
17
18ParsedDateTimeFormat prefix(QStringView pattern, QtTemporalPattern::DateTimeParts form)
19{
20 using namespace QtTemporalPattern;
21
22 ParsedDateTimeFormat result;
23 constexpr char32_t Invalid = ~char32_t(0);
24 static_assert(Invalid > QChar::LastValidCodePoint);
25 const bool includeDate = form.testFlag(DateTimePart::Date);
26 const bool includeTime = form.testFlag(DateTimePart::Time);
27 const bool includeZone = form.testFlag(DateTimePart::Zone);
28
29 QStringIterator iter(pattern);
30 char32_t pending = 0;
31 const auto countRepeats = [&pending, &iter, &result](char32_t first, qsizetype bound) {
32 // Consumes all repeats of \a first, returns min(bound, number of repeats).
33 Q_ASSERT(!QChar::requiresSurrogates(first)); // It's always an ASCII format char
34 Q_ASSERT(pending == 0);
35 qsizetype count = 1; // We've already seen first
36 result.endIndex = iter.index(); // ... and tacitly consumed it.
37 while (iter.hasNext() && count < bound) {
38 const auto read = iter.next(Invalid);
39 if (read > QChar::LastValidCodePoint) {
40 pending = Invalid;
41 break;
42 }
43 if (read != first) {
44 pending = read;
45 break;
46 }
47 ++count;
48 result.endIndex = iter.index();
49 }
50 return count;
51 };
52
53 constexpr char32_t SingleQuote = U'\'';
54 static constexpr auto matchTimeFormats = QtPrivate::makeCharacterSetMatch<timeFormats>();
55 static constexpr auto matchDateFormats = QtPrivate::makeCharacterSetMatch<dateFormats>();
56 const auto isFormatChar = [includeDate, includeTime, includeZone](char32_t ch) {
57 if (ch >= 0x80)
58 return false;
59 if (includeTime) {
60 if (matchTimeFormats.matches(uchar(ch)) || ch == U'A' || ch == U'a')
61 return true;
62 }
63 if (includeZone && ch == U't')
64 return true;
65 return includeDate && matchDateFormats.matches(uchar(ch));
66 };
67
68 constexpr auto formatCategory = [](uchar ch, int count) {
69 using Cat = TemporalFieldCategory;
70 switch (ch) {
71 case 'A': // case 'P':
72 case 'a': // case 'p':
73 return Cat::PeriodInDay;
74 case 'd': return count < 3 ? Cat::DayOfMonth : Cat::DayOfWeek;
75 case 'H': return Cat::Hour;
76 case 'h': return Cat::HourMod12;
77 case 'M': return Cat::Month;
78 case 'm': return Cat::Minute;
79 case 's': return Cat::Second;
80 case 't': return Cat::TimeZone;
81 case 'y': return count < 3 ? Cat::YearWithinCentury : Cat::Year;
82 case 'z': return Cat::SecondFraction;
83 }
84 // Should only be called with a ch that would pass an isFormatChar() check.
85 Q_UNREACHABLE_RETURN(Cat::Literal);
86 };
87 constexpr auto formatFlags = [](uchar ch, int count) -> TemporalFieldFlags {
88 using F = TemporalFieldFlag;
89 constexpr TemporalFieldFlags TextCommon = F::IgnoreCase | F::FlexSpace;
90 switch (ch) {
91 case 'A': // case 'P':
92 return count ? F::UpperCase | TextCommon : TextCommon;
93 case 'a': // case 'p':
94 return count ? F::LowerCase | TextCommon : TextCommon;
95 case 'd': case 'M': // Day and Month share a pattern:
96 switch (count) {
97 case 2: return F::Numeric | F::ZeroPad;
98 case 3: return F::Verbal | F::Abbreviated | TextCommon;
99 default:
100 return count < 2 ? F::Numeric : F::Verbal | F::Wide | TextCommon;
101 };
102 Q_UNREACHABLE();
103 break;
104 case 'H': case 'h': case 'm': case 's': // Shared pattern:
105 return count > 1 ? F::Numeric | F::ZeroPad : F::Numeric;
106 case 't':
107 switch (count) {
108 case 1: // 't': matches everything, serializes as abbreviation
109 return F::AllowZSuffix | F::LocalTimeName;
110 // The next two forms aren't localized - should they be ?
111 // The issue is that they're documented to differ in whether
112 // separators are used, but we don't control that (nor should
113 // we, or the format author) for localized forms.
114 case 2: // 'tt': offset (no-prefix, no separator)
115 return F::Iso8601 | F::Numeric | F::ZeroPad;
116 case 3: // 'ttt': offset (no-prefix, separator)
117 return F::Iso8601 | F::Verbal | F::ZeroPad;
118 default: // 'tttt': long name or IANA ID
119 return F::LocalizedZone | F::Verbal | F::Standalone | F::Wide | F::Short;
120 // This includes both metazone and exemplar city versions of long name.
121 }
122 Q_UNREACHABLE();
123 break;
124 case 'y':
125 if (count > 2)
126 return F::Numeric | F::ZeroPad | F::YearSignIso8601;
127 return F::Numeric | F::ZeroPad;
128 case 'z':
129 if (count > 2)
130 return F::Numeric | F::ZeroPad;
131 return F::Numeric;
132 }
133 // Should only be called by branches that passed an isFormatChar() check.
134 Q_UNREACHABLE_RETURN({});
135 };
136
137 const auto store = [&result](QString &&literal, qsizetype count,
138 TemporalFieldFlags flags,
139 TemporalFieldCategory category) {
140 result.fields.append(TemporalField{std::move(literal), count, flags, category});
141 };
142
143 bool seenDayPeriod = false, seenHourMod12 = false; // See post-processing.
144 while (pending <= QChar::LastValidCodePoint && (pending || iter.hasNext())) {
145 char32_t ch;
146 if (pending) {
147 ch = std::exchange(pending, 0);
148 } else {
149 result.endIndex = iter.index();
150 ch = iter.next(Invalid);
151 }
152 if (ch > QChar::LastValidCodePoint)
153 break;
154
155 if (ch < 0x80 && includeTime) {
156 if (matchTimeFormats.matches(uchar(ch))) {
157 qsizetype count = countRepeats(ch, ch == U'z' ? 3 : 2);
158 if (ch == U'z' && count == 2) // Backwards compatibility
159 count = 1; // (but we still consume both 'z' characters from the format)
160 store(QString(), count, formatFlags(uchar(ch), count),
161 formatCategory(uchar(ch), count));
162 if (ch == U'h')
163 seenHourMod12 = true;
164 continue;
165 }
166 if (ch == U'A' || ch == U'a') {
167 // Follow old QDTP (for now, at least) in using count to represent case choice.
168 qsizetype count = ch == U'a' ? 1 : 2;
169 // AP or ap are just the same as A or a; but Ap or aP selects
170 // locale-appropriate case:
171 result.endIndex = iter.index();
172 const auto read = iter.hasNext() ? iter.next(Invalid) : Invalid;
173 if (read > QChar::LastValidCodePoint) {
174 pending = Invalid;
175 } else if (read == U'P') {
176 if (ch == U'a')
177 count = 0;
178 result.endIndex = iter.index();
179 } else if (read == U'p') {
180 if (ch == U'A')
181 count = 0;
182 result.endIndex = iter.index();
183 } else {
184 pending = read;
185 }
186 store(QString(), count, formatFlags(uchar(ch), count),
187 formatCategory(uchar(ch), count));
188 seenDayPeriod = true;
189 continue;
190 }
191 }
192 // Date and Zone fields are more straightforward, except for 'y':
193 if (ch == U'y' && includeDate) {
194 // For 'y', a pair is a year-within-century, double that for a full
195 // year; beyond that, evenly many more are more of those but an odd
196 // 'y' is a literal. We thus need to only consume 2 or 4 'y' tokens,
197 // so can't use countRepeat() with its simple maximum. We need to
198 // leave the odd 'y', if present, for a later iteration to consume
199 // or, if it's all there is, for use as a literal - in which case we
200 // mustn't have set pending. Fortunately 'y' is ASCII so we don't
201 // have to worry about surrogates:
202 qsizetype count = 1;
203 QStringView tail = pattern.sliced(iter.index() - 1);
204 if (tail.size() > 4)
205 tail = tail.first(4);
206 while (count < tail.size() && char32_t(tail[count].unicode()) == ch)
207 ++count;
208 if (count == 3)
209 --count;
210 if (count > 1) {
211 Q_ASSERT(count == 2 || count == 4);
212 // Advance iter over what we've accepted:
213 iter.setPosition(iter.position() - 1 + count);
214 store(QString(), count, formatFlags(uchar(ch), count),
215 formatCategory(uchar(ch), count));
216 result.endIndex = iter.index();
217 continue;
218 }
219 // else: fall through to treat the lone 'y' as a literal.
220 } else if (ch < 0x80 && ((includeDate && matchDateFormats.matches(uchar(ch)))
221 || (includeZone && ch == U't'))) {
222 qsizetype count = countRepeats(ch, 4);
223 store(QString(), count, formatFlags(uchar(ch), count),
224 formatCategory(uchar(ch), count));
225 continue;
226 }
227 Q_ASSERT(pending == 0); // Everything that might set it has continue;d
228
229 // Not a field indicator, so parse as a literal:
230 QString literal;
231 QString quote; // If non-null: unfinished quote, to be appended to literal when closed.
232 if (ch == SingleQuote) { // Defer it to first iteration of loop below.
233 pending = ch;
234 } else {
235 literal = QString(QStringView(QChar::fromUcs4(ch)));
236 result.endIndex = iter.index();
237 }
238 while (pending <= QChar::LastValidCodePoint && (pending || iter.hasNext())) {
239 if (pending) {
240 ch = std::exchange(pending, 0);
241 } else {
242 if (quote.isNull()) // i.e. we're not in an incomplete quote
243 result.endIndex = iter.index();
244 ch = iter.next(Invalid);
245 }
246 if (ch > QChar::LastValidCodePoint)
247 break;
248
249 if (ch == SingleQuote) {
250 if (quote.isNull()) { // Provisionally start a quote
251 quote = u""_s; // empty is not null
252 } else {
253 // Even if this is the first quote of a pair, denoting a
254 // single quote within the quote, there's a valid parse that
255 // ends at it, adding the quote-so-far to literal.
256 literal += quote;
257 result.endIndex = iter.index();
258 quote = QString(); // Set back to null
259 }
260 ch = iter.hasNext() ? iter.next(Invalid) : Invalid;
261 if (ch == SingleQuote) {
262 // Paired single quote denotes a single quote:
263 if (quote.isNull()) {
264 // The quote we thought was ending actually continues:
265 // the continuation starts with a literal single quote.
266 quote = u"'"_s;
267 } else {
268 // Our provisionally-started quote was actually the
269 // first half of an pair of quotes not inside others.
270 Q_ASSERT(quote.isEmpty()); // We just set it.
271 quote = QString();
272 literal.append(u'\'');
273 result.endIndex = iter.index();
274 }
275 continue;
276 }
277
278 if (ch > QChar::LastValidCodePoint) {
279 pending = ch;
280 break;
281 }
282 }
283
284 Q_ASSERT(ch != SingleQuote);
285 if (quote.isNull() && isFormatChar(ch)) {
286 pending = ch;
287 break;
288 }
289 if (ch <= QChar::LastValidCodePoint) {
290 if (quote.isNull()) {
291 result.endIndex = iter.index();
292 literal.append(QStringView(QChar::fromUcs4(ch)));
293 } else {
294 quote.append(QStringView(QChar::fromUcs4(ch)));
295 }
296 }
297 }
298 // Even if we truncated due to an unclosed quote, we have a literal to
299 // include in the prefix we can parse as a pattern:
300 if (!literal.isEmpty()) {
301 store(std::move(literal), 0,
302 TemporalFieldFlag::FlexSpace, TemporalFieldCategory::Literal);
303 }
304 // If we're in an unclosed quote, we cleared pending or marked it invalid:
305 Q_ASSERT(quote.isNull() || !pending || pending > QChar::LastValidCodePoint);
306 }
307
308 // Post-process to deal with a quirk of the legacy format: if there's no
309 // AM/PM field, then 'h' format is read as 'H' format.
310 if (seenHourMod12 && !seenDayPeriod) {
311 for (TemporalField &field : result.fields) {
312 if (field.category == TemporalFieldCategory::HourMod12)
313 field.category = TemporalFieldCategory::Hour;
314 }
315 }
316
317 return result;
318}
319
320} // QtParseQtTemporalFormat
321
322QT_END_NAMESPACE
ParsedDateTimeFormat prefix(QStringView pattern, QtTemporalPattern::DateTimeParts form)