Qt
Internal/Contributor docs for the Qt SDK. Note: These are NOT official API docs; those are found at https://doc.qt.io/
Loading...
Searching...
No Matches
qtcontenttypeparser_p.h
Go to the documentation of this file.
1// Copyright (C) 2024 The Qt Company Ltd.
2// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
3// Qt-Security score:critical reason:data-parser
4
5#ifndef QTCORE_QTCONTENTTYPEPARSER_P_H
6#define QTCORE_QTCONTENTTYPEPARSER_P_H
7
8//
9// W A R N I N G
10// -------------
11//
12// This file is not part of the Qt API. It exists for the convenience
13// of the Network Access API. This header file may change from
14// version to version without notice, or even be removed.
15//
16// We mean it.
17//
18
19#include <QtCore/qbytearrayview.h>
20#include <QtCore/qlatin1stringview.h>
21
22#include <QtCore/qxpfunctional.h>
23#include <string>
24
25QT_BEGIN_NAMESPACE
26
28
29constexpr auto parse_OWS(QByteArrayView data) noexcept
30{
31 struct R {
32 QByteArrayView ows, tail;
33 };
34
35 constexpr auto is_OWS_char = [](auto ch) { return ch == ' ' || ch == '\t'; };
36
37 qsizetype i = 0;
38 while (i < data.size() && is_OWS_char(data[i]))
39 ++i;
40
41 return R{data.first(i), data.sliced(i)};
42}
43
44constexpr void eat_OWS(QByteArrayView &data) noexcept
45{
46 data = parse_OWS(data).tail;
47}
48
49constexpr auto parse_quoted_string(QByteArrayView data, qxp::function_ref<void(char) const> yield)
50{
51 struct R {
52 QByteArrayView quotedString, tail;
53 constexpr explicit operator bool() const noexcept { return !quotedString.isEmpty(); }
54 };
55
56 if (!data.startsWith('"'))
57 return R{{}, data};
58
59 qsizetype i = 1; // one past initial DQUOTE
60 while (i < data.size()) {
61 switch (auto ch = data[i++]) {
62 case '"': // final DQUOTE -> end of string
63 return R{data.first(i), data.sliced(i)};
64 case '\\': // quoted-pair
65 // https://www.rfc-editor.org/rfc/rfc9110.html#section-5.6.4-3:
66 // Recipients that process the value of a quoted-string MUST handle a
67 // quoted-pair as if it were replaced by the octet following the backslash.
68 if (i == data.size())
69 break; // premature end
70 ch = data[i++]; // eat '\\'
71 [[fallthrough]];
72 default:
73 // we don't validate quoted-string octets to be only qdtext (Postel's Law)
74 yield(ch);
75 }
76 }
77
78 return R{{}, data}; // premature end
79}
80
// Returns whether `ch` is a token character:
//
// tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." /
//         "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA
constexpr bool is_tchar(char ch) noexcept
{
    // ### optimize
    const bool isDigit = ch >= '0' && ch <= '9';
    const bool isUpper = ch >= 'A' && ch <= 'Z';
    const bool isLower = ch >= 'a' && ch <= 'z';
    if (isDigit || isUpper || isLower)
        return true;

    // explicit array (not a string literal) so that '\0' is not matched
    constexpr char punctuation[] = {
        '!', '#', '$', '%', '&', '\'', '*', '+', '-', '.', '^', '_', '`', '|', '~',
    };
    for (const char candidate : punctuation) {
        if (ch == candidate)
            return true;
    }
    return false;
}
107
108constexpr auto parse_comment(QByteArrayView data) noexcept
109{
110 struct R {
111 QByteArrayView comment, tail;
112 constexpr explicit operator bool() const noexcept { return !comment.isEmpty(); }
113 };
114
115 const auto invalid = R{{}, data}; // preserves original `data`
116
117 // comment = "(" *( ctext / quoted-pair / comment ) ")"
118 // ctext = HTAB / SP / %x21-27 / %x2A-5B / %x5D-7E / obs-text
119
120 if (!data.startsWith('('))
121 return invalid;
122
123 qsizetype i = 1;
124 qsizetype level = 1;
125 while (i < data.size()) {
126 switch (data[i++]) {
127 case '(': // nested comment
128 ++level;
129 break;
130 case ')': // end of comment
131 if (--level == 0)
132 return R{data.first(i), data.sliced(i)};
133 break;
134 case '\\': // quoted-pair
135 if (i == data.size())
136 return invalid; // premature end
137 ++i; // eat escaped character
138 break;
139 default:
140 ; // don't validate ctext - accept everything (Postel's Law)
141 }
142 }
143
144 return invalid; // premature end / unbalanced nesting levels
145}
146
147constexpr void eat_CWS(QByteArrayView &data) noexcept
148{
149 eat_OWS(data);
150 while (const auto comment = parse_comment(data)) {
151 data = comment.tail;
152 eat_OWS(data);
153 }
154}
155
156constexpr auto parse_token(QByteArrayView data) noexcept
157{
158 struct R {
159 QByteArrayView token, tail;
160 constexpr explicit operator bool() const noexcept { return !token.isEmpty(); }
161 };
162
163 qsizetype i = 0;
164 while (i < data.size() && is_tchar(data[i]))
165 ++i;
166
167 return R{data.first(i), data.sliced(i)};
168}
169
170constexpr auto parse_parameter(QByteArrayView data, qxp::function_ref<void(char) const> yield)
171{
172 struct R {
173 QLatin1StringView name; QByteArrayView value; QByteArrayView tail;
174 constexpr explicit operator bool() const noexcept { return !name.isEmpty(); }
175 };
176
177 const auto invalid = R{{}, {}, data}; // preserves original `data`
178
179 // parameter = parameter-name "=" parameter-value
180 // parameter-name = token
181 // parameter-value = ( token / quoted-string )
182
183 const auto name = parse_token(data);
184 if (!name)
185 return invalid;
186 data = name.tail;
187
188 eat_CWS(data); // not in the grammar, but accepted under Postel's Law
189
190 if (!data.startsWith('='))
191 return invalid;
192 data = data.sliced(1);
193
194 eat_CWS(data); // not in the grammar, but accepted under Postel's Law
195
196 if (Q_UNLIKELY(data.startsWith('"'))) { // value is a quoted-string
197
198 const auto value = parse_quoted_string(data, yield);
199 if (!value)
200 return invalid;
201 data = value.tail;
202
203 return R{QLatin1StringView{name.token}, value.quotedString, data};
204
205 } else { // value is a token
206
207 const auto value = parse_token(data);
208 if (!value)
209 return invalid;
210 data = value.tail;
211
212 return R{QLatin1StringView{name.token}, value.token, data};
213 }
214}
215
216inline auto parse_content_type(QByteArrayView data)
217{
218 using namespace Qt::StringLiterals;
219
220 struct R {
221 QLatin1StringView type, subtype;
222 std::string charset;
223 constexpr explicit operator bool() const noexcept { return !type.isEmpty(); }
224 };
225
226 eat_CWS(data); // not in the grammar, but accepted under Postel's Law
227
228 const auto type = parse_token(data);
229 if (!type)
230 return R{};
231 data = type.tail;
232
233 eat_CWS(data); // not in the grammar, but accepted under Postel's Law
234
235 if (!data.startsWith('/'))
236 return R{};
237 data = data.sliced(1);
238
239 eat_CWS(data); // not in the grammar, but accepted under Postel's Law
240
241 const auto subtype = parse_token(data);
242 if (!subtype)
243 return R{};
244 data = subtype.tail;
245
246 eat_CWS(data);
247
248 auto r = R{QLatin1StringView{type.token}, QLatin1StringView{subtype.token}, {}};
249
250 while (data.startsWith(';')) {
251
252 data = data.sliced(1); // eat ';'
253
254 eat_CWS(data);
255
256 const auto param = parse_parameter(data, [&](char ch) { r.charset.append(1, ch); });
257 if (param.name.compare("charset"_L1, Qt::CaseInsensitive) == 0) {
258 if (r.charset.empty() && !param.value.startsWith('"')) // wasn't a quoted-string
259 r.charset.assign(param.value.begin(), param.value.end());
260 return r; // charset found
261 }
262 r.charset.clear(); // wasn't an actual charset
263 if (param.tail.size() == data.size()) // no progress was made
264 break; // returns {type, subtype}
265 // otherwise, continue (accepting e.g. `;;`)
266 data = param.tail;
267
268 eat_CWS(data);
269 }
270
271 return r; // no charset found
272}
273
274} // namespace QtContentTypeParser
275
276QT_END_NAMESPACE
277
278#endif // QTCORE_QTCONTENTTYPEPARSER_P_H
constexpr void eat_OWS(QByteArrayView &data) noexcept
constexpr auto parse_parameter(QByteArrayView data, qxp::function_ref< void(char) const > yield)
constexpr void eat_CWS(QByteArrayView &data) noexcept
constexpr bool is_tchar(char ch) noexcept
constexpr auto parse_quoted_string(QByteArrayView data, qxp::function_ref< void(char) const > yield)
constexpr auto parse_token(QByteArrayView data) noexcept
constexpr auto parse_comment(QByteArrayView data) noexcept
auto parse_content_type(QByteArrayView data)
constexpr auto parse_OWS(QByteArrayView data) noexcept