Qt
Internal/Contributor docs for the Qt SDK. Note: These are NOT official API docs; those are found at https://doc.qt.io/
Loading...
Searching...
No Matches
qurlrecode.cpp
Go to the documentation of this file.
1// Copyright (C) 2016 Intel Corporation.
2// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
3// Qt-Security score:critical reason:data-parser
4
5#include "qurl.h"
6#include "private/qstringconverter_p.h"
7#include "private/qtools_p.h"
8#include "private/qsimd_p.h"
9
11
12// ### move to qurl_p.h
18
// From RFC 3986, Appendix A Collected ABNF for URI
//    unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
//    reserved      = gen-delims / sub-delims
//    gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
//    sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
//                  / "*" / "+" / "," / ";" / "="
// Default per-character action for the 96 code points U+0020..U+007F.
// The table is indexed by (character - ' ') and each entry is later cast to
// EncodingAction by recode(). Callers copy it into a mutable table before
// applying option- or caller-specific overrides.
//
// Values used below:
//   0 - unreserved characters (and space)
//   1 - gen-delims and sub-delims
//   2 - everything else, including '%' itself
// NOTE(review): presumably 0/1/2 correspond to DecodeCharacter/LeaveCharacter/
// EncodeCharacter; the enum definition is not visible in this listing - confirm.
static const uchar defaultActionTable[96] = {
    0, // space
    1, // '!' (sub-delim)
    2, // '"'
    1, // '#' (gen-delim)
    1, // '$' (sub-delim)
    2, // '%' (percent)
    1, // '&' (sub-delim)
    1, // "'" (sub-delim)
    1, // '(' (sub-delim)
    1, // ')' (sub-delim)
    1, // '*' (sub-delim)
    1, // '+' (sub-delim)
    1, // ',' (sub-delim)
    0, // '-' (unreserved)
    0, // '.' (unreserved)
    1, // '/' (gen-delim)

    0, 0, 0, 0, 0, // '0' to '4' (unreserved)
    0, 0, 0, 0, 0, // '5' to '9' (unreserved)
    1, // ':' (gen-delim)
    1, // ';' (sub-delim)
    2, // '<'
    1, // '=' (sub-delim)
    2, // '>'
    1, // '?' (gen-delim)

    1, // '@' (gen-delim)
    0, 0, 0, 0, 0, // 'A' to 'E' (unreserved)
    0, 0, 0, 0, 0, // 'F' to 'J' (unreserved)
    0, 0, 0, 0, 0, // 'K' to 'O' (unreserved)
    0, 0, 0, 0, 0, // 'P' to 'T' (unreserved)
    0, 0, 0, 0, 0, 0, // 'U' to 'Z' (unreserved)
    1, // '[' (gen-delim)
    2, // '\'
    1, // ']' (gen-delim)
    2, // '^'
    0, // '_' (unreserved)

    2, // '`'
    0, 0, 0, 0, 0, // 'a' to 'e' (unreserved)
    0, 0, 0, 0, 0, // 'f' to 'j' (unreserved)
    0, 0, 0, 0, 0, // 'k' to 'o' (unreserved)
    0, 0, 0, 0, 0, // 'p' to 't' (unreserved)
    0, 0, 0, 0, 0, 0, // 'u' to 'z' (unreserved)
    2, // '{'
    2, // '|'
    2, // '}'
    0, // '~' (unreserved)

    2 // DEL (U+007F)
};
77
78// mask tables, in negative polarity
79// 0x00 if it belongs to this category
80// 0xff if it doesn't
81
// Mask that is AND-ed entry by entry into a copy of the action table when
// QUrl::DecodeReserved is requested (see maskTable()). It has the same layout
// as the action table: 96 entries indexed by (character - ' ').
//
// An entry of 0x00 forces the action for that character to 0; an entry of
// 0xff leaves the action untouched. The 0x00 entries are exactly
//   '"'  '<'  '>'  '\'  '^'  '`'  '{'  '|'  '}'
// i.e. the characters whose default action is 2, except '%' and DEL, which
// keep their default action.
static const uchar reservedMask[96] = {
    0xff, // space
    0xff, // '!' (sub-delim)
    0x00, // '"'
    0xff, // '#' (gen-delim)
    0xff, // '$' (sub-delim)
    0xff, // '%' (percent)
    0xff, // '&' (sub-delim)
    0xff, // "'" (sub-delim)
    0xff, // '(' (sub-delim)
    0xff, // ')' (sub-delim)
    0xff, // '*' (sub-delim)
    0xff, // '+' (sub-delim)
    0xff, // ',' (sub-delim)
    0xff, // '-' (unreserved)
    0xff, // '.' (unreserved)
    0xff, // '/' (gen-delim)

    0xff, 0xff, 0xff, 0xff, 0xff, // '0' to '4' (unreserved)
    0xff, 0xff, 0xff, 0xff, 0xff, // '5' to '9' (unreserved)
    0xff, // ':' (gen-delim)
    0xff, // ';' (sub-delim)
    0x00, // '<'
    0xff, // '=' (sub-delim)
    0x00, // '>'
    0xff, // '?' (gen-delim)

    0xff, // '@' (gen-delim)
    0xff, 0xff, 0xff, 0xff, 0xff, // 'A' to 'E' (unreserved)
    0xff, 0xff, 0xff, 0xff, 0xff, // 'F' to 'J' (unreserved)
    0xff, 0xff, 0xff, 0xff, 0xff, // 'K' to 'O' (unreserved)
    0xff, 0xff, 0xff, 0xff, 0xff, // 'P' to 'T' (unreserved)
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // 'U' to 'Z' (unreserved)
    0xff, // '[' (gen-delim)
    0x00, // '\'
    0xff, // ']' (gen-delim)
    0x00, // '^'
    0xff, // '_' (unreserved)

    0x00, // '`'
    0xff, 0xff, 0xff, 0xff, 0xff, // 'a' to 'e' (unreserved)
    0xff, 0xff, 0xff, 0xff, 0xff, // 'f' to 'j' (unreserved)
    0xff, 0xff, 0xff, 0xff, 0xff, // 'k' to 'o' (unreserved)
    0xff, 0xff, 0xff, 0xff, 0xff, // 'p' to 't' (unreserved)
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // 'u' to 'z' (unreserved)
    0x00, // '{'
    0x00, // '|'
    0x00, // '}'
    0xff, // '~' (unreserved)

    0xff // DEL (U+007F)
};
134
// Returns true if \a c is an ASCII hexadecimal digit: 0-9, A-F or a-f.
static inline bool isHex(char16_t c)
{
    if (c >= u'0' && c <= u'9')
        return true;
    if (c >= u'A' && c <= u'F')
        return true;
    return c >= u'a' && c <= u'f';
}
139
// Returns true if the hex digit \a c needs no upper-casing, i.e. it is a
// decimal digit or one of A-F. The answer is only meaningful when c is
// already known to be a hex digit: the test merely checks that c lies below
// U+0060, the row of the ASCII table where the lowercase letters start.
static inline bool isUpperHex(char16_t c)
{
    constexpr char16_t lowercaseRowStart = 0x60;
    return !(c >= lowercaseRowStart);
}
145
146static inline char16_t toUpperHex(char16_t c)
147{
148 return isUpperHex(c) ? c : c - 0x20;
149}
150
151static inline ushort decodeNibble(char16_t c)
152{
153 return c >= u'a' ? c - u'a' + 0xA : c >= u'A' ? c - u'A' + 0xA : c - u'0';
154}
155
156// if the sequence at input is 2*HEXDIG, returns its decoding
157// returns -1 if it isn't.
158// assumes that the range has been checked already
159static inline char16_t decodePercentEncoding(const char16_t *input)
160{
161 char16_t c1 = input[1];
162 char16_t c2 = input[2];
163 if (!isHex(c1) || !isHex(c2))
164 return char16_t(-1);
165 return decodeNibble(c1) << 4 | decodeNibble(c2);
166}
167
// Returns the character used to represent the nibble \a c in a percent-encoded
// sequence. Simply forwards to QtMiscUtils::toHexUpper(); the callers in this
// file pass values in the range 0..15 (a byte shifted right by 4, or masked
// with 0xf).
static inline char16_t encodeNibble(ushort c)
{
    return QtMiscUtils::toHexUpper(c);
}
172
173static void ensureDetached(QString &result, char16_t *&output, const char16_t *begin, const char16_t *input, const char16_t *end,
174 int add = 0)
175{
176 if (!output) {
177 // now detach
178 // create enough space if the rest of the string needed to be percent-encoded
179 int charsProcessed = input - begin;
180 int charsRemaining = end - input;
181 int spaceNeeded = end - begin + 2 * charsRemaining + add;
182 int origSize = result.size();
183 result.resize(origSize + spaceNeeded);
184
185 // we know that resize() above detached, so we bypass the reference count check
186 output = const_cast<char16_t *>(reinterpret_cast<const char16_t *>(result.constData()))
187 + origSize;
188
189 // copy the chars we've already processed
190 int i;
191 for (i = 0; i < charsProcessed; ++i)
192 output[i] = begin[i];
193 output += i;
194 }
195}
196
197namespace {
198struct QUrlUtf8Traits : public QUtf8BaseTraitsNoAscii
199{
200 // From RFC 3987:
201 // iunreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" / ucschar
202 //
203 // ucschar = %xA0-D7FF / %xF900-FDCF / %xFDF0-FFEF
204 // / %x10000-1FFFD / %x20000-2FFFD / %x30000-3FFFD
205 // / %x40000-4FFFD / %x50000-5FFFD / %x60000-6FFFD
206 // / %x70000-7FFFD / %x80000-8FFFD / %x90000-9FFFD
207 // / %xA0000-AFFFD / %xB0000-BFFFD / %xC0000-CFFFD
208 // / %xD0000-DFFFD / %xE1000-EFFFD
209 //
210 // iprivate = %xE000-F8FF / %xF0000-FFFFD / %x100000-10FFFD
211 //
212 // That RFC allows iprivate only as part of iquery, but we don't know here
213 // whether we're looking at a query or another part of an URI, so we accept
214 // them too. The definition above excludes U+FFF0 to U+FFFD from appearing
215 // unencoded, but we see no reason for its exclusion, so we allow them to
216 // be decoded (and we need U+FFFD the replacement character to indicate
217 // failure to decode).
218 //
219 // That means we must disallow:
220 // * unpaired surrogates (QUtf8Functions takes care of that for us)
221 // * non-characters
222 static const bool allowNonCharacters = false;
223
224 // override: our "bytes" are three percent-encoded UTF-16 characters
225 static void appendByte(char16_t *&ptr, uchar b)
226 {
227 // b >= 0x80, by construction, so percent-encode
228 *ptr++ = '%';
229 *ptr++ = encodeNibble(b >> 4);
230 *ptr++ = encodeNibble(b & 0xf);
231 }
232
233 static uchar peekByte(const char16_t *ptr, qsizetype n = 0)
234 {
235 // decodePercentEncoding returns char16_t(-1) if it can't decode,
236 // which means we return 0xff, which is not a valid continuation byte.
237 // If ptr[i * 3] is not '%', we'll multiply by zero and return 0,
238 // also not a valid continuation byte (if it's '%', we multiply by 1).
239 return uchar(decodePercentEncoding(ptr + n * 3))
240 * uchar(ptr[n * 3] == '%');
241 }
242
243 static qptrdiff availableBytes(const char16_t *ptr, const char16_t *end)
244 {
245 return (end - ptr) / 3;
246 }
247
248 static void advanceByte(const char16_t *&ptr, int n = 1)
249 {
250 ptr += n * 3;
251 }
252};
253}
254
255// returns true if we performed an UTF-8 decoding
256static bool encodedUtf8ToUtf16(QString &result, char16_t *&output, const char16_t *begin,
257 const char16_t *&input, const char16_t *end, char16_t decoded)
258{
259 char32_t buffer[1];
260 char32_t &ucs4 = buffer[0];
261 char32_t *dst = buffer;
262 const char16_t *src = input + 3;// skip the %XX that yielded \a decoded
263 int charsNeeded = QUtf8Functions::fromUtf8<QUrlUtf8Traits>(decoded, dst, src, end);
264 if (charsNeeded < 0)
265 return false;
266
267 if (!QChar::requiresSurrogates(ucs4)) {
268 // UTF-8 decoded and no surrogates are required
269 // detach if necessary
270 // possibilities are: 6 chars (%XX%XX) -> one char; 9 chars (%XX%XX%XX) -> one char
271 ensureDetached(result, output, begin, input, end, -3 * charsNeeded + 1);
272 *output++ = ucs4;
273 } else {
274 // UTF-8 decoded to something that requires a surrogate pair
275 // compressing from %XX%XX%XX%XX (12 chars) to two
276 ensureDetached(result, output, begin, input, end, -10);
277 *output++ = QChar::highSurrogate(ucs4);
278 *output++ = QChar::lowSurrogate(ucs4);
279 }
280
281 input = src - 1;
282 return true;
283}
284
285static void unicodeToEncodedUtf8(QString &result, char16_t *&output, const char16_t *begin,
286 const char16_t *&input, const char16_t *end, char16_t decoded)
287{
288 // calculate the utf8 length and ensure enough space is available
289 int utf8len = QChar::isHighSurrogate(decoded) ? 4 : decoded >= 0x800 ? 3 : 2;
290
291 // detach
292 if (!output) {
293 // we need 3 * utf8len for the encoded UTF-8 sequence
294 // but ensureDetached already adds 3 for the char we're processing
295 ensureDetached(result, output, begin, input, end, 3*utf8len - 3);
296 } else {
297 // verify that there's enough space or expand
298 int charsRemaining = end - input - 1; // not including this one
299 int pos = output - reinterpret_cast<const char16_t *>(result.constData());
300 int spaceRemaining = result.size() - pos;
301 if (spaceRemaining < 3*charsRemaining + 3*utf8len) {
302 // must resize
303 result.resize(result.size() + 3*utf8len);
304
305 // we know that resize() above detached, so we bypass the reference count check
306 output = const_cast<char16_t *>(reinterpret_cast<const char16_t *>(result.constData()));
307 output += pos;
308 }
309 }
310
311 ++input;
312 int res = QUtf8Functions::toUtf8<QUrlUtf8Traits>(decoded, output, input, end);
313 --input;
314 if (res < 0) {
315 // bad surrogate pair sequence
316 // we will encode bad UTF-16 to UTF-8
317 // but they don't get decoded back
318
319 // first of three bytes
320 uchar c = 0xe0 | uchar(decoded >> 12);
321 *output++ = '%';
322 *output++ = 'E';
323 *output++ = encodeNibble(c & 0xf);
324
325 // second byte
326 c = 0x80 | (uchar(decoded >> 6) & 0x3f);
327 *output++ = '%';
328 *output++ = encodeNibble(c >> 4);
329 *output++ = encodeNibble(c & 0xf);
330
331 // third byte
332 c = 0x80 | (decoded & 0x3f);
333 *output++ = '%';
334 *output++ = encodeNibble(c >> 4);
335 *output++ = encodeNibble(c & 0xf);
336 }
337}
338
339static int recode(QString &result, const char16_t *begin, const char16_t *end,
340 QUrl::ComponentFormattingOptions encoding, const uchar *actionTable,
341 bool retryBadEncoding)
342{
343 const int origSize = result.size();
344 const char16_t *input = begin;
345 char16_t *output = nullptr;
346
348 for ( ; input != end; ++input) {
349 char16_t c;
350 // try a run where no change is necessary
351 for ( ; input != end; ++input) {
352 c = *input;
353 if (c < 0x20U)
354 action = EncodeCharacter;
355 if (c < 0x20U || c >= 0x80U) // also: (c - 0x20 < 0x60U)
356 goto non_trivial;
357 action = EncodingAction(actionTable[c - ' ']);
358 if (action == EncodeCharacter)
359 goto non_trivial;
360 if (output)
361 *output++ = c;
362 }
363 break;
364
365non_trivial:
366 char16_t decoded;
367 if (c == '%' && retryBadEncoding) {
368 // always write "%25"
369 ensureDetached(result, output, begin, input, end);
370 *output++ = '%';
371 *output++ = '2';
372 *output++ = '5';
373 continue;
374 } else if (c == '%') {
375 // check if the input is valid
376 if (input + 2 >= end || (decoded = decodePercentEncoding(input)) == char16_t(-1)) {
377 // not valid, retry
378 result.resize(origSize);
379 return recode(result, begin, end, encoding, actionTable, true);
380 }
381
382 if (decoded >= 0x80) {
383 // decode the UTF-8 sequence
384 if (!(encoding & QUrl::EncodeUnicode) &&
385 encodedUtf8ToUtf16(result, output, begin, input, end, decoded))
386 continue;
387
388 // decoding the encoded UTF-8 failed
389 action = LeaveCharacter;
390 } else if (decoded >= 0x20) {
391 action = EncodingAction(actionTable[decoded - ' ']);
392 }
393 } else {
394 decoded = c;
395 if (decoded >= 0x80 && encoding & QUrl::EncodeUnicode) {
396 // encode the UTF-8 sequence
397 unicodeToEncodedUtf8(result, output, begin, input, end, decoded);
398 continue;
399 } else if (decoded >= 0x80) {
400 if (output)
401 *output++ = c;
402 continue;
403 }
404 }
405
406 // there are six possibilities:
407 // current \ action | DecodeCharacter | LeaveCharacter | EncodeCharacter
408 // decoded | 1:leave | 2:leave | 3:encode
409 // encoded | 4:decode | 5:leave | 6:leave
410 // cases 1 and 2 were handled before this section
411
412 if (c == '%' && action != DecodeCharacter) {
413 // cases 5 and 6: it's encoded and we're leaving it as it is
414 // except we're pedantic and we'll uppercase the hex
415 if (output || !isUpperHex(input[1]) || !isUpperHex(input[2])) {
416 ensureDetached(result, output, begin, input, end);
417 *output++ = '%';
418 *output++ = toUpperHex(*++input);
419 *output++ = toUpperHex(*++input);
420 }
421 } else if (c == '%' && action == DecodeCharacter) {
422 // case 4: we need to decode
423 ensureDetached(result, output, begin, input, end);
424 *output++ = decoded;
425 input += 2;
426 } else {
427 // must be case 3: we need to encode
428 ensureDetached(result, output, begin, input, end);
429 *output++ = '%';
430 *output++ = encodeNibble(c >> 4);
431 *output++ = encodeNibble(c & 0xf);
432 }
433 }
434
435 if (output) {
436 int len = output - reinterpret_cast<const char16_t *>(result.constData());
437 result.truncate(len);
438 return len - origSize;
439 }
440 return 0;
441}
442
/*
 * Vectorised helper for decode(): copies characters from \a input to
 * \a output while looking for the next '%'.
 *
 * Returns true if the input it checked (if it checked anything) is not
 * encoded. A return of false indicates there's a percent at \a input that
 * needs to be decoded.
 *
 * On return \a input and \a output have been advanced past everything that
 * was examined and found to be free of '%'. When fewer than four characters
 * remain, nothing is examined or copied and the function returns true; in
 * every "true" case the caller is expected to process whatever is left with
 * its own scalar loop.
 *
 * The stores always write a full vector to \a output, even beyond a '%' that
 * was found; only the pointers are advanced selectively.
 */
#ifdef __SSE2__
static bool simdCheckNonEncoded(QChar *&output, const char16_t *&input, const char16_t *end)
{
#  ifdef __AVX2__
    const __m256i percents256 = _mm256_broadcastw_epi16(_mm_cvtsi32_si128('%'));
    const __m128i percents = _mm256_castsi256_si128(percents256);
#  else
    const __m128i percents = _mm_set1_epi16('%');
#  endif

    // idx: number of '%'-free characters inside the last chunk examined
    // mask: byte mask of the comparison; two bits per UTF-16 unit, non-zero if a '%' was seen
    uint idx = 0;
    quint32 mask = 0;
    if (input + 16 <= end) {
        qptrdiff offset = 0;
        for ( ; input + offset + 16 <= end; offset += 16) {
#  ifdef __AVX2__
            // do 32 bytes at a time using AVX2
            __m256i data = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(input + offset));
            __m256i comparison = _mm256_cmpeq_epi16(data, percents256);
            mask = _mm256_movemask_epi8(comparison);
            _mm256_storeu_si256(reinterpret_cast<__m256i *>(output + offset), data);
#  else
            // do 32 bytes at a time using unrolled SSE2
            __m128i data1 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input + offset));
            __m128i data2 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input + offset + 8));
            __m128i comparison1 = _mm_cmpeq_epi16(data1, percents);
            __m128i comparison2 = _mm_cmpeq_epi16(data2, percents);
            uint mask1 = _mm_movemask_epi8(comparison1);
            uint mask2 = _mm_movemask_epi8(comparison2);

            _mm_storeu_si128(reinterpret_cast<__m128i *>(output + offset), data1);
            // the second half only needs copying if the first half had no '%'
            if (!mask1)
                _mm_storeu_si128(reinterpret_cast<__m128i *>(output + offset + 8), data2);
            mask = mask1 | (mask2 << 16);
#  endif

            if (mask) {
                // two mask bits per character, hence the division
                idx = qCountTrailingZeroBits(mask) / 2;
                break;
            }
        }

        // skip the chunks that were completely free of '%'
        input += offset;
        if (output)
            output += offset;
    } else if (input + 8 <= end) {
        // do 16 bytes at a time
        __m128i data = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input));
        __m128i comparison = _mm_cmpeq_epi16(data, percents);
        mask = _mm_movemask_epi8(comparison);
        _mm_storeu_si128(reinterpret_cast<__m128i *>(output), data);
        // with an empty mask this counts all 16 bits, so idx becomes 8
        idx = qCountTrailingZeroBits(quint16(mask)) / 2;
    } else if (input + 4 <= end) {
        // do 8 bytes only
        __m128i data = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(input));
        __m128i comparison = _mm_cmpeq_epi16(data, percents);
        mask = _mm_movemask_epi8(comparison) & 0xffu;
        _mm_storel_epi64(reinterpret_cast<__m128i *>(output), data);
        // with an empty mask this counts all 8 bits, so idx becomes 4
        idx = qCountTrailingZeroBits(quint8(mask)) / 2;
    } else {
        // no percents found (because we didn't check)
        return true;
    }

    // advance to the next non-encoded
    input += idx;
    output += idx;

    return !mask;
}
#else
// Fallback without SSE2: examines nothing and reports "no percent found", so
// the caller's scalar loop does all the work.
static bool simdCheckNonEncoded(...)
{
    return true;
}
#endif
524
525/*!
526 \since 5.0
527 \internal
528
529 This function decodes a percent-encoded string located in \a in
530 by appending each character to \a appendTo. It returns the number of
531 characters appended. Each percent-encoded sequence is decoded as follows:
532
533 \list
534 \li from %00 to %7F: the exact decoded value is appended;
535 \li from %80 to %FF: QChar::ReplacementCharacter is appended;
536 \li bad encoding: original input is copied to the output, undecoded.
537 \endlist
538
539 Given the above, it's important for the input to already have all UTF-8
540 percent sequences decoded by qt_urlRecode (that is, the input should not
541 have been processed with QUrl::EncodeUnicode).
542
543 The input should also be a valid percent-encoded sequence (the output of
544 qt_urlRecode is always valid).
545*/
546static qsizetype decode(QString &appendTo, QStringView in)
547{
548 const char16_t *begin = in.utf16();
549 const char16_t *end = begin + in.size();
550
551 // fast check whether there's anything to be decoded in the first place
552 const char16_t *input = QtPrivate::qustrchr(in, '%');
553
554 if (Q_LIKELY(input == end))
555 return 0; // nothing to do, it was already decoded!
556
557 // detach
558 const int origSize = appendTo.size();
559 appendTo.resize(origSize + (end - begin));
560 QChar *output = appendTo.data() + origSize;
561 memcpy(static_cast<void *>(output), static_cast<const void *>(begin), (input - begin) * sizeof(QChar));
562 output += input - begin;
563
564 while (input != end) {
565 // something was encoded
566 Q_ASSERT(*input == '%');
567
568 if (Q_UNLIKELY(end - input < 3 || !isHex(input[1]) || !isHex(input[2]))) {
569 // badly-encoded data
570 appendTo.resize(origSize + (end - begin));
571 memcpy(static_cast<void *>(appendTo.begin() + origSize),
572 static_cast<const void *>(begin), (end - begin) * sizeof(*end));
573 return end - begin;
574 }
575
576 ++input;
577 *output++ = QChar::fromUcs2(decodeNibble(input[0]) << 4 | decodeNibble(input[1]));
578 if (output[-1].unicode() >= 0x80)
579 output[-1] = QChar::ReplacementCharacter;
580 input += 2;
581
582 // search for the next percent, copying from input to output
583 if (simdCheckNonEncoded(output, input, end)) {
584 while (input != end) {
585 const char16_t uc = *input;
586 if (uc == '%')
587 break;
588 *output++ = uc;
589 ++input;
590 }
591 }
592 }
593
594 const qsizetype len = output - appendTo.begin();
595 appendTo.truncate(len);
596 return len - origSize;
597}
598
599template <size_t N>
600static void maskTable(uchar (&table)[N], const uchar (&mask)[N])
601{
602 for (size_t i = 0; i < N; ++i)
603 table[i] &= mask[i];
604}
605
606/*!
607 \internal
608
609 Recodes the string from \a begin to \a end. If any transformations are
610 done, append them to \a appendTo and return the number of characters added.
611 If no transformations were required, return 0.
612
613 The \a encoding option modifies the default behaviour:
614 \list
615 \li QUrl::DecodeReserved: if set, reserved characters will be decoded;
616 if unset, reserved characters will be encoded
617 \li QUrl::EncodeSpaces: if set, spaces will be encoded to "%20"; if unset, they will be " "
618 \li QUrl::EncodeUnicode: if set, characters above U+0080 will be encoded to their UTF-8
619 percent-encoded form; if unset, they will be decoded to UTF-16
620 \li QUrl::FullyDecoded: if set, this function will decode all percent-encoded sequences,
621 including that of the percent character. The resulting string
622 will not be percent-encoded anymore. Use with caution!
623 In this mode, the behaviour is undefined if the input string
624 contains any percent-encoding sequences above %80.
625 Also, the function will not correct bad % sequences.
626 \endlist
627
628 Other flags are ignored (including QUrl::EncodeReserved).
629
630 The \a tableModifications argument can be used to supply extra
631 modifications to the tables, to be applied after the flags above are
632 handled. It consists of a sequence of 16-bit values, where the low 8 bits
633 indicate the character in question and the high 8 bits are either \c
634 EncodeCharacter, \c LeaveCharacter or \c DecodeCharacter.
635
636 This function corrects percent-encoded errors by interpreting every '%' as
637 meaning "%25" (all percents in the same content).
638 */
639
640Q_AUTOTEST_EXPORT qsizetype
641qt_urlRecode(QString &appendTo, QStringView in,
642 QUrl::ComponentFormattingOptions encoding, const ushort *tableModifications)
643{
644 uchar actionTable[sizeof defaultActionTable];
645 if ((encoding & QUrl::FullyDecoded) == QUrl::FullyDecoded) {
646 return decode(appendTo, in);
647 }
648
649 memcpy(actionTable, defaultActionTable, sizeof actionTable);
650 if (encoding & QUrl::DecodeReserved)
651 maskTable(actionTable, reservedMask);
652 if (encoding & QUrl::EncodeSpaces)
653 actionTable[0] = EncodeCharacter;
654
655 if (tableModifications) {
656 for (const ushort *p = tableModifications; *p; ++p)
657 actionTable[uchar(*p) - ' '] = *p >> 8;
658 }
659
660 return recode(appendTo, reinterpret_cast<const char16_t *>(in.begin()),
661 reinterpret_cast<const char16_t *>(in.end()), encoding, actionTable, false);
662}
663
664qsizetype qt_encodeFromUser(QString &appendTo, const QString &in, const ushort *tableModifications)
665{
666 uchar actionTable[sizeof defaultActionTable];
667 memcpy(actionTable, defaultActionTable, sizeof actionTable);
668
669 // Different defaults to the regular encoded-to-encoded recoding
670 actionTable['[' - ' '] = EncodeCharacter;
671 actionTable[']' - ' '] = EncodeCharacter;
672
673 if (tableModifications) {
674 for (const ushort *p = tableModifications; *p; ++p)
675 actionTable[uchar(*p) - ' '] = *p >> 8;
676 }
677
678 return recode(appendTo, reinterpret_cast<const char16_t *>(in.begin()),
679 reinterpret_cast<const char16_t *>(in.end()), {}, actionTable, true);
680}
681
682QT_END_NAMESPACE
qsizetype qt_encodeFromUser(QString &appendTo, const QString &input, const ushort *tableModifications)
static char16_t decodePercentEncoding(const char16_t *input)
static bool encodedUtf8ToUtf16(QString &result, char16_t *&output, const char16_t *begin, const char16_t *&input, const char16_t *end, char16_t decoded)
static qsizetype decode(QString &appendTo, QStringView in)
static int recode(QString &result, const char16_t *begin, const char16_t *end, QUrl::ComponentFormattingOptions encoding, const uchar *actionTable, bool retryBadEncoding)
static ushort decodeNibble(char16_t c)
static char16_t encodeNibble(ushort c)
static const uchar reservedMask[96]
EncodingAction
@ DecodeCharacter
@ EncodeCharacter
@ LeaveCharacter
static char16_t toUpperHex(char16_t c)
static bool isUpperHex(char16_t c)
static void maskTable(uchar(&table)[N], const uchar(&mask)[N])
static void unicodeToEncodedUtf8(QString &result, char16_t *&output, const char16_t *begin, const char16_t *&input, const char16_t *end, char16_t decoded)
static const uchar defaultActionTable[96]
static void ensureDetached(QString &result, char16_t *&output, const char16_t *begin, const char16_t *input, const char16_t *end, int add=0)
static bool isHex(char16_t c)