6#include "private/qstringconverter_p.h"
7#include "private/qtools_p.h"
8#include "private/qsimd_p.h"
100 0xff, 0xff, 0xff, 0xff, 0xff,
101 0xff, 0xff, 0xff, 0xff, 0xff,
110 0xff, 0xff, 0xff, 0xff, 0xff,
111 0xff, 0xff, 0xff, 0xff, 0xff,
112 0xff, 0xff, 0xff, 0xff, 0xff,
113 0xff, 0xff, 0xff, 0xff, 0xff,
114 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
122 0xff, 0xff, 0xff, 0xff, 0xff,
123 0xff, 0xff, 0xff, 0xff, 0xff,
124 0xff, 0xff, 0xff, 0xff, 0xff,
125 0xff, 0xff, 0xff, 0xff, 0xff,
126 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
137 return (c >= u'a' && c <= u'f') || (c >= u'A' && c <= u'F') || (c >= u'0' && c <= u'9');
153 return c >= u'a' ? c - u'a' + 0xA : c >= u'A' ? c - u'A' + 0xA : c - u'0';
161 char16_t c1 = input[1];
162 char16_t c2 = input[2];
165 return decodeNibble(c1) << 4 | decodeNibble(c2);
170 return QtMiscUtils::toHexUpper(c);
173static void ensureDetached(QString &result,
char16_t *&output,
const char16_t *begin,
const char16_t *input,
const char16_t *end,
179 int charsProcessed = input - begin;
180 int charsRemaining = end - input;
181 int spaceNeeded = end - begin + 2 * charsRemaining + add;
182 int origSize = result.size();
183 result.resize(origSize + spaceNeeded);
186 output =
const_cast<
char16_t *>(
reinterpret_cast<
const char16_t *>(result.constData()))
191 for (i = 0; i < charsProcessed; ++i)
192 output[i] = begin[i];
198struct QUrlUtf8Traits :
public QUtf8BaseTraitsNoAscii
222 static const bool allowNonCharacters =
false;
225 static void appendByte(
char16_t *&ptr, uchar b)
229 *ptr++ = encodeNibble(b >> 4);
230 *ptr++ = encodeNibble(b & 0xf);
233 static uchar peekByte(
const char16_t *ptr, qsizetype n = 0)
239 return uchar(decodePercentEncoding(ptr + n * 3))
240 * uchar(ptr[n * 3] ==
'%');
243 static qptrdiff availableBytes(
const char16_t *ptr,
const char16_t *end)
245 return (end - ptr) / 3;
248 static void advanceByte(
const char16_t *&ptr,
int n = 1)
257 const char16_t *&input,
const char16_t *end,
char16_t decoded)
260 char32_t &ucs4 = buffer[0];
261 char32_t *dst = buffer;
262 const char16_t *src = input + 3;
263 int charsNeeded = QUtf8Functions::fromUtf8<QUrlUtf8Traits>(decoded, dst, src, end);
267 if (!QChar::requiresSurrogates(ucs4)) {
271 ensureDetached(result, output, begin, input, end, -3 * charsNeeded + 1);
276 ensureDetached(result, output, begin, input, end, -10);
277 *output++ = QChar::highSurrogate(ucs4);
278 *output++ = QChar::lowSurrogate(ucs4);
286 const char16_t *&input,
const char16_t *end,
char16_t decoded)
289 int utf8len = QChar::isHighSurrogate(decoded) ? 4 : decoded >= 0x800 ? 3 : 2;
295 ensureDetached(result, output, begin, input, end, 3*utf8len - 3);
298 int charsRemaining = end - input - 1;
299 int pos = output -
reinterpret_cast<
const char16_t *>(result.constData());
300 int spaceRemaining = result.size() - pos;
301 if (spaceRemaining < 3*charsRemaining + 3*utf8len) {
303 result.resize(result.size() + 3*utf8len);
306 output =
const_cast<
char16_t *>(
reinterpret_cast<
const char16_t *>(result.constData()));
312 int res = QUtf8Functions::toUtf8<QUrlUtf8Traits>(decoded, output, input, end);
320 uchar c = 0xe0 | uchar(decoded >> 12);
323 *output++ = encodeNibble(c & 0xf);
326 c = 0x80 | (uchar(decoded >> 6) & 0x3f);
328 *output++ = encodeNibble(c >> 4);
329 *output++ = encodeNibble(c & 0xf);
332 c = 0x80 | (decoded & 0x3f);
334 *output++ = encodeNibble(c >> 4);
335 *output++ = encodeNibble(c & 0xf);
339static int recode(QString &result,
const char16_t *begin,
const char16_t *end,
340 QUrl::ComponentFormattingOptions encoding,
const uchar *actionTable,
341 bool retryBadEncoding)
343 const int origSize = result.size();
344 const char16_t *input = begin;
345 char16_t *output =
nullptr;
348 for ( ; input != end; ++input) {
351 for ( ; input != end; ++input) {
355 if (c < 0x20U || c >= 0x80U)
367 if (c ==
'%' && retryBadEncoding) {
369 ensureDetached(result, output, begin, input, end);
374 }
else if (c ==
'%') {
378 result.resize(origSize);
379 return recode(result, begin, end, encoding, actionTable,
true);
382 if (decoded >= 0x80) {
384 if (!(encoding & QUrl::EncodeUnicode) &&
385 encodedUtf8ToUtf16(result, output, begin, input, end, decoded))
390 }
else if (decoded >= 0x20) {
395 if (decoded >= 0x80 && encoding & QUrl::EncodeUnicode) {
397 unicodeToEncodedUtf8(result, output, begin, input, end, decoded);
399 }
else if (decoded >= 0x80) {
416 ensureDetached(result, output, begin, input, end);
423 ensureDetached(result, output, begin, input, end);
428 ensureDetached(result, output, begin, input, end);
430 *output++ = encodeNibble(c >> 4);
431 *output++ = encodeNibble(c & 0xf);
436 int len = output -
reinterpret_cast<
const char16_t *>(result.constData());
437 result.truncate(len);
438 return len - origSize;
444
445
446
447
449static bool simdCheckNonEncoded(QChar *&output,
const char16_t *&input,
const char16_t *end)
452 const __m256i percents256 = _mm256_broadcastw_epi16(_mm_cvtsi32_si128(
'%'));
453 const __m128i percents = _mm256_castsi256_si128(percents256);
455 const __m128i percents = _mm_set1_epi16(
'%');
460 if (input + 16 <= end) {
462 for ( ; input + offset + 16 <= end; offset += 16) {
465 __m256i data = _mm256_loadu_si256(
reinterpret_cast<
const __m256i *>(input + offset));
466 __m256i comparison = _mm256_cmpeq_epi16(data, percents256);
467 mask = _mm256_movemask_epi8(comparison);
468 _mm256_storeu_si256(
reinterpret_cast<__m256i *>(output + offset), data);
471 __m128i data1 = _mm_loadu_si128(
reinterpret_cast<
const __m128i *>(input + offset));
472 __m128i data2 = _mm_loadu_si128(
reinterpret_cast<
const __m128i *>(input + offset + 8));
473 __m128i comparison1 = _mm_cmpeq_epi16(data1, percents);
474 __m128i comparison2 = _mm_cmpeq_epi16(data2, percents);
475 uint mask1 = _mm_movemask_epi8(comparison1);
476 uint mask2 = _mm_movemask_epi8(comparison2);
478 _mm_storeu_si128(
reinterpret_cast<__m128i *>(output + offset), data1);
480 _mm_storeu_si128(
reinterpret_cast<__m128i *>(output + offset + 8), data2);
481 mask = mask1 | (mask2 << 16);
485 idx = qCountTrailingZeroBits(mask) / 2;
493 }
else if (input + 8 <= end) {
495 __m128i data = _mm_loadu_si128(
reinterpret_cast<
const __m128i *>(input));
496 __m128i comparison = _mm_cmpeq_epi16(data, percents);
497 mask = _mm_movemask_epi8(comparison);
498 _mm_storeu_si128(
reinterpret_cast<__m128i *>(output), data);
499 idx = qCountTrailingZeroBits(quint16(mask)) / 2;
500 }
else if (input + 4 <= end) {
502 __m128i data = _mm_loadl_epi64(
reinterpret_cast<
const __m128i *>(input));
503 __m128i comparison = _mm_cmpeq_epi16(data, percents);
504 mask = _mm_movemask_epi8(comparison) & 0xffu;
505 _mm_storel_epi64(
reinterpret_cast<__m128i *>(output), data);
506 idx = qCountTrailingZeroBits(quint8(mask)) / 2;
519static bool simdCheckNonEncoded(...)
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
548 const char16_t *begin = in.utf16();
549 const char16_t *end = begin + in.size();
552 const char16_t *input = QtPrivate::qustrchr(in,
'%');
554 if (Q_LIKELY(input == end))
558 const int origSize = appendTo.size();
559 appendTo.resize(origSize + (end - begin));
560 QChar *output = appendTo.data() + origSize;
561 memcpy(
static_cast<
void *>(output),
static_cast<
const void *>(begin), (input - begin) *
sizeof(QChar));
562 output += input - begin;
564 while (input != end) {
566 Q_ASSERT(*input ==
'%');
568 if (Q_UNLIKELY(end - input < 3 || !
isHex(input[1]
) || !
isHex(input[2]
))) {
570 appendTo.resize(origSize + (end - begin));
571 memcpy(
static_cast<
void *>(appendTo.begin() + origSize),
572 static_cast<
const void *>(begin), (end - begin) *
sizeof(*end));
577 *output++ = QChar::fromUcs2(decodeNibble(input[0]) << 4 | decodeNibble(input[1]));
578 if (output[-1].unicode() >= 0x80)
579 output[-1] = QChar::ReplacementCharacter;
583 if (simdCheckNonEncoded(output, input, end)) {
584 while (input != end) {
585 const char16_t uc = *input;
594 const qsizetype len = output - appendTo.begin();
595 appendTo.truncate(len);
596 return len - origSize;
600static void maskTable(uchar (&table)[N],
const uchar (&mask)[N])
602 for (size_t i = 0; i < N; ++i)
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
640Q_AUTOTEST_EXPORT qsizetype
641qt_urlRecode(QString &appendTo, QStringView in,
642 QUrl::ComponentFormattingOptions encoding,
const ushort *tableModifications)
644 uchar actionTable[
sizeof defaultActionTable];
645 if ((encoding & QUrl::FullyDecoded) == QUrl::FullyDecoded) {
646 return decode(appendTo, in);
649 memcpy(actionTable, defaultActionTable,
sizeof actionTable);
650 if (encoding & QUrl::DecodeReserved)
651 maskTable(actionTable, reservedMask);
652 if (encoding & QUrl::EncodeSpaces)
653 actionTable[0] = EncodeCharacter;
655 if (tableModifications) {
656 for (
const ushort *p = tableModifications; *p; ++p)
657 actionTable[uchar(*p) -
' '] = *p >> 8;
660 return recode(appendTo,
reinterpret_cast<
const char16_t *>(in.begin()),
661 reinterpret_cast<
const char16_t *>(in.end()), encoding, actionTable,
false);
667 memcpy(actionTable, defaultActionTable,
sizeof actionTable);
673 if (tableModifications) {
674 for (
const ushort *p = tableModifications; *p; ++p)
675 actionTable[uchar(*p) -
' '] = *p >> 8;
678 return recode(appendTo,
reinterpret_cast<
const char16_t *>(in.begin()),
679 reinterpret_cast<
const char16_t *>(in.end()), {}, actionTable,
true);
qsizetype qt_encodeFromUser(QString &appendTo, const QString &input, const ushort *tableModifications)
static char16_t decodePercentEncoding(const char16_t *input)
static bool encodedUtf8ToUtf16(QString &result, char16_t *&output, const char16_t *begin, const char16_t *&input, const char16_t *end, char16_t decoded)
static qsizetype decode(QString &appendTo, QStringView in)
static int recode(QString &result, const char16_t *begin, const char16_t *end, QUrl::ComponentFormattingOptions encoding, const uchar *actionTable, bool retryBadEncoding)
static ushort decodeNibble(char16_t c)
static char16_t encodeNibble(ushort c)
static const uchar reservedMask[96]
static char16_t toUpperHex(char16_t c)
static bool isUpperHex(char16_t c)
static void maskTable(uchar(&table)[N], const uchar(&mask)[N])
static void unicodeToEncodedUtf8(QString &result, char16_t *&output, const char16_t *begin, const char16_t *&input, const char16_t *end, char16_t decoded)
static const uchar defaultActionTable[96]
static void ensureDetached(QString &result, char16_t *&output, const char16_t *begin, const char16_t *input, const char16_t *end, int add=0)
static bool isHex(char16_t c)