8#include <QtCore/qstringlist.h>
9#include <QtCore/private/qnumeric_p.h>
10#include <QtCore/private/qoffsetstringarray_p.h>
11#include <QtCore/private/qstringiterator_p.h>
12#include <QtCore/private/qunicodetables_p.h>
18using namespace Qt::StringLiterals;
// Numeric parameters shared by the Punycode helpers in this file. The values
// are the ones listed under "Parameter values for Punycode" in RFC 3492.
// NOTE(review): tmin, initial_bias and initial_n are used by the code below,
// but their definitions are not part of this excerpt.
21static const uint
base = 36;
23static const uint
tmax = 26;
24static const uint
skew = 38;
25static const uint
damp = 700;
// Turns a digit value into a character code: a value below 26 yields
// digit + 97 ('a'..'z'); any other value yields digit + 22, which is
// '0'..'9' for the values 26..35.
33 return digit + 22 + 75 * (digit < 26);
// Computes a new bias from the delta that was just processed.
//  - delta:     the delta value to adapt from
//  - numpoints: divisor for the proportional increase applied to delta
//  - firsttime: selects the initial scaling divisor (damp instead of 2)
// NOTE(review): the statements that declare and advance k are not visible in
// this excerpt; only the scaling steps and the final expression are shown.
36static inline uint
adapt(uint delta, uint numpoints,
bool firsttime)
// Scale delta down: by damp on the first call, otherwise halve it.
38 delta /= (firsttime ?
damp : 2);
// Increase delta proportionally to 1/numpoints.
39 delta += (delta / numpoints);
// Resulting bias, derived from k and the remaining delta.
45 return k + (((
base -
tmin + 1) * delta) / (delta +
skew));
// Appends delta to *output as a sequence of characters produced by
// encodeDigit(), using thresholds derived from bias.
48static inline void appendEncode(QString *output, uint delta, uint bias)
// k advances in steps of base. The for statement itself has no condition, so
// the loop must be left from inside its body (the exit is not visible here).
55 for (qq = delta, k = base;; k += base) {
// Threshold for this position: tmin at or below the bias, tmax at or above
// bias + tmax, and k - bias in between.
58 t = (k <= bias) ? tmin : (k >= bias + tmax) ? tmax : k - bias;
// Emit one digit and keep the quotient for the next position.
61 *output += QChar(encodeDigit(t + (qq - t) % (base - t)));
62 qq = (qq - t) / (base - t);
// Emit what is left in qq as a digit of its own.
65 *output += QChar(encodeDigit(qq));
// Fragment of the Punycode encoder: the encoded form of `in` is appended to
// *output. On every visible failure path *output is truncated back to the
// length it had on entry (outLen).
// NOTE(review): several statements between the visible lines (declarations of
// n, h, delta, tmp and the branch bodies) are not part of this excerpt.
// Length check on the input against twice MaxDomainLabelLength.
77 if (in.size() > MaxDomainLabelLength * 2)
// Remember where the appended text starts and make room for the worst case of
// one output character per input code unit.
80 int outLen = output->size();
81 output->resize(outLen + in.size());
// d is the write cursor into the newly reserved area.
83 QChar *d = output->data() + outLen;
// Code units below 0x80 are tested here (the branch body is not visible).
87 if (c.unicode() < 0x80)
// Cut the output at the write cursor; `copied` is the number of characters
// that were actually written after outLen.
98 output->truncate(d - output->constData());
99 int copied = output->size() - outLen;
// Walk the input as code points. next() is given char32_t(-1) as the value to
// return for invalid input, so a match means the input is not well-formed.
110 uint inputLength = 0;
111 for (QStringIterator iter(in); iter.hasNext();) {
114 if (iter.next(
char32_t(-1)) ==
char32_t(-1)) {
115 output->truncate(outLen);
// Main loop: runs until h has caught up with inputLength.
122 while (h < inputLength) {
// m starts at the largest uint and is used while scanning the input code
// points (the comparison that updates it is not visible here).
124 uint m = std::numeric_limits<uint>::max();
125 for (QStringIterator iter(in); iter.hasNext();) {
126 auto c = iter.nextUnchecked();
// Compile-time guarantee that the type of m can hold any value of c's type.
127 static_assert(std::numeric_limits<
decltype(m)>::max()
128 >= std::numeric_limits<
decltype(c)>::max(),
129 "Punycode uint should be able to cover all codepoints");
// delta += (m - n) * (h + 1), with both operations checked for overflow.
136 if (qMulOverflow<uint>(m - n, h + 1, &tmp) || qAddOverflow<uint>(delta, tmp, &delta)) {
137 output->truncate(outLen);
// Second pass over the input code points for the current iteration.
142 for (QStringIterator iter(in); iter.hasNext();) {
143 auto c = iter.nextUnchecked();
// Overflow-checked increment of delta.
148 if (qAddOverflow<uint>(delta, 1, &delta)) {
149 output->truncate(outLen);
// Put the "xn--" prefix in front of the text appended by this call.
168 output->insert(outLen,
"xn--"_L1);
// Fragment of the Punycode decoder: decodes the label `pc` into a sequence of
// code points and returns it as a QString.
// NOTE(review): the function header, the declarations of n, i, oldi, w, tmp
// and t, and all failure-path bodies are not part of this excerpt.
176 uint bias = initial_bias;
// Length check on the input label.
181 if (pc.size() > MaxDomainLabelLength)
// Skip the "xn--" prefix when it is present.
185 int start = pc.startsWith(
"xn--"_L1) ? 4 : 0;
// Everything between `start` and the last '-' is copied to the output as-is.
// When the last '-' lies within the first four characters the output starts
// out empty.
191 int delimiterPos = pc.lastIndexOf(u'-');
192 auto output = delimiterPos < 4 ? std::u32string()
193 : pc.mid(start, delimiterPos - start).toStdU32String();
// cnt indexes the next character to read after the delimiter.
198 uint cnt = delimiterPos + 1;
202 while (cnt < (uint) pc.size()) {
// Read one variable-length integer, one digit per iteration.
207 for (uint k = base; cnt < (uint) pc.size(); k += base) {
// Map the character to its digit value: '0'..'9' -> 26..35,
// 'A'..'Z' -> 0..25, 'a'..'z' -> 0..25. digit is unsigned, so each
// "digit - x < y" test is a range check: smaller characters wrap around to
// large values.
211 uint digit = pc.at(cnt++).unicode();
212 if (digit - 48 < 10) digit -= 22;
213 else if (digit - 65 < 26) digit -= 65;
214 else if (digit - 97 < 26) digit -= 97;
// i += digit * w, with both operations checked for overflow.
223 if (qMulOverflow<uint>(digit, w, &tmp) || qAddOverflow<uint>(i, tmp, &i))
// Threshold for this position (the remaining case is not visible here).
228 if (k <= bias) t = tmin;
229 else if (k >= bias + tmax) t = tmax;
// A digit below the threshold ends the current integer.
232 if (digit < t)
break;
// w *= (base - t), checked for overflow.
235 if (qMulOverflow<uint>(w, base - t, &w))
// Adapt the bias using the number of code points after the pending insertion.
241 uint outputLength =
static_cast<uint>(output.length());
242 bias = adapt(i - oldi, outputLength + 1, oldi == 0);
// Split i into a code point increment (added to n, overflow-checked) and an
// insertion position (the remainder).
245 if (qAddOverflow<uint>(n, i / (outputLength + 1), &n))
249 i %= (outputLength + 1);
255 qWarning(
"Attempt to insert a basic codepoint. Unhandled overflow?");
// Surrogates and values above the last valid code point are tested here.
270 if (QChar::isSurrogate(n) || n > QChar::LastValidCodePoint)
// Insert the decoded code point at position i.
274 output.insert(i, 1,
static_cast<
char32_t>(n));
278 return QString::fromStdU32String(output);
// Top-level-domain entries of the built-in list.
// NOTE(review): only part of the list is visible in this excerpt. The visible
// entries are in ascending order; the lookup code elsewhere in this file
// appears to rely on that ordering -- keep it when adding entries.
282 "ac",
"ar",
"asia",
"at",
284 "cat",
"ch",
"cl",
"cn",
"com",
290 "il",
"info",
"io",
"ir",
"is",
293 "li",
"lt",
"lu",
"lv",
295 "name",
"net",
"no",
"nu",
"nz",
299 "tel",
"th",
"tm",
"tw",
// List installed through QUrl::setIdnWhitelist(); stays null until that
// function is called. While it is non-null, the lookups in this file consult
// it instead of the built-in idn_whitelist.
320Q_CONSTINIT
static QStringList *user_idn_whitelist =
nullptr;
// Ordering predicate between a UTF-16 string (a, length l) and a
// NUL-terminated 8-bit string c.
// NOTE(review): the loop that advances uc and c is not visible in this
// excerpt; only its exit test and the final comparison are shown.
322static bool lessThan(
const QChar *a,
int l,
const char *c)
// View the QChar data as raw UTF-16 code units; e is one past the end.
324 const auto *uc =
reinterpret_cast<
const char16_t *>(a);
325 const char16_t *e = uc + l;
// Test for end of `a` or a mismatch at the current position. *c is cast to
// unsigned char before it is compared with the UTF-16 code unit.
331 if (uc == e || *uc !=
static_cast<
unsigned char>(*c))
// If `a` is exhausted, the result is whether c still has characters left;
// otherwise the current code units are compared.
336 return uc == e ? *c : (*uc <
static_cast<
unsigned char>(*c));
// Compares a UTF-16 string (a, length l) with a NUL-terminated 8-bit string b.
// NOTE(review): the loop body past the mismatch test and the return statements
// are not visible in this excerpt.
339static bool equal(
const QChar *a,
int l,
const char *b)
// Continue while characters remain in `a`, the current character of `a` is
// not NUL, and b has not reached its terminator.
341 while (l && a->unicode() && *b) {
342 if (*a != QLatin1Char(*b))
// Fragment of the whitelist lookup: decides from the top-level domain of
// aceDomain whether it is listed.
// NOTE(review): the search loop between the visible lines is not part of this
// excerpt; l is declared in a line that is not shown.
// The top-level domain is the text after the last '.'.
353 auto idx = aceDomain.lastIndexOf(u'.');
357 auto tldString = aceDomain.mid(idx + 1);
358 const auto len = tldString.size();
360 const QChar *tld = tldString.constData();
// A user-installed list takes precedence over the built-in one.
362 if (user_idn_whitelist)
363 return user_idn_whitelist->contains(tldString);
// Search the built-in list between indexes l and r, probing at i.
366 int r = idn_whitelist.count() - 1;
367 int i = (l + r + 1) / 2;
370 if (lessThan(tld, len, idn_whitelist.at(i)))
// The final candidate must match exactly.
376 return equal(tld, len, idn_whitelist.at(i));
// True for '-', '_', the digits '0'..'9' and the lowercase letters 'a'..'z'.
// Uppercase letters are not accepted.
382 return c == u'-' || c == u'_' || (c >= u'0' && c <= u'9') || (c >= u'a' && c <= u'z');
// Same character set as isValidInNormalizedAsciiLabel(), plus '.'.
388 return isValidInNormalizedAsciiLabel(c) || c == u'.';
392
393
394
395
396
397
// Fragment of the mapping step: builds `result` from `in` and reports through
// *resultIsAscii whether the result consists of ASCII characters only.
// NOTE(review): the function header, the declaration of i/result, the bodies
// of several branches and the return statements are not part of this excerpt.
401 *resultIsAscii =
true;
// Scan for a character that is non-ASCII or not valid in a normalized ASCII
// name.
406 if (c.unicode() >= 0x80 || !isValidInNormalizedAsciiName(c))
// Copy the first i characters of the input unchanged, then process the rest
// code point by code point.
415 result.reserve(in.size());
416 result.append(in.constData(), i);
417 bool allAscii =
true;
419 for (QStringIterator iter(QStringView(in).sliced(i)); iter.hasNext();) {
420 char32_t uc = iter.next();
// ASCII path: uppercase letters get special handling (body not visible);
// characters valid in a normalized ASCII name are appended directly.
423 if (Q_LIKELY(uc < 0x80)) {
424 if (uc >= U'A' && uc <= U'Z')
427 if (isValidInNormalizedAsciiName(uc)) {
428 result.append(
static_cast<
char16_t>(uc));
// U+1E9E is replaced by "ss" when transitional processing is requested.
436 if (uc == 0x1E9E && options.testFlag(QUrl::AceTransitionalProcessing)) {
437 result.append(u"ss"_s);
// Look up the IDNA status. A Deviation status is treated as Mapped under
// transitional processing and as Valid otherwise.
441 QUnicodeTables::IdnaStatus status = QUnicodeTables::idnaStatus(uc);
443 if (status == QUnicodeTables::IdnaStatus::Deviation)
444 status = options.testFlag(QUrl::AceTransitionalProcessing)
445 ? QUnicodeTables::IdnaStatus::Mapped
446 : QUnicodeTables::IdnaStatus::Valid;
449 case QUnicodeTables::IdnaStatus::Ignored:
// Valid and Disallowed share one case body, which iterates over the UTF-16
// code units of uc.
451 case QUnicodeTables::IdnaStatus::Valid:
452 case QUnicodeTables::IdnaStatus::Disallowed:
453 for (
auto c : QChar::fromUcs4(uc))
// Mapped code points are replaced by their table mapping.
456 case QUnicodeTables::IdnaStatus::Mapped:
457 result.append(QUnicodeTables::idnaMapping(uc));
464 *resultIsAscii = allAscii;
469
470
471
472
473
474
// Fragment of the ASCII label check. Visible tests: the label length against
// MaxDomainLabelLength, a '-' at either end, and finally that every character
// satisfies isValidInNormalizedAsciiLabel().
// NOTE(review): the results returned by the two `if` branches are not visible.
477 if (label.size() > MaxDomainLabelLength)
480 if (label.first() == u'-' || label.last() == u'-')
483 return std::all_of(label.begin(), label.end(), isValidInNormalizedAsciiLabel<QChar>);
// Checks the labels of one domain name. State is carried across checkLabel()
// calls, so one instance is meant to be used for all labels of a single name.
// NOTE(review): braces and access specifiers are not part of this excerpt.
488class DomainValidityChecker
// Set once a label contains a character that marks the name as bidirectional.
490 bool domainNameIsBidi =
false;
// Set once any label has failed checkBidiRules().
491 bool hadBidiErrors =
false;
// When true, checkLabel() skips the detection of bidirectional names.
492 bool ignoreBidiErrors;
// Zero-width non-joiner and zero-width joiner code points.
494 static constexpr char32_t ZWNJ = U'\u200C';
495 static constexpr char32_t ZWJ = U'\u200D';
498 DomainValidityChecker(
bool ignoreBidiErrors =
false) : ignoreBidiErrors(ignoreBidiErrors) { }
499 bool checkLabel(
const QString &label, QUrl::AceProcessingOptions options);
502 static bool checkContextJRules(QStringView label);
503 static bool checkBidiRules(QStringView label);
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
// Fragment of the joiner-context check for ZWJ/ZWNJ in a label. A small state
// machine (regexpState) tracks the joining types seen so far, and
// previousIsVirama records whether the preceding code point has combining
// class 9.
// NOTE(review): the State enum, the first branch of the if-chain, and the
// return/break statements inside the branches are not part of this excerpt.
528bool DomainValidityChecker::checkContextJRules(QStringView label)
530 constexpr unsigned char CombiningClassVirama = 9;
537 State regexpState = State::Initial;
538 bool previousIsVirama =
false;
540 for (QStringIterator iter(label); iter.hasNext();) {
541 auto ch = iter.next();
// First branch (its condition is not visible): tests previousIsVirama, then
// resets the state machine.
544 if (!previousIsVirama)
546 regexpState = State::Initial;
547 }
else if (ch == ZWNJ) {
// ZWNJ branch: tests for "not after a virama and not in state LD_T", then
// moves to ZWNJ_T unless the previous code point was a virama.
548 if (!previousIsVirama && regexpState != State::LD_T)
550 regexpState = previousIsVirama ? State::Initial : State::ZWNJ_T;
// Any other code point drives the state machine by its joining type.
552 switch (QChar::joiningType(ch)) {
553 case QChar::Joining_Left:
554 if (regexpState == State::ZWNJ_T)
556 regexpState = State::LD_T;
558 case QChar::Joining_Right:
559 regexpState = State::Initial;
561 case QChar::Joining_Dual:
562 regexpState = State::LD_T;
564 case QChar::Joining_Transparent:
567 regexpState = State::Initial;
572 previousIsVirama = QChar::combiningClass(ch) == CombiningClassVirama;
// Ending in state ZWNJ_T makes the check fail.
575 return regexpState != State::ZWNJ_T;
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
// Fragment of the bidirectional-text check for one label. The direction of
// the first code point is examined on its own, then the remaining code points
// are examined in a loop.
// NOTE(review): the case labels of both switch statements and the return
// statements are not part of this excerpt.
602bool DomainValidityChecker::checkBidiRules(QStringView label)
607 QStringIterator iter(label);
// The label must not be empty; this is only asserted, not handled.
608 Q_ASSERT(iter.hasNext());
610 char32_t ch = iter.next();
611 bool labelIsRTL =
false;
613 switch (QChar::direction(ch)) {
// Flags declared for the loop over the remaining code points.
625 bool labelHasEN =
false;
626 bool labelHasAN =
false;
628 while (iter.hasNext()) {
631 switch (QChar::direction(ch)) {
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
// Fragment of the per-label validity check. The visible tests run in this
// order: NFC normalization, hyphen placement, embedded '.', leading mark,
// per-code-point IDNA status, joiner context, and bidi rules.
// NOTE(review): the loop over code points and the return statements of most
// branches are not part of this excerpt.
711bool DomainValidityChecker::checkLabel(
const QString &label, QUrl::AceProcessingOptions options)
// The label is compared with its NFC-normalized form.
716 if (label != label.normalized(QString::NormalizationForm_C))
719 if (label.size() >= 4) {
// A label with "--" in the third and fourth position is accepted only when
// ignoreBidiErrors is set, it starts with "xn", and it is a valid ASCII label.
723 if (label[2] == u'-' && label[3] == u'-')
724 return ignoreBidiErrors && label.startsWith(u"xn") && validateAsciiLabel(label);
// Leading or trailing '-' and an embedded '.' are tested next.
727 if (label.startsWith(u'-') || label.endsWith(u'-'))
730 if (label.contains(u'.'))
// The first code point is tested for being a mark.
733 QStringIterator iter(label);
734 auto c = iter.next();
736 if (QChar::isMark(c))
741 bool hasJoiners =
false;
// Record whether the label contains ZWNJ or ZWJ.
744 hasJoiners = hasJoiners || c == ZWNJ || c == ZWJ;
// Unless disabled, look for a character whose direction marks the whole domain
// name as bidirectional.
746 if (!ignoreBidiErrors && !domainNameIsBidi) {
747 switch (QChar::direction(c)) {
751 domainNameIsBidi =
true;
// The IDNA status of each code point is examined; Deviation depends on the
// transitional-processing flag.
760 switch (QUnicodeTables::idnaStatus(c)) {
761 case QUnicodeTables::IdnaStatus::Valid:
763 case QUnicodeTables::IdnaStatus::Deviation:
764 if (options.testFlag(QUrl::AceTransitionalProcessing))
// The joiner-context check runs only when the label contains joiners.
776 if (hasJoiners && !checkContextJRules(label))
// Bidi errors accumulate across labels. checkBidiRules() is no longer called
// once hadBidiErrors is set.
779 hadBidiErrors = hadBidiErrors || !checkBidiRules(label);
781 if (domainNameIsBidi && hadBidiErrors)
// Fragment of the Unicode-to-ACE conversion: splits normalizedDomain at '.'
// and Punycode-encodes the labels into aceResult.
// NOTE(review): the loop header, the declarations of aceForm/aceResult and the
// return statements are not part of this excerpt.
789 qsizetype lastIdx = 0;
// Find the end of the current label. The assignment at 796 uses the end of
// the string (its guarding condition is not visible here).
794 qsizetype idx = normalizedDomain.indexOf(u'.', lastIdx);
796 idx = normalizedDomain.size();
798 const qsizetype labelLength = idx - lastIdx;
800 const auto label = normalizedDomain.sliced(lastIdx, labelLength);
// Encode the label; an empty aceForm is tested right after the call.
802 qt_punycodeEncoder(label, &aceForm);
803 if (aceForm.isEmpty())
806 aceResult.append(aceForm);
// Test for the last label.
809 if (idx == normalizedDomain.size())
// Test for an empty label that is either not at the start of the name, or at
// the start while leading dots are forbidden.
812 if (labelLength == 0 && (dot == ForbidLeadingDot || idx > 0))
// Fragment of the ASCII domain name check: validates every label with
// validateAsciiLabel() and reports through *usesPunycode whether any label
// starts with "xn--".
// NOTE(review): the return statements and the empty-label handling bodies are
// not part of this excerpt.
825 qsizetype lastIdx = 0;
826 bool hasPunycode =
false;
// The out-parameter is cleared up front and assigned again at line 851.
827 *usesPunycode =
false;
829 while (lastIdx < normalizedDomain.size()) {
// Find the end of the current label. The assignment at 832 uses the end of
// the string (its guarding condition is not visible here).
830 auto idx = normalizedDomain.indexOf(u'.', lastIdx);
832 idx = normalizedDomain.size();
834 const auto labelLength = idx - lastIdx;
// Empty labels are examined separately, including the end-of-name case.
835 if (labelLength == 0) {
836 if (idx == normalizedDomain.size())
841 const auto label = normalizedDomain.sliced(lastIdx, labelLength);
842 if (!validateAsciiLabel(label))
845 hasPunycode = hasPunycode || label.startsWith(
"xn--"_L1);
851 *usesPunycode = hasPunycode;
// Fragment of the ACE-to-Unicode conversion: decodes each label of asciiDomain
// with qt_punycodeDecoder(), validates it, and appends it to result.
// NOTE(review): the loop header, the fallback paths taken when a test fails
// and the return statements are not part of this excerpt.
858 result.reserve(asciiDomain.size());
859 qsizetype lastIdx = 0;
// One checker for the whole name, constructed with its default argument.
861 DomainValidityChecker checker;
// Find the end of the current label. The assignment at 866 uses the end of
// the string (its guarding condition is not visible here).
864 auto idx = asciiDomain.indexOf(u'.', lastIdx);
866 idx = asciiDomain.size();
868 const auto labelLength = idx - lastIdx;
869 if (labelLength == 0) {
870 if (idx == asciiDomain.size())
873 const auto label = asciiDomain.sliced(lastIdx, labelLength);
874 const auto unicodeLabel = qt_punycodeDecoder(label);
// An empty decoder result and a failed label check are both tested before the
// label is appended.
876 if (unicodeLabel.isEmpty())
879 if (!checker.checkLabel(unicodeLabel, options))
882 result.append(unicodeLabel);
// Test for the last label.
885 if (idx == asciiDomain.size())
// Fragment of the Unicode name check: runs checkLabel() on every label of
// domainName.
// NOTE(review): the loop header and the return statements are not part of
// this excerpt.
896 qsizetype lastIdx = 0;
// The checker is constructed with ignoreBidiErrors = true.
898 DomainValidityChecker checker(
true);
// Find the end of the current label. The assignment at 903 uses the end of
// the string (its guarding condition is not visible here).
901 qsizetype idx = domainName.indexOf(u'.', lastIdx);
903 idx = domainName.size();
905 const qsizetype labelLength = idx - lastIdx;
907 const auto label = domainName.sliced(lastIdx, labelLength);
909 if (!checker.checkLabel(label, options))
// Test for the last label.
913 if (idx == domainName.size())
922 QUrl::AceProcessingOptions options)
// Fragment of the main conversion entry point. Visible pipeline: map the
// input, normalize it to NFC unless the mapping produced ASCII, validate it,
// obtain the ACE form, then either stop at ACE or convert back to Unicode.
// NOTE(review): the values returned on the failure branches and inside the
// block opened at line 943 are not part of this excerpt.
924 if (domain.isEmpty())
928 const QString mapped = mapDomainName(domain, options, &mappedToAscii);
// NFC normalization is skipped when the mapped name is ASCII.
929 const QString normalized =
930 mappedToAscii ? mapped : mapped.normalized(QString::NormalizationForm_C);
932 if (normalized.isEmpty())
// Only non-ASCII names go through the Unicode label checks.
935 if (!mappedToAscii && !checkUnicodeName(normalized, options))
// Assigned by checkAsciiDomainName() below through its out-parameter.
938 bool needsConversionToUnicode;
// An ASCII name is already its own ACE form; otherwise encode it.
939 const QString aceResult = mappedToAscii ? normalized : convertToAscii(normalized, dot);
940 if (aceResult.isEmpty() || !checkAsciiDomainName(aceResult, dot, &needsConversionToUnicode))
// The conversion back to Unicode is not reached when only ACE was requested,
// when no label uses Punycode, or when the whitelist is honoured and the name
// is not enabled by qt_is_idn_enabled().
943 if (op == ToAceOnly || !needsConversionToUnicode
944 || (!options.testFlag(QUrl::IgnoreIDNWhitelist) && !qt_is_idn_enabled(aceResult))) {
948 return convertToUnicode(aceResult, options);
952
953
954
955
956
957
958
959
960
// Returns the user-installed list when one has been set. Otherwise a QStringList
// is built from the built-in idn_whitelist entries.
// NOTE(review): the declaration of the local list/i inside the lambda and the
// trailing return statements are not part of this excerpt.
961QStringList QUrl::idnWhitelist()
963 if (user_idn_whitelist)
964 return *user_idn_whitelist;
// Function-local static: the lambda initializer is executed only once.
965 static const QStringList list = [] {
967 list.reserve(idn_whitelist.count());
969 while (i < idn_whitelist.count()) {
970 list << QLatin1StringView(idn_whitelist.at(i));
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
// Installs `list` as the user-defined list. The backing QStringList is
// allocated on first use and then overwritten by copy on every call.
// NOTE(review): no matching delete and no synchronization around
// user_idn_whitelist are visible in this excerpt.
999void QUrl::setIdnWhitelist(
const QStringList &list)
1001 if (!user_idn_whitelist)
1002 user_idn_whitelist =
new QStringList;
1003 *user_idn_whitelist = list;
static const uint initial_bias
static constexpr auto idn_whitelist
static constexpr qsizetype MaxDomainLabelLength
static QString convertToAscii(QStringView normalizedDomain, AceLeadingDot dot)
static bool checkAsciiDomainName(QStringView normalizedDomain, AceLeadingDot dot, bool *usesPunycode)
QString qt_ACE_do(const QString &domain, AceOperation op, AceLeadingDot dot, QUrl::AceProcessingOptions options)
static bool checkUnicodeName(const QString &domainName, QUrl::AceProcessingOptions options)
static bool isValidInNormalizedAsciiName(C c)
static bool qt_is_idn_enabled(QStringView aceDomain)
static const uint initial_n
static bool validateAsciiLabel(QStringView label)
static bool isValidInNormalizedAsciiLabel(C c)
static void appendEncode(QString *output, uint delta, uint bias)
static bool lessThan(const QChar *a, int l, const char *c)
static QString mapDomainName(const QString &in, QUrl::AceProcessingOptions options, bool *resultIsAscii)
static QString convertToUnicode(const QString &asciiDomain, QUrl::AceProcessingOptions options)
Q_AUTOTEST_EXPORT void qt_punycodeEncoder(QStringView in, QString *output)
static bool equal(const QChar *a, int l, const char *b)
static uint encodeDigit(uint digit)
static uint adapt(uint delta, uint numpoints, bool firsttime)