Qt
Internal/Contributor docs for the Qt SDK. Note: These are NOT official API docs; those are found at https://doc.qt.io/
Loading...
Searching...
No Matches
textutils.cpp
Go to the documentation of this file.
1// Copyright (C) 2026 The Qt Company Ltd.
2// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
3
4#include "textutils.h"
5
6#include <QtCore/qcryptographichash.h>
7
9
10/*!
11 \namespace TextUtils
12 \internal
13 \brief Pure string helpers with no dependencies on QDoc driver types.
14
15 TextUtils groups text-manipulation helpers that the IR builders, the
16 template generator, and the legacy generators all need, but that do
17 not touch Node, Tree, Config, or Generator. Keeping them here lets
18 QDocLib components call them without dragging in the rest of QDoc.
19 */
20namespace TextUtils {
21
22/*!
23 \internal
24 Returns the punctuation character for the word at \a wordPosition in a
25 list of \a numberOfWords length. For the last position, returns "."
26 (full stop). For any other word, delegates to comma().
27
28 \sa comma()
29 */
30QString separator(qsizetype wordPosition, qsizetype numberOfWords)
31{
32 static QString terminator = QStringLiteral(".");
33 if (wordPosition == numberOfWords - 1)
34 return terminator;
35 else
36 return comma(wordPosition, numberOfWords);
37}
38
39/*!
40 \internal
41 Returns the inter-item punctuation for a list of \a numberOfWords words
42 at \a wordPosition.
43
44 For a list of length one, returns an empty QString. For a list of length
45 two, returns " and ". For longer lists, returns ", " for early items and
46 ", and " for the item before the last. The last position returns an
47 empty QString; its punctuation is the period returned by separator().
48
49 \sa separator()
50 */
51QString comma(qsizetype wordPosition, qsizetype numberOfWords)
52{
53 if (wordPosition == numberOfWords - 1)
54 return QString();
55 if (numberOfWords == 2)
56 return QStringLiteral(" and ");
57 if (wordPosition == 0 || wordPosition < numberOfWords - 2)
58 return QStringLiteral(", ");
59 return QStringLiteral(", and ");
60}
61
62/*!
63 \brief Returns an ASCII-printable representation of \a str, preserving
64 alphanumeric (alnum) characters ([a-zA-Z0-9]) and hyphens, but replacing
65 all other characters with hyphens unless this would result in multiple
66 adjacent hyphens. Upper case characters are converted to lower case.
67
68 The string that is returned is normalized for use where ASCII-printable
69 strings are required, such as in file names or fragment identifiers in URLs.
70
71 We distinguish between hyphens in \a str and hyphens used as placeholders
72 for non-alphanumeric characters.
73
74 Placeholder hyphens are removed from the start and end of the output string,
75 such that the resulting string does not start or end with a hyphen unless
76 the original string included them. Placeholder hyphens are only generated
77 to separate spans of alphanumeric characters.
78
79 If any character in \a str is non-latin, or latin and not found in the
80 aforementioned subset (e.g. 'ß', 'å', or 'ö'), a hash of \a str is appended
81 to the final string.
82
83 \b{Implementation note}
84
85 Previously, an optimized implementation was used that was equivalent to
86 this code for the basic translation to an ASCII-printable representation:
87
88 \code
89 name.replace(QRegularExpression("[^A-Za-z0-9]+"), " ");
90 name = name.simplified();
91 name.replace(QLatin1Char(' '), QLatin1Char('-'));
92 name = name.toLower();
93 \endcode
94
95 Although it was measured to be approximately four times faster than this
96 simple sequence of transformations, it could not distinguish between
97 hyphens that were intentionally part of the input string and those that
98 were generated by the implementation.
99*/
100QString asAsciiPrintable(const QString &str)
101{
102 auto legal_ascii = [](const uint value) {
103 const uint start_ascii_subset{ 32 };
104 const uint end_ascii_subset{ 126 };
105
106 return value >= start_ascii_subset && value <= end_ascii_subset;
107 };
108
109 QString result;
110 bool inAlphaNum{false};
111 bool has_non_alnum_content{ false };
112
113 // Replace all non-alphanumeric characters and non-hyphens with spaces.
114 // Remove spaces that are adjacent to hyphens to prevent multiple,
115 // unintentional hyphens. This generates an intermediate string.
116 for (const auto &c : str) {
117 char16_t u = c.unicode();
118 if (!legal_ascii(u))
119 has_non_alnum_content = true;
120 if (u >= 'A' && u <= 'Z')
121 u += 'a' - 'A';
122 if ((u >= 'a' && u <= 'z') || (u >= '0' && u <= '9')) {
123 result += QLatin1Char(u);
124 inAlphaNum = true;
125 } else if (u == '-') {
126 // Remove any existing trailing space.
127 if (result.endsWith(' '))
128 result.chop(1);
129
130 result += QLatin1Char(u);
131 inAlphaNum = false;
132 } else if (inAlphaNum || (!result.endsWith(' ') && !result.endsWith('-'))) {
133 // Only append spaces to alphanumeric, non-space and non-hyphen characters.
134 // This prevents multiple spaces in the intermediate string.
135 result += QLatin1Char(' ');
136 inAlphaNum = false;
137 }
138 // Any other character outside a span of alphanumeric characters
139 // is ignored if it follows a space or a hyphen.
140 }
141
142 // Remove leading and trailing spaces before finally replacing spaces with hyphens.
143 result = result.trimmed();
144 result.replace(QLatin1Char(' '), QLatin1Char('-'));
145
146 if (has_non_alnum_content) {
147 auto title_hash = QString::fromLocal8Bit(
148 QCryptographicHash::hash(str.toUtf8(), QCryptographicHash::Md5).toHex());
149 title_hash.truncate(8);
150 if (!result.isEmpty())
151 result.append(QLatin1Char('-'));
152 result.append(title_hash);
153 }
154
155 return result;
156}
157
158/*!
159 \internal
160 HTML-escapes the ampersand, less-than, greater-than, and double-quote
161 characters in \a str, leaving other characters untouched.
162 */
163QString protect(const QString &str)
164{
165 qsizetype n = str.size();
166 QString marked;
167 marked.reserve(n * 2 + 30);
168 const QChar *data = str.constData();
169 for (int i = 0; i != n; ++i) {
170 switch (data[i].unicode()) {
171 case '&':
172 marked += samp;
173 break;
174 case '<':
175 marked += slt;
176 break;
177 case '>':
178 marked += sgt;
179 break;
180 case '"':
181 marked += squot;
182 break;
183 default:
184 marked += data[i];
185 }
186 }
187 return marked;
188}
189
190} // namespace TextUtils
191
192QT_END_NAMESPACE
Combined button and popup list for selecting options.
Pure string helpers with no dependencies on QDoc driver types.
Definition textutils.h:11
QString protect(const QString &string)
QString asAsciiPrintable(const QString &name)
Returns an ASCII-printable representation of str, preserving alphanumeric (alnum) characters ([a-zA-Z...
QString separator(qsizetype wordPosition, qsizetype numberOfWords)
Definition textutils.cpp:30
QString comma(qsizetype wordPosition, qsizetype numberOfWords)
Definition textutils.cpp:51