Qt
Internal/Contributor docs for the Qt SDK. Note: These are NOT official API docs; those are found at https://doc.qt.io/
Loading...
Searching...
No Matches
qstringmatcher.cpp
Go to the documentation of this file.
1// Copyright (C) 2020 The Qt Company Ltd.
2// Copyright (C) 2019 Mail.ru Group.
3// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
4// Qt-Security score:critical reason:data-parser
5
7
9
10static constexpr qsizetype FoldBufferCapacity = 256;
11
12static void bm_init_skiptable(QStringView needle, uchar *skiptable, Qt::CaseSensitivity cs)
13{
14 const char16_t *uc = needle.utf16();
15 const qsizetype len =
16 cs == Qt::CaseSensitive ? needle.size() : qMin(needle.size(), FoldBufferCapacity);
17 int l = qMin(int(len), 255);
18 memset(skiptable, l, 256 * sizeof(uchar));
19 uc += len - l;
20 if (cs == Qt::CaseSensitive) {
21 while (l--) {
22 skiptable[*uc & 0xff] = l;
23 ++uc;
24 }
25 } else {
26 const char16_t *start = uc;
27 while (l--) {
28 skiptable[foldCase(uc, start) & 0xff] = l;
29 ++uc;
30 }
31 }
32}
33
34static inline qsizetype bm_find(QStringView haystack, qsizetype index, QStringView needle,
35 const uchar *skiptable, Qt::CaseSensitivity cs)
36{
37 const char16_t *uc = haystack.utf16();
38 const qsizetype l = haystack.size();
39 const char16_t *puc = needle.utf16();
40 const qsizetype pl = needle.size();
41
42 if (pl == 0)
43 return index > l ? -1 : index;
44
45 if (cs == Qt::CaseSensitive) {
46 const qsizetype pl_minus_one = pl - 1;
47 const char16_t *current = uc + index + pl_minus_one;
48 const char16_t *end = uc + l;
49
50 while (current < end) {
51 qsizetype skip = skiptable[*current & 0xff];
52 if (!skip) {
53 // possible match
54 while (skip < pl) {
55 if (*(current - skip) != puc[pl_minus_one-skip])
56 break;
57 ++skip;
58 }
59 if (skip > pl_minus_one) // we have a match
60 return (current - uc) - pl_minus_one;
61
62 // in case we don't have a match we are a bit inefficient as we only skip by one
63 // when we have the non matching char in the string.
64 if (skiptable[*(current - skip) & 0xff] == pl)
65 skip = pl - skip;
66 else
67 skip = 1;
68 }
69 if (current > end - skip)
70 break;
71 current += skip;
72 }
73 } else {
74 char16_t foldBuffer[FoldBufferCapacity];
75 const qsizetype foldBufferLength = qMin(FoldBufferCapacity, pl);
76 const char16_t *start = puc;
77 for (qsizetype i = 0; i < foldBufferLength; ++i)
78 foldBuffer[i] = foldCase(&puc[i], start);
79 QStringView restNeedle = needle.sliced(foldBufferLength);
80 const qsizetype foldBufferEnd = foldBufferLength - 1;
81 const char16_t *current = uc + index + foldBufferEnd;
82 const char16_t *end = uc + l;
83
84 while (current < end) {
85 qsizetype skip = skiptable[foldCase(current, uc) & 0xff];
86 if (!skip) {
87 // possible match
88 while (skip < foldBufferLength) {
89 if (foldCase(current - skip, uc) != foldBuffer[foldBufferEnd - skip])
90 break;
91 ++skip;
92 }
93 if (skip > foldBufferEnd) { // Matching foldBuffer
94 qsizetype candidatePos = (current - uc) - foldBufferEnd;
95 QStringView restHaystack =
96 haystack.sliced(qMin(haystack.size(), candidatePos + foldBufferLength));
97 if (restNeedle.size() == 0
98 || restHaystack.startsWith(
99 restNeedle, Qt::CaseInsensitive)) // Check the rest of the string
100 return candidatePos;
101 }
102 // in case we don't have a match we are a bit inefficient as we only skip by one
103 // when we have the non matching char in the string.
104 if (skiptable[foldCase(current - skip, uc) & 0xff] == foldBufferLength)
105 skip = foldBufferLength - skip;
106 else
107 skip = 1;
108 }
109 if (current > end - skip)
110 break;
111 current += skip;
112 }
113 }
114 return -1; // not found
115}
116
117void QStringMatcher::updateSkipTable()
118{
119 bm_init_skiptable(q_sv, q_skiptable, q_cs);
120}
121
122/*!
123 \class QStringMatcher
124 \inmodule QtCore
125 \brief The QStringMatcher class holds a sequence of characters that
126 can be quickly matched in a Unicode string.
127
128 \ingroup tools
129 \ingroup string-processing
130
131 This class is useful when you have a sequence of \l{QChar}s that
132 you want to repeatedly match against some strings (perhaps in a
133 loop), or when you want to search for the same sequence of
134 characters multiple times in the same string. Using a matcher
135 object and indexIn() is faster than matching a plain QString with
136 QString::indexOf() if repeated matching takes place. This class
137 offers no benefit if you are doing one-off string matches.
138
139 Create the QStringMatcher with the QString you want to search
140 for. Then call indexIn() on the QString that you want to search.
141
142 \sa QString, QByteArrayMatcher, QRegularExpression
143*/
144
145/*! \fn QStringMatcher::QStringMatcher()
146
147 Constructs an empty string matcher that won't match anything.
148 Call setPattern() to give it a pattern to match.
149*/
150
151/*!
152 Constructs a string matcher that will search for \a pattern, with
153 case sensitivity \a cs.
154
155 Call indexIn() to perform a search.
156*/
157QStringMatcher::QStringMatcher(const QString &pattern, Qt::CaseSensitivity cs)
158 : d_ptr(nullptr), q_cs(cs), q_pattern(pattern)
159{
160 q_sv = q_pattern;
161 updateSkipTable();
162}
163
164/*!
165 \fn QStringMatcher::QStringMatcher(const QChar *uc, qsizetype length, Qt::CaseSensitivity cs)
166 \since 4.5
167
168 Constructs a string matcher that will search for the pattern referred to
169 by \a uc with the given \a length and case sensitivity specified by \a cs.
170*/
171
172/*!
173 \fn QStringMatcher::QStringMatcher(QStringView pattern, Qt::CaseSensitivity cs = Qt::CaseSensitive)
174 \since 5.14
175
176 Constructs a string matcher that will search for \a pattern, with
177 case sensitivity \a cs.
178
179 Call indexIn() to perform a search.
180*/
181QStringMatcher::QStringMatcher(QStringView str, Qt::CaseSensitivity cs)
182 : d_ptr(nullptr), q_cs(cs), q_sv(str)
183{
184 updateSkipTable();
185}
186/*!
187 Copies the \a other string matcher to this string matcher.
188*/
189QStringMatcher::QStringMatcher(const QStringMatcher &other)
190 : d_ptr(nullptr)
191{
192 operator=(other);
193}
194
195/*!
196 Destroys the string matcher.
197*/
198QStringMatcher::~QStringMatcher()
199{
200 Q_UNUSED(d_ptr);
201}
202
203/*!
204 Assigns the \a other string matcher to this string matcher.
205*/
206QStringMatcher &QStringMatcher::operator=(const QStringMatcher &other)
207{
208 if (this != &other) {
209 q_pattern = other.q_pattern;
210 q_cs = other.q_cs;
211 q_sv = other.q_sv;
212 memcpy(q_skiptable, other.q_skiptable, sizeof(q_skiptable));
213 }
214 return *this;
215}
216
217/*!
218 Sets the string that this string matcher will search for to \a
219 pattern.
220
221 \sa pattern(), setCaseSensitivity(), indexIn()
222*/
223void QStringMatcher::setPattern(const QString &pattern)
224{
225 q_pattern = pattern;
226 q_sv = q_pattern;
227 updateSkipTable();
228}
229
230/*!
231 \fn QString QStringMatcher::pattern() const
232
233 Returns the string pattern that this string matcher will search
234 for.
235
236 \sa setPattern()
237*/
238
239QString QStringMatcher::pattern() const
240{
241 if (!q_pattern.isEmpty())
242 return q_pattern;
243 return q_sv.toString();
244}
245
246/*!
247 \fn QStringView QStringMatcher::patternView() const noexcept
248 \since 6.7
249
250 Returns a string view of the pattern that this string matcher will search for.
251
252 \sa setPattern()
253*/
254
255/*!
256 Sets the case sensitivity setting of this string matcher to \a
257 cs.
258
259 \sa caseSensitivity(), setPattern(), indexIn()
260*/
261void QStringMatcher::setCaseSensitivity(Qt::CaseSensitivity cs)
262{
263 if (cs == q_cs)
264 return;
265 q_cs = cs;
266 updateSkipTable();
267}
268
269/*! \fn qsizetype QStringMatcher::indexIn(const QString &str, qsizetype from) const
270
271 Searches the string \a str from character position \a from
272 (default 0, i.e. from the first character), for the string
273 pattern() that was set in the constructor or in the most recent
274 call to setPattern(). Returns the position where the pattern()
275 matched in \a str, or -1 if no match was found.
276
277 \sa setPattern(), setCaseSensitivity()
278*/
279
280/*! \fn qsizetype QStringMatcher::indexIn(const QChar *str, qsizetype length, qsizetype from) const
281 \since 4.5
282
283 Searches the string starting at \a str (of length \a length) from
284 character position \a from (default 0, i.e. from the first
285 character), for the string pattern() that was set in the
286 constructor or in the most recent call to setPattern(). Returns
287 the position where the pattern() matched in \a str, or -1 if no
288 match was found.
289
290 \sa setPattern(), setCaseSensitivity()
291*/
292
293/*!
294 \since 5.14
295
296 Searches the string \a str from character position \a from
297 (default 0, i.e. from the first character), for the string
298 pattern() that was set in the constructor or in the most recent
299 call to setPattern(). Returns the position where the pattern()
300 matched in \a str, or -1 if no match was found.
301
302 \sa setPattern(), setCaseSensitivity()
303*/
304qsizetype QStringMatcher::indexIn(QStringView str, qsizetype from) const
305{
306 if (from < 0)
307 from = 0;
308 return bm_find(str, from, q_sv, q_skiptable, q_cs);
309}
310
311/*!
312 \fn Qt::CaseSensitivity QStringMatcher::caseSensitivity() const
313
314 Returns the case sensitivity setting for this string matcher.
315
316 \sa setCaseSensitivity()
317*/
318
319/*!
320 \internal
321*/
322
324 QStringView haystack, qsizetype haystackOffset,
325 QStringView needle, Qt::CaseSensitivity cs)
326{
327 uchar skiptable[256];
328 bm_init_skiptable(needle, skiptable, cs);
329 if (haystackOffset < 0)
330 haystackOffset = 0;
331 return bm_find(haystack, haystackOffset, needle, skiptable, cs);
332}
333
334QT_END_NAMESPACE
qsizetype qFindStringBoyerMoore(QStringView haystack, qsizetype from, QStringView needle, Qt::CaseSensitivity cs)
static qsizetype bm_find(QStringView haystack, qsizetype index, QStringView needle, const uchar *skiptable, Qt::CaseSensitivity cs)
static void bm_init_skiptable(QStringView needle, uchar *skiptable, Qt::CaseSensitivity cs)
static QT_BEGIN_NAMESPACE constexpr qsizetype FoldBufferCapacity