Qt
Internal/Contributor docs for the Qt SDK. Note: These are NOT official API docs; those are found at https://doc.qt.io/
Loading...
Searching...
No Matches
qbytearraymatcher.cpp
Go to the documentation of this file.
1// Copyright (C) 2016 The Qt Company Ltd.
2// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
3// Qt-Security score:critical reason:data-parser
4
6
7#include <qtconfiginclude.h>
8#ifndef QT_BOOTSTRAPPED
9# include <private/qtcore-config_p.h>
10#endif
11
12#include <limits.h>
13
15
16static inline void bm_init_skiptable(const uchar *cc, qsizetype len, uchar *skiptable)
17{
18 int l = int(qMin(len, qsizetype(255)));
19 memset(skiptable, l, 256*sizeof(uchar));
20 cc += len - l;
21 while (l--)
22 skiptable[*cc++] = l;
23}
24
25static inline qsizetype bm_find(const uchar *cc, qsizetype l, qsizetype index, const uchar *puc,
26 qsizetype pl, const uchar *skiptable)
27{
28 if (pl == 0)
29 return index > l ? -1 : index;
30 const qsizetype pl_minus_one = pl - 1;
31
32 const uchar *current = cc + index + pl_minus_one;
33 const uchar *end = cc + l;
34 while (current < end) {
35 qsizetype skip = skiptable[*current];
36 if (!skip) {
37 // possible match
38 while (skip < pl) {
39 if (*(current - skip) != puc[pl_minus_one - skip])
40 break;
41 skip++;
42 }
43 if (skip > pl_minus_one) // we have a match
44 return (current - cc) - skip + 1;
45
46 // in case we don't have a match we are a bit inefficient as we only skip by one
47 // when we have the non matching char in the string.
48 if (skiptable[*(current - skip)] == pl)
49 skip = pl - skip;
50 else
51 skip = 1;
52 }
53 if (current > end - skip)
54 break;
55 current += skip;
56 }
57 return -1; // not found
58}
59
60/*! \class QByteArrayMatcher
61 \inmodule QtCore
62 \brief The QByteArrayMatcher class holds a sequence of bytes that
63 can be quickly matched in a byte array.
64
65 \ingroup tools
66 \ingroup string-processing
67
68 This class is useful when you have a sequence of bytes that you
69 want to repeatedly match against some byte arrays (perhaps in a
70 loop), or when you want to search for the same sequence of bytes
71 multiple times in the same byte array. Using a matcher object and
72 indexIn() is faster than matching a plain QByteArray with
73 QByteArray::indexOf() if repeated matching takes place. This
74 class offers no benefit if you are doing one-off byte array
75 matches.
76
77 Create the QByteArrayMatcher with the QByteArray you want to
78 search for. Then call indexIn() on the QByteArray that you want to
79 search.
80
81 \sa QByteArray, QStringMatcher
82*/
83
84/*!
85 Constructs an empty byte array matcher that won't match anything.
86 Call setPattern() to give it a pattern to match.
87*/
88QByteArrayMatcher::QByteArrayMatcher()
89 : d(nullptr)
90{
91 p.p = nullptr;
92 p.l = 0;
93 memset(p.q_skiptable, 0, sizeof(p.q_skiptable));
94}
95
96/*!
97 Constructs a byte array matcher from \a pattern. \a pattern
98 has the given \a length. Call indexIn() to perform a search.
99
100 \note the data that \a pattern is referencing must remain valid while this
101 object is used.
102*/
103QByteArrayMatcher::QByteArrayMatcher(const char *pattern, qsizetype length) : d(nullptr)
104{
105 p.p = reinterpret_cast<const uchar *>(pattern);
106 if (length < 0)
107 p.l = qstrlen(pattern);
108 else
109 p.l = length;
110 bm_init_skiptable(p.p, p.l, p.q_skiptable);
111}
112
113/*!
114 Constructs a byte array matcher that will search for \a pattern.
115 Call indexIn() to perform a search.
116*/
117QByteArrayMatcher::QByteArrayMatcher(const QByteArray &pattern)
118 : d(nullptr), q_pattern(pattern)
119{
120 p.p = reinterpret_cast<const uchar *>(pattern.constData());
121 p.l = pattern.size();
122 bm_init_skiptable(p.p, p.l, p.q_skiptable);
123}
124
125/*!
126 \fn QByteArrayMatcher::QByteArrayMatcher(QByteArrayView pattern)
127 \since 6.3
128 \overload
129
130 Constructs a byte array matcher that will search for \a pattern.
131 Call indexIn() to perform a search.
132
133 \note the data that \a pattern is referencing must remain valid while this
134 object is used.
135*/
136
137/*!
138 Copies the \a other byte array matcher to this byte array matcher.
139*/
140QByteArrayMatcher::QByteArrayMatcher(const QByteArrayMatcher &other)
141 : d(nullptr)
142{
143 operator=(other);
144}
145
146/*!
147 Destroys the byte array matcher.
148*/
149QByteArrayMatcher::~QByteArrayMatcher()
150{
151 Q_UNUSED(d);
152}
153
154/*!
155 Assigns the \a other byte array matcher to this byte array matcher.
156*/
157QByteArrayMatcher &QByteArrayMatcher::operator=(const QByteArrayMatcher &other)
158{
159 q_pattern = other.q_pattern;
160 memcpy(&p, &other.p, sizeof(p));
161 return *this;
162}
163
164/*!
165 Sets the byte array that this byte array matcher will search for
166 to \a pattern.
167
168 \sa pattern(), indexIn()
169*/
170void QByteArrayMatcher::setPattern(const QByteArray &pattern)
171{
172 q_pattern = pattern;
173 p.p = reinterpret_cast<const uchar *>(pattern.constData());
174 p.l = pattern.size();
175 bm_init_skiptable(p.p, p.l, p.q_skiptable);
176}
177
178/*!
179 Searches the char string \a str, which has length \a len, from
180 byte position \a from (default 0, i.e. from the first byte), for
181 the byte array pattern() that was set in the constructor or in the
182 most recent call to setPattern(). Returns the position where the
183 pattern() matched in \a str, or -1 if no match was found.
184*/
185qsizetype QByteArrayMatcher::indexIn(const char *str, qsizetype len, qsizetype from) const
186{
187 if (from < 0)
188 from = 0;
189 return bm_find(reinterpret_cast<const uchar *>(str), len, from,
190 p.p, p.l, p.q_skiptable);
191}
192
193/*!
194 \fn qsizetype QByteArrayMatcher::indexIn(QByteArrayView data, qsizetype from) const
195 \since 6.3
196 \overload
197
198 Searches the byte array \a data, from byte position \a from (default
199 0, i.e. from the first byte), for the byte array pattern() that
200 was set in the constructor or in the most recent call to
201 setPattern(). Returns the position where the pattern() matched in
202 \a data, or -1 if no match was found.
203*/
204qsizetype QByteArrayMatcher::indexIn(QByteArrayView data, qsizetype from) const
205{
206 if (from < 0)
207 from = 0;
208 return bm_find(reinterpret_cast<const uchar *>(data.data()), data.size(), from,
209 p.p, p.l, p.q_skiptable);
210}
211
212/*!
213 \fn QByteArray QByteArrayMatcher::pattern() const
214
215 Returns the byte array pattern that this byte array matcher will
216 search for.
217
218 \sa setPattern()
219*/
220
221/*!
222 \internal
223 */
224Q_NEVER_INLINE
225static qsizetype qFindByteArrayBoyerMoore(
226 const char *haystack, qsizetype haystackLen, qsizetype haystackOffset,
227 const char *needle, qsizetype needleLen)
228{
229 uchar skiptable[256];
230 bm_init_skiptable((const uchar *)needle, needleLen, skiptable);
231 if (haystackOffset < 0)
232 haystackOffset = 0;
233 return bm_find((const uchar *)haystack, haystackLen, haystackOffset,
234 (const uchar *)needle, needleLen, skiptable);
235}
236
237/*!
238 \internal
239 */
240static qsizetype qFindByteArray(const char *haystack0, qsizetype l, qsizetype from,
241 const char *needle, qsizetype sl);
242qsizetype QtPrivate::findByteArray(QByteArrayView haystack, qsizetype from, QByteArrayView needle) noexcept
243{
244 const auto haystack0 = haystack.data();
245 const auto l = haystack.size();
246 const auto sl = needle.size();
247#if !QT_CONFIG(memmem)
248 if (sl == 1)
249 return findByteArray(haystack, from, needle.front());
250#endif
251
252 if (from < 0)
253 from += l;
254 if (std::size_t(sl + from) > std::size_t(l))
255 return -1;
256 if (!sl)
257 return from;
258 if (!l)
259 return -1;
260
261#if QT_CONFIG(memmem)
262 auto where = memmem(haystack0 + from, l - from, needle.data(), sl);
263 return where ? static_cast<const char *>(where) - haystack0 : -1;
264#endif
265
266 /*
267 We use the Boyer-Moore algorithm in cases where the overhead
268 for the skip table should pay off, otherwise we use a simple
269 hash function.
270 */
271 if (l > 500 && sl > 5)
272 return qFindByteArrayBoyerMoore(haystack0, l, from, needle.data(), sl);
273 return qFindByteArray(haystack0, l, from, needle.data(), sl);
274}
275
276qsizetype qFindByteArray(const char *haystack0, qsizetype l, qsizetype from,
277 const char *needle, qsizetype sl)
278{
279 /*
280 We use some hashing for efficiency's sake. Instead of
281 comparing strings, we compare the hash value of str with that
282 of a part of this QByteArray. Only if that matches, we call memcmp().
283 */
284 const char *haystack = haystack0 + from;
285 const char *end = haystack0 + (l - sl);
286 const qregisteruint sl_minus_1 = sl - 1;
287 qregisteruint hashNeedle = 0, hashHaystack = 0;
288 qsizetype idx;
289 for (idx = 0; idx < sl; ++idx) {
290 hashNeedle = ((hashNeedle<<1) + needle[idx]);
291 hashHaystack = ((hashHaystack<<1) + haystack[idx]);
292 }
293 hashHaystack -= *(haystack + sl_minus_1);
294
295 while (haystack <= end) {
296 hashHaystack += *(haystack + sl_minus_1);
297 if (hashHaystack == hashNeedle && *needle == *haystack
298 && memcmp(needle, haystack, sl) == 0)
299 return haystack - haystack0;
300
301 if (sl_minus_1 < sizeof(sl_minus_1) * CHAR_BIT)
302 hashHaystack -= qregisteruint(*haystack) << sl_minus_1;
303 hashHaystack <<= 1;
304 ++haystack;
305 }
306 return -1;
307}
308
309/*!
310 \class QStaticByteArrayMatcherBase
311 \since 5.9
312 \internal
313 \brief Non-template base class of QStaticByteArrayMatcher.
314*/
315
316/*!
317 \class QStaticByteArrayMatcher
318 \since 5.9
319 \inmodule QtCore
320 \brief The QStaticByteArrayMatcher class is a compile-time version of QByteArrayMatcher.
321
322 \ingroup tools
323 \ingroup string-processing
324
325 QStaticByteArrayMatcher<N> is a template class where \a N is the length
326 of the byte array pattern to search for (including the null terminator for
327 string literals). This class is useful when you have a sequence of bytes
328 that you want to repeatedly match against some byte arrays (perhaps in a
329 loop), or when you want to search for the same sequence of bytes multiple
330 times in the same byte array. Using a matcher object and indexIn() is
331 faster than matching a plain QByteArray with QByteArray::indexOf(), in
332 particular if repeated matching takes place.
333
334 Unlike QByteArrayMatcher, this class calculates the internal
335 representation at \e{compile-time}, so it can
336 even benefit if you are doing one-off byte array matches.
337
338 Create the QStaticByteArrayMatcher by calling qMakeStaticByteArrayMatcher(),
339 passing it the C string literal you want to search for. Store the return
340 value of that function in a \c{static const auto} variable, so you don't need
341 to pass the \c{N} template parameter explicitly:
342
343 \snippet code/src_corelib_text_qbytearraymatcher.cpp 0
344
345 Then call indexIn() on the QByteArray in which you want to search, just like
346 with QByteArrayMatcher.
347
348 Since this class is designed to do all the up-front calculations at compile-time,
349 it does not offer a setPattern() method.
350
351 \sa QByteArrayMatcher, QStringMatcher
352*/
353
354/*!
355 \fn template <size_t N> qsizetype QStaticByteArrayMatcher<N>::indexIn(const char *haystack, qsizetype hlen, qsizetype from = 0) const
356
357 Searches the char string \a haystack, which has length \a hlen, from
358 byte position \a from (default 0, i.e. from the first byte), for
359 the byte array pattern() that was set in the constructor.
360
361 Returns the position where the pattern() matched in \a haystack, or -1 if no match was found.
362*/
363
364/*!
365 \fn template <size_t N> qsizetype QStaticByteArrayMatcher<N>::indexIn(const QByteArray &haystack, qsizetype from = 0) const
366
367 Searches the char string \a haystack, from byte position \a from
368 (default 0, i.e. from the first byte), for the byte array pattern()
369 that was set in the constructor.
370
371 Returns the position where the pattern() matched in \a haystack, or -1 if no match was found.
372*/
373
374/*!
375 \fn template <size_t N> QByteArray QStaticByteArrayMatcher<N>::pattern() const
376
377 Returns the byte array pattern that this byte array matcher will
378 search for.
379
380 \sa QByteArrayMatcher::setPattern()
381*/
382
383/*!
384 \internal
385*/
386qsizetype QStaticByteArrayMatcherBase::indexOfIn(const char *needle, size_t nlen, const char *haystack, qsizetype hlen, qsizetype from) const noexcept
387{
388 if (from < 0)
389 from = 0;
390 return bm_find(reinterpret_cast<const uchar *>(haystack), hlen, from,
391 reinterpret_cast<const uchar *>(needle), nlen, m_skiptable.data);
392}
393
394/*!
395 \fn template <size_t N> QStaticByteArrayMatcher<N>::QStaticByteArrayMatcher(const char (&pattern)[N])
396 \internal
397*/
398
399/*!
400 \fn template <size_t N> QStaticByteArrayMatcher qMakeStaticByteArrayMatcher(const char (&pattern)[N])
401 \since 5.9
402 \relates QStaticByteArrayMatcher
403
404 Return a QStaticByteArrayMatcher with the correct \a N determined
405 automatically from the \a pattern passed.
406
407 To take full advantage of this function, assign the result to an
408 \c{auto} variable:
409
410 \snippet code/src_corelib_text_qbytearraymatcher.cpp 0
411*/
412
413QT_END_NAMESPACE
Non-template base class of QStaticByteArrayMatcher.
Combined button and popup list for selecting options.
static QT_BEGIN_NAMESPACE void bm_init_skiptable(const uchar *cc, qsizetype len, uchar *skiptable)
static qsizetype bm_find(const uchar *cc, qsizetype l, qsizetype index, const uchar *puc, qsizetype pl, const uchar *skiptable)
static qsizetype qFindByteArray(const char *haystack0, qsizetype l, qsizetype from, const char *needle, qsizetype sl)