Qt
Internal/Contributor docs for the Qt SDK. Note: These are NOT official API docs; those are found at https://doc.qt.io/
Loading...
Searching...
No Matches
qbytearraymatcher.cpp
Go to the documentation of this file.
1// Copyright (C) 2016 The Qt Company Ltd.
2// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
3// Qt-Security score:critical reason:data-parser
4
6
7#include <qtconfiginclude.h>
8#ifndef QT_BOOTSTRAPPED
9# include <private/qtcore-config_p.h>
10#endif
11
12#include <limits.h>
13
15
16static inline void bm_init_skiptable(const uchar *cc, qsizetype len, uchar *skiptable)
17{
18 int l = int(qMin(len, qsizetype(255)));
19 memset(skiptable, l, 256*sizeof(uchar));
20 cc += len - l;
21 while (l--)
22 skiptable[*cc++] = l;
23}
24
25static inline qsizetype bm_find(const uchar *cc, qsizetype l, qsizetype index, const uchar *puc,
26 qsizetype pl, const uchar *skiptable)
27{
28 if (pl == 0)
29 return index > l ? -1 : index;
30 const qsizetype pl_minus_one = pl - 1;
31
32 const uchar *current = cc + index + pl_minus_one;
33 const uchar *end = cc + l;
34 while (current < end) {
35 qsizetype skip = skiptable[*current];
36 if (!skip) {
37 // possible match
38 while (skip < pl) {
39 if (*(current - skip) != puc[pl_minus_one - skip])
40 break;
41 skip++;
42 }
43 if (skip > pl_minus_one) // we have a match
44 return (current - cc) - skip + 1;
45
46 // in case we don't have a match we are a bit inefficient as we only skip by one
47 // when we have the non matching char in the string.
48 if (skiptable[*(current - skip)] == pl)
49 skip = pl - skip;
50 else
51 skip = 1;
52 }
53 if (current > end - skip)
54 break;
55 current += skip;
56 }
57 return -1; // not found
58}
59
60/*! \class QByteArrayMatcher
61 \inmodule QtCore
62 \brief The QByteArrayMatcher class holds a sequence of bytes that
63 can be quickly matched in a byte array.
64
65 \ingroup tools
66 \ingroup string-processing
67
68 This class is useful when you have a sequence of bytes that you
69 want to repeatedly match against some byte arrays (perhaps in a
70 loop), or when you want to search for the same sequence of bytes
71 multiple times in the same byte array. Using a matcher object and
72 indexIn() is faster than matching a plain QByteArray with
73 QByteArray::indexOf() if repeated matching takes place. This
74 class offers no benefit if you are doing one-off byte array
75 matches.
76
77 Create the QByteArrayMatcher with the QByteArray you want to
78 search for. Then call indexIn() on the QByteArray that you want to
79 search.
80
81 \sa QByteArray, QStringMatcher
82*/
83
84/*!
85 Constructs an empty byte array matcher that won't match anything.
86 Call setPattern() to give it a pattern to match.
87*/
88QByteArrayMatcher::QByteArrayMatcher()
89 : d(nullptr)
90{
91 p.p = nullptr;
92 p.l = 0;
93 memset(p.q_skiptable, 0, sizeof(p.q_skiptable));
94}
95
96/*!
97 Constructs a byte array matcher from \a pattern. \a pattern
98 has the given \a length. Call indexIn() to perform a search.
99
100 \note the data that \a pattern is referencing must remain valid while this
101 object is used.
102*/
103QByteArrayMatcher::QByteArrayMatcher(const char *pattern, qsizetype length) : d(nullptr)
104{
105 p.p = reinterpret_cast<const uchar *>(pattern);
106 if (length < 0)
107 p.l = qstrlen(pattern);
108 else
109 p.l = length;
110 bm_init_skiptable(p.p, p.l, p.q_skiptable);
111}
112
113/*!
114 Constructs a byte array matcher that will search for \a pattern.
115 Call indexIn() to perform a search.
116*/
117QByteArrayMatcher::QByteArrayMatcher(const QByteArray &pattern)
118 : d(nullptr), q_pattern(pattern)
119{
120 p.p = reinterpret_cast<const uchar *>(pattern.constData());
121 p.l = pattern.size();
122 bm_init_skiptable(p.p, p.l, p.q_skiptable);
123}
124
125/*!
126 \fn QByteArrayMatcher::QByteArrayMatcher(QByteArrayView pattern)
127 \since 6.3
128 \overload
129
130 Constructs a byte array matcher that will search for \a pattern.
131 Call indexIn() to perform a search.
132
133 \note the data that \a pattern is referencing must remain valid while this
134 object is used.
135*/
136
137/*!
138 Copies the \a other byte array matcher to this byte array matcher.
139*/
140QByteArrayMatcher::QByteArrayMatcher(const QByteArrayMatcher &other)
141 : d(nullptr)
142{
143 operator=(other);
144}
145
146/*!
147 Destroys the byte array matcher.
148*/
149QByteArrayMatcher::~QByteArrayMatcher()
150{
151 Q_UNUSED(d);
152}
153
154/*!
155 Assigns the \a other byte array matcher to this byte array matcher.
156*/
157QByteArrayMatcher &QByteArrayMatcher::operator=(const QByteArrayMatcher &other)
158{
159 q_pattern = other.q_pattern;
160 memcpy(&p, &other.p, sizeof(p));
161 return *this;
162}
163
164/*!
165 Sets the byte array that this byte array matcher will search for
166 to \a pattern.
167
168 \sa pattern(), indexIn()
169*/
170void QByteArrayMatcher::setPattern(const QByteArray &pattern)
171{
172 q_pattern = pattern;
173 p.p = reinterpret_cast<const uchar *>(pattern.constData());
174 p.l = pattern.size();
175 bm_init_skiptable(p.p, p.l, p.q_skiptable);
176}
177
178/*!
179 Searches the char string \a str, which has length \a len, from
180 byte position \a from (default 0, i.e. from the first byte), for
181 the byte array pattern() that was set in the constructor or in the
182 most recent call to setPattern(). Returns the position where the
183 pattern() matched in \a str, or -1 if no match was found.
184*/
185qsizetype QByteArrayMatcher::indexIn(const char *str, qsizetype len, qsizetype from) const
186{
187 if (from < 0)
188 from = 0;
189 return bm_find(reinterpret_cast<const uchar *>(str), len, from,
190 p.p, p.l, p.q_skiptable);
191}
192
193/*!
194 \fn qsizetype QByteArrayMatcher::indexIn(QByteArrayView data, qsizetype from) const
195 \since 6.3
196 \overload
197
198 Searches the byte array \a data, from byte position \a from (default
199 0, i.e. from the first byte), for the byte array pattern() that
200 was set in the constructor or in the most recent call to
201 setPattern(). Returns the position where the pattern() matched in
202 \a data, or -1 if no match was found.
203*/
204qsizetype QByteArrayMatcher::indexIn(QByteArrayView data, qsizetype from) const
205{
206 if (from < 0)
207 from = 0;
208 return bm_find(reinterpret_cast<const uchar *>(data.data()), data.size(), from,
209 p.p, p.l, p.q_skiptable);
210}
211
212/*!
213 \fn QByteArray QByteArrayMatcher::pattern() const
214
215 Returns the byte array pattern that this byte array matcher will
216 search for.
217
218 \sa setPattern()
219*/
220
221/*!
222 \internal
223 */
224Q_NEVER_INLINE
225static qsizetype qFindByteArrayBoyerMoore(
226 const char *haystack, qsizetype haystackLen, qsizetype haystackOffset,
227 const char *needle, qsizetype needleLen)
228{
229 uchar skiptable[256];
230 bm_init_skiptable((const uchar *)needle, needleLen, skiptable);
231 if (haystackOffset < 0)
232 haystackOffset = 0;
233 return bm_find((const uchar *)haystack, haystackLen, haystackOffset,
234 (const uchar *)needle, needleLen, skiptable);
235}
236
237/*!
238 \internal
239 */
240static qsizetype qFindByteArray(const char *haystack0, qsizetype l, qsizetype from,
241 const char *needle, qsizetype sl);
242qsizetype QtPrivate::findByteArray(QByteArrayView haystack, qsizetype from, QByteArrayView needle) noexcept
243{
244 const auto haystack0 = haystack.data();
245 const auto l = haystack.size();
246 const auto sl = needle.size();
247#if !QT_CONFIG(memmem)
248 if (sl == 1)
249 return findByteArray(haystack, from, needle.front());
250#endif
251
252 if (from < 0)
253 from += l;
254 if (std::size_t(sl + from) > std::size_t(l))
255 return -1;
256 if (!sl)
257 return from;
258 if (!l)
259 return -1;
260
261#if QT_CONFIG(memmem)
262 auto where = memmem(haystack0 + from, l - from, needle.data(), sl);
263 return where ? static_cast<const char *>(where) - haystack0 : -1;
264#endif
265
266 /*
267 We use the Boyer-Moore algorithm in cases where the overhead
268 for the skip table should pay off, otherwise we use a simple
269 hash function.
270 */
271 if (l > 500 && sl > 5)
272 return qFindByteArrayBoyerMoore(haystack0, l, from, needle.data(), sl);
273 return qFindByteArray(haystack0, l, from, needle.data(), sl);
274}
275
276qsizetype qFindByteArray(const char *haystack0, qsizetype l, qsizetype from,
277 const char *needle, qsizetype sl)
278{
279 /*
280 We use some hashing for efficiency's sake. Instead of
281 comparing strings, we compare the hash value of str with that
282 of a part of this QByteArray. Only if that matches, we call memcmp().
283 */
284 const char *haystack = haystack0 + from;
285 const char *end = haystack0 + (l - sl);
286 const qregisteruint sl_minus_1 = sl - 1;
287 qregisteruint hashNeedle = 0, hashHaystack = 0;
288 qsizetype idx;
289 for (idx = 0; idx < sl; ++idx) {
290 hashNeedle = ((hashNeedle<<1) + needle[idx]);
291 hashHaystack = ((hashHaystack<<1) + haystack[idx]);
292 }
293 hashHaystack -= *(haystack + sl_minus_1);
294
295 while (haystack <= end) {
296 hashHaystack += *(haystack + sl_minus_1);
297 if (hashHaystack == hashNeedle && *needle == *haystack
298 && memcmp(needle, haystack, sl) == 0)
299 return haystack - haystack0;
300
301 if (sl_minus_1 < sizeof(sl_minus_1) * CHAR_BIT)
302 hashHaystack -= qregisteruint(*haystack) << sl_minus_1;
303 hashHaystack <<= 1;
304 ++haystack;
305 }
306 return -1;
307}
308
309/*!
310 \class QStaticByteArrayMatcherBase
311 \since 5.9
312 \internal
313 \brief Non-template base class of QStaticByteArrayMatcher.
314*/
315
316/*!
317 \class QStaticByteArrayMatcher
318 \since 5.9
319 \inmodule QtCore
320 \brief The QStaticByteArrayMatcher class is a compile-time version of QByteArrayMatcher.
321
322 \ingroup tools
323 \ingroup string-processing
324
325 This class is useful when you have a sequence of bytes that you
326 want to repeatedly match against some byte arrays (perhaps in a
327 loop), or when you want to search for the same sequence of bytes
328 multiple times in the same byte array. Using a matcher object and
329 indexIn() is faster than matching a plain QByteArray with
330 QByteArray::indexOf(), in particular if repeated matching takes place.
331
332 Unlike QByteArrayMatcher, this class calculates the internal
333 representation at \e{compile-time}, so it can
334 even benefit if you are doing one-off byte array matches.
335
336 Create the QStaticByteArrayMatcher by calling qMakeStaticByteArrayMatcher(),
337 passing it the C string literal you want to search for. Store the return
338 value of that function in a \c{static const auto} variable, so you don't need
339 to pass the \c{N} template parameter explicitly:
340
341 \snippet code/src_corelib_text_qbytearraymatcher.cpp 0
342
343 Then call indexIn() on the QByteArray in which you want to search, just like
344 with QByteArrayMatcher.
345
346 Since this class is designed to do all the up-front calculations at compile-time,
347 it does not offer a setPattern() method.
348
349 \sa QByteArrayMatcher, QStringMatcher
350*/
351
352/*!
353 \fn template <size_t N> qsizetype QStaticByteArrayMatcher<N>::indexIn(const char *haystack, qsizetype hlen, qsizetype from = 0) const
354
355 Searches the char string \a haystack, which has length \a hlen, from
356 byte position \a from (default 0, i.e. from the first byte), for
357 the byte array pattern() that was set in the constructor.
358
359 Returns the position where the pattern() matched in \a haystack, or -1 if no match was found.
360*/
361
362/*!
363 \fn template <size_t N> qsizetype QStaticByteArrayMatcher<N>::indexIn(const QByteArray &haystack, qsizetype from = 0) const
364
365 Searches the char string \a haystack, from byte position \a from
366 (default 0, i.e. from the first byte), for the byte array pattern()
367 that was set in the constructor.
368
369 Returns the position where the pattern() matched in \a haystack, or -1 if no match was found.
370*/
371
372/*!
373 \fn template <size_t N> QByteArray QStaticByteArrayMatcher<N>::pattern() const
374
375 Returns the byte array pattern that this byte array matcher will
376 search for.
377
378 \sa QByteArrayMatcher::setPattern()
379*/
380
381/*!
382 \internal
383*/
384qsizetype QStaticByteArrayMatcherBase::indexOfIn(const char *needle, size_t nlen, const char *haystack, qsizetype hlen, qsizetype from) const noexcept
385{
386 if (from < 0)
387 from = 0;
388 return bm_find(reinterpret_cast<const uchar *>(haystack), hlen, from,
389 reinterpret_cast<const uchar *>(needle), nlen, m_skiptable.data);
390}
391
392/*!
393 \fn template <size_t N> QStaticByteArrayMatcher<N>::QStaticByteArrayMatcher(const char (&pattern)[N])
394 \internal
395*/
396
397/*!
398 \fn template <size_t N> QStaticByteArrayMatcher qMakeStaticByteArrayMatcher(const char (&pattern)[N])
399 \since 5.9
400 \relates QStaticByteArrayMatcher
401
402 Return a QStaticByteArrayMatcher with the correct \c{N} determined
403 automatically from the \a pattern passed.
404
405 To take full advantage of this function, assign the result to an
406 \c{auto} variable:
407
408 \snippet code/src_corelib_text_qbytearraymatcher.cpp 0
409*/
410
411QT_END_NAMESPACE
Non-template base class of QStaticByteArrayMatcher.
static QT_BEGIN_NAMESPACE void bm_init_skiptable(const uchar *cc, qsizetype len, uchar *skiptable)
static qsizetype bm_find(const uchar *cc, qsizetype l, qsizetype index, const uchar *puc, qsizetype pl, const uchar *skiptable)
static qsizetype qFindByteArray(const char *haystack0, qsizetype l, qsizetype from, const char *needle, qsizetype sl)