Qt
Internal/Contributor docs for the Qt SDK. Note: These are NOT official API docs; those are found at https://doc.qt.io/
Loading...
Searching...
No Matches
qbytearraymatcher.cpp
Go to the documentation of this file.
1// Copyright (C) 2016 The Qt Company Ltd.
2// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
3
5
6#include <qtconfiginclude.h>
7#ifndef QT_BOOTSTRAPPED
8# include <private/qtcore-config_p.h>
9#endif
10
11#include <limits.h>
12
14
15static inline void bm_init_skiptable(const uchar *cc, qsizetype len, uchar *skiptable)
16{
17 int l = int(qMin(len, qsizetype(255)));
18 memset(skiptable, l, 256*sizeof(uchar));
19 cc += len - l;
20 while (l--)
21 skiptable[*cc++] = l;
22}
23
24static inline qsizetype bm_find(const uchar *cc, qsizetype l, qsizetype index, const uchar *puc,
25 qsizetype pl, const uchar *skiptable)
26{
27 if (pl == 0)
28 return index > l ? -1 : index;
29 const qsizetype pl_minus_one = pl - 1;
30
31 const uchar *current = cc + index + pl_minus_one;
32 const uchar *end = cc + l;
33 while (current < end) {
34 qsizetype skip = skiptable[*current];
35 if (!skip) {
36 // possible match
37 while (skip < pl) {
38 if (*(current - skip) != puc[pl_minus_one - skip])
39 break;
40 skip++;
41 }
42 if (skip > pl_minus_one) // we have a match
43 return (current - cc) - skip + 1;
44
45 // in case we don't have a match we are a bit inefficient as we only skip by one
46 // when we have the non matching char in the string.
47 if (skiptable[*(current - skip)] == pl)
48 skip = pl - skip;
49 else
50 skip = 1;
51 }
52 if (current > end - skip)
53 break;
54 current += skip;
55 }
56 return -1; // not found
57}
58
59/*! \class QByteArrayMatcher
60 \inmodule QtCore
61 \brief The QByteArrayMatcher class holds a sequence of bytes that
62 can be quickly matched in a byte array.
63
64 \ingroup tools
65 \ingroup string-processing
66
67 This class is useful when you have a sequence of bytes that you
68 want to repeatedly match against some byte arrays (perhaps in a
69 loop), or when you want to search for the same sequence of bytes
70 multiple times in the same byte array. Using a matcher object and
71 indexIn() is faster than matching a plain QByteArray with
72 QByteArray::indexOf() if repeated matching takes place. This
73 class offers no benefit if you are doing one-off byte array
74 matches.
75
76 Create the QByteArrayMatcher with the QByteArray you want to
77 search for. Then call indexIn() on the QByteArray that you want to
78 search.
79
80 \sa QByteArray, QStringMatcher
81*/
82
83/*!
84 Constructs an empty byte array matcher that won't match anything.
85 Call setPattern() to give it a pattern to match.
86*/
87QByteArrayMatcher::QByteArrayMatcher()
88 : d(nullptr)
89{
90 p.p = nullptr;
91 p.l = 0;
92 memset(p.q_skiptable, 0, sizeof(p.q_skiptable));
93}
94
95/*!
96 Constructs a byte array matcher from \a pattern. \a pattern
97 has the given \a length. Call indexIn() to perform a search.
98
99 \note the data that \a pattern is referencing must remain valid while this
100 object is used.
101*/
102QByteArrayMatcher::QByteArrayMatcher(const char *pattern, qsizetype length) : d(nullptr)
103{
104 p.p = reinterpret_cast<const uchar *>(pattern);
105 if (length < 0)
106 p.l = qstrlen(pattern);
107 else
108 p.l = length;
109 bm_init_skiptable(p.p, p.l, p.q_skiptable);
110}
111
112/*!
113 Constructs a byte array matcher that will search for \a pattern.
114 Call indexIn() to perform a search.
115*/
116QByteArrayMatcher::QByteArrayMatcher(const QByteArray &pattern)
117 : d(nullptr), q_pattern(pattern)
118{
119 p.p = reinterpret_cast<const uchar *>(pattern.constData());
120 p.l = pattern.size();
121 bm_init_skiptable(p.p, p.l, p.q_skiptable);
122}
123
124/*!
125 \fn QByteArrayMatcher::QByteArrayMatcher(QByteArrayView pattern)
126 \since 6.3
127 \overload
128
129 Constructs a byte array matcher that will search for \a pattern.
130 Call indexIn() to perform a search.
131
132 \note the data that \a pattern is referencing must remain valid while this
133 object is used.
134*/
135
136/*!
137 Copies the \a other byte array matcher to this byte array matcher.
138*/
139QByteArrayMatcher::QByteArrayMatcher(const QByteArrayMatcher &other)
140 : d(nullptr)
141{
142 operator=(other);
143}
144
145/*!
146 Destroys the byte array matcher.
147*/
148QByteArrayMatcher::~QByteArrayMatcher()
149{
150 Q_UNUSED(d);
151}
152
153/*!
154 Assigns the \a other byte array matcher to this byte array matcher.
155*/
156QByteArrayMatcher &QByteArrayMatcher::operator=(const QByteArrayMatcher &other)
157{
158 q_pattern = other.q_pattern;
159 memcpy(&p, &other.p, sizeof(p));
160 return *this;
161}
162
163/*!
164 Sets the byte array that this byte array matcher will search for
165 to \a pattern.
166
167 \sa pattern(), indexIn()
168*/
169void QByteArrayMatcher::setPattern(const QByteArray &pattern)
170{
171 q_pattern = pattern;
172 p.p = reinterpret_cast<const uchar *>(pattern.constData());
173 p.l = pattern.size();
174 bm_init_skiptable(p.p, p.l, p.q_skiptable);
175}
176
177/*!
178 Searches the char string \a str, which has length \a len, from
179 byte position \a from (default 0, i.e. from the first byte), for
180 the byte array pattern() that was set in the constructor or in the
181 most recent call to setPattern(). Returns the position where the
182 pattern() matched in \a str, or -1 if no match was found.
183*/
184qsizetype QByteArrayMatcher::indexIn(const char *str, qsizetype len, qsizetype from) const
185{
186 if (from < 0)
187 from = 0;
188 return bm_find(reinterpret_cast<const uchar *>(str), len, from,
189 p.p, p.l, p.q_skiptable);
190}
191
192/*!
193 \fn qsizetype QByteArrayMatcher::indexIn(QByteArrayView data, qsizetype from) const
194 \since 6.3
195 \overload
196
197 Searches the byte array \a data, from byte position \a from (default
198 0, i.e. from the first byte), for the byte array pattern() that
199 was set in the constructor or in the most recent call to
200 setPattern(). Returns the position where the pattern() matched in
201 \a data, or -1 if no match was found.
202*/
203qsizetype QByteArrayMatcher::indexIn(QByteArrayView data, qsizetype from) const
204{
205 if (from < 0)
206 from = 0;
207 return bm_find(reinterpret_cast<const uchar *>(data.data()), data.size(), from,
208 p.p, p.l, p.q_skiptable);
209}
210
211/*!
212 \fn QByteArray QByteArrayMatcher::pattern() const
213
214 Returns the byte array pattern that this byte array matcher will
215 search for.
216
217 \sa setPattern()
218*/
219
220/*!
221 \internal
222 */
223Q_NEVER_INLINE
224static qsizetype qFindByteArrayBoyerMoore(
225 const char *haystack, qsizetype haystackLen, qsizetype haystackOffset,
226 const char *needle, qsizetype needleLen)
227{
228 uchar skiptable[256];
229 bm_init_skiptable((const uchar *)needle, needleLen, skiptable);
230 if (haystackOffset < 0)
231 haystackOffset = 0;
232 return bm_find((const uchar *)haystack, haystackLen, haystackOffset,
233 (const uchar *)needle, needleLen, skiptable);
234}
235
236/*!
237 \internal
238 */
239static qsizetype qFindByteArray(const char *haystack0, qsizetype l, qsizetype from,
240 const char *needle, qsizetype sl);
241qsizetype QtPrivate::findByteArray(QByteArrayView haystack, qsizetype from, QByteArrayView needle) noexcept
242{
243 const auto haystack0 = haystack.data();
244 const auto l = haystack.size();
245 const auto sl = needle.size();
246#if !QT_CONFIG(memmem)
247 if (sl == 1)
248 return findByteArray(haystack, from, needle.front());
249#endif
250
251 if (from < 0)
252 from += l;
253 if (std::size_t(sl + from) > std::size_t(l))
254 return -1;
255 if (!sl)
256 return from;
257 if (!l)
258 return -1;
259
260#if QT_CONFIG(memmem)
261 auto where = memmem(haystack0 + from, l - from, needle.data(), sl);
262 return where ? static_cast<const char *>(where) - haystack0 : -1;
263#endif
264
265 /*
266 We use the Boyer-Moore algorithm in cases where the overhead
267 for the skip table should pay off, otherwise we use a simple
268 hash function.
269 */
270 if (l > 500 && sl > 5)
271 return qFindByteArrayBoyerMoore(haystack0, l, from, needle.data(), sl);
272 return qFindByteArray(haystack0, l, from, needle.data(), sl);
273}
274
275qsizetype qFindByteArray(const char *haystack0, qsizetype l, qsizetype from,
276 const char *needle, qsizetype sl)
277{
278 /*
279 We use some hashing for efficiency's sake. Instead of
280 comparing strings, we compare the hash value of str with that
281 of a part of this QByteArray. Only if that matches, we call memcmp().
282 */
283 const char *haystack = haystack0 + from;
284 const char *end = haystack0 + (l - sl);
285 const qregisteruint sl_minus_1 = sl - 1;
286 qregisteruint hashNeedle = 0, hashHaystack = 0;
287 qsizetype idx;
288 for (idx = 0; idx < sl; ++idx) {
289 hashNeedle = ((hashNeedle<<1) + needle[idx]);
290 hashHaystack = ((hashHaystack<<1) + haystack[idx]);
291 }
292 hashHaystack -= *(haystack + sl_minus_1);
293
294 while (haystack <= end) {
295 hashHaystack += *(haystack + sl_minus_1);
296 if (hashHaystack == hashNeedle && *needle == *haystack
297 && memcmp(needle, haystack, sl) == 0)
298 return haystack - haystack0;
299
300 if (sl_minus_1 < sizeof(sl_minus_1) * CHAR_BIT)
301 hashHaystack -= qregisteruint(*haystack) << sl_minus_1;
302 hashHaystack <<= 1;
303 ++haystack;
304 }
305 return -1;
306}
307
308/*!
309 \class QStaticByteArrayMatcherBase
310 \since 5.9
311 \internal
312 \brief Non-template base class of QStaticByteArrayMatcher.
313*/
314
315/*!
316 \class QStaticByteArrayMatcher
317 \since 5.9
318 \inmodule QtCore
319 \brief The QStaticByteArrayMatcher class is a compile-time version of QByteArrayMatcher.
320
321 \ingroup tools
322 \ingroup string-processing
323
324 This class is useful when you have a sequence of bytes that you
325 want to repeatedly match against some byte arrays (perhaps in a
326 loop), or when you want to search for the same sequence of bytes
327 multiple times in the same byte array. Using a matcher object and
328 indexIn() is faster than matching a plain QByteArray with
329 QByteArray::indexOf(), in particular if repeated matching takes place.
330
331 Unlike QByteArrayMatcher, this class calculates the internal
332 representation at \e{compile-time}, so it can
333 even benefit if you are doing one-off byte array matches.
334
335 Create the QStaticByteArrayMatcher by calling qMakeStaticByteArrayMatcher(),
336 passing it the C string literal you want to search for. Store the return
337 value of that function in a \c{static const auto} variable, so you don't need
338 to pass the \c{N} template parameter explicitly:
339
340 \snippet code/src_corelib_text_qbytearraymatcher.cpp 0
341
342 Then call indexIn() on the QByteArray in which you want to search, just like
343 with QByteArrayMatcher.
344
345 Since this class is designed to do all the up-front calculations at compile-time,
346 it does not offer a setPattern() method.
347
348 \sa QByteArrayMatcher, QStringMatcher
349*/
350
351/*!
352 \fn template <size_t N> qsizetype QStaticByteArrayMatcher<N>::indexIn(const char *haystack, qsizetype hlen, qsizetype from = 0) const
353
354 Searches the char string \a haystack, which has length \a hlen, from
355 byte position \a from (default 0, i.e. from the first byte), for
356 the byte array pattern() that was set in the constructor.
357
358 Returns the position where the pattern() matched in \a haystack, or -1 if no match was found.
359*/
360
361/*!
362 \fn template <size_t N> qsizetype QStaticByteArrayMatcher<N>::indexIn(const QByteArray &haystack, qsizetype from = 0) const
363
364 Searches the char string \a haystack, from byte position \a from
365 (default 0, i.e. from the first byte), for the byte array pattern()
366 that was set in the constructor.
367
368 Returns the position where the pattern() matched in \a haystack, or -1 if no match was found.
369*/
370
371/*!
372 \fn template <size_t N> QByteArray QStaticByteArrayMatcher<N>::pattern() const
373
374 Returns the byte array pattern that this byte array matcher will
375 search for.
376
377 \sa QByteArrayMatcher::setPattern()
378*/
379
380/*!
381 \internal
382*/
383qsizetype QStaticByteArrayMatcherBase::indexOfIn(const char *needle, size_t nlen, const char *haystack, qsizetype hlen, qsizetype from) const noexcept
384{
385 if (from < 0)
386 from = 0;
387 return bm_find(reinterpret_cast<const uchar *>(haystack), hlen, from,
388 reinterpret_cast<const uchar *>(needle), nlen, m_skiptable.data);
389}
390
391/*!
392 \fn template <size_t N> QStaticByteArrayMatcher<N>::QStaticByteArrayMatcher(const char (&pattern)[N])
393 \internal
394*/
395
396/*!
397 \fn template <size_t N> QStaticByteArrayMatcher qMakeStaticByteArrayMatcher(const char (&pattern)[N])
398 \since 5.9
399 \relates QStaticByteArrayMatcher
400
401 Return a QStaticByteArrayMatcher with the correct \c{N} determined
402 automatically from the \a pattern passed.
403
404 To take full advantage of this function, assign the result to an
405 \c{auto} variable:
406
407 \snippet code/src_corelib_text_qbytearraymatcher.cpp 1
408*/
409
410QT_END_NAMESPACE
Non-template base class of QStaticByteArrayMatcher.
Combined button and popup list for selecting options.
static QT_BEGIN_NAMESPACE void bm_init_skiptable(const uchar *cc, qsizetype len, uchar *skiptable)
static qsizetype bm_find(const uchar *cc, qsizetype l, qsizetype index, const uchar *puc, qsizetype pl, const uchar *skiptable)
static qsizetype qFindByteArray(const char *haystack0, qsizetype l, qsizetype from, const char *needle, qsizetype sl)