Qt
Internal/Contributor docs for the Qt SDK. Note: These are NOT official API docs; those are found at https://doc.qt.io/
Loading...
Searching...
No Matches
qlatin1stringmatcher.cpp
Go to the documentation of this file.
1// Copyright (C) 2022 The Qt Company Ltd.
2// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
3// Qt-Security score:critical reason:data-parser
4
6#include <limits.h>
7
9
10/*! \class QLatin1StringMatcher
11 \inmodule QtCore
12 \brief Optimized search for substring in Latin-1 text.
13
14 A QLatin1StringMatcher can search for one QLatin1StringView
15 as a substring of another, either ignoring case or taking it into
16 account.
17
18 \since 6.5
19 \ingroup tools
20 \ingroup string-processing
21
22 This class is useful when you have a Latin-1 encoded string that
23 you want to repeatedly search for in some QLatin1StringViews
24 (perhaps in a loop), or when you want to search for all
25 instances of it in a given QLatin1StringView. Using a matcher
26 object and indexIn() is faster than matching a plain
27 QLatin1StringView with QLatin1StringView::indexOf() if repeated
28 matching takes place. This class offers no benefit if you are
29 doing one-off matches. The string to be searched for must not
30 be destroyed or changed before the matcher object is destroyed,
31 as the matcher accesses the string when searching for it.
32
33 Create a QLatin1StringMatcher for the QLatin1StringView
34 you want to search for and the case sensitivity. Then call
35 indexIn() with the QLatin1StringView that you want to search
36 within.
37
38 \sa QLatin1StringView, QStringMatcher, QByteArrayMatcher
39*/
40
41/*!
42 Construct an empty Latin-1 string matcher.
43 This will match at each position in any string.
44 \sa setPattern(), setCaseSensitivity(), indexIn()
45*/
46QLatin1StringMatcher::QLatin1StringMatcher() noexcept
47 : m_pattern(),
48 m_cs(Qt::CaseSensitive),
49 m_caseSensitiveSearcher(m_pattern.data(), m_pattern.data())
50{
51}
52
53/*!
54 Constructs a Latin-1 string matcher that searches for the given \a pattern
55 with given case sensitivity \a cs. The \a pattern argument must
56 not be destroyed before this matcher object. Call indexIn()
57 to find the \a pattern in the given QLatin1StringView.
58*/
59QLatin1StringMatcher::QLatin1StringMatcher(QLatin1StringView pattern,
60 Qt::CaseSensitivity cs) noexcept
61 : m_pattern(pattern), m_cs(cs)
62{
63 setSearcher();
64}
65
66/*!
67 Destroys the Latin-1 string matcher.
68*/
70{
71 freeSearcher();
72}
73
74/*!
75 \internal
76*/
77void QLatin1StringMatcher::setSearcher() noexcept
78{
79 if (m_cs == Qt::CaseSensitive) {
80 new (&m_caseSensitiveSearcher) CaseSensitiveSearcher(m_pattern.data(), m_pattern.end());
81 } else {
83 qsizetype bufferSize = std::min(m_pattern.size(), qsizetype(sizeof m_foldBuffer));
84 for (qsizetype i = 0; i < bufferSize; ++i)
85 m_foldBuffer[i] = static_cast<char>(foldCase(m_pattern[i].toLatin1()));
86
87 new (&m_caseInsensitiveSearcher)
88 CaseInsensitiveSearcher(m_foldBuffer, &m_foldBuffer[bufferSize]);
89 }
90}
91
92/*!
93 \internal
94*/
95void QLatin1StringMatcher::freeSearcher() noexcept
96{
97 if (m_cs == Qt::CaseSensitive)
98 m_caseSensitiveSearcher.~CaseSensitiveSearcher();
99 else
100 m_caseInsensitiveSearcher.~CaseInsensitiveSearcher();
101}
102
103/*!
104 Sets the \a pattern to search for. The string pointed to by the
105 QLatin1StringView must not be destroyed before the matcher is
106 destroyed, unless it is set to point to a different \a pattern
107 with longer lifetime first.
108
109 \sa pattern(), indexIn()
110*/
111void QLatin1StringMatcher::setPattern(QLatin1StringView pattern) noexcept
112{
113 if (m_pattern.latin1() == pattern.latin1() && m_pattern.size() == pattern.size())
114 return; // Same address and size
115
116 freeSearcher();
117 m_pattern = pattern;
118 setSearcher();
119}
120
121/*!
122 Returns the Latin-1 pattern that the matcher searches for.
123
124 \sa setPattern(), indexIn()
125*/
126QLatin1StringView QLatin1StringMatcher::pattern() const noexcept
127{
128 return m_pattern;
129}
130
131/*!
132 Sets the case sensitivity to \a cs.
133
134 \sa caseSensitivity(), indexIn()
135*/
136void QLatin1StringMatcher::setCaseSensitivity(Qt::CaseSensitivity cs) noexcept
137{
138 if (m_cs == cs)
139 return;
140
141 freeSearcher();
142 m_cs = cs;
143 setSearcher();
144}
145
146/*!
147 Returns the case sensitivity the matcher uses.
148
149 \sa setCaseSensitivity(), indexIn()
150*/
151Qt::CaseSensitivity QLatin1StringMatcher::caseSensitivity() const noexcept
152{
153 return m_cs;
154}
155
156/*!
157 Searches for the pattern in the given \a haystack starting from
158 \a from.
159
160 \sa caseSensitivity(), pattern()
161*/
162qsizetype QLatin1StringMatcher::indexIn(QLatin1StringView haystack, qsizetype from) const noexcept
163{
164 return indexIn_helper(haystack, from);
165}
166
167/*!
168 \since 6.8
169 \overload
170
171 Searches for the pattern in the given \a haystack starting from index
172 position \a from.
173
174 \sa caseSensitivity(), pattern()
175*/
176qsizetype QLatin1StringMatcher::indexIn(QStringView haystack, qsizetype from) const noexcept
177{
178 return indexIn_helper(haystack, from);
179}
180
181/*!
182 \internal
183*/
184template <typename String>
185qsizetype QLatin1StringMatcher::indexIn_helper(String haystack, qsizetype from) const noexcept
186{
187 static_assert(QtPrivate::isLatin1OrUtf16View<String>);
188
189 if (m_pattern.isEmpty() && from == haystack.size())
190 return from;
191 if (from < 0) // Historical behavior (see QString::indexOf and co.)
192 from += haystack.size();
193 if (from >= haystack.size())
194 return -1;
195
196 const auto start = [haystack] {
197 if constexpr (std::is_same_v<String, QStringView>)
198 return haystack.utf16();
199 else
200 return haystack.begin();
201 }();
202
203 auto begin = start + from;
204 auto end = start + haystack.size();
205 auto found = begin;
206 if (m_cs == Qt::CaseSensitive) {
207 found = m_caseSensitiveSearcher(begin, end, m_pattern.begin(), m_pattern.end()).begin;
208 if (found == end)
209 return -1;
210 } else {
211 const qsizetype bufferSize = std::min(m_pattern.size(), qsizetype(sizeof m_foldBuffer));
212 const QLatin1StringView restNeedle = m_pattern.sliced(bufferSize);
213 const bool needleLongerThanBuffer = restNeedle.size() > 0;
214 String restHaystack = haystack;
215 do {
216 found = m_caseInsensitiveSearcher(found, end, m_foldBuffer, &m_foldBuffer[bufferSize])
217 .begin;
218 if (found == end) {
219 return -1;
220 } else if (!needleLongerThanBuffer) {
221 break;
222 }
223 restHaystack = haystack.sliced(
224 qMin(haystack.size(),
225 bufferSize + qsizetype(std::distance(start, found))));
226 if (restHaystack.startsWith(restNeedle, Qt::CaseInsensitive))
227 break;
228 ++found;
229 } while (true);
230 }
231 return std::distance(start, found);
232}
233
234QT_END_NAMESPACE
Q_CORE_EXPORT void setPattern(QLatin1StringView pattern) noexcept
Sets the pattern to search for.
Q_CORE_EXPORT ~QLatin1StringMatcher() noexcept
Destroys the Latin-1 string matcher.
CaseInsensitiveSearcher m_caseInsensitiveSearcher
Q_CORE_EXPORT void setCaseSensitivity(Qt::CaseSensitivity cs) noexcept
Sets the case sensitivity to cs.