Qt
Internal/Contributor docs for the Qt SDK. Note: These are NOT official API docs; those are found at https://doc.qt.io/
Loading...
Searching...
No Matches
qstringtokenizer.cpp
Go to the documentation of this file.
1
// Copyright (C) 2020 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.com, author Marc Mutz <marc.mutz@kdab.com>
2
// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
3
// Qt-Security score:significant reason:docs-only
4
5
#
include
"qstringtokenizer.h"
6
#
include
"qstringalgorithms.h"
7
8
QT_BEGIN_NAMESPACE
9
10
/*!
11
\class QStringTokenizer
12
\inmodule QtCore
13
\since 6.0
14
\brief The QStringTokenizer class splits strings into tokens along given separators.
15
\reentrant
16
\ingroup tools
17
\ingroup string-processing
18
19
Splits a string into substrings wherever a given separator occurs,
20
returning a (lazily constructed) list of those strings. If the separator does
21
not match anywhere in the string, produces a single-element list
22
containing this string. If the separator is empty,
23
QStringTokenizer produces an empty string, followed by each of the
24
string's characters, followed by another empty string. The two
25
enumerations Qt::SplitBehavior and Qt::CaseSensitivity further
26
control the output.
27
28
QStringTokenizer drives QStringView::tokenize(), but you can use it
29
directly, too:
30
31
\code
32
for (auto token : QStringTokenizer{string, separator})
33
use(token);
34
\endcode
35
36
\note You should never name the template arguments of a
37
QStringTokenizer explicitly. You may write
38
\c{QStringTokenizer{string, separator}} (without template arguments),
39
or use either QStringView::tokenize() or QLatin1StringView::tokenize(),
40
then store the return value only in an \c{auto} variable:
41
42
\code
43
auto result = strview.tokenize(sep);
44
\endcode
45
46
This is because the template arguments of QStringTokenizer have a
47
very subtle dependency on the specific string and separator types
48
from which they are constructed, and they don't usually
49
correspond to the actual types passed.
50
51
\section1 Lazy Sequences
52
53
QStringTokenizer acts as a so-called lazy sequence, that is, each
54
next element is only computed once you ask for it. Lazy sequences
55
have the advantage that they only require O(1) memory. They have
56
the disadvantage that, at least for QStringTokenizer, they only
57
allow forward, not random-access, iteration.
58
59
The intended use-case is that you just plug it into a ranged for loop:
60
61
\code
62
for (auto token : QStringTokenizer{string, separator})
63
use(token);
64
\endcode
65
66
or a C++20 ranged algorithm:
67
68
\code
69
std::ranges::for_each(QStringTokenizer{string, separator},
70
[] (auto token) { use(token); });
71
\endcode
72
73
\section1 End Sentinel
74
75
The QStringTokenizer iterators cannot be used with classical STL
76
algorithms, because those require iterator/iterator pairs, while
77
QStringTokenizer uses sentinels. That is, it uses a different
78
type, QStringTokenizer::sentinel, to mark the end of the
79
range. This improves performance, because the sentinel is an empty
80
type. Sentinels are supported from C++17 (for ranged for)
81
and C++20 (for algorithms using the new ranges library).
82
83
\section1 Temporaries
84
85
QStringTokenizer is very carefully designed to avoid dangling
86
references. If you construct a tokenizer from a temporary string
87
(an rvalue), that argument is stored internally, so the referenced
88
data isn't deleted before it is tokenized:
89
90
\code
91
auto tok = QStringTokenizer{widget.text(), u','};
92
// return value of `widget.text()` is destroyed, but content was moved into `tok`
93
for (auto e : tok)
94
use(e);
95
\endcode
96
97
If you pass named objects (lvalues), then QStringTokenizer does
98
not store a copy. You are responsible for keeping the named object's
99
data around for longer than the tokenizer operates on it:
100
101
\code
102
auto text = widget.text();
103
auto tok = QStringTokenizer{text, u','};
104
text.clear(); // destroy content of `text`
105
for (auto e : tok) // ERROR: `tok` references deleted data!
106
use(e);
107
\endcode
108
109
\sa QStringView::split(), QString::split(), QRegularExpression
110
*/
111
112
/*!
113
\typealias QStringTokenizer::value_type
114
115
Alias for \c{const QStringView} or \c{const QLatin1StringView},
116
depending on the tokenizer's \c Haystack template argument.
117
*/
118
119
/*!
120
\typealias QStringTokenizer::difference_type
121
122
Alias for qsizetype.
123
*/
124
125
/*!
126
\typealias QStringTokenizer::size_type
127
128
Alias for qsizetype.
129
*/
130
131
/*!
132
\typealias QStringTokenizer::reference
133
134
Alias for \c{value_type &}.
135
136
QStringTokenizer does not support mutable references, so this is
137
the same as const_reference.
138
*/
139
140
/*!
141
\typealias QStringTokenizer::const_reference
142
143
Alias for \c{value_type &}.
144
*/
145
146
/*!
147
\typealias QStringTokenizer::pointer
148
149
Alias for \c{value_type *}.
150
151
QStringTokenizer does not support mutable iterators, so this is
152
the same as const_pointer.
153
*/
154
155
/*!
156
\typealias QStringTokenizer::const_pointer
157
158
Alias for \c{value_type *}.
159
*/
160
161
/*!
162
\typealias QStringTokenizer::iterator
163
164
This typedef provides an STL-style const iterator for
165
QStringTokenizer.
166
167
QStringTokenizer does not support mutable iterators, so this is
168
the same as const_iterator.
169
170
\sa const_iterator
171
*/
172
173
/*!
174
\typedef QStringTokenizer::const_iterator
175
176
This typedef provides an STL-style const iterator for
177
QStringTokenizer.
178
179
\sa iterator
180
*/
181
182
/*!
183
\typealias QStringTokenizer::sentinel
184
185
This typedef provides an STL-style sentinel for
186
QStringTokenizer::iterator and QStringTokenizer::const_iterator.
187
188
\sa const_iterator
189
*/
190
191
/*!
192
\fn template <typename Haystack, typename Needle> QStringTokenizer<Haystack, Needle>::QStringTokenizer(Haystack haystack, Needle needle, Qt::CaseSensitivity cs, Qt::SplitBehavior sb)
193
\fn template <typename Haystack, typename Needle> QStringTokenizer<Haystack, Needle>::QStringTokenizer(Haystack haystack, Needle needle, Qt::SplitBehavior sb, Qt::CaseSensitivity cs)
194
195
Constructs a string tokenizer that splits the string \a haystack
196
into substrings wherever \a needle occurs, and allows iteration
197
over those strings as they are found. If \a needle does not match
198
anywhere in \a haystack, a single element containing \a haystack
199
is produced.
200
201
\a cs specifies whether \a needle should be matched case
202
sensitively or case insensitively.
203
204
If \a sb is Qt::SkipEmptyParts, empty entries don't
205
appear in the result. By default, empty entries are included.
206
207
\sa QStringView::split(), QString::split(), Qt::CaseSensitivity, Qt::SplitBehavior
208
*/
209
210
/*!
211
\fn template <typename Haystack, typename Needle> QStringTokenizer<Haystack, Needle>::iterator QStringTokenizer<Haystack, Needle>::begin() const
212
\fn template <typename Haystack, typename Needle> QStringTokenizer<Haystack, Needle>::iterator QStringTokenizer<Haystack, Needle>::cbegin() const
213
214
Returns a const \l{STL-style iterators}{STL-style iterator}
215
pointing to the first token in the list.
216
217
\sa end(), cend()
218
*/
219
220
/*!
221
\fn template <typename Haystack, typename Needle> QStringTokenizer<Haystack, Needle>::sentinel QStringTokenizer<Haystack, Needle>::end() const
222
223
Returns a const \l{STL-style iterators}{STL-style sentinel}
224
pointing to the imaginary token after the last token in the list.
225
226
\sa begin(), cend()
227
*/
228
229
/*!
230
\fn template <typename Haystack, typename Needle> QStringTokenizer<Haystack, Needle>::sentinel QStringTokenizer<Haystack, Needle>::cend() const
231
232
Same as end().
233
234
\sa cbegin(), end()
235
*/
236
237
/*!
238
\fn template <typename Haystack, typename Needle> template<typename LContainer> LContainer QStringTokenizer<Haystack, Needle>::toContainer(LContainer &&c) const &
239
240
Converts the lazy sequence into a (typically) random-access container of
241
type \c LContainer.
242
243
This function is only available if \c LContainer has a \c value_type
244
matching this tokenizer's value_type.
245
246
If you pass in a named container (an lvalue) for \a c, then that container
247
is filled, and a reference to it is returned. If you pass in a temporary
248
container (an rvalue, incl. the default argument), then that container is
249
filled, and returned by value.
250
251
\code
252
// assuming tok's value_type is QStringView, then...
253
auto tok = QStringTokenizer{~~~};
254
// ... rac1 is a QList:
255
auto rac1 = tok.toContainer();
256
// ... rac2 is std::pmr::vector<QStringView>:
257
auto rac2 = tok.toContainer<std::pmr::vector<QStringView>>();
258
auto rac3 = QVarLengthArray<QStringView, 12>{};
259
// appends the token sequence produced by tok to rac3
260
// and returns a reference to rac3 (which we ignore here):
261
tok.toContainer(rac3);
262
\endcode
263
264
This gives you maximum flexibility in how you want the sequence to
265
be stored.
266
*/
267
268
/*!
269
\fn template <typename Haystack, typename Needle> template<typename RContainer> RContainer QStringTokenizer<Haystack, Needle>::toContainer(RContainer &&c) const &&
270
\overload
271
272
Converts the lazy sequence into a (typically) random-access container of
273
type \c RContainer.
274
275
In addition to the constraints on the lvalue-this overload, this
276
rvalue-this overload is only available when this QStringTokenizer
277
does not store the haystack internally, as this could create a
278
container full of dangling references:
279
280
\code
281
auto tokens = QStringTokenizer{widget.text(), u','}.toContainer();
282
// ERROR: cannot call toContainer() on rvalue
283
// 'tokens' references the data of the copy of widget.text()
284
// stored inside the QStringTokenizer, which has since been deleted
285
\endcode
286
287
To fix, store the QStringTokenizer in a named variable:
288
289
\code
290
auto tokenizer = QStringTokenizer{widget.text(), u','};
291
auto tokens = tokenizer.toContainer();
292
// OK: the copy of widget.text() stored in 'tokenizer' keeps the data
293
// referenced by 'tokens' alive.
294
\endcode
295
296
You can force this function into existence by passing a view instead:
297
298
\code
299
func(QStringTokenizer{QStringView{widget.text()}, u','}.toContainer());
300
// OK: compiler keeps widget.text() around until after func() has executed
301
\endcode
302
303
If you pass in a named container (an lvalue) for \a c, then that container
304
is filled, and a reference to it is returned. If you pass in a temporary
305
container (an rvalue, incl. the default argument), then that container is
306
filled, and returned by value.
307
*/
308
309
/*!
310
\fn template <typename Haystack, typename Needle, typename...Flags> auto qTokenize(Haystack &&haystack, Needle &&needle, Flags...flags)
311
\relates QStringTokenizer
312
\since 6.0
313
314
Factory function for a QStringTokenizer that splits the string \a haystack
315
into substrings wherever \a needle occurs, and allows iteration
316
over those strings as they are found. If \a needle does not match
317
anywhere in \a haystack, a single element containing \a haystack
318
is produced.
319
320
Pass values from Qt::CaseSensitivity and Qt::SplitBehavior enumerators
321
as \a flags to modify the behavior of the tokenizer.
322
*/
323
324
QT_END_NAMESPACE
QPlatformGraphicsBufferHelper
\inmodule QtGui
qtbase
src
corelib
text
qstringtokenizer.cpp
Generated on
for Qt by
1.14.0