Qt
Internal/Contributor docs for the Qt SDK. Note: These are NOT official API docs; those are found at https://doc.qt.io/
Loading...
Searching...
No Matches
qtextboundaryfinder.cpp
Go to the documentation of this file.
1// Copyright (C) 2022 The Qt Company Ltd.
2// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
3// Qt-Security score:critical reason:data-parser
4
5#include <QtCore/qtextboundaryfinder.h>
6#include <QtCore/qvarlengtharray.h>
7
8#include <private/qunicodetools_p.h>
9
11
12static void init(QTextBoundaryFinder::BoundaryType type, QStringView str, QCharAttributes *attributes)
13{
14 QUnicodeTools::ScriptItemArray scriptItems;
15 QUnicodeTools::initScripts(str, &scriptItems);
16
17 QUnicodeTools::CharAttributeOptions options;
18 switch (type) {
19 case QTextBoundaryFinder::Grapheme: options |= QUnicodeTools::GraphemeBreaks; break;
20 case QTextBoundaryFinder::Word: options |= QUnicodeTools::WordBreaks; break;
21 case QTextBoundaryFinder::Sentence: options |= QUnicodeTools::SentenceBreaks; break;
22 case QTextBoundaryFinder::Line: options |= QUnicodeTools::LineBreaks; break;
23 default: break;
24 }
25 QUnicodeTools::initCharAttributes(str, scriptItems.data(), scriptItems.size(), attributes, options);
26}
27
28/*!
29 \class QTextBoundaryFinder
30 \inmodule QtCore
31
32 \brief The QTextBoundaryFinder class provides a way of finding Unicode text boundaries in a string.
33
34 \since 4.4
35 \ingroup tools
36 \ingroup shared
37 \ingroup string-processing
38 \reentrant
39
40 QTextBoundaryFinder finds Unicode text boundaries in a
41 string, according to the Unicode text boundary specification (see
42 \l{https://www.unicode.org/reports/tr14/}{Unicode Standard Annex #14} and
43 \l{https://www.unicode.org/reports/tr29/}{Unicode Standard Annex #29}).
44
45 QTextBoundaryFinder can operate on a QString in four possible
46 modes, depending on the BoundaryType it is constructed with.
47
48 Units of Unicode characters that make up what the user thinks of
49 as a character or basic unit of the language are here called
50 grapheme clusters. For example, the two Unicode characters 'A' +
51 diaeresis form one grapheme cluster: the user thinks of them as
52 one character, yet it is represented by two Unicode code
53 points
54 (see \l{https://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries}).
55
56 Word boundaries are there to locate the start and end of what a
57 language considers to be a word
58 (see \l{https://www.unicode.org/reports/tr29/#Word_Boundaries}).
59
60 Line break boundaries give possible places where a line break
61 might happen and sentence boundaries will show the beginning and
62 end of whole sentences
63 (see \l{https://www.unicode.org/reports/tr29/#Sentence_Boundaries} and
64 \l{https://www.unicode.org/reports/tr14/}).
65
66 The first position in a string is always a valid boundary and
67 refers to the position before the first character. The last
68 position at the length of the string is also valid and refers
69 to the position after the last character.
70*/
71
72/*!
73 \enum QTextBoundaryFinder::BoundaryType
74
75 \value Grapheme Finds a grapheme, which is the smallest boundary. It
76 includes letters, punctuation marks, numerals and more.
77 \value Word Finds a word.
78 \value Line Finds possible positions for breaking the text into multiple
79 lines.
80 \value Sentence Finds sentence boundaries. These include periods, question
81 marks etc.
82*/
83
84/*!
85 \enum QTextBoundaryFinder::BoundaryReason
86
87 \value NotAtBoundary The boundary finder is not at a boundary position.
88 \value BreakOpportunity The boundary finder is at a break opportunity position.
89 Such a break opportunity might also be an item boundary
90 (either StartOfItem, EndOfItem, or combination of both),
91 a mandatory line break, or a soft hyphen.
92 \value [since 5.0] StartOfItem The boundary finder is at the start of
93 a grapheme, a word, a sentence, or a line.
94 \value [since 5.0] EndOfItem The boundary finder is at the end of
95 a grapheme, a word, a sentence, or a line.
96 \value [since 5.0] MandatoryBreak The boundary finder is at the end of line
97 (can occur for a Line boundary type only).
98 \value SoftHyphen The boundary finder is at the soft hyphen
99 (can occur for a Line boundary type only).
100*/
101
102/*!
103 Constructs an invalid QTextBoundaryFinder object.
104*/
105QTextBoundaryFinder::QTextBoundaryFinder()
106{
107}
108
109/*!
110 Copies the QTextBoundaryFinder object, \a other.
111*/
112QTextBoundaryFinder::QTextBoundaryFinder(const QTextBoundaryFinder &other)
113 : s(other.s), sv(other.sv), pos(other.pos)
114{
115 t = other.t;
116 if (other.attributes) {
117 Q_ASSERT(sv.size() > 0);
118 attributes = (QCharAttributes *) malloc((sv.size() + 1) * sizeof(QCharAttributes));
119 Q_CHECK_PTR(attributes);
120 memcpy(attributes, other.attributes, (sv.size() + 1) * sizeof(QCharAttributes));
121 }
122}
123
124/*!
125 \since 6.11
126 \fn QTextBoundaryFinder::QTextBoundaryFinder(QTextBoundaryFinder &&other)
127
128 Move-constructs a new QTextBoundaryFinder from \a other.
129
130 \note The moved-from object \a other is placed in a partially-formed state, in
131 which the only valid operations are destruction and assignment of a new
132 value.
133*/
134
135/*!
136 Assigns the object, \a other, to another QTextBoundaryFinder object.
137*/
138QTextBoundaryFinder &QTextBoundaryFinder::operator=(const QTextBoundaryFinder &other)
139{
140 if (&other == this)
141 return *this;
142
143 if (other.attributes) {
144 Q_ASSERT(other.sv.size() > 0);
145 size_t newCapacity = (size_t(other.sv.size()) + 1) * sizeof(QCharAttributes);
146 QCharAttributes *newD = (QCharAttributes *) realloc(freeBuffer ? attributes : nullptr, newCapacity);
147 Q_CHECK_PTR(newD);
148 freeBuffer = true;
149 attributes = newD;
150 }
151
152 t = other.t;
153 s = other.s;
154 sv = other.sv;
155 pos = other.pos;
156
157 if (other.attributes) {
158 memcpy(attributes, other.attributes, (sv.size() + 1) * sizeof(QCharAttributes));
159 } else {
160 if (freeBuffer)
161 free(attributes);
162 attributes = nullptr;
163 }
164
165 return *this;
166}
167
168/*!
169 \since 6.11
170 \fn QTextBoundaryFinder::operator=(QTextBoundaryFinder &&other)
171
172 Move-assigns \a other to this QTextBoundaryFinder instance.
173
174 \note The moved-from object \a other is placed in a partially-formed state, in
175 which the only valid operations are destruction and assignment of a new
176 value.
177*/
178
179/*!
180 Destructs the QTextBoundaryFinder object.
181*/
182QTextBoundaryFinder::~QTextBoundaryFinder()
183{
184 if (freeBuffer)
185 free(attributes);
186}
187
188/*!
189 \since 6.11
190 \fn void QTextBoundaryFinder::swap(QTextBoundaryFinder &other)
191 \memberswap{text boundary finder}
192*/
193
194/*!
195 Creates a QTextBoundaryFinder object of \a type operating on \a string.
196*/
197QTextBoundaryFinder::QTextBoundaryFinder(BoundaryType type, const QString &string)
198 : s(string), sv(s)
199{
200 t = type;
201 if (sv.size() > 0) {
202 attributes = (QCharAttributes *) malloc((sv.size() + 1) * sizeof(QCharAttributes));
203 Q_CHECK_PTR(attributes);
204 init(t, sv, attributes);
205 }
206}
207
208/*!
209 \fn QTextBoundaryFinder::QTextBoundaryFinder(BoundaryType type, const QChar *chars, qsizetype length, unsigned char *buffer, qsizetype bufferSize)
210 \overload
211
212 The same as QTextBoundaryFinder(type, QStringView(chars, length), buffer, bufferSize).
213*/
214
215/*!
216 Creates a QTextBoundaryFinder object of \a type operating on \a string.
217 \since 6.0
218
219 \a buffer is an optional working buffer of size \a bufferSize you can pass to
220 the QTextBoundaryFinder. If the buffer is large enough to hold the working
221 data required (\a bufferSize / sizeof(QCharAttributes) >= string.size() + 1),
222 it will use this instead of allocating its own buffer.
223
224 \warning QTextBoundaryFinder does not create a copy of \a string. It is the
225 application programmer's responsibility to ensure the array is allocated for
226 as long as the QTextBoundaryFinder object stays alive. The same applies to
227 \a buffer.
228*/
229QTextBoundaryFinder::QTextBoundaryFinder(BoundaryType type, QStringView string, unsigned char *buffer, qsizetype bufferSize)
230 : sv(string)
231{
232 t = type;
233 if (!sv.isEmpty()) {
234 if (buffer && bufferSize / int(sizeof(QCharAttributes)) >= sv.size() + 1) {
235 attributes = reinterpret_cast<QCharAttributes *>(buffer);
236 freeBuffer = false;
237 } else {
238 attributes = (QCharAttributes *) malloc((sv.size() + 1) * sizeof(QCharAttributes));
239 Q_CHECK_PTR(attributes);
240 }
241 init(t, sv, attributes);
242 }
243}
244
245/*!
246 Moves the finder to the start of the string. This is equivalent to setPosition(0).
247
248 \sa setPosition(), position()
249*/
250void QTextBoundaryFinder::toStart()
251{
252 pos = 0;
253}
254
255/*!
256 Moves the finder to the end of the string. This is equivalent to setPosition(string.length()).
257
258 \sa setPosition(), position()
259*/
260void QTextBoundaryFinder::toEnd()
261{
262 pos = sv.size();
263}
264
265/*!
266 Returns the current position of the QTextBoundaryFinder.
267
268 The range is from 0 (the beginning of the string) to the length of
269 the string inclusive.
270
271 \sa setPosition()
272*/
273qsizetype QTextBoundaryFinder::position() const
274{
275 return pos;
276}
277
278/*!
279 Sets the current position of the QTextBoundaryFinder to \a position.
280
281 If \a position is out of bounds, it will be bound to only valid
282 positions. In this case, valid positions are from 0 to the length of
283 the string inclusive.
284
285 \sa position()
286*/
287void QTextBoundaryFinder::setPosition(qsizetype position)
288{
289 pos = qBound(0, position, sv.size());
290}
291
292/*! \fn QTextBoundaryFinder::BoundaryType QTextBoundaryFinder::type() const
293
294 Returns the type of the QTextBoundaryFinder.
295*/
296
297/*! \fn bool QTextBoundaryFinder::isValid() const
298
299 Returns \c true if the text boundary finder is valid; otherwise returns \c false.
300 A default QTextBoundaryFinder is invalid.
301*/
302
303/*!
304 Returns the string the QTextBoundaryFinder object operates on.
305*/
306QString QTextBoundaryFinder::string() const
307{
308 if (sv.data() == s.unicode() && sv.size() == s.size())
309 return s;
310 return sv.toString();
311}
312
313
314/*!
315 Moves the QTextBoundaryFinder to the next boundary position and returns that position.
316
317 Returns -1 if there is no next boundary.
318*/
319qsizetype QTextBoundaryFinder::toNextBoundary()
320{
321 if (!attributes || pos < 0 || pos >= sv.size()) {
322 pos = -1;
323 return pos;
324 }
325
326 ++pos;
327 switch(t) {
328 case Grapheme:
329 while (pos < sv.size() && !attributes[pos].graphemeBoundary)
330 ++pos;
331 break;
332 case Word:
333 while (pos < sv.size() && !attributes[pos].wordBreak)
334 ++pos;
335 break;
336 case Sentence:
337 while (pos < sv.size() && !attributes[pos].sentenceBoundary)
338 ++pos;
339 break;
340 case Line:
341 while (pos < sv.size() && !attributes[pos].lineBreak)
342 ++pos;
343 break;
344 }
345
346 return pos;
347}
348
349/*!
350 Moves the QTextBoundaryFinder to the previous boundary position and returns that position.
351
352 Returns -1 if there is no previous boundary.
353*/
354qsizetype QTextBoundaryFinder::toPreviousBoundary()
355{
356 if (!attributes || pos <= 0 || pos > sv.size()) {
357 pos = -1;
358 return pos;
359 }
360
361 --pos;
362 switch(t) {
363 case Grapheme:
364 while (pos > 0 && !attributes[pos].graphemeBoundary)
365 --pos;
366 break;
367 case Word:
368 while (pos > 0 && !attributes[pos].wordBreak)
369 --pos;
370 break;
371 case Sentence:
372 while (pos > 0 && !attributes[pos].sentenceBoundary)
373 --pos;
374 break;
375 case Line:
376 while (pos > 0 && !attributes[pos].lineBreak)
377 --pos;
378 break;
379 }
380
381 return pos;
382}
383
384/*!
385 Returns \c true if the object's position() is currently at a valid text boundary.
386*/
387bool QTextBoundaryFinder::isAtBoundary() const
388{
389 if (!attributes || pos < 0 || pos > sv.size())
390 return false;
391
392 switch(t) {
393 case Grapheme:
394 return attributes[pos].graphemeBoundary;
395 case Word:
396 return attributes[pos].wordBreak;
397 case Sentence:
398 return attributes[pos].sentenceBoundary;
399 case Line:
400 // ### TR#14 LB2 prohibits break at sot
401 return attributes[pos].lineBreak || pos == 0;
402 }
403 return false;
404}
405
406/*!
407 Returns the reasons for the boundary finder to have chosen the current position as a boundary.
408*/
409QTextBoundaryFinder::BoundaryReasons QTextBoundaryFinder::boundaryReasons() const
410{
411 BoundaryReasons reasons = NotAtBoundary;
412 if (!attributes || pos < 0 || pos > sv.size())
413 return reasons;
414
415 const QCharAttributes attr = attributes[pos];
416 switch (t) {
417 case Grapheme:
418 if (attr.graphemeBoundary) {
419 reasons |= BreakOpportunity | StartOfItem | EndOfItem;
420 if (pos == 0)
421 reasons &= (~EndOfItem);
422 else if (pos == sv.size())
423 reasons &= (~StartOfItem);
424 }
425 break;
426 case Word:
427 if (attr.wordBreak) {
428 reasons |= BreakOpportunity;
429 if (attr.wordStart)
430 reasons |= StartOfItem;
431 if (attr.wordEnd)
432 reasons |= EndOfItem;
433 }
434 break;
435 case Sentence:
436 if (attr.sentenceBoundary) {
437 reasons |= BreakOpportunity | StartOfItem | EndOfItem;
438 if (pos == 0)
439 reasons &= (~EndOfItem);
440 else if (pos == sv.size())
441 reasons &= (~StartOfItem);
442 }
443 break;
444 case Line:
445 // ### TR#14 LB2 prohibits break at sot
446 if (attr.lineBreak || pos == 0) {
447 reasons |= BreakOpportunity;
448 if (attr.mandatoryBreak || pos == 0) {
449 reasons |= MandatoryBreak | StartOfItem | EndOfItem;
450 if (pos == 0)
451 reasons &= (~EndOfItem);
452 else if (pos == sv.size())
453 reasons &= (~StartOfItem);
454 } else if (pos > 0 && sv[pos - 1].unicode() == QChar::SoftHyphen) {
455 reasons |= SoftHyphen;
456 }
457 }
458 break;
459 default:
460 break;
461 }
462
463 return reasons;
464}
465
466QT_END_NAMESPACE
static QT_BEGIN_NAMESPACE void init(QTextBoundaryFinder::BoundaryType type, QStringView str, QCharAttributes *attributes)