Qt
Internal/Contributor docs for the Qt SDK. Note: These are NOT official API docs; those are found at https://doc.qt.io/
Loading...
Searching...
No Matches
qstring.cpp
Go to the documentation of this file.
1// Copyright (C) 2021 The Qt Company Ltd.
2// Copyright (C) 2022 Intel Corporation.
3// Copyright (C) 2019 Mail.ru Group.
4// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
5// Qt-Security score:critical reason:data-parser
6
7#include "qstringlist.h"
8#if QT_CONFIG(regularexpression)
9#include "qregularexpression.h"
10#endif
12#include <private/qstringconverter_p.h>
13#include <private/qtools_p.h>
15#include "private/qsimd_p.h"
16#include <qnumeric.h>
17#include <qdatastream.h>
18#include <qlist.h>
19#include "qlocale.h"
20#include "qlocale_p.h"
21#include "qspan.h"
22#include "qstringbuilder.h"
23#include "qstringmatcher.h"
25#include "qdebug.h"
26#include "qendian.h"
27#include "qcollator.h"
28#include "qttypetraits.h"
29
30#ifdef Q_OS_DARWIN
31#include <private/qcore_mac_p.h>
32#endif
33
34#include <private/qfunctions_p.h>
35
36#include <limits.h>
37#include <string.h>
38#include <stdlib.h>
39#include <stdio.h>
40#include <stdarg.h>
41#include <wchar.h>
42
43#include "qchar.cpp"
48#include "qthreadstorage.h"
49
50#include <algorithm>
51#include <functional>
52
53#ifdef Q_OS_WIN
54# include <qt_windows.h>
55# if !defined(QT_BOOTSTRAPPED) && (defined(QT_NO_CAST_FROM_ASCII) || defined(QT_NO_CAST_TO_ASCII))
56// MSVC requires this, but let's apply it to MinGW compilers too, just in case
57# error "This file cannot be compiled with QT_NO_CAST_{TO,FROM}_ASCII, "
58 "otherwise some QString functions will not get exported."
59# endif
60#endif
61
62#ifdef truncate
63# undef truncate
64#endif
65
// Rolling-hash step used by the backward search loops. It removes the
// contribution of the code unit `a` that just left the search window, but only
// when the shift count is smaller than the bit width of sl_minus_1's type
// (a larger shift would be undefined), and then doubles the running hash.
// The macro expects `sl_minus_1` and `hashHaystack` to be in scope where it is
// expanded. Every line except the last must end in a line continuation,
// otherwise the macro expands to nothing and the body becomes stray code.
#define REHASH(a) \
    if (sl_minus_1 < sizeof(sl_minus_1) * CHAR_BIT) \
        hashHaystack -= decltype(hashHaystack)(a) << sl_minus_1; \
    hashHaystack <<= 1
70
72
73using namespace Qt::StringLiterals;
74using namespace QtMiscUtils;
75
// Out-of-class definition of the static member QString::_empty: a single
// zero-valued UTF-16 code unit. NOTE(review): presumably the shared storage
// for empty strings -- confirm against qstring.h.
76const char16_t QString::_empty = 0;
77
// Forward declaration of the Boyer-Moore search routine; the definition is not
// in this file.
78// in qstringmatcher.cpp
79qsizetype qFindStringBoyerMoore(QStringView haystack, qsizetype from, QStringView needle, Qt::CaseSensitivity cs);
80
81namespace {
// Tells a comparison routine how much information its caller needs.
enum StringComparisonMode {
    // Only "equal / not equal" matters; any non-zero result means "different".
    CompareStringsForEquality = 0,
    // The sign of the result matters, so the first differing code units
    // have to be located and subtracted.
    CompareStringsForOrdering = 1
};
86
87template <typename Pointer>
88char32_t foldCaseHelper(Pointer ch, Pointer start) = delete;
89
90template <>
91char32_t foldCaseHelper<const QChar*>(const QChar* ch, const QChar* start)
92{
93 return foldCase(reinterpret_cast<const char16_t*>(ch),
94 reinterpret_cast<const char16_t*>(start));
95}
96
97template <>
98char32_t foldCaseHelper<const char*>(const char* ch, const char*)
99{
100 return foldCase(char16_t(uchar(*ch)));
101}
102
103template <typename T>
104char16_t valueTypeToUtf16(T t) = delete;
105
106template <>
107char16_t valueTypeToUtf16<QChar>(QChar t)
108{
109 return t.unicode();
110}
111
112template <>
113char16_t valueTypeToUtf16<char>(char t)
114{
115 return char16_t{uchar(t)};
116}
117
// Returns true when the case-folded form of `a` equals `b`. Note that `b` is
// compared as-is: the caller is expected to pass it already folded.
template <typename T>
static inline bool foldAndCompare(const T a, const T b)
{
    const auto folded = foldCase(a);
    return folded == b;
}
123
124/*!
125 \internal
126
127 Returns the index position of the first occurrence of the
128 character \a ch in the string given by \a str and \a len,
129 searching forward from index
130 position \a from. Returns -1 if \a ch could not be found.
131*/
132template <typename Haystack>
133static inline qsizetype qLastIndexOf(Haystack haystack, QChar needle,
134 qsizetype from, Qt::CaseSensitivity cs) noexcept
135{
136 if (haystack.size() == 0)
137 return -1;
138 if (from < 0)
139 from += haystack.size();
140 else if (std::size_t(from) > std::size_t(haystack.size()))
141 from = haystack.size() - 1;
142 if (from >= 0) {
143 char16_t c = needle.unicode();
144 const auto b = haystack.data();
145 auto n = b + from;
146 if (cs == Qt::CaseSensitive) {
147 for (; n >= b; --n)
148 if (valueTypeToUtf16(*n) == c)
149 return n - b;
150 } else {
151 c = foldCase(c);
152 for (; n >= b; --n)
153 if (foldCase(valueTypeToUtf16(*n)) == c)
154 return n - b;
155 }
156 }
157 return -1;
158}
159template <> qsizetype
160qLastIndexOf(QString, QChar, qsizetype, Qt::CaseSensitivity) noexcept = delete; // unwanted, would detach
161
// Finds the last occurrence of needle0 in haystack0 that starts at or before
// index `from`, honoring `cs`. Returns the start index of the match, or -1 when
// there is none. A negative `from` is counted from the end of the haystack.
//
// The search slides a window of needle0.size() code units backward through the
// haystack and keeps a rolling hash of the window; a full comparison is only
// done when the window hash equals the needle hash.
162template<typename Haystack, typename Needle>
163static qsizetype qLastIndexOf(Haystack haystack0, qsizetype from,
164 Needle needle0, Qt::CaseSensitivity cs) noexcept
165{
166 const qsizetype sl = needle0.size();
 // A one-character needle is delegated to the single-character overload.
167 if (sl == 1)
168 return qLastIndexOf(haystack0, needle0.front(), from, cs);
169
170 const qsizetype l = haystack0.size();
171 if (from < 0)
172 from += l;
 // An empty needle is reported as found at the very end of the haystack.
173 if (from == l && sl == 0)
174 return from;
 // delta is the largest start index at which the needle still fits.
175 const qsizetype delta = l - sl;
 // The unsigned comparison rejects both from > l and a still-negative from.
176 if (std::size_t(from) > std::size_t(l) || delta < 0)
177 return -1;
178 if (from > delta)
179 from = delta;
180
 // Builds a view of sl elements starting at v, for the full comparison below.
181 auto sv = [sl](const typename Haystack::value_type *v) { return Haystack(v, sl); };
182
183 auto haystack = haystack0.data();
184 const auto needle = needle0.data();
 // `end` is the start of the haystack buffer: the backward scan stops there.
185 const auto *end = haystack;
186 haystack += from;
187 const qregisteruint sl_minus_1 = sl ? sl - 1 : 0;
 // n and h point at the last element of the needle and of the first window.
188 const auto *n = needle + sl_minus_1;
189 const auto *h = haystack + sl_minus_1;
190 qregisteruint hashNeedle = 0, hashHaystack = 0;
191
192 if (cs == Qt::CaseSensitive) {
 // Hash both ranges from their last element to their first; each step
 // doubles the running value before adding the next code unit.
193 for (qsizetype idx = 0; idx < sl; ++idx) {
194 hashNeedle = (hashNeedle << 1) + valueTypeToUtf16(*(n - idx));
195 hashHaystack = (hashHaystack << 1) + valueTypeToUtf16(*(h - idx));
196 }
 // The loop body re-adds the window's first element on every iteration,
 // so take it out once up front.
197 hashHaystack -= valueTypeToUtf16(*haystack);
198
199 while (haystack >= end) {
200 hashHaystack += valueTypeToUtf16(*haystack);
201 if (hashHaystack == hashNeedle
202 && QtPrivate::compareStrings(needle0, sv(haystack), Qt::CaseSensitive) == 0)
203 return haystack - end;
 // Move the window one position toward the start and drop the element
 // that fell out of its far end from the hash.
204 --haystack;
205 REHASH(valueTypeToUtf16(haystack[sl]));
206 }
207 } else {
 // Same algorithm, but hashing the case-folded code units.
208 for (qsizetype idx = 0; idx < sl; ++idx) {
209 hashNeedle = (hashNeedle << 1) + foldCaseHelper(n - idx, needle);
210 hashHaystack = (hashHaystack << 1) + foldCaseHelper(h - idx, end);
211 }
212 hashHaystack -= foldCaseHelper(haystack, end);
213
214 while (haystack >= end) {
215 hashHaystack += foldCaseHelper(haystack, end);
216 if (hashHaystack == hashNeedle
217 && QtPrivate::compareStrings(sv(haystack), needle0, Qt::CaseInsensitive) == 0)
218 return haystack - end;
219 --haystack;
220 REHASH(foldCaseHelper(haystack + sl, end));
221 }
222 }
223 return -1;
224}
225
226template <typename Haystack, typename Needle>
227bool qt_starts_with_impl(Haystack haystack, Needle needle, Qt::CaseSensitivity cs) noexcept
228{
229 if (haystack.isNull())
230 return needle.isNull();
231 const auto haystackLen = haystack.size();
232 const auto needleLen = needle.size();
233 if (haystackLen == 0)
234 return needleLen == 0;
235 if (needleLen > haystackLen)
236 return false;
237
238 return QtPrivate::compareStrings(haystack.first(needleLen), needle, cs) == 0;
239}
240
241template <typename Haystack, typename Needle>
242bool qt_ends_with_impl(Haystack haystack, Needle needle, Qt::CaseSensitivity cs) noexcept
243{
244 if (haystack.isNull())
245 return needle.isNull();
246 const auto haystackLen = haystack.size();
247 const auto needleLen = needle.size();
248 if (haystackLen == 0)
249 return needleLen == 0;
250 if (haystackLen < needleLen)
251 return false;
252
253 return QtPrivate::compareStrings(haystack.last(needleLen), needle, cs) == 0;
254}
255
256template <typename T>
257static void append_helper(QString &self, T view)
258{
259 const auto strData = view.data();
260 const qsizetype strSize = view.size();
261 auto &d = self.data_ptr();
262 if (strData && strSize > 0) {
263 // the number of UTF-8 code units is always at a minimum equal to the number
264 // of equivalent UTF-16 code units
265 d.detachAndGrow(QArrayData::GrowsAtEnd, strSize, nullptr, nullptr);
266 Q_CHECK_PTR(d.data());
267 Q_ASSERT(strSize <= d.freeSpaceAtEnd());
268
269 auto dst = std::next(d.data(), d.size);
270 if constexpr (std::is_same_v<T, QUtf8StringView>) {
271 dst = QUtf8::convertToUnicode(dst, view);
272 } else if constexpr (std::is_same_v<T, QLatin1StringView>) {
273 QLatin1::convertToUnicode(dst, view);
274 dst += strSize;
275 } else {
276 static_assert(QtPrivate::type_dependent_false<T>(),
277 "Can only operate on UTF-8 and Latin-1");
278 }
279 self.resize(std::distance(d.begin(), dst));
280 } else if (d.isNull() && !view.isNull()) { // special case
281 self = QLatin1StringView("");
282 }
283}
284
// Compile-time unrolled loop for short tails: runs at most MaxCount
// iterations by recursing through UnrollTailLoop<MaxCount - 1>, ending at the
// UnrollTailLoop<0> specialization below.
template <uint MaxCount> struct UnrollTailLoop
{
    // Behaves like:
    //     for (; count; --count, ++i) {
    //         if (loopCheck(i))
    //             return returnIfFailed(i);
    //     }
    //     return returnIfExited;
    // except that no more than MaxCount iterations are ever executed.
    template <typename RetType, typename Functor1, typename Functor2, typename Number>
    static inline RetType exec(Number count, RetType returnIfExited, Functor1 loopCheck,
                               Functor2 returnIfFailed, Number i = 0)
    {
        if (count) {
            if (loopCheck(i))
                return returnIfFailed(i);

            using Remaining = UnrollTailLoop<MaxCount - 1>;
            return Remaining::exec(count - 1, returnIfExited, loopCheck, returnIfFailed, i + 1);
        }
        return returnIfExited;
    }

    // Behaves like:
    //     for (Number i = 0; i < count; ++i)
    //         code(i);
    // implemented on top of the overload above with a check that never fires.
    template <typename Functor, typename Number>
    static inline void exec(Number count, Functor code)
    {
        exec(count, 0, [=](Number i) -> bool { code(i); return false; }, [](Number) { return 0; });
    }
};
// Recursion terminator: no iterations left, report "loop ran to completion".
template <> template <typename RetType, typename Functor1, typename Functor2, typename Number>
inline RetType UnrollTailLoop<0>::exec(Number, RetType returnIfExited, Functor1, Functor2, Number)
{
    return returnIfExited;
}
323} // unnamed namespace
324
325/*
326 * Note on the use of SIMD in qstring.cpp:
327 *
328 * Several operations with strings are improved with the use of SIMD code,
329 * since they are repetitive. For MIPS, we have hand-written assembly code
330 * outside of qstring.cpp targeting MIPS DSP and MIPS DSPr2. For ARM and for
331 * x86, we can only use intrinsics and therefore everything is contained in
332 * qstring.cpp. We need to use intrinsics only for those platforms due to the
333 * different compilers and toolchains used, which have different syntax for
334 * assembly sources.
335 *
336 * ** SSE notes: **
337 *
338 * Whenever multiple alternatives are equivalent or near so, we prefer the one
339 * using instructions from SSE2, since SSE2 is guaranteed to be enabled for all
340 * 64-bit builds and we enable it for 32-bit builds by default. Use of higher
341 * SSE versions should be done when there is a clear performance benefit and
342 * requires fallback code to SSE2, if it exists.
343 *
344 * Performance measurement in the past shows that most strings are short in
345 * size and, therefore, do not benefit from alignment prologues. That is,
346 * trying to find a 16-byte-aligned boundary to operate on is often more
347 * expensive than executing the unaligned operation directly. In addition, note
348 * that the QString private data is designed so that the data is stored on
349 * 16-byte boundaries if the system malloc() returns 16-byte aligned pointers
350 * on its own (64-bit glibc on Linux does; 32-bit glibc on Linux returns them
351 * 50% of the time), so skipping the alignment prologue is actually optimizing
352 * for the common case.
353 */
354
// Declarations of the hand-written MIPS DSP assembly routines. They are only
// declared when the target defines __mips_dsp; the two fromlatin1 variants
// take (destination, source, length).
355#if defined(__mips_dsp)
356// From qstring_mips_dsp_asm.S
357extern "C" void qt_fromlatin1_mips_asm_unroll4 (char16_t*, const char*, uint);
358extern "C" void qt_fromlatin1_mips_asm_unroll8 (char16_t*, const char*, uint);
359extern "C" void qt_toLatin1_mips_dsp_asm(uchar *dst, const char16_t *src, int length);
360#endif
361
// ATTRIBUTE_NO_SANITIZE marks a function as exempt from address- and
// thread-sanitizer instrumentation. It expands to the GNU attribute only for
// SSE2 builds with a GNU-compatible compiler and to nothing otherwise.
362#if defined(__SSE2__) && defined(Q_CC_GNU)
363// We may overrun the buffer, but that's a false positive:
364// this won't crash nor produce incorrect results
365# define ATTRIBUTE_NO_SANITIZE __attribute__((__no_sanitize_address__, __no_sanitize_thread__))
366#else
367# define ATTRIBUTE_NO_SANITIZE
368#endif
369
370#ifdef __SSE2__
// Compile-time switches derived from the CPU features the compiler was told
// to target (qCompilerCpuFeatures); they select code paths via `if constexpr`.
// UseSse4_1: the SSE4.1 feature bit is set.
371static constexpr bool UseSse4_1 = bool(qCompilerCpuFeatures & CpuFeatureSSE4_1);
// UseAvx2: SSE4.1 is available and every bit of the Haswell feature set is set.
372static constexpr bool UseAvx2 = UseSse4_1 &&
373 (qCompilerCpuFeatures & CpuFeatureArchHaswell) == CpuFeatureArchHaswell;
374
375[[maybe_unused]]
376Q_ALWAYS_INLINE static __m128i mm_load8_zero_extend(const void *ptr)
377{
378 const __m128i *dataptr = static_cast<const __m128i *>(ptr);
379 if constexpr (UseSse4_1) {
380 // use a MOVQ followed by PMOVZXBW
381 // if AVX2 is present, these should combine into a single VPMOVZXBW instruction
382 __m128i data = _mm_loadl_epi64(dataptr);
383 return _mm_cvtepu8_epi16(data);
384 }
385
386 // use MOVQ followed by PUNPCKLBW
387 __m128i data = _mm_loadl_epi64(dataptr);
388 return _mm_unpacklo_epi8(data, _mm_setzero_si128());
389}
390
// SSE2 implementation of a UTF-16 strlen: returns the number of char16_t
// code units before the first null in `str`. It only ever issues aligned
// 16-byte loads, so it may read bytes before `str` and after the terminator
// (hence ATTRIBUTE_NO_SANITIZE), but always within an aligned 16-byte block
// that contains part of the string.
391[[maybe_unused]] ATTRIBUTE_NO_SANITIZE
392static qsizetype qustrlen_sse2(const char16_t *str) noexcept
393{
394 // find the 16-byte alignment immediately prior or equal to str
395 quintptr misalignment = quintptr(str) & 0xf;
396 Q_ASSERT((misalignment & 1) == 0);
397 const char16_t *ptr = str - (misalignment / 2);
398
399 // load 16 bytes and see if we have a null
400 // (aligned loads can never segfault)
401 const __m128i zeroes = _mm_setzero_si128();
402 __m128i data = _mm_load_si128(reinterpret_cast<const __m128i *>(ptr));
403 __m128i comparison = _mm_cmpeq_epi16(data, zeroes);
404 uint mask = _mm_movemask_epi8(comparison);
405
406 // ignore the result prior to the beginning of str
 // (the mask has one bit per byte, so shift by the misalignment in bytes)
407 mask >>= misalignment;
408
409 // Have we found something in the first block? Need to handle it now
410 // because of the right shift above.
 // Two mask bits correspond to one char16_t, hence the division.
411 if (mask)
412 return qCountTrailingZeroBits(mask) / sizeof(char16_t);
413
414 constexpr qsizetype Step = sizeof(__m128i) / sizeof(char16_t);
 // Number of code units of str covered by the first (partial) block.
415 qsizetype size = Step - misalignment / sizeof(char16_t);
416
 // Pre-decrement so that the increment at the top of the loop lands on the
 // first aligned block after the one already examined.
417 size -= Step;
418 do {
419 size += Step;
420 data = _mm_load_si128(reinterpret_cast<const __m128i *>(str + size));
421
422 comparison = _mm_cmpeq_epi16(data, zeroes);
423 mask = _mm_movemask_epi8(comparison);
424 } while (mask == 0);
425
426 // found a null
427 return size + qCountTrailingZeroBits(mask) / sizeof(char16_t);
428}
429
430// Scans from \a ptr to \a end, testing every 16-bit word against \a maskval.
431// Returns true if no word had any of the masked bits set. Returns false and
432// updates \a ptr to point to the first 16-bit word that has any masked bit set
433// (note: if the input is 8-bit, \a ptr may be updated to one byte short).
// Only whole 8-byte groups are examined: on a true return \a ptr has been
// advanced past everything that was tested and the caller must check what
// remains between \a ptr and \a end.
434static bool simdTestMask(const char *&ptr, const char *end, quint32 maskval)
435{
 // `result` has one bit per byte, set when the 16-bit word containing that
 // byte was zero after masking; the first clear bit therefore marks the
 // first offending word.
436 auto updatePtr = [&](uint result) {
437 // found a character matching the mask
438 uint idx = qCountTrailingZeroBits(~result);
439 ptr += idx;
440 return false;
441 };
442
443 if constexpr (UseSse4_1) {
444# ifndef Q_OS_QNX // compiler fails in the code below
445 __m128i mask;
 // Recomputes the per-byte "word is zero" bitmap for a 16-byte chunk
 // that is already known to contain a hit, then advances ptr.
446 auto updatePtrSimd = [&](__m128i data) -> bool {
447 __m128i masked = _mm_and_si128(mask, data);
448 __m128i comparison = _mm_cmpeq_epi16(masked, _mm_setzero_si128());
449 uint result = _mm_movemask_epi8(comparison);
450 return updatePtr(result);
451 };
452
453 if constexpr (UseAvx2) {
454 // AVX2 implementation: test 32 bytes at a time
455 const __m256i mask256 = _mm256_broadcastd_epi32(_mm_cvtsi32_si128(maskval));
456 while (ptr + 32 <= end) {
457 __m256i data = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(ptr));
458 if (!_mm256_testz_si256(mask256, data)) {
459 // found a character matching the mask
460 __m256i masked256 = _mm256_and_si256(mask256, data);
461 __m256i comparison256 = _mm256_cmpeq_epi16(masked256, _mm256_setzero_si256());
462 return updatePtr(_mm256_movemask_epi8(comparison256));
463 }
464 ptr += 32;
465 }
466
 // Reuse the low half of the broadcast mask for the 16/8-byte tail.
467 mask = _mm256_castsi256_si128(mask256);
468 } else {
469 // SSE 4.1 implementation: test 32 bytes at a time (two 16-byte
470 // comparisons, unrolled)
471 mask = _mm_set1_epi32(maskval);
472 while (ptr + 32 <= end) {
473 __m128i data1 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(ptr));
474 __m128i data2 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(ptr + 16));
475 if (!_mm_testz_si128(mask, data1))
476 return updatePtrSimd(data1);
477
478 ptr += 16;
479 if (!_mm_testz_si128(mask, data2))
480 return updatePtrSimd(data2);
481 ptr += 16;
482 }
483 }
484
485 // AVX2 and SSE4.1: final 16-byte comparison
486 if (ptr + 16 <= end) {
487 __m128i data1 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(ptr));
488 if (!_mm_testz_si128(mask, data1))
489 return updatePtrSimd(data1);
490 ptr += 16;
491 }
492
493 // and final 8-byte comparison
494 if (ptr + 8 <= end) {
495 __m128i data1 = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(ptr));
496 if (!_mm_testz_si128(mask, data1))
497 return updatePtrSimd(data1);
498 ptr += 8;
499 }
500
501 return true;
502# endif // QNX
503 }
504
505 // SSE2 implementation: test 16 bytes at a time.
506 const __m128i mask = _mm_set1_epi32(maskval);
507 while (ptr + 16 <= end) {
508 __m128i data = _mm_loadu_si128(reinterpret_cast<const __m128i *>(ptr));
509 __m128i masked = _mm_and_si128(mask, data);
510 __m128i comparison = _mm_cmpeq_epi16(masked, _mm_setzero_si128());
 // All 16 bits set means every word in the chunk was clean.
511 quint16 result = _mm_movemask_epi8(comparison);
512 if (result != 0xffff)
513 return updatePtr(result);
514 ptr += 16;
515 }
516
517 // and one 8-byte comparison
518 if (ptr + 8 <= end) {
519 __m128i data = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(ptr));
520 __m128i masked = _mm_and_si128(mask, data);
521 __m128i comparison = _mm_cmpeq_epi16(masked, _mm_setzero_si128());
 // Only the low 8 mask bits describe the 8 bytes that were loaded.
522 quint8 result = _mm_movemask_epi8(comparison);
523 if (result != 0xff)
524 return updatePtr(result);
525 ptr += 8;
526 }
527
528 return true;
529}
530
// Compares the first `l` code units of the UTF-16 string `a` with those of
// `b`, where `b` holds either 16-bit or 8-bit (zero-extended) code units.
// Returns 0 when they are all equal. On a mismatch it returns 1 in
// CompareStringsForEquality mode, and the difference between the first
// mismatching code units otherwise.
531template <StringComparisonMode Mode, typename Char> [[maybe_unused]]
532static int ucstrncmp_sse2(const char16_t *a, const Char *b, size_t l)
533{
534 static_assert(std::is_unsigned_v<Char>);
535
536 // Using the PMOVMSKB instruction, we get two bits for each UTF-16 character
537 // we compare. This lambda helps extract the code unit.
538 static const auto codeUnitAt = [](const auto *n, qptrdiff idx) -> int {
539 constexpr int Stride = 2;
540 // this is the same as:
541 // return n[idx / Stride];
542 // but using pointer arithmetic to avoid the compiler dividing by two
543 // and multiplying by two in the case of char16_t (we know idx is even,
544 // but the compiler does not). This is not UB.
545
546 auto ptr = reinterpret_cast<const uchar *>(n);
547 ptr += idx / (Stride / sizeof(*n));
548 return *reinterpret_cast<decltype(n)>(ptr);
549 };
 // Turns a mismatch bitmap (a set bit marks a differing byte) for the chunk
 // at `offset` into the function's return value.
550 auto difference = [a, b](uint mask, qptrdiff offset) {
551 if (Mode == CompareStringsForEquality)
552 return 1;
553 uint idx = qCountTrailingZeroBits(mask);
554 return codeUnitAt(a + offset, idx) - codeUnitAt(b + offset, idx);
555 };
556
 // Load 8 (resp. 4) code units as 16-bit lanes, zero-extending 8-bit input.
557 static const auto load8Chars = [](const auto *ptr) {
558 if (sizeof(*ptr) == 2)
559 return _mm_loadu_si128(reinterpret_cast<const __m128i *>(ptr));
560 __m128i chunk = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(ptr));
561 return _mm_unpacklo_epi8(chunk, _mm_setzero_si128());
562 };
563 static const auto load4Chars = [](const auto *ptr) {
564 if (sizeof(*ptr) == 2)
565 return _mm_loadl_epi64(reinterpret_cast<const __m128i *>(ptr));
566 __m128i chunk = _mm_cvtsi32_si128(qFromUnaligned<quint32>(ptr));
567 return _mm_unpacklo_epi8(chunk, _mm_setzero_si128());
568 };
569
570 // we're going to read a[0..15] and b[0..15] (32 bytes)
 // Returns a 32-bit bitmap with two bits set for every code unit that is equal.
571 auto processChunk16Chars = [a, b](qptrdiff offset) -> uint {
572 if constexpr (UseAvx2) {
573 __m256i a_data = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(a + offset));
574 __m256i b_data;
575 if (sizeof(Char) == 1) {
576 // expand to UTF-16 via zero-extension
577 __m128i chunk = _mm_loadu_si128(reinterpret_cast<const __m128i *>(b + offset));
578 b_data = _mm256_cvtepu8_epi16(chunk);
579 } else {
580 b_data = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(b + offset));
581 }
582 __m256i result = _mm256_cmpeq_epi16(a_data, b_data);
583 return _mm256_movemask_epi8(result);
584 }
585
586 __m128i a_data1 = load8Chars(a + offset);
587 __m128i a_data2 = load8Chars(a + offset + 8);
588 __m128i b_data1, b_data2;
589 if (sizeof(Char) == 1) {
590 // expand to UTF-16 via unpacking
591 __m128i b_data = _mm_loadu_si128(reinterpret_cast<const __m128i *>(b + offset));
592 b_data1 = _mm_unpacklo_epi8(b_data, _mm_setzero_si128());
593 b_data2 = _mm_unpackhi_epi8(b_data, _mm_setzero_si128());
594 } else {
595 b_data1 = load8Chars(b + offset);
596 b_data2 = load8Chars(b + offset + 8);
597 }
598 __m128i result1 = _mm_cmpeq_epi16(a_data1, b_data1);
599 __m128i result2 = _mm_cmpeq_epi16(a_data2, b_data2);
600 return _mm_movemask_epi8(result1) | _mm_movemask_epi8(result2) << 16;
601 };
602
 // 16 or more code units: compare in 16-unit chunks.
603 if (l >= sizeof(__m256i) / sizeof(char16_t)) {
604 qptrdiff offset = 0;
605 for ( ; l >= offset + sizeof(__m256i) / sizeof(char16_t); offset += sizeof(__m256i) / sizeof(char16_t)) {
 // Invert so that a set bit now marks a mismatch.
606 uint mask = ~processChunk16Chars(offset);
607 if (mask)
608 return difference(mask, offset);
609 }
610
611 // maybe overlap the last 32 bytes
 // (re-reads some already-compared units instead of using a scalar tail)
612 if (size_t(offset) < l) {
613 offset = l - sizeof(__m256i) / sizeof(char16_t);
614 uint mask = ~processChunk16Chars(offset);
615 return mask ? difference(mask, offset) : 0;
616 }
617 } else if (l >= 4) {
 // 4 to 15 code units: two loads of `width` units each, one anchored at the
 // start and one at the end; together they cover all l units and may overlap.
618 __m128i a_data1, b_data1;
619 __m128i a_data2, b_data2;
620 int width;
621 if (l >= 8) {
622 width = 8;
623 a_data1 = load8Chars(a);
624 b_data1 = load8Chars(b);
625 a_data2 = load8Chars(a + l - width);
626 b_data2 = load8Chars(b + l - width);
627 } else {
628 // we're going to read a[0..3] and b[0..3] (8 bytes)
629 width = 4;
630 a_data1 = load4Chars(a);
631 b_data1 = load4Chars(b);
632 a_data2 = load4Chars(a + l - width);
633 b_data2 = load4Chars(b + l - width);
634 }
635
636 __m128i result = _mm_cmpeq_epi16(a_data1, b_data1);
637 ushort mask = ~_mm_movemask_epi8(result);
638 if (mask)
639 return difference(mask, 0);
640
641 result = _mm_cmpeq_epi16(a_data2, b_data2);
642 mask = ~_mm_movemask_epi8(result);
643 if (mask)
644 return difference(mask, l - width);
645 } else {
 // Fewer than 4 code units: unrolled scalar comparison. The same lambda
 // serves as check and as result: a non-zero difference stops the loop
 // and is returned.
646 // reset l
647 l &= 3;
648
649 const auto lambda = [=](size_t i) -> int {
650 return a[i] - b[i];
651 };
652 return UnrollTailLoop<3>::exec(l, 0, lambda, lambda);
653 }
654 return 0;
655}
656#endif
657
658Q_NEVER_INLINE
659qsizetype QtPrivate::qustrlen(const char16_t *str) noexcept
660{
661#if defined(__SSE2__) && !(defined(__SANITIZE_ADDRESS__) || __has_feature(address_sanitizer)) && !(defined(__SANITIZE_THREAD__) || __has_feature(thread_sanitizer))
662 return qustrlen_sse2(str);
663#endif
664
665 if (sizeof(wchar_t) == sizeof(char16_t))
666 return wcslen(reinterpret_cast<const wchar_t *>(str));
667
668 qsizetype result = 0;
669 while (*str++)
670 ++result;
671 return result;
672}
673
674qsizetype QtPrivate::qustrnlen(const char16_t *str, qsizetype maxlen) noexcept
675{
676 return qustrchr({ str, maxlen }, u'\0') - str;
677}
678
679/*!
680 * \internal
681 *
682 * Searches for character \a c in the string \a str and returns a pointer to
683 * it. Unlike strchr() and wcschr() (but like glibc's strchrnul()), if the
684 * character is not found, this function returns a pointer to the end of the
685 * string -- that is, \c{str.end()}.
 *
 * The SIMD paths below process whole chunks of decreasing size (16, 8 and 4
 * code units) and leave the remainder to a scalar tail.
686 */
688const char16_t *QtPrivate::qustrchr(QStringView str, char16_t c) noexcept
689{
690 const char16_t *n = str.utf16();
691 const char16_t *e = n + str.size();
692
693#ifdef __SSE2__
 // `loops` is cleared once the AVX2 loop has run: fewer than 16 code units
 // are left then, so the 8-unit step below needs to run at most once.
694 bool loops = true;
695 // Using the PMOVMSKB instruction, we get two bits for each character
696 // we compare.
697 __m128i mch;
698 if constexpr (UseAvx2) {
699 // we're going to read n[0..15] (32 bytes)
700 __m256i mch256 = _mm256_set1_epi32(c | (c << 16));
701 for (const char16_t *next = n + 16; next <= e; n = next, next += 16) {
702 __m256i data = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(n));
703 __m256i result = _mm256_cmpeq_epi16(data, mch256);
704 uint mask = uint(_mm256_movemask_epi8(result));
705 if (mask) {
706 uint idx = qCountTrailingZeroBits(mask);
707 return n + idx / 2;
708 }
709 }
710 loops = false;
711 mch = _mm256_castsi256_si128(mch256);
712 } else {
713 mch = _mm_set1_epi32(c | (c << 16));
714 }
715
 // Compares `data` with the needle; validityMask selects the mask bits that
 // belong to bytes actually loaded. On a hit, advances n to the match.
716 auto hasMatch = [mch, &n](__m128i data, ushort validityMask) {
717 __m128i result = _mm_cmpeq_epi16(data, mch);
718 uint mask = uint(_mm_movemask_epi8(result));
719 if ((mask & validityMask) == 0)
720 return false;
721 uint idx = qCountTrailingZeroBits(mask);
722 n += idx / 2;
723 return true;
724 };
725
726 // we're going to read n[0..7] (16 bytes)
727 for (const char16_t *next = n + 8; next <= e; n = next, next += 8) {
728 __m128i data = _mm_loadu_si128(reinterpret_cast<const __m128i *>(n));
729 if (hasMatch(data, 0xffff))
730 return n;
731
732 if (!loops) {
733 n += 8;
734 break;
735 }
736 }
737
738# if !defined(__OPTIMIZE_SIZE__)
739 // we're going to read n[0..3] (8 bytes)
740 if (e - n > 3) {
741 __m128i data = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(n));
742 if (hasMatch(data, 0xff))
743 return n;
744
745 n += 4;
746 }
747
 // At most 3 code units remain: unrolled scalar search, returning e when
 // the character is not among them.
748 return UnrollTailLoop<3>::exec(e - n, e,
749 [=](qsizetype i) { return n[i] == c; },
750 [=](qsizetype i) { return n + i; });
751# endif
752#elif defined(__ARM_NEON__)
 // vmask gives each lane its own bit, so that summing the masked comparison
 // result yields a bitmap with one bit per code unit.
753 const uint16x8_t vmask = qvsetq_n_u16(1, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6, 1 << 7);
754 const uint16x8_t ch_vec = vdupq_n_u16(c);
755 for (const char16_t *next = n + 8; next <= e; n = next, next += 8) {
756 uint16x8_t data = vld1q_u16(reinterpret_cast<const uint16_t *>(n));
757 uint mask = vaddvq_u16(vandq_u16(vceqq_u16(data, ch_vec), vmask));
758 if (ushort(mask)) {
759 // found a match
760 return n + qCountTrailingZeroBits(mask);
761 }
762 }
763#endif // __SSE2__ / __ARM_NEON__
764
 // Scalar search over whatever the SIMD code above did not cover.
765 return std::find(n, e, c);
766}
767
768/*!
769 * \internal
770 *
771 * Searches case-insensitively for character \a c in the string \a str and
772 * returns a pointer to it. Iif the character is not found, this function
773 * returns a pointer to the end of the string -- that is, \c{str.end()}.
774 */
776const char16_t *QtPrivate::qustrcasechr(QStringView str, char16_t c) noexcept
777{
778 const QChar *n = str.begin();
779 const QChar *e = str.end();
780 c = foldCase(c);
781 auto it = std::find_if(n, e, [c](auto ch) { return foldAndCompare(ch, QChar(c)); });
782 return reinterpret_cast<const char16_t *>(it);
783}
784
// Returns true if every byte in [ptr, end) has its high bit clear. On a false
// return, ptr has been advanced to (or just before, see below) the first
// offending byte; on a true return ptr equals end.
785// Note: ptr on output may be off by one and point to a preceding US-ASCII
786// character. Usually harmless.
787bool qt_is_ascii(const char *&ptr, const char *end) noexcept
788{
789#if defined(__SSE2__)
790 // Testing for the high bit can be done efficiently with just PMOVMSKB
 // `loops` is cleared after the AVX2 loop: fewer than 32 bytes remain then,
 // so the 16-byte step needs to run at most once.
791 bool loops = true;
792 if constexpr (UseAvx2) {
793 while (ptr + 32 <= end) {
794 __m256i data = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(ptr));
795 quint32 mask = _mm256_movemask_epi8(data);
796 if (mask) {
797 uint idx = qCountTrailingZeroBits(mask);
798 ptr += idx;
799 return false;
800 }
801 ptr += 32;
802 }
803 loops = false;
804 }
805
806 while (ptr + 16 <= end) {
807 __m128i data = _mm_loadu_si128(reinterpret_cast<const __m128i *>(ptr));
808 quint32 mask = _mm_movemask_epi8(data);
809 if (mask) {
810 uint idx = qCountTrailingZeroBits(mask);
811 ptr += idx;
812 return false;
813 }
814 ptr += 16;
815
816 if (!loops)
817 break;
818 }
819 if (ptr + 8 <= end) {
820 __m128i data = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(ptr));
 // Only the low 8 mask bits correspond to the 8 bytes loaded.
821 quint8 mask = _mm_movemask_epi8(data);
822 if (mask) {
823 uint idx = qCountTrailingZeroBits(mask);
824 ptr += idx;
825 return false;
826 }
827 ptr += 8;
828 }
829#endif
830
 // Generic path: test four bytes at a time with a plain integer mask.
831 while (ptr + 4 <= end) {
832 quint32 data = qFromUnaligned<quint32>(ptr);
833 if (data &= 0x80808080U) {
 // The first offending byte in memory order is the most significant
 // one on big-endian targets and the least significant one otherwise.
834 uint idx = QSysInfo::ByteOrder == QSysInfo::BigEndian
835 ? qCountLeadingZeroBits(data)
836 : qCountTrailingZeroBits(data);
837 ptr += idx / 8;
838 return false;
839 }
840 ptr += 4;
841 }
842
 // Remaining bytes, one at a time.
843 while (ptr != end) {
844 if (quint8(*ptr) & 0x80)
845 return false;
846 ++ptr;
847 }
848 return true;
849}
850
851bool QtPrivate::isAscii(QLatin1StringView s) noexcept
852{
853 const char *ptr = s.begin();
854 const char *end = s.end();
855
856 return qt_is_ascii(ptr, end);
857}
858
859static bool isAscii_helper(const char16_t *&ptr, const char16_t *end)
860{
861#ifdef __SSE2__
862 const char *ptr8 = reinterpret_cast<const char *>(ptr);
863 const char *end8 = reinterpret_cast<const char *>(end);
864 bool ok = simdTestMask(ptr8, end8, 0xff80ff80);
865 ptr = reinterpret_cast<const char16_t *>(ptr8);
866 if (!ok)
867 return false;
868#endif
869
870 while (ptr != end) {
871 if (*ptr & 0xff80)
872 return false;
873 ++ptr;
874 }
875 return true;
876}
877
878bool QtPrivate::isAscii(QStringView s) noexcept
879{
880 const char16_t *ptr = s.utf16();
881 const char16_t *end = ptr + s.size();
882
883 return isAscii_helper(ptr, end);
884}
885
886bool QtPrivate::isLatin1(QStringView s) noexcept
887{
888 const char16_t *ptr = s.utf16();
889 const char16_t *end = ptr + s.size();
890
891#ifdef __SSE2__
892 const char *ptr8 = reinterpret_cast<const char *>(ptr);
893 const char *end8 = reinterpret_cast<const char *>(end);
894 if (!simdTestMask(ptr8, end8, 0xff00ff00))
895 return false;
896 ptr = reinterpret_cast<const char16_t *>(ptr8);
897#endif
898
899 while (ptr != end) {
900 if (*ptr++ > 0xff)
901 return false;
902 }
903 return true;
904}
905
906bool QtPrivate::isValidUtf16(QStringView s) noexcept
907{
908 constexpr char32_t InvalidCodePoint = UINT_MAX;
909
910 QStringIterator i(s);
911 while (i.hasNext()) {
912 const char32_t c = i.next(InvalidCodePoint);
913 if (c == InvalidCodePoint)
914 return false;
915 }
916
917 return true;
918}
919
920// conversion between Latin 1 and UTF-16
// Writes `size` UTF-16 code units to `dst`, each the zero-extended value of
// the corresponding byte of `str`.
921Q_CORE_EXPORT void qt_from_latin1(char16_t *dst, const char *str, size_t size) noexcept
922{
923 /* SIMD:
924 * Unpacking with SSE has been shown to improve performance on recent CPUs
925 * The same method gives no improvement with NEON. On Aarch64, clang will do the vectorization
926 * itself in exactly the same way as one would do it with intrinsics.
927 */
928#if defined(__SSE2__)
929 // we're going to read str[offset..offset+15] (16 bytes)
930 const __m128i nullMask = _mm_setzero_si128();
 // Converts the 16 bytes at str + offset into 16 code units at dst + offset.
931 auto processOneChunk = [=](qptrdiff offset) {
932 const __m128i chunk = _mm_loadu_si128((const __m128i*)(str + offset)); // load
933 if constexpr (UseAvx2) {
934 // zero extend to an YMM register
935 const __m256i extended = _mm256_cvtepu8_epi16(chunk);
936
937 // store
938 _mm256_storeu_si256((__m256i*)(dst + offset), extended);
939 } else {
940 // unpack the first 8 bytes, padding with zeros
941 const __m128i firstHalf = _mm_unpacklo_epi8(chunk, nullMask);
942 _mm_storeu_si128((__m128i*)(dst + offset), firstHalf); // store
943
944 // unpack the last 8 bytes, padding with zeros
945 const __m128i secondHalf = _mm_unpackhi_epi8 (chunk, nullMask);
946 _mm_storeu_si128((__m128i*)(dst + offset + 8), secondHalf); // store
947 }
948 };
949
950 const char *e = str + size;
951 if (size >= sizeof(__m128i)) {
952 qptrdiff offset = 0;
953 for ( ; str + offset + sizeof(__m128i) <= e; offset += sizeof(__m128i))
954 processOneChunk(offset);
 // Leftover shorter than a chunk: redo the last 16 bytes of the input,
 // overlapping what has already been converted.
955 if (str + offset < e)
956 processOneChunk(size - sizeof(__m128i));
957 return;
958 }
959
960# if !defined(__OPTIMIZE_SIZE__)
961 if (size >= 4) {
962 // two overlapped loads & stores, of either 64-bit or of 32-bit
963 if (size >= 8) {
964 const __m128i unpacked1 = mm_load8_zero_extend(str);
965 const __m128i unpacked2 = mm_load8_zero_extend(str + size - 8);
966 _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), unpacked1);
967 _mm_storeu_si128(reinterpret_cast<__m128i *>(dst + size - 8), unpacked2);
968 } else {
969 const __m128i chunk1 = _mm_cvtsi32_si128(qFromUnaligned<quint32>(str));
970 const __m128i chunk2 = _mm_cvtsi32_si128(qFromUnaligned<quint32>(str + size - 4));
971 const __m128i unpacked1 = _mm_unpacklo_epi8(chunk1, nullMask);
972 const __m128i unpacked2 = _mm_unpacklo_epi8(chunk2, nullMask);
973 _mm_storel_epi64(reinterpret_cast<__m128i *>(dst), unpacked1);
974 _mm_storel_epi64(reinterpret_cast<__m128i *>(dst + size - 4), unpacked2);
975 }
976 return;
977 } else {
 // Fewer than 4 bytes: unrolled scalar copy.
978 size = size % 4;
979 return UnrollTailLoop<3>::exec(qsizetype(size), [=](qsizetype i) { dst[i] = uchar(str[i]); });
980 }
981# endif
982#endif
 // Reached on non-SSE2 builds, and on SSE2 builds optimized for size when
 // the input is shorter than 16 bytes.
983#if defined(__mips_dsp)
984 static_assert(sizeof(qsizetype) == sizeof(int),
985 "oops, the assembler implementation needs to be called in a loop");
986 if (size > 20)
987 qt_fromlatin1_mips_asm_unroll8(dst, str, size);
988 else
989 qt_fromlatin1_mips_asm_unroll4(dst, str, size);
990#else
 // Portable fallback: widen byte by byte, going through uchar so that
 // bytes >= 0x80 are not sign-extended.
991 while (size--)
992 *dst++ = (uchar)*str++;
993#endif
994}
995
996static QVarLengthArray<char16_t> qt_from_latin1_to_qvla(QLatin1StringView str)
997{
998 const qsizetype len = str.size();
999 QVarLengthArray<char16_t> arr(len);
1000 qt_from_latin1(arr.data(), str.data(), len);
1001 return arr;
1002}
1003
1004template <bool Checked>
1005static void qt_to_latin1_internal(uchar *dst, const char16_t *src, qsizetype length)
1006{
1007#if defined(__SSE2__)
1008 auto questionMark256 = []() {
1009 if constexpr (UseAvx2)
1010 return _mm256_broadcastw_epi16(_mm_cvtsi32_si128('?'));
1011 else
1012 return 0;
1013 }();
1014 auto outOfRange256 = []() {
1015 if constexpr (UseAvx2)
1016 return _mm256_broadcastw_epi16(_mm_cvtsi32_si128(0x100));
1017 else
1018 return 0;
1019 }();
1020 __m128i questionMark, outOfRange;
1021 if constexpr (UseAvx2) {
1022 questionMark = _mm256_castsi256_si128(questionMark256);
1023 outOfRange = _mm256_castsi256_si128(outOfRange256);
1024 } else {
1025 questionMark = _mm_set1_epi16('?');
1026 outOfRange = _mm_set1_epi16(0x100);
1027 }
1028
1029 auto mergeQuestionMarks = [=](__m128i chunk) {
1030 if (!Checked)
1031 return chunk;
1032
1033 // SSE has no compare instruction for unsigned comparison.
1034 if constexpr (UseSse4_1) {
1035 // We use an unsigned uc = qMin(uc, 0x100) and then compare for equality.
1036 chunk = _mm_min_epu16(chunk, outOfRange);
1037 const __m128i offLimitMask = _mm_cmpeq_epi16(chunk, outOfRange);
1038 chunk = _mm_blendv_epi8(chunk, questionMark, offLimitMask);
1039 return chunk;
1040 }
1041 // The variables must be shiffted + 0x8000 to be compared
1042 const __m128i signedBitOffset = _mm_set1_epi16(short(0x8000));
1043 const __m128i thresholdMask = _mm_set1_epi16(short(0xff + 0x8000));
1044
1045 const __m128i signedChunk = _mm_add_epi16(chunk, signedBitOffset);
1046 const __m128i offLimitMask = _mm_cmpgt_epi16(signedChunk, thresholdMask);
1047
1048 // offLimitQuestionMark contains '?' for each 16 bits that was off-limit
1049 // the 16 bits that were correct contains zeros
1050 const __m128i offLimitQuestionMark = _mm_and_si128(offLimitMask, questionMark);
1051
1052 // correctBytes contains the bytes that were in limit
1053 // the 16 bits that were off limits contains zeros
1054 const __m128i correctBytes = _mm_andnot_si128(offLimitMask, chunk);
1055
1056 // merge offLimitQuestionMark and correctBytes to have the result
1057 chunk = _mm_or_si128(correctBytes, offLimitQuestionMark);
1058
1059 Q_UNUSED(outOfRange);
1060 return chunk;
1061 };
1062
1063 // we're going to read to src[offset..offset+15] (16 bytes)
1064 auto loadChunkAt = [=](qptrdiff offset) {
1065 __m128i chunk1, chunk2;
1066 if constexpr (UseAvx2) {
1067 __m256i chunk = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(src + offset));
1068 if (Checked) {
1069 // See mergeQuestionMarks lambda above for details
1070 chunk = _mm256_min_epu16(chunk, outOfRange256);
1071 const __m256i offLimitMask = _mm256_cmpeq_epi16(chunk, outOfRange256);
1072 chunk = _mm256_blendv_epi8(chunk, questionMark256, offLimitMask);
1073 }
1074
1075 chunk2 = _mm256_extracti128_si256(chunk, 1);
1076 chunk1 = _mm256_castsi256_si128(chunk);
1077 } else {
1078 chunk1 = _mm_loadu_si128((const __m128i*)(src + offset)); // load
1079 chunk1 = mergeQuestionMarks(chunk1);
1080
1081 chunk2 = _mm_loadu_si128((const __m128i*)(src + offset + 8)); // load
1082 chunk2 = mergeQuestionMarks(chunk2);
1083 }
1084
1085 // pack the two vector to 16 x 8bits elements
1086 return _mm_packus_epi16(chunk1, chunk2);
1087 };
1088
1089 if (size_t(length) >= sizeof(__m128i)) {
1090 // because of possible overlapping, we won't process the last chunk in the loop
1091 qptrdiff offset = 0;
1092 for ( ; offset + 2 * sizeof(__m128i) < size_t(length); offset += sizeof(__m128i))
1093 _mm_storeu_si128(reinterpret_cast<__m128i *>(dst + offset), loadChunkAt(offset));
1094
1095 // overlapped conversion of the last full chunk and the tail
1096 __m128i last1 = loadChunkAt(offset);
1097 __m128i last2 = loadChunkAt(length - sizeof(__m128i));
1098 _mm_storeu_si128(reinterpret_cast<__m128i *>(dst + offset), last1);
1099 _mm_storeu_si128(reinterpret_cast<__m128i *>(dst + length - sizeof(__m128i)), last2);
1100 return;
1101 }
1102
1103# if !defined(__OPTIMIZE_SIZE__)
1104 if (length >= 4) {
1105 // this code is fine even for in-place conversion because we load both
1106 // before any store
1107 if (length >= 8) {
1108 __m128i chunk1 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(src));
1109 __m128i chunk2 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(src + length - 8));
1110 chunk1 = mergeQuestionMarks(chunk1);
1111 chunk2 = mergeQuestionMarks(chunk2);
1112
1113 // pack, where the upper half is ignored
1114 const __m128i result1 = _mm_packus_epi16(chunk1, chunk1);
1115 const __m128i result2 = _mm_packus_epi16(chunk2, chunk2);
1116 _mm_storel_epi64(reinterpret_cast<__m128i *>(dst), result1);
1117 _mm_storel_epi64(reinterpret_cast<__m128i *>(dst + length - 8), result2);
1118 } else {
1119 __m128i chunk1 = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(src));
1120 __m128i chunk2 = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(src + length - 4));
1121 chunk1 = mergeQuestionMarks(chunk1);
1122 chunk2 = mergeQuestionMarks(chunk2);
1123
1124 // pack, we'll zero the upper three quarters
1125 const __m128i result1 = _mm_packus_epi16(chunk1, chunk1);
1126 const __m128i result2 = _mm_packus_epi16(chunk2, chunk2);
1127 qToUnaligned(_mm_cvtsi128_si32(result1), dst);
1128 qToUnaligned(_mm_cvtsi128_si32(result2), dst + length - 4);
1129 }
1130 return;
1131 }
1132
1133 length = length % 4;
1134 return UnrollTailLoop<3>::exec(length, [=](qsizetype i) {
1135 if (Checked)
1136 dst[i] = (src[i]>0xff) ? '?' : (uchar) src[i];
1137 else
1138 dst[i] = src[i];
1139 });
1140# else
1141 length = length % 16;
1142# endif // optimize size
1143#elif defined(__ARM_NEON__)
1144 // Refer to the documentation of the SSE2 implementation.
1145 // This uses exactly the same method as for SSE except:
1146 // 1) neon has unsigned comparison
1147 // 2) packing is done to 64 bits (8 x 8bits component).
1148 if (length >= 16) {
1149 const qsizetype chunkCount = length >> 3; // divided by 8
1150 const uint16x8_t questionMark = vdupq_n_u16('?'); // set
1151 const uint16x8_t thresholdMask = vdupq_n_u16(0xff); // set
1152 for (qsizetype i = 0; i < chunkCount; ++i) {
1153 uint16x8_t chunk = vld1q_u16((uint16_t *)src); // load
1154 src += 8;
1155
1156 if (Checked) {
1157 const uint16x8_t offLimitMask = vcgtq_u16(chunk, thresholdMask); // chunk > thresholdMask
1158 const uint16x8_t offLimitQuestionMark = vandq_u16(offLimitMask, questionMark); // offLimitMask & questionMark
1159 const uint16x8_t correctBytes = vbicq_u16(chunk, offLimitMask); // !offLimitMask & chunk
1160 chunk = vorrq_u16(correctBytes, offLimitQuestionMark); // correctBytes | offLimitQuestionMark
1161 }
1162 const uint8x8_t result = vmovn_u16(chunk); // narrowing move->packing
1163 vst1_u8(dst, result); // store
1164 dst += 8;
1165 }
1166 length = length % 8;
1167 }
1168#endif
1169#if defined(__mips_dsp)
1170 static_assert(sizeof(qsizetype) == sizeof(int),
1171 "oops, the assembler implementation needs to be called in a loop");
1172 qt_toLatin1_mips_dsp_asm(dst, src, length);
1173#else
1174 while (length--) {
1175 if (Checked)
1176 *dst++ = (*src>0xff) ? '?' : (uchar) *src;
1177 else
1178 *dst++ = *src;
1179 ++src;
1180 }
1181#endif
1182}
1183
1184void qt_to_latin1(uchar *dst, const char16_t *src, qsizetype length)
1185{
1186 qt_to_latin1_internal<true>(dst, src, length);
1187}
1188
1189void qt_to_latin1_unchecked(uchar *dst, const char16_t *src, qsizetype length)
1190{
1191 qt_to_latin1_internal<false>(dst, src, length);
1192}
1193
1194// Unicode case-insensitive comparison (argument order matches QStringView)
1195Q_NEVER_INLINE static int ucstricmp(qsizetype alen, const char16_t *a, qsizetype blen, const char16_t *b)
1196{
1197 if (a == b)
1198 return qt_lencmp(alen, blen);
1199
1200 qsizetype l = qMin(alen, blen);
1201 qsizetype i;
1202 for (i = 0; i < l; ++i) {
1203// qDebug() << Qt::hex << alast << blast;
1204// qDebug() << Qt::hex << "*a=" << *a << "alast=" << alast << "folded=" << foldCase (*a, alast);
1205// qDebug() << Qt::hex << "*b=" << *b << "blast=" << blast << "folded=" << foldCase (*b, blast);
1206 int diff = foldCase(a + i, a) - foldCase(b + i, b);
1207 if ((diff))
1208 return diff;
1209 }
1210 if (i == alen) {
1211 if (i == blen)
1212 return 0;
1213 return -1;
1214 }
1215 return 1;
1216}
1217
1218// Case-insensitive comparison between a QStringView and a QLatin1StringView
1219// (argument order matches those types)
1220Q_NEVER_INLINE static int ucstricmp(qsizetype alen, const char16_t *a, qsizetype blen, const char *b)
1221{
1222 qsizetype l = qMin(alen, blen);
1223 qsizetype i;
1224 for (i = 0; i < l; ++i) {
1225 int diff = foldCase(a[i]) - foldCase(char16_t{uchar(b[i])});
1226 if ((diff))
1227 return diff;
1228 }
1229 if (i == alen) {
1230 if (i == blen)
1231 return 0;
1232 return -1;
1233 }
1234 return 1;
1235}
1236
1237// Case-insensitive comparison between a Unicode string and a UTF-8 string
1238Q_NEVER_INLINE static int ucstricmp8(const char *utf8, const char *utf8end, const QChar *utf16, const QChar *utf16end)
1239{
1240 auto src1 = reinterpret_cast<const qchar8_t *>(utf8);
1241 auto end1 = reinterpret_cast<const qchar8_t *>(utf8end);
1242 QStringIterator src2(utf16, utf16end);
1243
1244 while (src1 < end1 && src2.hasNext()) {
1245 char32_t uc1 = QChar::toCaseFolded(QUtf8Functions::nextUcs4FromUtf8(src1, end1));
1246 char32_t uc2 = QChar::toCaseFolded(src2.next());
1247 int diff = uc1 - uc2; // can't underflow
1248 if (diff)
1249 return diff;
1250 }
1251
1252 // the shorter string sorts first
1253 return (end1 > src1) - int(src2.hasNext());
1254}
1255
#if defined(__mips_dsp)
// Implemented in qstring_mips_dsp_asm.S
extern "C" int qt_ucstrncmp_mips_dsp_asm(const char16_t *a, const char16_t *b, unsigned len);
#endif
1262
1263// Unicode case-sensitive compare two same-sized strings
1264template <StringComparisonMode Mode>
1265static int ucstrncmp(const char16_t *a, const char16_t *b, size_t l)
1266{
1267 // This function isn't memcmp() because that can return the wrong sorting
1268 // result in little-endian architectures: 0x00ff must sort before 0x0100,
1269 // but the bytes in memory are FF 00 and 00 01.
1270
1271#ifndef __OPTIMIZE_SIZE__
1272# if defined(__mips_dsp)
1273 static_assert(sizeof(uint) == sizeof(size_t));
1274 if (l >= 8) {
1275 return qt_ucstrncmp_mips_dsp_asm(a, b, l);
1276 }
1277# elif defined(__SSE2__)
1278 return ucstrncmp_sse2<Mode>(a, b, l);
1279# elif defined(__ARM_NEON__)
1280 if (l >= 8) {
1281 const char16_t *end = a + l;
1282 const uint16x8_t mask = qvsetq_n_u16( 1, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6, 1 << 7 );
1283 while (end - a > 7) {
1284 uint16x8_t da = vld1q_u16(reinterpret_cast<const uint16_t *>(a));
1285 uint16x8_t db = vld1q_u16(reinterpret_cast<const uint16_t *>(b));
1286
1287 uint8_t r = ~(uint8_t)vaddvq_u16(vandq_u16(vceqq_u16(da, db), mask));
1288 if (r) {
1289 // found a different QChar
1290 if (Mode == CompareStringsForEquality)
1291 return 1;
1292 uint idx = qCountTrailingZeroBits(r);
1293 return a[idx] - b[idx];
1294 }
1295 a += 8;
1296 b += 8;
1297 }
1298 l &= 7;
1299 }
1300 const auto lambda = [=](size_t i) -> int {
1301 return a[i] - b[i];
1302 };
1303 return UnrollTailLoop<7>::exec(l, 0, lambda, lambda);
1304# endif // MIPS DSP or __SSE2__ or __ARM_NEON__
1305#endif // __OPTIMIZE_SIZE__
1306
1307 if (Mode == CompareStringsForEquality || QSysInfo::ByteOrder == QSysInfo::BigEndian)
1308 return memcmp(a, b, l * sizeof(char16_t));
1309
1310 for (size_t i = 0; i < l; ++i) {
1311 if (int diff = a[i] - b[i])
1312 return diff;
1313 }
1314 return 0;
1315}
1316
1317template <StringComparisonMode Mode>
1318static int ucstrncmp(const char16_t *a, const char *b, size_t l)
1319{
1320 const uchar *c = reinterpret_cast<const uchar *>(b);
1321 const char16_t *uc = a;
1322 const char16_t *e = uc + l;
1323
1324#if defined(__SSE2__) && !defined(__OPTIMIZE_SIZE__)
1325 return ucstrncmp_sse2<Mode>(uc, c, l);
1326#endif
1327
1328 while (uc < e) {
1329 int diff = *uc - *c;
1330 if (diff)
1331 return diff;
1332 uc++, c++;
1333 }
1334
1335 return 0;
1336}
1337
1338// Unicode case-sensitive equality
1339template <typename Char2>
1340static bool ucstreq(const char16_t *a, size_t alen, const Char2 *b)
1341{
1342 return ucstrncmp<CompareStringsForEquality>(a, b, alen) == 0;
1343}
1344
1345// Unicode case-sensitive comparison
1346template <typename Char2>
1347static int ucstrcmp(const char16_t *a, size_t alen, const Char2 *b, size_t blen)
1348{
1349 const size_t l = qMin(alen, blen);
1350 int cmp = ucstrncmp<CompareStringsForOrdering>(a, b, l);
1351 return cmp ? cmp : qt_lencmp(alen, blen);
1352}
1353
1355
1356static int latin1nicmp(const char *lhsChar, qsizetype lSize, const char *rhsChar, qsizetype rSize)
1357{
1358 // We're called with QLatin1StringView's .data() and .size():
1359 Q_ASSERT(lSize >= 0 && rSize >= 0);
1360 if (!lSize)
1361 return rSize ? -1 : 0;
1362 if (!rSize)
1363 return 1;
1364 const qsizetype size = std::min(lSize, rSize);
1365
1366 Q_ASSERT(lhsChar && rhsChar); // since both lSize and rSize are positive
1367 for (qsizetype i = 0; i < size; i++) {
1368 if (int res = CaseInsensitiveL1::difference(lhsChar[i], rhsChar[i]))
1369 return res;
1370 }
1371 return qt_lencmp(lSize, rSize);
1372}
1373
1374bool QtPrivate::equalStrings(QStringView lhs, QStringView rhs) noexcept
1375{
1376 Q_ASSERT(lhs.size() == rhs.size());
1377 return ucstreq(lhs.utf16(), lhs.size(), rhs.utf16());
1378}
1379
1380bool QtPrivate::equalStrings(QStringView lhs, QLatin1StringView rhs) noexcept
1381{
1382 Q_ASSERT(lhs.size() == rhs.size());
1383 return ucstreq(lhs.utf16(), lhs.size(), rhs.latin1());
1384}
1385
1386bool QtPrivate::equalStrings(QLatin1StringView lhs, QStringView rhs) noexcept
1387{
1388 return QtPrivate::equalStrings(rhs, lhs);
1389}
1390
1391bool QtPrivate::equalStrings(QLatin1StringView lhs, QLatin1StringView rhs) noexcept
1392{
1393 Q_ASSERT(lhs.size() == rhs.size());
1394 return (!lhs.size() || memcmp(lhs.data(), rhs.data(), lhs.size()) == 0);
1395}
1396
1397bool QtPrivate::equalStrings(QBasicUtf8StringView<false> lhs, QStringView rhs) noexcept
1398{
1399 return QUtf8::compareUtf8(lhs, rhs) == 0;
1400}
1401
1402bool QtPrivate::equalStrings(QStringView lhs, QBasicUtf8StringView<false> rhs) noexcept
1403{
1404 return QtPrivate::equalStrings(rhs, lhs);
1405}
1406
1407bool QtPrivate::equalStrings(QLatin1StringView lhs, QBasicUtf8StringView<false> rhs) noexcept
1408{
1409 return QUtf8::compareUtf8(QByteArrayView(rhs), lhs) == 0;
1410}
1411
1412bool QtPrivate::equalStrings(QBasicUtf8StringView<false> lhs, QLatin1StringView rhs) noexcept
1413{
1414 return QtPrivate::equalStrings(rhs, lhs);
1415}
1416
1417bool QtPrivate::equalStrings(QBasicUtf8StringView<false> lhs, QBasicUtf8StringView<false> rhs) noexcept
1418{
1419#if QT_VERSION >= QT_VERSION_CHECK(7, 0, 0) || defined(QT_BOOTSTRAPPED) || defined(QT_STATIC)
1420 Q_ASSERT(lhs.size() == rhs.size());
1421#else
1422 // operator== didn't enforce size prior to Qt 6.2
1423 if (lhs.size() != rhs.size())
1424 return false;
1425#endif
1426 return (!lhs.size() || memcmp(lhs.data(), rhs.data(), lhs.size()) == 0);
1427}
1428
1429bool QAnyStringView::equal(QAnyStringView lhs, QAnyStringView rhs) noexcept
1430{
1431 if (lhs.size() != rhs.size() && lhs.isUtf8() == rhs.isUtf8())
1432 return false;
1433 return lhs.visit([rhs](auto lhs) {
1434 return rhs.visit([lhs](auto rhs) {
1435 return QtPrivate::equalStrings(lhs, rhs);
1436 });
1437 });
1438}
1439
1440/*!
1441 \relates QStringView
1442 \internal
1443 \since 5.10
1444
1445 Returns an integer that compares to 0 as \a lhs compares to \a rhs.
1446
1447 \include qstring.qdocinc {search-comparison-case-sensitivity} {comparison}
1448
1449 Case-sensitive comparison is based exclusively on the numeric Unicode values
1450 of the characters and is very fast, but is not what a human would expect.
1451 Consider sorting user-visible strings with QString::localeAwareCompare().
1452
1453 \sa {Comparing Strings}
1454*/
1455int QtPrivate::compareStrings(QStringView lhs, QStringView rhs, Qt::CaseSensitivity cs) noexcept
1456{
1457 if (cs == Qt::CaseSensitive)
1458 return ucstrcmp(lhs.utf16(), lhs.size(), rhs.utf16(), rhs.size());
1459 return ucstricmp(lhs.size(), lhs.utf16(), rhs.size(), rhs.utf16());
1460}
1461
1462/*!
1463 \relates QStringView
1464 \internal
1465 \since 5.10
1466 \overload
1467
1468 Returns an integer that compares to 0 as \a lhs compares to \a rhs.
1469
1470 \include qstring.qdocinc {search-comparison-case-sensitivity} {comparison}
1471
1472 Case-sensitive comparison is based exclusively on the numeric Unicode values
1473 of the characters and is very fast, but is not what a human would expect.
1474 Consider sorting user-visible strings with QString::localeAwareCompare().
1475
1476 \sa {Comparing Strings}
1477*/
1478int QtPrivate::compareStrings(QStringView lhs, QLatin1StringView rhs, Qt::CaseSensitivity cs) noexcept
1479{
1480 if (cs == Qt::CaseSensitive)
1481 return ucstrcmp(lhs.utf16(), lhs.size(), rhs.latin1(), rhs.size());
1482 return ucstricmp(lhs.size(), lhs.utf16(), rhs.size(), rhs.latin1());
1483}
1484
1485/*!
1486 \relates QStringView
1487 \internal
1488 \since 6.0
1489 \overload
1490*/
1491int QtPrivate::compareStrings(QStringView lhs, QBasicUtf8StringView<false> rhs, Qt::CaseSensitivity cs) noexcept
1492{
1493 return -compareStrings(rhs, lhs, cs);
1494}
1495
1496/*!
1497 \relates QStringView
1498 \internal
1499 \since 5.10
1500 \overload
1501*/
1502int QtPrivate::compareStrings(QLatin1StringView lhs, QStringView rhs, Qt::CaseSensitivity cs) noexcept
1503{
1504 return -compareStrings(rhs, lhs, cs);
1505}
1506
1507/*!
1508 \relates QStringView
1509 \internal
1510 \since 5.10
1511 \overload
1512
1513 Returns an integer that compares to 0 as \a lhs compares to \a rhs.
1514
1515 \include qstring.qdocinc {search-comparison-case-sensitivity} {comparison}
1516
1517 Case-sensitive comparison is based exclusively on the numeric Latin-1 values
1518 of the characters and is very fast, but is not what a human would expect.
1519 Consider sorting user-visible strings with QString::localeAwareCompare().
1520
1521 \sa {Comparing Strings}
1522*/
1523int QtPrivate::compareStrings(QLatin1StringView lhs, QLatin1StringView rhs, Qt::CaseSensitivity cs) noexcept
1524{
1525 if (lhs.isEmpty())
1526 return qt_lencmp(qsizetype(0), rhs.size());
1527 if (rhs.isEmpty())
1528 return qt_lencmp(lhs.size(), qsizetype(0));
1529 if (cs == Qt::CaseInsensitive)
1530 return latin1nicmp(lhs.data(), lhs.size(), rhs.data(), rhs.size());
1531 const auto l = std::min(lhs.size(), rhs.size());
1532 int r = memcmp(lhs.data(), rhs.data(), l);
1533 return r ? r : qt_lencmp(lhs.size(), rhs.size());
1534}
1535
1536/*!
1537 \relates QStringView
1538 \internal
1539 \since 6.0
1540 \overload
1541*/
1542int QtPrivate::compareStrings(QLatin1StringView lhs, QBasicUtf8StringView<false> rhs, Qt::CaseSensitivity cs) noexcept
1543{
1544 return -QUtf8::compareUtf8(QByteArrayView(rhs), lhs, cs);
1545}
1546
1547/*!
1548 \relates QStringView
1549 \internal
1550 \since 6.0
1551 \overload
1552*/
1553int QtPrivate::compareStrings(QBasicUtf8StringView<false> lhs, QStringView rhs, Qt::CaseSensitivity cs) noexcept
1554{
1555 if (cs == Qt::CaseSensitive)
1556 return QUtf8::compareUtf8(lhs, rhs);
1557 return ucstricmp8(lhs.begin(), lhs.end(), rhs.begin(), rhs.end());
1558}
1559
1560/*!
1561 \relates QStringView
1562 \internal
1563 \since 6.0
1564 \overload
1565*/
1566int QtPrivate::compareStrings(QBasicUtf8StringView<false> lhs, QLatin1StringView rhs, Qt::CaseSensitivity cs) noexcept
1567{
1568 return -compareStrings(rhs, lhs, cs);
1569}
1570
1571/*!
1572 \relates QStringView
1573 \internal
1574 \since 6.0
1575 \overload
1576*/
1577int QtPrivate::compareStrings(QBasicUtf8StringView<false> lhs, QBasicUtf8StringView<false> rhs, Qt::CaseSensitivity cs) noexcept
1578{
1579 return QUtf8::compareUtf8(QByteArrayView(lhs), QByteArrayView(rhs), cs);
1580}
1581
1582int QAnyStringView::compare(QAnyStringView lhs, QAnyStringView rhs, Qt::CaseSensitivity cs) noexcept
1583{
1584 return lhs.visit([rhs, cs](auto lhs) {
1585 return rhs.visit([lhs, cs](auto rhs) {
1586 return QtPrivate::compareStrings(lhs, rhs, cs);
1587 });
1588 });
1589}
1590
1591// ### Qt 7: do not allow anything but ASCII digits
1592// in arg()'s replacements.
1593#if QT_VERSION < QT_VERSION_CHECK(7, 0, 0) && !defined(QT_BOOTSTRAPPED)
1594static bool supportUnicodeDigitValuesInArg()
1595{
1596 static const bool result = []() {
1597 static const char supportUnicodeDigitValuesEnvVar[]
1598 = "QT_USE_UNICODE_DIGIT_VALUES_IN_STRING_ARG";
1599
1600 if (qEnvironmentVariableIsSet(supportUnicodeDigitValuesEnvVar))
1601 return qEnvironmentVariableIntValue(supportUnicodeDigitValuesEnvVar) != 0;
1602
1603#if QT_VERSION < QT_VERSION_CHECK(6, 6, 0) // keep it in sync with the test
1604 return true;
1605#else
1606 return false;
1607#endif
1608 }();
1609
1610 return result;
1611}
1612#endif
1613
1614static int qArgDigitValue(QChar ch) noexcept
1615{
1616#if QT_VERSION < QT_VERSION_CHECK(7, 0, 0) && !defined(QT_BOOTSTRAPPED)
1617 if (supportUnicodeDigitValuesInArg())
1618 return ch.digitValue();
1619#endif
1620 if (ch >= u'0' && ch <= u'9')
1621 return int(ch.unicode() - u'0');
1622 return -1;
1623}
1624
1625#if QT_CONFIG(regularexpression)
1626Q_DECL_COLD_FUNCTION
1627static void qtWarnAboutInvalidRegularExpression(const QRegularExpression &re, const char *cls, const char *method)
1628{
1629 extern void qtWarnAboutInvalidRegularExpression(const QString &pattern, const char *cls, const char *method);
1630 qtWarnAboutInvalidRegularExpression(re.pattern(), cls, method);
1631}
1632#endif
1633
1634/*!
1635 \macro QT_RESTRICTED_CAST_FROM_ASCII
1636 \relates QString
1637
1638 Disables most automatic conversions from source literals and 8-bit data
1639 to unicode QStrings, but allows the use of
1640 the \c{QChar(char)} and \c{QString(const char (&ch)[N])} constructors,
1641 and the \c{QString::operator=(const char (&ch)[N])} assignment operator.
1642 This gives most of the type-safety benefits of \l QT_NO_CAST_FROM_ASCII
1643 but does not require user code to wrap character and string literals
1644 with QLatin1Char, QLatin1StringView or similar.
1645
1646 Using this macro together with source strings outside the 7-bit range,
1647 non-literals, or literals with embedded NUL characters is undefined.
1648
1649 \sa QT_NO_CAST_FROM_ASCII, QT_NO_CAST_TO_ASCII
1650*/
1651
1652/*!
1653 \macro QT_NO_CAST_FROM_ASCII
1654 \relates QString
1655 \relates QChar
1656
1657 Disables automatic conversions from 8-bit strings (\c{char *}) to Unicode
1658 QStrings, as well as from 8-bit \c{char} types (\c{char} and
1659 \c{unsigned char}) to QChar.
1660
1661 \sa QT_NO_CAST_TO_ASCII, QT_RESTRICTED_CAST_FROM_ASCII,
1662 QT_NO_CAST_FROM_BYTEARRAY
1663*/
1664
1665/*!
1666 \macro QT_NO_CAST_TO_ASCII
1667 \relates QString
1668
1669 Disables automatic conversion from QString to 8-bit strings (\c{char *}).
1670
1671 \sa QT_NO_CAST_FROM_ASCII, QT_RESTRICTED_CAST_FROM_ASCII,
1672 QT_NO_CAST_FROM_BYTEARRAY
1673*/
1674
1675/*!
1676 \macro QT_ASCII_CAST_WARNINGS
1677 \internal
1678 \relates QString
1679
1680 This macro can be defined to force a warning whenever a function is
1681 called that automatically converts between unicode and 8-bit encodings.
1682
1683 Note: This only works for compilers that support warnings for
1684 deprecated API.
1685
1686 \sa QT_NO_CAST_TO_ASCII, QT_NO_CAST_FROM_ASCII, QT_RESTRICTED_CAST_FROM_ASCII
1687*/
1688
1689/*!
1690 \class QString
1691 \inmodule QtCore
1692 \reentrant
1693
1694 \brief The QString class provides a Unicode character string.
1695
1696 \ingroup tools
1697 \ingroup shared
1698 \ingroup string-processing
1699
1700 \compares strong
1701 \compareswith strong QChar QLatin1StringView {const char16_t *} \
1702 QStringView QUtf8StringView
1703 \endcompareswith
1704 \compareswith strong QByteArray QByteArrayView {const char *}
1705 When comparing with byte arrays, their content is interpreted as UTF-8.
1706 \endcompareswith
1707
1708 QString stores a string of 16-bit \l{QChar}s, where each QChar
1709 corresponds to one UTF-16 code unit. (Unicode characters
1710 with code values above 65535 are stored using surrogate pairs,
1711 that is, two consecutive \l{QChar}s.)
1712
1713 \l{Unicode} is an international standard that supports most of the
1714 writing systems in use today. It is a superset of US-ASCII (ANSI
1715 X3.4-1986) and Latin-1 (ISO 8859-1), and all the US-ASCII/Latin-1
1716 characters are available at the same code positions.
1717
1718 Behind the scenes, QString uses \l{implicit sharing}
1719 (copy-on-write) to reduce memory usage and to avoid the needless
1720 copying of data. This also helps reduce the inherent overhead of
1721 storing 16-bit characters instead of 8-bit characters.
1722
1723 In addition to QString, Qt also provides the QByteArray class to
1724 store raw bytes and traditional 8-bit '\\0'-terminated strings.
1725 For most purposes, QString is the class you want to use. It is
1726 used throughout the Qt API, and the Unicode support ensures that
1727 your applications are easy to translate if you want to expand
1728 your application's market at some point. Two prominent cases
1729 where QByteArray is appropriate are when you need to store raw
1730 binary data, and when memory conservation is critical (like in
1731 embedded systems).
1732
1733 \section1 Initializing a string
1734
1735 One way to initialize a QString is to pass a \c{const char
1736 *} to its constructor. For example, the following code creates a
1737 QString of size 5 containing the data "Hello":
1738
1739 \snippet qstring/main.cpp 0
1740
1741 QString converts the \c{const char *} data into Unicode using the
1742 fromUtf8() function.
1743
1744 In all of the QString functions that take \c{const char *}
1745 parameters, the \c{const char *} is interpreted as a classic
1746 C-style \c{'\\0'}-terminated string. Except where the function's
1747 name overtly indicates some other encoding, such \c{const char *}
1748 parameters are assumed to be encoded in UTF-8.
1749
1750 You can also provide string data as an array of \l{QChar}s:
1751
1752 \snippet qstring/main.cpp 1
1753
1754 QString makes a deep copy of the QChar data, so you can modify it
1755 later without experiencing side effects. You can avoid taking a
1756 deep copy of the character data by using QStringView or
1757 QString::fromRawData() instead.
1758
1759 Another approach is to set the size of the string using resize()
1760 and to initialize the data character per character. QString uses
1761 0-based indexes, just like C++ arrays. To access the character at
1762 a particular index position, you can use \l operator[](). On
1763 non-\c{const} strings, \l operator[]() returns a reference to a
1764 character that can be used on the left side of an assignment. For
1765 example:
1766
1767 \snippet qstring/main.cpp 2
1768
1769 For read-only access, an alternative syntax is to use the at()
1770 function:
1771
1772 \snippet qstring/main.cpp 3
1773
1774 The at() function can be faster than \l operator[]() because it
1775 never causes a \l{deep copy} to occur. Alternatively, use the
1776 first(), last(), or sliced() functions to extract several characters
1777 at a time.
1778
1779 A QString can embed '\\0' characters (QChar::Null). The size()
1780 function always returns the size of the whole string, including
1781 embedded '\\0' characters.
1782
1783 After a call to the resize() function, newly allocated characters
1784 have undefined values. To set all the characters in the string to
1785 a particular value, use the fill() function.
1786
1787 QString provides dozens of overloads designed to simplify string
1788 usage. For example, if you want to compare a QString with a string
1789 literal, you can write code like this and it will work as expected:
1790
1791 \snippet qstring/main.cpp 4
1792
1793 You can also pass string literals to functions that take QStrings
1794 as arguments, invoking the QString(const char *)
1795 constructor. Similarly, you can pass a QString to a function that
1796 takes a \c{const char *} argument using the \l qPrintable() macro,
1797 which returns the given QString as a \c{const char *}. This is
1798 equivalent to calling toLocal8Bit().\l{QByteArray::}{constData()}
1799 on the QString.
1800
1801 \section1 Manipulating string data
1802
1803 QString provides the following basic functions for modifying the
1804 character data: append(), prepend(), insert(), replace(), and
1805 remove(). For example:
1806
1807 \snippet qstring/main.cpp 5
1808
1809 In the above example, the replace() function's first two arguments are the
1810 position from which to start replacing and the number of characters that
1811 should be replaced.
1812
1813 When data-modifying functions increase the size of the string,
1814 QString may reallocate the memory in which it holds its data. When
1815 this happens, QString expands by more than it immediately needs so as
1816 to have space for further expansion without reallocation until the size
1817 of the string has significantly increased.
1818
1819 The insert(), remove(), and, when replacing a sub-string with one of
1820 different size, replace() functions can be slow (\l{linear time}) for
1821 large strings because they require moving many characters in the string
1822 by at least one position in memory.
1823
1824 If you are building a QString gradually and know in advance
1825 approximately how many characters the QString will contain, you
1826 can call reserve(), asking QString to preallocate a certain amount
1827 of memory. You can also call capacity() to find out how much
1828 memory the QString actually has allocated.
1829
1830 QString provides \l{STL-style iterators} (QString::const_iterator and
1831 QString::iterator). In practice, iterators are handy when working with
1832 generic algorithms provided by the C++ standard library.
1833
1834 \note Iterators over a QString, and references to individual characters
1835 within one, cannot be relied on to remain valid when any non-\c{const}
1836 method of the QString is called. Accessing such an iterator or reference
1837 after the call to a non-\c{const} method leads to undefined behavior. When
1838 stability for iterator-like functionality is required, you should use
1839 indexes instead of iterators, as they are not tied to QString's internal
1840 state and thus do not get invalidated.
1841
1842 \note Due to \l{implicit sharing}, the first non-\c{const} operator or
1843 function used on a given QString may cause it to internally perform a deep
1844 copy of its data. This invalidates all iterators over the string and
1845 references to individual characters within it. Do not call non-const
1846 functions while keeping iterators. Accessing an iterator or reference
1847 after it has been invalidated leads to undefined behavior. See the
1848 \l{Implicit sharing iterator problem} section for more information.
1849
1850 A frequent requirement is to remove or simplify the spacing between
1851 visible characters in a string. The characters that make up that spacing
1852 are those for which \l {QChar::}{isSpace()} returns \c true, such as
1853 the simple space \c{' '}, the horizontal tab \c{'\\t'} and the newline \c{'\\n'}.
1854 To obtain a copy of a string leaving out any spacing from its start and end,
1855 use \l trimmed(). To also replace each sequence of spacing characters within
1856 the string with a simple space, \c{' '}, use \l simplified().
1857
1858 If you want to find all occurrences of a particular character or
1859 substring in a QString, use the indexOf() or lastIndexOf()
1860 functions. The former searches forward, the latter searches backward.
1861 Either can be told an index position from which to start their search.
1862 Each returns the index position of the character or substring if they
1863 find it; otherwise, they return -1. For example, here is a typical loop
1864 that finds all occurrences of a particular substring:
1865
1866 \snippet qstring/main.cpp 6
1867
1868 QString provides many functions for converting numbers into
1869 strings and strings into numbers. See the arg() functions, the
1870 setNum() functions, the number() static functions, and the
1871 toInt(), toDouble(), and similar functions.
1872
1873 To get an uppercase or lowercase version of a string, use toUpper() or
1874 toLower().
1875
1876 Lists of strings are handled by the QStringList class. You can
1877 split a string into a list of strings using the split() function,
1878 and join a list of strings into a single string with an optional
1879 separator using QStringList::join(). You can obtain a filtered list
1880 from a string list by selecting the entries in it that contain a
1881 particular substring or match a particular QRegularExpression.
1882 See QStringList::filter() for details.
1883
1884 \section1 Querying string data
1885
1886 To see if a QString starts or ends with a particular substring, use
1887 startsWith() or endsWith(). To check whether a QString contains a
1888 specific character or substring, use the contains() function. To
1889 find out how many times a particular character or substring occurs
1890 in a string, use count().
1891
1892 To obtain a pointer to the actual character data, call data() or
1893 constData(). These functions return a pointer to the beginning of
1894 the QChar data. The pointer is guaranteed to remain valid until a
1895 non-\c{const} function is called on the QString.
1896
1897 \section2 Comparing strings
1898
1899 QStrings can be compared using overloaded operators such as \l
1900 operator<(), \l operator<=(), \l operator==(), \l operator>=(),
1901 and so on. The comparison is based exclusively on the lexicographical
1902 order of the two strings, seen as sequences of UTF-16 code units.
1903 It is very fast but is not what a human would expect; the
1904 QString::localeAwareCompare() function is usually a better choice for
1905 sorting user-interface strings, when such a comparison is available.
1906
1907 When Qt is linked with the ICU library (which it usually is), its
1908 locale-aware sorting is used. Otherwise, platform-specific solutions
1909 are used:
1910 \list
1911 \li On Windows, localeAwareCompare() uses the current user locale,
1912 as set in the \uicontrol{regional} and \uicontrol{language}
1913 options portion of \uicontrol{Control Panel}.
1914 \li On \macos and iOS, \l localeAwareCompare() compares according
1915 to the \uicontrol{Order for sorted lists} setting in the
1916 \uicontrol{International preferences} panel.
1917 \li On other Unix-like systems, the comparison falls back to the
1918 system library's \c strcoll().
1919 \endlist
1920
1921 \section1 Converting between encoded string data and QString
1922
1923 QString provides the following functions that return a
1924 \c{const char *} version of the string as QByteArray: toUtf8(),
1925 toLatin1(), and toLocal8Bit().
1926
1927 \list
1928 \li toLatin1() returns a Latin-1 (ISO 8859-1) encoded 8-bit string.
1929 \li toUtf8() returns a UTF-8 encoded 8-bit string. UTF-8 is a
1930 superset of US-ASCII (ANSI X3.4-1986) that supports the entire
1931 Unicode character set through multibyte sequences.
1932 \li toLocal8Bit() returns an 8-bit string using the system's local
1933 encoding. This is the same as toUtf8() on Unix systems.
1934 \endlist
1935
1936 To convert from one of these encodings, QString provides
1937 fromLatin1(), fromUtf8(), and fromLocal8Bit(). Other
1938 encodings are supported through the QStringEncoder and QStringDecoder
1939 classes.
1940
1941 As mentioned above, QString provides a lot of functions and
1942 operators that make it easy to interoperate with \c{const char *}
1943 strings. But this functionality is a double-edged sword: It makes
1944 QString more convenient to use if all strings are US-ASCII or
1945 Latin-1, but there is always the risk that an implicit conversion
1946 from or to \c{const char *} is done using the wrong 8-bit
1947 encoding. To minimize these risks, you can turn off these implicit
1948 conversions by defining some of the following preprocessor symbols:
1949
1950 \list
1951 \li \l QT_NO_CAST_FROM_ASCII disables automatic conversions from
1952 C string literals and pointers to Unicode.
1953 \li \l QT_RESTRICTED_CAST_FROM_ASCII allows automatic conversions
1954 from C characters and character arrays but disables automatic
1955 conversions from character pointers to Unicode.
1956 \li \l QT_NO_CAST_TO_ASCII disables automatic conversion from QString
1957 to C strings.
1958 \endlist
1959
1960 You then need to explicitly call fromUtf8(), fromLatin1(),
1961 or fromLocal8Bit() to construct a QString from an
1962 8-bit string, or use the lightweight QLatin1StringView class. For
1963 example:
1964
1965 \snippet code/src_corelib_text_qstring.cpp 1
1966
1967 Similarly, you must call toLatin1(), toUtf8(), or
1968 toLocal8Bit() explicitly to convert the QString to an 8-bit
1969 string.
1970
1971 \table 100 %
1972 \header
1973 \li Note for C Programmers
1974
1975 \row
1976 \li
1977 Due to C++'s type system and the fact that QString is
1978 \l{implicitly shared}, QStrings may be treated like \c{int}s or
1979 other basic types. For example:
1980
1981 \snippet qstring/main.cpp 7
1982
1983 The \c result variable is a normal variable allocated on the
1984 stack. When \c return is called, and because we're returning by
1985 value, the copy constructor is called and a copy of the string is
1986 returned. No actual copying takes place thanks to the implicit
1987 sharing.
1988
1989 \endtable
1990
1991 \section1 Distinction between null and empty strings
1992
1993 For historical reasons, QString distinguishes between null
1994 and empty strings. A \e null string is a string that is
1995 initialized using QString's default constructor or by passing
1996 \nullptr to the constructor. An \e empty string is any
1997 string with size 0. A null string is always empty, but an empty
1998 string isn't necessarily null:
1999
2000 \snippet qstring/main.cpp 8
2001
2002 All functions except isNull() treat null strings the same as empty
2003 strings. For example, toUtf8().\l{QByteArray::}{constData()} returns a valid pointer
2004 (not \nullptr) to a '\\0' character for a null string. We
2005 recommend that you always use the isEmpty() function and avoid isNull().
2006
2007 \section1 Number formats
2008
2009 When a QString::arg() \c{'%'} format specifier includes the \c{'L'} locale
2010 qualifier, and the base is ten (its default), the default locale is
2011 used. This can be set using \l{QLocale::setDefault()}. For more refined
2012 control of localized string representations of numbers, see
2013 QLocale::toString(). All other number formatting done by QString follows the
2014 C locale's representation of numbers.
2015
2016 When QString::arg() applies left-padding to numbers, the fill character
2017 \c{'0'} is treated specially. If the number is negative, its minus sign
2018 appears before the zero-padding. If the field is localized, the
2019 locale-appropriate zero character is used in place of \c{'0'}. For
2020 floating-point numbers, this special treatment only applies if the number is
2021 finite.
2022
2023 \section2 Floating-point formats
2024
2025 In member functions (for example, arg() and number()) that format floating-point
2026 numbers (\c float or \c double) as strings, the representation used can be
2027 controlled by a choice of \e format and \e precision, whose meanings are as
2028 for \l {QLocale::toString(double, char, int)}.
2029
2030 If the selected \e format includes an exponent, localized forms follow the
2031 locale's convention on digits in the exponent. For non-localized formatting,
2032 the exponent shows its sign and includes at least two digits, left-padding
2033 with zero if needed.
2034
2035 \section1 More efficient string construction
2036
2037 Many strings are known at compile time. The QString constructor from
2038 C++ string literals will copy the contents of the string,
2039 treating the contents as UTF-8. This requires memory allocation and
2040 re-encoding string data, operations that will happen at runtime.
2041 If the string data is known at compile time, you can use the QStringLiteral
2042 macro or similarly \c{operator""_s} to create QString's payload at compile
2043 time instead.
2044
2045 Using the QString \c{'+'} operator, it is easy to construct a
2046 complex string from multiple substrings. You will often write code
2047 like this:
2048
2049 \snippet qstring/stringbuilder.cpp 0
2050
2051 There is nothing wrong with either of these string constructions,
2052 but there are a few hidden inefficiencies:
2053
2054 First, repeated use of the \c{'+'} operator may lead to
2055 multiple memory allocations. When concatenating \e{n} substrings,
2056 where \e{n > 2}, there can be as many as \e{n - 1} calls to the
2057 memory allocator.
2058
2059 These allocations can be optimized by an internal class
2060 \c{QStringBuilder}. This class is marked
2061 internal and does not appear in the documentation, because you
2062 aren't meant to instantiate it in your code. Its use will be
2063 automatic, as described below.
2064
2065 \c{QStringBuilder} uses expression templates and reimplements the
2066 \c{'%'} operator so that when you use \c{'%'} for string
2067 concatenation instead of \c{'+'}, multiple substring
2068 concatenations will be postponed until the final result is about
2069 to be assigned to a QString. At this point, the amount of memory
2070 required for the final result is known. The memory allocator is
2071 then called \e{once} to get the required space, and the substrings
2072 are copied into it one by one.
2073
2074 Additional efficiency is gained by inlining and reducing reference
2075 counting (the QString created from a \c{QStringBuilder}
2076 has a ref count of 1, whereas QString::append() needs an extra
2077 test).
2078
2079 There are two ways you can access this improved method of string
2080 construction. The straightforward way is to include
2081 \c{QStringBuilder} wherever you want to use it and use the
2082 \c{'%'} operator instead of \c{'+'} when concatenating strings:
2083
2084 \snippet qstring/stringbuilder.cpp 5
2085
2086 A more global approach, which is more convenient but not entirely
2087 source-compatible, is to define \c QT_USE_QSTRINGBUILDER (by adding
2088 it to the compiler flags) at build time. This will make concatenating
2089 strings with \c{'+'} work the same way as \c{QStringBuilder}'s \c{'%'}.
2090
2091 \note Using automatic type deduction (for example, by using the \c
2092 auto keyword) with the result of string concatenation when QStringBuilder
2093 is enabled will show that the concatenation is indeed an object of a
2094 QStringBuilder specialization:
2095
2096 \snippet qstring/stringbuilder.cpp 6
2097
2098 This does not cause any harm, as QStringBuilder will implicitly convert to
2099 QString when required. If this is undesirable, then one should specify
2100 the necessary types instead of having the compiler deduce them:
2101
2102 \snippet qstring/stringbuilder.cpp 7
2103
2104 \section1 Maximum size and out-of-memory conditions
2105
2106 The maximum size of QString depends on the architecture. Most 64-bit
2107 systems can allocate more than 2 GB of memory, with a typical limit
2108 of 2^63 bytes. The actual value also depends on the overhead required for
2109 managing the data block. As a result, you can expect a maximum size
2110 of 2 GB minus overhead on 32-bit platforms and 2^63 bytes minus overhead
2111 on 64-bit platforms. The number of elements that can be stored in a
2112 QString is this maximum size divided by the size of QChar.
2113
2114 When memory allocation fails, QString throws a \c std::bad_alloc
2115 exception if the application was compiled with exception support.
2116 Out-of-memory conditions in Qt containers are the only cases where Qt
2117 will throw exceptions. If exceptions are disabled, then running out of
2118 memory is undefined behavior.
2119
2120 \note Target operating systems may impose limits on how much memory an
2121 application can allocate, in total, or on the size of individual allocations.
2122 This may further restrict the size of string a QString can hold.
2123 Mitigating or controlling the behavior these limits cause is beyond the
2124 scope of the Qt API.
2125
2126 \sa {Which string class to use?}, fromRawData(), QChar, QStringView,
2127 QLatin1StringView, QByteArray
2128*/
2129
2130/*! \typedef QString::ConstIterator
2131
2132 Qt-style synonym for QString::const_iterator.
2133*/
2134
2135/*! \typedef QString::Iterator
2136
2137 Qt-style synonym for QString::iterator.
2138*/
2139
2140/*! \typedef QString::const_iterator
2141
2142 \sa QString::iterator
2143*/
2144
2145/*! \typedef QString::iterator
2146
2147 \sa QString::const_iterator
2148*/
2149
2150/*! \typedef QString::const_reverse_iterator
2151 \since 5.6
2152
2153 \sa QString::reverse_iterator, QString::const_iterator
2154*/
2155
2156/*! \typedef QString::reverse_iterator
2157 \since 5.6
2158
2159 \sa QString::const_reverse_iterator, QString::iterator
2160*/
2161
2162/*!
2163 \typedef QString::size_type
2164*/
2165
2166/*!
2167 \typedef QString::difference_type
2168*/
2169
2170/*!
2171 \typedef QString::const_reference
2172*/
2173/*!
2174 \typedef QString::reference
2175*/
2176
2177/*!
2178 \typedef QString::const_pointer
2179
2180 The QString::const_pointer typedef provides an STL-style
2181 const pointer to a QString element (QChar).
2182*/
2183/*!
2184 \typedef QString::pointer
2185
2186 The QString::pointer typedef provides an STL-style
2187 pointer to a QString element (QChar).
2188*/
2189
2190/*!
2191 \typedef QString::value_type
2192*/
2193
2194/*! \fn QString::iterator QString::begin()
2195
2196 Returns an \l{STL-style iterators}{STL-style iterator} pointing to the
2197 first character in the string.
2198
2199//! [iterator-invalidation-func-desc]
2200 \warning The returned iterator is invalidated on detachment or when the
2201 QString is modified.
2202//! [iterator-invalidation-func-desc]
2203
2204 \sa constBegin(), end()
2205*/
2206
2207/*! \fn QString::const_iterator QString::begin() const
2208
2209 \overload begin()
2210*/
2211
2212/*! \fn QString::const_iterator QString::cbegin() const
2213 \since 5.0
2214
2215 Returns a const \l{STL-style iterators}{STL-style iterator} pointing to the
2216 first character in the string.
2217
2218 \include qstring.cpp iterator-invalidation-func-desc
2219
2220 \sa begin(), cend()
2221*/
2222
2223/*! \fn QString::const_iterator QString::constBegin() const
2224
2225 Returns a const \l{STL-style iterators}{STL-style iterator} pointing to the
2226 first character in the string.
2227
2228 \include qstring.cpp iterator-invalidation-func-desc
2229
2230 \sa begin(), constEnd()
2231*/
2232
2233/*! \fn QString::iterator QString::end()
2234
2235 Returns an \l{STL-style iterators}{STL-style iterator} pointing just after
2236 the last character in the string.
2237
2238 \include qstring.cpp iterator-invalidation-func-desc
2239
2240 \sa begin(), constEnd()
2241*/
2242
2243/*! \fn QString::const_iterator QString::end() const
2244
2245 \overload end()
2246*/
2247
2248/*! \fn QString::const_iterator QString::cend() const
2249 \since 5.0
2250
2251 Returns a const \l{STL-style iterators}{STL-style iterator} pointing just
2252 after the last character in the string.
2253
2254 \include qstring.cpp iterator-invalidation-func-desc
2255
2256 \sa cbegin(), end()
2257*/
2258
2259/*! \fn QString::const_iterator QString::constEnd() const
2260
2261 Returns a const \l{STL-style iterators}{STL-style iterator} pointing just
2262 after the last character in the string.
2263
2264 \include qstring.cpp iterator-invalidation-func-desc
2265
2266 \sa constBegin(), end()
2267*/
2268
2269/*! \fn QString::reverse_iterator QString::rbegin()
2270 \since 5.6
2271
2272 Returns a \l{STL-style iterators}{STL-style} reverse iterator pointing to
2273 the first character in the string, in reverse order.
2274
2275 \include qstring.cpp iterator-invalidation-func-desc
2276
2277 \sa begin(), crbegin(), rend()
2278*/
2279
2280/*! \fn QString::const_reverse_iterator QString::rbegin() const
2281 \since 5.6
2282 \overload
2283*/
2284
2285/*! \fn QString::const_reverse_iterator QString::crbegin() const
2286 \since 5.6
2287
2288 Returns a const \l{STL-style iterators}{STL-style} reverse iterator
2289 pointing to the first character in the string, in reverse order.
2290
2291 \include qstring.cpp iterator-invalidation-func-desc
2292
2293 \sa begin(), rbegin(), rend()
2294*/
2295
2296/*! \fn QString::reverse_iterator QString::rend()
2297 \since 5.6
2298
2299 Returns a \l{STL-style iterators}{STL-style} reverse iterator pointing just
2300 after the last character in the string, in reverse order.
2301
2302 \include qstring.cpp iterator-invalidation-func-desc
2303
2304 \sa end(), crend(), rbegin()
2305*/
2306
2307/*! \fn QString::const_reverse_iterator QString::rend() const
2308 \since 5.6
2309 \overload
2310*/
2311
2312/*! \fn QString::const_reverse_iterator QString::crend() const
2313 \since 5.6
2314
2315 Returns a const \l{STL-style iterators}{STL-style} reverse iterator
2316 pointing just after the last character in the string, in reverse order.
2317
2318 \include qstring.cpp iterator-invalidation-func-desc
2319
2320 \sa end(), rend(), rbegin()
2321*/
2322
2323/*!
2324 \fn QString::QString()
2325
2326 Constructs a null string. Null strings are also considered empty.
2327
2328 \sa isEmpty(), isNull(), {Distinction Between Null and Empty Strings}
2329*/
2330
2331/*!
2332 \fn QString::QString(QString &&other)
2333
2334 Move-constructs a QString instance, making it point at the same
2335 object that \a other was pointing to.
2336
2337 \since 5.2
2338*/
2339
2340/*! \fn QString::QString(const char *str)
2341
2342 Constructs a string initialized with the 8-bit string \a str. The
2343 given const char pointer is converted to Unicode using the
2344 fromUtf8() function.
2345
2346 You can disable this constructor by defining
2347 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
2348 can be useful if you want to ensure that all user-visible strings
2349 go through QObject::tr(), for example.
2350
2351 \note Defining \l QT_RESTRICTED_CAST_FROM_ASCII also disables
2352 this constructor, but enables a \c{QString(const char (&ch)[N])}
2353 constructor instead. Using non-literal input, or input with
2354 embedded NUL characters, or non-7-bit characters is undefined
2355 in this case.
2356
2357 \sa fromLatin1(), fromLocal8Bit(), fromUtf8()
2358*/
2359
2360/*! \fn QString::QString(const char8_t *str)
2361
2362 Constructs a string initialized with the UTF-8 string \a str. The
2363 given const char8_t pointer is converted to Unicode using the
2364 fromUtf8() function.
2365
2366 \since 6.1
2367 \sa fromLatin1(), fromLocal8Bit(), fromUtf8()
2368*/
2369
2370/*!
2371 \fn QString::QString(QStringView sv)
2372
2373 Constructs a string initialized with the string view's data.
2374
2375 The QString will be null if and only if \a sv is null.
2376
2377 \since 6.8
2378
2379 \sa fromUtf16()
2380*/
2381
2382/*
2383//! [from-std-string]
2384Returns a copy of the \a str string. The given string is assumed to be
2385encoded in \1, and is converted to QString using the \2 function.
2386//! [from-std-string]
2387*/
2388
2389/*! \fn QString QString::fromStdString(const std::string &str)
2390
2391 \include qstring.cpp {from-std-string} {UTF-8} {fromUtf8()}
2392
2393 \sa fromLatin1(), fromLocal8Bit(), fromUtf8(), QByteArray::fromStdString()
2394*/
2395
2396/*! \fn QString QString::fromStdWString(const std::wstring &str)
2397
2398 Returns a copy of the \a str string. The given string is assumed
2399 to be encoded in UTF-16 if the size of wchar_t is 2 bytes (for example, on
2400 Windows) and UCS-4 if the size of wchar_t is 4 bytes (most Unix
2401 systems).
2402
2403 \sa fromUtf16(), fromLatin1(), fromLocal8Bit(), fromUtf8(), fromUcs4(),
2404 fromStdU16String(), fromStdU32String()
2405*/
2406
2407/*! \fn QString QString::fromWCharArray(const wchar_t *string, qsizetype size)
2408 \since 4.2
2409
2410 Reads the first \a size code units of the \c wchar_t array to whose start
2411 \a string points, converting them to Unicode and returning the result as
2412 a QString. The encoding used by \c wchar_t is assumed to be UTF-32 if the
2413 type's size is four bytes or UTF-16 if its size is two bytes.
2414
2415 If \a size is -1 (default), the \a string must be '\\0'-terminated.
2416
2417 \sa fromUtf16(), fromLatin1(), fromLocal8Bit(), fromUtf8(), fromUcs4(),
2418 fromStdWString()
2419*/
2420
2421/*! \fn std::wstring QString::toStdWString() const
2422
2423 Returns a std::wstring object with the data contained in this
2424 QString. The std::wstring is encoded in UTF-16 on platforms where
2425 wchar_t is 2 bytes wide (for example, Windows) and in UTF-32 on platforms
2426 where wchar_t is 4 bytes wide (most Unix systems).
2427
2428 This method is mostly useful to pass a QString to a function
2429 that accepts a std::wstring object.
2430
2431 \sa utf16(), toLatin1(), toUtf8(), toLocal8Bit(), toStdU16String(),
2432 toStdU32String()
2433*/
2434
2435qsizetype QString::toUcs4_helper(const char16_t *uc, qsizetype length, char32_t *out)
2436{
2437 qsizetype count = 0;
2438
2439 QStringIterator i(QStringView(uc, length));
2440 while (i.hasNext())
2441 out[count++] = i.next();
2442
2443 return count;
2444}
2445
2446/*! \fn qsizetype QString::toWCharArray(wchar_t *array) const
2447 \since 4.2
2448
2449 Fills the \a array with the data contained in this QString object.
2450 The array is encoded in UTF-16 on platforms where
2451 wchar_t is 2 bytes wide (for example, Windows) and in UTF-32 on platforms
2452 where wchar_t is 4 bytes wide (most Unix systems).
2453
2454 \a array has to be allocated by the caller and contain enough space to
2455 hold the complete string (allocating the array with the same length as the
2456 string is always sufficient).
2457
2458 This function returns the actual length of the string in \a array.
2459
2460 \note This function does not append a null character to the array.
2461
2462 \sa utf16(), toUcs4(), toLatin1(), toUtf8(), toLocal8Bit(), toStdWString(),
2463 QStringView::toWCharArray()
2464*/
2465
2466/*! \fn QString::QString(const QString &other)
2467
2468 Constructs a copy of \a other.
2469
2470 This operation takes \l{constant time}, because QString is
2471 \l{implicitly shared}. This makes returning a QString from a
2472 function very fast. If a shared instance is modified, it will be
2473 copied (copy-on-write), and that takes \l{linear time}.
2474
2475 \sa operator=()
2476*/
2477
2478/*!
2479 Constructs a string initialized with the first \a size characters
2480 of the QChar array \a unicode.
2481
2482 If \a unicode is \nullptr, a null string is constructed.
2483
2484 If \a size is negative, \a unicode is assumed to point to a '\\0'-terminated
2485 array and its length is determined dynamically. The terminating
2486 null character is not considered part of the string.
2487
2488 QString makes a deep copy of the string data. The unicode data is copied as
2489 is and the Byte Order Mark is preserved if present.
2490
2491 \sa fromRawData()
2492*/
2493QString::QString(const QChar *unicode, qsizetype size)
2494{
2495 if (!unicode) {
2496 d.clear();
2497 } else {
2498 if (size < 0)
2499 size = QtPrivate::qustrlen(reinterpret_cast<const char16_t *>(unicode));
2500 if (!size) {
2501 d = DataPointer::fromRawData(&_empty, 0);
2502 } else {
2503 d = DataPointer(size, size);
2504 Q_CHECK_PTR(d.data());
2505 memcpy(d.data(), unicode, size * sizeof(QChar));
2506 d.data()[size] = '\0';
2507 }
2508 }
2509}
2510
2511/*!
2512 Constructs a string of the given \a size with every character set
2513 to \a ch.
2514
2515 \sa fill()
2516*/
2517QString::QString(qsizetype size, QChar ch)
2518{
2519 if (size <= 0) {
2520 d = DataPointer::fromRawData(&_empty, 0);
2521 } else {
2522 d = DataPointer(size, size);
2523 Q_CHECK_PTR(d.data());
2524 d.data()[size] = '\0';
2525 char16_t *b = d.data();
2526 char16_t *e = d.data() + size;
2527 const char16_t value = ch.unicode();
2528 std::fill(b, e, value);
2529 }
2530}
2531
2532/*! \fn QString::QString(qsizetype size, Qt::Initialization)
2533 \internal
2534
2535 Constructs a string of the given \a size without initializing the
2536 characters. This is only used in \c QStringBuilder::toString().
2537*/
2538QString::QString(qsizetype size, Qt::Initialization)
2539{
2540 if (size <= 0) {
2541 d = DataPointer::fromRawData(&_empty, 0);
2542 } else {
2543 d = DataPointer(size, size);
2544 Q_CHECK_PTR(d.data());
2545 d.data()[size] = '\0';
2546 }
2547}
2548
2549/*! \fn QString::QString(QLatin1StringView str)
2550
2551 Constructs a copy of the Latin-1 string viewed by \a str.
2552
2553 \sa fromLatin1()
2554*/
2555
2556/*!
2557 Constructs a string of size 1 containing the character \a ch.
2558*/
2559QString::QString(QChar ch)
2560{
2561 d = DataPointer(1, 1);
2562 Q_CHECK_PTR(d.data());
2563 d.data()[0] = ch.unicode();
2564 d.data()[1] = '\0';
2565}
2566
2567/*! \fn QString::QString(const QByteArray &ba)
2568
2569 Constructs a string initialized with the byte array \a ba. The
2570 given byte array is converted to Unicode using fromUtf8().
2571
2572 You can disable this constructor by defining
2573 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
2574 can be useful if you want to ensure that all user-visible strings
2575 go through QObject::tr(), for example.
2576
2577 \note Any null ('\\0') bytes in the byte array will be included in this
2578 string, converted to Unicode null characters (U+0000). This behavior is
2579 different from Qt 5.x.
2580
2581 \sa fromLatin1(), fromLocal8Bit(), fromUtf8()
2582*/
2583
2584/*! \fn QString::QString(const Null &)
2585 \internal
2586*/
2587
2588/*! \fn QString::QString(QStringPrivate)
2589 \internal
2590*/
2591
2592/*! \fn QString &QString::operator=(const QString::Null &)
2593 \internal
2594*/
2595
2596/*!
2597 \fn QString::~QString()
2598
2599 Destroys the string.
2600*/
2601
2602
2603/*! \fn void QString::swap(QString &other)
2604 \since 4.8
2605 \memberswap{string}
2606*/
2607
2608/*! \fn void QString::detach()
2609
2610 \internal
2611*/
2612
2613/*! \fn bool QString::isDetached() const
2614
2615 \internal
2616*/
2617
2618/*! \fn bool QString::isSharedWith(const QString &other) const
2619
2620 \internal
2621*/
2622
2623/*! \fn QString::operator std::u16string_view() const
2624 \since 6.7
2625
2626 Converts this QString object to a \c{std::u16string_view} object.
2627*/
2628
2629static bool needsReallocate(const QString &str, qsizetype newSize)
2630{
2631 const auto capacityAtEnd = str.capacity() - str.data_ptr().freeSpaceAtBegin();
2632 return newSize > capacityAtEnd;
2633}
2634
2635/*!
2636 Sets the size of the string to \a size characters.
2637
2638 If \a size is greater than the current size, the string is
2639 extended to make it \a size characters long with the extra
2640 characters added to the end. The new characters are uninitialized.
2641
2642 If \a size is less than the current size, characters beyond position
2643 \a size are excluded from the string.
2644
2645 \note While resize() will grow the capacity if needed, it never shrinks
2646 capacity. To shed excess capacity, use squeeze().
2647
2648 Example:
2649
2650 \snippet qstring/main.cpp 45
2651
2652 If you want to append a certain number of identical characters to
2653 the string, use the \l {QString::}{resize(qsizetype, QChar)} overload.
2654
2655 If you want to expand the string so that it reaches a certain
2656 width and fill the new positions with a particular character, use
2657 the leftJustified() function:
2658
2659 \snippet qstring/main.cpp 47
2660
2661 If \a size is negative, it is equivalent to passing zero.
2662
2663 \sa truncate(), reserve(), squeeze()
2664*/
2665
2666void QString::resize(qsizetype size)
2667{
2668 if (size < 0)
2669 size = 0;
2670
2671 if (d->needsDetach() || needsReallocate(*this, size))
2672 reallocData(size, QArrayData::Grow);
2673 d.size = size;
2674 if (d->allocatedCapacity())
2675 d.data()[size] = u'\0';
2676}
2677
2678/*!
2679 \overload
2680 \since 5.7
2681
2682 Unlike \l {QString::}{resize(qsizetype)}, this overload
2683 initializes the new characters to \a fillChar:
2684
2685 \snippet qstring/main.cpp 46
2686*/
2687
2688void QString::resize(qsizetype newSize, QChar fillChar)
2689{
2690 const qsizetype oldSize = size();
2691 resize(newSize);
2692 const qsizetype difference = size() - oldSize;
2693 if (difference > 0)
2694 std::fill_n(d.data() + oldSize, difference, fillChar.unicode());
2695}
2696
2697
2698/*!
2699 \since 6.8
2700
2701 Sets the size of the string to \a size characters. If the size of
2702 the string grows, the new characters are uninitialized.
2703
2704 The behavior is identical to \c{resize(size)}.
2705
2706 \sa resize()
2707*/
2708
2709void QString::resizeForOverwrite(qsizetype size)
2710{
2711 resize(size);
2712}
2713
2714
2715/*! \fn qsizetype QString::capacity() const
2716
2717 Returns the maximum number of characters that can be stored in
2718 the string without forcing a reallocation.
2719
2720 The sole purpose of this function is to provide a means of fine
2721 tuning QString's memory usage. In general, you will rarely ever
2722 need to call this function. If you want to know how many
2723 characters are in the string, call size().
2724
2725 \note A statically allocated string will report a capacity of 0,
2726 even if it's not empty.
2727
2728 \note The free space position in the allocated memory block is undefined. In
2729 other words, one should not assume that the free memory is always located
2730 after the initialized elements.
2731
2732 \sa reserve(), squeeze()
2733*/
2734
2735/*!
2736 \fn void QString::reserve(qsizetype size)
2737
2738 Ensures the string has space for at least \a size characters.
2739
2740 If you know in advance how large a string will be, you can call this
2741 function to save repeated reallocation while building it.
2742 This can improve performance when building a string incrementally.
2743 A long sequence of operations that add to a string may trigger several
2744 reallocations, the last of which may leave you with significantly more
2745 space than you need. This is less efficient than doing a single
2746 allocation of the right size at the start.
2747
2748 If in doubt about how much space shall be needed, it is usually better to
2749 use an upper bound as \a size, or a high estimate of the most likely size,
2750 if a strict upper bound would be much bigger than this. If \a size is an
2751 underestimate, the string will grow as needed once the reserved size is
2752 exceeded, which may lead to a larger allocation than your best
2753 overestimate would have and will slow the operation that triggers it.
2754
2755 \warning reserve() reserves memory but does not change the size of the
2756 string. Accessing data beyond the end of the string is undefined behavior.
2757 If you need to access memory beyond the current end of the string,
2758 use resize().
2759
2760 This function is useful for code that needs to build up a long
2761 string and wants to avoid repeated reallocation. In this example,
2762 we want to add to the string until some condition is \c true, and
2763 we're fairly sure that size is large enough to make a call to
2764 reserve() worthwhile:
2765
2766 \snippet qstring/main.cpp 44
2767
2768 \sa squeeze(), capacity(), resize()
2769*/
2770
2771/*!
2772 \fn void QString::squeeze()
2773
2774 Releases any memory not required to store the character data.
2775
2776 The sole purpose of this function is to provide a means of fine
2777 tuning QString's memory usage. In general, you will rarely ever
2778 need to call this function.
2779
2780 \sa reserve(), capacity()
2781*/
2782
2783void QString::reallocData(qsizetype alloc, QArrayData::AllocationOption option)
2784{
2785 if (!alloc) {
2786 d = DataPointer::fromRawData(&_empty, 0);
2787 return;
2788 }
2789
2790 // don't use reallocate path when reducing capacity and there's free space
2791 // at the beginning: might shift data pointer outside of allocated space
2792 const bool cannotUseReallocate = d.freeSpaceAtBegin() > 0;
2793
2794 if (d->needsDetach() || cannotUseReallocate) {
2795 DataPointer dd(alloc, qMin(alloc, d.size), option);
2796 Q_CHECK_PTR(dd.data());
2797 if (dd.size > 0)
2798 ::memcpy(dd.data(), d.data(), dd.size * sizeof(QChar));
2799 dd.data()[dd.size] = 0;
2800 d.swap(dd);
2801 } else {
2802 d->reallocate(alloc, option);
2803 }
2804}
2805
2806void QString::reallocGrowData(qsizetype n)
2807{
2808 if (!n) // expected to always allocate
2809 n = 1;
2810
2811 if (d->needsDetach()) {
2812 DataPointer dd(DataPointer::allocateGrow(d, n, QArrayData::GrowsAtEnd));
2813 Q_CHECK_PTR(dd.data());
2814 dd->copyAppend(d.data(), d.data() + d.size);
2815 dd.data()[dd.size] = 0;
2816 d.swap(dd);
2817 } else {
2818 d->reallocate(d.constAllocatedCapacity() + n, QArrayData::Grow);
2819 }
2820}
2821
2822/*! \fn void QString::clear()
2823
2824 Clears the contents of the string and makes it null.
2825
2826 \sa resize(), isNull()
2827*/
2828
2829/*! \fn QString &QString::operator=(const QString &other)
2830
2831 Assigns \a other to this string and returns a reference to this
2832 string.
2833*/
2834
2835QString &QString::operator=(const QString &other) noexcept
2836{
2837 d = other.d;
2838 return *this;
2839}
2840
2841/*!
2842 \fn QString &QString::operator=(QString &&other)
2843
2844 Move-assigns \a other to this QString instance.
2845
2846 \since 5.2
2847*/
2848
2849/*! \fn QString &QString::operator=(QLatin1StringView str)
2850
2851 \overload operator=()
2852
2853 Assigns the Latin-1 string viewed by \a str to this string.
2854*/
2855QString &QString::operator=(QLatin1StringView other)
2856{
2857 const qsizetype capacityAtEnd = capacity() - d.freeSpaceAtBegin();
2858 if (isDetached() && other.size() <= capacityAtEnd) { // assumes d->alloc == 0 -> !isDetached() (sharedNull)
2859 d.size = other.size();
2860 d.data()[other.size()] = 0;
2861 qt_from_latin1(d.data(), other.latin1(), other.size());
2862 } else {
2863 *this = fromLatin1(other.latin1(), other.size());
2864 }
2865 return *this;
2866}
2867
2868/*! \fn QString &QString::operator=(const QByteArray &ba)
2869
2870 \overload operator=()
2871
2872 Assigns \a ba to this string. The byte array is converted to Unicode
2873 using the fromUtf8() function.
2874
2875 You can disable this operator by defining
2876 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
2877 can be useful if you want to ensure that all user-visible strings
2878 go through QObject::tr(), for example.
2879*/
2880
2881/*! \fn QString &QString::operator=(const char *str)
2882
2883 \overload operator=()
2884
2885 Assigns \a str to this string. The const char pointer is converted
2886 to Unicode using the fromUtf8() function.
2887
2888 You can disable this operator by defining \l QT_NO_CAST_FROM_ASCII
2889 or \l QT_RESTRICTED_CAST_FROM_ASCII when you compile your applications.
2890 This can be useful if you want to ensure that all user-visible strings
2891 go through QObject::tr(), for example.
2892*/
2893
2894/*!
2895 \overload operator=()
2896
2897 Sets the string to contain the single character \a ch.
2898*/
2899QString &QString::operator=(QChar ch)
2900{
2901 return assign(1, ch);
2902}
2903
2904/*!
2905 \fn QString& QString::insert(qsizetype position, const QString &str)
2906
2907 Inserts the string \a str at the given index \a position and
2908 returns a reference to this string.
2909
2910 Example:
2911
2912 \snippet qstring/main.cpp 26
2913
2914//! [string-grow-at-insertion]
2915 This string grows to accommodate the insertion. If \a position is beyond
2916 the end of the string, space characters are appended to the string to reach
2917 this \a position, followed by \a str.
2918//! [string-grow-at-insertion]
2919
2920 \sa append(), prepend(), replace(), remove()
2921*/
2922
2923/*!
2924 \fn QString& QString::insert(qsizetype position, QStringView str)
2925 \since 6.0
2926 \overload insert()
2927
2928 Inserts the string view \a str at the given index \a position and
2929 returns a reference to this string.
2930
2931 \include qstring.cpp string-grow-at-insertion
2932*/
2933
2934
2935/*!
2936 \fn QString& QString::insert(qsizetype position, const char *str)
2937 \since 5.5
2938 \overload insert()
2939
2940 Inserts the C string \a str at the given index \a position and
2941 returns a reference to this string.
2942
2943 \include qstring.cpp string-grow-at-insertion
2944
2945 This function is not available when \l QT_NO_CAST_FROM_ASCII is
2946 defined.
2947*/
2948
2949/*!
2950 \fn QString& QString::insert(qsizetype position, const QByteArray &str)
2951 \since 5.5
2952 \overload insert()
2953
2954 Interprets the contents of \a str as UTF-8, inserts the Unicode string
2955 it encodes at the given index \a position and returns a reference to
2956 this string.
2957
2958 \include qstring.cpp string-grow-at-insertion
2959
2960 This function is not available when \l QT_NO_CAST_FROM_ASCII is
2961 defined.
2962*/
2963
2964/*! \internal
2965 T is a view or a container on/of QChar, char16_t, or char
2966*/
2967template <typename T>
2968static void insert_helper(QString &str, qsizetype i, const T &toInsert)
2969{
2970 auto &str_d = str.data_ptr();
2971 qsizetype difference = 0;
2972 if (Q_UNLIKELY(i > str_d.size))
2973 difference = i - str_d.size;
2974 const qsizetype oldSize = str_d.size;
2975 const qsizetype insert_size = toInsert.size();
2976 const qsizetype newSize = str_d.size + difference + insert_size;
2977 const auto side = i == 0 ? QArrayData::GrowsAtBeginning : QArrayData::GrowsAtEnd;
2978
2979 if (str_d.needsDetach() || needsReallocate(str, newSize)) {
2980 const auto cbegin = str.cbegin();
2981 const auto cend = str.cend();
2982 const auto insert_start = difference == 0 ? std::next(cbegin, i) : cend;
2983 QString other;
2984 // Using detachAndGrow() so that prepend optimization works and QStringBuilder
2985 // unittests pass
2986 other.data_ptr().detachAndGrow(side, newSize, nullptr, nullptr);
2987 other.append(QStringView(cbegin, insert_start));
2988 other.resize(i, u' ');
2989 other.append(toInsert);
2990 other.append(QStringView(insert_start, cend));
2991 str.swap(other);
2992 return;
2993 }
2994
2995 str_d.detachAndGrow(side, difference + insert_size, nullptr, nullptr);
2996 Q_CHECK_PTR(str_d.data());
2997 str.resize(newSize);
2998
2999 auto begin = str_d.begin();
3000 auto old_end = std::next(begin, oldSize);
3001 std::fill_n(old_end, difference, u' ');
3002 auto insert_start = std::next(begin, i);
3003 if (difference == 0)
3004 std::move_backward(insert_start, old_end, str_d.end());
3005
3006 using Char = std::remove_cv_t<typename T::value_type>;
3007 if constexpr(std::is_same_v<Char, QChar>)
3008 std::copy_n(reinterpret_cast<const char16_t *>(toInsert.data()), insert_size, insert_start);
3009 else if constexpr (std::is_same_v<Char, char16_t>)
3010 std::copy_n(toInsert.data(), insert_size, insert_start);
3011 else if constexpr (std::is_same_v<Char, char>)
3012 qt_from_latin1(insert_start, toInsert.data(), insert_size);
3013}
3014
3015/*!
3016 \fn QString &QString::insert(qsizetype position, QLatin1StringView str)
3017 \overload insert()
3018
3019 Inserts the Latin-1 string viewed by \a str at the given index \a position.
3020
3021 \include qstring.cpp string-grow-at-insertion
3022*/
3023QString &QString::insert(qsizetype i, QLatin1StringView str)
3024{
3025 const char *s = str.latin1();
3026 if (i < 0 || !s || !(*s))
3027 return *this;
3028
3029 insert_helper(*this, i, str);
3030 return *this;
3031}
3032
3033/*!
3034 \fn QString &QString::insert(qsizetype position, QUtf8StringView str)
3035 \overload insert()
3036 \since 6.5
3037
3038 Inserts the UTF-8 string view \a str at the given index \a position.
3039
3040 \note Inserting variable-width UTF-8-encoded string data is conceptually slower
3041 than inserting fixed-width string data such as UTF-16 (QStringView) or Latin-1
3042 (QLatin1StringView) and should thus be used sparingly.
3043
3044 \include qstring.cpp string-grow-at-insertion
3045*/
3046QString &QString::insert(qsizetype i, QUtf8StringView s)
3047{
3048 auto insert_size = s.size();
3049 if (i < 0 || insert_size <= 0)
3050 return *this;
3051
3052 qsizetype difference = 0;
3053 if (Q_UNLIKELY(i > d.size))
3054 difference = i - d.size;
3055
3056 const qsizetype newSize = d.size + difference + insert_size;
3057
3058 if (d.needsDetach() || needsReallocate(*this, newSize)) {
3059 const auto cbegin = this->cbegin();
3060 const auto insert_start = difference == 0 ? std::next(cbegin, i) : cend();
3061 QString other;
3062 other.reserve(newSize);
3063 other.append(QStringView(cbegin, insert_start));
3064 if (difference > 0)
3065 other.resize(i, u' ');
3066 other.append(s);
3067 other.append(QStringView(insert_start, cend()));
3068 swap(other);
3069 return *this;
3070 }
3071
3072 if (i >= d.size) {
3073 d.detachAndGrow(QArrayData::GrowsAtEnd, difference + insert_size, nullptr, nullptr);
3074 Q_CHECK_PTR(d.data());
3075
3076 if (difference > 0)
3077 resize(i, u' ');
3078 append(s);
3079 } else {
3080 // Optimal insertion of Utf8 data is at the end, anywhere else could
3081 // potentially lead to moving characters twice if Utf8 data size
3082 // (variable-width) is less than the equivalent Utf16 data size
3083 QVarLengthArray<char16_t> buffer(insert_size); // ### optimize (QTBUG-108546)
3084 char16_t *b = QUtf8::convertToUnicode(buffer.data(), s);
3085 insert_helper(*this, i, QStringView(buffer.data(), b));
3086 }
3087
3088 return *this;
3089}
3090
3091/*!
3092 \fn QString& QString::insert(qsizetype position, const QChar *unicode, qsizetype size)
3093 \overload insert()
3094
3095 Inserts the first \a size characters of the QChar array \a unicode
3096 at the given index \a position in the string.
3097
3098 This string grows to accommodate the insertion. If \a position is beyond
3099 the end of the string, space characters are appended to the string to reach
3100 this \a position, followed by \a size characters of the QChar array
3101 \a unicode.
3102*/
3103QString& QString::insert(qsizetype i, const QChar *unicode, qsizetype size)
3104{
3105 if (i < 0 || size <= 0)
3106 return *this;
3107
3108 // In case when data points into "this"
3109 if (!d->needsDetach() && QtPrivate::q_points_into_range(unicode, *this)) {
3110 QVarLengthArray copy(unicode, unicode + size);
3111 insert(i, copy.data(), size);
3112 } else {
3113 insert_helper(*this, i, QStringView(unicode, size));
3114 }
3115
3116 return *this;
3117}
3118
3119/*!
3120 \fn QString& QString::insert(qsizetype position, QChar ch)
3121 \overload insert()
3122
3123 Inserts \a ch at the given index \a position in the string.
3124
3125 This string grows to accommodate the insertion. If \a position is beyond
3126 the end of the string, space characters are appended to the string to reach
3127 this \a position, followed by \a ch.
3128*/
3129
3130QString& QString::insert(qsizetype i, QChar ch)
3131{
3132 if (i < 0)
3133 i += d.size;
3134 return insert(i, &ch, 1);
3135}
3136
3137/*!
3138 Appends the string \a str onto the end of this string.
3139
3140 Example:
3141
3142 \snippet qstring/main.cpp 9
3143
3144 This is the same as using the insert() function:
3145
3146 \snippet qstring/main.cpp 10
3147
3148 The append() function is typically very fast (\l{constant time}),
3149 because QString preallocates extra space at the end of the string
3150 data so it can grow without reallocating the entire string each
3151 time.
3152
3153 \sa operator+=(), prepend(), insert()
3154*/
3155QString &QString::append(const QString &str)
3156{
3157 if (!str.isNull()) {
3158 if (isNull()) {
3159 if (Q_UNLIKELY(!str.d.isMutable()))
3160 assign(str); // fromRawData, so we do a deep copy
3161 else
3162 operator=(str);
3163 } else if (str.size()) {
3164 append(str.constData(), str.size());
3165 }
3166 }
3167 return *this;
3168}
3169
3170/*!
3171 \fn QString &QString::append(QStringView v)
3172 \overload append()
3173 \since 6.0
3174
3175 Appends the given string view \a v to this string and returns the result.
3176*/
3177
3178/*!
3179 \overload append()
3180 \since 5.0
3181
3182 Appends \a len characters from the QChar array \a str to this string.
3183*/
3184QString &QString::append(const QChar *str, qsizetype len)
3185{
3186 if (str && len > 0) {
3187 static_assert(sizeof(QChar) == sizeof(char16_t), "Unexpected difference in sizes");
3188 // the following should be safe as QChar uses char16_t as underlying data
3189 const char16_t *char16String = reinterpret_cast<const char16_t *>(str);
3190 d->growAppend(char16String, char16String + len);
3191 d.data()[d.size] = u'\0';
3192 }
3193 return *this;
3194}
3195
3196/*!
3197 \overload append()
3198
3199 Appends the Latin-1 string viewed by \a str to this string.
3200*/
3201QString &QString::append(QLatin1StringView str)
3202{
3203 append_helper(*this, str);
3204 return *this;
3205}
3206
3207/*!
3208 \overload append()
3209 \since 6.5
3210
3211 Appends the UTF-8 string view \a str to this string.
3212*/
3213QString &QString::append(QUtf8StringView str)
3214{
3215 append_helper(*this, str);
3216 return *this;
3217}
3218
3219/*! \fn QString &QString::append(const QByteArray &ba)
3220
3221 \overload append()
3222
3223 Appends the byte array \a ba to this string. The given byte array
3224 is converted to Unicode using the fromUtf8() function.
3225
3226 You can disable this function by defining \l QT_NO_CAST_FROM_ASCII
3227 when you compile your applications. This can be useful if you want
3228 to ensure that all user-visible strings go through QObject::tr(),
3229 for example.
3230*/
3231
3232/*! \fn QString &QString::append(const char *str)
3233
3234 \overload append()
3235
3236 Appends the string \a str to this string. The given const char
3237 pointer is converted to Unicode using the fromUtf8() function.
3238
3239 You can disable this function by defining \l QT_NO_CAST_FROM_ASCII
3240 when you compile your applications. This can be useful if you want
3241 to ensure that all user-visible strings go through QObject::tr(),
3242 for example.
3243*/
3244
3245/*!
3246 \overload append()
3247
3248 Appends the character \a ch to this string.
3249*/
3250QString &QString::append(QChar ch)
3251{
3252 d.detachAndGrow(QArrayData::GrowsAtEnd, 1, nullptr, nullptr);
3253 d->copyAppend(1, ch.unicode());
3254 d.data()[d.size] = '\0';
3255 return *this;
3256}
3257
3258/*! \fn QString &QString::prepend(const QString &str)
3259
3260 Prepends the string \a str to the beginning of this string and
3261 returns a reference to this string.
3262
3263 This operation is typically very fast (\l{constant time}), because
3264 QString preallocates extra space at the beginning of the string data,
3265 so it can grow without reallocating the entire string each time.
3266
3267 Example:
3268
3269 \snippet qstring/main.cpp 36
3270
3271 \sa append(), insert()
3272*/
3273
3274/*! \fn QString &QString::prepend(QLatin1StringView str)
3275
3276 \overload prepend()
3277
3278 Prepends the Latin-1 string viewed by \a str to this string.
3279*/
3280
3281/*! \fn QString &QString::prepend(QUtf8StringView str)
3282 \since 6.5
3283 \overload prepend()
3284
3285 Prepends the UTF-8 string view \a str to this string.
3286*/
3287
3288/*! \fn QString &QString::prepend(const QChar *str, qsizetype len)
3289 \since 5.5
3290 \overload prepend()
3291
3292 Prepends \a len characters from the QChar array \a str to this string and
3293 returns a reference to this string.
3294*/
3295
3296/*! \fn QString &QString::prepend(QStringView str)
3297 \since 6.0
3298 \overload prepend()
3299
3300 Prepends the string view \a str to the beginning of this string and
3301 returns a reference to this string.
3302*/
3303
3304/*! \fn QString &QString::prepend(const QByteArray &ba)
3305
3306 \overload prepend()
3307
3308 Prepends the byte array \a ba to this string. The byte array is
3309 converted to Unicode using the fromUtf8() function.
3310
3311 You can disable this function by defining
3312 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
3313 can be useful if you want to ensure that all user-visible strings
3314 go through QObject::tr(), for example.
3315*/
3316
3317/*! \fn QString &QString::prepend(const char *str)
3318
3319 \overload prepend()
3320
3321 Prepends the string \a str to this string. The const char pointer
3322 is converted to Unicode using the fromUtf8() function.
3323
3324 You can disable this function by defining
3325 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
3326 can be useful if you want to ensure that all user-visible strings
3327 go through QObject::tr(), for example.
3328*/
3329
3330/*! \fn QString &QString::prepend(QChar ch)
3331
3332 \overload prepend()
3333
3334 Prepends the character \a ch to this string.
3335*/
3336
3337/*!
3338 \fn QString &QString::assign(QAnyStringView v)
3339 \since 6.6
3340
3341 Replaces the contents of this string with a copy of \a v and returns a
3342 reference to this string.
3343
3344 The size of this string will be equal to the size of \a v, converted to
3345 UTF-16 as if by \c{v.toString()}. Unlike QAnyStringView::toString(), however,
3346 this function only allocates memory if the estimated size exceeds the capacity
3347 of this string or this string is shared.
3348
3349 \sa QAnyStringView::toString()
3350*/
3351
3352/*!
3353 \fn QString &QString::assign(qsizetype n, QChar c)
3354 \since 6.6
3355
3356 Replaces the contents of this string with \a n copies of \a c and
3357 returns a reference to this string.
3358
3359 The size of this string will be equal to \a n, which has to be non-negative.
3360
3361 This function will only allocate memory if \a n exceeds the capacity of this
3362 string or this string is shared.
3363
3364 \sa fill()
3365*/
3366
3367/*!
3368 \fn template <typename InputIterator, QString::if_compatible_iterator<InputIterator>> QString &QString::assign(InputIterator first, InputIterator last)
3369 \since 6.6
3370
3371 Replaces the contents of this string with a copy of the elements in the
3372 iterator range [\a first, \a last) and returns a reference to this string.
3373
3374 The size of this string will be equal to the decoded length of the elements
3375 in the range [\a first, \a last), which need not be the same as the length of
3376 the range itself, because this function transparently recodes the input
3377 character set to UTF-16.
3378
3379 This function will only allocate memory if the number of elements in the
3380 range, or, for non-UTF-16-encoded input, the maximum possible size of the
3381 resulting string, exceeds the capacity of this string, or if this string is
3382 shared.
3383
3384 \note The behavior is undefined if either argument is an iterator into *this or
3385 [\a first, \a last) is not a valid range.
3386
3387 \constraints
3388 \c InputIterator meets the requirements of a
3389 \l {https://en.cppreference.com/w/cpp/named_req/InputIterator} {LegacyInputIterator}
3390 and the \c{value_type} of \c InputIterator is one of the following character types:
3391 \list
3392 \li QChar
3393 \li QLatin1Char
3394 \li \c {char}
3395 \li \c {unsigned char}
3396 \li \c {signed char}
3397 \li \c {char8_t}
3398 \li \c char16_t
3399 \li (on platforms, such as Windows, where it is a 16-bit type) \c wchar_t
3400 \li \c char32_t
3401 \endlist
3402*/
3403
3404QString &QString::assign(QAnyStringView s)
3405{
3406 if (s.size() <= capacity() && isDetached()) {
3407 const auto offset = d.freeSpaceAtBegin();
3408 if (offset)
3409 d.setBegin(d.begin() - offset);
3410 resize(0);
3411 s.visit([this](auto input) {
3412 this->append(input);
3413 });
3414 } else {
3415 *this = s.toString();
3416 }
3417 return *this;
3418}
3419
3420#ifndef QT_BOOTSTRAPPED
3421QString &QString::assign_helper(const char32_t *data, qsizetype len)
3422{
3423 // worst case: each char32_t requires a surrogate pair, so
3424 const auto requiredCapacity = len * 2;
3425 if (requiredCapacity <= capacity() && isDetached()) {
3426 const auto offset = d.freeSpaceAtBegin();
3427 if (offset)
3428 d.setBegin(d.begin() - offset);
3429 auto begin = reinterpret_cast<QChar *>(d.begin());
3430 auto ba = QByteArrayView(reinterpret_cast<const std::byte*>(data), len * sizeof(char32_t));
3431 QStringConverter::State state;
3432 const auto end = QUtf32::convertToUnicode(begin, ba, &state, DetectEndianness);
3433 d.size = end - begin;
3434 d.data()[d.size] = u'\0';
3435 } else {
3436 *this = QString::fromUcs4(data, len);
3437 }
3438 return *this;
3439}
3440#endif
3441
3442/*!
3443 \fn QString &QString::remove(qsizetype position, qsizetype n)
3444
3445 Removes \a n characters from the string, starting at the given \a
3446 position index, and returns a reference to the string.
3447
3448 If the specified \a position index is within the string, but \a
3449 position + \a n is beyond the end of the string, the string is
3450 truncated at the specified \a position.
3451
3452 If \a n is <= 0 nothing is changed.
3453
3454 \snippet qstring/main.cpp 37
3455
3456//! [shrinking-erase]
3457 Element removal will preserve the string's capacity and not reduce the
3458 amount of allocated memory. To shed extra capacity and free as much memory
3459 as possible, call squeeze() after the last change to the string's size.
3460//! [shrinking-erase]
3461
3462 \sa insert(), replace()
3463*/
3464QString &QString::remove(qsizetype pos, qsizetype len)
3465{
3466 if (pos < 0) // count from end of string
3467 pos += size();
3468
3469 if (size_t(pos) >= size_t(size()) || len <= 0)
3470 return *this;
3471
3472 len = std::min(len, size() - pos);
3473
3474 if (!d->isShared()) {
3475 d->erase(d.begin() + pos, len);
3476 d.data()[d.size] = u'\0';
3477 } else {
3478 // TODO: either reserve "size()", which is bigger than needed, or
3479 // modify the shrinking-erase docs of this method (since the size
3480 // of "copy" won't have any extra capacity any more)
3481 const qsizetype sz = size() - len;
3482 QString copy{sz, Qt::Uninitialized};
3483 auto begin = d.begin();
3484 auto toRemove_start = d.begin() + pos;
3485 copy.d->copyRanges({{begin, toRemove_start},
3486 {toRemove_start + len, d.end()}});
3487 swap(copy);
3488 }
3489 return *this;
3490}
3491
3492template<typename T>
3493static void removeStringImpl(QString &s, const T &needle, Qt::CaseSensitivity cs)
3494{
3495 const auto needleSize = needle.size();
3496 if (!needleSize)
3497 return;
3498
3499 // avoid detach if nothing to do:
3500 qsizetype i = s.indexOf(needle, 0, cs);
3501 if (i < 0)
3502 return;
3503
3504 QString::DataPointer &dptr = s.data_ptr();
3505 auto begin = dptr.begin();
3506 auto end = dptr.end();
3507
3508 auto copyFunc = [&](auto &dst) {
3509 auto src = begin + i + needleSize;
3510 while (src < end) {
3511 i = s.indexOf(needle, std::distance(begin, src), cs);
3512 auto hit = i == -1 ? end : begin + i;
3513 dst = std::copy(src, hit, dst);
3514 src = hit + needleSize;
3515 }
3516 return dst;
3517 };
3518
3519 if (!dptr->needsDetach()) {
3520 auto dst = begin + i;
3521 dst = copyFunc(dst);
3522 s.truncate(std::distance(begin, dst));
3523 } else {
3524 QString copy{s.size(), Qt::Uninitialized};
3525 auto copy_begin = copy.begin();
3526 auto dst = std::copy(begin, begin + i, copy_begin); // Chunk before the first hit
3527 dst = copyFunc(dst);
3528 copy.resize(std::distance(copy_begin, dst));
3529 s.swap(copy);
3530 }
3531}
3532
3533/*!
3534 Removes every occurrence of the given \a str string in this
3535 string, and returns a reference to this string.
3536
3537 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
3538
3539 This is the same as \c replace(str, "", cs).
3540
3541 \include qstring.cpp shrinking-erase
3542
3543 \sa replace()
3544*/
3545QString &QString::remove(const QString &str, Qt::CaseSensitivity cs)
3546{
3547 const auto s = str.d.data();
3548 if (QtPrivate::q_points_into_range(s, d))
3549 removeStringImpl(*this, QStringView{QVarLengthArray(s, s + str.size())}, cs);
3550 else
3551 removeStringImpl(*this, qToStringViewIgnoringNull(str), cs);
3552 return *this;
3553}
3554
3555/*!
3556 \since 5.11
3557 \overload
3558
3559 Removes every occurrence of the given Latin-1 string viewed by \a str
3560 from this string, and returns a reference to this string.
3561
3562 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
3563
3564 This is the same as \c replace(str, "", cs).
3565
3566 \include qstring.cpp shrinking-erase
3567
3568 \sa replace()
3569*/
3570QString &QString::remove(QLatin1StringView str, Qt::CaseSensitivity cs)
3571{
3572 removeStringImpl(*this, str, cs);
3573 return *this;
3574}
3575
3576/*!
3577 \fn QString &QString::removeAt(qsizetype pos)
3578
3579 \since 6.5
3580
3581 Removes the character at index \a pos. If \a pos is out of bounds
3582 (i.e. \a pos >= size()), this function does nothing.
3583
3584 \sa remove()
3585*/
3586
3587/*!
3588 \fn QString &QString::removeFirst()
3589
3590 \since 6.5
3591
3592 Removes the first character in this string. If the string is empty,
3593 this function does nothing.
3594
3595 \sa remove()
3596*/
3597
3598/*!
3599 \fn QString &QString::removeLast()
3600
3601 \since 6.5
3602
3603 Removes the last character in this string. If the string is empty,
3604 this function does nothing.
3605
3606 \sa remove()
3607*/
3608
3609/*!
3610 Removes every occurrence of the character \a ch in this string, and
3611 returns a reference to this string.
3612
3613 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
3614
3615 Example:
3616
3617 \snippet qstring/main.cpp 38
3618
3619 This is the same as \c replace(ch, "", cs).
3620
3621 \include qstring.cpp shrinking-erase
3622
3623 \sa replace()
3624*/
3625QString &QString::remove(QChar ch, Qt::CaseSensitivity cs)
3626{
3627 const qsizetype idx = indexOf(ch, 0, cs);
3628 if (idx == -1)
3629 return *this;
3630
3631 const bool isCase = cs == Qt::CaseSensitive;
3632 ch = isCase ? ch : ch.toCaseFolded();
3633 auto match = [ch, isCase](QChar x) {
3634 return ch == (isCase ? x : x.toCaseFolded());
3635 };
3636
3637
3638 auto begin = d.begin();
3639 auto first_match = begin + idx;
3640 auto end = d.end();
3641 if (!d->isShared()) {
3642 auto it = std::remove_if(first_match, end, match);
3643 d->erase(it, std::distance(it, end));
3644 d.data()[d.size] = u'\0';
3645 } else {
3646 // Instead of detaching, create a new string and copy all characters except for
3647 // the ones we're removing
3648 // TODO: size() is more than the needed since "copy" would be shorter
3649 QString copy{size(), Qt::Uninitialized};
3650 auto dst = copy.d.begin();
3651 auto it = std::copy(begin, first_match, dst); // Chunk before idx
3652 it = std::remove_copy_if(first_match + 1, end, it, match);
3653 copy.d.size = std::distance(dst, it);
3654 copy.d.data()[copy.d.size] = u'\0';
3655 *this = std::move(copy);
3656 }
3657 return *this;
3658}
3659
3660/*!
3661 \fn QString &QString::remove(const QRegularExpression &re)
3662 \since 5.0
3663
3664 Removes every occurrence of the regular expression \a re in the
3665 string, and returns a reference to the string. For example:
3666
3667 \snippet qstring/main.cpp 96
3668
3669 \include qstring.cpp shrinking-erase
3670
3671 \sa indexOf(), lastIndexOf(), replace()
3672*/
3673
3674/*!
3675 \fn template <typename Predicate> QString &QString::removeIf(Predicate pred)
3676 \since 6.1
3677
3678 Removes all elements for which the predicate \a pred returns true
3679 from the string. Returns a reference to the string.
3680
3681 \sa remove()
3682*/
3683
3684static void replace_helper(QString &str, QSpan<qsizetype> indices, qsizetype blen, QStringView after)
3685{
3686 const qsizetype oldSize = str.data_ptr().size;
3687 const qsizetype adjust = indices.size() * (after.size() - blen);
3688 const qsizetype newSize = oldSize + adjust;
3689 using A = QStringAlgorithms<QString>;
3690 if (str.data_ptr().needsDetach() || needsReallocate(str, newSize)) {
3691 A::replace_helper(str, blen, after, indices);
3692 return;
3693 }
3694
3695 if (QtPrivate::q_points_into_range(after.begin(), str)) {
3696 // Copy after if it lies inside our own d.b area (which we could
3697 // possibly invalidate via a realloc or modify by replacement)
3698 A::replace_helper(str, blen, QVarLengthArray(after.begin(), after.end()), indices);
3699 } else {
3700 A::replace_helper(str, blen, after, indices);
3701 }
3702}
3703
3704/*!
3705 \fn QString &QString::replace(qsizetype position, qsizetype n, const QString &after)
3706
3707 Replaces \a n characters beginning at index \a position with
3708 the string \a after and returns a reference to this string.
3709
3710 \note If the specified \a position index is within the string,
3711 but \a position + \a n goes outside the string's range,
3712 then \a n will be adjusted to stop at the end of the string.
3713
3714 Example:
3715
3716 \snippet qstring/main.cpp 40
3717
3718 \sa insert(), remove()
3719*/
3720QString &QString::replace(qsizetype pos, qsizetype len, const QString &after)
3721{
3722 return replace(pos, len, after.constData(), after.size());
3723}
3724
3725/*!
3726 \fn QString &QString::replace(qsizetype position, qsizetype n, const QChar *after, qsizetype alen)
3727 \overload replace()
3728 Replaces \a n characters beginning at index \a position with the
3729 first \a alen characters of the QChar array \a after and returns a
3730 reference to this string.
3731
3732 \a n must not be negative.
3733*/
3734QString &QString::replace(qsizetype pos, qsizetype len, const QChar *after, qsizetype alen)
3735{
3736 Q_PRE(len >= 0);
3737
3738 if (size_t(pos) > size_t(this->size()))
3739 return *this;
3740 if (len > this->size() - pos)
3741 len = this->size() - pos;
3742
3743 qsizetype indices[] = {pos};
3744 replace_helper(*this, indices, len, QStringView{after, alen});
3745 return *this;
3746}
3747
3748/*!
3749 \fn QString &QString::replace(qsizetype position, qsizetype n, QChar after)
3750 \overload replace()
3751
3752 Replaces \a n characters beginning at index \a position with the
3753 character \a after and returns a reference to this string.
3754*/
3755QString &QString::replace(qsizetype pos, qsizetype len, QChar after)
3756{
3757 return replace(pos, len, &after, 1);
3758}
3759
3760/*!
3761 \overload replace()
3762 Replaces every occurrence of the string \a before with the string \a
3763 after and returns a reference to this string.
3764
3765 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
3766
3767 Example:
3768
3769 \snippet qstring/main.cpp 41
3770
3771 \note The replacement text is not rescanned after it is inserted.
3772
3773 Example:
3774
3775 \snippet qstring/main.cpp 86
3776
3777//! [empty-before-arg-in-replace]
3778 \note If you use an empty \a before argument, the \a after argument will be
3779 inserted \e {before and after} each character of the string.
3780//! [empty-before-arg-in-replace]
3781
3782*/
3783QString &QString::replace(const QString &before, const QString &after, Qt::CaseSensitivity cs)
3784{
3785 return replace(before.constData(), before.size(), after.constData(), after.size(), cs);
3786}
3787
3788/*!
3789 \since 4.5
3790 \overload replace()
3791
3792 Replaces each occurrence in this string of the first \a blen
3793 characters of \a before with the first \a alen characters of \a
3794 after and returns a reference to this string.
3795
3796 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
3797
3798 \note If \a before points to an \e empty string (that is, \a blen == 0),
3799 the string pointed to by \a after will be inserted \e {before and after}
3800 each character in this string.
3801*/
3802QString &QString::replace(const QChar *before, qsizetype blen,
3803 const QChar *after, qsizetype alen,
3804 Qt::CaseSensitivity cs)
3805{
3806 if (isEmpty()) {
3807 if (blen)
3808 return *this;
3809 } else {
3810 if (cs == Qt::CaseSensitive && before == after && blen == alen)
3811 return *this;
3812 }
3813 if (alen == 0 && blen == 0)
3814 return *this;
3815 if (alen == 1 && blen == 1)
3816 return replace(*before, *after, cs);
3817
3818 QStringMatcher matcher(before, blen, cs);
3819
3820 qsizetype index = 0;
3821
3822 QVarLengthArray<qsizetype> indices;
3823 while ((index = matcher.indexIn(*this, index)) != -1) {
3824 indices.push_back(index);
3825 if (blen) // Step over before:
3826 index += blen;
3827 else // Only count one instance of empty between any two characters:
3828 index++;
3829 }
3830 if (indices.isEmpty())
3831 return *this;
3832
3833 replace_helper(*this, indices, blen, QStringView{after, alen});
3834 return *this;
3835}
3836
3837/*!
3838 \overload replace()
3839 Replaces every occurrence of the character \a ch in the string with
3840 \a after and returns a reference to this string.
3841
3842 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
3843*/
3844QString& QString::replace(QChar ch, const QString &after, Qt::CaseSensitivity cs)
3845{
3846 if (after.size() == 0)
3847 return remove(ch, cs);
3848
3849 if (after.size() == 1)
3850 return replace(ch, after.front(), cs);
3851
3852 if (size() == 0)
3853 return *this;
3854
3855 const char16_t cc = (cs == Qt::CaseSensitive ? ch.unicode() : ch.toCaseFolded().unicode());
3856
3857 QVarLengthArray<qsizetype> indices;
3858 if (cs == Qt::CaseSensitive) {
3859 const char16_t *begin = d.begin();
3860 const char16_t *end = d.end();
3861 QStringView view(begin, end);
3862 const char16_t *hit = nullptr;
3863 while ((hit = QtPrivate::qustrchr(view, cc)) != end) {
3864 indices.push_back(std::distance(begin, hit));
3865 view = QStringView(std::next(hit), end);
3866 }
3867 } else {
3868 for (qsizetype i = 0; i < d.size; ++i)
3869 if (QChar::toCaseFolded(d.data()[i]) == cc)
3870 indices.push_back(i);
3871 }
3872 if (indices.isEmpty())
3873 return *this;
3874
3875 replace_helper(*this, indices, 1, after);
3876 return *this;
3877}
3878
3879/*!
3880 \overload replace()
3881 Replaces every occurrence of the character \a before with the
3882 character \a after and returns a reference to this string.
3883
3884 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
3885*/
3886QString& QString::replace(QChar before, QChar after, Qt::CaseSensitivity cs)
3887{
3888 const qsizetype idx = indexOf(before, 0, cs);
3889 if (idx == -1)
3890 return *this;
3891
3892 const char16_t achar = after.unicode();
3893 char16_t bchar = before.unicode();
3894
3895 auto matchesCIS = [](char16_t beforeChar) {
3896 return [beforeChar](char16_t ch) { return foldAndCompare(ch, beforeChar); };
3897 };
3898
3899 auto hit = d.begin() + idx;
3900 if (!d.needsDetach()) {
3901 *hit++ = achar;
3902 if (cs == Qt::CaseSensitive) {
3903 std::replace(hit, d.end(), bchar, achar);
3904 } else {
3905 bchar = foldCase(bchar);
3906 std::replace_if(hit, d.end(), matchesCIS(bchar), achar);
3907 }
3908 } else {
3909 QString other{ d.size, Qt::Uninitialized };
3910 auto dest = std::copy(d.begin(), hit, other.d.begin());
3911 *dest++ = achar;
3912 ++hit;
3913 if (cs == Qt::CaseSensitive) {
3914 std::replace_copy(hit, d.end(), dest, bchar, achar);
3915 } else {
3916 bchar = foldCase(bchar);
3917 std::replace_copy_if(hit, d.end(), dest, matchesCIS(bchar), achar);
3918 }
3919
3920 swap(other);
3921 }
3922 return *this;
3923}
3924
3925/*!
3926 \since 4.5
3927 \overload replace()
3928
3929 Replaces every occurrence in this string of the Latin-1 string viewed
3930 by \a before with the Latin-1 string viewed by \a after, and returns a
3931 reference to this string.
3932
3933 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
3934
3935 \note The text is not rescanned after a replacement.
3936
3937 \include qstring.cpp empty-before-arg-in-replace
3938*/
3939QString &QString::replace(QLatin1StringView before, QLatin1StringView after, Qt::CaseSensitivity cs)
3940{
3941 const qsizetype alen = after.size();
3942 const qsizetype blen = before.size();
3943 if (blen == 1 && alen == 1)
3944 return replace(before.front(), after.front(), cs);
3945
3946 QVarLengthArray<char16_t> a = qt_from_latin1_to_qvla(after);
3947 QVarLengthArray<char16_t> b = qt_from_latin1_to_qvla(before);
3948 return replace((const QChar *)b.data(), blen, (const QChar *)a.data(), alen, cs);
3949}
3950
3951/*!
3952 \since 4.5
3953 \overload replace()
3954
3955 Replaces every occurrence in this string of the Latin-1 string viewed
3956 by \a before with the string \a after, and returns a reference to this
3957 string.
3958
3959 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
3960
3961 \note The text is not rescanned after a replacement.
3962
3963 \include qstring.cpp empty-before-arg-in-replace
3964*/
3965QString &QString::replace(QLatin1StringView before, const QString &after, Qt::CaseSensitivity cs)
3966{
3967 const qsizetype blen = before.size();
3968 if (blen == 1 && after.size() == 1)
3969 return replace(before.front(), after.front(), cs);
3970
3971 QVarLengthArray<char16_t> b = qt_from_latin1_to_qvla(before);
3972 return replace((const QChar *)b.data(), blen, after.constData(), after.d.size, cs);
3973}
3974
3975/*!
3976 \since 4.5
3977 \overload replace()
3978
3979 Replaces every occurrence of the string \a before with the string \a
3980 after and returns a reference to this string.
3981
3982 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
3983
3984 \note The text is not rescanned after a replacement.
3985
3986 \include qstring.cpp empty-before-arg-in-replace
3987*/
3988QString &QString::replace(const QString &before, QLatin1StringView after, Qt::CaseSensitivity cs)
3989{
3990 const qsizetype alen = after.size();
3991 if (before.size() == 1 && alen == 1)
3992 return replace(before.front(), after.front(), cs);
3993
3994 QVarLengthArray<char16_t> a = qt_from_latin1_to_qvla(after);
3995 return replace(before.constData(), before.d.size, (const QChar *)a.data(), alen, cs);
3996}
3997
3998/*!
3999 \since 4.5
4000 \overload replace()
4001
4002 Replaces every occurrence of the character \a c with the string \a
4003 after and returns a reference to this string.
4004
4005 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4006
4007 \note The text is not rescanned after a replacement.
4008*/
4009QString &QString::replace(QChar c, QLatin1StringView after, Qt::CaseSensitivity cs)
4010{
4011 const qsizetype alen = after.size();
4012 if (alen == 1)
4013 return replace(c, after.front(), cs);
4014
4015 QVarLengthArray<char16_t> a = qt_from_latin1_to_qvla(after);
4016 return replace(&c, 1, (const QChar *)a.data(), alen, cs);
4017}
4018
4019/*!
4020 \fn bool QString::operator==(const QString &lhs, const QString &rhs)
4021 \overload operator==()
4022
4023 Returns \c true if string \a lhs is equal to string \a rhs; otherwise
4024 returns \c false.
4025
4026 \include qstring.cpp compare-isNull-vs-isEmpty
4027
4028 \sa {Comparing Strings}
4029*/
4030
4031/*!
4032 \fn bool QString::operator==(const QString &lhs, const QLatin1StringView &rhs)
4033
4034 \overload operator==()
4035
4036 Returns \c true if \a lhs is equal to \a rhs; otherwise
4037 returns \c false.
4038*/
4039
4040/*!
4041 \fn bool QString::operator==(const QLatin1StringView &lhs, const QString &rhs)
4042
4043 \overload operator==()
4044
4045 Returns \c true if \a lhs is equal to \a rhs; otherwise
4046 returns \c false.
4047*/
4048
4049/*! \fn bool QString::operator==(const QString &lhs, const QByteArray &rhs)
4050
4051 \overload operator==()
4052
4053 The \a rhs byte array is converted to a QUtf8StringView.
4054
4055 You can disable this operator by defining
4056 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
4057 can be useful if you want to ensure that all user-visible strings
4058 go through QObject::tr(), for example.
4059
4060 Returns \c true if string \a lhs is lexically equal to \a rhs.
4061 Otherwise returns \c false.
4062*/
4063
4064/*! \fn bool QString::operator==(const QString &lhs, const char * const &rhs)
4065
4066 \overload operator==()
4067
4068 The \a rhs const char pointer is converted to a QUtf8StringView.
4069
4070 You can disable this operator by defining
4071 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
4072 can be useful if you want to ensure that all user-visible strings
4073 go through QObject::tr(), for example.
4074*/
4075
4076/*!
4077 \fn bool QString::operator<(const QString &lhs, const QString &rhs)
4078
4079 \overload operator<()
4080
4081 Returns \c true if string \a lhs is lexically less than string
4082 \a rhs; otherwise returns \c false.
4083
4084 \sa {Comparing Strings}
4085*/
4086
4087/*!
4088 \fn bool QString::operator<(const QString &lhs, const QLatin1StringView &rhs)
4089
4090 \overload operator<()
4091
4092 Returns \c true if \a lhs is lexically less than \a rhs;
4093 otherwise returns \c false.
4094*/
4095
4096/*!
4097 \fn bool QString::operator<(const QLatin1StringView &lhs, const QString &rhs)
4098
4099 \overload operator<()
4100
4101 Returns \c true if \a lhs is lexically less than \a rhs;
4102 otherwise returns \c false.
4103*/
4104
4105/*! \fn bool QString::operator<(const QString &lhs, const QByteArray &rhs)
4106
4107 \overload operator<()
4108
4109 The \a rhs byte array is converted to a QUtf8StringView.
4110 If any NUL characters ('\\0') are embedded in the byte array, they will be
4111 included in the transformation.
4112
4113 You can disable this operator by defining
4114 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
4115 can be useful if you want to ensure that all user-visible strings
4116 go through QObject::tr(), for example.
4117*/
4118
4119/*! \fn bool QString::operator<(const QString &lhs, const char * const &rhs)
4120
4121 Returns \c true if string \a lhs is lexically less than string \a rhs.
4122 Otherwise returns \c false.
4123
4124 \overload operator<()
4125
4126 The \a rhs const char pointer is converted to a QUtf8StringView.
4127
4128 You can disable this operator by defining
4129 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
4130 can be useful if you want to ensure that all user-visible strings
4131 go through QObject::tr(), for example.
4132*/
4133
4134/*! \fn bool QString::operator<=(const QString &lhs, const QString &rhs)
4135
4136 Returns \c true if string \a lhs is lexically less than or equal to
4137 string \a rhs; otherwise returns \c false.
4138
4139 \sa {Comparing Strings}
4140*/
4141
4142/*!
4143 \fn bool QString::operator<=(const QString &lhs, const QLatin1StringView &rhs)
4144
4145 \overload operator<=()
4146
4147 Returns \c true if \a lhs is lexically less than or equal to \a rhs;
4148 otherwise returns \c false.
4149*/
4150
4151/*!
4152 \fn bool QString::operator<=(const QLatin1StringView &lhs, const QString &rhs)
4153
4154 \overload operator<=()
4155
4156 Returns \c true if \a lhs is lexically less than or equal to \a rhs;
4157 otherwise returns \c false.
4158*/
4159
4160/*! \fn bool QString::operator<=(const QString &lhs, const QByteArray &rhs)
4161
4162 \overload operator<=()
4163
4164 The \a rhs byte array is converted to a QUtf8StringView.
4165 If any NUL characters ('\\0') are embedded in the byte array, they will be
4166 included in the transformation.
4167
4168 You can disable this operator by defining
4169 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
4170 can be useful if you want to ensure that all user-visible strings
4171 go through QObject::tr(), for example.
4172*/
4173
4174/*! \fn bool QString::operator<=(const QString &lhs, const char * const &rhs)
4175
4176 \overload operator<=()
4177
4178 The \a rhs const char pointer is converted to a QUtf8StringView.
4179
4180 You can disable this operator by defining
4181 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
4182 can be useful if you want to ensure that all user-visible strings
4183 go through QObject::tr(), for example.
4184*/
4185
4186/*! \fn bool QString::operator>(const QString &lhs, const QString &rhs)
4187
4188 Returns \c true if string \a lhs is lexically greater than string \a rhs;
4189 otherwise returns \c false.
4190
4191 \sa {Comparing Strings}
4192*/
4193
4194/*!
4195 \fn bool QString::operator>(const QString &lhs, const QLatin1StringView &rhs)
4196
4197 \overload operator>()
4198
4199 Returns \c true if \a lhs is lexically greater than \a rhs;
4200 otherwise returns \c false.
4201*/
4202
4203/*!
4204 \fn bool QString::operator>(const QLatin1StringView &lhs, const QString &rhs)
4205
4206 \overload operator>()
4207
4208 Returns \c true if \a lhs is lexically greater than \a rhs;
4209 otherwise returns \c false.
4210*/
4211
4212/*! \fn bool QString::operator>(const QString &lhs, const QByteArray &rhs)
4213
4214 \overload operator>()
4215
4216 The \a rhs byte array is converted to a QUtf8StringView.
4217 If any NUL characters ('\\0') are embedded in the byte array, they will be
4218 included in the transformation.
4219
4220 You can disable this operator by defining
4221 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
4222 can be useful if you want to ensure that all user-visible strings
4223 go through QObject::tr(), for example.
4224*/
4225
4226/*! \fn bool QString::operator>(const QString &lhs, const char * const &rhs)
4227
4228 \overload operator>()
4229
4230 The \a rhs const char pointer is converted to a QUtf8StringView.
4231
4232 You can disable this operator by defining \l QT_NO_CAST_FROM_ASCII
4233 when you compile your applications. This can be useful if you want
4234 to ensure that all user-visible strings go through QObject::tr(),
4235 for example.
4236*/
4237
4238/*! \fn bool QString::operator>=(const QString &lhs, const QString &rhs)
4239
4240 Returns \c true if string \a lhs is lexically greater than or equal to
4241 string \a rhs; otherwise returns \c false.
4242
4243 \sa {Comparing Strings}
4244*/
4245
4246/*!
4247 \fn bool QString::operator>=(const QString &lhs, const QLatin1StringView &rhs)
4248
4249 \overload operator>=()
4250
4251 Returns \c true if \a lhs is lexically greater than or equal to \a rhs;
4252 otherwise returns \c false.
4253*/
4254
4255/*!
4256 \fn bool QString::operator>=(const QLatin1StringView &lhs, const QString &rhs)
4257
4258 \overload operator>=()
4259
4260 Returns \c true if \a lhs is lexically greater than or equal to \a rhs;
4261 otherwise returns \c false.
4262*/
4263
4264/*! \fn bool QString::operator>=(const QString &lhs, const QByteArray &rhs)
4265
4266 \overload operator>=()
4267
4268 The \a rhs byte array is converted to a QUtf8StringView.
4269 If any NUL characters ('\\0') are embedded in the byte array, they will be
4270 included in the transformation.
4271
4272 You can disable this operator by defining \l QT_NO_CAST_FROM_ASCII
4273 when you compile your applications. This can be useful if you want
4274 to ensure that all user-visible strings go through QObject::tr(),
4275 for example.
4276*/
4277
4278/*! \fn bool QString::operator>=(const QString &lhs, const char * const &rhs)
4279
4280 \overload operator>=()
4281
4282 The \a rhs const char pointer is converted to a QUtf8StringView.
4283
4284 You can disable this operator by defining \l QT_NO_CAST_FROM_ASCII
4285 when you compile your applications. This can be useful if you want
4286 to ensure that all user-visible strings go through QObject::tr(),
4287 for example.
4288*/
4289
4290/*! \fn bool QString::operator!=(const QString &lhs, const QString &rhs)
4291
4292 Returns \c true if string \a lhs is not equal to string \a rhs;
4293 otherwise returns \c false.
4294
4295 \sa {Comparing Strings}
4296*/
4297
4298/*! \fn bool QString::operator!=(const QString &lhs, const QLatin1StringView &rhs)
4299
4300 Returns \c true if string \a lhs is not equal to string \a rhs.
4301 Otherwise returns \c false.
4302
4303 \overload operator!=()
4304*/
4305
4306/*! \fn bool QString::operator!=(const QString &lhs, const QByteArray &rhs)
4307
4308 \overload operator!=()
4309
4310 The \a rhs byte array is converted to a QUtf8StringView.
4311 If any NUL characters ('\\0') are embedded in the byte array, they will be
4312 included in the transformation.
4313
4314 You can disable this operator by defining \l QT_NO_CAST_FROM_ASCII
4315 when you compile your applications. This can be useful if you want
4316 to ensure that all user-visible strings go through QObject::tr(),
4317 for example.
4318*/
4319
4320/*! \fn bool QString::operator!=(const QString &lhs, const char * const &rhs)
4321
4322 \overload operator!=()
4323
4324 The \a rhs const char pointer is converted to a QUtf8StringView.
4325
4326 You can disable this operator by defining
4327 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
4328 can be useful if you want to ensure that all user-visible strings
4329 go through QObject::tr(), for example.
4330*/
4331
4332/*! \fn bool QString::operator==(const QByteArray &lhs, const QString &rhs)
4333
4334 Returns \c true if byte array \a lhs is equal to the UTF-8 encoding of
4335 \a rhs; otherwise returns \c false.
4336
4337 The comparison is case sensitive.
4338
4339 You can disable this operator by defining \c
4340 QT_NO_CAST_FROM_ASCII when you compile your applications. You
4341 then need to call QString::fromUtf8(), QString::fromLatin1(),
4342 or QString::fromLocal8Bit() explicitly if you want to convert the byte
4343 array to a QString before doing the comparison.
4344*/
4345
4346/*! \fn bool QString::operator!=(const QByteArray &lhs, const QString &rhs)
4347
4348 Returns \c true if byte array \a lhs is not equal to the UTF-8 encoding of
4349 \a rhs; otherwise returns \c false.
4350
4351 The comparison is case sensitive.
4352
4353 You can disable this operator by defining \c
4354 QT_NO_CAST_FROM_ASCII when you compile your applications. You
4355 then need to call QString::fromUtf8(), QString::fromLatin1(),
4356 or QString::fromLocal8Bit() explicitly if you want to convert the byte
4357 array to a QString before doing the comparison.
4358*/
4359
4360/*! \fn bool QString::operator<(const QByteArray &lhs, const QString &rhs)
4361
4362 Returns \c true if byte array \a lhs is lexically less than the UTF-8 encoding
4363 of \a rhs; otherwise returns \c false.
4364
4365 The comparison is case sensitive.
4366
4367 You can disable this operator by defining \c
4368 QT_NO_CAST_FROM_ASCII when you compile your applications. You
4369 then need to call QString::fromUtf8(), QString::fromLatin1(),
4370 or QString::fromLocal8Bit() explicitly if you want to convert the byte
4371 array to a QString before doing the comparison.
4372*/
4373
4374/*! \fn bool QString::operator>(const QByteArray &lhs, const QString &rhs)
4375
4376 Returns \c true if byte array \a lhs is lexically greater than the UTF-8
4377 encoding of \a rhs; otherwise returns \c false.
4378
4379 The comparison is case sensitive.
4380
4381 You can disable this operator by defining \c
4382 QT_NO_CAST_FROM_ASCII when you compile your applications. You
4383 then need to call QString::fromUtf8(), QString::fromLatin1(),
4384 or QString::fromLocal8Bit() explicitly if you want to convert the byte
4385 array to a QString before doing the comparison.
4386*/
4387
4388/*! \fn bool QString::operator<=(const QByteArray &lhs, const QString &rhs)
4389
4390 Returns \c true if byte array \a lhs is lexically less than or equal to the
4391 UTF-8 encoding of \a rhs; otherwise returns \c false.
4392
4393 The comparison is case sensitive.
4394
4395 You can disable this operator by defining \c
4396 QT_NO_CAST_FROM_ASCII when you compile your applications. You
4397 then need to call QString::fromUtf8(), QString::fromLatin1(),
4398 or QString::fromLocal8Bit() explicitly if you want to convert the byte
4399 array to a QString before doing the comparison.
4400*/
4401
4402/*! \fn bool QString::operator>=(const QByteArray &lhs, const QString &rhs)
4403
4404 Returns \c true if byte array \a lhs is lexically greater than or equal to the UTF-8
4405 encoding of \a rhs; otherwise returns \c false.
4406
4407 The comparison is case sensitive.
4408
4409 You can disable this operator by defining \c
4410 QT_NO_CAST_FROM_ASCII when you compile your applications. You
4411 then need to call QString::fromUtf8(), QString::fromLatin1(),
4412 or QString::fromLocal8Bit() explicitly if you want to convert the byte
4413 array to a QString before doing the comparison.
4414*/
4415
4416/*!
4417 \include qstring.qdocinc {qstring-first-index-of} {string} {str}
4418
4419 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4420
4421 Example:
4422
4423 \snippet qstring/main.cpp 24
4424
4425 \include qstring.qdocinc negative-index-start-search-from-end
4426
4427 \sa lastIndexOf(), contains(), count()
4428*/
4429qsizetype QString::indexOf(const QString &str, qsizetype from, Qt::CaseSensitivity cs) const
4430{
4431 return QtPrivate::findString(QStringView(unicode(), size()), from, QStringView(str.unicode(), str.size()), cs);
4432}
4433
4434/*!
4435 \fn qsizetype QString::indexOf(QStringView str, qsizetype from, Qt::CaseSensitivity cs) const
4436 \since 5.14
4437 \overload indexOf()
4438
4439 \include qstring.qdocinc {qstring-first-index-of} {string view} {str}
4440
4441 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4442
4443 \include qstring.qdocinc negative-index-start-search-from-end
4444
4445 \sa QStringView::indexOf(), lastIndexOf(), contains(), count()
4446*/
4447
4448/*!
4449 \since 4.5
4450
4451 \include {qstring.qdocinc} {qstring-first-index-of} {Latin-1 string viewed by} {str}
4452
4453 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4454
4455 Example:
4456
4457 \snippet qstring/main.cpp 24
4458
4459 \include qstring.qdocinc negative-index-start-search-from-end
4460
4461 \sa lastIndexOf(), contains(), count()
4462*/
4463
4464qsizetype QString::indexOf(QLatin1StringView str, qsizetype from, Qt::CaseSensitivity cs) const
4465{
4466 return QtPrivate::findString(QStringView(unicode(), size()), from, str, cs);
4467}
4468
4469/*!
4470 \fn qsizetype QString::indexOf(QChar ch, qsizetype from, Qt::CaseSensitivity cs) const
4471 \overload indexOf()
4472
4473 \include qstring.qdocinc {qstring-first-index-of} {character} {ch}
4474*/
4475
4476/*!
4477 \include qstring.qdocinc {qstring-last-index-of} {string} {str}
4478
4479 \include qstring.qdocinc negative-index-start-search-from-end
4480
4481 Returns -1 if \a str is not found.
4482
4483 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4484
4485 Example:
4486
4487 \snippet qstring/main.cpp 29
4488
4489 \note When searching for a 0-length \a str, the match at the end of
4490 the data is excluded from the search by a negative \a from, even
4491 though \c{-1} is normally thought of as searching from the end of the
4492 string: the match at the end is \e after the last character, so it is
4493 excluded. To include such a final empty match, either give a positive
4494 value for \a from or omit the \a from parameter entirely.
4495
4496 \sa indexOf(), contains(), count()
4497*/
4498qsizetype QString::lastIndexOf(const QString &str, qsizetype from, Qt::CaseSensitivity cs) const
4499{
4500 return QtPrivate::lastIndexOf(QStringView(*this), from, str, cs);
4501}
4502
4503/*!
4504 \fn qsizetype QString::lastIndexOf(const QString &str, Qt::CaseSensitivity cs = Qt::CaseSensitive) const
4505 \since 6.2
4506 \overload lastIndexOf()
4507
4508 Returns the index position of the last occurrence of the string \a
4509 str in this string. Returns -1 if \a str is not found.
4510
4511 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4512
4513 Example:
4514
4515 \snippet qstring/main.cpp 29
4516
4517 \sa indexOf(), contains(), count()
4518*/
4519
4520
4521/*!
4522 \since 4.5
4523 \overload lastIndexOf()
4524
4525 \include qstring.qdocinc {qstring-last-index-of} {Latin-1 string viewed by} {str}
4526
4527 \include qstring.qdocinc negative-index-start-search-from-end
4528
4529 Returns -1 if \a str is not found.
4530
4531 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4532
4533 Example:
4534
4535 \snippet qstring/main.cpp 29
4536
4537 \note When searching for a 0-length \a str, the match at the end of
4538 the data is excluded from the search by a negative \a from, even
4539 though \c{-1} is normally thought of as searching from the end of the
4540 string: the match at the end is \e after the last character, so it is
4541 excluded. To include such a final empty match, either give a positive
4542 value for \a from or omit the \a from parameter entirely.
4543
4544 \sa indexOf(), contains(), count()
4545*/
4546qsizetype QString::lastIndexOf(QLatin1StringView str, qsizetype from, Qt::CaseSensitivity cs) const
4547{
4548 return QtPrivate::lastIndexOf(*this, from, str, cs);
4549}
4550
4551/*!
4552 \fn qsizetype QString::lastIndexOf(QLatin1StringView str, Qt::CaseSensitivity cs = Qt::CaseSensitive) const
4553 \since 6.2
4554 \overload lastIndexOf()
4555
4556 Returns the index position of the last occurrence of the string \a
4557 str in this string. Returns -1 if \a str is not found.
4558
4559 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4560
4561 Example:
4562
4563 \snippet qstring/main.cpp 29
4564
4565 \sa indexOf(), contains(), count()
4566*/
4567
4568/*!
4569 \fn qsizetype QString::lastIndexOf(QChar ch, qsizetype from, Qt::CaseSensitivity cs) const
4570 \overload lastIndexOf()
4571
4572 \include qstring.qdocinc {qstring-last-index-of} {character} {ch}
4573*/
4574
4575/*!
4576 \fn QString::lastIndexOf(QChar ch, Qt::CaseSensitivity) const
4577 \since 6.3
4578 \overload lastIndexOf()
4579*/
4580
4581/*!
4582 \fn qsizetype QString::lastIndexOf(QStringView str, qsizetype from, Qt::CaseSensitivity cs) const
4583 \since 5.14
4584 \overload lastIndexOf()
4585
4586 \include qstring.qdocinc {qstring-last-index-of} {string view} {str}
4587
4588 \include qstring.qdocinc negative-index-start-search-from-end
4589
4590 Returns -1 if \a str is not found.
4591
4592 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4593
4594 \note When searching for a 0-length \a str, the match at the end of
4595 the data is excluded from the search by a negative \a from, even
4596 though \c{-1} is normally thought of as searching from the end of the
4597 string: the match at the end is \e after the last character, so it is
4598 excluded. To include such a final empty match, either give a positive
4599 value for \a from or omit the \a from parameter entirely.
4600
4601 \sa indexOf(), contains(), count()
4602*/
4603
4604/*!
4605 \fn qsizetype QString::lastIndexOf(QStringView str, Qt::CaseSensitivity cs = Qt::CaseSensitive) const
4606 \since 6.2
4607 \overload lastIndexOf()
4608
4609 Returns the index position of the last occurrence of the string view \a
4610 str in this string. Returns -1 if \a str is not found.
4611
4612 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4613
4614 \sa indexOf(), contains(), count()
4615*/
4616
4617#if QT_CONFIG(regularexpression)
4618struct QStringCapture
4619{
4620 qsizetype pos;
4621 qsizetype len;
4622 int no;
4623};
4624Q_DECLARE_TYPEINFO(QStringCapture, Q_PRIMITIVE_TYPE);
4625
4626/*!
4627 \overload replace()
4628 \since 5.0
4629
4630 Replaces every occurrence of the regular expression \a re in the
4631 string with \a after. Returns a reference to the string. For
4632 example:
4633
4634 \snippet qstring/main.cpp 87
4635
4636 For regular expressions containing capturing groups,
4637 occurrences of \b{\\1}, \b{\\2}, ..., in \a after are replaced
4638 with the string captured by the corresponding capturing group.
4639
4640 \snippet qstring/main.cpp 88
4641
4642 \sa indexOf(), lastIndexOf(), remove(), QRegularExpression, QRegularExpressionMatch
4643*/
4644QString &QString::replace(const QRegularExpression &re, const QString &after)
4645{
4646 if (!re.isValid()) {
4647 qtWarnAboutInvalidRegularExpression(re, "QString", "replace");
4648 return *this;
4649 }
4650
4651 const QString copy(*this);
4652 QRegularExpressionMatchIterator iterator = re.globalMatch(copy);
4653 if (!iterator.hasNext()) // no matches at all
4654 return *this;
4655
4656 reallocData(d.size, QArrayData::KeepSize);
4657
4658 qsizetype numCaptures = re.captureCount();
4659
4660 // 1. build the backreferences list, holding where the backreferences
4661 // are in the replacement string
4662 QVarLengthArray<QStringCapture> backReferences;
4663 const qsizetype al = after.size();
4664 const QChar *ac = after.unicode();
4665
4666 for (qsizetype i = 0; i < al - 1; i++) {
4667 if (ac[i] == u'\\') {
4668 int no = ac[i + 1].digitValue();
4669 if (no > 0 && no <= numCaptures) {
4670 QStringCapture backReference;
4671 backReference.pos = i;
4672 backReference.len = 2;
4673
4674 if (i < al - 2) {
4675 int secondDigit = ac[i + 2].digitValue();
4676 if (secondDigit != -1 && ((no * 10) + secondDigit) <= numCaptures) {
4677 no = (no * 10) + secondDigit;
4678 ++backReference.len;
4679 }
4680 }
4681
4682 backReference.no = no;
4683 backReferences.append(backReference);
4684 }
4685 }
4686 }
4687
4688 // 2. iterate on the matches. For every match, copy in chunks
4689 // - the part before the match
4690 // - the after string, with the proper replacements for the backreferences
4691
4692 qsizetype newLength = 0; // length of the new string, with all the replacements
4693 qsizetype lastEnd = 0;
4694 QVarLengthArray<QStringView> chunks;
4695 const QStringView copyView{ copy }, afterView{ after };
4696 while (iterator.hasNext()) {
4697 QRegularExpressionMatch match = iterator.next();
4698 qsizetype len;
4699 // add the part before the match
4700 len = match.capturedStart() - lastEnd;
4701 if (len > 0) {
4702 chunks << copyView.mid(lastEnd, len);
4703 newLength += len;
4704 }
4705
4706 lastEnd = 0;
4707 // add the after string, with replacements for the backreferences
4708 for (const QStringCapture &backReference : std::as_const(backReferences)) {
4709 // part of "after" before the backreference
4710 len = backReference.pos - lastEnd;
4711 if (len > 0) {
4712 chunks << afterView.mid(lastEnd, len);
4713 newLength += len;
4714 }
4715
4716 // backreference itself
4717 len = match.capturedLength(backReference.no);
4718 if (len > 0) {
4719 chunks << copyView.mid(match.capturedStart(backReference.no), len);
4720 newLength += len;
4721 }
4722
4723 lastEnd = backReference.pos + backReference.len;
4724 }
4725
4726 // add the last part of the after string
4727 len = afterView.size() - lastEnd;
4728 if (len > 0) {
4729 chunks << afterView.mid(lastEnd, len);
4730 newLength += len;
4731 }
4732
4733 lastEnd = match.capturedEnd();
4734 }
4735
4736 // 3. trailing string after the last match
4737 if (copyView.size() > lastEnd) {
4738 chunks << copyView.mid(lastEnd);
4739 newLength += copyView.size() - lastEnd;
4740 }
4741
4742 // 4. assemble the chunks together
4743 resize(newLength);
4744 qsizetype i = 0;
4745 QChar *uc = data();
4746 for (const QStringView &chunk : std::as_const(chunks)) {
4747 qsizetype len = chunk.size();
4748 memcpy(uc + i, chunk.constData(), len * sizeof(QChar));
4749 i += len;
4750 }
4751
4752 return *this;
4753}
4754#endif // QT_CONFIG(regularexpression)
4755
4756/*!
4757 Returns the number of (potentially overlapping) occurrences of
4758 the string \a str in this string.
4759
4760 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4761
4762 \sa contains(), indexOf()
4763*/
4764
4765qsizetype QString::count(const QString &str, Qt::CaseSensitivity cs) const
4766{
4767 return QtPrivate::count(QStringView(unicode(), size()), QStringView(str.unicode(), str.size()), cs);
4768}
4769
4770/*!
4771 \overload count()
4772
4773 Returns the number of occurrences of character \a ch in the string.
4774
4775 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4776
4777 \sa contains(), indexOf()
4778*/
4779
4780qsizetype QString::count(QChar ch, Qt::CaseSensitivity cs) const
4781{
4782 return QtPrivate::count(QStringView(unicode(), size()), ch, cs);
4783}
4784
4785/*!
4786 \since 6.0
4787 \overload count()
4788 Returns the number of (potentially overlapping) occurrences of the
4789 string view \a str in this string.
4790
4791 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4792
4793 \sa contains(), indexOf()
4794*/
4795qsizetype QString::count(QStringView str, Qt::CaseSensitivity cs) const
4796{
4797 return QtPrivate::count(*this, str, cs);
4798}
4799
4800/*! \fn bool QString::contains(const QString &str, Qt::CaseSensitivity cs = Qt::CaseSensitive) const
4801
4802 Returns \c true if this string contains an occurrence of the string
4803 \a str; otherwise returns \c false.
4804
4805 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4806
4807 Example:
4808 \snippet qstring/main.cpp 17
4809
4810 \sa indexOf(), count()
4811*/
4812
4813/*! \fn bool QString::contains(QLatin1StringView str, Qt::CaseSensitivity cs = Qt::CaseSensitive) const
4814 \since 5.3
4815
4816 \overload contains()
4817
4818 Returns \c true if this string contains an occurrence of the latin-1 string
4819 \a str; otherwise returns \c false.
4820*/
4821
4822/*! \fn bool QString::contains(QChar ch, Qt::CaseSensitivity cs = Qt::CaseSensitive) const
4823
4824 \overload contains()
4825
4826 Returns \c true if this string contains an occurrence of the
4827 character \a ch; otherwise returns \c false.
4828*/
4829
4830/*! \fn bool QString::contains(QStringView str, Qt::CaseSensitivity cs = Qt::CaseSensitive) const
4831 \since 5.14
4832 \overload contains()
4833
4834 Returns \c true if this string contains an occurrence of the string view
4835 \a str; otherwise returns \c false.
4836
4837 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4838
4839 \sa indexOf(), count()
4840*/
4841
4842#if QT_CONFIG(regularexpression)
4843/*!
4844 \since 5.5
4845
4846 Returns the index position of the first match of the regular
4847 expression \a re in the string, searching forward from index
4848 position \a from. Returns -1 if \a re didn't match anywhere.
4849
4850 If the match is successful and \a rmatch is not \nullptr, it also
4851 writes the results of the match into the QRegularExpressionMatch object
4852 pointed to by \a rmatch.
4853
4854 Example:
4855
4856 \snippet qstring/main.cpp 93
4857*/
4858qsizetype QString::indexOf(const QRegularExpression &re, qsizetype from, QRegularExpressionMatch *rmatch) const
4859{
4860 return QtPrivate::indexOf(QStringView(*this), this, re, from, rmatch);
4861}
4862
4863/*!
4864 \since 5.5
4865
4866 Returns the index position of the last match of the regular
4867 expression \a re in the string, which starts before the index
4868 position \a from.
4869
4870 \include qstring.qdocinc negative-index-start-search-from-end
4871
4872 Returns -1 if \a re didn't match anywhere.
4873
4874 If the match is successful and \a rmatch is not \nullptr, it also
4875 writes the results of the match into the QRegularExpressionMatch object
4876 pointed to by \a rmatch.
4877
4878 Example:
4879
4880 \snippet qstring/main.cpp 94
4881
4882 \note Due to how the regular expression matching algorithm works,
4883 this function will actually match repeatedly from the beginning of
4884 the string until the position \a from is reached.
4885
4886 \note When searching for a regular expression \a re that may match
4887 0 characters, the match at the end of the data is excluded from the
4888 search by a negative \a from, even though \c{-1} is normally
4889 thought of as searching from the end of the string: the match at
4890 the end is \e after the last character, so it is excluded. To
4891 include such a final empty match, either give a positive value for
4892 \a from or omit the \a from parameter entirely.
4893*/
4894qsizetype QString::lastIndexOf(const QRegularExpression &re, qsizetype from, QRegularExpressionMatch *rmatch) const
4895{
4896 return QtPrivate::lastIndexOf(QStringView(*this), this, re, from, rmatch);
4897}
4898
4899/*!
4900 \fn qsizetype QString::lastIndexOf(const QRegularExpression &re, QRegularExpressionMatch *rmatch = nullptr) const
4901 \since 6.2
4902 \overload lastIndexOf()
4903
4904 Returns the index position of the last match of the regular
4905 expression \a re in the string. Returns -1 if \a re didn't match anywhere.
4906
4907 If the match is successful and \a rmatch is not \nullptr, it also
4908 writes the results of the match into the QRegularExpressionMatch object
4909 pointed to by \a rmatch.
4910
4911 Example:
4912
4913 \snippet qstring/main.cpp 94
4914
4915 \note Due to how the regular expression matching algorithm works,
4916 this function will actually match repeatedly from the beginning of
4917 the string until the end of the string is reached.
4918*/
4919
4920/*!
4921 \since 5.1
4922
4923 Returns \c true if the regular expression \a re matches somewhere in this
4924 string; otherwise returns \c false.
4925
4926 If the match is successful and \a rmatch is not \nullptr, it also
4927 writes the results of the match into the QRegularExpressionMatch object
4928 pointed to by \a rmatch.
4929
4930 \sa QRegularExpression::match()
4931*/
4932
4933bool QString::contains(const QRegularExpression &re, QRegularExpressionMatch *rmatch) const
4934{
4935 return QtPrivate::contains(QStringView(*this), this, re, rmatch);
4936}
4937
4938/*!
4939 \overload count()
4940 \since 5.0
4941
4942 Returns the number of times the regular expression \a re matches
4943 in the string.
4944
4945 For historical reasons, this function counts overlapping matches,
4946 so in the example below, there are four instances of "ana" or
4947 "ama":
4948
4949 \snippet qstring/main.cpp 95
4950
4951 This behavior is different from simply iterating over the matches
4952 in the string using QRegularExpressionMatchIterator.
4953
4954 \sa QRegularExpression::globalMatch()
4955*/
4956qsizetype QString::count(const QRegularExpression &re) const
4957{
4958 return QtPrivate::count(QStringView(*this), re);
4959}
4960#endif // QT_CONFIG(regularexpression)
4961
4962#if QT_DEPRECATED_SINCE(6, 4)
4963/*! \fn qsizetype QString::count() const
4964 \deprecated [6.4] Use size() or length() instead.
4965 \overload count()
4966
4967 Same as size().
4968*/
4969#endif
4970
4971/*!
4972 \enum QString::SectionFlag
4973
4974 This enum specifies flags that can be used to affect various
4975 aspects of the section() function's behavior with respect to
4976 separators and empty fields.
4977
4978 \value SectionDefault Empty fields are counted, leading and
4979 trailing separators are not included, and the separator is
4980 compared case sensitively.
4981
4982 \value SectionSkipEmpty Treat empty fields as if they don't exist,
4983 i.e. they are not considered as far as \e start and \e end are
4984 concerned.
4985
4986 \value SectionIncludeLeadingSep Include the leading separator (if
4987 any) in the result string.
4988
4989 \value SectionIncludeTrailingSep Include the trailing separator
4990 (if any) in the result string.
4991
4992 \value SectionCaseInsensitiveSeps Compare the separator
4993 case-insensitively.
4994
4995 \sa section()
4996*/
4997
4998/*!
4999 \fn QString QString::section(QChar sep, qsizetype start, qsizetype end = -1, SectionFlags flags) const
5000
5001 This function returns a section of the string.
5002
5003 This string is treated as a sequence of fields separated by the
5004 character, \a sep. The returned string consists of the fields from
5005 position \a start to position \a end inclusive. If \a end is not
5006 specified, all fields from position \a start to the end of the
5007 string are included. Fields are numbered 0, 1, 2, etc., counting
5008 from the left, and -1, -2, etc., counting from right to left.
5009
5010 The \a flags argument can be used to affect some aspects of the
5011 function's behavior, e.g. whether to be case sensitive, whether
5012 to skip empty fields and how to deal with leading and trailing
5013 separators; see \l{SectionFlags}.
5014
5015 \snippet qstring/main.cpp 52
5016
5017 If \a start or \a end is negative, we count fields from the right
5018 of the string, the right-most field being -1, the one next to the
5019 right-most field being -2, and so on.
5020
5021 \snippet qstring/main.cpp 53
5022
5023 \sa split()
5024*/
5025
5026/*!
5027 \overload section()
5028
5029 \snippet qstring/main.cpp 51
5030 \snippet qstring/main.cpp 54
5031
5032 \sa split()
5033*/
5034
5035QString QString::section(const QString &sep, qsizetype start, qsizetype end, SectionFlags flags) const
5036{
5037 const QList<QStringView> sections = QStringView{ *this }.split(
5038 sep, Qt::KeepEmptyParts, (flags & SectionCaseInsensitiveSeps) ? Qt::CaseInsensitive : Qt::CaseSensitive);
5039 const qsizetype sectionsSize = sections.size();
5040 if (!(flags & SectionSkipEmpty)) {
5041 if (start < 0)
5042 start += sectionsSize;
5043 if (end < 0)
5044 end += sectionsSize;
5045 } else {
5046 qsizetype skip = 0;
5047 for (qsizetype k = 0; k < sectionsSize; ++k) {
5048 if (sections.at(k).isEmpty())
5049 skip++;
5050 }
5051 if (start < 0)
5052 start += sectionsSize - skip;
5053 if (end < 0)
5054 end += sectionsSize - skip;
5055 }
5056 if (start >= sectionsSize || end < 0 || start > end)
5057 return QString();
5058
5059 QString ret;
5060 qsizetype first_i = start, last_i = end;
5061 for (qsizetype x = 0, i = 0; x <= end && i < sectionsSize; ++i) {
5062 const QStringView &section = sections.at(i);
5063 const bool empty = section.isEmpty();
5064 if (x >= start) {
5065 if (x == start)
5066 first_i = i;
5067 if (x == end)
5068 last_i = i;
5069 if (x > start && i > 0)
5070 ret += sep;
5071 ret += section;
5072 }
5073 if (!empty || !(flags & SectionSkipEmpty))
5074 x++;
5075 }
5076 if ((flags & SectionIncludeLeadingSep) && first_i > 0)
5077 ret.prepend(sep);
5078 if ((flags & SectionIncludeTrailingSep) && last_i < sectionsSize - 1)
5079 ret += sep;
5080 return ret;
5081}
5082
5083#if QT_CONFIG(regularexpression)
5084struct qt_section_chunk
5085{
5086 qsizetype length;
5087 QStringView string;
5088};
5089Q_DECLARE_TYPEINFO(qt_section_chunk, Q_RELOCATABLE_TYPE);
5090
5091static QString extractSections(QSpan<qt_section_chunk> sections, qsizetype start, qsizetype end,
5092 QString::SectionFlags flags)
5093{
5094 const qsizetype sectionsSize = sections.size();
5095
5096 if (!(flags & QString::SectionSkipEmpty)) {
5097 if (start < 0)
5098 start += sectionsSize;
5099 if (end < 0)
5100 end += sectionsSize;
5101 } else {
5102 qsizetype skip = 0;
5103 for (qsizetype k = 0; k < sectionsSize; ++k) {
5104 const qt_section_chunk &section = sections[k];
5105 if (section.length == section.string.size())
5106 skip++;
5107 }
5108 if (start < 0)
5109 start += sectionsSize - skip;
5110 if (end < 0)
5111 end += sectionsSize - skip;
5112 }
5113 if (start >= sectionsSize || end < 0 || start > end)
5114 return QString();
5115
5116 QString ret;
5117 qsizetype x = 0;
5118 qsizetype first_i = start, last_i = end;
5119 for (qsizetype i = 0; x <= end && i < sectionsSize; ++i) {
5120 const qt_section_chunk &section = sections[i];
5121 const bool empty = (section.length == section.string.size());
5122 if (x >= start) {
5123 if (x == start)
5124 first_i = i;
5125 if (x == end)
5126 last_i = i;
5127 if (x != start)
5128 ret += section.string;
5129 else
5130 ret += section.string.mid(section.length);
5131 }
5132 if (!empty || !(flags & QString::SectionSkipEmpty))
5133 x++;
5134 }
5135
5136 if ((flags & QString::SectionIncludeLeadingSep) && first_i >= 0) {
5137 const qt_section_chunk &section = sections[first_i];
5138 ret.prepend(section.string.left(section.length));
5139 }
5140
5141 if ((flags & QString::SectionIncludeTrailingSep)
5142 && last_i < sectionsSize - 1) {
5143 const qt_section_chunk &section = sections[last_i + 1];
5144 ret += section.string.left(section.length);
5145 }
5146
5147 return ret;
5148}
5149
5150/*!
5151 \overload section()
5152 \since 5.0
5153
5154 This string is treated as a sequence of fields separated by the
5155 regular expression, \a re.
5156
5157 \snippet qstring/main.cpp 89
5158
5159 \warning Using this QRegularExpression version is much more expensive than
5160 the overloaded string and character versions.
5161
5162 \sa split(), simplified()
5163*/
5164QString QString::section(const QRegularExpression &re, qsizetype start, qsizetype end, SectionFlags flags) const
5165{
5166 if (!re.isValid()) {
5167 qtWarnAboutInvalidRegularExpression(re, "QString", "section");
5168 return QString();
5169 }
5170
5171 const QChar *uc = unicode();
5172 if (!uc)
5173 return QString();
5174
5175 QRegularExpression sep(re);
5176 if (flags & SectionCaseInsensitiveSeps)
5177 sep.setPatternOptions(sep.patternOptions() | QRegularExpression::CaseInsensitiveOption);
5178
5179 QVarLengthArray<qt_section_chunk> sections;
5180 qsizetype n = size(), m = 0, last_m = 0, last_len = 0;
5181 QRegularExpressionMatchIterator iterator = sep.globalMatch(*this);
5182 while (iterator.hasNext()) {
5183 QRegularExpressionMatch match = iterator.next();
5184 m = match.capturedStart();
5185 sections.append(qt_section_chunk{last_len, QStringView{*this}.sliced(last_m, m - last_m)});
5186 last_m = m;
5187 last_len = match.capturedLength();
5188 }
5189 sections.append(qt_section_chunk{last_len, QStringView{*this}.sliced(last_m, n - last_m)});
5190
5191 return extractSections(sections, start, end, flags);
5192}
5193#endif // QT_CONFIG(regularexpression)
5194
5195/*!
5196 \fn QString QString::left(qsizetype n) const &
5197 \fn QString QString::left(qsizetype n) &&
5198
5199 Returns a substring that contains the \a n leftmost characters of
5200 this string (that is, from the beginning of this string up to, but not
5201 including, the element at index position \a n).
5202
5203 If you know that \a n cannot be out of bounds, use first() instead in new
5204 code, because it is faster.
5205
5206 The entire string is returned if \a n is greater than or equal
5207 to size(), or less than zero.
5208
5209 \sa first(), last(), startsWith(), chopped(), chop(), truncate()
5210*/
5211
5212/*!
5213 \fn QString QString::right(qsizetype n) const &
5214 \fn QString QString::right(qsizetype n) &&
5215
5216 Returns a substring that contains the \a n rightmost characters
5217 of the string.
5218
5219 If you know that \a n cannot be out of bounds, use last() instead in new
5220 code, because it is faster.
5221
5222 The entire string is returned if \a n is greater than or equal
5223 to size(), or less than zero.
5224
5225 \sa endsWith(), last(), first(), sliced(), chopped(), chop(), truncate(), slice()
5226*/
5227
5228/*!
5229 \fn QString QString::mid(qsizetype position, qsizetype n) const &
5230 \fn QString QString::mid(qsizetype position, qsizetype n) &&
5231
5232 Returns a string that contains \a n characters of this string, starting
5233 at the specified \a position index up to, but not including, the element
5234 at index position \c {\a position + n}.
5235
5236 If you know that \a position and \a n cannot be out of bounds, use sliced()
5237 instead in new code, because it is faster.
5238
5239 Returns a null string if the \a position index exceeds the
5240 length of the string. If there are fewer than \a n characters
5241 available in the string starting at the given \a position, or if
5242 \a n is -1 (default), the function returns all characters that
5243 are available from the specified \a position.
5244
5245 \sa first(), last(), sliced(), chopped(), chop(), truncate(), slice()
5246*/
5247QString QString::mid(qsizetype position, qsizetype n) const &
5248{
5249 qsizetype p = position;
5250 qsizetype l = n;
5251 using namespace QtPrivate;
5252 switch (QContainerImplHelper::mid(size(), &p, &l)) {
5253 case QContainerImplHelper::Null:
5254 return QString();
5255 case QContainerImplHelper::Empty:
5256 return QString(DataPointer::fromRawData(&_empty, 0));
5257 case QContainerImplHelper::Full:
5258 return *this;
5259 case QContainerImplHelper::Subset:
5260 return sliced(p, l);
5261 }
5262 Q_UNREACHABLE_RETURN(QString());
5263}
5264
5265QString QString::mid(qsizetype position, qsizetype n) &&
5266{
5267 qsizetype p = position;
5268 qsizetype l = n;
5269 using namespace QtPrivate;
5270 switch (QContainerImplHelper::mid(size(), &p, &l)) {
5271 case QContainerImplHelper::Null:
5272 return QString();
5273 case QContainerImplHelper::Empty:
5274 resize(0); // keep capacity if we've reserve()d
5275 [[fallthrough]];
5276 case QContainerImplHelper::Full:
5277 return std::move(*this);
5278 case QContainerImplHelper::Subset:
5279 return std::move(*this).sliced(p, l);
5280 }
5281 Q_UNREACHABLE_RETURN(QString());
5282}
5283
5284/*!
5285 \fn QString QString::first(qsizetype n) const &
5286 \fn QString QString::first(qsizetype n) &&
5287 \since 6.0
5288
5289 Returns a string that contains the first \a n characters of this string,
5290 (that is, from the beginning of this string up to, but not including,
5291 the element at index position \a n).
5292
5293 \note The behavior is undefined when \a n < 0 or \a n > size().
5294
5295 \snippet qstring/main.cpp 31
5296
5297 \sa last(), sliced(), startsWith(), chopped(), chop(), truncate(), slice()
5298*/
5299
5300/*!
5301 \fn QString QString::last(qsizetype n) const &
5302 \fn QString QString::last(qsizetype n) &&
5303 \since 6.0
5304
5305 Returns the string that contains the last \a n characters of this string.
5306
5307 \note The behavior is undefined when \a n < 0 or \a n > size().
5308
5309 \snippet qstring/main.cpp 48
5310
5311 \sa first(), sliced(), endsWith(), chopped(), chop(), truncate(), slice()
5312*/
5313
5314/*!
5315 \fn QString QString::sliced(qsizetype pos, qsizetype n) const &
5316 \fn QString QString::sliced(qsizetype pos, qsizetype n) &&
5317 \since 6.0
5318
5319 Returns a string that contains \a n characters of this string, starting
5320 at position \a pos up to, but not including, the element at index position
5321 \c {\a pos + n}.
5322
5323 \note The behavior is undefined when \a pos < 0, \a n < 0,
5324 or \a pos + \a n > size().
5325
5326 \snippet qstring/main.cpp 34
5327
5328 \sa first(), last(), chopped(), chop(), truncate(), slice()
5329*/
5330QString QString::sliced_helper(QString &str, qsizetype pos, qsizetype n)
5331{
5332 if (n == 0)
5333 return QString(DataPointer::fromRawData(&_empty, 0));
5334 DataPointer d = std::move(str.d).sliced(pos, n);
5335 d.data()[n] = 0;
5336 return QString(std::move(d));
5337}
5338
5339/*!
5340 \fn QString QString::sliced(qsizetype pos) const &
5341 \fn QString QString::sliced(qsizetype pos) &&
5342 \since 6.0
5343 \overload
5344
5345 Returns a string that contains the portion of this string starting at
5346 position \a pos and extending to its end.
5347
5348 \note The behavior is undefined when \a pos < 0 or \a pos > size().
5349
5350 \sa first(), last(), chopped(), chop(), truncate(), slice()
5351*/
5352
5353/*!
5354 \fn QString &QString::slice(qsizetype pos, qsizetype n)
5355 \since 6.8
5356
5357 Modifies this string to start at position \a pos, up to, but not including,
5358 the character (code point) at index position \c {\a pos + n}; and returns
5359 a reference to this string.
5360
5361 \note The behavior is undefined if \a pos < 0, \a n < 0,
5362 or \a pos + \a n > size().
5363
5364 \snippet qstring/main.cpp slice97
5365
5366 \sa sliced(), first(), last(), chopped(), chop(), truncate()
5367*/
5368
5369/*!
5370 \fn QString &QString::slice(qsizetype pos)
5371 \since 6.8
5372 \overload
5373
5374 Modifies this string to start at position \a pos and extending to its end,
5375 and returns a reference to this string.
5376
5377 \note The behavior is undefined if \a pos < 0 or \a pos > size().
5378
5379 \sa sliced(), first(), last(), chopped(), chop(), truncate()
5380*/
5381
5382/*!
5383 \fn QString QString::chopped(qsizetype len) const &
5384 \fn QString QString::chopped(qsizetype len) &&
5385 \since 5.10
5386
5387 Returns a string that contains the size() - \a len leftmost characters
5388 of this string.
5389
5390 \note The behavior is undefined if \a len is negative or greater than size().
5391
5392 \sa endsWith(), first(), last(), sliced(), chop(), truncate(), slice()
5393*/
5394
5395/*!
5396 Returns \c true if the string starts with \a s; otherwise returns
5397 \c false.
5398
5399 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
5400
5401 \snippet qstring/main.cpp 65
5402
5403 \sa endsWith()
5404*/
5405bool QString::startsWith(const QString& s, Qt::CaseSensitivity cs) const
5406{
5407 return qt_starts_with_impl(QStringView(*this), QStringView(s), cs);
5408}
5409
5410/*!
5411 \overload startsWith()
5412 */
5413bool QString::startsWith(QLatin1StringView s, Qt::CaseSensitivity cs) const
5414{
5415 return qt_starts_with_impl(QStringView(*this), s, cs);
5416}
5417
5418/*!
5419 \overload startsWith()
5420
5421 Returns \c true if the string starts with \a c; otherwise returns
5422 \c false.
5423*/
5424bool QString::startsWith(QChar c, Qt::CaseSensitivity cs) const
5425{
5426 if (!size())
5427 return false;
5428 if (cs == Qt::CaseSensitive)
5429 return at(0) == c;
5430 return foldCase(at(0)) == foldCase(c);
5431}
5432
5433/*!
5434 \fn bool QString::startsWith(QStringView str, Qt::CaseSensitivity cs) const
5435 \since 5.10
5436 \overload
5437
5438 Returns \c true if the string starts with the string view \a str;
5439 otherwise returns \c false.
5440
5441 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
5442
5443 \sa endsWith()
5444*/
5445
5446/*!
5447 Returns \c true if the string ends with \a s; otherwise returns
5448 \c false.
5449
5450 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
5451
5452 \snippet qstring/main.cpp 20
5453
5454 \sa startsWith()
5455*/
5456bool QString::endsWith(const QString &s, Qt::CaseSensitivity cs) const
5457{
5458 return qt_ends_with_impl(QStringView(*this), QStringView(s), cs);
5459}
5460
5461/*!
5462 \fn bool QString::endsWith(QStringView str, Qt::CaseSensitivity cs) const
5463 \since 5.10
5464 \overload endsWith()
5465 Returns \c true if the string ends with the string view \a str;
5466 otherwise returns \c false.
5467
5468 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
5469
5470 \sa startsWith()
5471*/
5472
5473/*!
5474 \overload endsWith()
5475*/
5476bool QString::endsWith(QLatin1StringView s, Qt::CaseSensitivity cs) const
5477{
5478 return qt_ends_with_impl(QStringView(*this), s, cs);
5479}
5480
5481/*!
5482 Returns \c true if the string ends with \a c; otherwise returns
5483 \c false.
5484
5485 \overload endsWith()
5486 */
5487bool QString::endsWith(QChar c, Qt::CaseSensitivity cs) const
5488{
5489 if (!size())
5490 return false;
5491 if (cs == Qt::CaseSensitive)
5492 return at(size() - 1) == c;
5493 return foldCase(at(size() - 1)) == foldCase(c);
5494}
5495
5496static bool checkCase(QStringView s, QUnicodeTables::Case c) noexcept
5497{
5498 QStringIterator it(s);
5499 while (it.hasNext()) {
5500 const char32_t uc = it.next();
5501 if (caseConversion(uc)[c].diff)
5502 return false;
5503 }
5504 return true;
5505}
5506
5507bool QtPrivate::isLower(QStringView s) noexcept
5508{
5509 return checkCase(s, QUnicodeTables::LowerCase);
5510}
5511
5512bool QtPrivate::isUpper(QStringView s) noexcept
5513{
5514 return checkCase(s, QUnicodeTables::UpperCase);
5515}
5516
5517/*!
5518 Returns \c true if the string is uppercase, that is, it's identical
5519 to its toUpper() folding.
5520
5521 Note that this does \e not mean that the string does not contain
5522 lowercase letters (some lowercase letters do not have an uppercase
5523 folding; they are left unchanged by toUpper()).
5524 For more information, refer to the Unicode standard, section 3.13.
5525
5526 \since 5.12
5527
5528 \sa QChar::toUpper(), isLower()
5529*/
5530bool QString::isUpper() const
5531{
5532 return QtPrivate::isUpper(qToStringViewIgnoringNull(*this));
5533}
5534
5535/*!
5536 Returns \c true if the string is lowercase, that is, it's identical
5537 to its toLower() folding.
5538
5539 Note that this does \e not mean that the string does not contain
5540 uppercase letters (some uppercase letters do not have a lowercase
5541 folding; they are left unchanged by toLower()).
5542 For more information, refer to the Unicode standard, section 3.13.
5543
5544 \since 5.12
5545
5546 \sa QChar::toLower(), isUpper()
5547 */
5548bool QString::isLower() const
5549{
5550 return QtPrivate::isLower(qToStringViewIgnoringNull(*this));
5551}
5552
5553static QByteArray qt_convert_to_latin1(QStringView string);
5554
5555QByteArray QString::toLatin1_helper(const QString &string)
5556{
5557 return qt_convert_to_latin1(string);
5558}
5559
5560/*!
5561 \since 6.0
5562 \internal
5563 \relates QAnyStringView
5564
5565 Returns a UTF-16 representation of \a string as a QString.
5566
5567 \sa QString::toLatin1(), QStringView::toLatin1(), QtPrivate::convertToUtf8(),
5568 QtPrivate::convertToLocal8Bit(), QtPrivate::convertToUcs4()
5569*/
5570QString QtPrivate::convertToQString(QAnyStringView string)
5571{
5572 return string.visit([] (auto string) { return string.toString(); });
5573}
5574
5575/*!
5576 \since 5.10
5577 \internal
5578 \relates QStringView
5579
5580 Returns a Latin-1 representation of \a string as a QByteArray.
5581
5582 The behavior is undefined if \a string contains non-Latin1 characters.
5583
5584 \sa QString::toLatin1(), QStringView::toLatin1(), QtPrivate::convertToUtf8(),
5585 QtPrivate::convertToLocal8Bit(), QtPrivate::convertToUcs4()
5586*/
5588{
5589 return qt_convert_to_latin1(string);
5590}
5591
5592Q_NEVER_INLINE
5593static QByteArray qt_convert_to_latin1(QStringView string)
5594{
5595 if (Q_UNLIKELY(string.isNull()))
5596 return QByteArray();
5597
5598 QByteArray ba(string.size(), Qt::Uninitialized);
5599
5600 // since we own the only copy, we're going to const_cast the constData;
5601 // that avoids an unnecessary call to detach() and expansion code that will never get used
5602 qt_to_latin1(reinterpret_cast<uchar *>(const_cast<char *>(ba.constData())),
5603 string.utf16(), string.size());
5604 return ba;
5605}
5606
5607QByteArray QString::toLatin1_helper_inplace(QString &s)
5608{
5609 if (!s.isDetached())
5610 return qt_convert_to_latin1(s);
5611
5612 // We can return our own buffer to the caller.
5613 // Conversion to Latin-1 always shrinks the buffer by half.
5614 // This relies on the fact that we use QArrayData for everything behind the scenes
5615
5616 // First, do the in-place conversion. Since isDetached() == true, the data
5617 // was allocated by QArrayData, so the null terminator must be there.
5618 qsizetype length = s.size();
5619 char16_t *sdata = s.d->data();
5620 Q_ASSERT(sdata[length] == u'\0');
5621 qt_to_latin1(reinterpret_cast<uchar *>(sdata), sdata, length + 1);
5622
5623 // Move the internals over to the byte array.
5624 // Kids, avert your eyes. Don't try this at home.
5625 auto ba_d = std::move(s.d).reinterpreted<char>();
5626
5627 // Some sanity checks
5628 Q_ASSERT(ba_d.d->allocatedCapacity() >= ba_d.size);
5629 Q_ASSERT(s.isNull());
5630 Q_ASSERT(s.isEmpty());
5631 Q_ASSERT(s.constData() == QString().constData());
5632
5633 return QByteArray(std::move(ba_d));
5634}
5635
5636/*!
5637 \since 6.9
5638 \internal
5639 \relates QLatin1StringView
5640
5641 Returns a UTF-8 representation of \a string as a QByteArray.
5642*/
5643QByteArray QtPrivate::convertToUtf8(QLatin1StringView string)
5644{
5645 if (Q_UNLIKELY(string.isNull()))
5646 return QByteArray();
5647
5648 // create a QByteArray with the worst case scenario size
5649 QByteArray ba(string.size() * 2, Qt::Uninitialized);
5650 const qsizetype sz = QUtf8::convertFromLatin1(ba.data(), string) - ba.data();
5651 ba.truncate(sz);
5652
5653 return ba;
5654}
5655
5656// QLatin1 methods that use helpers from qstring.cpp
5657char16_t *QLatin1::convertToUnicode(char16_t *out, QLatin1StringView in) noexcept
5658{
5659 const qsizetype len = in.size();
5660 qt_from_latin1(out, in.data(), len);
5661 return std::next(out, len);
5662}
5663
5664char *QLatin1::convertFromUnicode(char *out, QStringView in) noexcept
5665{
5666 const qsizetype len = in.size();
5667 qt_to_latin1(reinterpret_cast<uchar *>(out), in.utf16(), len);
5668 return out + len;
5669}
5670
5671/*!
5672 \fn QByteArray QString::toLatin1() const
5673
5674 Returns a Latin-1 representation of the string as a QByteArray.
5675
5676 The returned byte array is undefined if the string contains non-Latin1
5677 characters. Those characters may be suppressed or replaced with a
5678 question mark.
5679
5680 \sa fromLatin1(), toUtf8(), toLocal8Bit(), QStringEncoder
5681*/
5682
5683static QByteArray qt_convert_to_local_8bit(QStringView string);
5684
5685/*!
5686 \fn QByteArray QString::toLocal8Bit() const
5687
5688 Returns the local 8-bit representation of the string as a
5689 QByteArray.
5690
5691 \include qstring.qdocinc {qstring-local-8-bit-equivalent} {toUtf8}
5692
5693 If this string contains any characters that cannot be encoded in the
5694 local 8-bit encoding, the returned byte array is undefined. Those
5695 characters may be suppressed or replaced by another.
5696
5697 \sa fromLocal8Bit(), toLatin1(), toUtf8(), QStringEncoder
5698*/
5699
5700QByteArray QString::toLocal8Bit_helper(const QChar *data, qsizetype size)
5701{
5702 return qt_convert_to_local_8bit(QStringView(data, size));
5703}
5704
5705static QByteArray qt_convert_to_local_8bit(QStringView string)
5706{
5707 if (string.isNull())
5708 return QByteArray();
5709 QStringEncoder fromUtf16(QStringEncoder::System, QStringEncoder::Flag::Stateless);
5710 return fromUtf16(string);
5711}
5712
5713/*!
5714 \since 5.10
5715 \internal
5716 \relates QStringView
5717
5718 Returns a local 8-bit representation of \a string as a QByteArray.
5719
5720 On Unix systems this is equivalent to toUtf8(); on Windows the system's
5721 current code page is used.
5722
5723 The behavior is undefined if \a string contains characters not
5724 supported by the locale's 8-bit encoding.
5725
5726 \sa QString::toLocal8Bit(), QStringView::toLocal8Bit()
5727*/
5729{
5730 return qt_convert_to_local_8bit(string);
5731}
5732
5733static QByteArray qt_convert_to_utf8(QStringView str);
5734
5735/*!
5736 \fn QByteArray QString::toUtf8() const
5737
5738 Returns a UTF-8 representation of the string as a QByteArray.
5739
5740 UTF-8 is a Unicode codec and can represent all characters in a Unicode
5741 string like QString.
5742
5743 \sa fromUtf8(), toLatin1(), toLocal8Bit(), QStringEncoder
5744*/
5745
5746QByteArray QString::toUtf8_helper(const QString &str)
5747{
5748 return qt_convert_to_utf8(str);
5749}
5750
5751static QByteArray qt_convert_to_utf8(QStringView str)
5752{
5753 if (str.isNull())
5754 return QByteArray();
5755
5756 return QUtf8::convertFromUnicode(str);
5757}
5758
5759/*!
5760 \since 5.10
5761 \internal
5762 \relates QStringView
5763
5764 Returns a UTF-8 representation of \a string as a QByteArray.
5765
5766 UTF-8 is a Unicode codec and can represent all characters in a Unicode
5767 string like QStringView.
5768
5769 \sa QString::toUtf8(), QStringView::toUtf8()
5770*/
5772{
5773 return qt_convert_to_utf8(string);
5774}
5775
5776static QList<uint> qt_convert_to_ucs4(QStringView string);
5777
5778/*!
5779 \since 4.2
5780
5781 Returns a UCS-4/UTF-32 representation of the string as a QList<uint>.
5782
5783 UTF-32 is a Unicode codec and therefore it is lossless. All characters from
5784 this string will be encoded in UTF-32. Any invalid sequence of code units in
5785 this string is replaced by the Unicode replacement character
5786 (QChar::ReplacementCharacter, which corresponds to \c{U+FFFD}).
5787
5788 The returned list is not 0-terminated.
5789
5790 \sa fromUtf8(), toUtf8(), toLatin1(), toLocal8Bit(), QStringEncoder,
5791 fromUcs4(), toWCharArray()
5792*/
5793QList<uint> QString::toUcs4() const
5794{
5795 return qt_convert_to_ucs4(*this);
5796}
5797
5798static QList<uint> qt_convert_to_ucs4(QStringView string)
5799{
5800 QList<uint> v(string.size());
5801 uint *a = const_cast<uint*>(v.constData());
5802 QStringIterator it(string);
5803 while (it.hasNext())
5804 *a++ = it.next();
5805 v.resize(a - v.constData());
5806 return v;
5807}
5808
5809/*!
5810 \since 5.10
5811 \internal
5812 \relates QStringView
5813
5814 Returns a UCS-4/UTF-32 representation of \a string as a QList<uint>.
5815
5816 UTF-32 is a Unicode codec and therefore it is lossless. All characters from
5817 this string will be encoded in UTF-32. Any invalid sequence of code units in
5818 this string is replaced by the Unicode replacement character
5819 (QChar::ReplacementCharacter, which corresponds to \c{U+FFFD}).
5820
5821 The returned list is not 0-terminated.
5822
5823 \sa QString::toUcs4(), QStringView::toUcs4(), QtPrivate::convertToLatin1(),
5824 QtPrivate::convertToLocal8Bit(), QtPrivate::convertToUtf8()
5825*/
5826QList<uint> QtPrivate::convertToUcs4(QStringView string)
5827{
5828 return qt_convert_to_ucs4(string);
5829}
5830
5831/*!
5832 \fn QString QString::fromLatin1(QByteArrayView str)
5833 \overload
5834 \since 6.0
5835
5836 Returns a QString initialized with the Latin-1 string \a str.
5837
5838 \note: any null ('\\0') bytes in the byte array will be included in this
5839 string, converted to Unicode null characters (U+0000).
5840*/
5841QString QString::fromLatin1(QByteArrayView ba)
5842{
5843 DataPointer d;
5844 if (!ba.data()) {
5845 // nothing to do
5846 } else if (ba.size() == 0) {
5847 d = DataPointer::fromRawData(&_empty, 0);
5848 } else {
5849 d = DataPointer(ba.size(), ba.size());
5850 Q_CHECK_PTR(d.data());
5851 d.data()[ba.size()] = '\0';
5852 char16_t *dst = d.data();
5853
5854 qt_from_latin1(dst, ba.data(), size_t(ba.size()));
5855 }
5856 return QString(std::move(d));
5857}
5858
5859/*!
5860 \fn QString QString::fromLatin1(const char *str, qsizetype size)
5861 Returns a QString initialized with the first \a size characters
5862 of the Latin-1 string \a str.
5863
5864 If \a size is \c{-1}, \c{strlen(str)} is used instead.
5865
5866 \sa toLatin1(), fromUtf8(), fromLocal8Bit()
5867*/
5868
5869/*!
5870 \fn QString QString::fromLatin1(const QByteArray &str)
5871 \overload
5872 \since 5.0
5873
5874 Returns a QString initialized with the Latin-1 string \a str.
5875
5876 \note: any null ('\\0') bytes in the byte array will be included in this
5877 string, converted to Unicode null characters (U+0000). This behavior is
5878 different from Qt 5.x.
5879*/
5880
5881/*!
5882 \fn QString QString::fromLocal8Bit(const char *str, qsizetype size)
5883 Returns a QString initialized with the first \a size characters
5884 of the 8-bit string \a str.
5885
5886 If \a size is \c{-1}, \c{strlen(str)} is used instead.
5887
5888 \include qstring.qdocinc {qstring-local-8-bit-equivalent} {fromUtf8}
5889
5890 \sa toLocal8Bit(), fromLatin1(), fromUtf8()
5891*/
5892
5893/*!
5894 \fn QString QString::fromLocal8Bit(const QByteArray &str)
5895 \overload
5896 \since 5.0
5897
5898 Returns a QString initialized with the 8-bit string \a str.
5899
5900 \include qstring.qdocinc {qstring-local-8-bit-equivalent} {fromUtf8}
5901
5902 \note: any null ('\\0') bytes in the byte array will be included in this
5903 string, converted to Unicode null characters (U+0000). This behavior is
5904 different from Qt 5.x.
5905*/
5906
5907/*!
5908 \fn QString QString::fromLocal8Bit(QByteArrayView str)
5909 \overload
5910 \since 6.0
5911
5912 Returns a QString initialized with the 8-bit string \a str.
5913
5914 \include qstring.qdocinc {qstring-local-8-bit-equivalent} {fromUtf8}
5915
5916 \note: any null ('\\0') bytes in the byte array will be included in this
5917 string, converted to Unicode null characters (U+0000).
5918*/
5919QString QString::fromLocal8Bit(QByteArrayView ba)
5920{
5921 if (ba.isNull())
5922 return QString();
5923 if (ba.isEmpty())
5924 return QString(DataPointer::fromRawData(&_empty, 0));
5925 QStringDecoder toUtf16(QStringDecoder::System, QStringDecoder::Flag::Stateless);
5926 return toUtf16(ba);
5927}
5928
5929/*! \fn QString QString::fromUtf8(const char *str, qsizetype size)
5930 Returns a QString initialized with the first \a size bytes
5931 of the UTF-8 string \a str.
5932
5933 If \a size is \c{-1}, \c{strlen(str)} is used instead.
5934
5935 UTF-8 is a Unicode codec and can represent all characters in a Unicode
5936 string like QString. However, invalid sequences are possible with UTF-8
5937 and, if any such are found, they will be replaced with one or more
5938 "replacement characters", or suppressed. These include non-Unicode
5939 sequences, non-characters, overlong sequences or surrogate codepoints
5940 encoded into UTF-8.
5941
5942 This function can be used to process incoming data incrementally as long as
5943 all UTF-8 characters are terminated within the incoming data. Any
5944 unterminated characters at the end of the string will be replaced or
5945 suppressed. In order to do stateful decoding, please use \l QStringDecoder.
5946
5947 \sa toUtf8(), fromLatin1(), fromLocal8Bit()
5948*/
5949
5950/*!
5951 \fn QString QString::fromUtf8(const char8_t *str)
5952 \overload
5953 \since 6.1
5954
5955 This overload is only available when compiling in C++20 mode.
5956*/
5957
5958/*!
5959 \fn QString QString::fromUtf8(const char8_t *str, qsizetype size)
5960 \overload
5961 \since 6.0
5962
5963 This overload is only available when compiling in C++20 mode.
5964*/
5965
5966/*!
5967 \fn QString QString::fromUtf8(const QByteArray &str)
5968 \overload
5969 \since 5.0
5970
5971 Returns a QString initialized with the UTF-8 string \a str.
5972
5973 \note: any null ('\\0') bytes in the byte array will be included in this
5974 string, converted to Unicode null characters (U+0000). This behavior is
5975 different from Qt 5.x.
5976*/
5977
5978/*!
5979 \fn QString QString::fromUtf8(QByteArrayView str)
5980 \overload
5981 \since 6.0
5982
5983 Returns a QString initialized with the UTF-8 string \a str.
5984
5985 \note: any null ('\\0') bytes in the byte array will be included in this
5986 string, converted to Unicode null characters (U+0000).
5987*/
5988QString QString::fromUtf8(QByteArrayView ba)
5989{
5990 if (ba.isNull())
5991 return QString();
5992 if (ba.isEmpty())
5993 return QString(DataPointer::fromRawData(&_empty, 0));
5994 return QUtf8::convertToUnicode(ba);
5995}
5996
5997#ifndef QT_BOOTSTRAPPED
5998/*!
5999 \since 5.3
6000 Returns a QString initialized with the first \a size characters
6001 of the Unicode string \a unicode (ISO-10646-UTF-16 encoded).
6002
6003 If \a size is -1 (default), \a unicode must be '\\0'-terminated.
6004
6005 This function checks for a Byte Order Mark (BOM). If it is missing,
6006 host byte order is assumed.
6007
6008 This function is slow compared to the other Unicode conversions.
6009 Use QString(const QChar *, qsizetype) or QString(const QChar *) if possible.
6010
6011 QString makes a deep copy of the Unicode data.
6012
6013 \sa utf16(), setUtf16(), fromStdU16String()
6014*/
6015QString QString::fromUtf16(const char16_t *unicode, qsizetype size)
6016{
6017 if (!unicode)
6018 return QString();
6019 if (size < 0)
6020 size = QtPrivate::qustrlen(unicode);
6021 QStringDecoder toUtf16(QStringDecoder::Utf16, QStringDecoder::Flag::Stateless);
6022 return toUtf16(QByteArrayView(reinterpret_cast<const char *>(unicode), size * 2));
6023}
6024
6025/*!
6026 \fn QString QString::fromUtf16(const ushort *str, qsizetype size)
6027 \deprecated [6.0] Use the \c char16_t overload instead.
6028*/
6029
6030/*!
6031 \fn QString QString::fromUcs4(const uint *str, qsizetype size)
6032 \since 4.2
6033 \deprecated [6.0] Use the \c char32_t overload instead.
6034*/
6035
6036/*!
6037 \since 5.3
6038
6039 Returns a QString initialized with the first \a size characters
6040 of the Unicode string \a unicode (encoded as UTF-32).
6041
6042 If \a size is -1 (default), \a unicode must be '\\0'-terminated.
6043
6044 \sa toUcs4(), fromUtf16(), utf16(), setUtf16(), fromWCharArray(),
6045 fromStdU32String()
6046*/
6047QString QString::fromUcs4(const char32_t *unicode, qsizetype size)
6048{
6049 if (!unicode)
6050 return QString();
6051 if (size < 0) {
6052 if constexpr (sizeof(char32_t) == sizeof(wchar_t))
6053 size = wcslen(reinterpret_cast<const wchar_t *>(unicode));
6054 else
6055 size = std::char_traits<char32_t>::length(unicode);
6056 }
6057 QStringDecoder toUtf16(QStringDecoder::Utf32, QStringDecoder::Flag::Stateless);
6058 return toUtf16(QByteArrayView(reinterpret_cast<const char *>(unicode), size * 4));
6059}
6060#endif // !QT_BOOTSTRAPPED
6061
6062/*!
6063 Resizes the string to \a size characters and copies \a unicode
6064 into the string.
6065
6066 If \a unicode is \nullptr, nothing is copied, but the string is still
6067 resized to \a size.
6068
6069 \sa unicode(), setUtf16()
6070*/
6071QString& QString::setUnicode(const QChar *unicode, qsizetype size)
6072{
6073 resize(size);
6074 if (unicode && size)
6075 memcpy(d.data(), unicode, size * sizeof(QChar));
6076 return *this;
6077}
6078
6079/*!
6080 \fn QString::setUnicode(const char16_t *unicode, qsizetype size)
6081 \overload
6082 \since 6.9
6083
6084 \sa unicode(), setUtf16()
6085*/
6086
6087/*!
6088 \fn QString::setUtf16(const char16_t *unicode, qsizetype size)
6089 \since 6.9
6090
6091 Resizes the string to \a size characters and copies \a unicode
6092 into the string.
6093
6094 If \a unicode is \nullptr, nothing is copied, but the string is still
6095 resized to \a size.
6096
6097 Note that unlike fromUtf16(), this function does not consider BOMs and
6098 possibly differing byte ordering.
6099
6100 \sa utf16(), setUnicode()
6101*/
6102
6103/*!
6104 \fn QString &QString::setUtf16(const ushort *unicode, qsizetype size)
6105 \obsolete Use the \c char16_t overload instead.
6106*/
6107
6108/*!
6109 \fn QString QString::simplified() const
6110
6111 Returns a string that has whitespace removed from the start
6112 and the end, and that has each sequence of internal whitespace
6113 replaced with a single space.
6114
6115 Whitespace means any character for which QChar::isSpace() returns
6116 \c true. This includes the ASCII characters '\\t', '\\n', '\\v',
6117 '\\f', '\\r', and ' '.
6118
6119 Example:
6120
6121 \snippet qstring/main.cpp 57
6122
6123 \sa trimmed()
6124*/
6125QString QString::simplified_helper(const QString &str)
6126{
6127 return QStringAlgorithms<const QString>::simplified_helper(str);
6128}
6129
6130QString QString::simplified_helper(QString &str)
6131{
6132 return QStringAlgorithms<QString>::simplified_helper(str);
6133}
6134
6135namespace {
6136 template <typename StringView>
6137 StringView qt_trimmed(StringView s) noexcept
6138 {
6139 const auto [begin, end] = QStringAlgorithms<const StringView>::trimmed_helper_positions(s);
6140 return StringView{begin, end};
6141 }
6142}
6143
6144/*!
6145 \fn QStringView QtPrivate::trimmed(QStringView s)
6146 \fn QLatin1StringView QtPrivate::trimmed(QLatin1StringView s)
6147 \internal
6148 \relates QStringView
6149 \since 5.10
6150
6151 Returns \a s with whitespace removed from the start and the end.
6152
6153 Whitespace means any character for which QChar::isSpace() returns
6154 \c true. This includes the ASCII characters '\\t', '\\n', '\\v',
6155 '\\f', '\\r', and ' '.
6156
6157 \sa QString::trimmed(), QStringView::trimmed(), QLatin1StringView::trimmed()
6158*/
6159QStringView QtPrivate::trimmed(QStringView s) noexcept
6160{
6161 return qt_trimmed(s);
6162}
6163
6164QLatin1StringView QtPrivate::trimmed(QLatin1StringView s) noexcept
6165{
6166 return qt_trimmed(s);
6167}
6168
6169/*!
6170 \fn QString QString::trimmed() const
6171
6172 Returns a string that has whitespace removed from the start and
6173 the end.
6174
6175 Whitespace means any character for which QChar::isSpace() returns
6176 \c true. This includes the ASCII characters '\\t', '\\n', '\\v',
6177 '\\f', '\\r', and ' '.
6178
6179 Example:
6180
6181 \snippet qstring/main.cpp 82
6182
6183 Unlike simplified(), trimmed() leaves internal whitespace alone.
6184
6185 \sa simplified()
6186*/
6187QString QString::trimmed_helper(const QString &str)
6188{
6189 return QStringAlgorithms<const QString>::trimmed_helper(str);
6190}
6191
6192QString QString::trimmed_helper(QString &str)
6193{
6194 return QStringAlgorithms<QString>::trimmed_helper(str);
6195}
6196
6197/*! \fn const QChar QString::at(qsizetype position) const
6198
6199 Returns the character at the given index \a position in the
6200 string.
6201
6202 The \a position must be a valid index position in the string
6203 (i.e., 0 <= \a position < size()).
6204
6205 \sa operator[]()
6206*/
6207
6208/*!
6209 \fn QChar &QString::operator[](qsizetype position)
6210
6211 Returns the character at the specified \a position in the string as a
6212 modifiable reference.
6213
6214 Example:
6215
6216 \snippet qstring/main.cpp 85
6217
6218 \sa at()
6219*/
6220
6221/*!
6222 \fn const QChar QString::operator[](qsizetype position) const
6223
6224 \overload operator[]()
6225*/
6226
6227/*!
6228 \fn QChar QString::front() const
6229 \since 5.10
6230
6231 Returns the first character in the string.
6232 Same as \c{at(0)}.
6233
6234 This function is provided for STL compatibility.
6235
6236 \warning Calling this function on an empty string constitutes
6237 undefined behavior.
6238
6239 \sa back(), at(), operator[]()
6240*/
6241
6242/*!
6243 \fn QChar QString::back() const
6244 \since 5.10
6245
6246 Returns the last character in the string.
6247 Same as \c{at(size() - 1)}.
6248
6249 This function is provided for STL compatibility.
6250
6251 \warning Calling this function on an empty string constitutes
6252 undefined behavior.
6253
6254 \sa front(), at(), operator[]()
6255*/
6256
6257/*!
6258 \fn QChar &QString::front()
6259 \since 5.10
6260
6261 Returns a reference to the first character in the string.
6262 Same as \c{operator[](0)}.
6263
6264 This function is provided for STL compatibility.
6265
6266 \warning Calling this function on an empty string constitutes
6267 undefined behavior.
6268
6269 \sa back(), at(), operator[]()
6270*/
6271
6272/*!
6273 \fn QChar &QString::back()
6274 \since 5.10
6275
6276 Returns a reference to the last character in the string.
6277 Same as \c{operator[](size() - 1)}.
6278
6279 This function is provided for STL compatibility.
6280
6281 \warning Calling this function on an empty string constitutes
6282 undefined behavior.
6283
6284 \sa front(), at(), operator[]()
6285*/
6286
6287/*!
6288 \fn void QString::truncate(qsizetype position)
6289
6290 Truncates the string starting from, and including, the element at index
6291 \a position.
6292
6293 If the specified \a position index is beyond the end of the
6294 string, nothing happens.
6295
6296 Example:
6297
6298 \snippet qstring/main.cpp 83
6299
6300 If \a position is negative, it is equivalent to passing zero.
6301
6302 \sa chop(), resize(), first(), QStringView::truncate()
6303*/
6304
6305void QString::truncate(qsizetype pos)
6306{
6307 if (pos < size())
6308 resize(pos);
6309}
6310
6311
6312/*!
6313 Removes \a n characters from the end of the string.
6314
6315 If \a n is greater than or equal to size(), the result is an
6316 empty string; if \a n is negative, it is equivalent to passing zero.
6317
6318 Example:
6319 \snippet qstring/main.cpp 15
6320
6321 If you want to remove characters from the \e beginning of the
6322 string, use remove() instead.
6323
6324 \sa truncate(), resize(), remove(), QStringView::chop()
6325*/
6326void QString::chop(qsizetype n)
6327{
6328 if (n > 0)
6329 resize(d.size - n);
6330}
6331
6332/*!
6333 Sets every character in the string to character \a ch. If \a size
6334 is different from -1 (default), the string is resized to \a
6335 size beforehand.
6336
6337 Example:
6338
6339 \snippet qstring/main.cpp 21
6340
6341 \sa resize()
6342*/
6343
6344QString& QString::fill(QChar ch, qsizetype size)
6345{
6346 resize(size < 0 ? d.size : size);
6347 if (d.size)
6348 std::fill(d.data(), d.data() + d.size, ch.unicode());
6349 return *this;
6350}
6351
6352/*!
6353 \fn qsizetype QString::length() const
6354
6355 Returns the number of characters in this string. Equivalent to
6356 size().
6357
6358 \sa resize()
6359*/
6360
6361/*!
6362 \fn qsizetype QString::size() const
6363
6364 Returns the number of characters in this string.
6365
6366 The last character in the string is at position size() - 1.
6367
6368 Example:
6369 \snippet qstring/main.cpp 58
6370
6371 \sa isEmpty(), resize()
6372*/
6373
6374/*!
6375 \fn qsizetype QString::max_size() const
6376 \fn qsizetype QString::maxSize()
6377 \since 6.8
6378
6379 It returns the maximum number of elements that the string can
6380 theoretically hold. In practice, the number can be much smaller,
6381 limited by the amount of memory available to the system.
6382*/
6383
6384/*! \fn bool QString::isNull() const
6385
6386 Returns \c true if this string is null; otherwise returns \c false.
6387
6388 Example:
6389
6390 \snippet qstring/main.cpp 28
6391
6392 Qt makes a distinction between null strings and empty strings for
6393 historical reasons. For most applications, what matters is
6394 whether or not a string contains any data, and this can be
6395 determined using the isEmpty() function.
6396
6397 \sa isEmpty()
6398*/
6399
6400/*! \fn bool QString::isEmpty() const
6401
6402 Returns \c true if the string has no characters; otherwise returns
6403 \c false.
6404
6405 Example:
6406
6407 \snippet qstring/main.cpp 27
6408
6409 \sa size()
6410*/
6411
6412/*! \fn QString &QString::operator+=(const QString &other)
6413
6414 Appends the string \a other onto the end of this string and
6415 returns a reference to this string.
6416
6417 Example:
6418
6419 \snippet qstring/main.cpp 84
6420
6421 This operation is typically very fast (\l{constant time}),
6422 because QString preallocates extra space at the end of the string
6423 data so it can grow without reallocating the entire string each
6424 time.
6425
6426 \sa append(), prepend()
6427*/
6428
6429/*! \fn QString &QString::operator+=(QLatin1StringView str)
6430
6431 \overload operator+=()
6432
6433 Appends the Latin-1 string viewed by \a str to this string.
6434*/
6435
6436/*! \fn QString &QString::operator+=(QUtf8StringView str)
6437 \since 6.5
6438 \overload operator+=()
6439
6440 Appends the UTF-8 string view \a str to this string.
6441*/
6442
6443/*! \fn QString &QString::operator+=(const QByteArray &ba)
6444
6445 \overload operator+=()
6446
6447 Appends the byte array \a ba to this string. The byte array is converted
6448 to Unicode using the fromUtf8() function. If any NUL characters ('\\0')
6449 are embedded in the \a ba byte array, they will be included in the
6450 transformation.
6451
6452 You can disable this function by defining
6453 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
6454 can be useful if you want to ensure that all user-visible strings
6455 go through QObject::tr(), for example.
6456*/
6457
6458/*! \fn QString &QString::operator+=(const char *str)
6459
6460 \overload operator+=()
6461
6462 Appends the string \a str to this string. The const char pointer
6463 is converted to Unicode using the fromUtf8() function.
6464
6465 You can disable this function by defining \l QT_NO_CAST_FROM_ASCII
6466 when you compile your applications. This can be useful if you want
6467 to ensure that all user-visible strings go through QObject::tr(),
6468 for example.
6469*/
6470
6471/*! \fn QString &QString::operator+=(QStringView str)
6472 \since 6.0
6473 \overload operator+=()
6474
6475 Appends the string view \a str to this string.
6476*/
6477
6478/*! \fn QString &QString::operator+=(QChar ch)
6479
6480 \overload operator+=()
6481
6482 Appends the character \a ch to the string.
6483*/
6484
6485/*!
6486 \fn bool QString::operator==(const char * const &lhs, const QString &rhs)
6487
6488 \overload operator==()
6489
6490 Returns \c true if \a lhs is equal to \a rhs; otherwise returns \c false.
6491 Note that no string is equal to \a lhs being 0.
6492
6493 Equivalent to \c {lhs != 0 && compare(lhs, rhs) == 0}.
6494*/
6495
6496/*!
6497 \fn bool QString::operator!=(const char * const &lhs, const QString &rhs)
6498
6499 Returns \c true if \a lhs is not equal to \a rhs; otherwise returns
6500 \c false.
6501
6502 For \a lhs != 0, this is equivalent to \c {compare(} \a lhs, \a rhs
6503 \c {) != 0}. Note that no string is equal to \a lhs being 0.
6504*/
6505
6506/*!
6507 \fn bool QString::operator<(const char * const &lhs, const QString &rhs)
6508
6509 Returns \c true if \a lhs is lexically less than \a rhs; otherwise
6510 returns \c false. For \a lhs != 0, this is equivalent to \c
6511 {compare(lhs, rhs) < 0}.
6512
6513 \sa {Comparing Strings}
6514*/
6515
6516/*!
6517 \fn bool QString::operator<=(const char * const &lhs, const QString &rhs)
6518
6519 Returns \c true if \a lhs is lexically less than or equal to \a rhs;
6520 otherwise returns \c false. For \a lhs != 0, this is equivalent to \c
6521 {compare(lhs, rhs) <= 0}.
6522
6523 \sa {Comparing Strings}
6524*/
6525
6526/*!
6527 \fn bool QString::operator>(const char * const &lhs, const QString &rhs)
6528
6529 Returns \c true if \a lhs is lexically greater than \a rhs; otherwise
6530 returns \c false. Equivalent to \c {compare(lhs, rhs) > 0}.
6531
6532 \sa {Comparing Strings}
6533*/
6534
6535/*!
6536 \fn bool QString::operator>=(const char * const &lhs, const QString &rhs)
6537
6538 Returns \c true if \a lhs is lexically greater than or equal to \a rhs;
6539 otherwise returns \c false. For \a lhs != 0, this is equivalent to \c
6540 {compare(lhs, rhs) >= 0}.
6541
6542 \sa {Comparing Strings}
6543*/
6544
6545/*!
6546 \fn QString operator+(const QString &s1, const QString &s2)
6547 \fn QString operator+(QString &&s1, const QString &s2)
6548 \relates QString
6549
6550 Returns a string which is the result of concatenating \a s1 and \a
6551 s2.
6552*/
6553
6554/*!
6555 \fn QString operator+(const QString &s1, const char *s2)
6556 \relates QString
6557
6558 Returns a string which is the result of concatenating \a s1 and \a
6559 s2 (\a s2 is converted to Unicode using the QString::fromUtf8()
6560 function).
6561
6562 \sa QString::fromUtf8()
6563*/
6564
6565/*!
6566 \fn QString operator+(const char *s1, const QString &s2)
6567 \relates QString
6568
6569 Returns a string which is the result of concatenating \a s1 and \a
6570 s2 (\a s1 is converted to Unicode using the QString::fromUtf8()
6571 function).
6572
6573 \sa QString::fromUtf8()
6574*/
6575
6576/*!
6577 \fn QString operator+(QStringView lhs, const QString &rhs)
6578 \fn QString operator+(const QString &lhs, QStringView rhs)
6579
6580 \relates QString
6581 \since 6.9
6582
6583 Returns a string that is the result of concatenating \a lhs and \a rhs.
6584*/
6585
6586/*!
6587 \fn int QString::compare(const QString &s1, const QString &s2, Qt::CaseSensitivity cs)
6588 \since 4.2
6589
6590 Compares the string \a s1 with the string \a s2 and returns a negative integer
6591 if \a s1 is less than \a s2, a positive integer if it is greater than \a s2,
6592 and zero if they are equal.
6593
6594 \include qstring.qdocinc {search-comparison-case-sensitivity} {comparison}
6595
6596 Case sensitive comparison is based exclusively on the numeric
6597 Unicode values of the characters and is very fast, but is not what
6598 a human would expect. Consider sorting user-visible strings with
6599 localeAwareCompare().
6600
6601 \snippet qstring/main.cpp 16
6602
6603//! [compare-isNull-vs-isEmpty]
6604 \note This function treats null strings the same as empty strings,
6605 for more details see \l {Distinction Between Null and Empty Strings}.
6606//! [compare-isNull-vs-isEmpty]
6607
6608 \sa operator==(), operator<(), operator>(), {Comparing Strings}
6609*/
6610
6611/*!
6612 \fn int QString::compare(const QString &s1, QLatin1StringView s2, Qt::CaseSensitivity cs)
6613 \since 4.2
6614 \overload compare()
6615
6616 Performs a comparison of \a s1 and \a s2, using the case
6617 sensitivity setting \a cs.
6618*/
6619
6620/*!
6621 \fn int QString::compare(QLatin1StringView s1, const QString &s2, Qt::CaseSensitivity cs = Qt::CaseSensitive)
6622
6623 \since 4.2
6624 \overload compare()
6625
6626 Performs a comparison of \a s1 and \a s2, using the case
6627 sensitivity setting \a cs.
6628*/
6629
6630/*!
6631 \fn int QString::compare(QStringView s, Qt::CaseSensitivity cs = Qt::CaseSensitive) const
6632
6633 \since 5.12
6634 \overload compare()
6635
6636 Performs a comparison of this with \a s, using the case
6637 sensitivity setting \a cs.
6638*/
6639
6640/*!
6641 \fn int QString::compare(QChar ch, Qt::CaseSensitivity cs = Qt::CaseSensitive) const
6642
6643 \since 5.14
6644 \overload compare()
6645
6646 Performs a comparison of this with \a ch, using the case
6647 sensitivity setting \a cs.
6648*/
6649
6650/*!
6651 \overload compare()
6652 \since 4.2
6653
6654 Lexically compares this string with the string \a other and returns
6655 a negative integer if this string is less than \a other, a positive
6656 integer if it is greater than \a other, and zero if they are equal.
6657
6658 Same as compare(*this, \a other, \a cs).
6659*/
6660int QString::compare(const QString &other, Qt::CaseSensitivity cs) const noexcept
6661{
6662 return QtPrivate::compareStrings(*this, other, cs);
6663}
6664
6665/*!
6666 \internal
6667 \since 4.5
6668*/
6669int QString::compare_helper(const QChar *data1, qsizetype length1, const QChar *data2, qsizetype length2,
6670 Qt::CaseSensitivity cs) noexcept
6671{
6672 Q_ASSERT(length1 >= 0);
6673 Q_ASSERT(length2 >= 0);
6674 Q_ASSERT(data1 || length1 == 0);
6675 Q_ASSERT(data2 || length2 == 0);
6676 return QtPrivate::compareStrings(QStringView(data1, length1), QStringView(data2, length2), cs);
6677}
6678
6679/*!
6680 \overload compare()
6681 \since 4.2
6682
6683 Same as compare(*this, \a other, \a cs).
6684*/
6685int QString::compare(QLatin1StringView other, Qt::CaseSensitivity cs) const noexcept
6686{
6687 return QtPrivate::compareStrings(*this, other, cs);
6688}
6689
6690/*!
6691 \internal
6692 \since 5.0
6693*/
6694int QString::compare_helper(const QChar *data1, qsizetype length1, const char *data2, qsizetype length2,
6695 Qt::CaseSensitivity cs) noexcept
6696{
6697 Q_ASSERT(length1 >= 0);
6698 Q_ASSERT(data1 || length1 == 0);
6699 if (!data2)
6700 return qt_lencmp(length1, 0);
6701 if (Q_UNLIKELY(length2 < 0))
6702 length2 = qsizetype(strlen(data2));
6703 return QtPrivate::compareStrings(QStringView(data1, length1),
6704 QUtf8StringView(data2, length2), cs);
6705}
6706
6707/*!
6708 \fn int QString::compare(const QString &s1, QStringView s2, Qt::CaseSensitivity cs = Qt::CaseSensitive)
6709 \overload compare()
6710*/
6711
6712/*!
6713 \fn int QString::compare(QStringView s1, const QString &s2, Qt::CaseSensitivity cs = Qt::CaseSensitive)
6714 \overload compare()
6715*/
6716
6717bool comparesEqual(const QByteArrayView &lhs, const QChar &rhs) noexcept
6718{
6719 return QtPrivate::equalStrings(QUtf8StringView(lhs), QStringView(&rhs, 1));
6720}
6721
6722Qt::strong_ordering compareThreeWay(const QByteArrayView &lhs, const QChar &rhs) noexcept
6723{
6724 const int res = QtPrivate::compareStrings(QUtf8StringView(lhs), QStringView(&rhs, 1));
6725 return Qt::compareThreeWay(res, 0);
6726}
6727
6728bool comparesEqual(const QByteArrayView &lhs, char16_t rhs) noexcept
6729{
6730 return QtPrivate::equalStrings(QUtf8StringView(lhs), QStringView(&rhs, 1));
6731}
6732
6733Qt::strong_ordering compareThreeWay(const QByteArrayView &lhs, char16_t rhs) noexcept
6734{
6735 const int res = QtPrivate::compareStrings(QUtf8StringView(lhs), QStringView(&rhs, 1));
6736 return Qt::compareThreeWay(res, 0);
6737}
6738
6739bool comparesEqual(const QByteArray &lhs, const QChar &rhs) noexcept
6740{
6741 return QtPrivate::equalStrings(QUtf8StringView(lhs), QStringView(&rhs, 1));
6742}
6743
6744Qt::strong_ordering compareThreeWay(const QByteArray &lhs, const QChar &rhs) noexcept
6745{
6746 const int res = QtPrivate::compareStrings(QUtf8StringView(lhs), QStringView(&rhs, 1));
6747 return Qt::compareThreeWay(res, 0);
6748}
6749
6750bool comparesEqual(const QByteArray &lhs, char16_t rhs) noexcept
6751{
6752 return QtPrivate::equalStrings(QUtf8StringView(lhs), QStringView(&rhs, 1));
6753}
6754
6755Qt::strong_ordering compareThreeWay(const QByteArray &lhs, char16_t rhs) noexcept
6756{
6757 const int res = QtPrivate::compareStrings(QUtf8StringView(lhs), QStringView(&rhs, 1));
6758 return Qt::compareThreeWay(res, 0);
6759}
6760
6761/*!
6762 \internal
6763 \since 6.8
6764*/
6765bool QT_FASTCALL QChar::equal_helper(QChar lhs, const char *rhs) noexcept
6766{
6767 return QtPrivate::equalStrings(QStringView(&lhs, 1), QUtf8StringView(rhs));
6768}
6769
6770int QT_FASTCALL QChar::compare_helper(QChar lhs, const char *rhs) noexcept
6771{
6772 return QtPrivate::compareStrings(QStringView(&lhs, 1), QUtf8StringView(rhs));
6773}
6774
6775/*!
6776 \internal
6777 \since 6.8
6778*/
6779bool QStringView::equal_helper(QStringView sv, const char *data, qsizetype len)
6780{
6781 Q_ASSERT(len >= 0);
6782 Q_ASSERT(data || len == 0);
6783 return QtPrivate::equalStrings(sv, QUtf8StringView(data, len));
6784}
6785
6786/*!
6787 \internal
6788 \since 6.8
6789*/
6790int QStringView::compare_helper(QStringView sv, const char *data, qsizetype len)
6791{
6792 Q_ASSERT(len >= 0);
6793 Q_ASSERT(data || len == 0);
6794 return QtPrivate::compareStrings(sv, QUtf8StringView(data, len));
6795}
6796
6797/*!
6798 \internal
6799 \since 6.8
6800*/
6801bool QLatin1StringView::equal_helper(QLatin1StringView s1, const char *s2, qsizetype len) noexcept
6802{
6803 // because qlatin1stringview.h can't include qutf8stringview.h
6804 Q_ASSERT(len >= 0);
6805 Q_ASSERT(s2 || len == 0);
6806 return QtPrivate::equalStrings(s1, QUtf8StringView(s2, len));
6807}
6808
6809/*!
6810 \internal
6811 \since 6.6
6812*/
6813int QLatin1StringView::compare_helper(const QLatin1StringView &s1, const char *s2, qsizetype len) noexcept
6814{
6815 // because qlatin1stringview.h can't include qutf8stringview.h
6816 Q_ASSERT(len >= 0);
6817 Q_ASSERT(s2 || len == 0);
6818 return QtPrivate::compareStrings(s1, QUtf8StringView(s2, len));
6819}
6820
6821/*!
6822 \internal
6823 \since 4.5
6824*/
6825int QLatin1StringView::compare_helper(const QChar *data1, qsizetype length1, QLatin1StringView s2,
6826 Qt::CaseSensitivity cs) noexcept
6827{
6828 Q_ASSERT(length1 >= 0);
6829 Q_ASSERT(data1 || length1 == 0);
6830 return QtPrivate::compareStrings(QStringView(data1, length1), s2, cs);
6831}
6832
6833/*!
6834 \fn int QString::localeAwareCompare(const QString & s1, const QString & s2)
6835
6836 Compares \a s1 with \a s2 and returns an integer less than, equal
6837 to, or greater than zero if \a s1 is less than, equal to, or
6838 greater than \a s2.
6839
6840 The comparison is performed in a locale- and also
6841 platform-dependent manner. Use this function to present sorted
6842 lists of strings to the user.
6843
6844 \sa compare(), QLocale, {Comparing Strings}
6845*/
6846
6847/*!
6848 \fn int QString::localeAwareCompare(QStringView other) const
6849 \since 6.0
6850 \overload localeAwareCompare()
6851
6852 Compares this string with the \a other string and returns an
6853 integer less than, equal to, or greater than zero if this string
6854 is less than, equal to, or greater than the \a other string.
6855
6856 The comparison is performed in a locale- and also
6857 platform-dependent manner. Use this function to present sorted
6858 lists of strings to the user.
6859
6860 Same as \c {localeAwareCompare(*this, other)}.
6861
6862 \sa {Comparing Strings}
6863*/
6864
6865/*!
6866 \fn int QString::localeAwareCompare(QStringView s1, QStringView s2)
6867 \since 6.0
6868 \overload localeAwareCompare()
6869
6870 Compares \a s1 with \a s2 and returns an integer less than, equal
6871 to, or greater than zero if \a s1 is less than, equal to, or
6872 greater than \a s2.
6873
6874 The comparison is performed in a locale- and also
6875 platform-dependent manner. Use this function to present sorted
6876 lists of strings to the user.
6877
6878 \sa {Comparing Strings}
6879*/
6880
6881
// Fallback definitions of the string-comparison result codes, used only when
// the included headers have not already defined them. The Windows branch of
// QString::localeAwareCompare_helper() switches on these values to translate
// the result of CompareStringEx() into -1 / 0 / 1.
#if !defined(CSTR_LESS_THAN)
#define CSTR_LESS_THAN 1
#define CSTR_EQUAL 2
#define CSTR_GREATER_THAN 3
#endif
6887
6888/*!
6889 \overload localeAwareCompare()
6890
6891 Compares this string with the \a other string and returns an
6892 integer less than, equal to, or greater than zero if this string
6893 is less than, equal to, or greater than the \a other string.
6894
6895 The comparison is performed in a locale- and also
6896 platform-dependent manner. Use this function to present sorted
6897 lists of strings to the user.
6898
6899 Same as \c {localeAwareCompare(*this, other)}.
6900
6901 \sa {Comparing Strings}
6902*/
6903int QString::localeAwareCompare(const QString &other) const
6904{
6905 return localeAwareCompare_helper(constData(), size(), other.constData(), other.size());
6906}
6907
6908/*!
6909 \internal
6910 \since 4.5
6911*/
6912int QString::localeAwareCompare_helper(const QChar *data1, qsizetype length1,
6913 const QChar *data2, qsizetype length2)
6914{
6915 Q_ASSERT(length1 >= 0);
6916 Q_ASSERT(data1 || length1 == 0);
6917 Q_ASSERT(length2 >= 0);
6918 Q_ASSERT(data2 || length2 == 0);
6919
6920 // do the right thing for null and empty
6921 if (length1 == 0 || length2 == 0)
6922 return QtPrivate::compareStrings(QStringView(data1, length1), QStringView(data2, length2),
6923 Qt::CaseSensitive);
6924
6925#if QT_CONFIG(icu)
6926 return QCollator::defaultCompare(QStringView(data1, length1), QStringView(data2, length2));
6927#else
6928 const QString lhs = QString::fromRawData(data1, length1).normalized(QString::NormalizationForm_C);
6929 const QString rhs = QString::fromRawData(data2, length2).normalized(QString::NormalizationForm_C);
6930# if defined(Q_OS_WIN)
6931 int res = CompareStringEx(LOCALE_NAME_USER_DEFAULT, 0, (LPWSTR)lhs.constData(), lhs.length(), (LPWSTR)rhs.constData(), rhs.length(), NULL, NULL, 0);
6932
6933 switch (res) {
6934 case CSTR_LESS_THAN:
6935 return -1;
6936 case CSTR_GREATER_THAN:
6937 return 1;
6938 default:
6939 return 0;
6940 }
6941# elif defined (Q_OS_DARWIN)
6942 // Use CFStringCompare for comparing strings on Mac. This makes Qt order
6943 // strings the same way as native applications do, and also respects
6944 // the "Order for sorted lists" setting in the International preferences
6945 // panel.
6946 const CFStringRef thisString =
6947 CFStringCreateWithCharactersNoCopy(kCFAllocatorDefault,
6948 reinterpret_cast<const UniChar *>(lhs.constData()), lhs.length(), kCFAllocatorNull);
6949 const CFStringRef otherString =
6950 CFStringCreateWithCharactersNoCopy(kCFAllocatorDefault,
6951 reinterpret_cast<const UniChar *>(rhs.constData()), rhs.length(), kCFAllocatorNull);
6952
6953 const int result = CFStringCompare(thisString, otherString, kCFCompareLocalized);
6954 CFRelease(thisString);
6955 CFRelease(otherString);
6956 return result;
6957# elif defined(Q_OS_UNIX)
6958 // declared in <string.h> (no better than QtPrivate::compareStrings() on Android, sadly)
6959 return strcoll(lhs.toLocal8Bit().constData(), rhs.toLocal8Bit().constData());
6960# else
6961# error "This case shouldn't happen"
6962 return QtPrivate::compareStrings(lhs, rhs, Qt::CaseSensitive);
6963# endif
6964#endif // !QT_CONFIG(icu)
6965}
6966
6967
6968/*!
6969 \fn const QChar *QString::unicode() const
6970
6971 Returns a Unicode representation of the string.
6972 The result remains valid until the string is modified.
6973
6974 \note The returned string may not be '\\0'-terminated.
6975 Use size() to determine the length of the array.
6976
6977 \sa utf16(), fromRawData()
6978*/
6979
6980/*!
6981 \fn const ushort *QString::utf16() const
6982
6983 Returns the QString as a '\\0\'-terminated array of unsigned
6984 shorts. The result remains valid until the string is modified.
6985
6986 The returned string is in host byte order.
6987
6988 \sa unicode()
6989*/
6990
6991const ushort *QString::utf16() const
6992{
6993 if (!d->isMutable()) {
6994 // ensure '\0'-termination for ::fromRawData strings
6995 const_cast<QString*>(this)->reallocData(d.size, QArrayData::KeepSize);
6996 }
6997 return reinterpret_cast<const ushort *>(d.data());
6998}
6999
7000/*!
7001 \fn QString &QString::nullTerminate()
7002 \since 6.10
7003
7004 If this string data isn't null-terminated, this method will make a deep
7005 copy of the data and make it null-terminated.
7006
7007 A QString is null-terminated by default, however in some cases (e.g.
7008 when using fromRawData()), the string data doesn't necessarily end
7009 with a \c {\0} character, which could be a problem when calling methods
7010 that expect a null-terminated string.
7011
7012 \sa nullTerminated(), fromRawData(), setRawData()
7013*/
7014QString &QString::nullTerminate()
7015{
7016 // ensure '\0'-termination for ::fromRawData strings
7017 if (!d->isMutable())
7018 *this = QString{constData(), size()};
7019 return *this;
7020}
7021
7022/*!
7023 \fn QString QString::nullTerminated() const &
7024 \fn QString QString::nullTerminated() &&
7025 \since 6.10
7026
7027 Returns a copy of this string that is always null-terminated.
7028
7029 \sa nullTerminate(), fromRawData(), setRawData()
7030*/
7031QString QString::nullTerminated() const &
7032{
7033 // ensure '\0'-termination for ::fromRawData strings
7034 if (!d->isMutable())
7035 return QString{constData(), size()};
7036 return *this;
7037}
7038
7039QString QString::nullTerminated() &&
7040{
7041 nullTerminate();
7042 return std::move(*this);
7043}
7044
7045/*!
7046 Returns a string of size \a width that contains this string
7047 padded by the \a fill character.
7048
7049 If \a truncate is \c false and the size() of the string is more than
7050 \a width, then the returned string is a copy of the string.
7051
7052 \snippet qstring/main.cpp 32
7053
7054 If \a truncate is \c true and the size() of the string is more than
7055 \a width, then any characters in a copy of the string after
7056 position \a width are removed, and the copy is returned.
7057
7058 \snippet qstring/main.cpp 33
7059
7060 \sa rightJustified()
7061*/
7062
7063QString QString::leftJustified(qsizetype width, QChar fill, bool truncate) const
7064{
7065 QString result;
7066 qsizetype len = size();
7067 qsizetype padlen = width - len;
7068 if (padlen > 0) {
7069 result.resize(len+padlen);
7070 if (len)
7071 memcpy(result.d.data(), d.data(), sizeof(QChar)*len);
7072 QChar *uc = (QChar*)result.d.data() + len;
7073 while (padlen--)
7074 * uc++ = fill;
7075 } else {
7076 if (truncate)
7077 result = left(width);
7078 else
7079 result = *this;
7080 }
7081 return result;
7082}
7083
7084/*!
7085 Returns a string of size \a width that contains the \a fill
7086 character followed by the string. For example:
7087
7088 \snippet qstring/main.cpp 49
7089
7090 If \a truncate is \c false and the size() of the string is more than
7091 \a width, then the returned string is a copy of the string.
7092
7093 If \a truncate is \c true and the size() of the string is more than
7094 \a width, then the resulting string is truncated at position \a
7095 width.
7096
7097 \snippet qstring/main.cpp 50
7098
7099 \sa leftJustified()
7100*/
7101
7102QString QString::rightJustified(qsizetype width, QChar fill, bool truncate) const
7103{
7104 QString result;
7105 qsizetype len = size();
7106 qsizetype padlen = width - len;
7107 if (padlen > 0) {
7108 result.resize(len+padlen);
7109 QChar *uc = (QChar*)result.d.data();
7110 while (padlen--)
7111 * uc++ = fill;
7112 if (len)
7113 memcpy(static_cast<void *>(uc), static_cast<const void *>(d.data()), sizeof(QChar)*len);
7114 } else {
7115 if (truncate)
7116 result = left(width);
7117 else
7118 result = *this;
7119 }
7120 return result;
7121}
7122
7123/*!
7124 \fn QString QString::toLower() const
7125
7126 Returns a lowercase copy of the string.
7127
7128 \snippet qstring/main.cpp 75
7129
7130 The case conversion will always happen in the 'C' locale. For
7131 locale-dependent case folding use QLocale::toLower()
7132
7133 \sa toUpper(), QLocale::toLower()
7134*/
7135
7136namespace QUnicodeTables {
7137/*
7138 \internal
7139 Converts the \a str string starting from the position pointed to by the \a
7140 it iterator, using the Unicode case traits \c Traits, and returns the
7141 result. The input string must not be empty (the convertCase function below
7142 guarantees that).
7143
7144 The string type \c{T} is also a template and is either \c{const QString} or
7145 \c{QString}. This function can do both copy-conversion and in-place
7146 conversion depending on the state of the \a str parameter:
7147 \list
7148 \li \c{T} is \c{const QString}: copy-convert
7149 \li \c{T} is \c{QString} and its refcount != 1: copy-convert
7150 \li \c{T} is \c{QString} and its refcount == 1: in-place convert
7151 \endlist
7152
7153 In copy-convert mode, the local variable \c{s} is detached from the input
7154 \a str. In the in-place convert mode, \a str is in moved-from state and
7155 \c{s} contains the only copy of the string, without reallocation (thus,
7156 \a it is still valid).
7157
7158 There is one pathological case left: when the in-place conversion needs to
7159 reallocate memory to grow the buffer. In that case, we need to adjust the \a
7160 it pointer.
7161 */
7162template <typename T>
7163Q_NEVER_INLINE
7165{
7166 Q_ASSERT(!str.isEmpty());
7167 QString s = std::move(str); // will copy if T is const QString
7168 QChar *pp = s.begin() + it.index(); // will detach if necessary
7169
7170 do {
7171 const auto folded = fullConvertCase(it.next(), which);
7172 if (Q_UNLIKELY(folded.size() > 1)) {
7173 if (folded.chars[0] == *pp && folded.size() == 2) {
7174 // special case: only second actually changed (e.g. surrogate pairs),
7175 // avoid slow case
7176 ++pp;
7177 *pp++ = folded.chars[1];
7178 } else {
7179 // slow path: the string is growing
7180 qsizetype inpos = it.index() - 1;
7182
7183 s.replace(outpos, 1, reinterpret_cast<const QChar *>(folded.data()), folded.size());
7184 pp = const_cast<QChar *>(s.constBegin()) + outpos + folded.size();
7185
7186 // Adjust the input iterator if we are performing an in-place conversion
7187 if constexpr (!std::is_const<T>::value)
7189 }
7190 } else {
7191 *pp++ = folded.chars[0];
7192 }
7193 } while (it.hasNext());
7194
7195 return s;
7196}
7197
7198template <typename T>
7199static QString convertCase(T &str, QUnicodeTables::Case which)
7200{
7201 const QChar *p = str.constBegin();
7202 const QChar *e = p + str.size();
7203
7204 // this avoids out of bounds check in the loop
7205 while (e != p && e[-1].isHighSurrogate())
7206 --e;
7207
7208 QStringIterator it(p, e);
7209 while (it.hasNext()) {
7210 const char32_t uc = it.next();
7211 if (caseConversion(uc)[which].diff) {
7212 it.recede();
7213 return detachAndConvertCase(str, it, which);
7214 }
7215 }
7216 return std::move(str);
7217}
7218} // namespace QUnicodeTables
7219
7220QString QString::toLower_helper(const QString &str)
7221{
7222 return QUnicodeTables::convertCase(str, QUnicodeTables::LowerCase);
7223}
7224
7225QString QString::toLower_helper(QString &str)
7226{
7227 return QUnicodeTables::convertCase(str, QUnicodeTables::LowerCase);
7228}
7229
7230/*!
7231 \fn QString QString::toCaseFolded() const
7232
7233 Returns the case folded equivalent of the string. For most Unicode
7234 characters this is the same as toLower().
7235*/
7236
7237QString QString::toCaseFolded_helper(const QString &str)
7238{
7239 return QUnicodeTables::convertCase(str, QUnicodeTables::CaseFold);
7240}
7241
7242QString QString::toCaseFolded_helper(QString &str)
7243{
7244 return QUnicodeTables::convertCase(str, QUnicodeTables::CaseFold);
7245}
7246
7247/*!
7248 \fn QString QString::toUpper() const
7249
7250 Returns an uppercase copy of the string.
7251
7252 \snippet qstring/main.cpp 81
7253
7254 The case conversion will always happen in the 'C' locale. For
7255 locale-dependent case folding use QLocale::toUpper().
7256
7257 \note In some cases the uppercase form of a string may be longer than the
7258 original.
7259
7260 \note Since 2024, the German language officially prefers to uppercase ß
7261 (U+00DF LATIN SMALL LETTER SHARP S) as ẞ (U+1E9E LATIN CAPITAL LETTER SHARP S).
7262 Qt's implementation follows Unicode, which still mandates the use of "SS".
7263 If you need to implement the new German rules, you need to manually do
7264 \c{replace(u'ß', u'ẞ')} \e{before} calling this function.
7265
7266 \sa toLower(), QLocale::toUpper()
7267*/
7268
7269QString QString::toUpper_helper(const QString &str)
7270{
7271 return QUnicodeTables::convertCase(str, QUnicodeTables::UpperCase);
7272}
7273
7274QString QString::toUpper_helper(QString &str)
7275{
7276 return QUnicodeTables::convertCase(str, QUnicodeTables::UpperCase);
7277}
7278
7279/*!
7280 \since 5.5
7281
7282 Safely builds a formatted string from the format string \a cformat
7283 and an arbitrary list of arguments.
7284
7285 The format string supports the conversion specifiers, length modifiers,
7286 and flags provided by printf() in the standard C++ library. The \a cformat
7287 string and \c{%s} arguments must be UTF-8 encoded.
7288
7289 \note The \c{%lc} escape sequence expects a unicode character of type
7290 \c char16_t, or \c ushort (as returned by QChar::unicode()).
7291 The \c{%ls} escape sequence expects a pointer to a zero-terminated array
7292 of unicode characters of type \c char16_t, or ushort (as returned by
7293 QString::utf16()). This is at odds with the printf() in the standard C++
7294 library, which defines \c {%lc} to print a wchar_t and \c{%ls} to print
7295 a \c{wchar_t*}, and might also produce compiler warnings on platforms
7296 where the size of \c {wchar_t} is not 16 bits.
7297
7298 \warning We do not recommend using QString::asprintf() in new Qt
7299 code. Instead, consider using QTextStream or arg(), both of
7300 which support Unicode strings seamlessly and are type-safe.
7301 Here is an example that uses QTextStream:
7302
7303 \snippet qstring/main.cpp 64
7304
7305 For \l {QObject::tr()}{translations}, especially if the strings
7306 contains more than one escape sequence, you should consider using
7307 the arg() function instead. This allows the order of the
7308 replacements to be controlled by the translator.
7309
7310 \sa arg()
7311*/
7312
7313QString QString::asprintf(const char *cformat, ...)
7314{
7315 va_list ap;
7316 va_start(ap, cformat);
7317 QString s = vasprintf(cformat, ap);
7318 va_end(ap);
7319 return s;
7320}
7321
7322static void append_utf8(QString &qs, const char *cs, qsizetype len)
7323{
7324 const qsizetype oldSize = qs.size();
7325 qs.resize(oldSize + len);
7326 const QChar *newEnd = QUtf8::convertToUnicode(qs.data() + oldSize, QByteArrayView(cs, len));
7327 qs.resize(newEnd - qs.constData());
7328}
7329
7330static uint parse_flag_characters(const char * &c) noexcept
7331{
7332 uint flags = QLocaleData::ZeroPadExponent;
7333 while (true) {
7334 switch (*c) {
7335 case '#':
7338 break;
7339 case '0': flags |= QLocaleData::ZeroPadded; break;
7340 case '-': flags |= QLocaleData::LeftAdjusted; break;
7341 case ' ': flags |= QLocaleData::BlankBeforePositive; break;
7342 case '+': flags |= QLocaleData::AlwaysShowSign; break;
7343 case '\'': flags |= QLocaleData::GroupDigits; break;
7344 default: return flags;
7345 }
7346 ++c;
7347 }
7348}
7349
7350static int parse_field_width(const char *&c, qsizetype size)
7351{
7352 Q_ASSERT(isAsciiDigit(*c));
7353 const char *const stop = c + size;
7354
7355 // can't be negative - started with a digit
7356 // contains at least one digit
7357 auto [result, used] = qstrntoull(c, size, 10);
7358 c += used;
7359 if (used <= 0)
7360 return false;
7361 // preserve Qt 5.5 behavior of consuming all digits, no matter how many
7362 while (c < stop && isAsciiDigit(*c))
7363 ++c;
7364 return result < qulonglong(std::numeric_limits<int>::max()) ? int(result) : 0;
7365}
7366
7368
// Advances c past the current character if it equals ch; reports whether it did.
static inline bool can_consume(const char * &c, char ch) noexcept
{
    const bool matches = (*c == ch);
    if (matches)
        ++c;
    return matches;
}
7377
7378static LengthMod parse_length_modifier(const char * &c) noexcept
7379{
7380 switch (*c++) {
7381 case 'h': return can_consume(c, 'h') ? lm_hh : lm_h;
7382 case 'l': return can_consume(c, 'l') ? lm_ll : lm_l;
7383 case 'L': return lm_L;
7384 case 'j': return lm_j;
7385 case 'z':
7386 case 'Z': return lm_z;
7387 case 't': return lm_t;
7388 }
7389 --c; // don't consume *c - it wasn't a flag
7390 return lm_none;
7391}
7392
7393/*!
7394 \fn QString QString::vasprintf(const char *cformat, va_list ap)
7395 \since 5.5
7396
7397 Equivalent method to asprintf(), but takes a va_list \a ap
7398 instead of a list of variable arguments. See the asprintf()
7399 documentation for an explanation of \a cformat.
7400
7401 This method does not call the va_end macro; the caller
7402 is responsible for calling va_end on \a ap.
7403
7404 \sa asprintf()
7405*/
7406
7407QString QString::vasprintf(const char *cformat, va_list ap)
7408{
7409 if (!cformat || !*cformat) {
7410 // Qt 1.x compat
7411 return fromLatin1("");
7412 }
7413
7414 // Parse cformat
7415
7416 QString result;
7417 const char *c = cformat;
7418 const char *formatEnd = cformat + qstrlen(cformat);
7419 for (;;) {
7420 // Copy non-escape chars to result
7421 const char *cb = c;
7422 while (*c != '\0' && *c != '%')
7423 c++;
7424 append_utf8(result, cb, qsizetype(c - cb));
7425
7426 if (*c == '\0')
7427 break;
7428
7429 // Found '%'
7430 const char *escape_start = c;
7431 ++c;
7432
7433 if (*c == '\0') {
7434 result.append(u'%'); // a % at the end of the string - treat as non-escape text
7435 break;
7436 }
7437 if (*c == '%') {
7438 result.append(u'%'); // %%
7439 ++c;
7440 continue;
7441 }
7442
7443 uint flags = parse_flag_characters(c);
7444
7445 if (*c == '\0') {
7446 result.append(QLatin1StringView(escape_start)); // incomplete escape, treat as non-escape text
7447 break;
7448 }
7449
7450 // Parse field width
7451 int width = -1; // -1 means unspecified
7452 if (isAsciiDigit(*c)) {
7453 width = parse_field_width(c, formatEnd - c);
7454 } else if (*c == '*') { // can't parse this in another function, not portably, at least
7455 width = va_arg(ap, int);
7456 if (width < 0)
7457 width = -1; // treat all negative numbers as unspecified
7458 ++c;
7459 }
7460
7461 if (*c == '\0') {
7462 result.append(QLatin1StringView(escape_start)); // incomplete escape, treat as non-escape text
7463 break;
7464 }
7465
7466 // Parse precision
7467 int precision = -1; // -1 means unspecified
7468 if (*c == '.') {
7469 ++c;
7470 precision = 0;
7471 if (isAsciiDigit(*c)) {
7472 precision = parse_field_width(c, formatEnd - c);
7473 } else if (*c == '*') { // can't parse this in another function, not portably, at least
7474 precision = va_arg(ap, int);
7475 if (precision < 0)
7476 precision = -1; // treat all negative numbers as unspecified
7477 ++c;
7478 }
7479 }
7480
7481 if (*c == '\0') {
7482 result.append(QLatin1StringView(escape_start)); // incomplete escape, treat as non-escape text
7483 break;
7484 }
7485
7486 const LengthMod length_mod = parse_length_modifier(c);
7487
7488 if (*c == '\0') {
7489 result.append(QLatin1StringView(escape_start)); // incomplete escape, treat as non-escape text
7490 break;
7491 }
7492
7493 // Parse the conversion specifier and do the conversion
7494 QString subst;
7495 switch (*c) {
7496 case 'd':
7497 case 'i': {
7498 qint64 i;
7499 switch (length_mod) {
7500 case lm_none: i = va_arg(ap, int); break;
7501 case lm_hh: i = va_arg(ap, int); break;
7502 case lm_h: i = va_arg(ap, int); break;
7503 case lm_l: i = va_arg(ap, long int); break;
7504 case lm_ll: i = va_arg(ap, qint64); break;
7505 case lm_j: i = va_arg(ap, long int); break;
7506
7507 /* ptrdiff_t actually, but it should be the same for us */
7508 case lm_z: i = va_arg(ap, qsizetype); break;
7509 case lm_t: i = va_arg(ap, qsizetype); break;
7510 default: i = 0; break;
7511 }
7512 subst = QLocaleData::c()->longLongToString(i, precision, 10, width, flags);
7513 ++c;
7514 break;
7515 }
7516 case 'o':
7517 case 'u':
7518 case 'x':
7519 case 'X': {
7520 quint64 u;
7521 switch (length_mod) {
7522 case lm_none: u = va_arg(ap, uint); break;
7523 case lm_hh: u = va_arg(ap, uint); break;
7524 case lm_h: u = va_arg(ap, uint); break;
7525 case lm_l: u = va_arg(ap, ulong); break;
7526 case lm_ll: u = va_arg(ap, quint64); break;
7527 case lm_t: u = va_arg(ap, size_t); break;
7528 case lm_z: u = va_arg(ap, size_t); break;
7529 default: u = 0; break;
7530 }
7531
7532 if (isAsciiUpper(*c))
7533 flags |= QLocaleData::CapitalEorX;
7534
7535 int base = 10;
7536 switch (QtMiscUtils::toAsciiLower(*c)) {
7537 case 'o':
7538 base = 8; break;
7539 case 'u':
7540 base = 10; break;
7541 case 'x':
7542 base = 16; break;
7543 default: break;
7544 }
7545 subst = QLocaleData::c()->unsLongLongToString(u, precision, base, width, flags);
7546 ++c;
7547 break;
7548 }
7549 case 'E':
7550 case 'e':
7551 case 'F':
7552 case 'f':
7553 case 'G':
7554 case 'g':
7555 case 'A':
7556 case 'a': {
7557 double d;
7558 if (length_mod == lm_L)
7559 d = va_arg(ap, long double); // not supported - converted to a double
7560 else
7561 d = va_arg(ap, double);
7562
7563 if (isAsciiUpper(*c))
7564 flags |= QLocaleData::CapitalEorX;
7565
7566 QLocaleData::DoubleForm form = QLocaleData::DFDecimal;
7567 switch (QtMiscUtils::toAsciiLower(*c)) {
7568 case 'e': form = QLocaleData::DFExponent; break;
7569 case 'a': // not supported - decimal form used instead
7570 case 'f': form = QLocaleData::DFDecimal; break;
7571 case 'g': form = QLocaleData::DFSignificantDigits; break;
7572 default: break;
7573 }
7574 subst = QLocaleData::c()->doubleToString(d, precision, form, width, flags);
7575 ++c;
7576 break;
7577 }
7578 case 'c': {
7579 if (length_mod == lm_l)
7580 subst = QChar::fromUcs2(va_arg(ap, int));
7581 else
7582 subst = QLatin1Char((uchar) va_arg(ap, int));
7583 ++c;
7584 break;
7585 }
7586 case 's': {
7587 if (length_mod == lm_l) {
7588 const char16_t *buff = va_arg(ap, const char16_t*);
7589 const auto *ch = buff;
7590 while (precision != 0 && *ch != 0) {
7591 ++ch;
7592 --precision;
7593 }
7594 subst.setUtf16(buff, ch - buff);
7595 } else if (precision == -1) {
7596 subst = QString::fromUtf8(va_arg(ap, const char*));
7597 } else {
7598 const char *buff = va_arg(ap, const char*);
7599 subst = QString::fromUtf8(buff, qstrnlen(buff, precision));
7600 }
7601 ++c;
7602 break;
7603 }
7604 case 'p': {
7605 void *arg = va_arg(ap, void*);
7606 const quint64 i = reinterpret_cast<quintptr>(arg);
7607 flags |= QLocaleData::ShowBase;
7608 subst = QLocaleData::c()->unsLongLongToString(i, precision, 16, width, flags);
7609 ++c;
7610 break;
7611 }
7612 case 'n':
7613 switch (length_mod) {
7614 case lm_hh: {
7615 signed char *n = va_arg(ap, signed char*);
7616 *n = result.size();
7617 break;
7618 }
7619 case lm_h: {
7620 short int *n = va_arg(ap, short int*);
7621 *n = result.size();
7622 break;
7623 }
7624 case lm_l: {
7625 long int *n = va_arg(ap, long int*);
7626 *n = result.size();
7627 break;
7628 }
7629 case lm_ll: {
7630 qint64 *n = va_arg(ap, qint64*);
7631 *n = result.size();
7632 break;
7633 }
7634 default: {
7635 int *n = va_arg(ap, int*);
7636 *n = int(result.size());
7637 break;
7638 }
7639 }
7640 ++c;
7641 break;
7642
7643 default: // bad escape, treat as non-escape text
7644 for (const char *cc = escape_start; cc != c; ++cc)
7645 result.append(QLatin1Char(*cc));
7646 continue;
7647 }
7648
7649 if (flags & QLocaleData::LeftAdjusted)
7650 result.append(subst.leftJustified(width));
7651 else
7652 result.append(subst.rightJustified(width));
7653 }
7654
7655 return result;
7656}
7657
7658/*!
7659 \fn QString::toLongLong(bool *ok, int base) const
7660
7661 Returns the string converted to a \c{long long} using base \a
7662 base, which is 10 by default and must be between 2 and 36, or 0.
7663 Returns 0 if the conversion fails.
7664
7665 If \a ok is not \nullptr, failure is reported by setting *\a{ok}
7666 to \c false, and success by setting *\a{ok} to \c true.
7667
7668 If \a base is 0, the C language convention is used: if the string begins
7669 with "0x", base 16 is used; otherwise, if the string begins with "0b", base
7670 2 is used; otherwise, if the string begins with "0", base 8 is used;
7671 otherwise, base 10 is used.
7672
7673 The string conversion will always happen in the 'C' locale. For
7674 locale-dependent conversion use QLocale::toLongLong()
7675
7676 Example:
7677
7678 \snippet qstring/main.cpp 74
7679
7680 This function ignores leading and trailing whitespace.
7681
7682 \note Support for the "0b" prefix was added in Qt 6.4.
7683
7684 \sa number(), toULongLong(), toInt(), QLocale::toLongLong()
7685*/
7686
7687template <typename Int>
7688static Int toIntegral(QStringView string, bool *ok, int base)
7689{
7690#if defined(QT_CHECK_RANGE)
7691 if (base != 0 && (base < 2 || base > 36)) {
7692 qWarning("QString::toIntegral: Invalid base (%d)", base);
7693 base = 10;
7694 }
7695#endif
7696
7697 QVarLengthArray<uchar> latin1(string.size());
7698 qt_to_latin1(latin1.data(), string.utf16(), string.size());
7699 QSimpleParsedNumber<Int> r;
7700 if constexpr (std::is_signed_v<Int>)
7701 r = QLocaleData::bytearrayToLongLong(latin1, base);
7702 else
7703 r = QLocaleData::bytearrayToUnsLongLong(latin1, base);
7704 if (ok)
7705 *ok = r.ok();
7706 return r.result;
7707}
7708
7709qlonglong QString::toIntegral_helper(QStringView string, bool *ok, int base)
7710{
7711 return toIntegral<qlonglong>(string, ok, base);
7712}
7713
7714/*!
7715 \fn QString::toULongLong(bool *ok, int base) const
7716
7717 Returns the string converted to an \c{unsigned long long} using base \a
7718 base, which is 10 by default and must be between 2 and 36, or 0.
7719 Returns 0 if the conversion fails.
7720
7721 If \a ok is not \nullptr, failure is reported by setting *\a{ok}
7722 to \c false, and success by setting *\a{ok} to \c true.
7723
7724 If \a base is 0, the C language convention is used: if the string begins
7725 with "0x", base 16 is used; otherwise, if the string begins with "0b", base
7726 2 is used; otherwise, if the string begins with "0", base 8 is used;
7727 otherwise, base 10 is used.
7728
7729 The string conversion will always happen in the 'C' locale. For
7730 locale-dependent conversion use QLocale::toULongLong()
7731
7732 Example:
7733
7734 \snippet qstring/main.cpp 79
7735
7736 This function ignores leading and trailing whitespace.
7737
7738 \note Support for the "0b" prefix was added in Qt 6.4.
7739
7740 \sa number(), toLongLong(), QLocale::toULongLong()
7741*/
7742
7743qulonglong QString::toIntegral_helper(QStringView string, bool *ok, uint base)
7744{
7745 return toIntegral<qulonglong>(string, ok, base);
7746}
7747
7748/*!
7749 \fn long QString::toLong(bool *ok, int base) const
7750
7751 Returns the string converted to a \c long using base \a
7752 base, which is 10 by default and must be between 2 and 36, or 0.
7753 Returns 0 if the conversion fails.
7754
7755 If \a ok is not \nullptr, failure is reported by setting *\a{ok}
7756 to \c false, and success by setting *\a{ok} to \c true.
7757
7758 If \a base is 0, the C language convention is used: if the string begins
7759 with "0x", base 16 is used; otherwise, if the string begins with "0b", base
7760 2 is used; otherwise, if the string begins with "0", base 8 is used;
7761 otherwise, base 10 is used.
7762
7763 The string conversion will always happen in the 'C' locale. For
7764 locale-dependent conversion use QLocale::toLongLong()
7765
7766 Example:
7767
7768 \snippet qstring/main.cpp 73
7769
7770 This function ignores leading and trailing whitespace.
7771
7772 \note Support for the "0b" prefix was added in Qt 6.4.
7773
7774 \sa number(), toULong(), toInt(), QLocale::toInt()
7775*/
7776
7777/*!
7778 \fn ulong QString::toULong(bool *ok, int base) const
7779
7780 Returns the string converted to an \c{unsigned long} using base \a
7781 base, which is 10 by default and must be between 2 and 36, or 0.
7782 Returns 0 if the conversion fails.
7783
7784 If \a ok is not \nullptr, failure is reported by setting *\a{ok}
7785 to \c false, and success by setting *\a{ok} to \c true.
7786
7787 If \a base is 0, the C language convention is used: if the string begins
7788 with "0x", base 16 is used; otherwise, if the string begins with "0b", base
7789 2 is used; otherwise, if the string begins with "0", base 8 is used;
7790 otherwise, base 10 is used.
7791
7792 The string conversion will always happen in the 'C' locale. For
7793 locale-dependent conversion use QLocale::toULongLong()
7794
7795 Example:
7796
7797 \snippet qstring/main.cpp 78
7798
7799 This function ignores leading and trailing whitespace.
7800
7801 \note Support for the "0b" prefix was added in Qt 6.4.
7802
7803 \sa number(), QLocale::toUInt()
7804*/
7805
7806/*!
7807 \fn int QString::toInt(bool *ok, int base) const
7808 Returns the string converted to an \c int using base \a
7809 base, which is 10 by default and must be between 2 and 36, or 0.
7810 Returns 0 if the conversion fails.
7811
7812 If \a ok is not \nullptr, failure is reported by setting *\a{ok}
7813 to \c false, and success by setting *\a{ok} to \c true.
7814
7815 If \a base is 0, the C language convention is used: if the string begins
7816 with "0x", base 16 is used; otherwise, if the string begins with "0b", base
7817 2 is used; otherwise, if the string begins with "0", base 8 is used;
7818 otherwise, base 10 is used.
7819
7820 The string conversion will always happen in the 'C' locale. For
7821 locale-dependent conversion use QLocale::toInt()
7822
7823 Example:
7824
7825 \snippet qstring/main.cpp 72
7826
7827 This function ignores leading and trailing whitespace.
7828
7829 \note Support for the "0b" prefix was added in Qt 6.4.
7830
7831 \sa number(), toUInt(), toDouble(), QLocale::toInt()
7832*/
7833
7834/*!
7835 \fn uint QString::toUInt(bool *ok, int base) const
7836 Returns the string converted to an \c{unsigned int} using base \a
7837 base, which is 10 by default and must be between 2 and 36, or 0.
7838 Returns 0 if the conversion fails.
7839
7840 If \a ok is not \nullptr, failure is reported by setting *\a{ok}
7841 to \c false, and success by setting *\a{ok} to \c true.
7842
7843 If \a base is 0, the C language convention is used: if the string begins
7844 with "0x", base 16 is used; otherwise, if the string begins with "0b", base
7845 2 is used; otherwise, if the string begins with "0", base 8 is used;
7846 otherwise, base 10 is used.
7847
7848 The string conversion will always happen in the 'C' locale. For
7849 locale-dependent conversion use QLocale::toUInt()
7850
7851 Example:
7852
7853 \snippet qstring/main.cpp 77
7854
7855 This function ignores leading and trailing whitespace.
7856
7857 \note Support for the "0b" prefix was added in Qt 6.4.
7858
7859 \sa number(), toInt(), QLocale::toUInt()
7860*/
7861
7862/*!
7863 \fn short QString::toShort(bool *ok, int base) const
7864
7865 Returns the string converted to a \c short using base \a
7866 base, which is 10 by default and must be between 2 and 36, or 0.
7867 Returns 0 if the conversion fails.
7868
7869 If \a ok is not \nullptr, failure is reported by setting *\a{ok}
7870 to \c false, and success by setting *\a{ok} to \c true.
7871
7872 If \a base is 0, the C language convention is used: if the string begins
7873 with "0x", base 16 is used; otherwise, if the string begins with "0b", base
7874 2 is used; otherwise, if the string begins with "0", base 8 is used;
7875 otherwise, base 10 is used.
7876
7877 The string conversion will always happen in the 'C' locale. For
7878 locale-dependent conversion use QLocale::toShort()
7879
7880 Example:
7881
7882 \snippet qstring/main.cpp 76
7883
7884 This function ignores leading and trailing whitespace.
7885
7886 \note Support for the "0b" prefix was added in Qt 6.4.
7887
7888 \sa number(), toUShort(), toInt(), QLocale::toShort()
7889*/
7890
7891/*!
7892 \fn ushort QString::toUShort(bool *ok, int base) const
7893
7894 Returns the string converted to an \c{unsigned short} using base \a
7895 base, which is 10 by default and must be between 2 and 36, or 0.
7896 Returns 0 if the conversion fails.
7897
7898 If \a ok is not \nullptr, failure is reported by setting *\a{ok}
7899 to \c false, and success by setting *\a{ok} to \c true.
7900
7901 If \a base is 0, the C language convention is used: if the string begins
7902 with "0x", base 16 is used; otherwise, if the string begins with "0b", base
7903 2 is used; otherwise, if the string begins with "0", base 8 is used;
7904 otherwise, base 10 is used.
7905
7906 The string conversion will always happen in the 'C' locale. For
7907 locale-dependent conversion use QLocale::toUShort()
7908
7909 Example:
7910
7911 \snippet qstring/main.cpp 80
7912
7913 This function ignores leading and trailing whitespace.
7914
7915 \note Support for the "0b" prefix was added in Qt 6.4.
7916
7917 \sa number(), toShort(), QLocale::toUShort()
7918*/
7919
7920/*!
7921 Returns the string converted to a \c double value.
7922
7923 Returns an infinity if the conversion overflows or 0.0 if the
7924 conversion fails for other reasons (e.g. underflow).
7925
7926 If \a ok is not \nullptr, failure is reported by setting *\a{ok}
7927 to \c false, and success by setting *\a{ok} to \c true.
7928
7929 \snippet qstring/main.cpp 66
7930
7931 \warning The QString content may only contain valid numerical characters
7932 which includes the plus/minus sign, the character e used in scientific
7933 notation, and the decimal point. Including the unit or additional characters
7934 leads to a conversion error.
7935
7936 \snippet qstring/main.cpp 67
7937
7938 The string conversion will always happen in the 'C' locale. For
7939 locale-dependent conversion use QLocale::toDouble()
7940
7941 \snippet qstring/main.cpp 68
7942
7943 For historical reasons, this function does not handle
7944 thousands group separators. If you need to convert such numbers,
7945 use QLocale::toDouble().
7946
7947 \snippet qstring/main.cpp 69
7948
7949 This function ignores leading and trailing whitespace.
7950
7951 \sa number(), QLocale::setDefault(), QLocale::toDouble(), trimmed()
7952*/
7953
7954double QString::toDouble(bool *ok) const
7955{
7956 return QStringView(*this).toDouble(ok);
7957}
7958
7959double QStringView::toDouble(bool *ok) const
7960{
7961 QStringView string = qt_trimmed(*this);
7962 QVarLengthArray<uchar> latin1(string.size());
7963 qt_to_latin1(latin1.data(), string.utf16(), string.size());
7964 auto r = qt_asciiToDouble(reinterpret_cast<const char *>(latin1.data()), string.size());
7965 if (ok != nullptr)
7966 *ok = r.ok();
7967 return r.result;
7968}
7969
7970/*!
7971 Returns the string converted to a \c float value.
7972
7973 Returns an infinity if the conversion overflows or 0.0 if the
7974 conversion fails for other reasons (e.g. underflow).
7975
7976 If \a ok is not \nullptr, failure is reported by setting *\a{ok}
7977 to \c false, and success by setting *\a{ok} to \c true.
7978
7979 \warning The QString content may only contain valid numerical characters
7980 which includes the plus/minus sign, the character e used in scientific
7981 notation, and the decimal point. Including the unit or additional characters
7982 leads to a conversion error.
7983
7984 The string conversion will always happen in the 'C' locale. For
7985 locale-dependent conversion use QLocale::toFloat()
7986
7987 For historical reasons, this function does not handle
7988 thousands group separators. If you need to convert such numbers,
7989 use QLocale::toFloat().
7990
7991 Example:
7992
7993 \snippet qstring/main.cpp 71
7994
7995 This function ignores leading and trailing whitespace.
7996
7997 \sa number(), toDouble(), toInt(), QLocale::toFloat(), trimmed()
7998*/
7999
8000float QString::toFloat(bool *ok) const
8001{
8002 return QLocaleData::convertDoubleToFloat(toDouble(ok), ok);
8003}
8004
8005float QStringView::toFloat(bool *ok) const
8006{
8007 return QLocaleData::convertDoubleToFloat(toDouble(ok), ok);
8008}
8009
8010/*! \fn QString &QString::setNum(int n, int base)
8011
8012 Sets the string to the printed value of \a n in the specified \a
8013 base, and returns a reference to the string.
8014
8015 The base is 10 by default and must be between 2 and 36.
8016
8017 \snippet qstring/main.cpp 56
8018
8019 The formatting always uses QLocale::C, i.e., English/UnitedStates.
8020 To get a localized string representation of a number, use
8021 QLocale::toString() with the appropriate locale.
8022
8023 \sa number()
8024*/
8025
8026/*! \fn QString &QString::setNum(uint n, int base)
8027
8028 \overload
8029*/
8030
8031/*! \fn QString &QString::setNum(long n, int base)
8032
8033 \overload
8034*/
8035
8036/*! \fn QString &QString::setNum(ulong n, int base)
8037
8038 \overload
8039*/
8040
8041/*!
8042 \overload
8043*/
8044QString &QString::setNum(qlonglong n, int base)
8045{
8046 return *this = number(n, base);
8047}
8048
8049/*!
8050 \overload
8051*/
8052QString &QString::setNum(qulonglong n, int base)
8053{
8054 return *this = number(n, base);
8055}
8056
8057/*! \fn QString &QString::setNum(short n, int base)
8058
8059 \overload
8060*/
8061
8062/*! \fn QString &QString::setNum(ushort n, int base)
8063
8064 \overload
8065*/
8066
8067/*!
8068 \overload
8069
8070 Sets the string to the printed value of \a n, formatted according to the
8071 given \a format and \a precision, and returns a reference to the string.
8072
8073 \sa number(), QLocale::FloatingPointPrecisionOption, {Number Formats}
8074*/
8075
8076QString &QString::setNum(double n, char format, int precision)
8077{
8078 return *this = number(n, format, precision);
8079}
8080
8081/*!
8082 \fn QString &QString::setNum(float n, char format, int precision)
8083 \overload
8084
8085 Sets the string to the printed value of \a n, formatted according
8086 to the given \a format and \a precision, and returns a reference
8087 to the string.
8088
8089 The formatting always uses QLocale::C, i.e., English/UnitedStates.
8090 To get a localized string representation of a number, use
8091 QLocale::toString() with the appropriate locale.
8092
8093 \sa number()
8094*/
8095
8096
8097/*!
8098 \fn QString QString::number(long n, int base)
8099
8100 Returns a string equivalent of the number \a n according to the
8101 specified \a base.
8102
8103 The base is 10 by default and must be between 2
8104 and 36. For bases other than 10, \a n is treated as an
8105 unsigned integer.
8106
8107 The formatting always uses QLocale::C, i.e., English/UnitedStates.
8108 To get a localized string representation of a number, use
8109 QLocale::toString() with the appropriate locale.
8110
8111 \snippet qstring/main.cpp 35
8112
8113 \sa setNum()
8114*/
8115
8116QString QString::number(long n, int base)
8117{
8118 return number(qlonglong(n), base);
8119}
8120
8121/*!
8122 \fn QString QString::number(ulong n, int base)
8123
8124 \overload
8125*/
8126QString QString::number(ulong n, int base)
8127{
8128 return number(qulonglong(n), base);
8129}
8130
8131/*!
8132 \overload
8133*/
8134QString QString::number(int n, int base)
8135{
8136 return number(qlonglong(n), base);
8137}
8138
8139/*!
8140 \overload
8141*/
8142QString QString::number(uint n, int base)
8143{
8144 return number(qulonglong(n), base);
8145}
8146
8147/*!
8148 \overload
8149*/
8150QString QString::number(qlonglong n, int base)
8151{
8152#if defined(QT_CHECK_RANGE)
8153 if (base < 2 || base > 36) {
8154 qWarning("QString::setNum: Invalid base (%d)", base);
8155 base = 10;
8156 }
8157#endif
8158 bool negative = n < 0;
8159 /*
8160 Negating std::numeric_limits<qlonglong>::min() hits undefined behavior, so
8161 taking an absolute value has to take a slight detour.
8162 */
8163 return qulltoBasicLatin(negative ? 1u + qulonglong(-(n + 1)) : qulonglong(n), base, negative);
8164}
8165
8166/*!
8167 \overload
8168*/
8169QString QString::number(qulonglong n, int base)
8170{
8171#if defined(QT_CHECK_RANGE)
8172 if (base < 2 || base > 36) {
8173 qWarning("QString::setNum: Invalid base (%d)", base);
8174 base = 10;
8175 }
8176#endif
8177 return qulltoBasicLatin(n, base, false);
8178}
8179
8180
8181/*!
8182 Returns a string representing the floating-point number \a n.
8183
8184 Returns a string that represents \a n, formatted according to the specified
8185 \a format and \a precision.
8186
8187 For formats with an exponent, the exponent will show its sign and have at
8188 least two digits, left-padding the exponent with zero if needed.
8189
8190 \sa setNum(), QLocale::toString(), QLocale::FloatingPointPrecisionOption, {Number Formats}
8191*/
8192QString QString::number(double n, char format, int precision)
8193{
8194 QLocaleData::DoubleForm form = QLocaleData::DFDecimal;
8195
8196 switch (QtMiscUtils::toAsciiLower(format)) {
8197 case 'f':
8198 form = QLocaleData::DFDecimal;
8199 break;
8200 case 'e':
8201 form = QLocaleData::DFExponent;
8202 break;
8203 case 'g':
8204 form = QLocaleData::DFSignificantDigits;
8205 break;
8206 default:
8207#if defined(QT_CHECK_RANGE)
8208 qWarning("QString::setNum: Invalid format char '%c'", format);
8209#endif
8210 break;
8211 }
8212
8213 return qdtoBasicLatin(n, form, precision, isAsciiUpper(format));
8214}
8215
8216namespace {
8217template<class ResultList, class StringSource>
8218static ResultList splitString(const StringSource &source, QStringView sep,
8219 Qt::SplitBehavior behavior, Qt::CaseSensitivity cs)
8220{
8221 ResultList list;
8222 typename StringSource::size_type start = 0;
8223 typename StringSource::size_type end;
8224 typename StringSource::size_type extra = 0;
8225 while ((end = QtPrivate::findString(QStringView(source.constData(), source.size()), start + extra, sep, cs)) != -1) {
8226 if (start != end || behavior == Qt::KeepEmptyParts)
8227 list.append(source.sliced(start, end - start));
8228 start = end + sep.size();
8229 extra = (sep.size() == 0 ? 1 : 0);
8230 }
8231 if (start != source.size() || behavior == Qt::KeepEmptyParts)
8232 list.append(source.sliced(start));
8233 return list;
8234}
8235
8236} // namespace
8237
8238/*!
8239 Splits the string into substrings wherever \a sep occurs, and
8240 returns the list of those strings. If \a sep does not match
8241 anywhere in the string, split() returns a single-element list
8242 containing this string.
8243
8244 \a cs specifies whether \a sep should be matched case
8245 sensitively or case insensitively.
8246
8247 If \a behavior is Qt::SkipEmptyParts, empty entries don't
8248 appear in the result. By default, empty entries are kept.
8249
8250 Example:
8251
8252 \snippet qstring/main.cpp 62
8253
8254 If \a sep is empty, split() returns an empty string, followed
8255 by each of the string's characters, followed by another empty string:
8256
8257 \snippet qstring/main.cpp 62-empty
8258
8259 To understand this behavior, recall that the empty string matches
8260 everywhere, so the above is qualitatively the same as:
8261
8262 \snippet qstring/main.cpp 62-slashes
8263
8264 \sa QStringList::join(), section()
8265
8266 \since 5.14
8267*/
8268QStringList QString::split(const QString &sep, Qt::SplitBehavior behavior, Qt::CaseSensitivity cs) const
8269{
8270 return splitString<QStringList>(*this, sep, behavior, cs);
8271}
8272
8273/*!
8274 \overload
8275 \since 5.14
8276*/
8277QStringList QString::split(QChar sep, Qt::SplitBehavior behavior, Qt::CaseSensitivity cs) const
8278{
8279 return splitString<QStringList>(*this, QStringView(&sep, 1), behavior, cs);
8280}
8281
8282/*!
8283 \fn QList<QStringView> QStringView::split(QChar sep, Qt::SplitBehavior behavior, Qt::CaseSensitivity cs) const
8284 \fn QList<QStringView> QStringView::split(QStringView sep, Qt::SplitBehavior behavior, Qt::CaseSensitivity cs) const
8285
8286
8287 Splits the view into substring views wherever \a sep occurs, and
8288 returns the list of those string views.
8289
8290 See QString::split() for how \a sep, \a behavior and \a cs interact to form
8291 the result.
8292
8293 \note All the returned views are valid as long as the data referenced by
8294 this string view is valid. Destroying the data will cause all views to
8295 become dangling.
8296
8297 \since 6.0
8298*/
8299QList<QStringView> QStringView::split(QStringView sep, Qt::SplitBehavior behavior, Qt::CaseSensitivity cs) const
8300{
8301 return splitString<QList<QStringView>>(QStringView(*this), sep, behavior, cs);
8302}
8303
8304QList<QStringView> QStringView::split(QChar sep, Qt::SplitBehavior behavior, Qt::CaseSensitivity cs) const
8305{
8306 return split(QStringView(&sep, 1), behavior, cs);
8307}
8308
8309#if QT_CONFIG(regularexpression)
8310namespace {
8311template<class ResultList, typename String, typename MatchingFunction>
8312static ResultList splitString(const String &source, const QRegularExpression &re,
8313 MatchingFunction matchingFunction,
8314 Qt::SplitBehavior behavior)
8315{
8316 ResultList list;
8317 if (!re.isValid()) {
8318 qtWarnAboutInvalidRegularExpression(re, "QString", "split");
8319 return list;
8320 }
8321
8322 qsizetype start = 0;
8323 qsizetype end = 0;
8324 QRegularExpressionMatchIterator iterator = (re.*matchingFunction)(source, 0, QRegularExpression::NormalMatch, QRegularExpression::NoMatchOption);
8325 while (iterator.hasNext()) {
8326 QRegularExpressionMatch match = iterator.next();
8327 end = match.capturedStart();
8328 if (start != end || behavior == Qt::KeepEmptyParts)
8329 list.append(source.sliced(start, end - start));
8330 start = match.capturedEnd();
8331 }
8332
8333 if (start != source.size() || behavior == Qt::KeepEmptyParts)
8334 list.append(source.sliced(start));
8335
8336 return list;
8337}
8338} // namespace
8339
8340/*!
8341 \overload
8342 \since 5.14
8343
8344 Splits the string into substrings wherever the regular expression
8345 \a re matches, and returns the list of those strings. If \a re
8346 does not match anywhere in the string, split() returns a
8347 single-element list containing this string.
8348
8349 Here is an example where we extract the words in a sentence
8350 using one or more whitespace characters as the separator:
8351
8352 \snippet qstring/main.cpp 90
8353
8354 Here is a similar example, but this time we use any sequence of
8355 non-word characters as the separator:
8356
8357 \snippet qstring/main.cpp 91
8358
8359 Here is a third example where we use a zero-length assertion,
8360 \b{\\b} (word boundary), to split the string into an
8361 alternating sequence of non-word and word tokens:
8362
8363 \snippet qstring/main.cpp 92
8364
8365 \sa QStringList::join(), section()
8366*/
8367QStringList QString::split(const QRegularExpression &re, Qt::SplitBehavior behavior) const
8368{
8369#if QT_VERSION < QT_VERSION_CHECK(7, 0, 0)
8370 const auto matchingFunction = qOverload<const QString &, qsizetype, QRegularExpression::MatchType, QRegularExpression::MatchOptions>(&QRegularExpression::globalMatch);
8371#else
8372 const auto matchingFunction = &QRegularExpression::globalMatch;
8373#endif
8374 return splitString<QStringList>(*this,
8375 re,
8376 matchingFunction,
8377 behavior);
8378}
8379
8380/*!
8381 \overload
8382 \since 6.0
8383
    Splits the view into substring views wherever the regular expression \a re
    matches, and returns the list of those views. If \a re does not match
    anywhere in the view, split() returns a single-element list containing
    this view.
8388
8389 \note The views in the returned list are sub-views of this view; as such,
8390 they reference the same data as it and only remain valid for as long as that
8391 data remains live.
8392*/
8393QList<QStringView> QStringView::split(const QRegularExpression &re, Qt::SplitBehavior behavior) const
8394{
8395 return splitString<QList<QStringView>>(*this, re, &QRegularExpression::globalMatchView, behavior);
8396}
8397
8398#endif // QT_CONFIG(regularexpression)
8399
8400/*!
8401 \enum QString::NormalizationForm
8402
8403 This enum describes the various normalized forms of Unicode text.
8404
8405 \value NormalizationForm_D Canonical Decomposition
8406 \value NormalizationForm_C Canonical Decomposition followed by Canonical Composition
8407 \value NormalizationForm_KD Compatibility Decomposition
8408 \value NormalizationForm_KC Compatibility Decomposition followed by Canonical Composition
8409
8410 \sa normalized(),
8411 {https://www.unicode.org/reports/tr15/}{Unicode Standard Annex #15}
8412*/
8413
8414/*!
8415 \since 4.5
8416
8417 Returns a copy of this string repeated the specified number of \a times.
8418
8419 If \a times is less than 1, an empty string is returned.
8420
8421 Example:
8422
8423 \snippet code/src_corelib_text_qstring.cpp 8
8424*/
8425QString QString::repeated(qsizetype times) const
8426{
8427 if (d.size == 0)
8428 return *this;
8429
8430 if (times <= 1) {
8431 if (times == 1)
8432 return *this;
8433 return QString();
8434 }
8435
8436 const qsizetype resultSize = times * d.size;
8437
8438 QString result;
8439 result.reserve(resultSize);
8440 if (result.capacity() != resultSize)
8441 return QString(); // not enough memory
8442
8443 memcpy(result.d.data(), d.data(), d.size * sizeof(QChar));
8444
8445 qsizetype sizeSoFar = d.size;
8446 char16_t *end = result.d.data() + sizeSoFar;
8447
8448 const qsizetype halfResultSize = resultSize >> 1;
8449 while (sizeSoFar <= halfResultSize) {
8450 memcpy(end, result.d.data(), sizeSoFar * sizeof(QChar));
8451 end += sizeSoFar;
8452 sizeSoFar <<= 1;
8453 }
8454 memcpy(end, result.d.data(), (resultSize - sizeSoFar) * sizeof(QChar));
8455 result.d.data()[resultSize] = '\0';
8456 result.d.size = resultSize;
8457 return result;
8458}
8459
// Normalizes *data in place, starting at index from, to the normalization
// form given by mode and according to the Unicode version given by version.
//
// The steps are: return early if the text from that index on is pure ASCII;
// for versions up to NormalizationCorrectionsVersionMax, rewrite corrected
// code points back to their old mappings; return if the quick-check helper
// reports nothing to do; otherwise decompose, reorder canonically and, unless
// a decomposed form (D or KD) was requested, compose again.
void qt_string_normalize(QString *data, QString::NormalizationForm mode, QChar::UnicodeVersion version, qsizetype from)
{
    {
        // check if it's fully ASCII first, because then we have no work
        auto start = reinterpret_cast<const char16_t *>(data->constData());
        const char16_t *p = start + from;
        if (isAscii_helper(p, p + data->size() - from))
            return;
        // NOTE(review): the comparison below only makes sense if
        // isAscii_helper() advances p to the first non-ASCII character --
        // presumably it takes p by reference; confirm against its declaration.
        if (p > start + from)
            from = p - start - 1; // need one before the non-ASCII to perform NFC
    }

    if (version == QChar::Unicode_Unassigned) {
        // No explicit version requested: use the current one.
        version = QChar::currentUnicodeVersion();
    } else if (int(version) <= NormalizationCorrectionsVersionMax) {
        const QString &s = *data;
        // Writable pointer into *data, fetched only once a replacement is needed.
        QChar *d = nullptr;
        // NOTE(review): the statement that introduces `n` (apparently the
        // header of a loop over the normalization corrections) is missing
        // from this listing at this point; the braces below are unbalanced
        // without it. Restore it from the original source.
            if (n.version > version) {
                qsizetype pos = from;
                if (QChar::requiresSurrogates(n.ucs4)) {
                    // The corrected code point is stored as a surrogate pair:
                    // compare and replace two code units at a time.
                    char16_t ucs4High = QChar::highSurrogate(n.ucs4);
                    char16_t ucs4Low = QChar::lowSurrogate(n.ucs4);
                    char16_t oldHigh = QChar::highSurrogate(n.old_mapping);
                    char16_t oldLow = QChar::lowSurrogate(n.old_mapping);
                    while (pos < s.size() - 1) {
                        if (s.at(pos).unicode() == ucs4High && s.at(pos + 1).unicode() == ucs4Low) {
                            if (!d)
                                d = data->data();
                            d[pos] = QChar(oldHigh);
                            d[++pos] = QChar(oldLow);
                        }
                        ++pos;
                    }
                } else {
                    // Single code unit: replace every occurrence directly.
                    while (pos < s.size()) {
                        if (s.at(pos).unicode() == n.ucs4) {
                            if (!d)
                                d = data->data();
                            d[pos] = QChar(n.old_mapping);
                        }
                        ++pos;
                    }
                }
            }
        }
    }

    // The helper may also update from through its last argument.
    if (normalizationQuickCheckHelper(data, mode, from, &from))
        return;

    // The second argument is true for the modes ordered before
    // NormalizationForm_KD (presumably the canonical forms D and C -- confirm
    // against the enum's declaration order).
    decomposeHelper(data, mode < QString::NormalizationForm_KD, version, from);

    canonicalOrderHelper(data, version, from);

    // The decomposed forms are finished here; the others get composed again.
    if (mode == QString::NormalizationForm_D || mode == QString::NormalizationForm_KD)
        return;

    composeHelper(data, version, from);
}
8520
8521/*!
8522 Returns the string in the given Unicode normalization \a mode,
8523 according to the given \a version of the Unicode standard.
8524*/
8525QString QString::normalized(QString::NormalizationForm mode, QChar::UnicodeVersion version) const
8526{
8527 QString copy = *this;
8528 qt_string_normalize(&copy, mode, version, 0);
8529 return copy;
8530}
8531
8532#if QT_VERSION < QT_VERSION_CHECK(7, 0, 0) && !defined(QT_BOOTSTRAPPED)
8533static void checkArgEscape(QStringView s)
8534{
8535 // If we're in here, it means that qArgDigitValue has accepted the
8536 // digit. We can skip the check in case we already know it will
8537 // succeed.
8538 if (!supportUnicodeDigitValuesInArg())
8539 return;
8540
8541 const auto isNonAsciiDigit = [](QChar c) {
8542 return c.unicode() < u'0' || c.unicode() > u'9';
8543 };
8544
8545 if (std::any_of(s.begin(), s.end(), isNonAsciiDigit)) {
8546 const auto accumulateDigit = [](int partial, QChar digit) {
8547 return partial * 10 + digit.digitValue();
8548 };
8549 const int parsedNumber = std::accumulate(s.begin(), s.end(), 0, accumulateDigit);
8550
8551 qWarning("QString::arg(): the replacement \"%%%ls\" contains non-ASCII digits;\n"
8552 " it is currently being interpreted as the %d-th substitution.\n"
8553 " This is deprecated; support for non-ASCII digits will be dropped\n"
8554 " in a future version of Qt.",
8555 qUtf16Printable(s.toString()),
8556 parsedNumber);
8557 }
8558}
8559#endif
8560
8562{
8563 int min_escape; // lowest escape sequence number
8564 qsizetype occurrences; // number of occurrences of the lowest escape sequence number
8565 qsizetype locale_occurrences; // number of occurrences of the lowest escape sequence number that
8566 // contain 'L'
8567 qsizetype escape_len; // total length of escape sequences which will be replaced
8568};
8569
8570static ArgEscapeData findArgEscapes(QStringView s)
8571{
8572 const QChar *uc_begin = s.begin();
8573 const QChar *uc_end = s.end();
8574
8575 ArgEscapeData d;
8576
8577 d.min_escape = INT_MAX;
8578 d.occurrences = 0;
8579 d.escape_len = 0;
8580 d.locale_occurrences = 0;
8581
8582 const QChar *c = uc_begin;
8583 while (c != uc_end) {
8584 while (c != uc_end && c->unicode() != '%')
8585 ++c;
8586
8587 if (c == uc_end)
8588 break;
8589 const QChar *escape_start = c;
8590 if (++c == uc_end)
8591 break;
8592
8593 bool locale_arg = false;
8594 if (c->unicode() == 'L') {
8595 locale_arg = true;
8596 if (++c == uc_end)
8597 break;
8598 }
8599
8600 int escape = qArgDigitValue(*c);
8601 if (escape == -1)
8602 continue;
8603
8604 // ### Qt 7: do not allow anything but ASCII digits
8605 // in arg()'s replacements.
8606#if QT_VERSION <= QT_VERSION_CHECK(7, 0, 0) && !defined(QT_BOOTSTRAPPED)
8607 const QChar *escapeBegin = c;
8608 const QChar *escapeEnd = escapeBegin + 1;
8609#endif
8610
8611 ++c;
8612
8613 if (c != uc_end) {
8614 const int next_escape = qArgDigitValue(*c);
8615 if (next_escape != -1) {
8616 escape = (10 * escape) + next_escape;
8617 ++c;
8618#if QT_VERSION <= QT_VERSION_CHECK(7, 0, 0) && !defined(QT_BOOTSTRAPPED)
8619 ++escapeEnd;
8620#endif
8621 }
8622 }
8623
8624#if QT_VERSION <= QT_VERSION_CHECK(7, 0, 0) && !defined(QT_BOOTSTRAPPED)
8625 checkArgEscape(QStringView(escapeBegin, escapeEnd));
8626#endif
8627
8628 if (escape > d.min_escape)
8629 continue;
8630
8631 if (escape < d.min_escape) {
8632 d.min_escape = escape;
8633 d.occurrences = 0;
8634 d.escape_len = 0;
8635 d.locale_occurrences = 0;
8636 }
8637
8638 ++d.occurrences;
8639 if (locale_arg)
8640 ++d.locale_occurrences;
8641 d.escape_len += c - escape_start;
8642 }
8643 return d;
8644}
8645
// Returns a copy of \a s in which every place marker numbered d.min_escape is
// replaced: by \a larg when the marker carries the 'L' flag, by \a arg
// otherwise. Each replacement is padded with \a fillChar to at least
// |field_width| characters. \a d must describe \a s (see findArgEscapes())
// and d.occurrences must be greater than 0, since the loop relies on markers
// being present instead of checking against the end of the input.
static QString replaceArgEscapes(QStringView s, const ArgEscapeData &d, qsizetype field_width,
                                 QStringView arg, QStringView larg, QChar fillChar)
{
    // Negative field-width for right-padding, positive for left-padding:
    const qsizetype abs_field_width = qAbs(field_width);
    // Input length, minus the markers that get removed, plus the (padded)
    // length of every replacement.
    const qsizetype result_len =
            s.size() - d.escape_len
            + (d.occurrences - d.locale_occurrences) * qMax(abs_field_width, arg.size())
            + d.locale_occurrences * qMax(abs_field_width, larg.size());

    QString result(result_len, Qt::Uninitialized);
    QChar *rc = const_cast<QChar *>(result.unicode()); // write cursor into result
    QChar *const result_end = rc + result_len;
    qsizetype repl_cnt = 0; // replacements performed so far

    const QChar *c = s.begin();
    const QChar *const uc_end = s.end();
    while (c != uc_end) {
        Q_ASSERT(d.occurrences > repl_cnt);
        /* We don't have to check increments of c against uc_end because, as
           long as d.occurrences > repl_cnt, we KNOW there are valid escape
           sequences remaining. */

        const QChar *text_start = c;
        while (c->unicode() != '%')
            ++c;

        const QChar *escape_start = c++;
        const bool localize = c->unicode() == 'L';
        if (localize)
            ++c;

        // Parse a one- or two-digit marker number; on success c is left on
        // its last digit.
        int escape = qArgDigitValue(*c);
        if (escape != -1 && c + 1 != uc_end) {
            const int digit = qArgDigitValue(c[1]);
            if (digit != -1) {
                ++c;
                escape = 10 * escape + digit;
            }
        }

        if (escape != d.min_escape) {
            // Not the marker being replaced: copy everything up to (but not
            // including) c verbatim; the next iteration continues from c.
            memcpy(rc, text_start, (c - text_start) * sizeof(QChar));
            rc += c - text_start;
        } else {
            ++c; // step past the marker's last digit

            // Copy the literal text that precedes the marker.
            memcpy(rc, text_start, (escape_start - text_start) * sizeof(QChar));
            rc += escape_start - text_start;

            const QStringView use = localize ? larg : arg;
            const qsizetype pad_chars = abs_field_width - use.size();
            // (If negative, relevant loops are no-ops: no need to check.)

            if (field_width > 0) { // left padded
                rc = std::fill_n(rc, pad_chars, fillChar);
            }

            if (use.size())
                memcpy(rc, use.data(), use.size() * sizeof(QChar));
            rc += use.size();

            if (field_width < 0) { // right padded
                rc = std::fill_n(rc, pad_chars, fillChar);
            }

            // After the last replacement the rest of the input is copied in
            // one go and the loop is ended.
            if (++repl_cnt == d.occurrences) {
                memcpy(rc, c, (uc_end - c) * sizeof(QChar));
                rc += uc_end - c;
                Q_ASSERT(rc == result_end);
                c = uc_end;
            }
        }
    }
    Q_ASSERT(rc == result_end);

    return result;
}
8724
8725/*!
8726 \fn template <typename T, QString::if_string_like<T> = true> QString QString::arg(const T &a, int fieldWidth, QChar fillChar) const
8727
8728 Returns a copy of this string with the lowest-numbered place-marker
8729 replaced by string \a a, i.e., \c %1, \c %2, ..., \c %99.
8730
8731 \a fieldWidth specifies the minimum amount of space that \a a
8732 shall occupy. If \a a requires less space than \a fieldWidth, it
8733 is padded to \a fieldWidth with character \a fillChar. A positive
8734 \a fieldWidth produces right-aligned text. A negative \a fieldWidth
8735 produces left-aligned text.
8736
8737 This example shows how we might create a \c status string for
8738 reporting progress while processing a list of files:
8739
8740 \snippet qstring/main.cpp 11-qstringview
8741
8742 First, \c arg(i) replaces \c %1. Then \c arg(total) replaces \c
8743 %2. Finally, \c arg(fileName) replaces \c %3.
8744
8745 One advantage of using arg() over asprintf() is that the order of the
8746 numbered place markers can change, if the application's strings are
8747 translated into other languages, but each arg() will still replace
8748 the lowest-numbered unreplaced place-marker, no matter where it
8749 appears. Also, if place-marker \c %i appears more than once in the
8750 string, arg() replaces all of them.
8751
8752 If there is no unreplaced place-marker remaining, a warning message
8753 is printed and the result is undefined. Place-marker numbers must be
8754 in the range 1 to 99.
8755
8756 \note In Qt versions prior to 6.9, this function was overloaded on
8757 \c{char}, QChar, QString, QStringView, and QLatin1StringView and in some
8758 cases, \c{wchar_t} and \c{char16_t} arguments would resolve to the integer
8759 overloads. In Qt versions prior to 5.10, this function lacked the
8760 QStringView and QLatin1StringView overloads.
8761*/
8762QString QString::arg_impl(QAnyStringView a, int fieldWidth, QChar fillChar) const
8763{
8764 ArgEscapeData d = findArgEscapes(*this);
8765
8766 if (Q_UNLIKELY(d.occurrences == 0)) {
8767 qWarning("QString::arg: Argument missing: \"%ls\", \"%ls\"", qUtf16Printable(*this),
8768 qUtf16Printable(a.toString()));
8769 return *this;
8770 }
8771 struct {
8772 QVarLengthArray<char16_t> out;
8773 QStringView operator()(QStringView in) noexcept { return in; }
8774 QStringView operator()(QLatin1StringView in)
8775 {
8776 out.resize(in.size());
8777 qt_from_latin1(out.data(), in.data(), size_t(in.size()));
8778 return out;
8779 }
8780 QStringView operator()(QUtf8StringView in)
8781 {
8782 out.resize(in.size());
8783 return QStringView{out.data(), QUtf8::convertToUnicode(out.data(), in)};
8784 }
8785 } convert;
8786
8787 QStringView sv = a.visit(std::ref(convert));
8788 return replaceArgEscapes(*this, d, fieldWidth, sv, sv, fillChar);
8789}
8790
8791/*!
8792 \fn template <typename T, QString::if_integral_non_char<T> = true> QString QString::arg(T a, int fieldWidth, int base, QChar fillChar) const
8793 \overload arg()
8794
8795 The \a a argument is expressed in base \a base, which is 10 by
8796 default and must be between 2 and 36. For bases other than 10, \a a
8797 is treated as an unsigned integer.
8798
8799 \a fieldWidth specifies the minimum amount of space that \a a is
8800 padded to and filled with the character \a fillChar. A positive
8801 value produces right-aligned text; a negative value produces
8802 left-aligned text.
8803
8804 The '%' can be followed by an 'L', in which case the sequence is
8805 replaced with a localized representation of \a a. The conversion
8806 uses the default locale, set by QLocale::setDefault(). If no default
8807 locale was specified, the system locale is used. The 'L' flag is
8808 ignored if \a base is not 10.
8809
8810 \snippet qstring/main.cpp 12
8811 \snippet qstring/main.cpp 14
8812
8813 \note In Qt versions prior to 6.10.1, this function accepted arguments of
8814 types that implicitly convert to integral types. This is no longer supported,
8815 except for (unscoped) enums, because it also accepted types convertible to
8816 floating-point types, losing precision when those were printed as integers. A
8817 backwards-compatible fix is to cast such types to a C++ type whose displayed
8818 form matches your intent (\c int, \c float, ...).
8819
8820 \note In Qt versions prior to 6.9, this function was overloaded on various
8821 integral types and sometimes incorrectly accepted \c char and \c char16_t
8822 arguments.
8823
8824 \sa {Number Formats}
8825*/
8826QString QString::arg_impl(qlonglong a, int fieldWidth, int base, QChar fillChar) const
8827{
8828 ArgEscapeData d = findArgEscapes(*this);
8829
8830 if (d.occurrences == 0) {
8831 qWarning("QString::arg: Argument missing: \"%ls\", %llu", qUtf16Printable(*this), a);
8832 return *this;
8833 }
8834
8835 unsigned flags = QLocaleData::NoFlags;
8836 // ZeroPadded sorts out left-padding when the fill is zero, to the right of sign:
8837 if (fillChar == u'0')
8838 flags = QLocaleData::ZeroPadded;
8839
8840 QString arg;
8841 if (d.occurrences > d.locale_occurrences) {
8842 arg = QLocaleData::c()->longLongToString(a, -1, base, fieldWidth, flags);
8843 Q_ASSERT(fillChar != u'0' || fieldWidth <= arg.size());
8844 }
8845
8846 QString localeArg;
8847 if (d.locale_occurrences > 0) {
8848 QLocale locale;
8849 if (!(locale.numberOptions() & QLocale::OmitGroupSeparator))
8850 flags |= QLocaleData::GroupDigits;
8851 localeArg = locale.d->m_data->longLongToString(a, -1, base, fieldWidth, flags);
8852 Q_ASSERT(fillChar != u'0' || fieldWidth <= localeArg.size());
8853 }
8854
8855 return replaceArgEscapes(*this, d, fieldWidth, arg, localeArg, fillChar);
8856}
8857
8858QString QString::arg_impl(qulonglong a, int fieldWidth, int base, QChar fillChar) const
8859{
8860 ArgEscapeData d = findArgEscapes(*this);
8861
8862 if (d.occurrences == 0) {
8863 qWarning("QString::arg: Argument missing: \"%ls\", %lld", qUtf16Printable(*this), a);
8864 return *this;
8865 }
8866
8867 unsigned flags = QLocaleData::NoFlags;
8868 // ZeroPadded sorts out left-padding when the fill is zero, to the right of sign:
8869 if (fillChar == u'0')
8870 flags = QLocaleData::ZeroPadded;
8871
8872 QString arg;
8873 if (d.occurrences > d.locale_occurrences) {
8874 arg = QLocaleData::c()->unsLongLongToString(a, -1, base, fieldWidth, flags);
8875 Q_ASSERT(fillChar != u'0' || fieldWidth <= arg.size());
8876 }
8877
8878 QString localeArg;
8879 if (d.locale_occurrences > 0) {
8880 QLocale locale;
8881 if (!(locale.numberOptions() & QLocale::OmitGroupSeparator))
8882 flags |= QLocaleData::GroupDigits;
8883 localeArg = locale.d->m_data->unsLongLongToString(a, -1, base, fieldWidth, flags);
8884 Q_ASSERT(fillChar != u'0' || fieldWidth <= localeArg.size());
8885 }
8886
8887 return replaceArgEscapes(*this, d, fieldWidth, arg, localeArg, fillChar);
8888}
8889
8890/*!
8891 \fn template <typename T, QString::if_floating_point<T> = true> QString QString::arg(T a, int fieldWidth, char format, int precision, QChar fillChar) const
8892 \overload arg()
8893
8894 Argument \a a is formatted according to the specified \a format and
8895 \a precision. See \l{Floating-point Formats} for details.
8896
8897 \a fieldWidth specifies the minimum amount of space that \a a is
8898 padded to and filled with the character \a fillChar. A positive
8899 value produces right-aligned text; a negative value produces
8900 left-aligned text.
8901
8902 \snippet code/src_corelib_text_qstring.cpp 2
8903
8904 \note In Qt versions prior to 6.9, this function was a regular function
8905 taking \c double. As a consequence of being a template function now, it no
8906 longer accepts arguments that merely implicitly convert to floating-point
8907 types. A backwards-compatible fix is to cast such types to one of the C++
8908 floating-point types.
8909
8910 \sa QLocale::toString(), QLocale::FloatingPointPrecisionOption, {Number Formats}
8911*/
8912QString QString::arg_impl(double a, int fieldWidth, char format, int precision, QChar fillChar) const
8913{
8914 ArgEscapeData d = findArgEscapes(*this);
8915
8916 if (d.occurrences == 0) {
8917 qWarning("QString::arg: Argument missing: \"%ls\", %g", qUtf16Printable(*this), a);
8918 return *this;
8919 }
8920
8921 unsigned flags = QLocaleData::NoFlags;
8922 // ZeroPadded sorts out left-padding when the fill is zero, to the right of sign:
8923 if (fillChar == u'0')
8924 flags |= QLocaleData::ZeroPadded;
8925
8926 if (isAsciiUpper(format))
8927 flags |= QLocaleData::CapitalEorX;
8928
8929 QLocaleData::DoubleForm form = QLocaleData::DFDecimal;
8930 switch (QtMiscUtils::toAsciiLower(format)) {
8931 case 'f':
8932 form = QLocaleData::DFDecimal;
8933 break;
8934 case 'e':
8935 form = QLocaleData::DFExponent;
8936 break;
8937 case 'g':
8938 form = QLocaleData::DFSignificantDigits;
8939 break;
8940 default:
8941#if defined(QT_CHECK_RANGE)
8942 qWarning("QString::arg: Invalid format char '%c'", format);
8943#endif
8944 break;
8945 }
8946
8947 QString arg;
8948 if (d.occurrences > d.locale_occurrences) {
8949 arg = QLocaleData::c()->doubleToString(a, precision, form, fieldWidth,
8950 flags | QLocaleData::ZeroPadExponent);
8951 Q_ASSERT(fillChar != u'0' || !qt_is_finite(a)
8952 || fieldWidth <= arg.size());
8953 }
8954
8955 QString localeArg;
8956 if (d.locale_occurrences > 0) {
8957 QLocale locale;
8958
8959 const QLocale::NumberOptions numberOptions = locale.numberOptions();
8960 if (!(numberOptions & QLocale::OmitGroupSeparator))
8961 flags |= QLocaleData::GroupDigits;
8962 if (!(numberOptions & QLocale::OmitLeadingZeroInExponent))
8963 flags |= QLocaleData::ZeroPadExponent;
8964 if (numberOptions & QLocale::IncludeTrailingZeroesAfterDot)
8965 flags |= QLocaleData::AddTrailingZeroes;
8966 localeArg = locale.d->m_data->doubleToString(a, precision, form, fieldWidth, flags);
8967 Q_ASSERT(fillChar != u'0' || !qt_is_finite(a)
8968 || fieldWidth <= localeArg.size());
8969 }
8970
8971 return replaceArgEscapes(*this, d, fieldWidth, arg, localeArg, fillChar);
8972}
8973
8974static inline char16_t to_unicode(const QChar c) { return c.unicode(); }
8975static inline char16_t to_unicode(const char c) { return QLatin1Char{c}.unicode(); }
8976
8977template <typename Char>
8978static int getEscape(const Char *uc, qsizetype *pos, qsizetype len)
8979{
8980 qsizetype i = *pos;
8981 ++i;
8982 if (i < len && uc[i] == u'L')
8983 ++i;
8984 if (i < len) {
8985 int escape = to_unicode(uc[i]) - '0';
8986 if (uint(escape) >= 10U)
8987 return -1;
8988 ++i;
8989 if (i < len) {
8990 // there's a second digit
8991 int digit = to_unicode(uc[i]) - '0';
8992 if (uint(digit) < 10U) {
8993 escape = (escape * 10) + digit;
8994 ++i;
8995 }
8996 }
8997 *pos = i;
8998 return escape;
8999 }
9000 return -1;
9001}
9002
9003/*
9004 Algorithm for multiArg:
9005
  1. Parse the string as a sequence of verbatim text and placeholders (%L?\d{1,2}).
9007 The L is parsed and accepted for compatibility with non-multi-arg, but since
9008 multiArg only accepts strings as replacements, the localization request can
9009 be safely ignored.
9010 2. The result of step (1) is a list of (string-ref,int)-tuples. The string-ref
9011 either points at text to be copied verbatim (in which case the int is -1),
9012 or, initially, at the textual representation of the placeholder. In that case,
9013 the int contains the numerical number as parsed from the placeholder.
9014 3. Next, collect all the non-negative ints found, sort them in ascending order and
9015 remove duplicates.
9016 3a. If the result has more entries than multiArg() was given replacement strings,
9017 we have found placeholders we can't satisfy with replacement strings. That is
9018 fine (there could be another .arg() call coming after this one), so just
9019 truncate the result to the number of actual multiArg() replacement strings.
  3b. If the result has fewer entries than multiArg() was given replacement strings,
      the string is missing placeholders. This is an error that the user should be
      warned about.
  4. The result of step (3) is a mapping from the index of any replacement string to
     placeholder number. This is the wrong way around, but since placeholder
     numbers can get as large as 99, while we typically don't have more than 9
     replacement strings, we forgo a sparsely-used direct lookup table and instead do
     a reverse lookup each time we need to map a placeholder number to a replacement
     string index (that's a linear search; but still *much* faster than using an
     associative container).
9029 5. Next, for each of the tuples found in step (1), do the following:
9030 5a. If the int is negative, do nothing.
9031 5b. Otherwise, if the int is found in the result of step (3) at index I, replace
9032 the string-ref with a string-ref for the (complete) I'th replacement string.
9033 5c. Otherwise, do nothing.
9034 6. Concatenate all string refs into a single result string.
9035*/
9036
9037namespace {
9038struct Part
9039{
9040 Part() = default; // for QVarLengthArray; do not use
9041 constexpr Part(QAnyStringView s, int num = -1)
9042 : string{s}, number{num} {}
9043
9044 void reset(QAnyStringView s) noexcept { *this = {s, number}; }
9045
9046 QAnyStringView string;
9047 int number;
9048};
9049} // unnamed namespace
9050
9052
9053namespace {
9054
9055enum { ExpectedParts = 32 };
9056
9057typedef QVarLengthArray<Part, ExpectedParts> ParseResult;
9058typedef QVarLengthArray<int, ExpectedParts/2> ArgIndexToPlaceholderMap;
9059
9060template <typename StringView>
9061static ParseResult parseMultiArgFormatString_impl(StringView s)
9062{
9063 ParseResult result;
9064
9065 const auto uc = s.data();
9066 const auto len = s.size();
9067 const auto end = len - 1;
9068 qsizetype i = 0;
9069 qsizetype last = 0;
9070
9071 while (i < end) {
9072 if (uc[i] == u'%') {
9073 qsizetype percent = i;
9074 int number = getEscape(uc, &i, len);
9075 if (number != -1) {
9076 if (last != percent)
9077 result.push_back(Part{s.sliced(last, percent - last)}); // literal text (incl. failed placeholders)
9078 result.push_back(Part{s.sliced(percent, i - percent), number}); // parsed placeholder
9079 last = i;
9080 continue;
9081 }
9082 }
9083 ++i;
9084 }
9085
9086 if (last < len)
9087 result.push_back(Part{s.sliced(last, len - last)}); // trailing literal text
9088
9089 return result;
9090}
9091
9092static ParseResult parseMultiArgFormatString(QAnyStringView s)
9093{
9094 return s.visit([] (auto s) { return parseMultiArgFormatString_impl(s); });
9095}
9096
9097static ArgIndexToPlaceholderMap makeArgIndexToPlaceholderMap(const ParseResult &parts)
9098{
9099 ArgIndexToPlaceholderMap result;
9100
9101 for (const Part &part : parts) {
9102 if (part.number >= 0)
9103 result.push_back(part.number);
9104 }
9105
9106 std::sort(result.begin(), result.end());
9107 result.erase(std::unique(result.begin(), result.end()),
9108 result.end());
9109
9110 return result;
9111}
9112
9113static qsizetype resolveStringRefsAndReturnTotalSize(ParseResult &parts, const ArgIndexToPlaceholderMap &argIndexToPlaceholderMap, const QtPrivate::ArgBase *args[])
9114{
9115 using namespace QtPrivate;
9116 qsizetype totalSize = 0;
9117 for (Part &part : parts) {
9118 if (part.number != -1) {
9119 const auto it = std::find(argIndexToPlaceholderMap.begin(), argIndexToPlaceholderMap.end(), part.number);
9120 if (it != argIndexToPlaceholderMap.end()) {
9121 const auto &arg = *args[it - argIndexToPlaceholderMap.begin()];
9122 switch (arg.tag) {
9123 case ArgBase::L1:
9124 part.reset(static_cast<const QLatin1StringArg&>(arg).string);
9125 break;
9126 case ArgBase::Any:
9127 part.reset(static_cast<const QAnyStringArg&>(arg).string);
9128 break;
9129 case ArgBase::U16:
9130 part.reset(static_cast<const QStringViewArg&>(arg).string);
9131 break;
9132 }
9133 }
9134 }
9135 totalSize += part.string.size();
9136 }
9137 return totalSize;
9138}
9139
9140} // unnamed namespace
9141
9142QString QtPrivate::argToQString(QAnyStringView pattern, size_t numArgs, const ArgBase **args)
9143{
9144 // Step 1-2 above
9145 ParseResult parts = parseMultiArgFormatString(pattern);
9146
9147 // 3-4
9148 ArgIndexToPlaceholderMap argIndexToPlaceholderMap = makeArgIndexToPlaceholderMap(parts);
9149
9150 if (static_cast<size_t>(argIndexToPlaceholderMap.size()) > numArgs) // 3a
9151 argIndexToPlaceholderMap.resize(qsizetype(numArgs));
9152 else if (Q_UNLIKELY(static_cast<size_t>(argIndexToPlaceholderMap.size()) < numArgs)) // 3b
9153 qWarning("QString::arg: %d argument(s) missing in %ls",
9154 int(numArgs - argIndexToPlaceholderMap.size()), qUtf16Printable(pattern.toString()));
9155
9156 // 5
9157 const qsizetype totalSize = resolveStringRefsAndReturnTotalSize(parts, argIndexToPlaceholderMap, args);
9158
9159 // 6:
9160 QString result(totalSize, Qt::Uninitialized);
9161 auto out = const_cast<QChar*>(result.constData());
9162
9163 struct Concatenate {
9164 QChar *out;
9165 QChar *operator()(QLatin1String part) noexcept
9166 {
9167 if (part.size()) {
9168 qt_from_latin1(reinterpret_cast<char16_t*>(out),
9169 part.data(), part.size());
9170 }
9171 return out + part.size();
9172 }
9173 QChar *operator()(QUtf8StringView part) noexcept
9174 {
9175 return QUtf8::convertToUnicode(out, part);
9176 }
9177 QChar *operator()(QStringView part) noexcept
9178 {
9179 if (part.size())
9180 memcpy(out, part.data(), part.size() * sizeof(QChar));
9181 return out + part.size();
9182 }
9183 };
9184
9185 for (const Part &part : parts)
9186 out = part.string.visit(Concatenate{out});
9187
9188 // UTF-8 decoding may have caused an overestimate of totalSize - correct it:
9189 result.truncate(out - result.cbegin());
9190
9191 return result;
9192}
9193
9194/*! \fn bool QString::isRightToLeft() const
9195
9196 Returns \c true if the string is read right to left.
9197
9198 \sa QStringView::isRightToLeft()
9199*/
9200bool QString::isRightToLeft() const
9201{
9202 return QtPrivate::isRightToLeft(QStringView(*this));
9203}
9204
9205/*!
9206 \fn bool QString::isValidUtf16() const noexcept
9207 \since 5.15
9208
9209 Returns \c true if the string contains valid UTF-16 encoded data,
9210 or \c false otherwise.
9211
9212 Note that this function does not perform any special validation of the
9213 data; it merely checks if it can be successfully decoded from UTF-16.
9214 The data is assumed to be in host byte order; the presence of a BOM
9215 is meaningless.
9216
9217 \sa QStringView::isValidUtf16()
9218*/
9219
9220/*! \fn QChar *QString::data()
9221
9222 Returns a pointer to the data stored in the QString. The pointer
9223 can be used to access and modify the characters that compose the
9224 string.
9225
9226 Unlike constData() and unicode(), the returned data is always
9227 '\\0'-terminated.
9228
9229 Example:
9230
9231 \snippet qstring/main.cpp 19
9232
9233 Note that the pointer remains valid only as long as the string is
9234 not modified by other means. For read-only access, constData() is
9235 faster because it never causes a \l{deep copy} to occur.
9236
9237 \sa constData(), operator[]()
9238*/
9239
9240/*! \fn const QChar *QString::data() const
9241
9242 \overload
9243
9244 \note The returned string may not be '\\0'-terminated.
9245 Use size() to determine the length of the array.
9246
9247 \sa fromRawData()
9248*/
9249
9250/*! \fn const QChar *QString::constData() const
9251
9252 Returns a pointer to the data stored in the QString. The pointer
9253 can be used to access the characters that compose the string.
9254
9255 Note that the pointer remains valid only as long as the string is
9256 not modified.
9257
9258 \note The returned string may not be '\\0'-terminated.
9259 Use size() to determine the length of the array.
9260
9261 \sa data(), operator[](), fromRawData()
9262*/
9263
9264/*! \fn void QString::push_front(const QString &other)
9265
9266 This function is provided for STL compatibility, prepending the
9267 given \a other string to the beginning of this string. It is
9268 equivalent to \c prepend(other).
9269
9270 \sa prepend()
9271*/
9272
9273/*! \fn void QString::push_front(QChar ch)
9274
9275 \overload
9276
9277 Prepends the given \a ch character to the beginning of this string.
9278*/
9279
9280/*! \fn void QString::push_back(const QString &other)
9281
9282 This function is provided for STL compatibility, appending the
9283 given \a other string onto the end of this string. It is
9284 equivalent to \c append(other).
9285
9286 \sa append()
9287*/
9288
9289/*! \fn void QString::push_back(QChar ch)
9290
9291 \overload
9292
9293 Appends the given \a ch character onto the end of this string.
9294*/
9295
9296/*!
9297 \since 6.1
9298
9299 Removes from the string the characters in the half-open range
9300 [ \a first , \a last ). Returns an iterator to the character
9301 immediately after the last erased character (i.e. the character
9302 referred to by \a last before the erase).
9303*/
9304QString::iterator QString::erase(QString::const_iterator first, QString::const_iterator last)
9305{
9306 const auto start = std::distance(cbegin(), first);
9307 const auto len = std::distance(first, last);
9308 remove(start, len);
9309 return begin() + start;
9310}
9311
9312/*!
9313 \fn QString::iterator QString::erase(QString::const_iterator it)
9314
9315 \overload
9316 \since 6.5
9317
9318 Removes the character denoted by \c it from the string.
9319 Returns an iterator to the character immediately after the
9320 erased character.
9321
9322 \code
9323 QString c = "abcdefg";
9324 auto it = c.erase(c.cbegin()); // c is now "bcdefg"; "it" points to "b"
9325 \endcode
9326*/
9327
9328/*! \fn void QString::shrink_to_fit()
9329 \since 5.10
9330
9331 This function is provided for STL compatibility. It is
9332 equivalent to squeeze().
9333
9334 \sa squeeze()
9335*/
9336
9337/*!
9338 \fn std::string QString::toStdString() const
9339
9340 Returns a std::string object with the data contained in this
9341 QString. The Unicode data is converted into 8-bit characters using
9342 the toUtf8() function.
9343
9344 This method is mostly useful to pass a QString to a function
9345 that accepts a std::string object.
9346
9347 \sa toLatin1(), toUtf8(), toLocal8Bit(), QByteArray::toStdString()
9348*/
9349std::string QString::toStdString() const
9350{
9351 std::string result;
9352 if (isEmpty())
9353 return result;
9354
9355 auto writeToBuffer = [this](char *out, size_t) {
9356 char *last = QUtf8::convertFromUnicode(out, *this);
9357 return last - out;
9358 };
9359 size_t maxSize = size() * 3; // worst case for UTF-8
9360#ifdef __cpp_lib_string_resize_and_overwrite
9361 // C++23
9362 result.resize_and_overwrite(maxSize, writeToBuffer);
9363#else
9364 result.resize(maxSize);
9365 result.resize(writeToBuffer(result.data(), result.size()));
9366#endif
9367 return result;
9368}
9369
9370/*!
9371 \fn QString QString::fromRawData(const char16_t *unicode, qsizetype size)
9372 \since 6.10
9373
9374 Constructs a QString that uses the first \a size Unicode characters
9375 in the array \a unicode. The data in \a unicode is \e not
9376 copied. The caller must be able to guarantee that \a unicode will
9377 not be deleted or modified as long as the QString (or an
9378 unmodified copy of it) exists.
9379
9380 Any attempts to modify the QString or copies of it will cause it
9381 to create a deep copy of the data, ensuring that the raw data
9382 isn't modified.
9383
9384 Here is an example of how we can use a QRegularExpression on raw data in
9385 memory without requiring to copy the data into a QString:
9386
9387 \snippet qstring/main.cpp 22
9388 \snippet qstring/main.cpp 23
9389
9390 \warning A string created with fromRawData() is \e not
9391 '\\0'-terminated, unless the raw data contains a '\\0' character
9392 at position \a size. This means unicode() will \e not return a
9393 '\\0'-terminated string (although utf16() does, at the cost of
9394 copying the raw data).
9395
9396 \sa fromUtf16(), setRawData(), data(), constData(),
9397 nullTerminate(), nullTerminated()
9398*/
9399
9400/*!
9401 \fn QString QString::fromRawData(const QChar *unicode, qsizetype size)
9402 \overload
9403*/
9404
9405/*!
9406 \since 4.7
9407
9408 Resets the QString to use the first \a size Unicode characters
9409 in the array \a unicode. The data in \a unicode is \e not
9410 copied. The caller must be able to guarantee that \a unicode will
9411 not be deleted or modified as long as the QString (or an
9412 unmodified copy of it) exists.
9413
    This function can be used instead of fromRawData() to re-use
    existing QString objects to save memory reallocations.
9416
9417 \sa fromRawData(), nullTerminate(), nullTerminated()
9418*/
9419QString &QString::setRawData(const QChar *unicode, qsizetype size)
9420{
9421 if (!unicode || !size) {
9422 clear();
9423 }
9424 *this = fromRawData(unicode, size);
9425 return *this;
9426}
9427
9428/*! \fn QString QString::fromStdU16String(const std::u16string &str)
9429 \since 5.5
9430
9431 \include qstring.cpp {from-std-string} {UTF-16} {fromUtf16()}
9432
9433 \sa fromUtf16(), fromStdWString(), fromStdU32String()
9434*/
9435
9436/*!
9437 \fn std::u16string QString::toStdU16String() const
9438 \since 5.5
9439
9440 Returns a std::u16string object with the data contained in this
9441 QString. The Unicode data is the same as returned by the utf16()
9442 method.
9443
9444 \sa utf16(), toStdWString(), toStdU32String()
9445*/
9446
9447/*! \fn QString QString::fromStdU32String(const std::u32string &str)
9448 \since 5.5
9449
9450 \include qstring.cpp {from-std-string} {UTF-32} {fromUcs4()}
9451
9452 \sa fromUcs4(), fromStdWString(), fromStdU16String()
9453*/
9454
9455/*!
9456 \fn std::u32string QString::toStdU32String() const
9457 \since 5.5
9458
9459 Returns a std::u32string object with the data contained in this
9460 QString. The Unicode data is the same as returned by the toUcs4()
9461 method.
9462
9463 \sa toUcs4(), toStdWString(), toStdU16String()
9464*/
9465
9466#if !defined(QT_NO_DATASTREAM)
9467/*!
9468 \fn QDataStream &operator<<(QDataStream &stream, const QString &string)
9469 \relates QString
9470
9471 Writes the given \a string to the specified \a stream.
9472
9473 \sa {Serializing Qt Data Types}
9474*/
9475
9476QDataStream &operator<<(QDataStream &out, const QString &str)
9477{
9478 if (out.version() == 1) {
9479 out << str.toLatin1();
9480 } else {
9481 if (!str.isNull() || out.version() < 3) {
9482 if ((out.byteOrder() == QDataStream::BigEndian) == (QSysInfo::ByteOrder == QSysInfo::BigEndian)) {
9483 out.writeBytes(reinterpret_cast<const char *>(str.unicode()),
9484 static_cast<qsizetype>(sizeof(QChar) * str.size()));
9485 } else {
9486 QVarLengthArray<char16_t> buffer(str.size());
9487 qbswap<sizeof(char16_t)>(str.constData(), str.size(), buffer.data());
9488 out.writeBytes(reinterpret_cast<const char *>(buffer.data()),
9489 static_cast<qsizetype>(sizeof(char16_t) * buffer.size()));
9490 }
9491 } else {
9492 QDataStream::writeQSizeType(out, -1); // write null marker
9493 }
9494 }
9495 return out;
9496}
9497
9498/*!
9499 \fn QDataStream &operator>>(QDataStream &stream, QString &string)
9500 \relates QString
9501
9502 Reads a string from the specified \a stream into the given \a string.
9503
9504 \sa {Serializing Qt Data Types}
9505*/
9506
9507QDataStream &operator>>(QDataStream &in, QString &str)
9508{
9509 if (in.version() == 1) {
9510 QByteArray l;
9511 in >> l;
9512 str = QString::fromLatin1(l);
9513 } else {
9514 qint64 size = QDataStream::readQSizeType(in);
9515 qsizetype bytes = size;
9516 if (size != bytes || size < -1) {
9517 str.clear();
9518 in.setStatus(QDataStream::SizeLimitExceeded);
9519 return in;
9520 }
9521 if (bytes == -1) { // null string
9522 str = QString();
9523 } else if (bytes > 0) {
9524 if (bytes & 0x1) {
9525 str.clear();
9526 in.setStatus(QDataStream::ReadCorruptData);
9527 return in;
9528 }
9529
9530 const qsizetype Step = 1024 * 1024;
9531 qsizetype len = bytes / 2;
9532 qsizetype allocated = 0;
9533
9534 while (allocated < len) {
9535 int blockSize = qMin(Step, len - allocated);
9536 str.resize(allocated + blockSize);
9537 if (in.readRawData(reinterpret_cast<char *>(str.data()) + allocated * 2,
9538 blockSize * 2) != blockSize * 2) {
9539 str.clear();
9540 in.setStatus(QDataStream::ReadPastEnd);
9541 return in;
9542 }
9543 allocated += blockSize;
9544 }
9545
9546 if ((in.byteOrder() == QDataStream::BigEndian)
9547 != (QSysInfo::ByteOrder == QSysInfo::BigEndian)) {
9548 char16_t *data = reinterpret_cast<char16_t *>(str.data());
9549 qbswap<sizeof(*data)>(data, len, data);
9550 }
9551 } else {
9552 str = QString(QLatin1StringView(""));
9553 }
9554 }
9555 return in;
9556}
9557#endif // QT_NO_DATASTREAM
9558
9559/*!
9560 \typedef QString::Data
9561 \internal
9562*/
9563
9564/*!
9565 \typedef QString::DataPtr
9566 \internal
9567*/
9568
9569/*!
9570 \fn DataPtr & QString::data_ptr()
9571 \internal
9572*/
9573
9574/*!
9575 \since 5.11
9576 \internal
9577 \relates QStringView
9578
9579 Returns \c true if the string is read right to left.
9580
9581 \sa QString::isRightToLeft()
9582*/
9583bool QtPrivate::isRightToLeft(QStringView string) noexcept
9584{
9585 int isolateLevel = 0;
9586
9587 for (QStringIterator i(string); i.hasNext();) {
9588 const char32_t c = i.next();
9589
9590 switch (QChar::direction(c)) {
9591 case QChar::DirRLI:
9592 case QChar::DirLRI:
9593 case QChar::DirFSI:
9594 ++isolateLevel;
9595 break;
9596 case QChar::DirPDI:
9597 if (isolateLevel)
9598 --isolateLevel;
9599 break;
9600 case QChar::DirL:
9601 if (isolateLevel)
9602 break;
9603 return false;
9604 case QChar::DirR:
9605 case QChar::DirAL:
9606 if (isolateLevel)
9607 break;
9608 return true;
9609 case QChar::DirEN:
9610 case QChar::DirES:
9611 case QChar::DirET:
9612 case QChar::DirAN:
9613 case QChar::DirCS:
9614 case QChar::DirB:
9615 case QChar::DirS:
9616 case QChar::DirWS:
9617 case QChar::DirON:
9618 case QChar::DirLRE:
9619 case QChar::DirLRO:
9620 case QChar::DirRLE:
9621 case QChar::DirRLO:
9622 case QChar::DirPDF:
9623 case QChar::DirNSM:
9624 case QChar::DirBN:
9625 break;
9626 }
9627 }
9628 return false;
9629}
9630
9631qsizetype QtPrivate::count(QStringView haystack, QStringView needle, Qt::CaseSensitivity cs) noexcept
9632{
9633 qsizetype num = 0;
9634 qsizetype i = -1;
9635 if (haystack.size() > 500 && needle.size() > 5) {
9636 QStringMatcher matcher(needle, cs);
9637 while ((i = matcher.indexIn(haystack, i + 1)) != -1)
9638 ++num;
9639 } else {
9640 while ((i = QtPrivate::findString(haystack, i + 1, needle, cs)) != -1)
9641 ++num;
9642 }
9643 return num;
9644}
9645
9646qsizetype QtPrivate::count(QStringView haystack, QChar needle, Qt::CaseSensitivity cs) noexcept
9647{
9648 if (cs == Qt::CaseSensitive)
9649 return std::count(haystack.cbegin(), haystack.cend(), needle);
9650
9651 needle = foldCase(needle);
9652 return std::count_if(haystack.cbegin(), haystack.cend(),
9653 [needle](const QChar c) { return foldAndCompare(c, needle); });
9654}
9655
9656qsizetype QtPrivate::count(QLatin1StringView haystack, QLatin1StringView needle, Qt::CaseSensitivity cs)
9657{
9658 qsizetype num = 0;
9659 qsizetype i = -1;
9660
9661 QLatin1StringMatcher matcher(needle, cs);
9662 while ((i = matcher.indexIn(haystack, i + 1)) != -1)
9663 ++num;
9664
9665 return num;
9666}
9667
9668qsizetype QtPrivate::count(QLatin1StringView haystack, QStringView needle, Qt::CaseSensitivity cs)
9669{
9670 if (haystack.size() < needle.size())
9671 return 0;
9672
9673 if (!QtPrivate::isLatin1(needle)) // won't find non-L1 UTF-16 needles in a L1 haystack!
9674 return 0;
9675
9676 qsizetype num = 0;
9677 qsizetype i = -1;
9678
9679 QVarLengthArray<uchar> s(needle.size());
9680 qt_to_latin1_unchecked(s.data(), needle.utf16(), needle.size());
9681
9682 QLatin1StringMatcher matcher(QLatin1StringView(reinterpret_cast<char *>(s.data()), s.size()),
9683 cs);
9684 while ((i = matcher.indexIn(haystack, i + 1)) != -1)
9685 ++num;
9686
9687 return num;
9688}
9689
9690qsizetype QtPrivate::count(QStringView haystack, QLatin1StringView needle, Qt::CaseSensitivity cs)
9691{
9692 if (haystack.size() < needle.size())
9693 return -1;
9694
9695 QVarLengthArray<char16_t> s = qt_from_latin1_to_qvla(needle);
9696 return QtPrivate::count(haystack, QStringView(s.data(), s.size()), cs);
9697}
9698
9699qsizetype QtPrivate::count(QLatin1StringView haystack, QChar needle, Qt::CaseSensitivity cs) noexcept
9700{
9701 // non-L1 needles cannot possibly match in L1-only haystacks
9702 if (needle.unicode() > 0xff)
9703 return 0;
9704
9705 if (cs == Qt::CaseSensitive) {
9706 return std::count(haystack.cbegin(), haystack.cend(), needle.toLatin1());
9707 } else {
9708 return std::count_if(haystack.cbegin(), haystack.cend(),
9709 CaseInsensitiveL1::matcher(needle.toLatin1()));
9710 }
9711}
9712
9713/*!
9714 \fn bool QtPrivate::startsWith(QStringView haystack, QStringView needle, Qt::CaseSensitivity cs)
9715 \since 5.10
9716 \fn bool QtPrivate::startsWith(QStringView haystack, QLatin1StringView needle, Qt::CaseSensitivity cs)
9717 \since 5.10
9718 \fn bool QtPrivate::startsWith(QLatin1StringView haystack, QStringView needle, Qt::CaseSensitivity cs)
9719 \since 5.10
9720 \fn bool QtPrivate::startsWith(QLatin1StringView haystack, QLatin1StringView needle, Qt::CaseSensitivity cs)
9721 \since 5.10
9722 \internal
9723 \relates QStringView
9724
9725 Returns \c true if \a haystack starts with \a needle,
9726 otherwise returns \c false.
9727
9728 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
9729
9730 \sa QtPrivate::endsWith(), QString::endsWith(), QStringView::endsWith(), QLatin1StringView::endsWith()
9731*/
9732
9733bool QtPrivate::startsWith(QStringView haystack, QStringView needle, Qt::CaseSensitivity cs) noexcept
9734{
9735 return qt_starts_with_impl(haystack, needle, cs);
9736}
9737
9738bool QtPrivate::startsWith(QStringView haystack, QLatin1StringView needle, Qt::CaseSensitivity cs) noexcept
9739{
9740 return qt_starts_with_impl(haystack, needle, cs);
9741}
9742
9743bool QtPrivate::startsWith(QLatin1StringView haystack, QStringView needle, Qt::CaseSensitivity cs) noexcept
9744{
9745 return qt_starts_with_impl(haystack, needle, cs);
9746}
9747
9748bool QtPrivate::startsWith(QLatin1StringView haystack, QLatin1StringView needle, Qt::CaseSensitivity cs) noexcept
9749{
9750 return qt_starts_with_impl(haystack, needle, cs);
9751}
9752
9753/*!
9754 \fn bool QtPrivate::endsWith(QStringView haystack, QStringView needle, Qt::CaseSensitivity cs)
9755 \since 5.10
9756 \fn bool QtPrivate::endsWith(QStringView haystack, QLatin1StringView needle, Qt::CaseSensitivity cs)
9757 \since 5.10
9758 \fn bool QtPrivate::endsWith(QLatin1StringView haystack, QStringView needle, Qt::CaseSensitivity cs)
9759 \since 5.10
9760 \fn bool QtPrivate::endsWith(QLatin1StringView haystack, QLatin1StringView needle, Qt::CaseSensitivity cs)
9761 \since 5.10
9762 \internal
9763 \relates QStringView
9764
9765 Returns \c true if \a haystack ends with \a needle,
9766 otherwise returns \c false.
9767
9768 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
9769
9770 \sa QtPrivate::startsWith(), QString::endsWith(), QStringView::endsWith(), QLatin1StringView::endsWith()
9771*/
9772
9773bool QtPrivate::endsWith(QStringView haystack, QStringView needle, Qt::CaseSensitivity cs) noexcept
9774{
9775 return qt_ends_with_impl(haystack, needle, cs);
9776}
9777
9778bool QtPrivate::endsWith(QStringView haystack, QLatin1StringView needle, Qt::CaseSensitivity cs) noexcept
9779{
9780 return qt_ends_with_impl(haystack, needle, cs);
9781}
9782
9783bool QtPrivate::endsWith(QLatin1StringView haystack, QStringView needle, Qt::CaseSensitivity cs) noexcept
9784{
9785 return qt_ends_with_impl(haystack, needle, cs);
9786}
9787
9788bool QtPrivate::endsWith(QLatin1StringView haystack, QLatin1StringView needle, Qt::CaseSensitivity cs) noexcept
9789{
9790 return qt_ends_with_impl(haystack, needle, cs);
9791}
9792
9793qsizetype QtPrivate::findString(QStringView haystack0, qsizetype from, QStringView needle0, Qt::CaseSensitivity cs) noexcept
9794{
9795 const qsizetype l = haystack0.size();
9796 const qsizetype sl = needle0.size();
9797 if (sl == 1)
9798 return findString(haystack0, from, needle0[0], cs);
9799 if (from < 0)
9800 from += l;
9801 if (std::size_t(sl + from) > std::size_t(l))
9802 return -1;
9803 if (!sl)
9804 return from;
9805 if (!l)
9806 return -1;
9807
9808 /*
9809 We use the Boyer-Moore algorithm in cases where the overhead
9810 for the skip table should pay off, otherwise we use a simple
9811 hash function.
9812 */
9813 if (l > 500 && sl > 5)
9814 return qFindStringBoyerMoore(haystack0, from, needle0, cs);
9815
9816 auto sv = [sl](const char16_t *v) { return QStringView(v, sl); };
9817 /*
9818 We use some hashing for efficiency's sake. Instead of
9819 comparing strings, we compare the hash value of str with that
9820 of a part of this QString. Only if that matches, we call
9821 qt_string_compare().
9822 */
9823 const char16_t *needle = needle0.utf16();
9824 const char16_t *haystack = haystack0.utf16() + from;
9825 const char16_t *end = haystack0.utf16() + (l - sl);
9826 const qregisteruint sl_minus_1 = sl - 1;
9827 qregisteruint hashNeedle = 0, hashHaystack = 0;
9828 qsizetype idx;
9829
9830 if (cs == Qt::CaseSensitive) {
9831 for (idx = 0; idx < sl; ++idx) {
9832 hashNeedle = ((hashNeedle<<1) + needle[idx]);
9833 hashHaystack = ((hashHaystack<<1) + haystack[idx]);
9834 }
9835 hashHaystack -= haystack[sl_minus_1];
9836
9837 while (haystack <= end) {
9838 hashHaystack += haystack[sl_minus_1];
9839 if (hashHaystack == hashNeedle
9840 && QtPrivate::compareStrings(needle0, sv(haystack), Qt::CaseSensitive) == 0)
9841 return haystack - haystack0.utf16();
9842
9843 REHASH(*haystack);
9844 ++haystack;
9845 }
9846 } else {
9847 const char16_t *haystack_start = haystack0.utf16();
9848 for (idx = 0; idx < sl; ++idx) {
9849 hashNeedle = (hashNeedle<<1) + foldCase(needle + idx, needle);
9850 hashHaystack = (hashHaystack<<1) + foldCase(haystack + idx, haystack_start);
9851 }
9852 hashHaystack -= foldCase(haystack + sl_minus_1, haystack_start);
9853
9854 while (haystack <= end) {
9855 hashHaystack += foldCase(haystack + sl_minus_1, haystack_start);
9856 if (hashHaystack == hashNeedle
9857 && QtPrivate::compareStrings(needle0, sv(haystack), Qt::CaseInsensitive) == 0)
9858 return haystack - haystack0.utf16();
9859
9860 REHASH(foldCase(haystack, haystack_start));
9861 ++haystack;
9862 }
9863 }
9864 return -1;
9865}
9866
9867qsizetype QtPrivate::findString(QStringView haystack, qsizetype from, QLatin1StringView needle, Qt::CaseSensitivity cs) noexcept
9868{
9869 if (haystack.size() < needle.size())
9870 return -1;
9871
9872 QVarLengthArray<char16_t> s = qt_from_latin1_to_qvla(needle);
9873 return QtPrivate::findString(haystack, from, QStringView(reinterpret_cast<const QChar*>(s.constData()), s.size()), cs);
9874}
9875
9876qsizetype QtPrivate::findString(QLatin1StringView haystack, qsizetype from, QStringView needle, Qt::CaseSensitivity cs) noexcept
9877{
9878 if (haystack.size() < needle.size())
9879 return -1;
9880
9881 if (!QtPrivate::isLatin1(needle)) // won't find non-L1 UTF-16 needles in a L1 haystack!
9882 return -1;
9883
9884 if (needle.size() == 1) {
9885 const char n = needle.front().toLatin1();
9886 return QtPrivate::findString(haystack, from, QLatin1StringView(&n, 1), cs);
9887 }
9888
9889 QVarLengthArray<char> s(needle.size());
9890 qt_to_latin1_unchecked(reinterpret_cast<uchar *>(s.data()), needle.utf16(), needle.size());
9891 return QtPrivate::findString(haystack, from, QLatin1StringView(s.data(), s.size()), cs);
9892}
9893
9894qsizetype QtPrivate::findString(QLatin1StringView haystack, qsizetype from, QLatin1StringView needle, Qt::CaseSensitivity cs) noexcept
9895{
9896 if (from < 0)
9897 from += haystack.size();
9898 if (from < 0)
9899 return -1;
9900 qsizetype adjustedSize = haystack.size() - from;
9901 if (adjustedSize < needle.size())
9902 return -1;
9903 if (needle.size() == 0)
9904 return from;
9905
9906 if (cs == Qt::CaseSensitive) {
9907
9908 if (needle.size() == 1) {
9909 Q_ASSERT(haystack.data() != nullptr); // see size check above
9910 if (auto it = memchr(haystack.data() + from, needle.front().toLatin1(), adjustedSize))
9911 return static_cast<const char *>(it) - haystack.data();
9912 return -1;
9913 }
9914
9915 const QLatin1StringMatcher matcher(needle, Qt::CaseSensitivity::CaseSensitive);
9916 return matcher.indexIn(haystack, from);
9917 }
9918
9919 // If the needle is sufficiently small we simply iteratively search through
9920 // the haystack. When the needle is too long we use a boyer-moore searcher
9921 // from the standard library, if available. If it is not available then the
9922 // QLatin1Strings are converted to QString and compared as such. Though
9923 // initialization is slower the boyer-moore search it employs still makes up
9924 // for it when haystack and needle are sufficiently long.
9925 // The needle size was chosen by testing various lengths using the
9926 // qstringtokenizer benchmark with the
9927 // "tokenize_qlatin1string_qlatin1string" test.
9928#ifdef Q_CC_MSVC
9929 const qsizetype threshold = 1;
9930#else
9931 const qsizetype threshold = 13;
9932#endif
9933 if (needle.size() <= threshold) {
9934 const auto begin = haystack.begin();
9935 const auto end = haystack.end() - needle.size() + 1;
9936 auto ciMatch = CaseInsensitiveL1::matcher(needle[0].toLatin1());
9937 const qsizetype nlen1 = needle.size() - 1;
9938 for (auto it = std::find_if(begin + from, end, ciMatch); it != end;
9939 it = std::find_if(it + 1, end, ciMatch)) {
9940 // In this comparison we skip the first character because we know it's a match
9941 if (!nlen1 || QLatin1StringView(it + 1, nlen1).compare(needle.sliced(1), cs) == 0)
9942 return std::distance(begin, it);
9943 }
9944 return -1;
9945 }
9946
9947 QLatin1StringMatcher matcher(needle, Qt::CaseSensitivity::CaseInsensitive);
9948 return matcher.indexIn(haystack, from);
9949}
9950
9951qsizetype QtPrivate::lastIndexOf(QStringView haystack, qsizetype from, char16_t needle, Qt::CaseSensitivity cs) noexcept
9952{
9953 return qLastIndexOf(haystack, QChar(needle), from, cs);
9954}
9955
9956qsizetype QtPrivate::lastIndexOf(QStringView haystack, qsizetype from, QStringView needle, Qt::CaseSensitivity cs) noexcept
9957{
9958 return qLastIndexOf(haystack, from, needle, cs);
9959}
9960
9961qsizetype QtPrivate::lastIndexOf(QStringView haystack, qsizetype from, QLatin1StringView needle, Qt::CaseSensitivity cs) noexcept
9962{
9963 return qLastIndexOf(haystack, from, needle, cs);
9964}
9965
9966qsizetype QtPrivate::lastIndexOf(QLatin1StringView haystack, qsizetype from, QStringView needle, Qt::CaseSensitivity cs) noexcept
9967{
9968 return qLastIndexOf(haystack, from, needle, cs);
9969}
9970
9971qsizetype QtPrivate::lastIndexOf(QLatin1StringView haystack, qsizetype from, QLatin1StringView needle, Qt::CaseSensitivity cs) noexcept
9972{
9973 return qLastIndexOf(haystack, from, needle, cs);
9974}
9975
9976#if QT_CONFIG(regularexpression)
9977qsizetype QtPrivate::indexOf(QStringView viewHaystack, const QString *stringHaystack, const QRegularExpression &re, qsizetype from, QRegularExpressionMatch *rmatch)
9978{
9979 if (!re.isValid()) {
9980 qtWarnAboutInvalidRegularExpression(re, "QString(View)", "indexOf");
9981 return -1;
9982 }
9983
9984 QRegularExpressionMatch match = stringHaystack
9985 ? re.match(*stringHaystack, from)
9986 : re.matchView(viewHaystack, from);
9987 if (match.hasMatch()) {
9988 const qsizetype ret = match.capturedStart();
9989 if (rmatch)
9990 *rmatch = std::move(match);
9991 return ret;
9992 }
9993
9994 return -1;
9995}
9996
9997qsizetype QtPrivate::indexOf(QStringView haystack, const QRegularExpression &re, qsizetype from, QRegularExpressionMatch *rmatch)
9998{
9999 return indexOf(haystack, nullptr, re, from, rmatch);
10000}
10001
10002qsizetype QtPrivate::lastIndexOf(QStringView viewHaystack, const QString *stringHaystack, const QRegularExpression &re, qsizetype from, QRegularExpressionMatch *rmatch)
10003{
10004 if (!re.isValid()) {
10005 qtWarnAboutInvalidRegularExpression(re, "QString(View)", "lastIndexOf");
10006 return -1;
10007 }
10008
10009 qsizetype endpos = (from < 0) ? (viewHaystack.size() + from + 1) : (from + 1);
10010 QRegularExpressionMatchIterator iterator = stringHaystack
10011 ? re.globalMatch(*stringHaystack)
10012 : re.globalMatchView(viewHaystack);
10013 qsizetype lastIndex = -1;
10014 while (iterator.hasNext()) {
10015 QRegularExpressionMatch match = iterator.next();
10016 qsizetype start = match.capturedStart();
10017 if (start < endpos) {
10018 lastIndex = start;
10019 if (rmatch)
10020 *rmatch = std::move(match);
10021 } else {
10022 break;
10023 }
10024 }
10025
10026 return lastIndex;
10027}
10028
10029qsizetype QtPrivate::lastIndexOf(QStringView haystack, const QRegularExpression &re, qsizetype from, QRegularExpressionMatch *rmatch)
10030{
10031 return lastIndexOf(haystack, nullptr, re, from, rmatch);
10032}
10033
10034bool QtPrivate::contains(QStringView viewHaystack, const QString *stringHaystack, const QRegularExpression &re, QRegularExpressionMatch *rmatch)
10035{
10036 if (!re.isValid()) {
10037 qtWarnAboutInvalidRegularExpression(re, "QString(View)", "contains");
10038 return false;
10039 }
10040 QRegularExpressionMatch m = stringHaystack
10041 ? re.match(*stringHaystack)
10042 : re.matchView(viewHaystack);
10043 bool hasMatch = m.hasMatch();
10044 if (hasMatch && rmatch)
10045 *rmatch = std::move(m);
10046 return hasMatch;
10047}
10048
10049bool QtPrivate::contains(QStringView haystack, const QRegularExpression &re, QRegularExpressionMatch *rmatch)
10050{
10051 return contains(haystack, nullptr, re, rmatch);
10052}
10053
10054qsizetype QtPrivate::count(QStringView haystack, const QRegularExpression &re)
10055{
10056 if (!re.isValid()) {
10057 qtWarnAboutInvalidRegularExpression(re, "QString(View)", "count");
10058 return 0;
10059 }
10060 qsizetype count = 0;
10061 qsizetype index = -1;
10062 qsizetype len = haystack.size();
10063 while (index <= len - 1) {
10064 QRegularExpressionMatch match = re.matchView(haystack, index + 1);
10065 if (!match.hasMatch())
10066 break;
10067 count++;
10068
10069 // Search again, from the next character after the beginning of this
10070 // capture. If the capture starts with a surrogate pair, both together
10071 // count as "one character".
10072 index = match.capturedStart();
10073 if (index < len && haystack[index].isHighSurrogate())
10074 ++index;
10075 }
10076 return count;
10077}
10078
10079#endif // QT_CONFIG(regularexpression)
10080
10081/*!
10082 \since 5.0
10083
10084 Converts a plain text string to an HTML string with
10085 HTML metacharacters \c{<}, \c{>}, \c{&}, and \c{"} replaced by HTML
10086 entities.
10087
10088 Example:
10089
10090 \snippet code/src_corelib_text_qstring.cpp 7
10091*/
10092QString QString::toHtmlEscaped() const
10093{
10094 const auto pos = std::u16string_view(*this).find_first_of(u"<>&\"");
10095 if (pos == std::u16string_view::npos)
10096 return *this;
10097 QString rich;
10098 const qsizetype len = size();
10099 rich.reserve(qsizetype(len * 1.1));
10100 rich += qToStringViewIgnoringNull(*this).first(pos);
10101 for (auto ch : qToStringViewIgnoringNull(*this).sliced(pos)) {
10102 if (ch == u'<')
10103 rich += "&lt;"_L1;
10104 else if (ch == u'>')
10105 rich += "&gt;"_L1;
10106 else if (ch == u'&')
10107 rich += "&amp;"_L1;
10108 else if (ch == u'"')
10109 rich += "&quot;"_L1;
10110 else
10111 rich += ch;
10112 }
10113 rich.squeeze();
10114 return rich;
10115}
10116
10117/*!
10118 \macro QStringLiteral(str)
10119 \relates QString
10120
10121 The macro generates the data for a QString out of the string literal \a str
10122 at compile time. Creating a QString from it is free in this case, and the
10123 generated string data is stored in the read-only segment of the compiled
10124 object file.
10125
10126 If you have code that looks like this:
10127
10128 \snippet code/src_corelib_text_qstring.cpp 9
10129
10130 then a temporary QString will be created to be passed as the \c{hasAttribute}
10131 function parameter. This can be quite expensive, as it involves a memory
10132 allocation and the copy/conversion of the data into QString's internal
10133 encoding.
10134
10135 This cost can be avoided by using QStringLiteral instead:
10136
10137 \snippet code/src_corelib_text_qstring.cpp 10
10138
10139 In this case, QString's internal data will be generated at compile time; no
10140 conversion or allocation will occur at runtime.
10141
10142 Using QStringLiteral instead of a double quoted plain C++ string literal can
10143 significantly speed up creation of QString instances from data known at
10144 compile time.
10145
10146 \note QLatin1StringView can still be more efficient than QStringLiteral
10147 when the string is passed to a function that has an overload taking
10148 QLatin1StringView and this overload avoids conversion to QString. For
10149 instance, QString::operator==() can compare to a QLatin1StringView
10150 directly:
10151
10152 \snippet code/src_corelib_text_qstring.cpp 11
10153
10154 \note Some compilers have bugs encoding strings containing characters outside
10155 the US-ASCII character set. Make sure you prefix your string with \c{u} in
10156 those cases. It is optional otherwise.
10157
10158 \sa QByteArrayLiteral
10159*/
10160
10161#if QT_DEPRECATED_SINCE(6, 8)
10162/*!
10163 \fn QtLiterals::operator""_qs(const char16_t *str, size_t size)
10164
10165 \relates QString
10166 \since 6.2
10167 \deprecated [6.8] Use \c _s from Qt::StringLiterals namespace instead.
10168
10169 Literal operator that creates a QString out of the first \a size characters in
10170 the char16_t string literal \a str.
10171
10172 The QString is created at compile time, and the generated string data is stored
10173 in the read-only segment of the compiled object file. Duplicate literals may
10174 share the same read-only memory. This functionality is interchangeable with
10175 QStringLiteral, but saves typing when many string literals are present in the
10176 code.
10177
10178 The following code creates a QString:
10179 \code
10180 auto str = u"hello"_qs;
10181 \endcode
10182
10183 \sa QStringLiteral, QtLiterals::operator""_qba(const char *str, size_t size)
10184*/
10185#endif // QT_DEPRECATED_SINCE(6, 8)
10186
10187/*!
10188 \fn Qt::Literals::StringLiterals::operator""_s(const char16_t *str, size_t size)
10189
10190 \relates QString
10191 \since 6.4
10192
10193 Literal operator that creates a QString out of the first \a size characters in
10194 the char16_t string literal \a str.
10195
10196 The QString is created at compile time, and the generated string data is stored
10197 in the read-only segment of the compiled object file. Duplicate literals may
10198 share the same read-only memory. This functionality is interchangeable with
10199 QStringLiteral, but saves typing when many string literals are present in the
10200 code.
10201
10202 The following code creates a QString:
10203 \code
10204 using namespace Qt::StringLiterals;
10205
10206 auto str = u"hello"_s;
10207 \endcode
10208
10209 \sa Qt::Literals::StringLiterals
10210*/
10211
10212/*!
10213 \internal
10214 */
10215void QAbstractConcatenable::appendLatin1To(QLatin1StringView in, QChar *out) noexcept
10216{
10217 qt_from_latin1(reinterpret_cast<char16_t *>(out), in.data(), size_t(in.size()));
10218}
10219
10220/*!
10221 \fn template <typename T> qsizetype erase(QString &s, const T &t)
10222 \relates QString
10223 \since 6.1
10224
10225 Removes all elements that compare equal to \a t from the
10226 string \a s. Returns the number of elements removed, if any.
10227
10228 \sa erase_if
10229*/
10230
10231/*!
10232 \fn template <typename Predicate> qsizetype erase_if(QString &s, Predicate pred)
10233 \relates QString
10234 \since 6.1
10235
10236 Removes all elements for which the predicate \a pred returns true
10237 from the string \a s. Returns the number of elements removed, if
10238 any.
10239
10240 \sa erase
10241*/
10242
10243/*!
10244 \macro const char *qPrintable(const QString &str)
10245 \relates QString
10246
10247 Returns \a str as a \c{const char *}. This is equivalent to
10248 \a{str}.toLocal8Bit().\l{QByteArray::}{constData()}.
10249
10250 The char pointer will be invalid after the statement in which
10251 qPrintable() is used. This is because the array returned by
10252 QString::toLocal8Bit() will fall out of scope.
10253
10254 \note qDebug(), qInfo(), qWarning(), qCritical(), qFatal() expect
10255 %s arguments to be UTF-8 encoded, while qPrintable() converts to
10256 local 8-bit encoding. Therefore qUtf8Printable() should be used
10257 for logging strings instead of qPrintable().
10258
10259 \sa qUtf8Printable()
10260*/
10261
10262/*!
10263 \macro const char *qUtf8Printable(const QString &str)
10264 \relates QString
10265 \since 5.4
10266
10267 Returns \a str as a \c{const char *}. This is equivalent to
10268 \a{str}.toUtf8().\l{QByteArray::}{constData()}.
10269
10270 The char pointer will be invalid after the statement in which
10271 qUtf8Printable() is used. This is because the array returned by
10272 QString::toUtf8() will fall out of scope.
10273
10274 Example:
10275
10276 \snippet code/src_corelib_text_qstring.cpp qUtf8Printable
10277
10278 \sa qPrintable(), qDebug(), qInfo(), qWarning(), qCritical(), qFatal()
10279*/
10280
10281/*!
10282 \macro const wchar_t *qUtf16Printable(const QString &str)
10283 \relates QString
10284 \since 5.7
10285
10286 Returns \a str as a \c{const ushort *}, but cast to a \c{const wchar_t *}
10287 to avoid warnings. This is equivalent to \a{str}.utf16() plus some casting.
10288
10289 The only useful thing you can do with the return value of this macro is to
10290 pass it to QString::asprintf() for use in a \c{%ls} conversion. In particular,
10291 the return value is \e{not} a valid \c{const wchar_t*}!
10292
10293 In general, the pointer will be invalid after the statement in which
10294 qUtf16Printable() is used. This is because the pointer may have been
10295 obtained from a temporary expression, which will fall out of scope.
10296
10297 Example:
10298
10299 \snippet code/src_corelib_text_qstring.cpp qUtf16Printable
10300
10301 \sa qPrintable(), qDebug(), qInfo(), qWarning(), qCritical(), qFatal()
10302*/
10303
10304QT_END_NAMESPACE
10305
10306#undef REHASH
QString convertToQString(QAnyStringView string)
Definition qstring.cpp:5570
Definition qlist.h:80
char32_t next(char32_t invalidAs=QChar::ReplacementCharacter)
bool hasNext() const
\inmodule QtCore
QList< uint > convertToUcs4(QStringView string)
Definition qstring.cpp:5826
QByteArray convertToUtf8(QStringView string)
Definition qstring.cpp:5771
QByteArray convertToLocal8Bit(QStringView string)
Definition qstring.cpp:5728
QByteArray convertToLatin1(QStringView string)
Definition qstring.cpp:5587
Combined button and popup list for selecting options.
static QString convertCase(T &str, QUnicodeTables::Case which)
Definition qstring.cpp:7199
static constexpr NormalizationCorrection uc_normalization_corrections[]
Q_CORE_EXPORT Q_DECL_PURE_FUNCTION bool startsWith(QStringView haystack, QStringView needle, Qt::CaseSensitivity cs=Qt::CaseSensitive) noexcept
Definition qstring.cpp:9733
Q_CORE_EXPORT Q_DECL_PURE_FUNCTION bool endsWith(QStringView haystack, QStringView needle, Qt::CaseSensitivity cs=Qt::CaseSensitive) noexcept
Definition qstring.cpp:9773
Q_CORE_EXPORT Q_DECL_PURE_FUNCTION bool isLower(QStringView s) noexcept
Definition qstring.cpp:5507
const QString & asString(const QString &s)
Definition qstring.h:1678
Q_CORE_EXPORT Q_DECL_PURE_FUNCTION bool isValidUtf16(QStringView s) noexcept
Definition qstring.cpp:906
Q_CORE_EXPORT Q_DECL_PURE_FUNCTION bool equalStrings(QStringView lhs, QStringView rhs) noexcept
Definition qstring.cpp:1374
qsizetype findString(QStringView str, qsizetype from, QChar needle, Qt::CaseSensitivity cs=Qt::CaseSensitive) noexcept
Q_CORE_EXPORT Q_DECL_PURE_FUNCTION bool isRightToLeft(QStringView string) noexcept
Q_CORE_EXPORT Q_DECL_PURE_FUNCTION int compareStrings(QStringView lhs, QStringView rhs, Qt::CaseSensitivity cs=Qt::CaseSensitive) noexcept
Q_CORE_EXPORT Q_DECL_PURE_FUNCTION bool isAscii(QLatin1StringView s) noexcept
Definition qstring.cpp:851
constexpr bool isLatin1(QLatin1StringView s) noexcept
Definition qstring.h:77
Q_CORE_EXPORT Q_DECL_PURE_FUNCTION const char16_t * qustrcasechr(QStringView str, char16_t ch) noexcept
Definition qstring.cpp:776
Q_CORE_EXPORT Q_DECL_PURE_FUNCTION bool isUpper(QStringView s) noexcept
Definition qstring.cpp:5512
Q_CORE_EXPORT Q_DECL_PURE_FUNCTION const char16_t * qustrchr(QStringView str, char16_t ch) noexcept
Definition qstring.cpp:688
void qt_to_latin1_unchecked(uchar *dst, const char16_t *uc, qsizetype len)
Definition qstring.cpp:1189
static char16_t foldCase(char16_t ch) noexcept
Definition qchar.cpp:1691
#define __has_feature(x)
uint QT_FASTCALL fetch1Pixel< QPixelLayout::BPP1LSB >(const uchar *src, int index)
bool comparesEqual(const QFileInfo &lhs, const QFileInfo &rhs)
static bool isAscii_helper(const char16_t *&ptr, const char16_t *end)
Definition qstring.cpp:859
static Int toIntegral(QStringView string, bool *ok, int base)
Definition qstring.cpp:7688
void qt_to_latin1(uchar *dst, const char16_t *src, qsizetype length)
Definition qstring.cpp:1184
Qt::strong_ordering compareThreeWay(const QByteArray &lhs, const QChar &rhs) noexcept
Definition qstring.cpp:6744
static void append_utf8(QString &qs, const char *cs, qsizetype len)
Definition qstring.cpp:7322
#define ATTRIBUTE_NO_SANITIZE
Definition qstring.cpp:367
bool qt_is_ascii(const char *&ptr, const char *end) noexcept
Definition qstring.cpp:787
static bool checkCase(QStringView s, QUnicodeTables::Case c) noexcept
Definition qstring.cpp:5496
static void replace_helper(QString &str, QSpan< qsizetype > indices, qsizetype blen, QStringView after)
Definition qstring.cpp:3684
Q_CORE_EXPORT void qt_from_latin1(char16_t *dst, const char *str, size_t size) noexcept
Definition qstring.cpp:921
static int ucstrcmp(const char16_t *a, size_t alen, const Char2 *b, size_t blen)
Definition qstring.cpp:1347
bool comparesEqual(const QByteArray &lhs, char16_t rhs) noexcept
Definition qstring.cpp:6750
Q_DECLARE_TYPEINFO(Part, Q_PRIMITIVE_TYPE)
static void removeStringImpl(QString &s, const T &needle, Qt::CaseSensitivity cs)
Definition qstring.cpp:3493
static bool needsReallocate(const QString &str, qsizetype newSize)
Definition qstring.cpp:2629
static int qArgDigitValue(QChar ch) noexcept
Definition qstring.cpp:1614
bool comparesEqual(const QByteArray &lhs, const QChar &rhs) noexcept
Definition qstring.cpp:6739
#define REHASH(a)
Definition qstring.cpp:66
bool comparesEqual(const QByteArrayView &lhs, char16_t rhs) noexcept
Definition qstring.cpp:6728
static int ucstrncmp(const char16_t *a, const char16_t *b, size_t l)
Definition qstring.cpp:1265
static Q_NEVER_INLINE int ucstricmp(qsizetype alen, const char16_t *a, qsizetype blen, const char *b)
Definition qstring.cpp:1220
static QByteArray qt_convert_to_latin1(QStringView string)
Definition qstring.cpp:5593
static bool ucstreq(const char16_t *a, size_t alen, const Char2 *b)
Definition qstring.cpp:1340
static QList< uint > qt_convert_to_ucs4(QStringView string)
Definition qstring.cpp:5798
qsizetype qFindStringBoyerMoore(QStringView haystack, qsizetype from, QStringView needle, Qt::CaseSensitivity cs)
static QByteArray qt_convert_to_local_8bit(QStringView string)
Definition qstring.cpp:5705
static LengthMod parse_length_modifier(const char *&c) noexcept
Definition qstring.cpp:7378
static ArgEscapeData findArgEscapes(QStringView s)
Definition qstring.cpp:8570
static QByteArray qt_convert_to_utf8(QStringView str)
Definition qstring.cpp:5751
static void qt_to_latin1_internal(uchar *dst, const char16_t *src, qsizetype length)
Definition qstring.cpp:1005
QtPrivate::QCaseInsensitiveLatin1Hash CaseInsensitiveL1
Definition qstring.cpp:1354
LengthMod
Definition qstring.cpp:7367
@ lm_z
Definition qstring.cpp:7367
@ lm_none
Definition qstring.cpp:7367
@ lm_t
Definition qstring.cpp:7367
@ lm_l
Definition qstring.cpp:7367
@ lm_ll
Definition qstring.cpp:7367
@ lm_hh
Definition qstring.cpp:7367
@ lm_L
Definition qstring.cpp:7367
@ lm_h
Definition qstring.cpp:7367
@ lm_j
Definition qstring.cpp:7367
static void insert_helper(QString &str, qsizetype i, const T &toInsert)
Definition qstring.cpp:2968
static int latin1nicmp(const char *lhsChar, qsizetype lSize, const char *rhsChar, qsizetype rSize)
Definition qstring.cpp:1356
Qt::strong_ordering compareThreeWay(const QByteArrayView &lhs, const QChar &rhs) noexcept
Definition qstring.cpp:6722
static char16_t to_unicode(const char c)
Definition qstring.cpp:8975
Qt::strong_ordering compareThreeWay(const QByteArray &lhs, char16_t rhs) noexcept
Definition qstring.cpp:6755
static QString replaceArgEscapes(QStringView s, const ArgEscapeData &d, qsizetype field_width, QStringView arg, QStringView larg, QChar fillChar)
Definition qstring.cpp:8646
static QVarLengthArray< char16_t > qt_from_latin1_to_qvla(QLatin1StringView str)
Definition qstring.cpp:996
static Q_NEVER_INLINE int ucstricmp8(const char *utf8, const char *utf8end, const QChar *utf16, const QChar *utf16end)
Definition qstring.cpp:1238
void qt_string_normalize(QString *data, QString::NormalizationForm mode, QChar::UnicodeVersion version, qsizetype from)
Definition qstring.cpp:8460
static uint parse_flag_characters(const char *&c) noexcept
Definition qstring.cpp:7330
static Q_NEVER_INLINE int ucstricmp(qsizetype alen, const char16_t *a, qsizetype blen, const char16_t *b)
Definition qstring.cpp:1195
static char16_t to_unicode(const QChar c)
Definition qstring.cpp:8974
QDataStream & operator>>(QDataStream &in, QString &str)
Definition qstring.cpp:9507
static int getEscape(const Char *uc, qsizetype *pos, qsizetype len)
Definition qstring.cpp:8978
static int ucstrncmp(const char16_t *a, const char *b, size_t l)
Definition qstring.cpp:1318
static bool can_consume(const char *&c, char ch) noexcept
Definition qstring.cpp:7369
static int parse_field_width(const char *&c, qsizetype size)
Definition qstring.cpp:7350
Qt::strong_ordering compareThreeWay(const QByteArrayView &lhs, char16_t rhs) noexcept
Definition qstring.cpp:6733
#define qUtf16Printable(string)
Definition qstring.h:1695
qsizetype occurrences
Definition qstring.cpp:8564
qsizetype escape_len
Definition qstring.cpp:8567
qsizetype locale_occurrences
Definition qstring.cpp:8565
\inmodule QtCore \reentrant
Definition qchar.h:18
constexpr char16_t unicode() const noexcept
Converts a Latin-1 character to a 16-bit-encoded Unicode representation of the character.
Definition qchar.h:22
constexpr QLatin1Char(char c) noexcept
Constructs a Latin-1 character for c.
Definition qchar.h:20
@ BlankBeforePositive
Definition qlocale_p.h:270
@ AddTrailingZeroes
Definition qlocale_p.h:267
static int difference(char lhs, char rhs)