Qt
Internal/Contributor docs for the Qt SDK. Note: These are NOT official API docs; those are found at https://doc.qt.io/
Loading...
Searching...
No Matches
qstring.cpp
Go to the documentation of this file.
1// Copyright (C) 2021 The Qt Company Ltd.
2// Copyright (C) 2022 Intel Corporation.
3// Copyright (C) 2019 Mail.ru Group.
4// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
5// Qt-Security score:critical reason:data-parser
6
7#include "qstringlist.h"
8#if QT_CONFIG(regularexpression)
9#include "qregularexpression.h"
10#endif
12#include <private/qstringconverter_p.h>
13#include <private/qtools_p.h>
15#include "private/qsimd_p.h"
16#include <qnumeric.h>
17#include <qdatastream.h>
18#include <qlist.h>
19#include "qlocale.h"
20#include "qlocale_p.h"
21#include "qspan.h"
22#include "qstringbuilder.h"
23#include "qstringmatcher.h"
25#include "qdebug.h"
26#include "qendian.h"
27#include "qcollator.h"
28#include "qttypetraits.h"
29
30#ifdef Q_OS_DARWIN
31#include <private/qcore_mac_p.h>
32#endif
33
34#include <private/qfunctions_p.h>
35
36#include <limits.h>
37#include <string.h>
38#include <stdlib.h>
39#include <stdio.h>
40#include <stdarg.h>
41#include <wchar.h>
42
43#include "qchar.cpp"
48#include "qthreadstorage.h"
49
50#include <algorithm>
51#include <functional>
52
53#ifdef Q_OS_WIN
54# include <qt_windows.h>
55# if !defined(QT_BOOTSTRAPPED) && (defined(QT_NO_CAST_FROM_ASCII) || defined(QT_NO_CAST_TO_ASCII))
56// MSVC requires this, but let's apply it to MinGW compilers too, just in case
57# error "This file cannot be compiled with QT_NO_CAST_{TO,FROM}_ASCII, "
58 "otherwise some QString functions will not get exported."
59# endif
60#endif
61
62#ifdef truncate
63# undef truncate
64#endif
65
// Rolling-hash step used by the backwards string search in qLastIndexOf():
// subtracts the contribution of code unit \a a (the one that just left the
// needle-sized window) and then shifts the hash left by one so the next
// code unit can be added.
//
// The subtraction is skipped when the shift width would reach the bit width
// of the hash type, because such a shift is undefined.
//
// Expects `sl_minus_1` and `hashHaystack` to be in scope where it expands.
// There is intentionally no trailing semicolon: the call site provides it.
#define REHASH(a) \
    if (sl_minus_1 < sizeof(sl_minus_1) * CHAR_BIT) \
        hashHaystack -= decltype(hashHaystack)(a) << sl_minus_1; \
    hashHaystack <<= 1
70
72
73using namespace Qt::StringLiterals;
74using namespace QtMiscUtils;
75
76const char16_t QString::_empty = 0;
77
78// in qstringmatcher.cpp
79qsizetype qFindStringBoyerMoore(QStringView haystack, qsizetype from, QStringView needle, Qt::CaseSensitivity cs);
80
81namespace {
// Selects what a comparison helper has to report when two strings differ.
enum StringComparisonMode {
    CompareStringsForEquality = 0,  // any non-zero result is good enough
    CompareStringsForOrdering = 1   // the sign of the result matters
};
86
87template <typename Pointer>
88char32_t foldCaseHelper(Pointer ch, Pointer start) = delete;
89
90template <>
91char32_t foldCaseHelper<const QChar*>(const QChar* ch, const QChar* start)
92{
93 return foldCase(reinterpret_cast<const char16_t*>(ch),
94 reinterpret_cast<const char16_t*>(start));
95}
96
97template <>
98char32_t foldCaseHelper<const char*>(const char* ch, const char*)
99{
100 return foldCase(char16_t(uchar(*ch)));
101}
102
103template <typename T>
104char16_t valueTypeToUtf16(T t) = delete;
105
106template <>
107char16_t valueTypeToUtf16<QChar>(QChar t)
108{
109 return t.unicode();
110}
111
112template <>
113char16_t valueTypeToUtf16<char>(char t)
114{
115 return char16_t{uchar(t)};
116}
117
// Returns true if \a a equals \a b once \a a has been case-folded.
// Only \a a is folded; \a b is compared exactly as passed in.
template <typename T>
static inline bool foldAndCompare(const T a, const T b)
{
    const auto folded = foldCase(a);
    return folded == b;
}
123
124/*!
125 \internal
126
127 Returns the index position of the first occurrence of the
128 character \a ch in the string given by \a str and \a len,
129 searching forward from index
130 position \a from. Returns -1 if \a ch could not be found.
131*/
132template <typename Haystack>
133static inline qsizetype qLastIndexOf(Haystack haystack, QChar needle,
134 qsizetype from, Qt::CaseSensitivity cs) noexcept
135{
136 if (haystack.size() == 0)
137 return -1;
138 if (from < 0)
139 from += haystack.size();
140 else if (std::size_t(from) > std::size_t(haystack.size()))
141 from = haystack.size() - 1;
142 if (from >= 0) {
143 char16_t c = needle.unicode();
144 const auto b = haystack.data();
145 auto n = b + from;
146 if (cs == Qt::CaseSensitive) {
147 for (; n >= b; --n)
148 if (valueTypeToUtf16(*n) == c)
149 return n - b;
150 } else {
151 c = foldCase(c);
152 for (; n >= b; --n)
153 if (foldCase(valueTypeToUtf16(*n)) == c)
154 return n - b;
155 }
156 }
157 return -1;
158}
159template <> qsizetype
160qLastIndexOf(QString, QChar, qsizetype, Qt::CaseSensitivity) noexcept = delete; // unwanted, would detach
161
/*!
    \internal

    Returns the index of the last occurrence of \a needle0 in \a haystack0
    that starts at or before position \a from, or -1 if there is none. A
    negative \a from is counted from the end of the haystack.

    One-character needles are forwarded to the QChar overload. All other
    needles are searched backwards with a rolling hash over a needle-sized
    window; every hash hit is confirmed with QtPrivate::compareStrings().
*/
162template<typename Haystack, typename Needle>
163static qsizetype qLastIndexOf(Haystack haystack0, qsizetype from,
164 Needle needle0, Qt::CaseSensitivity cs) noexcept
165{
166 const qsizetype sl = needle0.size();
167 if (sl == 1)
168 return qLastIndexOf(haystack0, needle0.front(), from, cs);
169
170 const qsizetype l = haystack0.size();
171 if (from < 0)
172 from += l;
 // an empty needle matches at the very end of the haystack
173 if (from == l && sl == 0)
174 return from;
175 const qsizetype delta = l - sl;
 // the unsigned comparison also rejects a `from` that is still negative;
 // delta < 0 means the needle is longer than the haystack
176 if (std::size_t(from) > std::size_t(l) || delta < 0)
177 return -1;
 // a match cannot start later than delta
178 if (from > delta)
179 from = delta;
180
 // wraps the sl elements starting at v in a view of the haystack's type
181 auto sv = [sl](const typename Haystack::value_type *v) { return Haystack(v, sl); };
182
183 auto haystack = haystack0.data();
184 const auto needle = needle0.data();
 // `end` is the start of the buffer: the backwards scan stops there
185 const auto *end = haystack;
186 haystack += from;
187 const qregisteruint sl_minus_1 = sl ? sl - 1 : 0;
 // n and h point at the last element of the needle and of the first window
188 const auto *n = needle + sl_minus_1;
189 const auto *h = haystack + sl_minus_1;
190 qregisteruint hashNeedle = 0, hashHaystack = 0;
191
192 if (cs == Qt::CaseSensitive) {
 // hash the needle and the first window, walking both back to front
193 for (qsizetype idx = 0; idx < sl; ++idx) {
194 hashNeedle = (hashNeedle << 1) + valueTypeToUtf16(*(n - idx));
195 hashHaystack = (hashHaystack << 1) + valueTypeToUtf16(*(h - idx));
196 }
 // take the window's first element out again; the loop adds it back at
 // the top of every iteration
197 hashHaystack -= valueTypeToUtf16(*haystack);
198
199 while (haystack >= end) {
200 hashHaystack += valueTypeToUtf16(*haystack);
201 if (hashHaystack == hashNeedle
202 && QtPrivate::compareStrings(needle0, sv(haystack), Qt::CaseSensitive) == 0)
203 return haystack - end;
 // move the window one element towards the start and drop the element
 // that just left it at the far end
204 --haystack;
205 REHASH(valueTypeToUtf16(haystack[sl]));
206 }
207 } else {
 // same scheme as above, hashing case-folded values
208 for (qsizetype idx = 0; idx < sl; ++idx) {
209 hashNeedle = (hashNeedle << 1) + foldCaseHelper(n - idx, needle);
210 hashHaystack = (hashHaystack << 1) + foldCaseHelper(h - idx, end);
211 }
212 hashHaystack -= foldCaseHelper(haystack, end);
213
214 while (haystack >= end) {
215 hashHaystack += foldCaseHelper(haystack, end);
216 if (hashHaystack == hashNeedle
217 && QtPrivate::compareStrings(sv(haystack), needle0, Qt::CaseInsensitive) == 0)
218 return haystack - end;
219 --haystack;
220 REHASH(foldCaseHelper(haystack + sl, end));
221 }
222 }
223 return -1;
224}
225
226template <typename Haystack, typename Needle>
227bool qt_starts_with_impl(Haystack haystack, Needle needle, Qt::CaseSensitivity cs) noexcept
228{
229 if (haystack.isNull())
230 return needle.isNull();
231 const auto haystackLen = haystack.size();
232 const auto needleLen = needle.size();
233 if (haystackLen == 0)
234 return needleLen == 0;
235 if (needleLen > haystackLen)
236 return false;
237
238 return QtPrivate::compareStrings(haystack.first(needleLen), needle, cs) == 0;
239}
240
241template <typename Haystack, typename Needle>
242bool qt_ends_with_impl(Haystack haystack, Needle needle, Qt::CaseSensitivity cs) noexcept
243{
244 if (haystack.isNull())
245 return needle.isNull();
246 const auto haystackLen = haystack.size();
247 const auto needleLen = needle.size();
248 if (haystackLen == 0)
249 return needleLen == 0;
250 if (haystackLen < needleLen)
251 return false;
252
253 return QtPrivate::compareStrings(haystack.last(needleLen), needle, cs) == 0;
254}
255
256template <typename T>
257static void append_helper(QString &self, T view)
258{
259 const auto strData = view.data();
260 const qsizetype strSize = view.size();
261 auto &d = self.data_ptr();
262 if (strData && strSize > 0) {
263 // the number of UTF-8 code units is always at a minimum equal to the number
264 // of equivalent UTF-16 code units
265 d.detachAndGrow(QArrayData::GrowsAtEnd, strSize, nullptr, nullptr);
266 Q_CHECK_PTR(d.data());
267 Q_ASSERT(strSize <= d.freeSpaceAtEnd());
268
269 auto dst = std::next(d.data(), d.size);
270 if constexpr (std::is_same_v<T, QUtf8StringView>) {
271 dst = QUtf8::convertToUnicode(dst, view);
272 } else if constexpr (std::is_same_v<T, QLatin1StringView>) {
273 QLatin1::convertToUnicode(dst, view);
274 dst += strSize;
275 } else {
276 static_assert(QtPrivate::type_dependent_false<T>(),
277 "Can only operate on UTF-8 and Latin-1");
278 }
279 self.resize(std::distance(d.begin(), dst));
280 } else if (d.isNull() && !view.isNull()) { // special case
281 self = QLatin1StringView("");
282 }
283}
284
// Compile-time unrolled loop of at most MaxCount iterations: every step is a
// call into the next-smaller instantiation, and UnrollTailLoop<0> ends the
// chain.
template <uint MaxCount> struct UnrollTailLoop
{
    template <typename RetType, typename Functor1, typename Functor2, typename Number>
    static inline RetType exec(Number count, RetType returnIfExited, Functor1 loopCheck, Functor2 returnIfFailed, Number i = 0)
    {
        /* equivalent to (for at most MaxCount iterations):
         *   for ( ; count--; ++i) {
         *       if (loopCheck(i))
         *           return returnIfFailed(i);
         *   }
         *   return returnIfExited;
         */

        if (count == 0)
            return returnIfExited;

        if (loopCheck(i))
            return returnIfFailed(i);

        using Next = UnrollTailLoop<MaxCount - 1>;
        return Next::exec(count - 1, returnIfExited, loopCheck, returnIfFailed, i + 1);
    }

    template <typename Functor, typename Number>
    static inline void exec(Number count, Functor code)
    {
        /* equivalent to (for at most MaxCount iterations):
         *   for (Number i = 0; i < count; ++i)
         *       code(i);
         */
        const auto runAndContinue = [=](Number i) -> bool { code(i); return false; };
        const auto neverReached = [](Number) { return 0; };
        exec(count, 0, runAndContinue, neverReached);
    }
};
// End of the unrolled chain: nothing is left to check.
template <> template <typename RetType, typename Functor1, typename Functor2, typename Number>
inline RetType UnrollTailLoop<0>::exec(Number, RetType returnIfExited, Functor1, Functor2, Number)
{
    return returnIfExited;
}
323} // unnamed namespace
324
325/*
326 * Note on the use of SIMD in qstring.cpp:
327 *
328 * Several operations with strings are improved with the use of SIMD code,
329 * since they are repetitive. For MIPS, we have hand-written assembly code
330 * outside of qstring.cpp targeting MIPS DSP and MIPS DSPr2. For ARM and for
331 * x86, we can only use intrinsics and therefore everything is contained in
332 * qstring.cpp. We need to use intrinsics only for those platforms due to the
333 * different compilers and toolchains used, which have different syntax for
334 * assembly sources.
335 *
336 * ** SSE notes: **
337 *
338 * Whenever multiple alternatives are equivalent or near so, we prefer the one
339 * using instructions from SSE2, since SSE2 is guaranteed to be enabled for all
340 * 64-bit builds and we enable it for 32-bit builds by default. Use of higher
341 * SSE versions should be done when there is a clear performance benefit and
342 * requires fallback code to SSE2, if it exists.
343 *
344 * Performance measurement in the past shows that most strings are short in
345 * size and, therefore, do not benefit from alignment prologues. That is,
346 * trying to find a 16-byte-aligned boundary to operate on is often more
347 * expensive than executing the unaligned operation directly. In addition, note
348 * that the QString private data is designed so that the data is stored on
349 * 16-byte boundaries if the system malloc() returns 16-byte aligned pointers
350 * on its own (64-bit glibc on Linux does; 32-bit glibc on Linux returns them
351 * 50% of the time), so skipping the alignment prologue is actually optimizing
352 * for the common case.
353 */
354
// Hand-written assembly helpers, declared only for MIPS DSP builds.
// qt_from_latin1() picks the unroll8 variant for inputs longer than 20
// characters and the unroll4 variant otherwise.
355#if defined(__mips_dsp)
356// From qstring_mips_dsp_asm.S
357extern "C" void qt_fromlatin1_mips_asm_unroll4 (char16_t*, const char*, uint);
358extern "C" void qt_fromlatin1_mips_asm_unroll8 (char16_t*, const char*, uint);
359extern "C" void qt_toLatin1_mips_dsp_asm(uchar *dst, const char16_t *src, int length);
360#endif
361
// ATTRIBUTE_NO_SANITIZE switches the address and thread sanitizers off for a
// single function. It expands to nothing unless this is an SSE2 build with a
// GNU-compatible compiler.
362#if defined(__SSE2__) && defined(Q_CC_GNU)
363// We may overrun the buffer, but that's a false positive:
364// this won't crash nor produce incorrect results
365# define ATTRIBUTE_NO_SANITIZE __attribute__((__no_sanitize_address__, __no_sanitize_thread__))
366#else
367# define ATTRIBUTE_NO_SANITIZE
368#endif
369
370#ifdef __SSE2__
371static constexpr bool UseSse4_1 = bool(qCompilerCpuFeatures & CpuFeatureSSE4_1);
372static constexpr bool UseAvx2 = UseSse4_1 &&
373 (qCompilerCpuFeatures & CpuFeatureArchHaswell) == CpuFeatureArchHaswell;
374
375[[maybe_unused]]
376Q_ALWAYS_INLINE static __m128i mm_load8_zero_extend(const void *ptr)
377{
378 const __m128i *dataptr = static_cast<const __m128i *>(ptr);
379 if constexpr (UseSse4_1) {
380 // use a MOVQ followed by PMOVZXBW
381 // if AVX2 is present, these should combine into a single VPMOVZXBW instruction
382 __m128i data = _mm_loadl_epi64(dataptr);
383 return _mm_cvtepu8_epi16(data);
384 }
385
386 // use MOVQ followed by PUNPCKLBW
387 __m128i data = _mm_loadl_epi64(dataptr);
388 return _mm_unpacklo_epi8(data, _mm_setzero_si128());
389}
390
// Returns the number of char16_t code units that precede the first null in
// \a str. All loads are 16-byte aligned, so bytes before the start of the
// string and after its terminator may be read as well; this is why the
// function carries ATTRIBUTE_NO_SANITIZE.
391[[maybe_unused]] ATTRIBUTE_NO_SANITIZE
392static qsizetype qustrlen_sse2(const char16_t *str) noexcept
393{
394 // find the 16-byte alignment immediately prior or equal to str
395 quintptr misalignment = quintptr(str) & 0xf;
396 Q_ASSERT((misalignment & 1) == 0);
397 const char16_t *ptr = str - (misalignment / 2);
398
399 // load 16 bytes and see if we have a null
400 // (aligned loads can never segfault)
401 const __m128i zeroes = _mm_setzero_si128();
402 __m128i data = _mm_load_si128(reinterpret_cast<const __m128i *>(ptr));
403 __m128i comparison = _mm_cmpeq_epi16(data, zeroes);
 // one mask bit per byte, i.e. two bits per char16_t
404 uint mask = _mm_movemask_epi8(comparison);
405
406 // ignore the result prior to the beginning of str
407 mask >>= misalignment;
408
409 // Have we found something in the first block? Need to handle it now
410 // because of the right shift above.
411 if (mask)
412 return qCountTrailingZeroBits(mask) / sizeof(char16_t);
413
414 constexpr qsizetype Step = sizeof(__m128i) / sizeof(char16_t);
 // number of characters of str that the first block covered, so that
 // str + size is the next 16-byte boundary
415 qsizetype size = Step - misalignment / sizeof(char16_t);
416
 // pre-subtract one Step because the loop adds it back before each load
417 size -= Step;
418 do {
419 size += Step;
420 data = _mm_load_si128(reinterpret_cast<const __m128i *>(str + size));
421
422 comparison = _mm_cmpeq_epi16(data, zeroes);
423 mask = _mm_movemask_epi8(comparison);
424 } while (mask == 0);
425
426 // found a null
427 return size + qCountTrailingZeroBits(mask) / sizeof(char16_t);
428}
429
430// Scans from \a ptr to \a end for data that has any bit of \a maskval set.
431// Returns true if no such bit was found. Returns false and updates \a ptr to point to the
432// first 16-bit word that has any bit set (note: if the input is 8-bit, \a ptr
433// may be updated to one byte short).
// Only whole 8-byte groups are examined; when true is returned, up to 7
// trailing bytes are left for the caller to check, with \a ptr pointing at them.
434static bool simdTestMask(const char *&ptr, const char *end, quint32 maskval)
435{
 // `result` carries set bits for the 16-bit words that had no masked bit
 // set, so the first zero bit marks the first offending word
436 auto updatePtr = [&](uint result) {
437 // found a character matching the mask
438 uint idx = qCountTrailingZeroBits(~result);
439 ptr += idx;
440 return false;
441 };
442
443 if constexpr (UseSse4_1) {
444# ifndef Q_OS_QNX // compiler fails in the code below
445 __m128i mask;
 // turns a 16-byte chunk known to contain a hit into the updatePtr() input
446 auto updatePtrSimd = [&](__m128i data) -> bool {
447 __m128i masked = _mm_and_si128(mask, data);
448 __m128i comparison = _mm_cmpeq_epi16(masked, _mm_setzero_si128());
449 uint result = _mm_movemask_epi8(comparison);
450 return updatePtr(result);
451 };
452
453 if constexpr (UseAvx2) {
454 // AVX2 implementation: test 32 bytes at a time
455 const __m256i mask256 = _mm256_broadcastd_epi32(_mm_cvtsi32_si128(maskval));
456 while (ptr + 32 <= end) {
457 __m256i data = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(ptr));
458 if (!_mm256_testz_si256(mask256, data)) {
459 // found a character matching the mask
460 __m256i masked256 = _mm256_and_si256(mask256, data);
461 __m256i comparison256 = _mm256_cmpeq_epi16(masked256, _mm256_setzero_si256());
462 return updatePtr(_mm256_movemask_epi8(comparison256));
463 }
464 ptr += 32;
465 }
466
 // keep the low half of the mask for the 16- and 8-byte tail checks
467 mask = _mm256_castsi256_si128(mask256);
468 } else {
469 // SSE 4.1 implementation: test 32 bytes at a time (two 16-byte
470 // comparisons, unrolled)
471 mask = _mm_set1_epi32(maskval);
472 while (ptr + 32 <= end) {
473 __m128i data1 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(ptr));
474 __m128i data2 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(ptr + 16));
475 if (!_mm_testz_si128(mask, data1))
476 return updatePtrSimd(data1);
477
478 ptr += 16;
479 if (!_mm_testz_si128(mask, data2))
480 return updatePtrSimd(data2);
481 ptr += 16;
482 }
483 }
484
485 // AVX2 and SSE4.1: final 16-byte comparison
486 if (ptr + 16 <= end) {
487 __m128i data1 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(ptr));
488 if (!_mm_testz_si128(mask, data1))
489 return updatePtrSimd(data1);
490 ptr += 16;
491 }
492
493 // and final 8-byte comparison
494 if (ptr + 8 <= end) {
495 __m128i data1 = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(ptr));
496 if (!_mm_testz_si128(mask, data1))
497 return updatePtrSimd(data1);
498 ptr += 8;
499 }
500
501 return true;
502# endif // QNX
503 }
504
505 // SSE2 implementation: test 16 bytes at a time.
506 const __m128i mask = _mm_set1_epi32(maskval);
507 while (ptr + 16 <= end) {
508 __m128i data = _mm_loadu_si128(reinterpret_cast<const __m128i *>(ptr));
509 __m128i masked = _mm_and_si128(mask, data);
510 __m128i comparison = _mm_cmpeq_epi16(masked, _mm_setzero_si128());
511 quint16 result = _mm_movemask_epi8(comparison);
 // 0xffff: all eight words compared equal to zero, i.e. nothing found
512 if (result != 0xffff)
513 return updatePtr(result);
514 ptr += 16;
515 }
516
517 // and one 8-byte comparison
518 if (ptr + 8 <= end) {
519 __m128i data = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(ptr));
520 __m128i masked = _mm_and_si128(mask, data);
521 __m128i comparison = _mm_cmpeq_epi16(masked, _mm_setzero_si128());
 // only the low 8 mask bits belong to the 8 bytes that were loaded
522 quint8 result = _mm_movemask_epi8(comparison);
523 if (result != 0xff)
524 return updatePtr(result);
525 ptr += 8;
526 }
527
528 return true;
529}
530
// Compares the first \a l code units of the UTF-16 string \a a with those of
// \a b, whose elements are either 16-bit or 8-bit (zero-extended before the
// comparison), depending on Char. Returns 0 if they are all equal. On a
// mismatch found by one of the vectorized paths the result is 1 in
// CompareStringsForEquality mode and the difference between the first
// mismatching code units otherwise; the scalar path for fewer than four
// characters always returns that difference.
531template <StringComparisonMode Mode, typename Char> [[maybe_unused]]
532static int ucstrncmp_sse2(const char16_t *a, const Char *b, size_t l)
533{
534 static_assert(std::is_unsigned_v<Char>);
535
536 // Using the PMOVMSKB instruction, we get two bits for each UTF-16 character
537 // we compare. This lambda helps extract the code unit.
538 static const auto codeUnitAt = [](const auto *n, qptrdiff idx) -> int {
539 constexpr int Stride = 2;
540 // this is the same as:
541 // return n[idx / Stride];
542 // but using pointer arithmetic to avoid the compiler dividing by two
543 // and multiplying by two in the case of char16_t (we know idx is even,
544 // but the compiler does not). This is not UB.
545
546 auto ptr = reinterpret_cast<const uchar *>(n);
547 ptr += idx / (Stride / sizeof(*n));
548 return *reinterpret_cast<decltype(n)>(ptr);
549 };
 // turns a mismatch mask (set bits mark differing bytes) into the return value
550 auto difference = [a, b](uint mask, qptrdiff offset) {
551 if (Mode == CompareStringsForEquality)
552 return 1;
553 uint idx = qCountTrailingZeroBits(mask);
554 return codeUnitAt(a + offset, idx) - codeUnitAt(b + offset, idx);
555 };
556
 // load 8 (or 4) characters as 16-bit lanes, zero-extending 8-bit input
557 static const auto load8Chars = [](const auto *ptr) {
558 if (sizeof(*ptr) == 2)
559 return _mm_loadu_si128(reinterpret_cast<const __m128i *>(ptr));
560 __m128i chunk = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(ptr));
561 return _mm_unpacklo_epi8(chunk, _mm_setzero_si128());
562 };
563 static const auto load4Chars = [](const auto *ptr) {
564 if (sizeof(*ptr) == 2)
565 return _mm_loadl_epi64(reinterpret_cast<const __m128i *>(ptr));
566 __m128i chunk = _mm_cvtsi32_si128(qFromUnaligned<quint32>(ptr));
567 return _mm_unpacklo_epi8(chunk, _mm_setzero_si128());
568 };
569
570 // we're going to read a[0..15] and b[0..15] (32 bytes)
 // returns the equality mask: a set bit for every byte that compared equal
571 auto processChunk16Chars = [a, b](qptrdiff offset) -> uint {
572 if constexpr (UseAvx2) {
573 __m256i a_data = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(a + offset));
574 __m256i b_data;
575 if (sizeof(Char) == 1) {
576 // expand to UTF-16 via zero-extension
577 __m128i chunk = _mm_loadu_si128(reinterpret_cast<const __m128i *>(b + offset));
578 b_data = _mm256_cvtepu8_epi16(chunk);
579 } else {
580 b_data = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(b + offset));
581 }
582 __m256i result = _mm256_cmpeq_epi16(a_data, b_data);
583 return _mm256_movemask_epi8(result);
584 }
585
586 __m128i a_data1 = load8Chars(a + offset);
587 __m128i a_data2 = load8Chars(a + offset + 8);
588 __m128i b_data1, b_data2;
589 if (sizeof(Char) == 1) {
590 // expand to UTF-16 via unpacking
591 __m128i b_data = _mm_loadu_si128(reinterpret_cast<const __m128i *>(b + offset));
592 b_data1 = _mm_unpacklo_epi8(b_data, _mm_setzero_si128());
593 b_data2 = _mm_unpackhi_epi8(b_data, _mm_setzero_si128());
594 } else {
595 b_data1 = load8Chars(b + offset);
596 b_data2 = load8Chars(b + offset + 8);
597 }
598 __m128i result1 = _mm_cmpeq_epi16(a_data1, b_data1);
599 __m128i result2 = _mm_cmpeq_epi16(a_data2, b_data2);
 // second half goes into the upper 16 bits (<< binds tighter than |)
600 return _mm_movemask_epi8(result1) | _mm_movemask_epi8(result2) << 16;
601 };
602
 // 16 characters or more: whole 16-character chunks, then one chunk that
 // ends exactly at the end of the strings
603 if (l >= sizeof(__m256i) / sizeof(char16_t)) {
604 qptrdiff offset = 0;
605 for ( ; l >= offset + sizeof(__m256i) / sizeof(char16_t); offset += sizeof(__m256i) / sizeof(char16_t)) {
606 uint mask = ~processChunk16Chars(offset);
607 if (mask)
608 return difference(mask, offset);
609 }
610
611 // maybe overlap the last 32 bytes
612 if (size_t(offset) < l) {
613 offset = l - sizeof(__m256i) / sizeof(char16_t);
614 uint mask = ~processChunk16Chars(offset);
615 return mask ? difference(mask, offset) : 0;
616 }
617 } else if (l >= 4) {
 // 4 to 15 characters: one load from the start and one that ends at the
 // end of the strings; the two may overlap
618 __m128i a_data1, b_data1;
619 __m128i a_data2, b_data2;
620 int width;
621 if (l >= 8) {
622 width = 8;
623 a_data1 = load8Chars(a);
624 b_data1 = load8Chars(b);
625 a_data2 = load8Chars(a + l - width);
626 b_data2 = load8Chars(b + l - width);
627 } else {
628 // we're going to read a[0..3] and b[0..3] (8 bytes)
629 width = 4;
630 a_data1 = load4Chars(a);
631 b_data1 = load4Chars(b);
632 a_data2 = load4Chars(a + l - width);
633 b_data2 = load4Chars(b + l - width);
634 }
635
636 __m128i result = _mm_cmpeq_epi16(a_data1, b_data1);
637 ushort mask = ~_mm_movemask_epi8(result);
638 if (mask)
639 return difference(mask, 0);
640
641 result = _mm_cmpeq_epi16(a_data2, b_data2);
642 mask = ~_mm_movemask_epi8(result);
643 if (mask)
644 return difference(mask, l - width);
645 } else {
646 // reset l
647 l &= 3;
648
 // the same lambda is both the loop check (non-zero means "differs")
 // and the value returned on a mismatch
649 const auto lambda = [=](size_t i) -> int {
650 return a[i] - b[i];
651 };
652 return UnrollTailLoop<3>::exec(l, 0, lambda, lambda);
653 }
654 return 0;
655}
656#endif
657
658Q_NEVER_INLINE
659qsizetype QtPrivate::qustrlen(const char16_t *str) noexcept
660{
661#if defined(__SSE2__) && !(defined(__SANITIZE_ADDRESS__) || __has_feature(address_sanitizer)) && !(defined(__SANITIZE_THREAD__) || __has_feature(thread_sanitizer))
662 return qustrlen_sse2(str);
663#endif
664
665 if (sizeof(wchar_t) == sizeof(char16_t))
666 return wcslen(reinterpret_cast<const wchar_t *>(str));
667
668 qsizetype result = 0;
669 while (*str++)
670 ++result;
671 return result;
672}
673
674qsizetype QtPrivate::qustrnlen(const char16_t *str, qsizetype maxlen) noexcept
675{
676 return qustrchr({ str, maxlen }, u'\0') - str;
677}
678
679/*!
680 * \internal
681 *
682 * Searches for character \a c in the string \a str and returns a pointer to
683 * it. Unlike strchr() and wcschr() (but like glibc's strchrnul()), if the
684 * character is not found, this function returns a pointer to the end of the
685 * string -- that is, \c{str.end()}.
686 */
688const char16_t *QtPrivate::qustrchr(QStringView str, char16_t c) noexcept
689{
690 const char16_t *n = str.utf16();
691 const char16_t *e = n + str.size();
692
693#ifdef __SSE2__
 // cleared after the AVX2 loop: fewer than 16 characters are left by then,
 // so the 8-character loop below has to run at most once
694 bool loops = true;
695 // Using the PMOVMSKB instruction, we get two bits for each character
696 // we compare.
697 __m128i mch;
698 if constexpr (UseAvx2) {
699 // we're going to read n[0..15] (32 bytes)
700 __m256i mch256 = _mm256_set1_epi32(c | (c << 16));
701 for (const char16_t *next = n + 16; next <= e; n = next, next += 16) {
702 __m256i data = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(n));
703 __m256i result = _mm256_cmpeq_epi16(data, mch256);
704 uint mask = uint(_mm256_movemask_epi8(result));
705 if (mask) {
706 uint idx = qCountTrailingZeroBits(mask);
707 return n + idx / 2;
708 }
709 }
710 loops = false;
711 mch = _mm256_castsi256_si128(mch256);
712 } else {
713 mch = _mm_set1_epi32(c | (c << 16));
714 }
715
 // Compares \a data with the needle; on a match inside the bytes selected by
 // \a validityMask, advances n to the matching character and returns true
716 auto hasMatch = [mch, &n](__m128i data, ushort validityMask) {
717 __m128i result = _mm_cmpeq_epi16(data, mch);
718 uint mask = uint(_mm_movemask_epi8(result));
719 if ((mask & validityMask) == 0)
720 return false;
721 uint idx = qCountTrailingZeroBits(mask);
722 n += idx / 2;
723 return true;
724 };
725
726 // we're going to read n[0..7] (16 bytes)
727 for (const char16_t *next = n + 8; next <= e; n = next, next += 8) {
728 __m128i data = _mm_loadu_si128(reinterpret_cast<const __m128i *>(n));
729 if (hasMatch(data, 0xffff))
730 return n;
731
732 if (!loops) {
 // break skips the loop's own "n = next", so advance by hand
733 n += 8;
734 break;
735 }
736 }
737
738# if !defined(__OPTIMIZE_SIZE__)
739 // we're going to read n[0..3] (8 bytes)
740 if (e - n > 3) {
741 __m128i data = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(n));
 // only the low 8 mask bits correspond to the 8 bytes that were loaded
742 if (hasMatch(data, 0xff))
743 return n;
744
745 n += 4;
746 }
747
 // at most three characters remain: check them one by one, returning e if
 // none matches
748 return UnrollTailLoop<3>::exec(e - n, e,
749 [=](qsizetype i) { return n[i] == c; },
750 [=](qsizetype i) { return n + i; });
751# endif
752#elif defined(__ARM_NEON__)
 // every lane gets its own bit, so the horizontal add below produces a
 // bit mask of the matching lanes
753 const uint16x8_t vmask = qvsetq_n_u16(1, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6, 1 << 7);
754 const uint16x8_t ch_vec = vdupq_n_u16(c);
755 for (const char16_t *next = n + 8; next <= e; n = next, next += 8) {
756 uint16x8_t data = vld1q_u16(reinterpret_cast<const uint16_t *>(n));
757 uint mask = vaddvq_u16(vandq_u16(vceqq_u16(data, ch_vec), vmask));
758 if (ushort(mask)) {
759 // found a match
760 return n + qCountTrailingZeroBits(mask);
761 }
762 }
763#endif // aarch64
764
 // scalar search over whatever the vector code above left unchecked
765 return std::find(n, e, c);
766}
767
768/*!
769 * \internal
770 *
771 * Searches case-insensitively for character \a c in the string \a str and
772 * returns a pointer to it. Iif the character is not found, this function
773 * returns a pointer to the end of the string -- that is, \c{str.end()}.
774 */
776const char16_t *QtPrivate::qustrcasechr(QStringView str, char16_t c) noexcept
777{
778 const QChar *n = str.begin();
779 const QChar *e = str.end();
780 c = foldCase(c);
781 auto it = std::find_if(n, e, [c](auto ch) { return foldAndCompare(ch, QChar(c)); });
782 return reinterpret_cast<const char16_t *>(it);
783}
784
785// Note: ptr on output may be off by one and point to a preceding US-ASCII
786// character. Usually harmless.
787bool qt_is_ascii(const char *&ptr, const char *end) noexcept
788{
789#if defined(__SSE2__)
790 // Testing for the high bit can be done efficiently with just PMOVMSKB
791 bool loops = true;
792 if constexpr (UseAvx2) {
793 while (ptr + 32 <= end) {
794 __m256i data = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(ptr));
795 quint32 mask = _mm256_movemask_epi8(data);
796 if (mask) {
797 uint idx = qCountTrailingZeroBits(mask);
798 ptr += idx;
799 return false;
800 }
801 ptr += 32;
802 }
803 loops = false;
804 }
805
806 while (ptr + 16 <= end) {
807 __m128i data = _mm_loadu_si128(reinterpret_cast<const __m128i *>(ptr));
808 quint32 mask = _mm_movemask_epi8(data);
809 if (mask) {
810 uint idx = qCountTrailingZeroBits(mask);
811 ptr += idx;
812 return false;
813 }
814 ptr += 16;
815
816 if (!loops)
817 break;
818 }
819 if (ptr + 8 <= end) {
820 __m128i data = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(ptr));
821 quint8 mask = _mm_movemask_epi8(data);
822 if (mask) {
823 uint idx = qCountTrailingZeroBits(mask);
824 ptr += idx;
825 return false;
826 }
827 ptr += 8;
828 }
829#endif
830
831 while (ptr + 4 <= end) {
832 quint32 data = qFromUnaligned<quint32>(ptr);
833 if (data &= 0x80808080U) {
834 uint idx = QSysInfo::ByteOrder == QSysInfo::BigEndian
835 ? qCountLeadingZeroBits(data)
836 : qCountTrailingZeroBits(data);
837 ptr += idx / 8;
838 return false;
839 }
840 ptr += 4;
841 }
842
843 while (ptr != end) {
844 if (quint8(*ptr) & 0x80)
845 return false;
846 ++ptr;
847 }
848 return true;
849}
850
851bool QtPrivate::isAscii(QLatin1StringView s) noexcept
852{
853 const char *ptr = s.begin();
854 const char *end = s.end();
855
856 return qt_is_ascii(ptr, end);
857}
858
859static bool isAscii_helper(const char16_t *&ptr, const char16_t *end)
860{
861#ifdef __SSE2__
862 const char *ptr8 = reinterpret_cast<const char *>(ptr);
863 const char *end8 = reinterpret_cast<const char *>(end);
864 bool ok = simdTestMask(ptr8, end8, 0xff80ff80);
865 ptr = reinterpret_cast<const char16_t *>(ptr8);
866 if (!ok)
867 return false;
868#endif
869
870 while (ptr != end) {
871 if (*ptr & 0xff80)
872 return false;
873 ++ptr;
874 }
875 return true;
876}
877
878bool QtPrivate::isAscii(QStringView s) noexcept
879{
880 const char16_t *ptr = s.utf16();
881 const char16_t *end = ptr + s.size();
882
883 return isAscii_helper(ptr, end);
884}
885
886bool QtPrivate::isLatin1(QStringView s) noexcept
887{
888 const char16_t *ptr = s.utf16();
889 const char16_t *end = ptr + s.size();
890
891#ifdef __SSE2__
892 const char *ptr8 = reinterpret_cast<const char *>(ptr);
893 const char *end8 = reinterpret_cast<const char *>(end);
894 if (!simdTestMask(ptr8, end8, 0xff00ff00))
895 return false;
896 ptr = reinterpret_cast<const char16_t *>(ptr8);
897#endif
898
899 while (ptr != end) {
900 if (*ptr++ > 0xff)
901 return false;
902 }
903 return true;
904}
905
906bool QtPrivate::isValidUtf16(QStringView s) noexcept
907{
908 constexpr char32_t InvalidCodePoint = UINT_MAX;
909
910 QStringIterator i(s);
911 while (i.hasNext()) {
912 const char32_t c = i.next(InvalidCodePoint);
913 if (c == InvalidCodePoint)
914 return false;
915 }
916
917 return true;
918}
919
920// conversion between Latin 1 and UTF-16
// Writes \a size UTF-16 code units to \a dst, each being the zero-extended
// value of the corresponding byte of \a str. On SSE2 builds the tail of the
// input is handled with loads and stores that overlap data that was already
// converted, so a few characters may be written twice with the same value.
921Q_CORE_EXPORT void qt_from_latin1(char16_t *dst, const char *str, size_t size) noexcept
922{
923 /* SIMD:
924 * Unpacking with SSE has been shown to improve performance on recent CPUs
925 * The same method gives no improvement with NEON. On Aarch64, clang will do the vectorization
926 * itself in exactly the same way as one would do it with intrinsics.
927 */
928#if defined(__SSE2__)
929 // we're going to read str[offset..offset+15] (16 bytes)
930 const __m128i nullMask = _mm_setzero_si128();
931 auto processOneChunk = [=](qptrdiff offset) {
932 const __m128i chunk = _mm_loadu_si128((const __m128i*)(str + offset)); // load
933 if constexpr (UseAvx2) {
934 // zero extend to a YMM register
935 const __m256i extended = _mm256_cvtepu8_epi16(chunk);
936
937 // store
938 _mm256_storeu_si256((__m256i*)(dst + offset), extended);
939 } else {
940 // unpack the first 8 bytes, padding with zeros
941 const __m128i firstHalf = _mm_unpacklo_epi8(chunk, nullMask);
942 _mm_storeu_si128((__m128i*)(dst + offset), firstHalf); // store
943
944 // unpack the last 8 bytes, padding with zeros
945 const __m128i secondHalf = _mm_unpackhi_epi8 (chunk, nullMask);
946 _mm_storeu_si128((__m128i*)(dst + offset + 8), secondHalf); // store
947 }
948 };
949
950 const char *e = str + size;
951 if (size >= sizeof(__m128i)) {
952 qptrdiff offset = 0;
953 for ( ; str + offset + sizeof(__m128i) <= e; offset += sizeof(__m128i))
954 processOneChunk(offset);
 // leftover shorter than a chunk: convert the final 16 bytes again,
 // overlapping the previous chunk
955 if (str + offset < e)
956 processOneChunk(size - sizeof(__m128i));
957 return;
958 }
959
960# if !defined(__OPTIMIZE_SIZE__)
961 if (size >= 4) {
962 // two overlapped loads & stores, of either 64-bit or of 32-bit
963 if (size >= 8) {
964 const __m128i unpacked1 = mm_load8_zero_extend(str);
965 const __m128i unpacked2 = mm_load8_zero_extend(str + size - 8);
966 _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), unpacked1);
967 _mm_storeu_si128(reinterpret_cast<__m128i *>(dst + size - 8), unpacked2);
968 } else {
969 const __m128i chunk1 = _mm_cvtsi32_si128(qFromUnaligned<quint32>(str));
970 const __m128i chunk2 = _mm_cvtsi32_si128(qFromUnaligned<quint32>(str + size - 4));
971 const __m128i unpacked1 = _mm_unpacklo_epi8(chunk1, nullMask);
972 const __m128i unpacked2 = _mm_unpacklo_epi8(chunk2, nullMask);
973 _mm_storel_epi64(reinterpret_cast<__m128i *>(dst), unpacked1);
974 _mm_storel_epi64(reinterpret_cast<__m128i *>(dst + size - 4), unpacked2);
975 }
976 return;
977 } else {
 // fewer than four characters: unrolled scalar copy
978 size = size % 4;
979 return UnrollTailLoop<3>::exec(qsizetype(size), [=](qsizetype i) { dst[i] = uchar(str[i]); });
980 }
981# endif
982#endif
 // Fallback paths. On SSE2 builds without __OPTIMIZE_SIZE__ every branch
 // above has already returned.
983#if defined(__mips_dsp)
984 static_assert(sizeof(qsizetype) == sizeof(int),
985 "oops, the assembler implementation needs to be called in a loop");
986 if (size > 20)
987 qt_fromlatin1_mips_asm_unroll8(dst, str, size);
988 else
989 qt_fromlatin1_mips_asm_unroll4(dst, str, size);
990#else
 // going through uchar keeps bytes >= 0x80 from being sign-extended
991 while (size--)
992 *dst++ = (uchar)*str++;
993#endif
994}
995
996static QVarLengthArray<char16_t> qt_from_latin1_to_qvla(QLatin1StringView str)
997{
998 const qsizetype len = str.size();
999 QVarLengthArray<char16_t> arr(len);
1000 qt_from_latin1(arr.data(), str.data(), len);
1001 return arr;
1002}
1003
// Narrows \a length UTF-16 code units read from \a src into single bytes
// written to \a dst.
//
// When Checked is true, every code unit above 0xff is written as '?'.
// When Checked is false, that replacement step is skipped and the units are
// narrowed as-is by whichever code path handles them.
//
// The body is selected at compile time: an SSE2 path (optionally using AVX2
// and SSE4.1), a NEON path, and finally either the MIPS DSP assembler routine
// or a plain scalar loop.
1004template <bool Checked>
1005static void qt_to_latin1_internal(uchar *dst, const char16_t *src, qsizetype length)
1006{
1007#if defined(__SSE2__)
// With AVX2 these hold 256-bit broadcasts of '?' and 0x100; without AVX2
// they are plain integer 0 placeholders that the AVX2-only code never reads.
1008 auto questionMark256 = []() {
1009 if constexpr (UseAvx2)
1010 return _mm256_broadcastw_epi16(_mm_cvtsi32_si128('?'));
1011 else
1012 return 0;
1013 }();
1014 auto outOfRange256 = []() {
1015 if constexpr (UseAvx2)
1016 return _mm256_broadcastw_epi16(_mm_cvtsi32_si128(0x100));
1017 else
1018 return 0;
1019 }();
// 128-bit versions of the same two constants, used by mergeQuestionMarks.
1020 __m128i questionMark, outOfRange;
1021 if constexpr (UseAvx2) {
1022 questionMark = _mm256_castsi256_si128(questionMark256);
1023 outOfRange = _mm256_castsi256_si128(outOfRange256);
1024 } else {
1025 questionMark = _mm_set1_epi16('?');
1026 outOfRange = _mm_set1_epi16(0x100);
1027 }
1028
// Returns chunk with every 16-bit lane above 0xff replaced by '?'.
// Returns chunk unchanged when Checked is false.
1029 auto mergeQuestionMarks = [=](__m128i chunk) {
1030 if (!Checked)
1031 return chunk;
1032
1033 // SSE has no compare instruction for unsigned comparison.
1034 if constexpr (UseSse4_1) {
1035 // We use an unsigned uc = qMin(uc, 0x100) and then compare for equality.
1036 chunk = _mm_min_epu16(chunk, outOfRange);
1037 const __m128i offLimitMask = _mm_cmpeq_epi16(chunk, outOfRange);
1038 chunk = _mm_blendv_epi8(chunk, questionMark, offLimitMask);
1039 return chunk;
1040 }
1041 // The values must be shifted by 0x8000 so that the signed comparison
// below orders them the way an unsigned comparison would
1042 const __m128i signedBitOffset = _mm_set1_epi16(short(0x8000));
1043 const __m128i thresholdMask = _mm_set1_epi16(short(0xff + 0x8000));
1044
1045 const __m128i signedChunk = _mm_add_epi16(chunk, signedBitOffset);
1046 const __m128i offLimitMask = _mm_cmpgt_epi16(signedChunk, thresholdMask);
1047
1048 // offLimitQuestionMark contains '?' for each 16 bits that was off-limit
1049 // the 16 bits that were correct contains zeros
1050 const __m128i offLimitQuestionMark = _mm_and_si128(offLimitMask, questionMark);
1051
1052 // correctBytes contains the bytes that were in limit
1053 // the 16 bits that were off limits contains zeros
1054 const __m128i correctBytes = _mm_andnot_si128(offLimitMask, chunk);
1055
1056 // merge offLimitQuestionMark and correctBytes to have the result
1057 chunk = _mm_or_si128(correctBytes, offLimitQuestionMark);
1058
1059 Q_UNUSED(outOfRange);
1060 return chunk;
1061 };
1062
1063 // we're going to read src[offset..offset+15] (16 code units, i.e. 32 bytes)
// and return them packed into one 128-bit vector of 16 bytes
1064 auto loadChunkAt = [=](qptrdiff offset) {
1065 __m128i chunk1, chunk2;
1066 if constexpr (UseAvx2) {
1067 __m256i chunk = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(src + offset));
1068 if (Checked) {
1069 // See mergeQuestionMarks lambda above for details
1070 chunk = _mm256_min_epu16(chunk, outOfRange256);
1071 const __m256i offLimitMask = _mm256_cmpeq_epi16(chunk, outOfRange256);
1072 chunk = _mm256_blendv_epi8(chunk, questionMark256, offLimitMask);
1073 }
1074
1075 chunk2 = _mm256_extracti128_si256(chunk, 1);
1076 chunk1 = _mm256_castsi256_si128(chunk);
1077 } else {
1078 chunk1 = _mm_loadu_si128((const __m128i*)(src + offset)); // load
1079 chunk1 = mergeQuestionMarks(chunk1);
1080
1081 chunk2 = _mm_loadu_si128((const __m128i*)(src + offset + 8)); // load
1082 chunk2 = mergeQuestionMarks(chunk2);
1083 }
1084
1085 // pack the two vector to 16 x 8bits elements
1086 return _mm_packus_epi16(chunk1, chunk2);
1087 };
1088
// sizeof(__m128i) is 16, so this branch handles inputs of 16 or more code units
1089 if (size_t(length) >= sizeof(__m128i)) {
1090 // because of possible overlapping, we won't process the last chunk in the loop
1091 qptrdiff offset = 0;
1092 for ( ; offset + 2 * sizeof(__m128i) < size_t(length); offset += sizeof(__m128i))
1093 _mm_storeu_si128(reinterpret_cast<__m128i *>(dst + offset), loadChunkAt(offset));
1094
1095 // overlapped conversion of the last full chunk and the tail
// (both chunks are loaded before either one is stored)
1096 __m128i last1 = loadChunkAt(offset);
1097 __m128i last2 = loadChunkAt(length - sizeof(__m128i));
1098 _mm_storeu_si128(reinterpret_cast<__m128i *>(dst + offset), last1);
1099 _mm_storeu_si128(reinterpret_cast<__m128i *>(dst + length - sizeof(__m128i)), last2);
1100 return;
1101 }
1102
1103# if !defined(__OPTIMIZE_SIZE__)
// 4 to 15 code units: two loads/stores anchored at the start and at the end
// of the input; they overlap whenever length is not exactly 8 or 4
1104 if (length >= 4) {
1105 // this code is fine even for in-place conversion because we load both
1106 // before any store
1107 if (length >= 8) {
1108 __m128i chunk1 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(src));
1109 __m128i chunk2 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(src + length - 8));
1110 chunk1 = mergeQuestionMarks(chunk1);
1111 chunk2 = mergeQuestionMarks(chunk2);
1112
1113 // pack, where the upper half is ignored
1114 const __m128i result1 = _mm_packus_epi16(chunk1, chunk1);
1115 const __m128i result2 = _mm_packus_epi16(chunk2, chunk2);
1116 _mm_storel_epi64(reinterpret_cast<__m128i *>(dst), result1);
1117 _mm_storel_epi64(reinterpret_cast<__m128i *>(dst + length - 8), result2);
1118 } else {
1119 __m128i chunk1 = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(src));
1120 __m128i chunk2 = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(src + length - 4));
1121 chunk1 = mergeQuestionMarks(chunk1);
1122 chunk2 = mergeQuestionMarks(chunk2);
1123
1124 // pack, we'll zero the upper three quarters
1125 const __m128i result1 = _mm_packus_epi16(chunk1, chunk1);
1126 const __m128i result2 = _mm_packus_epi16(chunk2, chunk2);
1127 qToUnaligned(_mm_cvtsi128_si32(result1), dst);
1128 qToUnaligned(_mm_cvtsi128_si32(result2), dst + length - 4);
1129 }
1130 return;
1131 }
1132
// fewer than 4 code units remain: convert them one at a time
1133 length = length % 4;
1134 return UnrollTailLoop<3>::exec(length, [=](qsizetype i) {
1135 if (Checked)
1136 dst[i] = (src[i]>0xff) ? '?' : (uchar) src[i];
1137 else
1138 dst[i] = src[i];
1139 });
1140# else
// size-optimized build: inputs of 16 or more units already returned above,
// so what is left here is handed to the scalar/MIPS code at the bottom
1141 length = length % 16;
1142# endif // optimize size
1143#elif defined(__ARM_NEON__)
1144 // Refer to the documentation of the SSE2 implementation.
1145 // This uses exactly the same method as for SSE except:
1146 // 1) neon has unsigned comparison
1147 // 2) packing is done to 64 bits (8 x 8bits component).
1148 if (length >= 16) {
1149 const qsizetype chunkCount = length >> 3; // divided by 8
1150 const uint16x8_t questionMark = vdupq_n_u16('?'); // set
1151 const uint16x8_t thresholdMask = vdupq_n_u16(0xff); // set
1152 for (qsizetype i = 0; i < chunkCount; ++i) {
1153 uint16x8_t chunk = vld1q_u16((uint16_t *)src); // load
1154 src += 8;
1155
1156 if (Checked) {
1157 const uint16x8_t offLimitMask = vcgtq_u16(chunk, thresholdMask); // chunk > thresholdMask
1158 const uint16x8_t offLimitQuestionMark = vandq_u16(offLimitMask, questionMark); // offLimitMask & questionMark
1159 const uint16x8_t correctBytes = vbicq_u16(chunk, offLimitMask); // !offLimitMask & chunk
1160 chunk = vorrq_u16(correctBytes, offLimitQuestionMark); // correctBytes | offLimitQuestionMark
1161 }
1162 const uint8x8_t result = vmovn_u16(chunk); // narrowing move->packing
1163 vst1_u8(dst, result); // store
1164 dst += 8;
1165 }
// src and dst were advanced past the converted chunks; up to 7 units remain
1166 length = length % 8;
1167 }
1168#endif
// Generic tail: converts whatever the vector code above did not handle
// (or the whole input when no vector path is compiled in).
1169#if defined(__mips_dsp)
1170 static_assert(sizeof(qsizetype) == sizeof(int),
1171 "oops, the assembler implementation needs to be called in a loop");
1172 qt_toLatin1_mips_dsp_asm(dst, src, length);
1173#else
1174 while (length--) {
1175 if (Checked)
1176 *dst++ = (*src>0xff) ? '?' : (uchar) *src;
1177 else
1178 *dst++ = *src;
1179 ++src;
1180 }
1181#endif
1182}
1183
1184void qt_to_latin1(uchar *dst, const char16_t *src, qsizetype length)
1185{
1186 qt_to_latin1_internal<true>(dst, src, length);
1187}
1188
1189void qt_to_latin1_unchecked(uchar *dst, const char16_t *src, qsizetype length)
1190{
1191 qt_to_latin1_internal<false>(dst, src, length);
1192}
1193
1194// Unicode case-insensitive comparison (argument order matches QStringView)
1195Q_NEVER_INLINE static int ucstricmp(qsizetype alen, const char16_t *a, qsizetype blen, const char16_t *b)
1196{
1197 if (a == b)
1198 return qt_lencmp(alen, blen);
1199
1200 qsizetype l = qMin(alen, blen);
1201 qsizetype i;
1202 for (i = 0; i < l; ++i) {
1203// qDebug() << Qt::hex << alast << blast;
1204// qDebug() << Qt::hex << "*a=" << *a << "alast=" << alast << "folded=" << foldCase (*a, alast);
1205// qDebug() << Qt::hex << "*b=" << *b << "blast=" << blast << "folded=" << foldCase (*b, blast);
1206 int diff = foldCase(a + i, a) - foldCase(b + i, b);
1207 if ((diff))
1208 return diff;
1209 }
1210 if (i == alen) {
1211 if (i == blen)
1212 return 0;
1213 return -1;
1214 }
1215 return 1;
1216}
1217
1218// Case-insensitive comparison between a QStringView and a QLatin1StringView
1219// (argument order matches those types)
1220Q_NEVER_INLINE static int ucstricmp(qsizetype alen, const char16_t *a, qsizetype blen, const char *b)
1221{
1222 qsizetype l = qMin(alen, blen);
1223 qsizetype i;
1224 for (i = 0; i < l; ++i) {
1225 int diff = foldCase(a[i]) - foldCase(char16_t{uchar(b[i])});
1226 if ((diff))
1227 return diff;
1228 }
1229 if (i == alen) {
1230 if (i == blen)
1231 return 0;
1232 return -1;
1233 }
1234 return 1;
1235}
1236
1237// Case-insensitive comparison between a Unicode string and a UTF-8 string
1238Q_NEVER_INLINE static int ucstricmp8(const char *utf8, const char *utf8end, const QChar *utf16, const QChar *utf16end)
1239{
1240 auto src1 = reinterpret_cast<const qchar8_t *>(utf8);
1241 auto end1 = reinterpret_cast<const qchar8_t *>(utf8end);
1242 QStringIterator src2(utf16, utf16end);
1243
1244 while (src1 < end1 && src2.hasNext()) {
1245 char32_t uc1 = QChar::toCaseFolded(QUtf8Functions::nextUcs4FromUtf8(src1, end1));
1246 char32_t uc2 = QChar::toCaseFolded(src2.next());
1247 int diff = uc1 - uc2; // can't underflow
1248 if (diff)
1249 return diff;
1250 }
1251
1252 // the shorter string sorts first
1253 return (end1 > src1) - int(src2.hasNext());
1254}
1255
1256#if defined(__mips_dsp)
1257// From qstring_mips_dsp_asm.S
// Declaration of the external (C linkage) comparison routine that the
// __mips_dsp branch of ucstrncmp() calls for inputs of 8 or more code units,
// returning its result directly. Note that the length parameter is a plain
// unsigned, not a size_t.
1258extern "C" int qt_ucstrncmp_mips_dsp_asm(const char16_t *a,
1259 const char16_t *b,
1260 unsigned len);
1261#endif
1262
1263// Unicode case-sensitive compare two same-sized strings
//
// Compares the first \a l UTF-16 code units of \a a and \a b and returns zero
// when they are all equal. For a mismatch, the scalar and NEON paths in
// CompareStringsForOrdering mode return the difference a[i] - b[i] of the
// first differing pair; in CompareStringsForEquality mode callers should only
// rely on the result being non-zero (the NEON path returns 1, the generic
// fallback returns memcmp()'s result).
1264template <StringComparisonMode Mode>
1265static int ucstrncmp(const char16_t *a, const char16_t *b, size_t l)
1266{
1267 // This function isn't memcmp() because that can return the wrong sorting
1268 // result in little-endian architectures: 0x00ff must sort before 0x0100,
1269 // but the bytes in memory are FF 00 and 00 01.
1270
1271#ifndef __OPTIMIZE_SIZE__
1272# if defined(__mips_dsp)
// the assembler routine takes an unsigned length, hence the size check
1273 static_assert(sizeof(uint) == sizeof(size_t));
1274 if (l >= 8) {
1275 return qt_ucstrncmp_mips_dsp_asm(a, b, l);
1276 }
// shorter inputs fall through to the generic code at the bottom
1277# elif defined(__SSE2__)
1278 return ucstrncmp_sse2<Mode>(a, b, l);
1279# elif defined(__ARM_NEON__)
1280 if (l >= 8) {
1281 const char16_t *end = a + l;
// one distinct bit per 16-bit lane: lane i carries bit i
1282 const uint16x8_t mask = qvsetq_n_u16( 1, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6, 1 << 7 );
1283 while (end - a > 7) {
1284 uint16x8_t da = vld1q_u16(reinterpret_cast<const uint16_t *>(a));
1285 uint16x8_t db = vld1q_u16(reinterpret_cast<const uint16_t *>(b));
1286
// Equal lanes keep their mask bit, and the horizontal add collects those bits
// into one value. After inversion, r has bit i set exactly when lane i differs.
1287 uint8_t r = ~(uint8_t)vaddvq_u16(vandq_u16(vceqq_u16(da, db), mask));
1288 if (r) {
1289 // found a different QChar
1290 if (Mode == CompareStringsForEquality)
1291 return 1;
// the lowest set bit identifies the first differing lane
1292 uint idx = qCountTrailingZeroBits(r);
1293 return a[idx] - b[idx];
1294 }
1295 a += 8;
1296 b += 8;
1297 }
// a and b now point past the compared blocks; fewer than 8 units remain
1298 l &= 7;
1299 }
1300 const auto lambda = [=](size_t i) -> int {
1301 return a[i] - b[i];
1302 };
1303 return UnrollTailLoop<7>::exec(l, 0, lambda, lambda);
1304# endif // MIPS DSP or __SSE2__ or __ARM_NEON__
1305#endif // __OPTIMIZE_SIZE__
1306
// Generic fallback. memcmp() is good enough when only equality matters, and
// on big-endian hosts, where byte order in memory matches numeric order.
1307 if (Mode == CompareStringsForEquality || QSysInfo::ByteOrder == QSysInfo::BigEndian)
1308 return memcmp(a, b, l * sizeof(char16_t));
1309
1310 for (size_t i = 0; i < l; ++i) {
1311 if (int diff = a[i] - b[i])
1312 return diff;
1313 }
1314 return 0;
1315}
1316
1317template <StringComparisonMode Mode>
1318static int ucstrncmp(const char16_t *a, const char *b, size_t l)
1319{
1320 const uchar *c = reinterpret_cast<const uchar *>(b);
1321 const char16_t *uc = a;
1322 const char16_t *e = uc + l;
1323
1324#if defined(__SSE2__) && !defined(__OPTIMIZE_SIZE__)
1325 return ucstrncmp_sse2<Mode>(uc, c, l);
1326#endif
1327
1328 while (uc < e) {
1329 int diff = *uc - *c;
1330 if (diff)
1331 return diff;
1332 uc++, c++;
1333 }
1334
1335 return 0;
1336}
1337
1338// Unicode case-sensitive equality
1339template <typename Char2>
1340static bool ucstreq(const char16_t *a, size_t alen, const Char2 *b)
1341{
1342 return ucstrncmp<CompareStringsForEquality>(a, b, alen) == 0;
1343}
1344
1345// Unicode case-sensitive comparison
1346template <typename Char2>
1347static int ucstrcmp(const char16_t *a, size_t alen, const Char2 *b, size_t blen)
1348{
1349 const size_t l = qMin(alen, blen);
1350 int cmp = ucstrncmp<CompareStringsForOrdering>(a, b, l);
1351 return cmp ? cmp : qt_lencmp(alen, blen);
1352}
1353
1355
1356static int latin1nicmp(const char *lhsChar, qsizetype lSize, const char *rhsChar, qsizetype rSize)
1357{
1358 // We're called with QLatin1StringView's .data() and .size():
1359 Q_ASSERT(lSize >= 0 && rSize >= 0);
1360 if (!lSize)
1361 return rSize ? -1 : 0;
1362 if (!rSize)
1363 return 1;
1364 const qsizetype size = std::min(lSize, rSize);
1365
1366 Q_ASSERT(lhsChar && rhsChar); // since both lSize and rSize are positive
1367 for (qsizetype i = 0; i < size; i++) {
1368 if (int res = CaseInsensitiveL1::difference(lhsChar[i], rhsChar[i]))
1369 return res;
1370 }
1371 return qt_lencmp(lSize, rSize);
1372}
1373
1374bool QtPrivate::equalStrings(QStringView lhs, QStringView rhs) noexcept
1375{
1376 Q_ASSERT(lhs.size() == rhs.size());
1377 return ucstreq(lhs.utf16(), lhs.size(), rhs.utf16());
1378}
1379
1380bool QtPrivate::equalStrings(QStringView lhs, QLatin1StringView rhs) noexcept
1381{
1382 Q_ASSERT(lhs.size() == rhs.size());
1383 return ucstreq(lhs.utf16(), lhs.size(), rhs.latin1());
1384}
1385
1386bool QtPrivate::equalStrings(QLatin1StringView lhs, QStringView rhs) noexcept
1387{
1388 return QtPrivate::equalStrings(rhs, lhs);
1389}
1390
1391bool QtPrivate::equalStrings(QLatin1StringView lhs, QLatin1StringView rhs) noexcept
1392{
1393 Q_ASSERT(lhs.size() == rhs.size());
1394 return (!lhs.size() || memcmp(lhs.data(), rhs.data(), lhs.size()) == 0);
1395}
1396
1397bool QtPrivate::equalStrings(QBasicUtf8StringView<false> lhs, QStringView rhs) noexcept
1398{
1399 return QUtf8::compareUtf8(lhs, rhs) == 0;
1400}
1401
1402bool QtPrivate::equalStrings(QStringView lhs, QBasicUtf8StringView<false> rhs) noexcept
1403{
1404 return QtPrivate::equalStrings(rhs, lhs);
1405}
1406
1407bool QtPrivate::equalStrings(QLatin1StringView lhs, QBasicUtf8StringView<false> rhs) noexcept
1408{
1409 return QUtf8::compareUtf8(QByteArrayView(rhs), lhs) == 0;
1410}
1411
1412bool QtPrivate::equalStrings(QBasicUtf8StringView<false> lhs, QLatin1StringView rhs) noexcept
1413{
1414 return QtPrivate::equalStrings(rhs, lhs);
1415}
1416
1417bool QtPrivate::equalStrings(QBasicUtf8StringView<false> lhs, QBasicUtf8StringView<false> rhs) noexcept
1418{
1419#if QT_VERSION >= QT_VERSION_CHECK(7, 0, 0) || defined(QT_BOOTSTRAPPED) || defined(QT_STATIC)
1420 Q_ASSERT(lhs.size() == rhs.size());
1421#else
1422 // operator== didn't enforce size prior to Qt 6.2
1423 if (lhs.size() != rhs.size())
1424 return false;
1425#endif
1426 return (!lhs.size() || memcmp(lhs.data(), rhs.data(), lhs.size()) == 0);
1427}
1428
1429bool QAnyStringView::equal(QAnyStringView lhs, QAnyStringView rhs) noexcept
1430{
1431 if (lhs.size() != rhs.size() && lhs.isUtf8() == rhs.isUtf8())
1432 return false;
1433 return lhs.visit([rhs](auto lhs) {
1434 return rhs.visit([lhs](auto rhs) {
1435 return QtPrivate::equalStrings(lhs, rhs);
1436 });
1437 });
1438}
1439
1440/*!
1441 \relates QStringView
1442 \internal
1443 \since 5.10
1444
1445 Returns an integer that compares to 0 as \a lhs compares to \a rhs.
1446
1447 \include qstring.qdocinc {search-comparison-case-sensitivity} {comparison}
1448
1449 Case-sensitive comparison is based exclusively on the numeric Unicode values
1450 of the characters and is very fast, but is not what a human would expect.
1451 Consider sorting user-visible strings with QString::localeAwareCompare().
1452
1453 \sa {Comparing Strings}
1454*/
1455int QtPrivate::compareStrings(QStringView lhs, QStringView rhs, Qt::CaseSensitivity cs) noexcept
1456{
1457 if (cs == Qt::CaseSensitive)
1458 return ucstrcmp(lhs.utf16(), lhs.size(), rhs.utf16(), rhs.size());
1459 return ucstricmp(lhs.size(), lhs.utf16(), rhs.size(), rhs.utf16());
1460}
1461
1462/*!
1463 \relates QStringView
1464 \internal
1465 \since 5.10
1466 \overload
1467
1468 Returns an integer that compares to 0 as \a lhs compares to \a rhs.
1469
1470 \include qstring.qdocinc {search-comparison-case-sensitivity} {comparison}
1471
1472 Case-sensitive comparison is based exclusively on the numeric Unicode values
1473 of the characters and is very fast, but is not what a human would expect.
1474 Consider sorting user-visible strings with QString::localeAwareCompare().
1475
1476 \sa {Comparing Strings}
1477*/
1478int QtPrivate::compareStrings(QStringView lhs, QLatin1StringView rhs, Qt::CaseSensitivity cs) noexcept
1479{
1480 if (cs == Qt::CaseSensitive)
1481 return ucstrcmp(lhs.utf16(), lhs.size(), rhs.latin1(), rhs.size());
1482 return ucstricmp(lhs.size(), lhs.utf16(), rhs.size(), rhs.latin1());
1483}
1484
1485/*!
1486 \relates QStringView
1487 \internal
1488 \since 6.0
1489 \overload
1490*/
1491int QtPrivate::compareStrings(QStringView lhs, QBasicUtf8StringView<false> rhs, Qt::CaseSensitivity cs) noexcept
1492{
1493 return -compareStrings(rhs, lhs, cs);
1494}
1495
1496/*!
1497 \relates QStringView
1498 \internal
1499 \since 5.10
1500 \overload
1501*/
1502int QtPrivate::compareStrings(QLatin1StringView lhs, QStringView rhs, Qt::CaseSensitivity cs) noexcept
1503{
1504 return -compareStrings(rhs, lhs, cs);
1505}
1506
1507/*!
1508 \relates QStringView
1509 \internal
1510 \since 5.10
1511 \overload
1512
1513 Returns an integer that compares to 0 as \a lhs compares to \a rhs.
1514
1515 \include qstring.qdocinc {search-comparison-case-sensitivity} {comparison}
1516
1517 Case-sensitive comparison is based exclusively on the numeric Latin-1 values
1518 of the characters and is very fast, but is not what a human would expect.
1519 Consider sorting user-visible strings with QString::localeAwareCompare().
1520
1521 \sa {Comparing Strings}
1522*/
1523int QtPrivate::compareStrings(QLatin1StringView lhs, QLatin1StringView rhs, Qt::CaseSensitivity cs) noexcept
1524{
1525 if (lhs.isEmpty())
1526 return qt_lencmp(qsizetype(0), rhs.size());
1527 if (rhs.isEmpty())
1528 return qt_lencmp(lhs.size(), qsizetype(0));
1529 if (cs == Qt::CaseInsensitive)
1530 return latin1nicmp(lhs.data(), lhs.size(), rhs.data(), rhs.size());
1531 const auto l = std::min(lhs.size(), rhs.size());
1532 int r = memcmp(lhs.data(), rhs.data(), l);
1533 return r ? r : qt_lencmp(lhs.size(), rhs.size());
1534}
1535
1536/*!
1537 \relates QStringView
1538 \internal
1539 \since 6.0
1540 \overload
1541*/
1542int QtPrivate::compareStrings(QLatin1StringView lhs, QBasicUtf8StringView<false> rhs, Qt::CaseSensitivity cs) noexcept
1543{
1544 return -QUtf8::compareUtf8(QByteArrayView(rhs), lhs, cs);
1545}
1546
1547/*!
1548 \relates QStringView
1549 \internal
1550 \since 6.0
1551 \overload
1552*/
1553int QtPrivate::compareStrings(QBasicUtf8StringView<false> lhs, QStringView rhs, Qt::CaseSensitivity cs) noexcept
1554{
1555 if (cs == Qt::CaseSensitive)
1556 return QUtf8::compareUtf8(lhs, rhs);
1557 return ucstricmp8(lhs.begin(), lhs.end(), rhs.begin(), rhs.end());
1558}
1559
1560/*!
1561 \relates QStringView
1562 \internal
1563 \since 6.0
1564 \overload
1565*/
1566int QtPrivate::compareStrings(QBasicUtf8StringView<false> lhs, QLatin1StringView rhs, Qt::CaseSensitivity cs) noexcept
1567{
1568 return -compareStrings(rhs, lhs, cs);
1569}
1570
1571/*!
1572 \relates QStringView
1573 \internal
1574 \since 6.0
1575 \overload
1576*/
1577int QtPrivate::compareStrings(QBasicUtf8StringView<false> lhs, QBasicUtf8StringView<false> rhs, Qt::CaseSensitivity cs) noexcept
1578{
1579 return QUtf8::compareUtf8(QByteArrayView(lhs), QByteArrayView(rhs), cs);
1580}
1581
1582int QAnyStringView::compare(QAnyStringView lhs, QAnyStringView rhs, Qt::CaseSensitivity cs) noexcept
1583{
1584 return lhs.visit([rhs, cs](auto lhs) {
1585 return rhs.visit([lhs, cs](auto rhs) {
1586 return QtPrivate::compareStrings(lhs, rhs, cs);
1587 });
1588 });
1589}
1590
1591// ### Qt 7: do not allow anything but ASCII digits
1592// in arg()'s replacements.
1593#if QT_VERSION < QT_VERSION_CHECK(7, 0, 0) && !defined(QT_BOOTSTRAPPED)
1594static bool supportUnicodeDigitValuesInArg()
1595{
1596 static const bool result = []() {
1597 static const char supportUnicodeDigitValuesEnvVar[]
1598 = "QT_USE_UNICODE_DIGIT_VALUES_IN_STRING_ARG";
1599
1600 if (qEnvironmentVariableIsSet(supportUnicodeDigitValuesEnvVar))
1601 return qEnvironmentVariableIntValue(supportUnicodeDigitValuesEnvVar) != 0;
1602
1603#if QT_VERSION < QT_VERSION_CHECK(6, 6, 0) // keep it in sync with the test
1604 return true;
1605#else
1606 return false;
1607#endif
1608 }();
1609
1610 return result;
1611}
1612#endif
1613
1614static int qArgDigitValue(QChar ch) noexcept
1615{
1616#if QT_VERSION < QT_VERSION_CHECK(7, 0, 0) && !defined(QT_BOOTSTRAPPED)
1617 if (supportUnicodeDigitValuesInArg())
1618 return ch.digitValue();
1619#endif
1620 if (ch >= u'0' && ch <= u'9')
1621 return int(ch.unicode() - u'0');
1622 return -1;
1623}
1624
1625#if QT_CONFIG(regularexpression)
1626Q_DECL_COLD_FUNCTION
1627static void qtWarnAboutInvalidRegularExpression(const QRegularExpression &re, const char *cls, const char *method)
1628{
1629 extern void qtWarnAboutInvalidRegularExpression(const QString &pattern, const char *cls, const char *method);
1630 qtWarnAboutInvalidRegularExpression(re.pattern(), cls, method);
1631}
1632#endif
1633
1634/*!
1635 \macro QT_RESTRICTED_CAST_FROM_ASCII
1636 \relates QString
1637
1638 Disables most automatic conversions from source literals and 8-bit data
1639 to unicode QStrings, but allows the use of
1640 the \c{QChar(char)} and \c{QString(const char (&ch)[N])} constructors,
1641 and the \c{QString::operator=(const char (&ch)[N])} assignment operator.
1642 This gives most of the type-safety benefits of \l QT_NO_CAST_FROM_ASCII
1643 but does not require user code to wrap character and string literals
1644 with QLatin1Char, QLatin1StringView or similar.
1645
1646 Using this macro together with source strings outside the 7-bit range,
1647 non-literals, or literals with embedded NUL characters is undefined.
1648
1649 \sa QT_NO_CAST_FROM_ASCII, QT_NO_CAST_TO_ASCII
1650*/
1651
1652/*!
1653 \macro QT_NO_CAST_FROM_ASCII
1654 \relates QString
1655 \relates QChar
1656
1657 Disables automatic conversions from 8-bit strings (\c{char *}) to Unicode
1658 QStrings, as well as from 8-bit \c{char} types (\c{char} and
1659 \c{unsigned char}) to QChar.
1660
1661 \sa QT_NO_CAST_TO_ASCII, QT_RESTRICTED_CAST_FROM_ASCII,
1662 QT_NO_CAST_FROM_BYTEARRAY
1663*/
1664
1665/*!
1666 \macro QT_NO_CAST_TO_ASCII
1667 \relates QString
1668
1669 Disables automatic conversion from QString to 8-bit strings (\c{char *}).
1670
1671 \sa QT_NO_CAST_FROM_ASCII, QT_RESTRICTED_CAST_FROM_ASCII,
1672 QT_NO_CAST_FROM_BYTEARRAY
1673*/
1674
1675/*!
1676 \macro QT_ASCII_CAST_WARNINGS
1677 \internal
1678 \relates QString
1679
1680 This macro can be defined to force a warning whenever a function is
1681 called that automatically converts between unicode and 8-bit encodings.
1682
1683 Note: This only works for compilers that support warnings for
1684 deprecated API.
1685
1686 \sa QT_NO_CAST_TO_ASCII, QT_NO_CAST_FROM_ASCII, QT_RESTRICTED_CAST_FROM_ASCII
1687*/
1688
1689/*!
1690 \class QString
1691 \inmodule QtCore
1692 \reentrant
1693
1694 \brief The QString class provides a Unicode character string.
1695
1696 \ingroup tools
1697 \ingroup shared
1698 \ingroup string-processing
1699
1700 \compares strong
1701 \compareswith strong QChar QLatin1StringView {const char16_t *} \
1702 QStringView QUtf8StringView
1703 \endcompareswith
1704 \compareswith strong QByteArray QByteArrayView {const char *}
1705 When comparing with byte arrays, their content is interpreted as UTF-8.
1706 \endcompareswith
1707
1708 QString stores a string of 16-bit \l{QChar}s, where each QChar
1709 corresponds to one UTF-16 code unit. (Unicode characters
1710 with code values above 65535 are stored using surrogate pairs,
1711 that is, two consecutive \l{QChar}s.)
1712
1713 \l{Unicode} is an international standard that supports most of the
1714 writing systems in use today. It is a superset of US-ASCII (ANSI
1715 X3.4-1986) and Latin-1 (ISO 8859-1), and all the US-ASCII/Latin-1
1716 characters are available at the same code positions.
1717
1718 Behind the scenes, QString uses \l{implicit sharing}
1719 (copy-on-write) to reduce memory usage and to avoid the needless
1720 copying of data. This also helps reduce the inherent overhead of
1721 storing 16-bit characters instead of 8-bit characters.
1722
1723 In addition to QString, Qt also provides the QByteArray class to
1724 store raw bytes and traditional 8-bit '\\0'-terminated strings.
1725 For most purposes, QString is the class you want to use. It is
1726 used throughout the Qt API, and the Unicode support ensures that
1727 your applications are easy to translate if you want to expand
1728 your application's market at some point. Two prominent cases
1729 where QByteArray is appropriate are when you need to store raw
1730 binary data, and when memory conservation is critical (like in
1731 embedded systems).
1732
1733 \section1 Initializing a string
1734
1735 One way to initialize a QString is to pass a \c{const char
1736 *} to its constructor. For example, the following code creates a
1737 QString of size 5 containing the data "Hello":
1738
1739 \snippet qstring/main.cpp 0
1740
1741 QString converts the \c{const char *} data into Unicode using the
1742 fromUtf8() function.
1743
1744 In all of the QString functions that take \c{const char *}
1745 parameters, the \c{const char *} is interpreted as a classic
1746 C-style \c{'\\0'}-terminated string. Except where the function's
1747 name overtly indicates some other encoding, such \c{const char *}
1748 parameters are assumed to be encoded in UTF-8.
1749
1750 Since Qt 6.4, it is also possible to initialize QStrings using
1751 the \l {Qt::Literals::StringLiterals::operator""_s()} and
1752 \l {Qt::Literals::StringLiterals::operator""_L1()} literal
1753 operators. In many cases, using the literals results in
1754 \l{More efficient string construction}{more efficient string construction}.
1755
1756
1757 You can also provide string data as an array of \l{QChar}s:
1758
1759 \snippet qstring/main.cpp 1
1760
1761 QString makes a deep copy of the QChar data, so you can modify it
1762 later without experiencing side effects. You can avoid taking a
1763 deep copy of the character data by using QStringView or
1764 QString::fromRawData() instead.
1765
1766 Another approach is to set the size of the string using resize()
1767 and to initialize the data character per character. QString uses
1768 0-based indexes, just like C++ arrays. To access the character at
1769 a particular index position, you can use \l operator[](). On
1770 non-\c{const} strings, \l operator[]() returns a reference to a
1771 character that can be used on the left side of an assignment. For
1772 example:
1773
1774 \snippet qstring/main.cpp 2
1775
1776 For read-only access, an alternative syntax is to use the at()
1777 function:
1778
1779 \snippet qstring/main.cpp 3
1780
1781 The at() function can be faster than \l operator[]() because it
1782 never causes a \l{deep copy} to occur. Alternatively, use the
1783 first(), last(), or sliced() functions to extract several characters
1784 at a time.
1785
1786 A QString can embed '\\0' characters (QChar::Null). The size()
1787 function always returns the size of the whole string, including
1788 embedded '\\0' characters.
1789
1790 After a call to the resize() function, newly allocated characters
1791 have undefined values. To set all the characters in the string to
1792 a particular value, use the fill() function.
1793
1794 QString provides dozens of overloads designed to simplify string
1795 usage. For example, if you want to compare a QString with a string
1796 literal, you can write code like this and it will work as expected:
1797
1798 \snippet qstring/main.cpp 4
1799
1800 You can also pass string literals to functions that take QStrings
1801 as arguments, invoking the QString(const char *)
1802 constructor. Similarly, you can pass a QString to a function that
1803 takes a \c{const char *} argument using the \l qPrintable() macro,
1804 which returns the given QString as a \c{const char *}. This is
1805 equivalent to calling toLocal8Bit().\l{QByteArray::}{constData()}
1806 on the QString.
1807
1808 \section1 Manipulating string data
1809
1810 QString provides the following basic functions for modifying the
1811 character data: append(), prepend(), insert(), replace(), and
1812 remove(). For example:
1813
1814 \snippet qstring/main.cpp 5
1815
1816 In the above example, the replace() function's first two arguments are the
1817 position from which to start replacing and the number of characters that
1818 should be replaced.
1819
1820 When data-modifying functions increase the size of the string,
1821 QString may reallocate the memory in which it holds its data. When
1822 this happens, QString expands by more than it immediately needs so as
1823 to have space for further expansion without reallocation until the size
1824 of the string has significantly increased.
1825
1826 The insert(), remove(), and, when replacing a sub-string with one of
1827 different size, replace() functions can be slow (\l{linear time}) for
1828 large strings because they require moving many characters in the string
1829 by at least one position in memory.
1830
1831 If you are building a QString gradually and know in advance
1832 approximately how many characters the QString will contain, you
1833 can call reserve(), asking QString to preallocate a certain amount
1834 of memory. You can also call capacity() to find out how much
1835 memory the QString actually has allocated.
1836
1837 QString provides \l{STL-style iterators} (QString::const_iterator and
1838 QString::iterator). In practice, iterators are handy when working with
1839 generic algorithms provided by the C++ standard library.
1840
1841 \note Iterators over a QString, and references to individual characters
1842 within one, cannot be relied on to remain valid when any non-\c{const}
1843 method of the QString is called. Accessing such an iterator or reference
1844 after the call to a non-\c{const} method leads to undefined behavior. When
1845 stability for iterator-like functionality is required, you should use
1846 indexes instead of iterators, as they are not tied to QString's internal
1847 state and thus do not get invalidated.
1848
1849 \note Due to \l{implicit sharing}, the first non-\c{const} operator or
1850 function used on a given QString may cause it to internally perform a deep
1851 copy of its data. This invalidates all iterators over the string and
1852 references to individual characters within it. Do not call non-const
1853 functions while keeping iterators. Accessing an iterator or reference
1854 after it has been invalidated leads to undefined behavior. See the
1855 \l{Implicit sharing iterator problem} section for more information.
1856
1857 A frequent requirement is to remove or simplify the spacing between
1858 visible characters in a string. The characters that make up that spacing
1859 are those for which \l {QChar::}{isSpace()} returns \c true, such as
1860 the simple space \c{' '}, the horizontal tab \c{'\\t'} and the newline \c{'\\n'}.
1861 To obtain a copy of a string leaving out any spacing from its start and end,
1862 use \l trimmed(). To also replace each sequence of spacing characters within
1863 the string with a simple space, \c{' '}, use \l simplified().
1864
1865 If you want to find all occurrences of a particular character or
1866 substring in a QString, use the indexOf() or lastIndexOf()
1867 functions. The former searches forward, the latter searches backward.
1868 Either can be told an index position from which to start their search.
1869 Each returns the index position of the character or substring if they
1870 find it; otherwise, they return -1. For example, here is a typical loop
1871 that finds all occurrences of a particular substring:
1872
1873 \snippet qstring/main.cpp 6
1874
1875 QString provides many functions for converting numbers into
1876 strings and strings into numbers. See the arg() functions, the
1877 setNum() functions, the number() static functions, and the
1878 toInt(), toDouble(), and similar functions.
1879
1880 To get an uppercase or lowercase version of a string, use toUpper() or
1881 toLower().
1882
1883 Lists of strings are handled by the QStringList class. You can
1884 split a string into a list of strings using the split() function,
1885 and join a list of strings into a single string with an optional
1886 separator using QStringList::join(). You can obtain a filtered list
1887 from a string list by selecting the entries in it that contain a
1888 particular substring or match a particular QRegularExpression.
1889 See QStringList::filter() for details.
1890
1891 \section1 Querying string data
1892
1893 To see if a QString starts or ends with a particular substring, use
1894 startsWith() or endsWith(). To check whether a QString contains a
1895 specific character or substring, use the contains() function. To
1896 find out how many times a particular character or substring occurs
1897 in a string, use count().
1898
1899 To obtain a pointer to the actual character data, call data() or
1900 constData(). These functions return a pointer to the beginning of
1901 the QChar data. The pointer is guaranteed to remain valid until a
1902 non-\c{const} function is called on the QString.
1903
1904 \section2 Comparing strings
1905
1906 QStrings can be compared using overloaded operators such as \l
1907 operator<(), \l operator<=(), \l operator==(), \l operator>=(),
1908 and so on. The comparison is based exclusively on the lexicographical
1909 order of the two strings, seen as sequences of UTF-16 code units.
1910 It is very fast but is not what a human would expect; the
1911 QString::localeAwareCompare() function is usually a better choice for
1912 sorting user-interface strings, when such a comparison is available.
1913
1914 When Qt is linked with the ICU library (which it usually is), its
1915 locale-aware sorting is used. Otherwise, platform-specific solutions
1916 are used:
1917 \list
1918 \li On Windows, localeAwareCompare() uses the current user locale,
1919 as set in the \uicontrol{regional} and \uicontrol{language}
1920 options portion of \uicontrol{Control Panel}.
1921 \li On \macos and iOS, \l localeAwareCompare() compares according
1922 to the \uicontrol{Order for sorted lists} setting in the
1923 \uicontrol{International preferences} panel.
1924 \li On other Unix-like systems, the comparison falls back to the
1925 system library's \c strcoll().
1926 \endlist
1927
1928 \section1 Converting between encoded string data and QString
1929
1930 QString provides the following functions that return a
1931 \c{const char *} version of the string as QByteArray: toUtf8(),
1932 toLatin1(), and toLocal8Bit().
1933
1934 \list
1935 \li toLatin1() returns a Latin-1 (ISO 8859-1) encoded 8-bit string.
1936 \li toUtf8() returns a UTF-8 encoded 8-bit string. UTF-8 is a
1937 superset of US-ASCII (ANSI X3.4-1986) that supports the entire
1938 Unicode character set through multibyte sequences.
1939 \li toLocal8Bit() returns an 8-bit string using the system's local
1940 encoding. This is the same as toUtf8() on Unix systems.
1941 \endlist
1942
1943 To convert from one of these encodings, QString provides
1944 fromLatin1(), fromUtf8(), and fromLocal8Bit(). Other
1945 encodings are supported through the QStringEncoder and QStringDecoder
1946 classes.
1947
1948 As mentioned above, QString provides a lot of functions and
1949 operators that make it easy to interoperate with \c{const char *}
1950 strings. But this functionality is a double-edged sword: It makes
1951 QString more convenient to use if all strings are US-ASCII or
1952 Latin-1, but there is always the risk that an implicit conversion
1953 from or to \c{const char *} is done using the wrong 8-bit
1954 encoding. To minimize these risks, you can turn off these implicit
1955 conversions by defining some of the following preprocessor symbols:
1956
1957 \list
1958 \li \l QT_NO_CAST_FROM_ASCII disables automatic conversions from
1959 C string literals and pointers to Unicode.
1960 \li \l QT_RESTRICTED_CAST_FROM_ASCII allows automatic conversions
1961 from C characters and character arrays but disables automatic
1962 conversions from character pointers to Unicode.
1963 \li \l QT_NO_CAST_TO_ASCII disables automatic conversion from QString
1964 to C strings.
1965 \endlist
1966
1967 You then need to explicitly call fromUtf8(), fromLatin1(),
1968 or fromLocal8Bit() to construct a QString from an
1969 8-bit string, or use the lightweight QLatin1StringView class. For
1970 example:
1971
1972 \snippet code/src_corelib_text_qstring.cpp 1
1973
1974 Similarly, you must call toLatin1(), toUtf8(), or
1975 toLocal8Bit() explicitly to convert the QString to an 8-bit
1976 string.
1977
1978 \table 100 %
1979 \header
1980 \li Note for C Programmers
1981
1982 \row
1983 \li
1984 Due to C++'s type system and the fact that QString is
1985 \l{implicitly shared}, QStrings may be treated like \c{int}s or
1986 other basic types. For example:
1987
1988 \snippet qstring/main.cpp 7
1989
1990 The \c result variable is a normal variable allocated on the
1991 stack. When \c return is called, and because we're returning by
1992 value, the copy constructor is called and a copy of the string is
1993 returned. No actual copying takes place thanks to the implicit
1994 sharing.
1995
1996 \endtable
1997
1998 \section1 Distinction between null and empty strings
1999
2000 For historical reasons, QString distinguishes between null
2001 and empty strings. A \e null string is a string that is
2002 initialized using QString's default constructor or by passing
2003 \nullptr to the constructor. An \e empty string is any
2004 string with size 0. A null string is always empty, but an empty
2005 string isn't necessarily null:
2006
2007 \snippet qstring/main.cpp 8
2008
2009 All functions except isNull() treat null strings the same as empty
2010 strings. For example, toUtf8().\l{QByteArray::}{constData()} returns a valid pointer
2011 (not \nullptr) to a '\\0' character for a null string. We
2012 recommend that you always use the isEmpty() function and avoid isNull().
2013
2014 \section1 Number formats
2015
2016 When a QString::arg() \c{'%'} format specifier includes the \c{'L'} locale
2017 qualifier, and the base is ten (its default), the default locale is
2018 used. This can be set using \l{QLocale::setDefault()}. For more refined
2019 control of localized string representations of numbers, see
2020 QLocale::toString(). All other number formatting done by QString follows the
2021 C locale's representation of numbers.
2022
2023 When QString::arg() applies left-padding to numbers, the fill character
2024 \c{'0'} is treated specially. If the number is negative, its minus sign
2025 appears before the zero-padding. If the field is localized, the
2026 locale-appropriate zero character is used in place of \c{'0'}. For
2027 floating-point numbers, this special treatment only applies if the number is
2028 finite.
2029
2030 \section2 Floating-point formats
2031
2032 In member functions (for example, arg() and number()) that format floating-point
2033 numbers (\c float or \c double) as strings, the representation used can be
2034 controlled by a choice of \e format and \e precision, whose meanings are as
2035 for \l {QLocale::toString(double, char, int)}.
2036
2037 If the selected \e format includes an exponent, localized forms follow the
2038 locale's convention on digits in the exponent. For non-localized formatting,
2039 the exponent shows its sign and includes at least two digits, left-padding
2040 with zero if needed.
2041
2042 \section1 More efficient string construction
2043
2044 Many strings are known at compile time. The QString constructor from
2045 C++ string literals will copy the contents of the string,
2046 treating the contents as UTF-8. This requires memory allocation and
2047 re-encoding string data, operations that will happen at runtime.
2048 If the string data is known at compile time, you can use the QStringLiteral
2049 macro or similarly \c{operator""_s} to create QString's payload at compile
2050 time instead.
2051
2052 Using the QString \c{'+'} operator, it is easy to construct a
2053 complex string from multiple substrings. You will often write code
2054 like this:
2055
2056 \snippet qstring/stringbuilder.cpp 0
2057
2058 There is nothing wrong with either of these string constructions,
2059 but there are a few hidden inefficiencies:
2060
2061 First, repeated use of the \c{'+'} operator may lead to
2062 multiple memory allocations. When concatenating \e{n} substrings,
2063 where \e{n > 2}, there can be as many as \e{n - 1} calls to the
2064 memory allocator.
2065
2066 These allocations can be optimized by an internal class
2067 \c{QStringBuilder}. This class is marked
2068 internal and does not appear in the documentation, because you
2069 aren't meant to instantiate it in your code. Its use will be
2070 automatic, as described below.
2071
2072 \c{QStringBuilder} uses expression templates and reimplements the
2073 \c{'%'} operator so that when you use \c{'%'} for string
2074 concatenation instead of \c{'+'}, multiple substring
2075 concatenations will be postponed until the final result is about
2076 to be assigned to a QString. At this point, the amount of memory
2077 required for the final result is known. The memory allocator is
2078 then called \e{once} to get the required space, and the substrings
2079 are copied into it one by one.
2080
2081 Additional efficiency is gained by inlining and reducing reference
2082 counting (the QString created from a \c{QStringBuilder}
2083 has a ref count of 1, whereas QString::append() needs an extra
2084 test).
2085
2086 There are two ways you can access this improved method of string
2087 construction. The straightforward way is to include
2088 \c{QStringBuilder} wherever you want to use it and use the
2089 \c{'%'} operator instead of \c{'+'} when concatenating strings:
2090
2091 \snippet qstring/stringbuilder.cpp 5
2092
2093 A more global approach, which is more convenient but not entirely
2094 source-compatible, is to define \c QT_USE_QSTRINGBUILDER (by adding
2095 it to the compiler flags) at build time. This will make concatenating
2096 strings with \c{'+'} work the same way as \c{QStringBuilder's} \c{'%'}.
2097
2098 \note Using automatic type deduction (for example, by using the \c
2099 auto keyword) with the result of string concatenation when QStringBuilder
2100 is enabled will show that the concatenation is indeed an object of a
2101 QStringBuilder specialization:
2102
2103 \snippet qstring/stringbuilder.cpp 6
2104
2105 This does not cause any harm, as QStringBuilder will implicitly convert to
2106 QString when required. If this is undesirable, then one should specify
2107 the necessary types instead of having the compiler deduce them:
2108
2109 \snippet qstring/stringbuilder.cpp 7
2110
2111 \section1 Maximum size and out-of-memory conditions
2112
2113 The maximum size of QString depends on the architecture. Most 64-bit
2114 systems can allocate more than 2 GB of memory, with a typical limit
2115 of 2^63 bytes. The actual value also depends on the overhead required for
2116 managing the data block. As a result, you can expect a maximum size
2117 of 2 GB minus overhead on 32-bit platforms and 2^63 bytes minus overhead
2118 on 64-bit platforms. The number of elements that can be stored in a
2119 QString is this maximum size divided by the size of QChar.
2120
2121 When memory allocation fails, QString throws a \c std::bad_alloc
2122 exception if the application was compiled with exception support.
2123 Out-of-memory conditions in Qt containers are the only cases where Qt
2124 will throw exceptions. If exceptions are disabled, then running out of
2125 memory is undefined behavior.
2126
2127 \note Target operating systems may impose limits on how much memory an
2128 application can allocate, in total, or on the size of individual allocations.
2129 This may further restrict the size of string a QString can hold.
2130 Mitigating or controlling the behavior these limits cause is beyond the
2131 scope of the Qt API.
2132
2133 \sa {Which string class to use?}, fromRawData(), QChar, QStringView,
2134 QLatin1StringView, QByteArray
2135*/
2136
2137/*! \typedef QString::ConstIterator
2138
2139 Qt-style synonym for QString::const_iterator.
2140*/
2141
2142/*! \typedef QString::Iterator
2143
2144 Qt-style synonym for QString::iterator.
2145*/
2146
2147/*! \typedef QString::const_iterator
2148
2149 \sa QString::iterator
2150*/
2151
2152/*! \typedef QString::iterator
2153
2154 \sa QString::const_iterator
2155*/
2156
2157/*! \typedef QString::const_reverse_iterator
2158 \since 5.6
2159
2160 \sa QString::reverse_iterator, QString::const_iterator
2161*/
2162
2163/*! \typedef QString::reverse_iterator
2164 \since 5.6
2165
2166 \sa QString::const_reverse_iterator, QString::iterator
2167*/
2168
2169/*!
2170 \typedef QString::size_type
2171*/
2172
2173/*!
2174 \typedef QString::difference_type
2175*/
2176
2177/*!
2178 \typedef QString::const_reference
2179*/
2180/*!
2181 \typedef QString::reference
2182*/
2183
2184/*!
2185 \typedef QString::const_pointer
2186
2187 The QString::const_pointer typedef provides an STL-style
2188 const pointer to a QString element (QChar).
2189*/
2190/*!
2191 \typedef QString::pointer
2192
2193 The QString::pointer typedef provides an STL-style
2194 pointer to a QString element (QChar).
2195*/
2196
2197/*!
2198 \typedef QString::value_type
2199*/
2200
2201/*! \fn QString::iterator QString::begin()
2202
2203 Returns an \l{STL-style iterators}{STL-style iterator} pointing to the
2204 first character in the string.
2205
2206//! [iterator-invalidation-func-desc]
2207 \warning The returned iterator is invalidated on detachment or when the
2208 QString is modified.
2209//! [iterator-invalidation-func-desc]
2210
2211 \sa constBegin(), end()
2212*/
2213
2214/*! \fn QString::const_iterator QString::begin() const
2215
2216 \overload begin()
2217*/
2218
2219/*! \fn QString::const_iterator QString::cbegin() const
2220 \since 5.0
2221
2222 Returns a const \l{STL-style iterators}{STL-style iterator} pointing to the
2223 first character in the string.
2224
2225 \include qstring.cpp iterator-invalidation-func-desc
2226
2227 \sa begin(), cend()
2228*/
2229
2230/*! \fn QString::const_iterator QString::constBegin() const
2231
2232 Returns a const \l{STL-style iterators}{STL-style iterator} pointing to the
2233 first character in the string.
2234
2235 \include qstring.cpp iterator-invalidation-func-desc
2236
2237 \sa begin(), constEnd()
2238*/
2239
2240/*! \fn QString::iterator QString::end()
2241
2242 Returns an \l{STL-style iterators}{STL-style iterator} pointing just after
2243 the last character in the string.
2244
2245 \include qstring.cpp iterator-invalidation-func-desc
2246
2247 \sa begin(), constEnd()
2248*/
2249
2250/*! \fn QString::const_iterator QString::end() const
2251
2252 \overload end()
2253*/
2254
2255/*! \fn QString::const_iterator QString::cend() const
2256 \since 5.0
2257
2258 Returns a const \l{STL-style iterators}{STL-style iterator} pointing just
2259 after the last character in the string.
2260
2261 \include qstring.cpp iterator-invalidation-func-desc
2262
2263 \sa cbegin(), end()
2264*/
2265
2266/*! \fn QString::const_iterator QString::constEnd() const
2267
2268 Returns a const \l{STL-style iterators}{STL-style iterator} pointing just
2269 after the last character in the string.
2270
2271 \include qstring.cpp iterator-invalidation-func-desc
2272
2273 \sa constBegin(), end()
2274*/
2275
2276/*! \fn QString::reverse_iterator QString::rbegin()
2277 \since 5.6
2278
2279 Returns a \l{STL-style iterators}{STL-style} reverse iterator pointing to
2280 the first character in the string, in reverse order.
2281
2282 \include qstring.cpp iterator-invalidation-func-desc
2283
2284 \sa begin(), crbegin(), rend()
2285*/
2286
2287/*! \fn QString::const_reverse_iterator QString::rbegin() const
2288 \since 5.6
2289 \overload
2290*/
2291
2292/*! \fn QString::const_reverse_iterator QString::crbegin() const
2293 \since 5.6
2294
2295 Returns a const \l{STL-style iterators}{STL-style} reverse iterator
2296 pointing to the first character in the string, in reverse order.
2297
2298 \include qstring.cpp iterator-invalidation-func-desc
2299
2300 \sa begin(), rbegin(), rend()
2301*/
2302
2303/*! \fn QString::reverse_iterator QString::rend()
2304 \since 5.6
2305
2306 Returns a \l{STL-style iterators}{STL-style} reverse iterator pointing just
2307 after the last character in the string, in reverse order.
2308
2309 \include qstring.cpp iterator-invalidation-func-desc
2310
2311 \sa end(), crend(), rbegin()
2312*/
2313
2314/*! \fn QString::const_reverse_iterator QString::rend() const
2315 \since 5.6
2316 \overload
2317*/
2318
2319/*! \fn QString::const_reverse_iterator QString::crend() const
2320 \since 5.6
2321
2322 Returns a const \l{STL-style iterators}{STL-style} reverse iterator
2323 pointing just after the last character in the string, in reverse order.
2324
2325 \include qstring.cpp iterator-invalidation-func-desc
2326
2327 \sa end(), rend(), rbegin()
2328*/
2329
2330/*!
2331 \fn QString::QString()
2332
2333 Constructs a null string. Null strings are also considered empty.
2334
2335 \sa isEmpty(), isNull(), {Distinction Between Null and Empty Strings}
2336*/
2337
2338/*!
2339 \fn QString::QString(QString &&other)
2340
2341 Move-constructs a QString instance, making it point at the same
2342 object that \a other was pointing to.
2343
2344 \since 5.2
2345*/
2346
2347/*! \fn QString::QString(const char *str)
2348
2349 Constructs a string initialized with the 8-bit string \a str. The
2350 given const char pointer is converted to Unicode using the
2351 fromUtf8() function.
2352
2353 You can disable this constructor by defining
2354 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
2355 can be useful if you want to ensure that all user-visible strings
2356 go through QObject::tr(), for example.
2357
2358 \note Defining \l QT_RESTRICTED_CAST_FROM_ASCII also disables
2359 this constructor, but enables a \c{QString(const char (&ch)[N])}
2360 constructor instead. Using non-literal input, or input with
2361 embedded NUL characters, or non-7-bit characters is undefined
2362 in this case.
2363
2364 \sa fromLatin1(), fromLocal8Bit(), fromUtf8()
2365*/
2366
2367/*! \fn QString::QString(const char8_t *str)
2368
2369 Constructs a string initialized with the UTF-8 string \a str. The
2370 given const char8_t pointer is converted to Unicode using the
2371 fromUtf8() function.
2372
2373 \since 6.1
2374 \sa fromLatin1(), fromLocal8Bit(), fromUtf8()
2375*/
2376
2377/*!
2378 \fn QString::QString(QStringView sv)
2379
2380 Constructs a string initialized with the string view's data.
2381
2382 The QString will be null if and only if \a sv is null.
2383
2384 \since 6.8
2385
2386 \sa fromUtf16()
2387*/
2388
2389/*
2390//! [from-std-string]
2391Returns a copy of the \a str string. The given string is assumed to be
2392encoded in \1, and is converted to QString using the \2 function.
2393//! [from-std-string]
2394*/
2395
2396/*! \fn QString QString::fromStdString(const std::string &str)
2397
2398 \include qstring.cpp {from-std-string} {UTF-8} {fromUtf8()}
2399
2400 \sa fromLatin1(), fromLocal8Bit(), fromUtf8(), QByteArray::fromStdString()
2401*/
2402
2403/*! \fn QString QString::fromStdWString(const std::wstring &str)
2404
2405 Returns a copy of the \a str string. The given string is assumed
2406 to be encoded in UTF-16 if the size of wchar_t is 2 bytes (e.g. on
2407 Windows) and UCS-4 if the size of wchar_t is 4 bytes (most Unix
2408 systems).
2409
2410 \sa fromUtf16(), fromLatin1(), fromLocal8Bit(), fromUtf8(), fromUcs4(),
2411 fromStdU16String(), fromStdU32String()
2412*/
2413
2414/*! \fn QString QString::fromWCharArray(const wchar_t *string, qsizetype size)
2415 \since 4.2
2416
2417 Reads the first \a size code units of the \c wchar_t array to whose start
2418 \a string points, converting them to Unicode and returning the result as
2419 a QString. The encoding used by \c wchar_t is assumed to be UTF-32 if the
2420 type's size is four bytes or UTF-16 if its size is two bytes.
2421
2422 If \a size is -1 (default), the \a string must be '\\0'-terminated.
2423
2424 \sa fromUtf16(), fromLatin1(), fromLocal8Bit(), fromUtf8(), fromUcs4(),
2425 fromStdWString()
2426*/
2427
2428/*! \fn std::wstring QString::toStdWString() const
2429
2430 Returns a std::wstring object with the data contained in this
2431 QString. The std::wstring is encoded in UTF-16 on platforms where
2432 wchar_t is 2 bytes wide (for example, Windows) and in UTF-32 on platforms
2433 where wchar_t is 4 bytes wide (most Unix systems).
2434
2435 This method is mostly useful to pass a QString to a function
2436 that accepts a std::wstring object.
2437
2438 \sa utf16(), toLatin1(), toUtf8(), toLocal8Bit(), toStdU16String(),
2439 toStdU32String()
2440*/
2441
2442qsizetype QString::toUcs4_helper(const char16_t *uc, qsizetype length, char32_t *out)
2443{
2444 qsizetype count = 0;
2445
2446 QStringIterator i(QStringView(uc, length));
2447 while (i.hasNext())
2448 out[count++] = i.next();
2449
2450 return count;
2451}
2452
2453/*! \fn qsizetype QString::toWCharArray(wchar_t *array) const
2454 \since 4.2
2455
2456 Fills the \a array with the data contained in this QString object.
2457 The array is encoded in UTF-16 on platforms where
2458 wchar_t is 2 bytes wide (e.g. Windows) and in UTF-32 on platforms
2459 where wchar_t is 4 bytes wide (most Unix systems).
2460
2461 \a array has to be allocated by the caller and contain enough space to
2462 hold the complete string (allocating the array with the same length as the
2463 string is always sufficient).
2464
2465 This function returns the actual length of the string in \a array.
2466
2467 \note This function does not append a null character to the array.
2468
2469 \sa utf16(), toUcs4(), toLatin1(), toUtf8(), toLocal8Bit(), toStdWString(),
2470 QStringView::toWCharArray()
2471*/
2472
2473/*! \fn QString::QString(const QString &other)
2474
2475 Constructs a copy of \a other.
2476
2477 This operation takes \l{constant time}, because QString is
2478 \l{implicitly shared}. This makes returning a QString from a
2479 function very fast. If a shared instance is modified, it will be
2480 copied (copy-on-write), and that takes \l{linear time}.
2481
2482 \sa operator=()
2483*/
2484
2485/*!
2486 Constructs a string initialized with the first \a size characters
2487 of the QChar array \a unicode.
2488
2489 If \a unicode is \nullptr, a null string is constructed.
2490
2491 If \a size is negative, \a unicode is assumed to point to a '\\0'-terminated
2492 array and its length is determined dynamically. The terminating
2493 null character is not considered part of the string.
2494
2495 QString makes a deep copy of the string data. The unicode data is copied as
2496 is and the Byte Order Mark is preserved if present.
2497
2498 \sa fromRawData()
2499*/
2500QString::QString(const QChar *unicode, qsizetype size)
2501{
2502 if (!unicode) {
2503 d.clear();
2504 } else {
2505 if (size < 0)
2506 size = QtPrivate::qustrlen(reinterpret_cast<const char16_t *>(unicode));
2507 if (!size) {
2508 d = DataPointer::fromRawData(&_empty, 0);
2509 } else {
2510 d = DataPointer(size, size);
2511 Q_CHECK_PTR(d.data());
2512 memcpy(d.data(), unicode, size * sizeof(QChar));
2513 d.data()[size] = '\0';
2514 }
2515 }
2516}
2517
2518/*!
2519 Constructs a string of the given \a size with every character set
2520 to \a ch.
2521
2522 \sa fill()
2523*/
2524QString::QString(qsizetype size, QChar ch)
2525{
2526 if (size <= 0) {
2527 d = DataPointer::fromRawData(&_empty, 0);
2528 } else {
2529 d = DataPointer(size, size);
2530 Q_CHECK_PTR(d.data());
2531 d.data()[size] = '\0';
2532 char16_t *b = d.data();
2533 char16_t *e = d.data() + size;
2534 const char16_t value = ch.unicode();
2535 std::fill(b, e, value);
2536 }
2537}
2538
2539/*! \fn QString::QString(qsizetype size, Qt::Initialization)
2540 \internal
2541
2542 Constructs a string of the given \a size without initializing the
2543 characters. This is only used in \c QStringBuilder::toString().
2544*/
2545QString::QString(qsizetype size, Qt::Initialization)
2546{
2547 if (size <= 0) {
2548 d = DataPointer::fromRawData(&_empty, 0);
2549 } else {
2550 d = DataPointer(size, size);
2551 Q_CHECK_PTR(d.data());
2552 d.data()[size] = '\0';
2553 }
2554}
2555
2556/*! \fn QString::QString(QLatin1StringView str)
2557
2558 Constructs a copy of the Latin-1 string viewed by \a str.
2559
2560 \sa fromLatin1()
2561*/
2562
2563/*!
2564 Constructs a string of size 1 containing the character \a ch.
2565*/
2566QString::QString(QChar ch)
2567{
2568 d = DataPointer(1, 1);
2569 Q_CHECK_PTR(d.data());
2570 d.data()[0] = ch.unicode();
2571 d.data()[1] = '\0';
2572}
2573
2574/*! \fn QString::QString(const QByteArray &ba)
2575
2576 Constructs a string initialized with the byte array \a ba. The
2577 given byte array is converted to Unicode using fromUtf8().
2578
2579 You can disable this constructor by defining
2580 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
2581 can be useful if you want to ensure that all user-visible strings
2582 go through QObject::tr(), for example.
2583
2584 \note Any null ('\\0') bytes in the byte array will be included in this
2585 string, converted to Unicode null characters (U+0000). This behavior is
2586 different from Qt 5.x.
2587
2588 \sa fromLatin1(), fromLocal8Bit(), fromUtf8()
2589*/
2590
2591/*! \fn QString::QString(const Null &)
2592 \internal
2593*/
2594
2595/*! \fn QString::QString(QStringPrivate)
2596 \internal
2597*/
2598
2599/*! \fn QString &QString::operator=(const QString::Null &)
2600 \internal
2601*/
2602
2603/*!
2604 \fn QString::~QString()
2605
2606 Destroys the string.
2607*/
2608
2609
2610/*! \fn void QString::swap(QString &other)
2611 \since 4.8
2612 \memberswap{string}
2613*/
2614
2615/*! \fn void QString::detach()
2616
2617 Ensures that this string's data is no longer
2618 \l{Implicit Sharing}{shared} with other instances.
2619*/
2620
2621/*! \fn bool QString::isDetached() const
2622
2623 \internal
2624*/
2625
2626/*! \fn bool QString::isSharedWith(const QString &other) const
2627
2628 \internal
2629*/
2630
2631/*! \fn QString::operator std::u16string_view() const
2632 \target qstring-operator-std-u16string_view
2633 \since 6.7
2634
2635 Converts this QString object to a \c{std::u16string_view} object.
2636*/
2637
2638static bool needsReallocate(const QString &str, qsizetype newSize)
2639{
2640 const auto capacityAtEnd = str.capacity() - str.data_ptr().freeSpaceAtBegin();
2641 return newSize > capacityAtEnd;
2642}
2643
2644/*!
2645 Sets the size of the string to \a size characters.
2646
2647 If \a size is greater than the current size, the string is
2648 extended to make it \a size characters long with the extra
2649 characters added to the end. The new characters are uninitialized.
2650
2651 If \a size is less than the current size, characters beyond position
2652 \a size are excluded from the string.
2653
2654 \note While resize() will grow the capacity if needed, it never shrinks
2655 capacity. To shed excess capacity, use squeeze().
2656
2657 Example:
2658
2659 \snippet qstring/main.cpp 45
2660
2661 If you want to append a certain number of identical characters to
2662 the string, use the \l {QString::}{resize(qsizetype, QChar)} overload.
2663
2664 If you want to expand the string so that it reaches a certain
2665 width and fill the new positions with a particular character, use
2666 the leftJustified() function:
2667
2668 If \a size is negative, it is equivalent to passing zero.
2669
2670 \snippet qstring/main.cpp 47
2671
2672 \sa truncate(), reserve(), squeeze()
2673*/
2674
2675void QString::resize(qsizetype size)
2676{
2677 if (size < 0)
2678 size = 0;
2679
2680 if (d->needsDetach() || needsReallocate(*this, size))
2681 reallocData(size, QArrayData::Grow);
2682 d.size = size;
2683 if (d->allocatedCapacity())
2684 d.data()[size] = u'\0';
2685}
2686
2687/*!
2688 \overload
2689 \since 5.7
2690
2691 Unlike \l {QString::}{resize(qsizetype)}, this overload
2692 initializes the new characters to \a fillChar:
2693
2694 \snippet qstring/main.cpp 46
2695*/
2696
2697void QString::resize(qsizetype newSize, QChar fillChar)
2698{
2699 const qsizetype oldSize = size();
2700 resize(newSize);
2701 const qsizetype difference = size() - oldSize;
2702 if (difference > 0)
2703 std::fill_n(d.data() + oldSize, difference, fillChar.unicode());
2704}
2705
2706
2707/*!
2708 \since 6.8
2709
2710 Sets the size of the string to \a size characters. If the size of
2711 the string grows, the new characters are uninitialized.
2712
2713 The behavior is identical to \c{resize(size)}.
2714
2715 \sa resize()
2716*/
2717
2718void QString::resizeForOverwrite(qsizetype size)
2719{
2720 resize(size);
2721}
2722
2723
2724/*! \fn qsizetype QString::capacity() const
2725
2726 Returns the maximum number of characters that can be stored in
2727 the string without forcing a reallocation.
2728
2729 The sole purpose of this function is to provide a means of fine
2730 tuning QString's memory usage. In general, you will rarely ever
2731 need to call this function. If you want to know how many
2732 characters are in the string, call size().
2733
2734 \note A statically allocated string will report a capacity of 0,
2735 even if it's not empty.
2736
2737 \note The free space position in the allocated memory block is undefined. In
2738 other words, one should not assume that the free memory is always located
2739 after the initialized elements.
2740
2741 \sa reserve(), squeeze()
2742*/
2743
2744/*!
2745 \fn void QString::reserve(qsizetype size)
2746
2747 Ensures the string has space for at least \a size characters.
2748
2749 If you know in advance how large a string will be, you can call this
2750 function to save repeated reallocation while building it.
2751 This can improve performance when building a string incrementally.
2752 A long sequence of operations that add to a string may trigger several
2753 reallocations, the last of which may leave you with significantly more
2754 space than you need. This is less efficient than doing a single
2755 allocation of the right size at the start.
2756
2757 If in doubt about how much space shall be needed, it is usually better to
2758 use an upper bound as \a size, or a high estimate of the most likely size,
2759 if a strict upper bound would be much bigger than this. If \a size is an
2760 underestimate, the string will grow as needed once the reserved size is
2761 exceeded, which may lead to a larger allocation than your best
2762 overestimate would have and will slow the operation that triggers it.
2763
2764 \warning reserve() reserves memory but does not change the size of the
2765 string. Accessing data beyond the end of the string is undefined behavior.
2766 If you need to access memory beyond the current end of the string,
2767 use resize().
2768
2769 This function is useful for code that needs to build up a long
2770 string and wants to avoid repeated reallocation. In this example,
2771 we want to add to the string until some condition is \c true, and
2772 we're fairly sure that size is large enough to make a call to
2773 reserve() worthwhile:
2774
2775 \snippet qstring/main.cpp 44
2776
2777 \sa squeeze(), capacity(), resize()
2778*/
2779
2780/*!
2781 \fn void QString::squeeze()
2782
2783 Releases any memory not required to store the character data.
2784
2785 The sole purpose of this function is to provide a means of fine
2786 tuning QString's memory usage. In general, you will rarely ever
2787 need to call this function.
2788
2789 \sa reserve(), capacity()
2790*/
2791
2792void QString::reallocData(qsizetype alloc, QArrayData::AllocationOption option)
2793{
2794 if (!alloc) {
2795 d = DataPointer::fromRawData(&_empty, 0);
2796 return;
2797 }
2798
2799 // don't use reallocate path when reducing capacity and there's free space
2800 // at the beginning: might shift data pointer outside of allocated space
2801 const bool cannotUseReallocate = d.freeSpaceAtBegin() > 0;
2802
2803 if (d->needsDetach() || cannotUseReallocate) {
2804 DataPointer dd(alloc, qMin(alloc, d.size), option);
2805 Q_CHECK_PTR(dd.data());
2806 if (dd.size > 0)
2807 ::memcpy(dd.data(), d.data(), dd.size * sizeof(QChar));
2808 dd.data()[dd.size] = 0;
2809 d.swap(dd);
2810 } else {
2811 d->reallocate(alloc, option);
2812 }
2813}
2814
2815void QString::reallocGrowData(qsizetype n)
2816{
2817 if (!n) // expected to always allocate
2818 n = 1;
2819
2820 if (d->needsDetach()) {
2821 DataPointer dd(DataPointer::allocateGrow(d, n, QArrayData::GrowsAtEnd));
2822 Q_CHECK_PTR(dd.data());
2823 dd->copyAppend(d.data(), d.data() + d.size);
2824 dd.data()[dd.size] = 0;
2825 d.swap(dd);
2826 } else {
2827 d->reallocate(d.constAllocatedCapacity() + n, QArrayData::Grow);
2828 }
2829}
2830
2831/*! \fn void QString::clear()
2832
2833 Clears the contents of the string and makes it null.
2834
2835 \sa resize(), isNull()
2836*/
2837
2838/*! \fn QString &QString::operator=(const QString &other)
2839
2840 Assigns \a other to this string and returns a reference to this
2841 string.
2842*/
2843
2844QString &QString::operator=(const QString &other) noexcept
2845{
2846 d = other.d;
2847 return *this;
2848}
2849
2850/*!
2851 \fn QString &QString::operator=(QString &&other)
2852
2853 Move-assigns \a other to this QString instance.
2854
2855 \since 5.2
2856*/
2857
2858/*! \fn QString &QString::operator=(QLatin1StringView str)
2859
2860 \overload operator=()
2861
2862 Assigns the Latin-1 string viewed by \a str to this string.
2863*/
2864QString &QString::operator=(QLatin1StringView other)
2865{
2866 const qsizetype capacityAtEnd = capacity() - d.freeSpaceAtBegin();
2867 if (isDetached() && other.size() <= capacityAtEnd) { // assumes d->alloc == 0 -> !isDetached() (sharedNull)
2868 d.size = other.size();
2869 d.data()[other.size()] = 0;
2870 qt_from_latin1(d.data(), other.latin1(), other.size());
2871 } else {
2872 *this = fromLatin1(other.latin1(), other.size());
2873 }
2874 return *this;
2875}
2876
2877/*! \fn QString &QString::operator=(const QByteArray &ba)
2878
2879 \overload operator=()
2880
2881 Assigns \a ba to this string. The byte array is converted to Unicode
2882 using the fromUtf8() function.
2883
2884 You can disable this operator by defining
2885 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
2886 can be useful if you want to ensure that all user-visible strings
2887 go through QObject::tr(), for example.
2888*/
2889
2890/*! \fn QString &QString::operator=(const char *str)
2891
2892 \overload operator=()
2893
2894 Assigns \a str to this string. The const char pointer is converted
2895 to Unicode using the fromUtf8() function.
2896
2897 You can disable this operator by defining \l QT_NO_CAST_FROM_ASCII
2898 or \l QT_RESTRICTED_CAST_FROM_ASCII when you compile your applications.
2899 This can be useful if you want to ensure that all user-visible strings
2900 go through QObject::tr(), for example.
2901*/
2902
2903/*!
2904 \overload operator=()
2905
2906 Sets the string to contain the single character \a ch.
2907*/
2908QString &QString::operator=(QChar ch)
2909{
2910 return assign(1, ch);
2911}
2912
2913/*!
2914 \fn QString& QString::insert(qsizetype position, const QString &str)
2915
2916 Inserts the string \a str at the given index \a position and
2917 returns a reference to this string.
2918
2919 Example:
2920
2921 \snippet qstring/main.cpp 26
2922
2923//! [string-grow-at-insertion]
2924 This string grows to accommodate the insertion. If \a position is beyond
2925 the end of the string, space characters are appended to the string to reach
2926 this \a position, followed by \a str.
2927//! [string-grow-at-insertion]
2928
2929 \sa append(), prepend(), replace(), remove()
2930*/
2931
2932/*!
2933 \fn QString& QString::insert(qsizetype position, QStringView str)
2934 \since 6.0
2935 \overload insert()
2936
2937 Inserts the string view \a str at the given index \a position and
2938 returns a reference to this string.
2939
2940 \include qstring.cpp string-grow-at-insertion
2941*/
2942
2943
2944/*!
2945 \fn QString& QString::insert(qsizetype position, const char *str)
2946 \since 5.5
2947 \overload insert()
2948
2949 Inserts the C string \a str at the given index \a position and
2950 returns a reference to this string.
2951
2952 \include qstring.cpp string-grow-at-insertion
2953
2954 This function is not available when \l QT_NO_CAST_FROM_ASCII is
2955 defined.
2956*/
2957
2958/*!
2959 \fn QString& QString::insert(qsizetype position, const QByteArray &str)
2960 \since 5.5
2961 \overload insert()
2962
2963 Interprets the contents of \a str as UTF-8, inserts the Unicode string
2964 it encodes at the given index \a position and returns a reference to
2965 this string.
2966
2967 \include qstring.cpp string-grow-at-insertion
2968
2969 This function is not available when \l QT_NO_CAST_FROM_ASCII is
2970 defined.
2971*/
2972
2973/*! \internal
2974 T is a view or a container on/of QChar, char16_t, or char
2975*/
2976template <typename T>
2977static void insert_helper(QString &str, qsizetype i, const T &toInsert)
2978{
2979 auto &str_d = str.data_ptr();
2980 qsizetype difference = 0;
2981 if (Q_UNLIKELY(i > str_d.size))
2982 difference = i - str_d.size;
2983 const qsizetype oldSize = str_d.size;
2984 const qsizetype insert_size = toInsert.size();
2985 const qsizetype newSize = str_d.size + difference + insert_size;
2986 const auto side = i == 0 ? QArrayData::GrowsAtBeginning : QArrayData::GrowsAtEnd;
2987
2988 if (str_d.needsDetach() || needsReallocate(str, newSize)) {
2989 const auto cbegin = str.cbegin();
2990 const auto cend = str.cend();
2991 const auto insert_start = difference == 0 ? std::next(cbegin, i) : cend;
2992 QString other;
2993 // Using detachAndGrow() so that prepend optimization works and QStringBuilder
2994 // unittests pass
2995 other.data_ptr().detachAndGrow(side, newSize, nullptr, nullptr);
2996 other.append(QStringView(cbegin, insert_start));
2997 other.resize(i, u' ');
2998 other.append(toInsert);
2999 other.append(QStringView(insert_start, cend));
3000 str.swap(other);
3001 return;
3002 }
3003
3004 str_d.detachAndGrow(side, difference + insert_size, nullptr, nullptr);
3005 Q_CHECK_PTR(str_d.data());
3006 str.resize(newSize);
3007
3008 auto begin = str_d.begin();
3009 auto old_end = std::next(begin, oldSize);
3010 std::fill_n(old_end, difference, u' ');
3011 auto insert_start = std::next(begin, i);
3012 if (difference == 0)
3013 std::move_backward(insert_start, old_end, str_d.end());
3014
3015 using Char = std::remove_cv_t<typename T::value_type>;
3016 if constexpr(std::is_same_v<Char, QChar>)
3017 std::copy_n(reinterpret_cast<const char16_t *>(toInsert.data()), insert_size, insert_start);
3018 else if constexpr (std::is_same_v<Char, char16_t>)
3019 std::copy_n(toInsert.data(), insert_size, insert_start);
3020 else if constexpr (std::is_same_v<Char, char>)
3021 qt_from_latin1(insert_start, toInsert.data(), insert_size);
3022}
3023
3024/*!
3025 \fn QString &QString::insert(qsizetype position, QLatin1StringView str)
3026 \overload insert()
3027
3028 Inserts the Latin-1 string viewed by \a str at the given index \a position.
3029
3030 \include qstring.cpp string-grow-at-insertion
3031*/
3032QString &QString::insert(qsizetype i, QLatin1StringView str)
3033{
3034 const char *s = str.latin1();
3035 if (i < 0 || !s || !(*s))
3036 return *this;
3037
3038 insert_helper(*this, i, str);
3039 return *this;
3040}
3041
3042/*!
3043 \fn QString &QString::insert(qsizetype position, QUtf8StringView str)
3044 \overload insert()
3045 \since 6.5
3046
3047 Inserts the UTF-8 string view \a str at the given index \a position.
3048
3049 \note Inserting variable-width UTF-8-encoded string data is conceptually slower
3050 than inserting fixed-width string data such as UTF-16 (QStringView) or Latin-1
3051 (QLatin1StringView) and should thus be used sparingly.
3052
3053 \include qstring.cpp string-grow-at-insertion
3054*/
3055QString &QString::insert(qsizetype i, QUtf8StringView s)
3056{
3057 auto insert_size = s.size();
3058 if (i < 0 || insert_size <= 0)
3059 return *this;
3060
3061 qsizetype difference = 0;
3062 if (Q_UNLIKELY(i > d.size))
3063 difference = i - d.size;
3064
3065 const qsizetype newSize = d.size + difference + insert_size;
3066
3067 if (d.needsDetach() || needsReallocate(*this, newSize)) {
3068 const auto cbegin = this->cbegin();
3069 const auto insert_start = difference == 0 ? std::next(cbegin, i) : cend();
3070 QString other;
3071 other.reserve(newSize);
3072 other.append(QStringView(cbegin, insert_start));
3073 if (difference > 0)
3074 other.resize(i, u' ');
3075 other.append(s);
3076 other.append(QStringView(insert_start, cend()));
3077 swap(other);
3078 return *this;
3079 }
3080
3081 if (i >= d.size) {
3082 d.detachAndGrow(QArrayData::GrowsAtEnd, difference + insert_size, nullptr, nullptr);
3083 Q_CHECK_PTR(d.data());
3084
3085 if (difference > 0)
3086 resize(i, u' ');
3087 append(s);
3088 } else {
3089 // Optimal insertion of Utf8 data is at the end, anywhere else could
3090 // potentially lead to moving characters twice if Utf8 data size
3091 // (variable-width) is less than the equivalent Utf16 data size
3092 QVarLengthArray<char16_t> buffer(insert_size); // ### optimize (QTBUG-108546)
3093 char16_t *b = QUtf8::convertToUnicode(buffer.data(), s);
3094 insert_helper(*this, i, QStringView(buffer.data(), b));
3095 }
3096
3097 return *this;
3098}
3099
3100/*!
3101 \fn QString& QString::insert(qsizetype position, const QChar *unicode, qsizetype size)
3102 \overload insert()
3103
3104 Inserts the first \a size characters of the QChar array \a unicode
3105 at the given index \a position in the string.
3106
3107 This string grows to accommodate the insertion. If \a position is beyond
3108 the end of the string, space characters are appended to the string to reach
3109 this \a position, followed by \a size characters of the QChar array
3110 \a unicode.
3111*/
3112QString& QString::insert(qsizetype i, const QChar *unicode, qsizetype size)
3113{
3114 if (i < 0 || size <= 0)
3115 return *this;
3116
3117 // In case when data points into "this"
3118 if (!d->needsDetach() && QtPrivate::q_points_into_range(unicode, *this)) {
3119 QVarLengthArray copy(unicode, unicode + size);
3120 insert(i, copy.data(), size);
3121 } else {
3122 insert_helper(*this, i, QStringView(unicode, size));
3123 }
3124
3125 return *this;
3126}
3127
3128/*!
3129 \fn QString& QString::insert(qsizetype position, QChar ch)
3130 \overload insert()
3131
3132 Inserts \a ch at the given index \a position in the string.
3133
3134 This string grows to accommodate the insertion. If \a position is beyond
3135 the end of the string, space characters are appended to the string to reach
3136 this \a position, followed by \a ch.
3137*/
3138
3139QString& QString::insert(qsizetype i, QChar ch)
3140{
3141 if (i < 0)
3142 i += d.size;
3143 return insert(i, &ch, 1);
3144}
3145
3146/*!
3147 Appends the string \a str onto the end of this string.
3148
3149 Example:
3150
3151 \snippet qstring/main.cpp 9
3152
3153 This is the same as using the insert() function:
3154
3155 \snippet qstring/main.cpp 10
3156
3157 The append() function is typically very fast (\l{constant time}),
3158 because QString preallocates extra space at the end of the string
3159 data so it can grow without reallocating the entire string each
3160 time.
3161
3162 \sa operator+=(), prepend(), insert()
3163*/
3164QString &QString::append(const QString &str)
3165{
3166 if (!str.isNull()) {
3167 if (isNull()) {
3168 if (Q_UNLIKELY(!str.d.isMutable()))
3169 assign(str); // fromRawData, so we do a deep copy
3170 else
3171 operator=(str);
3172 } else if (str.size()) {
3173 append(str.constData(), str.size());
3174 }
3175 }
3176 return *this;
3177}
3178
3179/*!
3180 \fn QString &QString::append(QStringView v)
3181 \overload append()
3182 \since 6.0
3183
3184 Appends the given string view \a v to this string and returns the result.
3185*/
3186
3187/*!
3188 \overload append()
3189 \since 5.0
3190
3191 Appends \a len characters from the QChar array \a str to this string.
3192*/
3193QString &QString::append(const QChar *str, qsizetype len)
3194{
3195 if (str && len > 0) {
3196 static_assert(sizeof(QChar) == sizeof(char16_t), "Unexpected difference in sizes");
3197 // the following should be safe as QChar uses char16_t as underlying data
3198 const char16_t *char16String = reinterpret_cast<const char16_t *>(str);
3199 d->growAppend(char16String, char16String + len);
3200 d.data()[d.size] = u'\0';
3201 }
3202 return *this;
3203}
3204
3205/*!
3206 \overload append()
3207
3208 Appends the Latin-1 string viewed by \a str to this string.
3209*/
3210QString &QString::append(QLatin1StringView str)
3211{
3212 append_helper(*this, str);
3213 return *this;
3214}
3215
3216/*!
3217 \overload append()
3218 \since 6.5
3219
3220 Appends the UTF-8 string view \a str to this string.
3221*/
3222QString &QString::append(QUtf8StringView str)
3223{
3224 append_helper(*this, str);
3225 return *this;
3226}
3227
3228/*! \fn QString &QString::append(const QByteArray &ba)
3229
3230 \overload append()
3231
3232 Appends the byte array \a ba to this string. The given byte array
3233 is converted to Unicode using the fromUtf8() function.
3234
3235 You can disable this function by defining \l QT_NO_CAST_FROM_ASCII
3236 when you compile your applications. This can be useful if you want
3237 to ensure that all user-visible strings go through QObject::tr(),
3238 for example.
3239*/
3240
3241/*! \fn QString &QString::append(const char *str)
3242
3243 \overload append()
3244
3245 Appends the string \a str to this string. The given const char
3246 pointer is converted to Unicode using the fromUtf8() function.
3247
3248 You can disable this function by defining \l QT_NO_CAST_FROM_ASCII
3249 when you compile your applications. This can be useful if you want
3250 to ensure that all user-visible strings go through QObject::tr(),
3251 for example.
3252*/
3253
3254/*!
3255 \overload append()
3256
3257 Appends the character \a ch to this string.
3258*/
3259QString &QString::append(QChar ch)
3260{
3261 d.detachAndGrow(QArrayData::GrowsAtEnd, 1, nullptr, nullptr);
3262 d->copyAppend(1, ch.unicode());
3263 d.data()[d.size] = '\0';
3264 return *this;
3265}
3266
3267/*! \fn QString &QString::prepend(const QString &str)
3268
3269 Prepends the string \a str to the beginning of this string and
3270 returns a reference to this string.
3271
3272 This operation is typically very fast (\l{constant time}), because
3273 QString preallocates extra space at the beginning of the string data,
3274 so it can grow without reallocating the entire string each time.
3275
3276 Example:
3277
3278 \snippet qstring/main.cpp 36
3279
3280 \sa append(), insert()
3281*/
3282
3283/*! \fn QString &QString::prepend(QLatin1StringView str)
3284
3285 \overload prepend()
3286
3287 Prepends the Latin-1 string viewed by \a str to this string.
3288*/
3289
3290/*! \fn QString &QString::prepend(QUtf8StringView str)
3291 \since 6.5
3292 \overload prepend()
3293
3294 Prepends the UTF-8 string view \a str to this string.
3295*/
3296
3297/*! \fn QString &QString::prepend(const QChar *str, qsizetype len)
3298 \since 5.5
3299 \overload prepend()
3300
3301 Prepends \a len characters from the QChar array \a str to this string and
3302 returns a reference to this string.
3303*/
3304
3305/*! \fn QString &QString::prepend(QStringView str)
3306 \since 6.0
3307 \overload prepend()
3308
3309 Prepends the string view \a str to the beginning of this string and
3310 returns a reference to this string.
3311*/
3312
3313/*! \fn QString &QString::prepend(const QByteArray &ba)
3314
3315 \overload prepend()
3316
3317 Prepends the byte array \a ba to this string. The byte array is
3318 converted to Unicode using the fromUtf8() function.
3319
3320 You can disable this function by defining
3321 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
3322 can be useful if you want to ensure that all user-visible strings
3323 go through QObject::tr(), for example.
3324*/
3325
3326/*! \fn QString &QString::prepend(const char *str)
3327
3328 \overload prepend()
3329
3330 Prepends the string \a str to this string. The const char pointer
3331 is converted to Unicode using the fromUtf8() function.
3332
3333 You can disable this function by defining
3334 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
3335 can be useful if you want to ensure that all user-visible strings
3336 go through QObject::tr(), for example.
3337*/
3338
3339/*! \fn QString &QString::prepend(QChar ch)
3340
3341 \overload prepend()
3342
3343 Prepends the character \a ch to this string.
3344*/
3345
3346/*!
3347 \fn QString &QString::assign(QAnyStringView v)
3348 \since 6.6
3349
3350 Replaces the contents of this string with a copy of \a v and returns a
3351 reference to this string.
3352
3353 The size of this string will be equal to the size of \a v, converted to
3354 UTF-16 as if by \c{v.toString()}. Unlike QAnyStringView::toString(), however,
3355 this function only allocates memory if the estimated size exceeds the capacity
3356 of this string or this string is shared.
3357
3358 \sa QAnyStringView::toString()
3359*/
3360
3361/*!
3362 \fn QString &QString::assign(qsizetype n, QChar c)
3363 \since 6.6
3364
3365 Replaces the contents of this string with \a n copies of \a c and
3366 returns a reference to this string.
3367
3368 The size of this string will be equal to \a n, which has to be non-negative.
3369
3370 This function will only allocate memory if \a n exceeds the capacity of this
3371 string or this string is shared.
3372
3373 \sa fill()
3374*/
3375
3376/*!
3377 \fn template <typename InputIterator, QString::if_compatible_iterator<InputIterator>> QString &QString::assign(InputIterator first, InputIterator last)
3378 \since 6.6
3379
3380 Replaces the contents of this string with a copy of the elements in the
3381 iterator range [\a first, \a last) and returns a reference to this string.
3382
3383 The size of this string will be equal to the decoded length of the elements
3384 in the range [\a first, \a last), which need not be the same as the length of
3385 the range itself, because this function transparently recodes the input
3386 character set to UTF-16.
3387
3388 This function will only allocate memory if the number of elements in the
3389 range, or, for non-UTF-16-encoded input, the maximum possible size of the
3390 resulting string, exceeds the capacity of this string, or if this string is
3391 shared.
3392
3393 \note The behavior is undefined if either argument is an iterator into *this or
3394 [\a first, \a last) is not a valid range.
3395
3396 \constraints
3397 \c InputIterator meets the requirements of a
3398 \l {https://en.cppreference.com/w/cpp/named_req/InputIterator} {LegacyInputIterator}
3399 and the \c{value_type} of \c InputIterator is one of the following character types:
3400 \list
3401 \li QChar
3402 \li QLatin1Char
3403 \li \c {char}
3404 \li \c {unsigned char}
3405 \li \c {signed char}
3406 \li \c {char8_t}
3407 \li \c char16_t
3408 \li (on platforms, such as Windows, where it is a 16-bit type) \c wchar_t
3409 \li \c char32_t
3410 \endlist
3411*/
3412
3413QString &QString::assign(QAnyStringView s)
3414{
3415 if (s.size() <= capacity() && isDetached()) {
3416 const auto offset = d.freeSpaceAtBegin();
3417 if (offset)
3418 d.setBegin(d.begin() - offset);
3419 resize(0);
3420 s.visit([this](auto input) {
3421 this->append(input);
3422 });
3423 } else {
3424 *this = s.toString();
3425 }
3426 return *this;
3427}
3428
3429#ifndef QT_BOOTSTRAPPED
3430QString &QString::assign_helper(const char32_t *data, qsizetype len)
3431{
3432 // worst case: each char32_t requires a surrogate pair, so
3433 const auto requiredCapacity = len * 2;
3434 if (requiredCapacity <= capacity() && isDetached()) {
3435 const auto offset = d.freeSpaceAtBegin();
3436 if (offset)
3437 d.setBegin(d.begin() - offset);
3438 auto begin = reinterpret_cast<QChar *>(d.begin());
3439 auto ba = QByteArrayView(reinterpret_cast<const std::byte*>(data), len * sizeof(char32_t));
3440 QStringConverter::State state;
3441 const auto end = QUtf32::convertToUnicode(begin, ba, &state, DetectEndianness);
3442 d.size = end - begin;
3443 d.data()[d.size] = u'\0';
3444 } else {
3445 *this = QString::fromUcs4(data, len);
3446 }
3447 return *this;
3448}
3449#endif
3450
3451/*!
3452 \fn QString &QString::remove(qsizetype position, qsizetype n)
3453
3454 Removes \a n characters from the string, starting at the given \a
3455 position index, and returns a reference to the string.
3456
3457 If the specified \a position index is within the string, but \a
3458 position + \a n is beyond the end of the string, the string is
3459 truncated at the specified \a position.
3460
3461 If \a n is <= 0 nothing is changed.
3462
3463 \snippet qstring/main.cpp 37
3464
3465//! [shrinking-erase]
3466 Element removal will preserve the string's capacity and not reduce the
3467 amount of allocated memory. To shed extra capacity and free as much memory
3468 as possible, call squeeze() after the last change to the string's size.
3469//! [shrinking-erase]
3470
3471 \sa insert(), replace()
3472*/
3473QString &QString::remove(qsizetype pos, qsizetype len)
3474{
3475 if (pos < 0) // count from end of string
3476 pos += size();
3477
3478 if (size_t(pos) >= size_t(size()) || len <= 0)
3479 return *this;
3480
3481 len = std::min(len, size() - pos);
3482
3483 if (!d->isShared()) {
3484 d->erase(d.begin() + pos, len);
3485 d.data()[d.size] = u'\0';
3486 } else {
3487 // TODO: either reserve "size()", which is bigger than needed, or
3488 // modify the shrinking-erase docs of this method (since the size
3489 // of "copy" won't have any extra capacity any more)
3490 const qsizetype sz = size() - len;
3491 QString copy{sz, Qt::Uninitialized};
3492 auto begin = d.begin();
3493 auto toRemove_start = d.begin() + pos;
3494 copy.d->copyRanges({{begin, toRemove_start},
3495 {toRemove_start + len, d.end()}});
3496 swap(copy);
3497 }
3498 return *this;
3499}
3500
3501template<typename T>
3502static void removeStringImpl(QString &s, const T &needle, Qt::CaseSensitivity cs)
3503{
3504 const auto needleSize = needle.size();
3505 if (!needleSize)
3506 return;
3507
3508 // avoid detach if nothing to do:
3509 qsizetype i = s.indexOf(needle, 0, cs);
3510 if (i < 0)
3511 return;
3512
3513 QString::DataPointer &dptr = s.data_ptr();
3514 auto begin = dptr.begin();
3515 auto end = dptr.end();
3516
3517 auto copyFunc = [&](auto &dst) {
3518 auto src = begin + i + needleSize;
3519 while (src < end) {
3520 i = s.indexOf(needle, std::distance(begin, src), cs);
3521 auto hit = i == -1 ? end : begin + i;
3522 dst = std::copy(src, hit, dst);
3523 src = hit + needleSize;
3524 }
3525 return dst;
3526 };
3527
3528 if (!dptr->needsDetach()) {
3529 auto dst = begin + i;
3530 dst = copyFunc(dst);
3531 s.truncate(std::distance(begin, dst));
3532 } else {
3533 QString copy{s.size(), Qt::Uninitialized};
3534 auto copy_begin = copy.begin();
3535 auto dst = std::copy(begin, begin + i, copy_begin); // Chunk before the first hit
3536 dst = copyFunc(dst);
3537 copy.resize(std::distance(copy_begin, dst));
3538 s.swap(copy);
3539 }
3540}
3541
3542/*!
3543 Removes every occurrence of the given \a str string in this
3544 string, and returns a reference to this string.
3545
3546 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
3547
3548 This is the same as \c replace(str, "", cs).
3549
3550 \include qstring.cpp shrinking-erase
3551
3552 \sa replace()
3553*/
3554QString &QString::remove(const QString &str, Qt::CaseSensitivity cs)
3555{
3556 const auto s = str.d.data();
3557 if (QtPrivate::q_points_into_range(s, d))
3558 removeStringImpl(*this, QStringView{QVarLengthArray(s, s + str.size())}, cs);
3559 else
3560 removeStringImpl(*this, qToStringViewIgnoringNull(str), cs);
3561 return *this;
3562}
3563
3564/*!
3565 \since 5.11
3566 \overload
3567
3568 Removes every occurrence of the given Latin-1 string viewed by \a str
3569 from this string, and returns a reference to this string.
3570
3571 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
3572
3573 This is the same as \c replace(str, "", cs).
3574
3575 \include qstring.cpp shrinking-erase
3576
3577 \sa replace()
3578*/
3579QString &QString::remove(QLatin1StringView str, Qt::CaseSensitivity cs)
3580{
3581 removeStringImpl(*this, str, cs);
3582 return *this;
3583}
3584
3585/*!
3586 \fn QString &QString::removeAt(qsizetype pos)
3587
3588 \since 6.5
3589
3590 Removes the character at index \a pos. If \a pos is out of bounds
3591 (i.e. \a pos >= size()), this function does nothing.
3592
3593 \sa remove()
3594*/
3595
3596/*!
3597 \fn QString &QString::removeFirst()
3598
3599 \since 6.5
3600
3601 Removes the first character in this string. If the string is empty,
3602 this function does nothing.
3603
3604 \sa remove()
3605*/
3606
3607/*!
3608 \fn QString &QString::removeLast()
3609
3610 \since 6.5
3611
3612 Removes the last character in this string. If the string is empty,
3613 this function does nothing.
3614
3615 \sa remove()
3616*/
3617
3618/*!
3619 Removes every occurrence of the character \a ch in this string, and
3620 returns a reference to this string.
3621
3622 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
3623
3624 Example:
3625
3626 \snippet qstring/main.cpp 38
3627
3628 This is the same as \c replace(ch, "", cs).
3629
3630 \include qstring.cpp shrinking-erase
3631
3632 \sa replace()
3633*/
3634QString &QString::remove(QChar ch, Qt::CaseSensitivity cs)
3635{
3636 const qsizetype idx = indexOf(ch, 0, cs);
3637 if (idx == -1)
3638 return *this;
3639
3640 const bool isCase = cs == Qt::CaseSensitive;
3641 ch = isCase ? ch : ch.toCaseFolded();
3642 auto match = [ch, isCase](QChar x) {
3643 return ch == (isCase ? x : x.toCaseFolded());
3644 };
3645
3646
3647 auto begin = d.begin();
3648 auto first_match = begin + idx;
3649 auto end = d.end();
3650 if (!d->isShared()) {
3651 auto it = std::remove_if(first_match, end, match);
3652 d->erase(it, std::distance(it, end));
3653 d.data()[d.size] = u'\0';
3654 } else {
3655 // Instead of detaching, create a new string and copy all characters except for
3656 // the ones we're removing
3657 // TODO: size() is more than the needed since "copy" would be shorter
3658 QString copy{size(), Qt::Uninitialized};
3659 auto dst = copy.d.begin();
3660 auto it = std::copy(begin, first_match, dst); // Chunk before idx
3661 it = std::remove_copy_if(first_match + 1, end, it, match);
3662 copy.d.size = std::distance(dst, it);
3663 copy.d.data()[copy.d.size] = u'\0';
3664 *this = std::move(copy);
3665 }
3666 return *this;
3667}
3668
3669/*!
3670 \fn QString &QString::remove(const QRegularExpression &re)
3671 \since 5.0
3672
3673 Removes every occurrence of the regular expression \a re in the
3674 string, and returns a reference to the string. For example:
3675
3676 \snippet qstring/main.cpp 96
3677
3678 \include qstring.cpp shrinking-erase
3679
3680 \sa indexOf(), lastIndexOf(), replace()
3681*/
3682
3683/*!
3684 \fn template <typename Predicate> QString &QString::removeIf(Predicate pred)
3685 \since 6.1
3686
3687 Removes all elements for which the predicate \a pred returns true
3688 from the string. Returns a reference to the string.
3689
3690 \sa remove()
3691*/
3692
3693static void replace_helper(QString &str, QSpan<qsizetype> indices, qsizetype blen, QStringView after)
3694{
3695 const qsizetype oldSize = str.data_ptr().size;
3696 const qsizetype adjust = indices.size() * (after.size() - blen);
3697 const qsizetype newSize = oldSize + adjust;
3698 using A = QStringAlgorithms<QString>;
3699 if (str.data_ptr().needsDetach() || needsReallocate(str, newSize)) {
3700 A::replace_helper(str, blen, after, indices);
3701 return;
3702 }
3703
3704 if (QtPrivate::q_points_into_range(after.begin(), str)) {
3705 // Copy after if it lies inside our own d.b area (which we could
3706 // possibly invalidate via a realloc or modify by replacement)
3707 A::replace_helper(str, blen, QVarLengthArray(after.begin(), after.end()), indices);
3708 } else {
3709 A::replace_helper(str, blen, after, indices);
3710 }
3711}
3712
3713/*!
3714 \fn QString &QString::replace(qsizetype position, qsizetype n, const QString &after)
3715
3716 Replaces \a n characters beginning at index \a position with
3717 the string \a after and returns a reference to this string.
3718
3719 \note If the specified \a position index is within the string,
3720 but \a position + \a n goes outside the string's range,
3721 then \a n will be adjusted to stop at the end of the string.
3722
3723 Example:
3724
3725 \snippet qstring/main.cpp 40
3726
3727 \sa insert(), remove()
3728*/
3729QString &QString::replace(qsizetype pos, qsizetype len, const QString &after)
3730{
3731 return replace(pos, len, after.constData(), after.size());
3732}
3733
3734/*!
3735 \fn QString &QString::replace(qsizetype position, qsizetype n, const QChar *after, qsizetype alen)
3736 \overload replace()
3737 Replaces \a n characters beginning at index \a position with the
3738 first \a alen characters of the QChar array \a after and returns a
3739 reference to this string.
3740
3741 \a n must not be negative.
3742*/
3743QString &QString::replace(qsizetype pos, qsizetype len, const QChar *after, qsizetype alen)
3744{
3745 Q_PRE(len >= 0);
3746
3747 if (size_t(pos) > size_t(this->size()))
3748 return *this;
3749 if (len > this->size() - pos)
3750 len = this->size() - pos;
3751
3752 qsizetype indices[] = {pos};
3753 replace_helper(*this, indices, len, QStringView{after, alen});
3754 return *this;
3755}
3756
3757/*!
3758 \fn QString &QString::replace(qsizetype position, qsizetype n, QChar after)
3759 \overload replace()
3760
3761 Replaces \a n characters beginning at index \a position with the
3762 character \a after and returns a reference to this string.
3763*/
3764QString &QString::replace(qsizetype pos, qsizetype len, QChar after)
3765{
3766 return replace(pos, len, &after, 1);
3767}
3768
3769/*!
3770 \overload replace()
3771 Replaces every occurrence of the string \a before with the string \a
3772 after and returns a reference to this string.
3773
3774 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
3775
3776 Example:
3777
3778 \snippet qstring/main.cpp 41
3779
3780 \note The replacement text is not rescanned after it is inserted.
3781
3782 Example:
3783
3784 \snippet qstring/main.cpp 86
3785
3786//! [empty-before-arg-in-replace]
3787 \note If you use an empty \a before argument, the \a after argument will be
3788 inserted \e {before and after} each character of the string.
3789//! [empty-before-arg-in-replace]
3790
3791*/
3792QString &QString::replace(const QString &before, const QString &after, Qt::CaseSensitivity cs)
3793{
3794 return replace(before.constData(), before.size(), after.constData(), after.size(), cs);
3795}
3796
3797/*!
3798 \since 4.5
3799 \overload replace()
3800
3801 Replaces each occurrence in this string of the first \a blen
3802 characters of \a before with the first \a alen characters of \a
3803 after and returns a reference to this string.
3804
3805 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
3806
3807 \note If \a before points to an \e empty string (that is, \a blen == 0),
3808 the string pointed to by \a after will be inserted \e {before and after}
3809 each character in this string.
3810*/
3811QString &QString::replace(const QChar *before, qsizetype blen,
3812 const QChar *after, qsizetype alen,
3813 Qt::CaseSensitivity cs)
3814{
3815 if (isEmpty()) {
3816 if (blen)
3817 return *this;
3818 } else {
3819 if (cs == Qt::CaseSensitive && before == after && blen == alen)
3820 return *this;
3821 }
3822 if (alen == 0 && blen == 0)
3823 return *this;
3824 if (alen == 1 && blen == 1)
3825 return replace(*before, *after, cs);
3826
3827 QStringMatcher matcher(before, blen, cs);
3828
3829 qsizetype index = 0;
3830
3831 QVarLengthArray<qsizetype> indices;
3832 while ((index = matcher.indexIn(*this, index)) != -1) {
3833 indices.push_back(index);
3834 if (blen) // Step over before:
3835 index += blen;
3836 else // Only count one instance of empty between any two characters:
3837 index++;
3838 }
3839 if (indices.isEmpty())
3840 return *this;
3841
3842 replace_helper(*this, indices, blen, QStringView{after, alen});
3843 return *this;
3844}
3845
3846/*!
3847 \overload replace()
3848 Replaces every occurrence of the character \a ch in the string with
3849 \a after and returns a reference to this string.
3850
3851 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
3852*/
3853QString& QString::replace(QChar ch, const QString &after, Qt::CaseSensitivity cs)
3854{
3855 if (after.size() == 0)
3856 return remove(ch, cs);
3857
3858 if (after.size() == 1)
3859 return replace(ch, after.front(), cs);
3860
3861 if (size() == 0)
3862 return *this;
3863
3864 const char16_t cc = (cs == Qt::CaseSensitive ? ch.unicode() : ch.toCaseFolded().unicode());
3865
3866 QVarLengthArray<qsizetype> indices;
3867 if (cs == Qt::CaseSensitive) {
3868 const char16_t *begin = d.begin();
3869 const char16_t *end = d.end();
3870 QStringView view(begin, end);
3871 const char16_t *hit = nullptr;
3872 while ((hit = QtPrivate::qustrchr(view, cc)) != end) {
3873 indices.push_back(std::distance(begin, hit));
3874 view = QStringView(std::next(hit), end);
3875 }
3876 } else {
3877 for (qsizetype i = 0; i < d.size; ++i)
3878 if (QChar::toCaseFolded(d.data()[i]) == cc)
3879 indices.push_back(i);
3880 }
3881 if (indices.isEmpty())
3882 return *this;
3883
3884 replace_helper(*this, indices, 1, after);
3885 return *this;
3886}
3887
3888/*!
3889 \overload replace()
3890 Replaces every occurrence of the character \a before with the
3891 character \a after and returns a reference to this string.
3892
3893 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
3894*/
3895QString& QString::replace(QChar before, QChar after, Qt::CaseSensitivity cs)
3896{
3897 const qsizetype idx = indexOf(before, 0, cs);
3898 if (idx == -1)
3899 return *this;
3900
3901 const char16_t achar = after.unicode();
3902 char16_t bchar = before.unicode();
3903
3904 auto matchesCIS = [](char16_t beforeChar) {
3905 return [beforeChar](char16_t ch) { return foldAndCompare(ch, beforeChar); };
3906 };
3907
3908 auto hit = d.begin() + idx;
3909 if (!d.needsDetach()) {
3910 *hit++ = achar;
3911 if (cs == Qt::CaseSensitive) {
3912 std::replace(hit, d.end(), bchar, achar);
3913 } else {
3914 bchar = foldCase(bchar);
3915 std::replace_if(hit, d.end(), matchesCIS(bchar), achar);
3916 }
3917 } else {
3918 QString other{ d.size, Qt::Uninitialized };
3919 auto dest = std::copy(d.begin(), hit, other.d.begin());
3920 *dest++ = achar;
3921 ++hit;
3922 if (cs == Qt::CaseSensitive) {
3923 std::replace_copy(hit, d.end(), dest, bchar, achar);
3924 } else {
3925 bchar = foldCase(bchar);
3926 std::replace_copy_if(hit, d.end(), dest, matchesCIS(bchar), achar);
3927 }
3928
3929 swap(other);
3930 }
3931 return *this;
3932}
3933
3934/*!
3935 \since 4.5
3936 \overload replace()
3937
3938 Replaces every occurrence in this string of the Latin-1 string viewed
3939 by \a before with the Latin-1 string viewed by \a after, and returns a
3940 reference to this string.
3941
3942 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
3943
3944 \note The text is not rescanned after a replacement.
3945
3946 \include qstring.cpp empty-before-arg-in-replace
3947*/
3948QString &QString::replace(QLatin1StringView before, QLatin1StringView after, Qt::CaseSensitivity cs)
3949{
3950 const qsizetype alen = after.size();
3951 const qsizetype blen = before.size();
3952 if (blen == 1 && alen == 1)
3953 return replace(before.front(), after.front(), cs);
3954
3955 QVarLengthArray<char16_t> a = qt_from_latin1_to_qvla(after);
3956 QVarLengthArray<char16_t> b = qt_from_latin1_to_qvla(before);
3957 return replace((const QChar *)b.data(), blen, (const QChar *)a.data(), alen, cs);
3958}
3959
3960/*!
3961 \since 4.5
3962 \overload replace()
3963
3964 Replaces every occurrence in this string of the Latin-1 string viewed
3965 by \a before with the string \a after, and returns a reference to this
3966 string.
3967
3968 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
3969
3970 \note The text is not rescanned after a replacement.
3971
3972 \include qstring.cpp empty-before-arg-in-replace
3973*/
3974QString &QString::replace(QLatin1StringView before, const QString &after, Qt::CaseSensitivity cs)
3975{
3976 const qsizetype blen = before.size();
3977 if (blen == 1 && after.size() == 1)
3978 return replace(before.front(), after.front(), cs);
3979
3980 QVarLengthArray<char16_t> b = qt_from_latin1_to_qvla(before);
3981 return replace((const QChar *)b.data(), blen, after.constData(), after.d.size, cs);
3982}
3983
3984/*!
3985 \since 4.5
3986 \overload replace()
3987
3988 Replaces every occurrence of the string \a before with the string \a
3989 after and returns a reference to this string.
3990
3991 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
3992
3993 \note The text is not rescanned after a replacement.
3994
3995 \include qstring.cpp empty-before-arg-in-replace
3996*/
3997QString &QString::replace(const QString &before, QLatin1StringView after, Qt::CaseSensitivity cs)
3998{
3999 const qsizetype alen = after.size();
4000 if (before.size() == 1 && alen == 1)
4001 return replace(before.front(), after.front(), cs);
4002
4003 QVarLengthArray<char16_t> a = qt_from_latin1_to_qvla(after);
4004 return replace(before.constData(), before.d.size, (const QChar *)a.data(), alen, cs);
4005}
4006
4007/*!
4008 \since 4.5
4009 \overload replace()
4010
4011 Replaces every occurrence of the character \a c with the string \a
4012 after and returns a reference to this string.
4013
4014 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4015
4016 \note The text is not rescanned after a replacement.
4017*/
4018QString &QString::replace(QChar c, QLatin1StringView after, Qt::CaseSensitivity cs)
4019{
4020 const qsizetype alen = after.size();
4021 if (alen == 1)
4022 return replace(c, after.front(), cs);
4023
4024 QVarLengthArray<char16_t> a = qt_from_latin1_to_qvla(after);
4025 return replace(&c, 1, (const QChar *)a.data(), alen, cs);
4026}
4027
4028/*!
4029 \fn bool QString::operator==(const QString &lhs, const QString &rhs)
4030 \overload operator==()
4031
4032 Returns \c true if string \a lhs is equal to string \a rhs; otherwise
4033 returns \c false.
4034
4035 \include qstring.cpp compare-isNull-vs-isEmpty
4036
4037 \sa {Comparing Strings}
4038*/
4039
4040/*!
4041 \fn bool QString::operator==(const QString &lhs, const QLatin1StringView &rhs)
4042
4043 \overload operator==()
4044
4045 Returns \c true if \a lhs is equal to \a rhs; otherwise
4046 returns \c false.
4047*/
4048
4049/*!
4050 \fn bool QString::operator==(const QLatin1StringView &lhs, const QString &rhs)
4051
4052 \overload operator==()
4053
4054 Returns \c true if \a lhs is equal to \a rhs; otherwise
4055 returns \c false.
4056*/
4057
4058/*! \fn bool QString::operator==(const QString &lhs, const QByteArray &rhs)
4059
4060 \overload operator==()
4061
4062 The \a rhs byte array is converted to a QUtf8StringView.
4063
4064 You can disable this operator by defining
4065 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
4066 can be useful if you want to ensure that all user-visible strings
4067 go through QObject::tr(), for example.
4068
4069 Returns \c true if string \a lhs is lexically equal to \a rhs.
4070 Otherwise returns \c false.
4071*/
4072
4073/*! \fn bool QString::operator==(const QString &lhs, const char * const &rhs)
4074
4075 \overload operator==()
4076
4077 The \a rhs const char pointer is converted to a QUtf8StringView.
4078
4079 You can disable this operator by defining
4080 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
4081 can be useful if you want to ensure that all user-visible strings
4082 go through QObject::tr(), for example.
4083*/
4084
4085/*!
4086 \fn bool QString::operator<(const QString &lhs, const QString &rhs)
4087
4088 \overload operator<()
4089
4090 Returns \c true if string \a lhs is lexically less than string
4091 \a rhs; otherwise returns \c false.
4092
4093 \sa {Comparing Strings}
4094*/
4095
4096/*!
4097 \fn bool QString::operator<(const QString &lhs, const QLatin1StringView &rhs)
4098
4099 \overload operator<()
4100
4101 Returns \c true if \a lhs is lexically less than \a rhs;
4102 otherwise returns \c false.
4103*/
4104
4105/*!
4106 \fn bool QString::operator<(const QLatin1StringView &lhs, const QString &rhs)
4107
4108 \overload operator<()
4109
4110 Returns \c true if \a lhs is lexically less than \a rhs;
4111 otherwise returns \c false.
4112*/
4113
4114/*! \fn bool QString::operator<(const QString &lhs, const QByteArray &rhs)
4115
4116 \overload operator<()
4117
4118 The \a rhs byte array is converted to a QUtf8StringView.
4119 If any NUL characters ('\\0') are embedded in the byte array, they will be
4120 included in the transformation.
4121
    You can disable this operator by defining
4123 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
4124 can be useful if you want to ensure that all user-visible strings
4125 go through QObject::tr(), for example.
4126*/
4127
4128/*! \fn bool QString::operator<(const QString &lhs, const char * const &rhs)
4129
4130 Returns \c true if string \a lhs is lexically less than string \a rhs.
4131 Otherwise returns \c false.
4132
4133 \overload operator<()
4134
4135 The \a rhs const char pointer is converted to a QUtf8StringView.
4136
4137 You can disable this operator by defining
4138 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
4139 can be useful if you want to ensure that all user-visible strings
4140 go through QObject::tr(), for example.
4141*/
4142
4143/*! \fn bool QString::operator<=(const QString &lhs, const QString &rhs)
4144
4145 Returns \c true if string \a lhs is lexically less than or equal to
4146 string \a rhs; otherwise returns \c false.
4147
4148 \sa {Comparing Strings}
4149*/
4150
4151/*!
4152 \fn bool QString::operator<=(const QString &lhs, const QLatin1StringView &rhs)
4153
4154 \overload operator<=()
4155
4156 Returns \c true if \a lhs is lexically less than or equal to \a rhs;
4157 otherwise returns \c false.
4158*/
4159
4160/*!
4161 \fn bool QString::operator<=(const QLatin1StringView &lhs, const QString &rhs)
4162
4163 \overload operator<=()
4164
4165 Returns \c true if \a lhs is lexically less than or equal to \a rhs;
4166 otherwise returns \c false.
4167*/
4168
4169/*! \fn bool QString::operator<=(const QString &lhs, const QByteArray &rhs)
4170
4171 \overload operator<=()
4172
4173 The \a rhs byte array is converted to a QUtf8StringView.
4174 If any NUL characters ('\\0') are embedded in the byte array, they will be
4175 included in the transformation.
4176
4177 You can disable this operator by defining
4178 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
4179 can be useful if you want to ensure that all user-visible strings
4180 go through QObject::tr(), for example.
4181*/
4182
4183/*! \fn bool QString::operator<=(const QString &lhs, const char * const &rhs)
4184
4185 \overload operator<=()
4186
4187 The \a rhs const char pointer is converted to a QUtf8StringView.
4188
4189 You can disable this operator by defining
4190 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
4191 can be useful if you want to ensure that all user-visible strings
4192 go through QObject::tr(), for example.
4193*/
4194
4195/*! \fn bool QString::operator>(const QString &lhs, const QString &rhs)
4196
4197 Returns \c true if string \a lhs is lexically greater than string \a rhs;
4198 otherwise returns \c false.
4199
4200 \sa {Comparing Strings}
4201*/
4202
4203/*!
4204 \fn bool QString::operator>(const QString &lhs, const QLatin1StringView &rhs)
4205
4206 \overload operator>()
4207
4208 Returns \c true if \a lhs is lexically greater than \a rhs;
4209 otherwise returns \c false.
4210*/
4211
4212/*!
4213 \fn bool QString::operator>(const QLatin1StringView &lhs, const QString &rhs)
4214
4215 \overload operator>()
4216
4217 Returns \c true if \a lhs is lexically greater than \a rhs;
4218 otherwise returns \c false.
4219*/
4220
4221/*! \fn bool QString::operator>(const QString &lhs, const QByteArray &rhs)
4222
4223 \overload operator>()
4224
4225 The \a rhs byte array is converted to a QUtf8StringView.
4226 If any NUL characters ('\\0') are embedded in the byte array, they will be
4227 included in the transformation.
4228
4229 You can disable this operator by defining
4230 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
4231 can be useful if you want to ensure that all user-visible strings
4232 go through QObject::tr(), for example.
4233*/
4234
4235/*! \fn bool QString::operator>(const QString &lhs, const char * const &rhs)
4236
4237 \overload operator>()
4238
4239 The \a rhs const char pointer is converted to a QUtf8StringView.
4240
4241 You can disable this operator by defining \l QT_NO_CAST_FROM_ASCII
4242 when you compile your applications. This can be useful if you want
4243 to ensure that all user-visible strings go through QObject::tr(),
4244 for example.
4245*/
4246
4247/*! \fn bool QString::operator>=(const QString &lhs, const QString &rhs)
4248
4249 Returns \c true if string \a lhs is lexically greater than or equal to
4250 string \a rhs; otherwise returns \c false.
4251
4252 \sa {Comparing Strings}
4253*/
4254
4255/*!
4256 \fn bool QString::operator>=(const QString &lhs, const QLatin1StringView &rhs)
4257
4258 \overload operator>=()
4259
4260 Returns \c true if \a lhs is lexically greater than or equal to \a rhs;
4261 otherwise returns \c false.
4262*/
4263
4264/*!
4265 \fn bool QString::operator>=(const QLatin1StringView &lhs, const QString &rhs)
4266
4267 \overload operator>=()
4268
4269 Returns \c true if \a lhs is lexically greater than or equal to \a rhs;
4270 otherwise returns \c false.
4271*/
4272
4273/*! \fn bool QString::operator>=(const QString &lhs, const QByteArray &rhs)
4274
4275 \overload operator>=()
4276
4277 The \a rhs byte array is converted to a QUtf8StringView.
4278 If any NUL characters ('\\0') are embedded in the byte array, they will be
4279 included in the transformation.
4280
4281 You can disable this operator by defining \l QT_NO_CAST_FROM_ASCII
4282 when you compile your applications. This can be useful if you want
4283 to ensure that all user-visible strings go through QObject::tr(),
4284 for example.
4285*/
4286
4287/*! \fn bool QString::operator>=(const QString &lhs, const char * const &rhs)
4288
4289 \overload operator>=()
4290
4291 The \a rhs const char pointer is converted to a QUtf8StringView.
4292
4293 You can disable this operator by defining \l QT_NO_CAST_FROM_ASCII
4294 when you compile your applications. This can be useful if you want
4295 to ensure that all user-visible strings go through QObject::tr(),
4296 for example.
4297*/
4298
4299/*! \fn bool QString::operator!=(const QString &lhs, const QString &rhs)
4300
4301 Returns \c true if string \a lhs is not equal to string \a rhs;
4302 otherwise returns \c false.
4303
4304 \sa {Comparing Strings}
4305*/
4306
4307/*! \fn bool QString::operator!=(const QString &lhs, const QLatin1StringView &rhs)
4308
4309 Returns \c true if string \a lhs is not equal to string \a rhs.
4310 Otherwise returns \c false.
4311
4312 \overload operator!=()
4313*/
4314
4315/*! \fn bool QString::operator!=(const QString &lhs, const QByteArray &rhs)
4316
4317 \overload operator!=()
4318
4319 The \a rhs byte array is converted to a QUtf8StringView.
4320 If any NUL characters ('\\0') are embedded in the byte array, they will be
4321 included in the transformation.
4322
4323 You can disable this operator by defining \l QT_NO_CAST_FROM_ASCII
4324 when you compile your applications. This can be useful if you want
4325 to ensure that all user-visible strings go through QObject::tr(),
4326 for example.
4327*/
4328
4329/*! \fn bool QString::operator!=(const QString &lhs, const char * const &rhs)
4330
4331 \overload operator!=()
4332
4333 The \a rhs const char pointer is converted to a QUtf8StringView.
4334
4335 You can disable this operator by defining
4336 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
4337 can be useful if you want to ensure that all user-visible strings
4338 go through QObject::tr(), for example.
4339*/
4340
4341/*! \fn bool QString::operator==(const QByteArray &lhs, const QString &rhs)
4342
4343 Returns \c true if byte array \a lhs is equal to the UTF-8 encoding of
4344 \a rhs; otherwise returns \c false.
4345
4346 The comparison is case sensitive.
4347
4348 You can disable this operator by defining \c
4349 QT_NO_CAST_FROM_ASCII when you compile your applications. You
4350 then need to call QString::fromUtf8(), QString::fromLatin1(),
4351 or QString::fromLocal8Bit() explicitly if you want to convert the byte
4352 array to a QString before doing the comparison.
4353*/
4354
4355/*! \fn bool QString::operator!=(const QByteArray &lhs, const QString &rhs)
4356
4357 Returns \c true if byte array \a lhs is not equal to the UTF-8 encoding of
4358 \a rhs; otherwise returns \c false.
4359
4360 The comparison is case sensitive.
4361
4362 You can disable this operator by defining \c
4363 QT_NO_CAST_FROM_ASCII when you compile your applications. You
4364 then need to call QString::fromUtf8(), QString::fromLatin1(),
4365 or QString::fromLocal8Bit() explicitly if you want to convert the byte
4366 array to a QString before doing the comparison.
4367*/
4368
4369/*! \fn bool QString::operator<(const QByteArray &lhs, const QString &rhs)
4370
4371 Returns \c true if byte array \a lhs is lexically less than the UTF-8 encoding
4372 of \a rhs; otherwise returns \c false.
4373
4374 The comparison is case sensitive.
4375
4376 You can disable this operator by defining \c
4377 QT_NO_CAST_FROM_ASCII when you compile your applications. You
4378 then need to call QString::fromUtf8(), QString::fromLatin1(),
4379 or QString::fromLocal8Bit() explicitly if you want to convert the byte
4380 array to a QString before doing the comparison.
4381*/
4382
4383/*! \fn bool QString::operator>(const QByteArray &lhs, const QString &rhs)
4384
4385 Returns \c true if byte array \a lhs is lexically greater than the UTF-8
4386 encoding of \a rhs; otherwise returns \c false.
4387
4388 The comparison is case sensitive.
4389
4390 You can disable this operator by defining \c
4391 QT_NO_CAST_FROM_ASCII when you compile your applications. You
4392 then need to call QString::fromUtf8(), QString::fromLatin1(),
4393 or QString::fromLocal8Bit() explicitly if you want to convert the byte
4394 array to a QString before doing the comparison.
4395*/
4396
4397/*! \fn bool QString::operator<=(const QByteArray &lhs, const QString &rhs)
4398
4399 Returns \c true if byte array \a lhs is lexically less than or equal to the
4400 UTF-8 encoding of \a rhs; otherwise returns \c false.
4401
4402 The comparison is case sensitive.
4403
4404 You can disable this operator by defining \c
4405 QT_NO_CAST_FROM_ASCII when you compile your applications. You
4406 then need to call QString::fromUtf8(), QString::fromLatin1(),
4407 or QString::fromLocal8Bit() explicitly if you want to convert the byte
4408 array to a QString before doing the comparison.
4409*/
4410
4411/*! \fn bool QString::operator>=(const QByteArray &lhs, const QString &rhs)
4412
    Returns \c true if byte array \a lhs is lexically greater than or equal to the UTF-8
4414 encoding of \a rhs; otherwise returns \c false.
4415
4416 The comparison is case sensitive.
4417
4418 You can disable this operator by defining \c
4419 QT_NO_CAST_FROM_ASCII when you compile your applications. You
4420 then need to call QString::fromUtf8(), QString::fromLatin1(),
4421 or QString::fromLocal8Bit() explicitly if you want to convert the byte
4422 array to a QString before doing the comparison.
4423*/
4424
4425/*!
4426 \include qstring.qdocinc {qstring-first-index-of} {string} {str}
4427
4428 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4429
4430 Example:
4431
4432 \snippet qstring/main.cpp 24
4433
4434 \include qstring.qdocinc negative-index-start-search-from-end
4435
4436 \sa lastIndexOf(), contains(), count()
4437*/
4438qsizetype QString::indexOf(const QString &str, qsizetype from, Qt::CaseSensitivity cs) const
4439{
4440 return QtPrivate::findString(QStringView(unicode(), size()), from, QStringView(str.unicode(), str.size()), cs);
4441}
4442
4443/*!
4444 \fn qsizetype QString::indexOf(QStringView str, qsizetype from, Qt::CaseSensitivity cs) const
4445 \since 5.14
4446 \overload indexOf()
4447
4448 \include qstring.qdocinc {qstring-first-index-of} {string view} {str}
4449
4450 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4451
4452 \include qstring.qdocinc negative-index-start-search-from-end
4453
4454 \sa QStringView::indexOf(), lastIndexOf(), contains(), count()
4455*/
4456
4457/*!
4458 \since 4.5
4459
4460 \include {qstring.qdocinc} {qstring-first-index-of} {Latin-1 string viewed by} {str}
4461
4462 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4463
4464 Example:
4465
4466 \snippet qstring/main.cpp 24
4467
4468 \include qstring.qdocinc negative-index-start-search-from-end
4469
4470 \sa lastIndexOf(), contains(), count()
4471*/
4472
4473qsizetype QString::indexOf(QLatin1StringView str, qsizetype from, Qt::CaseSensitivity cs) const
4474{
4475 return QtPrivate::findString(QStringView(unicode(), size()), from, str, cs);
4476}
4477
4478/*!
4479 \fn qsizetype QString::indexOf(QChar ch, qsizetype from, Qt::CaseSensitivity cs) const
4480 \overload indexOf()
4481
4482 \include qstring.qdocinc {qstring-first-index-of} {character} {ch}
4483*/
4484
4485/*!
4486 \include qstring.qdocinc {qstring-last-index-of} {string} {str}
4487
4488 \include qstring.qdocinc negative-index-start-search-from-end
4489
4490 Returns -1 if \a str is not found.
4491
4492 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4493
4494 Example:
4495
4496 \snippet qstring/main.cpp 29
4497
4498 \note When searching for a 0-length \a str, the match at the end of
4499 the data is excluded from the search by a negative \a from, even
4500 though \c{-1} is normally thought of as searching from the end of the
4501 string: the match at the end is \e after the last character, so it is
4502 excluded. To include such a final empty match, either give a positive
4503 value for \a from or omit the \a from parameter entirely.
4504
4505 \sa indexOf(), contains(), count()
4506*/
4507qsizetype QString::lastIndexOf(const QString &str, qsizetype from, Qt::CaseSensitivity cs) const
4508{
4509 return QtPrivate::lastIndexOf(QStringView(*this), from, str, cs);
4510}
4511
4512/*!
4513 \fn qsizetype QString::lastIndexOf(const QString &str, Qt::CaseSensitivity cs = Qt::CaseSensitive) const
4514 \since 6.2
4515 \overload lastIndexOf()
4516
4517 Returns the index position of the last occurrence of the string \a
4518 str in this string. Returns -1 if \a str is not found.
4519
4520 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4521
4522 Example:
4523
4524 \snippet qstring/main.cpp 29
4525
4526 \sa indexOf(), contains(), count()
4527*/
4528
4529
4530/*!
4531 \since 4.5
4532 \overload lastIndexOf()
4533
4534 \include qstring.qdocinc {qstring-last-index-of} {Latin-1 string viewed by} {str}
4535
4536 \include qstring.qdocinc negative-index-start-search-from-end
4537
4538 Returns -1 if \a str is not found.
4539
4540 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4541
4542 Example:
4543
4544 \snippet qstring/main.cpp 29
4545
4546 \note When searching for a 0-length \a str, the match at the end of
4547 the data is excluded from the search by a negative \a from, even
4548 though \c{-1} is normally thought of as searching from the end of the
4549 string: the match at the end is \e after the last character, so it is
4550 excluded. To include such a final empty match, either give a positive
4551 value for \a from or omit the \a from parameter entirely.
4552
4553 \sa indexOf(), contains(), count()
4554*/
4555qsizetype QString::lastIndexOf(QLatin1StringView str, qsizetype from, Qt::CaseSensitivity cs) const
4556{
4557 return QtPrivate::lastIndexOf(*this, from, str, cs);
4558}
4559
4560/*!
4561 \fn qsizetype QString::lastIndexOf(QLatin1StringView str, Qt::CaseSensitivity cs = Qt::CaseSensitive) const
4562 \since 6.2
4563 \overload lastIndexOf()
4564
4565 Returns the index position of the last occurrence of the string \a
4566 str in this string. Returns -1 if \a str is not found.
4567
4568 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4569
4570 Example:
4571
4572 \snippet qstring/main.cpp 29
4573
4574 \sa indexOf(), contains(), count()
4575*/
4576
4577/*!
4578 \fn qsizetype QString::lastIndexOf(QChar ch, qsizetype from, Qt::CaseSensitivity cs) const
4579 \overload lastIndexOf()
4580
4581 \include qstring.qdocinc {qstring-last-index-of} {character} {ch}
4582*/
4583
4584/*!
4585 \fn QString::lastIndexOf(QChar ch, Qt::CaseSensitivity) const
4586 \since 6.3
4587 \overload lastIndexOf()
4588*/
4589
4590/*!
4591 \fn qsizetype QString::lastIndexOf(QStringView str, qsizetype from, Qt::CaseSensitivity cs) const
4592 \since 5.14
4593 \overload lastIndexOf()
4594
4595 \include qstring.qdocinc {qstring-last-index-of} {string view} {str}
4596
4597 \include qstring.qdocinc negative-index-start-search-from-end
4598
4599 Returns -1 if \a str is not found.
4600
4601 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4602
4603 \note When searching for a 0-length \a str, the match at the end of
4604 the data is excluded from the search by a negative \a from, even
4605 though \c{-1} is normally thought of as searching from the end of the
4606 string: the match at the end is \e after the last character, so it is
4607 excluded. To include such a final empty match, either give a positive
4608 value for \a from or omit the \a from parameter entirely.
4609
4610 \sa indexOf(), contains(), count()
4611*/
4612
4613/*!
4614 \fn qsizetype QString::lastIndexOf(QStringView str, Qt::CaseSensitivity cs = Qt::CaseSensitive) const
4615 \since 6.2
4616 \overload lastIndexOf()
4617
4618 Returns the index position of the last occurrence of the string view \a
4619 str in this string. Returns -1 if \a str is not found.
4620
4621 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4622
4623 \sa indexOf(), contains(), count()
4624*/
4625
4626#if QT_CONFIG(regularexpression)
4627struct QStringCapture
4628{
4629 qsizetype pos;
4630 qsizetype len;
4631 int no;
4632};
4633Q_DECLARE_TYPEINFO(QStringCapture, Q_PRIMITIVE_TYPE);
4634
4635/*!
4636 \overload replace()
4637 \since 5.0
4638
4639 Replaces every occurrence of the regular expression \a re in the
4640 string with \a after. Returns a reference to the string. For
4641 example:
4642
4643 \snippet qstring/main.cpp 87
4644
4645 For regular expressions containing capturing groups,
4646 occurrences of \b{\\1}, \b{\\2}, ..., in \a after are replaced
4647 with the string captured by the corresponding capturing group.
4648
4649 \snippet qstring/main.cpp 88
4650
4651 \sa indexOf(), lastIndexOf(), remove(), QRegularExpression, QRegularExpressionMatch
4652*/
4653QString &QString::replace(const QRegularExpression &re, const QString &after)
4654{
4655 if (!re.isValid()) {
4656 qtWarnAboutInvalidRegularExpression(re, "QString", "replace");
4657 return *this;
4658 }
4659
4660 const QString copy(*this);
4661 QRegularExpressionMatchIterator iterator = re.globalMatch(copy);
4662 if (!iterator.hasNext()) // no matches at all
4663 return *this;
4664
4665 reallocData(d.size, QArrayData::KeepSize);
4666
4667 qsizetype numCaptures = re.captureCount();
4668
4669 // 1. build the backreferences list, holding where the backreferences
4670 // are in the replacement string
4671 QVarLengthArray<QStringCapture> backReferences;
4672 const qsizetype al = after.size();
4673 const QChar *ac = after.unicode();
4674
4675 for (qsizetype i = 0; i < al - 1; i++) {
4676 if (ac[i] == u'\\') {
4677 int no = ac[i + 1].digitValue();
4678 if (no > 0 && no <= numCaptures) {
4679 QStringCapture backReference;
4680 backReference.pos = i;
4681 backReference.len = 2;
4682
4683 if (i < al - 2) {
4684 int secondDigit = ac[i + 2].digitValue();
4685 if (secondDigit != -1 && ((no * 10) + secondDigit) <= numCaptures) {
4686 no = (no * 10) + secondDigit;
4687 ++backReference.len;
4688 }
4689 }
4690
4691 backReference.no = no;
4692 backReferences.append(backReference);
4693 }
4694 }
4695 }
4696
4697 // 2. iterate on the matches. For every match, copy in chunks
4698 // - the part before the match
4699 // - the after string, with the proper replacements for the backreferences
4700
4701 qsizetype newLength = 0; // length of the new string, with all the replacements
4702 qsizetype lastEnd = 0;
4703 QVarLengthArray<QStringView> chunks;
4704 const QStringView copyView{ copy }, afterView{ after };
4705 while (iterator.hasNext()) {
4706 QRegularExpressionMatch match = iterator.next();
4707 qsizetype len;
4708 // add the part before the match
4709 len = match.capturedStart() - lastEnd;
4710 if (len > 0) {
4711 chunks << copyView.mid(lastEnd, len);
4712 newLength += len;
4713 }
4714
4715 lastEnd = 0;
4716 // add the after string, with replacements for the backreferences
4717 for (const QStringCapture &backReference : std::as_const(backReferences)) {
4718 // part of "after" before the backreference
4719 len = backReference.pos - lastEnd;
4720 if (len > 0) {
4721 chunks << afterView.mid(lastEnd, len);
4722 newLength += len;
4723 }
4724
4725 // backreference itself
4726 len = match.capturedLength(backReference.no);
4727 if (len > 0) {
4728 chunks << copyView.mid(match.capturedStart(backReference.no), len);
4729 newLength += len;
4730 }
4731
4732 lastEnd = backReference.pos + backReference.len;
4733 }
4734
4735 // add the last part of the after string
4736 len = afterView.size() - lastEnd;
4737 if (len > 0) {
4738 chunks << afterView.mid(lastEnd, len);
4739 newLength += len;
4740 }
4741
4742 lastEnd = match.capturedEnd();
4743 }
4744
4745 // 3. trailing string after the last match
4746 if (copyView.size() > lastEnd) {
4747 chunks << copyView.mid(lastEnd);
4748 newLength += copyView.size() - lastEnd;
4749 }
4750
4751 // 4. assemble the chunks together
4752 resize(newLength);
4753 qsizetype i = 0;
4754 QChar *uc = data();
4755 for (const QStringView &chunk : std::as_const(chunks)) {
4756 qsizetype len = chunk.size();
4757 memcpy(uc + i, chunk.constData(), len * sizeof(QChar));
4758 i += len;
4759 }
4760
4761 return *this;
4762}
4763#endif // QT_CONFIG(regularexpression)
4764
4765/*!
4766 Returns the number of (potentially overlapping) occurrences of
4767 the string \a str in this string.
4768
4769 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4770
4771 \sa contains(), indexOf()
4772*/
4773
4774qsizetype QString::count(const QString &str, Qt::CaseSensitivity cs) const
4775{
4776 return QtPrivate::count(QStringView(unicode(), size()), QStringView(str.unicode(), str.size()), cs);
4777}
4778
4779/*!
4780 \overload count()
4781
4782 Returns the number of occurrences of character \a ch in the string.
4783
4784 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4785
4786 \sa contains(), indexOf()
4787*/
4788
4789qsizetype QString::count(QChar ch, Qt::CaseSensitivity cs) const
4790{
4791 return QtPrivate::count(QStringView(unicode(), size()), ch, cs);
4792}
4793
4794/*!
4795 \since 6.0
4796 \overload count()
4797 Returns the number of (potentially overlapping) occurrences of the
4798 string view \a str in this string.
4799
4800 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4801
4802 \sa contains(), indexOf()
4803*/
4804qsizetype QString::count(QStringView str, Qt::CaseSensitivity cs) const
4805{
4806 return QtPrivate::count(*this, str, cs);
4807}
4808
4809/*! \fn bool QString::contains(const QString &str, Qt::CaseSensitivity cs = Qt::CaseSensitive) const
4810
4811 Returns \c true if this string contains an occurrence of the string
4812 \a str; otherwise returns \c false.
4813
4814 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4815
4816 Example:
4817 \snippet qstring/main.cpp 17
4818
4819 \sa indexOf(), count()
4820*/
4821
4822/*! \fn bool QString::contains(QLatin1StringView str, Qt::CaseSensitivity cs = Qt::CaseSensitive) const
4823 \since 5.3
4824
4825 \overload contains()
4826
4827 Returns \c true if this string contains an occurrence of the latin-1 string
4828 \a str; otherwise returns \c false.
4829*/
4830
4831/*! \fn bool QString::contains(QChar ch, Qt::CaseSensitivity cs = Qt::CaseSensitive) const
4832
4833 \overload contains()
4834
4835 Returns \c true if this string contains an occurrence of the
4836 character \a ch; otherwise returns \c false.
4837*/
4838
4839/*! \fn bool QString::contains(QStringView str, Qt::CaseSensitivity cs = Qt::CaseSensitive) const
4840 \since 5.14
4841 \overload contains()
4842
4843 Returns \c true if this string contains an occurrence of the string view
4844 \a str; otherwise returns \c false.
4845
4846 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4847
4848 \sa indexOf(), count()
4849*/
4850
4851#if QT_CONFIG(regularexpression)
4852/*!
4853 \since 5.5
4854
4855 Returns the index position of the first match of the regular
4856 expression \a re in the string, searching forward from index
4857 position \a from. Returns -1 if \a re didn't match anywhere.
4858
4859 If the match is successful and \a rmatch is not \nullptr, it also
4860 writes the results of the match into the QRegularExpressionMatch object
4861 pointed to by \a rmatch.
4862
4863 Example:
4864
4865 \snippet qstring/main.cpp 93
4866*/
4867qsizetype QString::indexOf(const QRegularExpression &re, qsizetype from, QRegularExpressionMatch *rmatch) const
4868{
4869 return QtPrivate::indexOf(QStringView(*this), this, re, from, rmatch);
4870}
4871
4872/*!
4873 \since 5.5
4874
4875 Returns the index position of the last match of the regular
4876 expression \a re in the string, which starts before the index
4877 position \a from.
4878
4879 \include qstring.qdocinc negative-index-start-search-from-end
4880
4881 Returns -1 if \a re didn't match anywhere.
4882
4883 If the match is successful and \a rmatch is not \nullptr, it also
4884 writes the results of the match into the QRegularExpressionMatch object
4885 pointed to by \a rmatch.
4886
4887 Example:
4888
4889 \snippet qstring/main.cpp 94
4890
4891 \note Due to how the regular expression matching algorithm works,
4892 this function will actually match repeatedly from the beginning of
4893 the string until the position \a from is reached.
4894
4895 \note When searching for a regular expression \a re that may match
4896 0 characters, the match at the end of the data is excluded from the
4897 search by a negative \a from, even though \c{-1} is normally
4898 thought of as searching from the end of the string: the match at
4899 the end is \e after the last character, so it is excluded. To
4900 include such a final empty match, either give a positive value for
4901 \a from or omit the \a from parameter entirely.
4902*/
4903qsizetype QString::lastIndexOf(const QRegularExpression &re, qsizetype from, QRegularExpressionMatch *rmatch) const
4904{
4905 return QtPrivate::lastIndexOf(QStringView(*this), this, re, from, rmatch);
4906}
4907
4908/*!
4909 \fn qsizetype QString::lastIndexOf(const QRegularExpression &re, QRegularExpressionMatch *rmatch = nullptr) const
4910 \since 6.2
4911 \overload lastIndexOf()
4912
4913 Returns the index position of the last match of the regular
4914 expression \a re in the string. Returns -1 if \a re didn't match anywhere.
4915
4916 If the match is successful and \a rmatch is not \nullptr, it also
4917 writes the results of the match into the QRegularExpressionMatch object
4918 pointed to by \a rmatch.
4919
4920 Example:
4921
4922 \snippet qstring/main.cpp 94
4923
4924 \note Due to how the regular expression matching algorithm works,
4925 this function will actually match repeatedly from the beginning of
4926 the string until the end of the string is reached.
4927*/
4928
4929/*!
4930 \since 5.1
4931
4932 Returns \c true if the regular expression \a re matches somewhere in this
4933 string; otherwise returns \c false.
4934
4935 If the match is successful and \a rmatch is not \nullptr, it also
4936 writes the results of the match into the QRegularExpressionMatch object
4937 pointed to by \a rmatch.
4938
4939 \sa QRegularExpression::match()
4940*/
4941
4942bool QString::contains(const QRegularExpression &re, QRegularExpressionMatch *rmatch) const
4943{
4944 return QtPrivate::contains(QStringView(*this), this, re, rmatch);
4945}
4946
4947/*!
4948 \overload count()
4949 \since 5.0
4950
4951 Returns the number of times the regular expression \a re matches
4952 in the string.
4953
4954 For historical reasons, this function counts overlapping matches,
4955 so in the example below, there are four instances of "ana" or
4956 "ama":
4957
4958 \snippet qstring/main.cpp 95
4959
4960 This behavior is different from simply iterating over the matches
4961 in the string using QRegularExpressionMatchIterator.
4962
4963 \sa QRegularExpression::globalMatch()
4964*/
4965qsizetype QString::count(const QRegularExpression &re) const
4966{
4967 return QtPrivate::count(QStringView(*this), re);
4968}
4969#endif // QT_CONFIG(regularexpression)
4970
4971#if QT_DEPRECATED_SINCE(6, 4)
4972/*! \fn qsizetype QString::count() const
4973 \deprecated [6.4] Use size() or length() instead.
4974 \overload count()
4975
4976 Same as size().
4977*/
4978#endif
4979
4980/*!
4981 \enum QString::SectionFlag
4982
4983 This enum specifies flags that can be used to affect various
4984 aspects of the section() function's behavior with respect to
4985 separators and empty fields.
4986
4987 \value SectionDefault Empty fields are counted, leading and
4988 trailing separators are not included, and the separator is
4989 compared case sensitively.
4990
4991 \value SectionSkipEmpty Treat empty fields as if they don't exist,
4992 i.e. they are not considered as far as \e start and \e end are
4993 concerned.
4994
4995 \value SectionIncludeLeadingSep Include the leading separator (if
4996 any) in the result string.
4997
4998 \value SectionIncludeTrailingSep Include the trailing separator
4999 (if any) in the result string.
5000
5001 \value SectionCaseInsensitiveSeps Compare the separator
5002 case-insensitively.
5003
5004 \sa section()
5005*/
5006
5007/*!
5008 \fn QString QString::section(QChar sep, qsizetype start, qsizetype end = -1, SectionFlags flags) const
5009
5010 This function returns a section of the string.
5011
5012 This string is treated as a sequence of fields separated by the
5013 character, \a sep. The returned string consists of the fields from
5014 position \a start to position \a end inclusive. If \a end is not
5015 specified, all fields from position \a start to the end of the
5016 string are included. Fields are numbered 0, 1, 2, etc., counting
5017 from the left, and -1, -2, etc., counting from right to left.
5018
5019 The \a flags argument can be used to affect some aspects of the
5020 function's behavior, e.g. whether to be case sensitive, whether
5021 to skip empty fields and how to deal with leading and trailing
5022 separators; see \l{SectionFlags}.
5023
5024 \snippet qstring/main.cpp 52
5025
5026 If \a start or \a end is negative, we count fields from the right
5027 of the string, the right-most field being -1, the one before the
5028 right-most field being -2, and so on.
5029
5030 \snippet qstring/main.cpp 53
5031
5032 \sa split()
5033*/
5034
5035/*!
5036 \overload section()
5037
5038 \snippet qstring/main.cpp 51
5039 \snippet qstring/main.cpp 54
5040
5041 \sa split()
5042*/
5043
5044QString QString::section(const QString &sep, qsizetype start, qsizetype end, SectionFlags flags) const
5045{
5046 const QList<QStringView> sections = QStringView{ *this }.split(
5047 sep, Qt::KeepEmptyParts, (flags & SectionCaseInsensitiveSeps) ? Qt::CaseInsensitive : Qt::CaseSensitive);
5048 const qsizetype sectionsSize = sections.size();
5049 if (!(flags & SectionSkipEmpty)) {
5050 if (start < 0)
5051 start += sectionsSize;
5052 if (end < 0)
5053 end += sectionsSize;
5054 } else {
5055 qsizetype skip = 0;
5056 for (qsizetype k = 0; k < sectionsSize; ++k) {
5057 if (sections.at(k).isEmpty())
5058 skip++;
5059 }
5060 if (start < 0)
5061 start += sectionsSize - skip;
5062 if (end < 0)
5063 end += sectionsSize - skip;
5064 }
5065 if (start >= sectionsSize || end < 0 || start > end)
5066 return QString();
5067
5068 QString ret;
5069 qsizetype first_i = start, last_i = end;
5070 for (qsizetype x = 0, i = 0; x <= end && i < sectionsSize; ++i) {
5071 const QStringView &section = sections.at(i);
5072 const bool empty = section.isEmpty();
5073 if (x >= start) {
5074 if (x == start)
5075 first_i = i;
5076 if (x == end)
5077 last_i = i;
5078 if (x > start && i > 0)
5079 ret += sep;
5080 ret += section;
5081 }
5082 if (!empty || !(flags & SectionSkipEmpty))
5083 x++;
5084 }
5085 if ((flags & SectionIncludeLeadingSep) && first_i > 0)
5086 ret.prepend(sep);
5087 if ((flags & SectionIncludeTrailingSep) && last_i < sectionsSize - 1)
5088 ret += sep;
5089 return ret;
5090}
5091
5092#if QT_CONFIG(regularexpression)
// One piece of the input as cut up by the QRegularExpression overload of
// QString::section(): a separator match followed by the field text after it.
5093struct qt_section_chunk
5094{
// Number of characters at the front of `string` taken by the separator match
// (0 for the first chunk, which has no preceding match).
5095 qsizetype length;
// The separator match plus the text up to the next match (or the end of input).
5096 QStringView string;
5097};
5098Q_DECLARE_TYPEINFO(qt_section_chunk, Q_RELOCATABLE_TYPE);
5099
5100static QString extractSections(QSpan<qt_section_chunk> sections, qsizetype start, qsizetype end,
5101 QString::SectionFlags flags)
5102{
5103 const qsizetype sectionsSize = sections.size();
5104
5105 if (!(flags & QString::SectionSkipEmpty)) {
5106 if (start < 0)
5107 start += sectionsSize;
5108 if (end < 0)
5109 end += sectionsSize;
5110 } else {
5111 qsizetype skip = 0;
5112 for (qsizetype k = 0; k < sectionsSize; ++k) {
5113 const qt_section_chunk &section = sections[k];
5114 if (section.length == section.string.size())
5115 skip++;
5116 }
5117 if (start < 0)
5118 start += sectionsSize - skip;
5119 if (end < 0)
5120 end += sectionsSize - skip;
5121 }
5122 if (start >= sectionsSize || end < 0 || start > end)
5123 return QString();
5124
5125 QString ret;
5126 qsizetype x = 0;
5127 qsizetype first_i = start, last_i = end;
5128 for (qsizetype i = 0; x <= end && i < sectionsSize; ++i) {
5129 const qt_section_chunk &section = sections[i];
5130 const bool empty = (section.length == section.string.size());
5131 if (x >= start) {
5132 if (x == start)
5133 first_i = i;
5134 if (x == end)
5135 last_i = i;
5136 if (x != start)
5137 ret += section.string;
5138 else
5139 ret += section.string.mid(section.length);
5140 }
5141 if (!empty || !(flags & QString::SectionSkipEmpty))
5142 x++;
5143 }
5144
5145 if ((flags & QString::SectionIncludeLeadingSep) && first_i >= 0) {
5146 const qt_section_chunk &section = sections[first_i];
5147 ret.prepend(section.string.left(section.length));
5148 }
5149
5150 if ((flags & QString::SectionIncludeTrailingSep)
5151 && last_i < sectionsSize - 1) {
5152 const qt_section_chunk &section = sections[last_i + 1];
5153 ret += section.string.left(section.length);
5154 }
5155
5156 return ret;
5157}
5158
5159/*!
5160 \overload section()
5161 \since 5.0
5162
5163 This string is treated as a sequence of fields separated by the
5164 regular expression, \a re.
5165
5166 \snippet qstring/main.cpp 89
5167
5168 \warning Using this QRegularExpression version is much more expensive than
5169 the overloaded string and character versions.
5170
5171 \sa split(), simplified()
5172*/
5173QString QString::section(const QRegularExpression &re, qsizetype start, qsizetype end, SectionFlags flags) const
5174{
5175 if (!re.isValid()) {
5176 qtWarnAboutInvalidRegularExpression(re, "QString", "section");
5177 return QString();
5178 }
5179
5180 const QChar *uc = unicode();
5181 if (!uc)
5182 return QString();
5183
5184 QRegularExpression sep(re);
5185 if (flags & SectionCaseInsensitiveSeps)
5186 sep.setPatternOptions(sep.patternOptions() | QRegularExpression::CaseInsensitiveOption);
5187
5188 QVarLengthArray<qt_section_chunk> sections;
5189 qsizetype n = size(), m = 0, last_m = 0, last_len = 0;
5190 QRegularExpressionMatchIterator iterator = sep.globalMatch(*this);
5191 while (iterator.hasNext()) {
5192 QRegularExpressionMatch match = iterator.next();
5193 m = match.capturedStart();
5194 sections.append(qt_section_chunk{last_len, QStringView{*this}.sliced(last_m, m - last_m)});
5195 last_m = m;
5196 last_len = match.capturedLength();
5197 }
5198 sections.append(qt_section_chunk{last_len, QStringView{*this}.sliced(last_m, n - last_m)});
5199
5200 return extractSections(sections, start, end, flags);
5201}
5202#endif // QT_CONFIG(regularexpression)
5203
5204/*!
5205 \fn QString QString::left(qsizetype n) const &
5206 \fn QString QString::left(qsizetype n) &&
5207
5208 Returns a substring that contains the \a n leftmost characters of
5209 this string (that is, from the beginning of this string up to, but not
5210 including, the element at index position \a n).
5211
5212 If you know that \a n cannot be out of bounds, use first() instead in new
5213 code, because it is faster.
5214
5215 The entire string is returned if \a n is greater than or equal
5216 to size(), or less than zero.
5217
5218 \sa first(), last(), startsWith(), chopped(), chop(), truncate()
5219*/
5220
5221/*!
5222 \fn QString QString::right(qsizetype n) const &
5223 \fn QString QString::right(qsizetype n) &&
5224
5225 Returns a substring that contains the \a n rightmost characters
5226 of the string.
5227
5228 If you know that \a n cannot be out of bounds, use last() instead in new
5229 code, because it is faster.
5230
5231 The entire string is returned if \a n is greater than or equal
5232 to size(), or less than zero.
5233
5234 \sa endsWith(), last(), first(), sliced(), chopped(), chop(), truncate(), slice()
5235*/
5236
5237/*!
5238 \fn QString QString::mid(qsizetype position, qsizetype n) const &
5239 \fn QString QString::mid(qsizetype position, qsizetype n) &&
5240
5241 Returns a string that contains \a n characters of this string, starting
5242 at the specified \a position index up to, but not including, the element
5243 at index position \c {\a position + n}.
5244
5245 If you know that \a position and \a n cannot be out of bounds, use sliced()
5246 instead in new code, because it is faster.
5247
5248 Returns a null string if the \a position index exceeds the
5249 length of the string. If there are fewer than \a n characters
5250 available in the string starting at the given \a position, or if
5251 \a n is -1 (default), the function returns all characters that
5252 are available from the specified \a position.
5253
5254 \sa first(), last(), sliced(), chopped(), chop(), truncate(), slice()
5255*/
5256QString QString::mid(qsizetype position, qsizetype n) const &
5257{
5258 qsizetype p = position;
5259 qsizetype l = n;
5260 using namespace QtPrivate;
5261 switch (QContainerImplHelper::mid(size(), &p, &l)) {
5262 case QContainerImplHelper::Null:
5263 return QString();
5264 case QContainerImplHelper::Empty:
5265 return QString(DataPointer::fromRawData(&_empty, 0));
5266 case QContainerImplHelper::Full:
5267 return *this;
5268 case QContainerImplHelper::Subset:
5269 return sliced(p, l);
5270 }
5271 Q_UNREACHABLE_RETURN(QString());
5272}
5273
5274QString QString::mid(qsizetype position, qsizetype n) &&
5275{
5276 qsizetype p = position;
5277 qsizetype l = n;
5278 using namespace QtPrivate;
5279 switch (QContainerImplHelper::mid(size(), &p, &l)) {
5280 case QContainerImplHelper::Null:
5281 return QString();
5282 case QContainerImplHelper::Empty:
5283 resize(0); // keep capacity if we've reserve()d
5284 [[fallthrough]];
5285 case QContainerImplHelper::Full:
5286 return std::move(*this);
5287 case QContainerImplHelper::Subset:
5288 return std::move(*this).sliced(p, l);
5289 }
5290 Q_UNREACHABLE_RETURN(QString());
5291}
5292
5293/*!
5294 \fn QString QString::first(qsizetype n) const &
5295 \fn QString QString::first(qsizetype n) &&
5296 \since 6.0
5297
5298 Returns a string that contains the first \a n characters of this string,
5299 (that is, from the beginning of this string up to, but not including,
5300 the element at index position \a n).
5301
5302 \note The behavior is undefined when \a n < 0 or \a n > size().
5303
5304 \snippet qstring/main.cpp 31
5305
5306 \sa last(), sliced(), startsWith(), chopped(), chop(), truncate(), slice()
5307*/
5308
5309/*!
5310 \fn QString QString::last(qsizetype n) const &
5311 \fn QString QString::last(qsizetype n) &&
5312 \since 6.0
5313
5314 Returns the string that contains the last \a n characters of this string.
5315
5316 \note The behavior is undefined when \a n < 0 or \a n > size().
5317
5318 \snippet qstring/main.cpp 48
5319
5320 \sa first(), sliced(), endsWith(), chopped(), chop(), truncate(), slice()
5321*/
5322
5323/*!
5324 \fn QString QString::sliced(qsizetype pos, qsizetype n) const &
5325 \fn QString QString::sliced(qsizetype pos, qsizetype n) &&
5326 \since 6.0
5327
5328 Returns a string that contains \a n characters of this string, starting
5329 at position \a pos up to, but not including, the element at index position
5330 \c {\a pos + n}.
5331
5332 \note The behavior is undefined when \a pos < 0, \a n < 0,
5333 or \a pos + \a n > size().
5334
5335 \snippet qstring/main.cpp 34
5336
5337 \sa first(), last(), chopped(), chop(), truncate(), slice()
5338*/
5339QString QString::sliced_helper(QString &str, qsizetype pos, qsizetype n)
5340{
5341 if (n == 0)
5342 return QString(DataPointer::fromRawData(&_empty, 0));
5343 DataPointer d = std::move(str.d).sliced(pos, n);
5344 d.data()[n] = 0;
5345 return QString(std::move(d));
5346}
5347
5348/*!
5349 \fn QString QString::sliced(qsizetype pos) const &
5350 \fn QString QString::sliced(qsizetype pos) &&
5351 \since 6.0
5352 \overload
5353
5354 Returns a string that contains the portion of this string starting at
5355 position \a pos and extending to its end.
5356
5357 \note The behavior is undefined when \a pos < 0 or \a pos > size().
5358
5359 \sa first(), last(), chopped(), chop(), truncate(), slice()
5360*/
5361
5362/*!
5363 \fn QString &QString::slice(qsizetype pos, qsizetype n)
5364 \since 6.8
5365
5366 Modifies this string to start at position \a pos, up to, but not including,
5367 the character (code point) at index position \c {\a pos + n}; and returns
5368 a reference to this string.
5369
5370 \note The behavior is undefined if \a pos < 0, \a n < 0,
5371 or \a pos + \a n > size().
5372
5373 \snippet qstring/main.cpp slice97
5374
5375 \sa sliced(), first(), last(), chopped(), chop(), truncate()
5376*/
5377
5378/*!
5379 \fn QString &QString::slice(qsizetype pos)
5380 \since 6.8
5381 \overload
5382
5383 Modifies this string to start at position \a pos and extending to its end,
5384 and returns a reference to this string.
5385
5386 \note The behavior is undefined if \a pos < 0 or \a pos > size().
5387
5388 \sa sliced(), first(), last(), chopped(), chop(), truncate()
5389*/
5390
5391/*!
5392 \fn QString QString::chopped(qsizetype len) const &
5393 \fn QString QString::chopped(qsizetype len) &&
5394 \since 5.10
5395
5396 Returns a string that contains the size() - \a len leftmost characters
5397 of this string.
5398
5399 \note The behavior is undefined if \a len is negative or greater than size().
5400
5401 \sa endsWith(), first(), last(), sliced(), chop(), truncate(), slice()
5402*/
5403
5404/*!
5405 Returns \c true if the string starts with \a s; otherwise returns
5406 \c false.
5407
5408 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
5409
5410 \snippet qstring/main.cpp 65
5411
5412 \sa endsWith()
5413*/
5414bool QString::startsWith(const QString& s, Qt::CaseSensitivity cs) const
5415{
5416 return qt_starts_with_impl(QStringView(*this), QStringView(s), cs);
5417}
5418
5419/*!
5420 \overload startsWith()
5421 */
5422bool QString::startsWith(QLatin1StringView s, Qt::CaseSensitivity cs) const
5423{
5424 return qt_starts_with_impl(QStringView(*this), s, cs);
5425}
5426
5427/*!
5428 \overload startsWith()
5429
5430 Returns \c true if the string starts with \a c; otherwise returns
5431 \c false.
5432*/
5433bool QString::startsWith(QChar c, Qt::CaseSensitivity cs) const
5434{
5435 if (!size())
5436 return false;
5437 if (cs == Qt::CaseSensitive)
5438 return at(0) == c;
5439 return foldCase(at(0)) == foldCase(c);
5440}
5441
5442/*!
5443 \fn bool QString::startsWith(QStringView str, Qt::CaseSensitivity cs) const
5444 \since 5.10
5445 \overload
5446
5447 Returns \c true if the string starts with the string view \a str;
5448 otherwise returns \c false.
5449
5450 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
5451
5452 \sa endsWith()
5453*/
5454
5455/*!
5456 Returns \c true if the string ends with \a s; otherwise returns
5457 \c false.
5458
5459 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
5460
5461 \snippet qstring/main.cpp 20
5462
5463 \sa startsWith()
5464*/
5465bool QString::endsWith(const QString &s, Qt::CaseSensitivity cs) const
5466{
5467 return qt_ends_with_impl(QStringView(*this), QStringView(s), cs);
5468}
5469
5470/*!
5471 \fn bool QString::endsWith(QStringView str, Qt::CaseSensitivity cs) const
5472 \since 5.10
5473 \overload endsWith()
5474 Returns \c true if the string ends with the string view \a str;
5475 otherwise returns \c false.
5476
5477 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
5478
5479 \sa startsWith()
5480*/
5481
5482/*!
5483 \overload endsWith()
5484*/
5485bool QString::endsWith(QLatin1StringView s, Qt::CaseSensitivity cs) const
5486{
5487 return qt_ends_with_impl(QStringView(*this), s, cs);
5488}
5489
5490/*!
5491 Returns \c true if the string ends with \a c; otherwise returns
5492 \c false.
5493
5494 \overload endsWith()
5495 */
5496bool QString::endsWith(QChar c, Qt::CaseSensitivity cs) const
5497{
5498 if (!size())
5499 return false;
5500 if (cs == Qt::CaseSensitive)
5501 return at(size() - 1) == c;
5502 return foldCase(at(size() - 1)) == foldCase(c);
5503}
5504
5505static bool checkCase(QStringView s, QUnicodeTables::Case c) noexcept
5506{
5507 QStringIterator it(s);
5508 while (it.hasNext()) {
5509 const char32_t uc = it.next();
5510 if (caseConversion(uc)[c].diff)
5511 return false;
5512 }
5513 return true;
5514}
5515
5516bool QtPrivate::isLower(QStringView s) noexcept
5517{
5518 return checkCase(s, QUnicodeTables::LowerCase);
5519}
5520
5521bool QtPrivate::isUpper(QStringView s) noexcept
5522{
5523 return checkCase(s, QUnicodeTables::UpperCase);
5524}
5525
5526/*!
5527 Returns \c true if the string is uppercase, that is, it's identical
5528 to its toUpper() folding.
5529
5530 Note that this does \e not mean that the string does not contain
5531 lowercase letters (some lowercase letters do not have an uppercase
5532 folding; they are left unchanged by toUpper()).
5533 For more information, refer to the Unicode standard, section 3.13.
5534
5535 \since 5.12
5536
5537 \sa QChar::toUpper(), isLower()
5538*/
5539bool QString::isUpper() const
5540{
5541 return QtPrivate::isUpper(qToStringViewIgnoringNull(*this));
5542}
5543
5544/*!
5545 Returns \c true if the string is lowercase, that is, it's identical
5546 to its toLower() folding.
5547
5548 Note that this does \e not mean that the string does not contain
5549 uppercase letters (some uppercase letters do not have a lowercase
5550 folding; they are left unchanged by toLower()).
5551 For more information, refer to the Unicode standard, section 3.13.
5552
5553 \since 5.12
5554
5555 \sa QChar::toLower(), isUpper()
5556 */
5557bool QString::isLower() const
5558{
5559 return QtPrivate::isLower(qToStringViewIgnoringNull(*this));
5560}
5561
5562static QByteArray qt_convert_to_latin1(QStringView string);
5563
5564QByteArray QString::toLatin1_helper(const QString &string)
5565{
5566 return qt_convert_to_latin1(string);
5567}
5568
5569/*!
5570 \since 6.0
5571 \internal
5572 \relates QAnyStringView
5573
5574 Returns a UTF-16 representation of \a string as a QString.
5575
5576 \sa QString::toLatin1(), QStringView::toLatin1(), QtPrivate::convertToUtf8(),
5577 QtPrivate::convertToLocal8Bit(), QtPrivate::convertToUcs4()
5578*/
5579QString QtPrivate::convertToQString(QAnyStringView string)
5580{
5581 return string.visit([] (auto string) { return string.toString(); });
5582}
5583
5584/*!
5585 \since 5.10
5586 \internal
5587 \relates QStringView
5588
5589 Returns a Latin-1 representation of \a string as a QByteArray.
5590
5591 The behavior is undefined if \a string contains non-Latin1 characters.
5592
5593 \sa QString::toLatin1(), QStringView::toLatin1(), QtPrivate::convertToUtf8(),
5594 QtPrivate::convertToLocal8Bit(), QtPrivate::convertToUcs4()
5595*/
5597{
5598 return qt_convert_to_latin1(string);
5599}
5600
5601Q_NEVER_INLINE
5602static QByteArray qt_convert_to_latin1(QStringView string)
5603{
5604 if (Q_UNLIKELY(string.isNull()))
5605 return QByteArray();
5606
5607 QByteArray ba(string.size(), Qt::Uninitialized);
5608
5609 // since we own the only copy, we're going to const_cast the constData;
5610 // that avoids an unnecessary call to detach() and expansion code that will never get used
5611 qt_to_latin1(reinterpret_cast<uchar *>(const_cast<char *>(ba.constData())),
5612 string.utf16(), string.size());
5613 return ba;
5614}
5615
5616QByteArray QString::toLatin1_helper_inplace(QString &s)
5617{
5618 if (!s.isDetached())
5619 return qt_convert_to_latin1(s);
5620
5621 // We can return our own buffer to the caller.
5622 // Conversion to Latin-1 always shrinks the buffer by half.
5623 // This relies on the fact that we use QArrayData for everything behind the scenes
5624
5625 // First, do the in-place conversion. Since isDetached() == true, the data
5626 // was allocated by QArrayData, so the null terminator must be there.
5627 qsizetype length = s.size();
5628 char16_t *sdata = s.d->data();
5629 Q_ASSERT(sdata[length] == u'\0');
5630 qt_to_latin1(reinterpret_cast<uchar *>(sdata), sdata, length + 1);
5631
5632 // Move the internals over to the byte array.
5633 // Kids, avert your eyes. Don't try this at home.
5634 auto ba_d = std::move(s.d).reinterpreted<char>();
5635
5636 // Some sanity checks
5637 Q_ASSERT(ba_d.d->allocatedCapacity() >= ba_d.size);
5638 Q_ASSERT(s.isNull());
5639 Q_ASSERT(s.isEmpty());
5640 Q_ASSERT(s.constData() == QString().constData());
5641
5642 return QByteArray(std::move(ba_d));
5643}
5644
5645/*!
5646 \since 6.9
5647 \internal
5648 \relates QLatin1StringView
5649
5650 Returns a UTF-8 representation of \a string as a QByteArray.
5651*/
5652QByteArray QtPrivate::convertToUtf8(QLatin1StringView string)
5653{
5654 if (Q_UNLIKELY(string.isNull()))
5655 return QByteArray();
5656
5657 // create a QByteArray with the worst case scenario size
5658 QByteArray ba(string.size() * 2, Qt::Uninitialized);
5659 const qsizetype sz = QUtf8::convertFromLatin1(ba.data(), string) - ba.data();
5660 ba.truncate(sz);
5661
5662 return ba;
5663}
5664
5665// QLatin1 methods that use helpers from qstring.cpp
5666char16_t *QLatin1::convertToUnicode(char16_t *out, QLatin1StringView in) noexcept
5667{
5668 const qsizetype len = in.size();
5669 qt_from_latin1(out, in.data(), len);
5670 return std::next(out, len);
5671}
5672
5673char *QLatin1::convertFromUnicode(char *out, QStringView in) noexcept
5674{
5675 const qsizetype len = in.size();
5676 qt_to_latin1(reinterpret_cast<uchar *>(out), in.utf16(), len);
5677 return out + len;
5678}
5679
5680/*!
5681 \fn QByteArray QString::toLatin1() const
5682
5683 Returns a Latin-1 representation of the string as a QByteArray.
5684
5685 The returned byte array is undefined if the string contains non-Latin1
5686 characters. Those characters may be suppressed or replaced with a
5687 question mark.
5688
5689 \sa fromLatin1(), toUtf8(), toLocal8Bit(), QStringEncoder
5690*/
5691
5692static QByteArray qt_convert_to_local_8bit(QStringView string);
5693
5694/*!
5695 \fn QByteArray QString::toLocal8Bit() const
5696
5697 Returns the local 8-bit representation of the string as a
5698 QByteArray.
5699
5700 \include qstring.qdocinc {qstring-local-8-bit-equivalent} {toUtf8}
5701
5702 If this string contains any characters that cannot be encoded in the
5703 local 8-bit encoding, the returned byte array is undefined. Those
5704 characters may be suppressed or replaced by another.
5705
5706 \sa fromLocal8Bit(), toLatin1(), toUtf8(), QStringEncoder
5707*/
5708
5709QByteArray QString::toLocal8Bit_helper(const QChar *data, qsizetype size)
5710{
5711 return qt_convert_to_local_8bit(QStringView(data, size));
5712}
5713
5714static QByteArray qt_convert_to_local_8bit(QStringView string)
5715{
5716 if (string.isNull())
5717 return QByteArray();
5718 QStringEncoder fromUtf16(QStringEncoder::System, QStringEncoder::Flag::Stateless);
5719 return fromUtf16(string);
5720}
5721
5722/*!
5723 \since 5.10
5724 \internal
5725 \relates QStringView
5726
5727 Returns a local 8-bit representation of \a string as a QByteArray.
5728
5729 On Unix systems this is equivalent to toUtf8(); on Windows the system's
5730 current code page is used.
5731
5732 The behavior is undefined if \a string contains characters not
5733 supported by the locale's 8-bit encoding.
5734
5735 \sa QString::toLocal8Bit(), QStringView::toLocal8Bit()
5736*/
5738{
5739 return qt_convert_to_local_8bit(string);
5740}
5741
5742static QByteArray qt_convert_to_utf8(QStringView str);
5743
5744/*!
5745 \fn QByteArray QString::toUtf8() const
5746
5747 Returns a UTF-8 representation of the string as a QByteArray.
5748
5749 UTF-8 is a Unicode codec and can represent all characters in a Unicode
5750 string like QString.
5751
5752 \sa fromUtf8(), toLatin1(), toLocal8Bit(), QStringEncoder
5753*/
5754
5755QByteArray QString::toUtf8_helper(const QString &str)
5756{
5757 return qt_convert_to_utf8(str);
5758}
5759
5760static QByteArray qt_convert_to_utf8(QStringView str)
5761{
5762 if (str.isNull())
5763 return QByteArray();
5764
5765 return QUtf8::convertFromUnicode(str);
5766}
5767
5768/*!
5769 \since 5.10
5770 \internal
5771 \relates QStringView
5772
5773 Returns a UTF-8 representation of \a string as a QByteArray.
5774
5775 UTF-8 is a Unicode codec and can represent all characters in a Unicode
5776 string like QStringView.
5777
5778 \sa QString::toUtf8(), QStringView::toUtf8()
5779*/
5781{
5782 return qt_convert_to_utf8(string);
5783}
5784
5785static QList<uint> qt_convert_to_ucs4(QStringView string);
5786
5787/*!
5788 \since 4.2
5789
5790 Returns a UCS-4/UTF-32 representation of the string as a QList<uint>.
5791
5792 UTF-32 is a Unicode codec and therefore it is lossless. All characters from
5793 this string will be encoded in UTF-32. Any invalid sequence of code units in
5794 this string is replaced by the Unicode replacement character
5795 (QChar::ReplacementCharacter, which corresponds to \c{U+FFFD}).
5796
5797 The returned list is not 0-terminated.
5798
5799 \sa fromUtf8(), toUtf8(), toLatin1(), toLocal8Bit(), QStringEncoder,
5800 fromUcs4(), toWCharArray()
5801*/
5802QList<uint> QString::toUcs4() const
5803{
5804 return qt_convert_to_ucs4(*this);
5805}
5806
5807static QList<uint> qt_convert_to_ucs4(QStringView string)
5808{
5809 QList<uint> v(string.size());
5810 uint *a = const_cast<uint*>(v.constData());
5811 QStringIterator it(string);
5812 while (it.hasNext())
5813 *a++ = it.next();
5814 v.resize(a - v.constData());
5815 return v;
5816}
5817
5818/*!
5819 \since 5.10
5820 \internal
5821 \relates QStringView
5822
5823 Returns a UCS-4/UTF-32 representation of \a string as a QList<uint>.
5824
    UTF-32 is a Unicode codec and therefore it is lossless. All characters from
    \a string will be encoded in UTF-32. Any invalid sequence of code units in
    \a string is replaced by the Unicode replacement character
    (QChar::ReplacementCharacter, which corresponds to \c{U+FFFD}).
5829
5830 The returned list is not 0-terminated.
5831
5832 \sa QString::toUcs4(), QStringView::toUcs4(), QtPrivate::convertToLatin1(),
5833 QtPrivate::convertToLocal8Bit(), QtPrivate::convertToUtf8()
5834*/
5835QList<uint> QtPrivate::convertToUcs4(QStringView string)
5836{
5837 return qt_convert_to_ucs4(string);
5838}
5839
5840/*!
5841 \fn QString QString::fromLatin1(QByteArrayView str)
5842 \overload
5843 \since 6.0
5844
5845 Returns a QString initialized with the Latin-1 string \a str.
5846
5847 \note: any null ('\\0') bytes in the byte array will be included in this
5848 string, converted to Unicode null characters (U+0000).
5849*/
5850QString QString::fromLatin1(QByteArrayView ba)
5851{
5852 DataPointer d;
5853 if (!ba.data()) {
5854 // nothing to do
5855 } else if (ba.size() == 0) {
5856 d = DataPointer::fromRawData(&_empty, 0);
5857 } else {
5858 d = DataPointer(ba.size(), ba.size());
5859 Q_CHECK_PTR(d.data());
5860 d.data()[ba.size()] = '\0';
5861 char16_t *dst = d.data();
5862
5863 qt_from_latin1(dst, ba.data(), size_t(ba.size()));
5864 }
5865 return QString(std::move(d));
5866}
5867
5868/*!
5869 \fn QString QString::fromLatin1(const char *str, qsizetype size)
5870 Returns a QString initialized with the first \a size characters
5871 of the Latin-1 string \a str.
5872
5873 If \a size is \c{-1}, \c{strlen(str)} is used instead.
5874
5875 \sa toLatin1(), fromUtf8(), fromLocal8Bit()
5876*/
5877
5878/*!
5879 \fn QString QString::fromLatin1(const QByteArray &str)
5880 \overload
5881 \since 5.0
5882
5883 Returns a QString initialized with the Latin-1 string \a str.
5884
5885 \note: any null ('\\0') bytes in the byte array will be included in this
5886 string, converted to Unicode null characters (U+0000). This behavior is
5887 different from Qt 5.x.
5888*/
5889
5890/*!
5891 \fn QString QString::fromLocal8Bit(const char *str, qsizetype size)
5892 Returns a QString initialized with the first \a size characters
5893 of the 8-bit string \a str.
5894
5895 If \a size is \c{-1}, \c{strlen(str)} is used instead.
5896
5897 \include qstring.qdocinc {qstring-local-8-bit-equivalent} {fromUtf8}
5898
5899 \sa toLocal8Bit(), fromLatin1(), fromUtf8()
5900*/
5901
5902/*!
5903 \fn QString QString::fromLocal8Bit(const QByteArray &str)
5904 \overload
5905 \since 5.0
5906
5907 Returns a QString initialized with the 8-bit string \a str.
5908
5909 \include qstring.qdocinc {qstring-local-8-bit-equivalent} {fromUtf8}
5910
5911 \note: any null ('\\0') bytes in the byte array will be included in this
5912 string, converted to Unicode null characters (U+0000). This behavior is
5913 different from Qt 5.x.
5914*/
5915
5916/*!
5917 \fn QString QString::fromLocal8Bit(QByteArrayView str)
5918 \overload
5919 \since 6.0
5920
5921 Returns a QString initialized with the 8-bit string \a str.
5922
5923 \include qstring.qdocinc {qstring-local-8-bit-equivalent} {fromUtf8}
5924
5925 \note: any null ('\\0') bytes in the byte array will be included in this
5926 string, converted to Unicode null characters (U+0000).
5927*/
5928QString QString::fromLocal8Bit(QByteArrayView ba)
5929{
5930 if (ba.isNull())
5931 return QString();
5932 if (ba.isEmpty())
5933 return QString(DataPointer::fromRawData(&_empty, 0));
5934 QStringDecoder toUtf16(QStringDecoder::System, QStringDecoder::Flag::Stateless);
5935 return toUtf16(ba);
5936}
5937
5938/*! \fn QString QString::fromUtf8(const char *str, qsizetype size)
5939 Returns a QString initialized with the first \a size bytes
5940 of the UTF-8 string \a str.
5941
5942 If \a size is \c{-1}, \c{strlen(str)} is used instead.
5943
5944 UTF-8 is a Unicode codec and can represent all characters in a Unicode
5945 string like QString. However, invalid sequences are possible with UTF-8
5946 and, if any such are found, they will be replaced with one or more
5947 "replacement characters", or suppressed. These include non-Unicode
5948 sequences, non-characters, overlong sequences or surrogate codepoints
5949 encoded into UTF-8.
5950
5951 This function can be used to process incoming data incrementally as long as
5952 all UTF-8 characters are terminated within the incoming data. Any
5953 unterminated characters at the end of the string will be replaced or
5954 suppressed. In order to do stateful decoding, please use \l QStringDecoder.
5955
5956 \sa toUtf8(), fromLatin1(), fromLocal8Bit()
5957*/
5958
5959/*!
5960 \fn QString QString::fromUtf8(const char8_t *str)
5961 \overload
5962 \since 6.1
5963
5964 This overload is only available when compiling in C++20 mode.
5965*/
5966
5967/*!
5968 \fn QString QString::fromUtf8(const char8_t *str, qsizetype size)
5969 \overload
5970 \since 6.0
5971
5972 This overload is only available when compiling in C++20 mode.
5973*/
5974
5975/*!
5976 \fn QString QString::fromUtf8(const QByteArray &str)
5977 \overload
5978 \since 5.0
5979
5980 Returns a QString initialized with the UTF-8 string \a str.
5981
5982 \note: any null ('\\0') bytes in the byte array will be included in this
5983 string, converted to Unicode null characters (U+0000). This behavior is
5984 different from Qt 5.x.
5985*/
5986
5987/*!
5988 \fn QString QString::fromUtf8(QByteArrayView str)
5989 \overload
5990 \since 6.0
5991
5992 Returns a QString initialized with the UTF-8 string \a str.
5993
5994 \note: any null ('\\0') bytes in the byte array will be included in this
5995 string, converted to Unicode null characters (U+0000).
5996*/
5997QString QString::fromUtf8(QByteArrayView ba)
5998{
5999 if (ba.isNull())
6000 return QString();
6001 if (ba.isEmpty())
6002 return QString(DataPointer::fromRawData(&_empty, 0));
6003 return QUtf8::convertToUnicode(ba);
6004}
6005
6006#ifndef QT_BOOTSTRAPPED
6007/*!
6008 \since 5.3
6009 Returns a QString initialized with the first \a size characters
6010 of the Unicode string \a unicode (ISO-10646-UTF-16 encoded).
6011
6012 If \a size is -1 (default), \a unicode must be '\\0'-terminated.
6013
6014 This function checks for a Byte Order Mark (BOM). If it is missing,
6015 host byte order is assumed.
6016
6017 This function is slow compared to the other Unicode conversions.
6018 Use QString(const QChar *, qsizetype) or QString(const QChar *) if possible.
6019
6020 QString makes a deep copy of the Unicode data.
6021
6022 \sa utf16(), setUtf16(), fromStdU16String()
6023*/
6024QString QString::fromUtf16(const char16_t *unicode, qsizetype size)
6025{
6026 if (!unicode)
6027 return QString();
6028 if (size < 0)
6029 size = QtPrivate::qustrlen(unicode);
6030 QStringDecoder toUtf16(QStringDecoder::Utf16, QStringDecoder::Flag::Stateless);
6031 return toUtf16(QByteArrayView(reinterpret_cast<const char *>(unicode), size * 2));
6032}
6033
6034/*!
6035 \fn QString QString::fromUtf16(const ushort *str, qsizetype size)
6036 \deprecated [6.0] Use the \c char16_t overload instead.
6037*/
6038
6039/*!
6040 \fn QString QString::fromUcs4(const uint *str, qsizetype size)
6041 \since 4.2
6042 \deprecated [6.0] Use the \c char32_t overload instead.
6043*/
6044
6045/*!
6046 \since 5.3
6047
6048 Returns a QString initialized with the first \a size characters
6049 of the Unicode string \a unicode (encoded as UTF-32).
6050
6051 If \a size is -1 (default), \a unicode must be '\\0'-terminated.
6052
6053 \sa toUcs4(), fromUtf16(), utf16(), setUtf16(), fromWCharArray(),
6054 fromStdU32String()
6055*/
6056QString QString::fromUcs4(const char32_t *unicode, qsizetype size)
6057{
6058 if (!unicode)
6059 return QString();
6060 if (size < 0) {
6061 if constexpr (sizeof(char32_t) == sizeof(wchar_t))
6062 size = wcslen(reinterpret_cast<const wchar_t *>(unicode));
6063 else
6064 size = std::char_traits<char32_t>::length(unicode);
6065 }
6066 QStringDecoder toUtf16(QStringDecoder::Utf32, QStringDecoder::Flag::Stateless);
6067 return toUtf16(QByteArrayView(reinterpret_cast<const char *>(unicode), size * 4));
6068}
6069#endif // !QT_BOOTSTRAPPED
6070
6071/*!
6072 Resizes the string to \a size characters and copies \a unicode
6073 into the string.
6074
6075 If \a unicode is \nullptr, nothing is copied, but the string is still
6076 resized to \a size.
6077
6078 \sa unicode(), setUtf16()
6079*/
6080QString& QString::setUnicode(const QChar *unicode, qsizetype size)
6081{
6082 resize(size);
6083 if (unicode && size)
6084 memcpy(d.data(), unicode, size * sizeof(QChar));
6085 return *this;
6086}
6087
6088/*!
6089 \fn QString::setUnicode(const char16_t *unicode, qsizetype size)
6090 \overload
6091 \since 6.9
6092
6093 \sa unicode(), setUtf16()
6094*/
6095
6096/*!
6097 \fn QString::setUtf16(const char16_t *unicode, qsizetype size)
6098 \since 6.9
6099
6100 Resizes the string to \a size characters and copies \a unicode
6101 into the string.
6102
6103 If \a unicode is \nullptr, nothing is copied, but the string is still
6104 resized to \a size.
6105
6106 Note that unlike fromUtf16(), this function does not consider BOMs and
6107 possibly differing byte ordering.
6108
6109 \sa utf16(), setUnicode()
6110*/
6111
6112/*!
6113 \fn QString &QString::setUtf16(const ushort *unicode, qsizetype size)
6114 \obsolete Use the \c char16_t overload instead.
6115*/
6116
6117/*!
6118 \fn QString QString::simplified() const
6119
6120 Returns a string that has whitespace removed from the start
6121 and the end, and that has each sequence of internal whitespace
6122 replaced with a single space.
6123
6124 Whitespace means any character for which QChar::isSpace() returns
6125 \c true. This includes the ASCII characters '\\t', '\\n', '\\v',
6126 '\\f', '\\r', and ' '.
6127
6128 Example:
6129
6130 \snippet qstring/main.cpp 57
6131
6132 \sa trimmed()
6133*/
6134QString QString::simplified_helper(const QString &str)
6135{
6136 return QStringAlgorithms<const QString>::simplified_helper(str);
6137}
6138
6139QString QString::simplified_helper(QString &str)
6140{
6141 return QStringAlgorithms<QString>::simplified_helper(str);
6142}
6143
6144namespace {
6145 template <typename StringView>
6146 StringView qt_trimmed(StringView s) noexcept
6147 {
6148 const auto [begin, end] = QStringAlgorithms<const StringView>::trimmed_helper_positions(s);
6149 return StringView{begin, end};
6150 }
6151}
6152
6153/*!
6154 \fn QStringView QtPrivate::trimmed(QStringView s)
6155 \fn QLatin1StringView QtPrivate::trimmed(QLatin1StringView s)
6156 \internal
6157 \relates QStringView
6158 \since 5.10
6159
6160 Returns \a s with whitespace removed from the start and the end.
6161
6162 Whitespace means any character for which QChar::isSpace() returns
6163 \c true. This includes the ASCII characters '\\t', '\\n', '\\v',
6164 '\\f', '\\r', and ' '.
6165
6166 \sa QString::trimmed(), QStringView::trimmed(), QLatin1StringView::trimmed()
6167*/
6168QStringView QtPrivate::trimmed(QStringView s) noexcept
6169{
6170 return qt_trimmed(s);
6171}
6172
6173QLatin1StringView QtPrivate::trimmed(QLatin1StringView s) noexcept
6174{
6175 return qt_trimmed(s);
6176}
6177
6178/*!
6179 \fn QString QString::trimmed() const
6180
6181 Returns a string that has whitespace removed from the start and
6182 the end.
6183
6184 Whitespace means any character for which QChar::isSpace() returns
6185 \c true. This includes the ASCII characters '\\t', '\\n', '\\v',
6186 '\\f', '\\r', and ' '.
6187
6188 Example:
6189
6190 \snippet qstring/main.cpp 82
6191
6192 Unlike simplified(), trimmed() leaves internal whitespace alone.
6193
6194 \sa simplified()
6195*/
6196QString QString::trimmed_helper(const QString &str)
6197{
6198 return QStringAlgorithms<const QString>::trimmed_helper(str);
6199}
6200
6201QString QString::trimmed_helper(QString &str)
6202{
6203 return QStringAlgorithms<QString>::trimmed_helper(str);
6204}
6205
6206/*! \fn const QChar QString::at(qsizetype position) const
6207
6208 Returns the character at the given index \a position in the
6209 string.
6210
6211 The \a position must be a valid index position in the string
6212 (i.e., 0 <= \a position < size()).
6213
6214 \sa operator[]()
6215*/
6216
6217/*!
6218 \fn QChar &QString::operator[](qsizetype position)
6219
6220 Returns the character at the specified \a position in the string as a
6221 modifiable reference.
6222
6223 Example:
6224
6225 \snippet qstring/main.cpp 85
6226
6227 \sa at()
6228*/
6229
6230/*!
6231 \fn const QChar QString::operator[](qsizetype position) const
6232
6233 \overload operator[]()
6234*/
6235
6236/*!
6237 \fn QChar QString::front() const
6238 \since 5.10
6239
6240 Returns the first character in the string.
6241 Same as \c{at(0)}.
6242
6243 This function is provided for STL compatibility.
6244
6245 \warning Calling this function on an empty string constitutes
6246 undefined behavior.
6247
6248 \sa back(), at(), operator[]()
6249*/
6250
6251/*!
6252 \fn QChar QString::back() const
6253 \since 5.10
6254
6255 Returns the last character in the string.
6256 Same as \c{at(size() - 1)}.
6257
6258 This function is provided for STL compatibility.
6259
6260 \warning Calling this function on an empty string constitutes
6261 undefined behavior.
6262
6263 \sa front(), at(), operator[]()
6264*/
6265
6266/*!
6267 \fn QChar &QString::front()
6268 \since 5.10
6269
6270 Returns a reference to the first character in the string.
6271 Same as \c{operator[](0)}.
6272
6273 This function is provided for STL compatibility.
6274
6275 \warning Calling this function on an empty string constitutes
6276 undefined behavior.
6277
6278 \sa back(), at(), operator[]()
6279*/
6280
6281/*!
6282 \fn QChar &QString::back()
6283 \since 5.10
6284
6285 Returns a reference to the last character in the string.
6286 Same as \c{operator[](size() - 1)}.
6287
6288 This function is provided for STL compatibility.
6289
6290 \warning Calling this function on an empty string constitutes
6291 undefined behavior.
6292
6293 \sa front(), at(), operator[]()
6294*/
6295
6296/*!
6297 \fn void QString::truncate(qsizetype position)
6298
6299 Truncates the string starting from, and including, the element at index
6300 \a position.
6301
6302 If the specified \a position index is beyond the end of the
6303 string, nothing happens.
6304
6305 Example:
6306
6307 \snippet qstring/main.cpp 83
6308
6309 If \a position is negative, it is equivalent to passing zero.
6310
6311 \sa chop(), resize(), first(), QStringView::truncate()
6312*/
6313
6314void QString::truncate(qsizetype pos)
6315{
6316 if (pos < size())
6317 resize(pos);
6318}
6319
6320
6321/*!
6322 Removes \a n characters from the end of the string.
6323
6324 If \a n is greater than or equal to size(), the result is an
6325 empty string; if \a n is negative, it is equivalent to passing zero.
6326
6327 Example:
6328 \snippet qstring/main.cpp 15
6329
6330 If you want to remove characters from the \e beginning of the
6331 string, use remove() instead.
6332
6333 \sa truncate(), resize(), remove(), QStringView::chop()
6334*/
6335void QString::chop(qsizetype n)
6336{
6337 if (n > 0)
6338 resize(d.size - n);
6339}
6340
6341/*!
6342 Sets every character in the string to character \a ch. If \a size
6343 is different from -1 (default), the string is resized to \a
6344 size beforehand.
6345
6346 Example:
6347
6348 \snippet qstring/main.cpp 21
6349
6350 \sa resize()
6351*/
6352
6353QString& QString::fill(QChar ch, qsizetype size)
6354{
6355 resize(size < 0 ? d.size : size);
6356 if (d.size)
6357 std::fill(d.data(), d.data() + d.size, ch.unicode());
6358 return *this;
6359}
6360
6361/*!
6362 \fn qsizetype QString::length() const
6363
6364 Returns the number of characters in this string. Equivalent to
6365 size().
6366
6367 \sa resize()
6368*/
6369
6370/*!
6371 \fn qsizetype QString::size() const
6372
6373 Returns the number of characters in this string.
6374
6375 The last character in the string is at position size() - 1.
6376
6377 Example:
6378 \snippet qstring/main.cpp 58
6379
6380 \sa isEmpty(), resize()
6381*/
6382
6383/*!
6384 \fn qsizetype QString::max_size() const
6385 \fn qsizetype QString::maxSize()
6386 \since 6.8
6387
6388 It returns the maximum number of elements that the string can
6389 theoretically hold. In practice, the number can be much smaller,
6390 limited by the amount of memory available to the system.
6391*/
6392
6393/*! \fn bool QString::isNull() const
6394
6395 Returns \c true if this string is null; otherwise returns \c false.
6396
6397 Example:
6398
6399 \snippet qstring/main.cpp 28
6400
6401 Qt makes a distinction between null strings and empty strings for
6402 historical reasons. For most applications, what matters is
6403 whether or not a string contains any data, and this can be
6404 determined using the isEmpty() function.
6405
6406 \sa isEmpty()
6407*/
6408
6409/*! \fn bool QString::isEmpty() const
6410
6411 Returns \c true if the string has no characters; otherwise returns
6412 \c false.
6413
6414 Example:
6415
6416 \snippet qstring/main.cpp 27
6417
6418 \sa size()
6419*/
6420
6421/*! \fn QString &QString::operator+=(const QString &other)
6422
6423 Appends the string \a other onto the end of this string and
6424 returns a reference to this string.
6425
6426 Example:
6427
6428 \snippet qstring/main.cpp 84
6429
6430 This operation is typically very fast (\l{constant time}),
6431 because QString preallocates extra space at the end of the string
6432 data so it can grow without reallocating the entire string each
6433 time.
6434
6435 \sa append(), prepend()
6436*/
6437
6438/*! \fn QString &QString::operator+=(QLatin1StringView str)
6439
6440 \overload operator+=()
6441
6442 Appends the Latin-1 string viewed by \a str to this string.
6443*/
6444
6445/*! \fn QString &QString::operator+=(QUtf8StringView str)
6446 \since 6.5
6447 \overload operator+=()
6448
6449 Appends the UTF-8 string view \a str to this string.
6450*/
6451
6452/*! \fn QString &QString::operator+=(const QByteArray &ba)
6453
6454 \overload operator+=()
6455
6456 Appends the byte array \a ba to this string. The byte array is converted
6457 to Unicode using the fromUtf8() function. If any NUL characters ('\\0')
6458 are embedded in the \a ba byte array, they will be included in the
6459 transformation.
6460
6461 You can disable this function by defining
6462 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
6463 can be useful if you want to ensure that all user-visible strings
6464 go through QObject::tr(), for example.
6465*/
6466
6467/*! \fn QString &QString::operator+=(const char *str)
6468
6469 \overload operator+=()
6470
6471 Appends the string \a str to this string. The const char pointer
6472 is converted to Unicode using the fromUtf8() function.
6473
6474 You can disable this function by defining \l QT_NO_CAST_FROM_ASCII
6475 when you compile your applications. This can be useful if you want
6476 to ensure that all user-visible strings go through QObject::tr(),
6477 for example.
6478*/
6479
6480/*! \fn QString &QString::operator+=(QStringView str)
6481 \since 6.0
6482 \overload operator+=()
6483
6484 Appends the string view \a str to this string.
6485*/
6486
6487/*! \fn QString &QString::operator+=(QChar ch)
6488
6489 \overload operator+=()
6490
6491 Appends the character \a ch to the string.
6492*/
6493
6494/*!
6495 \fn bool QString::operator==(const char * const &lhs, const QString &rhs)
6496
6497 \overload operator==()
6498
6499 Returns \c true if \a lhs is equal to \a rhs; otherwise returns \c false.
6500 Note that no string is equal to \a lhs being 0.
6501
6502 Equivalent to \c {lhs != 0 && compare(lhs, rhs) == 0}.
6503*/
6504
6505/*!
6506 \fn bool QString::operator!=(const char * const &lhs, const QString &rhs)
6507
6508 Returns \c true if \a lhs is not equal to \a rhs; otherwise returns
6509 \c false.
6510
6511 For \a lhs != 0, this is equivalent to \c {compare(} \a lhs, \a rhs
6512 \c {) != 0}. Note that no string is equal to \a lhs being 0.
6513*/
6514
6515/*!
6516 \fn bool QString::operator<(const char * const &lhs, const QString &rhs)
6517
6518 Returns \c true if \a lhs is lexically less than \a rhs; otherwise
6519 returns \c false. For \a lhs != 0, this is equivalent to \c
6520 {compare(lhs, rhs) < 0}.
6521
6522 \sa {Comparing Strings}
6523*/
6524
6525/*!
6526 \fn bool QString::operator<=(const char * const &lhs, const QString &rhs)
6527
6528 Returns \c true if \a lhs is lexically less than or equal to \a rhs;
6529 otherwise returns \c false. For \a lhs != 0, this is equivalent to \c
6530 {compare(lhs, rhs) <= 0}.
6531
6532 \sa {Comparing Strings}
6533*/
6534
6535/*!
6536 \fn bool QString::operator>(const char * const &lhs, const QString &rhs)
6537
6538 Returns \c true if \a lhs is lexically greater than \a rhs; otherwise
6539 returns \c false. Equivalent to \c {compare(lhs, rhs) > 0}.
6540
6541 \sa {Comparing Strings}
6542*/
6543
6544/*!
6545 \fn bool QString::operator>=(const char * const &lhs, const QString &rhs)
6546
6547 Returns \c true if \a lhs is lexically greater than or equal to \a rhs;
6548 otherwise returns \c false. For \a lhs != 0, this is equivalent to \c
6549 {compare(lhs, rhs) >= 0}.
6550
6551 \sa {Comparing Strings}
6552*/
6553
6554/*!
6555 \fn QString operator+(const QString &s1, const QString &s2)
6556 \fn QString operator+(QString &&s1, const QString &s2)
6557 \relates QString
6558
6559 Returns a string which is the result of concatenating \a s1 and \a
6560 s2.
6561*/
6562
6563/*!
6564 \fn QString operator+(const QString &s1, const char *s2)
6565 \relates QString
6566
6567 Returns a string which is the result of concatenating \a s1 and \a
6568 s2 (\a s2 is converted to Unicode using the QString::fromUtf8()
6569 function).
6570
6571 \sa QString::fromUtf8()
6572*/
6573
6574/*!
6575 \fn QString operator+(const char *s1, const QString &s2)
6576 \relates QString
6577
6578 Returns a string which is the result of concatenating \a s1 and \a
6579 s2 (\a s1 is converted to Unicode using the QString::fromUtf8()
6580 function).
6581
6582 \sa QString::fromUtf8()
6583*/
6584
6585/*!
6586 \fn QString operator+(QStringView lhs, const QString &rhs)
6587 \fn QString operator+(const QString &lhs, QStringView rhs)
6588
6589 \relates QString
6590 \since 6.9
6591
6592 Returns a string that is the result of concatenating \a lhs and \a rhs.
6593*/
6594
6595/*!
6596 \fn int QString::compare(const QString &s1, const QString &s2, Qt::CaseSensitivity cs)
6597 \since 4.2
6598
6599 Compares the string \a s1 with the string \a s2 and returns a negative integer
6600 if \a s1 is less than \a s2, a positive integer if it is greater than \a s2,
6601 and zero if they are equal.
6602
6603 \include qstring.qdocinc {search-comparison-case-sensitivity} {comparison}
6604
6605 Case sensitive comparison is based exclusively on the numeric
6606 Unicode values of the characters and is very fast, but is not what
6607 a human would expect. Consider sorting user-visible strings with
6608 localeAwareCompare().
6609
6610 \snippet qstring/main.cpp 16
6611
6612//! [compare-isNull-vs-isEmpty]
6613 \note This function treats null strings the same as empty strings,
6614 for more details see \l {Distinction Between Null and Empty Strings}.
6615//! [compare-isNull-vs-isEmpty]
6616
6617 \sa operator==(), operator<(), operator>(), {Comparing Strings}
6618*/
6619
6620/*!
6621 \fn int QString::compare(const QString &s1, QLatin1StringView s2, Qt::CaseSensitivity cs)
6622 \since 4.2
6623 \overload compare()
6624
6625 Performs a comparison of \a s1 and \a s2, using the case
6626 sensitivity setting \a cs.
6627*/
6628
6629/*!
6630 \fn int QString::compare(QLatin1StringView s1, const QString &s2, Qt::CaseSensitivity cs = Qt::CaseSensitive)
6631
6632 \since 4.2
6633 \overload compare()
6634
6635 Performs a comparison of \a s1 and \a s2, using the case
6636 sensitivity setting \a cs.
6637*/
6638
6639/*!
6640 \fn int QString::compare(QStringView s, Qt::CaseSensitivity cs = Qt::CaseSensitive) const
6641
6642 \since 5.12
6643 \overload compare()
6644
6645 Performs a comparison of this with \a s, using the case
6646 sensitivity setting \a cs.
6647*/
6648
6649/*!
6650 \fn int QString::compare(QChar ch, Qt::CaseSensitivity cs = Qt::CaseSensitive) const
6651
6652 \since 5.14
6653 \overload compare()
6654
6655 Performs a comparison of this with \a ch, using the case
6656 sensitivity setting \a cs.
6657*/
6658
6659/*!
6660 \overload compare()
6661 \since 4.2
6662
6663 Lexically compares this string with the string \a other and returns
6664 a negative integer if this string is less than \a other, a positive
6665 integer if it is greater than \a other, and zero if they are equal.
6666
6667 Same as compare(*this, \a other, \a cs).
6668*/
6669int QString::compare(const QString &other, Qt::CaseSensitivity cs) const noexcept
6670{
6671 return QtPrivate::compareStrings(*this, other, cs);
6672}
6673
6674/*!
6675 \internal
6676 \since 4.5
6677*/
6678int QString::compare_helper(const QChar *data1, qsizetype length1, const QChar *data2, qsizetype length2,
6679 Qt::CaseSensitivity cs) noexcept
6680{
6681 Q_ASSERT(length1 >= 0);
6682 Q_ASSERT(length2 >= 0);
6683 Q_ASSERT(data1 || length1 == 0);
6684 Q_ASSERT(data2 || length2 == 0);
6685 return QtPrivate::compareStrings(QStringView(data1, length1), QStringView(data2, length2), cs);
6686}
6687
6688/*!
6689 \overload compare()
6690 \since 4.2
6691
6692 Same as compare(*this, \a other, \a cs).
6693*/
6694int QString::compare(QLatin1StringView other, Qt::CaseSensitivity cs) const noexcept
6695{
6696 return QtPrivate::compareStrings(*this, other, cs);
6697}
6698
6699/*!
6700 \internal
6701 \since 5.0
6702*/
6703int QString::compare_helper(const QChar *data1, qsizetype length1, const char *data2, qsizetype length2,
6704 Qt::CaseSensitivity cs) noexcept
6705{
6706 Q_ASSERT(length1 >= 0);
6707 Q_ASSERT(data1 || length1 == 0);
6708 if (!data2)
6709 return qt_lencmp(length1, 0);
6710 if (Q_UNLIKELY(length2 < 0))
6711 length2 = qsizetype(strlen(data2));
6712 return QtPrivate::compareStrings(QStringView(data1, length1),
6713 QUtf8StringView(data2, length2), cs);
6714}
6715
6716/*!
6717 \fn int QString::compare(const QString &s1, QStringView s2, Qt::CaseSensitivity cs = Qt::CaseSensitive)
6718 \overload compare()
6719*/
6720
6721/*!
6722 \fn int QString::compare(QStringView s1, const QString &s2, Qt::CaseSensitivity cs = Qt::CaseSensitive)
6723 \overload compare()
6724*/
6725
6726bool comparesEqual(const QByteArrayView &lhs, const QChar &rhs) noexcept
6727{
6728 return QtPrivate::equalStrings(QUtf8StringView(lhs), QStringView(&rhs, 1));
6729}
6730
6731Qt::strong_ordering compareThreeWay(const QByteArrayView &lhs, const QChar &rhs) noexcept
6732{
6733 const int res = QtPrivate::compareStrings(QUtf8StringView(lhs), QStringView(&rhs, 1));
6734 return Qt::compareThreeWay(res, 0);
6735}
6736
6737bool comparesEqual(const QByteArrayView &lhs, char16_t rhs) noexcept
6738{
6739 return QtPrivate::equalStrings(QUtf8StringView(lhs), QStringView(&rhs, 1));
6740}
6741
6742Qt::strong_ordering compareThreeWay(const QByteArrayView &lhs, char16_t rhs) noexcept
6743{
6744 const int res = QtPrivate::compareStrings(QUtf8StringView(lhs), QStringView(&rhs, 1));
6745 return Qt::compareThreeWay(res, 0);
6746}
6747
6748bool comparesEqual(const QByteArray &lhs, const QChar &rhs) noexcept
6749{
6750 return QtPrivate::equalStrings(QUtf8StringView(lhs), QStringView(&rhs, 1));
6751}
6752
6753Qt::strong_ordering compareThreeWay(const QByteArray &lhs, const QChar &rhs) noexcept
6754{
6755 const int res = QtPrivate::compareStrings(QUtf8StringView(lhs), QStringView(&rhs, 1));
6756 return Qt::compareThreeWay(res, 0);
6757}
6758
6759bool comparesEqual(const QByteArray &lhs, char16_t rhs) noexcept
6760{
6761 return QtPrivate::equalStrings(QUtf8StringView(lhs), QStringView(&rhs, 1));
6762}
6763
6764Qt::strong_ordering compareThreeWay(const QByteArray &lhs, char16_t rhs) noexcept
6765{
6766 const int res = QtPrivate::compareStrings(QUtf8StringView(lhs), QStringView(&rhs, 1));
6767 return Qt::compareThreeWay(res, 0);
6768}
6769
6770/*!
6771 \internal
6772 \since 6.8
6773*/
6774bool QT_FASTCALL QChar::equal_helper(QChar lhs, const char *rhs) noexcept
6775{
6776 return QtPrivate::equalStrings(QStringView(&lhs, 1), QUtf8StringView(rhs));
6777}
6778
6779int QT_FASTCALL QChar::compare_helper(QChar lhs, const char *rhs) noexcept
6780{
6781 return QtPrivate::compareStrings(QStringView(&lhs, 1), QUtf8StringView(rhs));
6782}
6783
6784/*!
6785 \internal
6786 \since 6.8
6787*/
6788bool QStringView::equal_helper(QStringView sv, const char *data, qsizetype len)
6789{
6790 Q_ASSERT(len >= 0);
6791 Q_ASSERT(data || len == 0);
6792 return QtPrivate::equalStrings(sv, QUtf8StringView(data, len));
6793}
6794
6795/*!
6796 \internal
6797 \since 6.8
6798*/
6799int QStringView::compare_helper(QStringView sv, const char *data, qsizetype len)
6800{
6801 Q_ASSERT(len >= 0);
6802 Q_ASSERT(data || len == 0);
6803 return QtPrivate::compareStrings(sv, QUtf8StringView(data, len));
6804}
6805
6806/*!
6807 \internal
6808 \since 6.8
6809*/
6810bool QLatin1StringView::equal_helper(QLatin1StringView s1, const char *s2, qsizetype len) noexcept
6811{
6812 // because qlatin1stringview.h can't include qutf8stringview.h
6813 Q_ASSERT(len >= 0);
6814 Q_ASSERT(s2 || len == 0);
6815 return QtPrivate::equalStrings(s1, QUtf8StringView(s2, len));
6816}
6817
6818/*!
6819 \internal
6820 \since 6.6
6821*/
6822int QLatin1StringView::compare_helper(const QLatin1StringView &s1, const char *s2, qsizetype len) noexcept
6823{
6824 // because qlatin1stringview.h can't include qutf8stringview.h
6825 Q_ASSERT(len >= 0);
6826 Q_ASSERT(s2 || len == 0);
6827 return QtPrivate::compareStrings(s1, QUtf8StringView(s2, len));
6828}
6829
6830/*!
6831 \internal
6832 \since 4.5
6833*/
6834int QLatin1StringView::compare_helper(const QChar *data1, qsizetype length1, QLatin1StringView s2,
6835 Qt::CaseSensitivity cs) noexcept
6836{
6837 Q_ASSERT(length1 >= 0);
6838 Q_ASSERT(data1 || length1 == 0);
6839 return QtPrivate::compareStrings(QStringView(data1, length1), s2, cs);
6840}
6841
6842/*!
6843 \fn int QString::localeAwareCompare(const QString & s1, const QString & s2)
6844
6845 Compares \a s1 with \a s2 and returns an integer less than, equal
6846 to, or greater than zero if \a s1 is less than, equal to, or
6847 greater than \a s2.
6848
6849 The comparison is performed in a locale- and also
6850 platform-dependent manner. Use this function to present sorted
6851 lists of strings to the user.
6852
6853 \sa compare(), QLocale, {Comparing Strings}
6854*/
6855
6856/*!
6857 \fn int QString::localeAwareCompare(QStringView other) const
6858 \since 6.0
6859 \overload localeAwareCompare()
6860
6861 Compares this string with the \a other string and returns an
6862 integer less than, equal to, or greater than zero if this string
6863 is less than, equal to, or greater than the \a other string.
6864
6865 The comparison is performed in a locale- and also
6866 platform-dependent manner. Use this function to present sorted
6867 lists of strings to the user.
6868
6869 Same as \c {localeAwareCompare(*this, other)}.
6870
6871 \sa {Comparing Strings}
6872*/
6873
6874/*!
6875 \fn int QString::localeAwareCompare(QStringView s1, QStringView s2)
6876 \since 6.0
6877 \overload localeAwareCompare()
6878
6879 Compares \a s1 with \a s2 and returns an integer less than, equal
6880 to, or greater than zero if \a s1 is less than, equal to, or
6881 greater than \a s2.
6882
6883 The comparison is performed in a locale- and also
6884 platform-dependent manner. Use this function to present sorted
6885 lists of strings to the user.
6886
6887 \sa {Comparing Strings}
6888*/
6889
6890
// Fallback definitions of the three-way comparison codes, used only when
// nothing included so far already provides CSTR_LESS_THAN.
#ifndef CSTR_LESS_THAN
#  define CSTR_LESS_THAN    1
#  define CSTR_EQUAL        2
#  define CSTR_GREATER_THAN 3
#endif
6896
6897/*!
6898 \overload localeAwareCompare()
6899
6900 Compares this string with the \a other string and returns an
6901 integer less than, equal to, or greater than zero if this string
6902 is less than, equal to, or greater than the \a other string.
6903
6904 The comparison is performed in a locale- and also
6905 platform-dependent manner. Use this function to present sorted
6906 lists of strings to the user.
6907
6908 Same as \c {localeAwareCompare(*this, other)}.
6909
6910 \sa {Comparing Strings}
6911*/
6912int QString::localeAwareCompare(const QString &other) const
6913{
6914 return localeAwareCompare_helper(constData(), size(), other.constData(), other.size());
6915}
6916
6917/*!
6918 \internal
6919 \since 4.5
6920*/
6921int QString::localeAwareCompare_helper(const QChar *data1, qsizetype length1,
6922 const QChar *data2, qsizetype length2)
6923{
6924 Q_ASSERT(length1 >= 0);
6925 Q_ASSERT(data1 || length1 == 0);
6926 Q_ASSERT(length2 >= 0);
6927 Q_ASSERT(data2 || length2 == 0);
6928
6929 // do the right thing for null and empty
6930 if (length1 == 0 || length2 == 0)
6931 return QtPrivate::compareStrings(QStringView(data1, length1), QStringView(data2, length2),
6932 Qt::CaseSensitive);
6933
6934#if QT_CONFIG(icu)
6935 return QCollator::defaultCompare(QStringView(data1, length1), QStringView(data2, length2));
6936#else
6937 const QString lhs = QString::fromRawData(data1, length1).normalized(QString::NormalizationForm_C);
6938 const QString rhs = QString::fromRawData(data2, length2).normalized(QString::NormalizationForm_C);
6939# if defined(Q_OS_WIN)
6940 int res = CompareStringEx(LOCALE_NAME_USER_DEFAULT, 0, (LPWSTR)lhs.constData(), lhs.length(), (LPWSTR)rhs.constData(), rhs.length(), NULL, NULL, 0);
6941
6942 switch (res) {
6943 case CSTR_LESS_THAN:
6944 return -1;
6945 case CSTR_GREATER_THAN:
6946 return 1;
6947 default:
6948 return 0;
6949 }
6950# elif defined (Q_OS_DARWIN)
6951 // Use CFStringCompare for comparing strings on Mac. This makes Qt order
6952 // strings the same way as native applications do, and also respects
6953 // the "Order for sorted lists" setting in the International preferences
6954 // panel.
6955 const CFStringRef thisString =
6956 CFStringCreateWithCharactersNoCopy(kCFAllocatorDefault,
6957 reinterpret_cast<const UniChar *>(lhs.constData()), lhs.length(), kCFAllocatorNull);
6958 const CFStringRef otherString =
6959 CFStringCreateWithCharactersNoCopy(kCFAllocatorDefault,
6960 reinterpret_cast<const UniChar *>(rhs.constData()), rhs.length(), kCFAllocatorNull);
6961
6962 const int result = CFStringCompare(thisString, otherString, kCFCompareLocalized);
6963 CFRelease(thisString);
6964 CFRelease(otherString);
6965 return result;
6966# elif defined(Q_OS_UNIX)
6967 // declared in <string.h> (no better than QtPrivate::compareStrings() on Android, sadly)
6968 return strcoll(lhs.toLocal8Bit().constData(), rhs.toLocal8Bit().constData());
6969# else
6970# error "This case shouldn't happen"
6971 return QtPrivate::compareStrings(lhs, rhs, Qt::CaseSensitive);
6972# endif
6973#endif // !QT_CONFIG(icu)
6974}
6975
6976
6977/*!
6978 \fn const QChar *QString::unicode() const
6979
6980 Returns a Unicode representation of the string.
6981 The result remains valid until the string is modified.
6982
6983 \note The returned string may not be '\\0'-terminated.
6984 Use size() to determine the length of the array.
6985
6986 \sa utf16(), fromRawData()
6987*/
6988
6989/*!
6990 \fn const ushort *QString::utf16() const
6991 \obsolete [6.11] Use nullTerminate() and cast data() to \c{const char16_t *}
6992
6993 Returns the QString as a '\\0'-terminated array of unsigned
6994 shorts. The result remains valid until the string is modified.
6995
6996 The returned string is in host byte order.
6997
6998 \sa unicode()
6999*/
7000
7001const ushort *QString::utf16() const
7002{
7003 if (!d->isMutable()) {
7004 // ensure '\0'-termination for ::fromRawData strings
7005 const_cast<QString*>(this)->reallocData(d.size, QArrayData::KeepSize);
7006 }
7007 return reinterpret_cast<const ushort *>(d.data());
7008}
7009
7010/*!
7011 \fn QString &QString::nullTerminate()
7012 \since 6.10
7013
7014 If this string data isn't null-terminated, this method will make a deep
7015 copy of the data and make it null-terminated.
7016
7017 A QString is null-terminated by default, however in some cases (e.g.
7018 when using fromRawData()), the string data doesn't necessarily end
7019 with a \c {\0} character, which could be a problem when calling methods
7020 that expect a null-terminated string.
7021
7022 \sa nullTerminated(), fromRawData(), setRawData()
7023*/
7024QString &QString::nullTerminate()
7025{
7026 // ensure '\0'-termination for ::fromRawData strings
7027 if (!d->isMutable())
7028 *this = QString{constData(), size()};
7029 return *this;
7030}
7031
7032/*!
7033 \fn QString QString::nullTerminated() const &
7034 \fn QString QString::nullTerminated() &&
7035 \since 6.10
7036
7037 Returns a copy of this string that is always null-terminated.
7038
7039 \sa nullTerminate(), fromRawData(), setRawData()
7040*/
7041QString QString::nullTerminated() const &
7042{
7043 // ensure '\0'-termination for ::fromRawData strings
7044 if (!d->isMutable())
7045 return QString{constData(), size()};
7046 return *this;
7047}
7048
7049QString QString::nullTerminated() &&
7050{
7051 nullTerminate();
7052 return std::move(*this);
7053}
7054
7055/*!
7056 Returns a string of size \a width that contains this string
7057 padded by the \a fill character.
7058
7059 If \a truncate is \c false and the size() of the string is more than
7060 \a width, then the returned string is a copy of the string.
7061
7062 \snippet qstring/main.cpp 32
7063
7064 If \a truncate is \c true and the size() of the string is more than
7065 \a width, then any characters in a copy of the string after
7066 position \a width are removed, and the copy is returned.
7067
7068 \snippet qstring/main.cpp 33
7069
7070 \sa rightJustified()
7071*/
7072
7073QString QString::leftJustified(qsizetype width, QChar fill, bool truncate) const
7074{
7075 QString result;
7076 qsizetype len = size();
7077 qsizetype padlen = width - len;
7078 if (padlen > 0) {
7079 result.resize(len+padlen);
7080 if (len)
7081 memcpy(result.d.data(), d.data(), sizeof(QChar)*len);
7082 QChar *uc = (QChar*)result.d.data() + len;
7083 while (padlen--)
7084 * uc++ = fill;
7085 } else {
7086 if (truncate)
7087 result = left(width);
7088 else
7089 result = *this;
7090 }
7091 return result;
7092}
7093
7094/*!
7095 Returns a string of size() \a width that contains the \a fill
7096 character followed by the string. For example:
7097
7098 \snippet qstring/main.cpp 49
7099
7100 If \a truncate is \c false and the size() of the string is more than
7101 \a width, then the returned string is a copy of the string.
7102
7103 If \a truncate is \c true and the size() of the string is more than
7104 \a width, then the resulting string is truncated at position \a
7105 width.
7106
7107 \snippet qstring/main.cpp 50
7108
7109 \sa leftJustified()
7110*/
7111
7112QString QString::rightJustified(qsizetype width, QChar fill, bool truncate) const
7113{
7114 QString result;
7115 qsizetype len = size();
7116 qsizetype padlen = width - len;
7117 if (padlen > 0) {
7118 result.resize(len+padlen);
7119 QChar *uc = (QChar*)result.d.data();
7120 while (padlen--)
7121 * uc++ = fill;
7122 if (len)
7123 memcpy(static_cast<void *>(uc), static_cast<const void *>(d.data()), sizeof(QChar)*len);
7124 } else {
7125 if (truncate)
7126 result = left(width);
7127 else
7128 result = *this;
7129 }
7130 return result;
7131}
7132
7133/*!
7134 \fn QString QString::toLower() const
7135
7136 Returns a lowercase copy of the string.
7137
7138 \snippet qstring/main.cpp 75
7139
7140 The case conversion will always happen in the 'C' locale. For
7141 locale-dependent case folding use QLocale::toLower().
7142
7143 \sa toUpper(), QLocale::toLower()
7144*/
7145
7146namespace QUnicodeTables {
7147/*
7148 \internal
7149 Converts the \a str string starting from the position pointed to by the \a
7150 it iterator, using the Unicode case traits \c Traits, and returns the
7151 result. The input string must not be empty (the convertCase function below
7152 guarantees that).
7153
7154 The string type \c{T} is also a template and is either \c{const QString} or
7155 \c{QString}. This function can do both copy-conversion and in-place
7156 conversion depending on the state of the \a str parameter:
7157 \list
7158 \li \c{T} is \c{const QString}: copy-convert
7159 \li \c{T} is \c{QString} and its refcount != 1: copy-convert
7160 \li \c{T} is \c{QString} and its refcount == 1: in-place convert
7161 \endlist
7162
7163 In copy-convert mode, the local variable \c{s} is detached from the input
7164 \a str. In the in-place convert mode, \a str is in moved-from state and
7165 \c{s} contains the only copy of the string, without reallocation (thus,
7166 \a it is still valid).
7167
7168 There is one pathological case left: when the in-place conversion needs to
7169 reallocate memory to grow the buffer. In that case, we need to adjust the \a
7170 it pointer.
7171 */
7172template <typename T>
7173Q_NEVER_INLINE
7175{
7176 Q_ASSERT(!str.isEmpty());
7177 QString s = std::move(str); // will copy if T is const QString
7178 QChar *pp = s.begin() + it.index(); // will detach if necessary
7179
7180 do {
7181 const auto folded = fullConvertCase(it.next(), which);
7182 if (Q_UNLIKELY(folded.size() > 1)) {
7183 if (folded.chars[0] == *pp && folded.size() == 2) {
7184 // special case: only second actually changed (e.g. surrogate pairs),
7185 // avoid slow case
7186 ++pp;
7187 *pp++ = folded.chars[1];
7188 } else {
7189 // slow path: the string is growing
7190 qsizetype inpos = it.index() - 1;
7192
7193 s.replace(outpos, 1, reinterpret_cast<const QChar *>(folded.data()), folded.size());
7194 pp = const_cast<QChar *>(s.constBegin()) + outpos + folded.size();
7195
7196 // Adjust the input iterator if we are performing an in-place conversion
7197 if constexpr (!std::is_const<T>::value)
7199 }
7200 } else {
7201 *pp++ = folded.chars[0];
7202 }
7203 } while (it.hasNext());
7204
7205 return s;
7206}
7207
7208template <typename T>
7209static QString convertCase(T &str, QUnicodeTables::Case which)
7210{
7211 const QChar *p = str.constBegin();
7212 const QChar *e = p + str.size();
7213
7214 // this avoids out of bounds check in the loop
7215 while (e != p && e[-1].isHighSurrogate())
7216 --e;
7217
7218 QStringIterator it(p, e);
7219 while (it.hasNext()) {
7220 const char32_t uc = it.next();
7221 if (caseConversion(uc)[which].diff) {
7222 it.recede();
7223 return detachAndConvertCase(str, it, which);
7224 }
7225 }
7226 return std::move(str);
7227}
7228} // namespace QUnicodeTables
7229
7230QString QString::toLower_helper(const QString &str)
7231{
7232 return QUnicodeTables::convertCase(str, QUnicodeTables::LowerCase);
7233}
7234
7235QString QString::toLower_helper(QString &str)
7236{
7237 return QUnicodeTables::convertCase(str, QUnicodeTables::LowerCase);
7238}
7239
7240/*!
7241 \fn QString QString::toCaseFolded() const
7242
7243 Returns the case folded equivalent of the string. For most Unicode
7244 characters this is the same as toLower().
7245*/
7246
7247QString QString::toCaseFolded_helper(const QString &str)
7248{
7249 return QUnicodeTables::convertCase(str, QUnicodeTables::CaseFold);
7250}
7251
7252QString QString::toCaseFolded_helper(QString &str)
7253{
7254 return QUnicodeTables::convertCase(str, QUnicodeTables::CaseFold);
7255}
7256
7257/*!
7258 \fn QString QString::toUpper() const
7259
7260 Returns an uppercase copy of the string.
7261
7262 \snippet qstring/main.cpp 81
7263
7264 The case conversion will always happen in the 'C' locale. For
7265 locale-dependent case folding use QLocale::toUpper().
7266
7267 \note In some cases the uppercase form of a string may be longer than the
7268 original.
7269
7270 \note Since 2024, the German language officially prefers to uppercase ß
7271 (U+00DF LATIN SMALL LETTER SHARP S) as ẞ (U+1E9E LATIN CAPITAL LETTER SHARP S).
7272 Qt's implementation follows Unicode, which still mandates the use of "SS".
7273 If you need to implement the new German rules, you need to manually do
7274 \c{replace(u'ß', u'ẞ')} \e{before} calling this function.
7275
7276 \sa toLower(), QLocale::toLower()
7277*/
7278
7279QString QString::toUpper_helper(const QString &str)
7280{
7281 return QUnicodeTables::convertCase(str, QUnicodeTables::UpperCase);
7282}
7283
7284QString QString::toUpper_helper(QString &str)
7285{
7286 return QUnicodeTables::convertCase(str, QUnicodeTables::UpperCase);
7287}
7288
7289/*!
7290 \since 5.5
7291
7292 Safely builds a formatted string from the format string \a cformat
7293 and an arbitrary list of arguments.
7294
7295 The format string supports the conversion specifiers, length modifiers,
7296 and flags provided by printf() in the standard C++ library. The \a cformat
7297 string and \c{%s} arguments must be UTF-8 encoded.
7298
7299 \note The \c{%lc} escape sequence expects a unicode character of type
7300 \c char16_t (as returned by QChar::unicode()), or \c ushort.
7301 The \c{%ls} escape sequence expects a pointer to a zero-terminated array
7302 of unicode characters of type \c char16_t, or \c ushort (as returned by
7303 QString::utf16()). This is at odds with the printf() in the standard C++
7304 library, which defines \c {%lc} to print a wchar_t and \c{%ls} to print
7305 a \c{wchar_t*}, and might also produce compiler warnings on platforms
7306 where the size of \c {wchar_t} is not 16 bits.
7307
7308 \warning We do not recommend using QString::asprintf() in new Qt
7309 code. Instead, consider using QTextStream or arg(), both of
7310 which support Unicode strings seamlessly and are type-safe.
7311 Here is an example that uses QTextStream:
7312
7313 \snippet qstring/main.cpp 64
7314
7315 For \l {QObject::tr()}{translations}, especially if the string
7316 contains more than one escape sequence, you should consider using
7317 the arg() function instead. This allows the order of the
7318 replacements to be controlled by the translator.
7319
7320 \sa arg()
7321*/
7322
7323QString QString::asprintf(const char *cformat, ...)
7324{
7325 va_list ap;
7326 va_start(ap, cformat);
7327 QString s = vasprintf(cformat, ap);
7328 va_end(ap);
7329 return s;
7330}
7331
7332static void append_utf8(QString &qs, const char *cs, qsizetype len)
7333{
7334 const qsizetype oldSize = qs.size();
7335 qs.resize(oldSize + len);
7336 const QChar *newEnd = QUtf8::convertToUnicode(qs.data() + oldSize, QByteArrayView(cs, len));
7337 qs.resize(newEnd - qs.constData());
7338}
7339
7340static uint parse_flag_characters(const char * &c) noexcept
7341{
7342 uint flags = QLocaleData::ZeroPadExponent;
7343 while (true) {
7344 switch (*c) {
7345 case '#':
7348 break;
7349 case '0': flags |= QLocaleData::ZeroPadded; break;
7350 case '-': flags |= QLocaleData::LeftAdjusted; break;
7351 case ' ': flags |= QLocaleData::BlankBeforePositive; break;
7352 case '+': flags |= QLocaleData::AlwaysShowSign; break;
7353 case '\'': flags |= QLocaleData::GroupDigits; break;
7354 default: return flags;
7355 }
7356 ++c;
7357 }
7358}
7359
7360static int parse_field_width(const char *&c, qsizetype size)
7361{
7362 Q_ASSERT(isAsciiDigit(*c));
7363 const char *const stop = c + size;
7364
7365 // can't be negative - started with a digit
7366 // contains at least one digit
7367 auto [result, used] = qstrntoull(c, size, 10);
7368 c += used;
7369 if (used <= 0)
7370 return false;
7371 // preserve Qt 5.5 behavior of consuming all digits, no matter how many
7372 while (c < stop && isAsciiDigit(*c))
7373 ++c;
7374 return result < qulonglong(std::numeric_limits<int>::max()) ? int(result) : 0;
7375}
7376
7378
// Advances \a c by one and returns true if it currently points at \a ch;
// otherwise leaves \a c untouched and returns false.
static inline bool can_consume(const char * &c, char ch) noexcept
{
    const bool matched = (*c == ch);
    if (matched)
        ++c;
    return matched;
}
7387
7388static LengthMod parse_length_modifier(const char * &c) noexcept
7389{
7390 switch (*c++) {
7391 case 'h': return can_consume(c, 'h') ? lm_hh : lm_h;
7392 case 'l': return can_consume(c, 'l') ? lm_ll : lm_l;
7393 case 'L': return lm_L;
7394 case 'j': return lm_j;
7395 case 'z':
7396 case 'Z': return lm_z;
7397 case 't': return lm_t;
7398 }
7399 --c; // don't consume *c - it wasn't a flag
7400 return lm_none;
7401}
7402
7403/*!
7404 \fn QString QString::vasprintf(const char *cformat, va_list ap)
7405 \since 5.5
7406
7407 Equivalent method to asprintf(), but takes a va_list \a ap
7408 instead of a list of variable arguments. See the asprintf()
7409 documentation for an explanation of \a cformat.
7410
7411 This method does not call the va_end macro, the caller
7412 is responsible to call va_end on \a ap.
7413
7414 \sa asprintf()
7415*/
7416
7417QString QString::vasprintf(const char *cformat, va_list ap)
7418{
7419 if (!cformat || !*cformat) {
7420 // Qt 1.x compat
7421 return fromLatin1("");
7422 }
7423
7424 // Parse cformat
7425
7426 QString result;
7427 const char *c = cformat;
7428 const char *formatEnd = cformat + qstrlen(cformat);
7429 for (;;) {
7430 // Copy non-escape chars to result
7431 const char *cb = c;
7432 while (*c != '\0' && *c != '%')
7433 c++;
7434 append_utf8(result, cb, qsizetype(c - cb));
7435
7436 if (*c == '\0')
7437 break;
7438
7439 // Found '%'
7440 const char *escape_start = c;
7441 ++c;
7442
7443 if (*c == '\0') {
7444 result.append(u'%'); // a % at the end of the string - treat as non-escape text
7445 break;
7446 }
7447 if (*c == '%') {
7448 result.append(u'%'); // %%
7449 ++c;
7450 continue;
7451 }
7452
7453 uint flags = parse_flag_characters(c);
7454
7455 if (*c == '\0') {
7456 result.append(QLatin1StringView(escape_start)); // incomplete escape, treat as non-escape text
7457 break;
7458 }
7459
7460 // Parse field width
7461 int width = -1; // -1 means unspecified
7462 if (isAsciiDigit(*c)) {
7463 width = parse_field_width(c, formatEnd - c);
7464 } else if (*c == '*') { // can't parse this in another function, not portably, at least
7465 width = va_arg(ap, int);
7466 if (width < 0)
7467 width = -1; // treat all negative numbers as unspecified
7468 ++c;
7469 }
7470
7471 if (*c == '\0') {
7472 result.append(QLatin1StringView(escape_start)); // incomplete escape, treat as non-escape text
7473 break;
7474 }
7475
7476 // Parse precision
7477 int precision = -1; // -1 means unspecified
7478 if (*c == '.') {
7479 ++c;
7480 precision = 0;
7481 if (isAsciiDigit(*c)) {
7482 precision = parse_field_width(c, formatEnd - c);
7483 } else if (*c == '*') { // can't parse this in another function, not portably, at least
7484 precision = va_arg(ap, int);
7485 if (precision < 0)
7486 precision = -1; // treat all negative numbers as unspecified
7487 ++c;
7488 }
7489 }
7490
7491 if (*c == '\0') {
7492 result.append(QLatin1StringView(escape_start)); // incomplete escape, treat as non-escape text
7493 break;
7494 }
7495
7496 const LengthMod length_mod = parse_length_modifier(c);
7497
7498 if (*c == '\0') {
7499 result.append(QLatin1StringView(escape_start)); // incomplete escape, treat as non-escape text
7500 break;
7501 }
7502
7503 // Parse the conversion specifier and do the conversion
7504 QString subst;
7505 switch (*c) {
7506 case 'd':
7507 case 'i': {
7508 qint64 i;
7509 switch (length_mod) {
7510 case lm_none: i = va_arg(ap, int); break;
7511 case lm_hh: i = va_arg(ap, int); break;
7512 case lm_h: i = va_arg(ap, int); break;
7513 case lm_l: i = va_arg(ap, long int); break;
7514 case lm_ll: i = va_arg(ap, qint64); break;
7515 case lm_j: i = va_arg(ap, long int); break;
7516
7517 /* ptrdiff_t actually, but it should be the same for us */
7518 case lm_z: i = va_arg(ap, qsizetype); break;
7519 case lm_t: i = va_arg(ap, qsizetype); break;
7520 default: i = 0; break;
7521 }
7522 subst = QLocaleData::c()->longLongToString(i, precision, 10, width, flags);
7523 ++c;
7524 break;
7525 }
7526 case 'o':
7527 case 'u':
7528 case 'x':
7529 case 'X': {
7530 quint64 u;
7531 switch (length_mod) {
7532 case lm_none: u = va_arg(ap, uint); break;
7533 case lm_hh: u = va_arg(ap, uint); break;
7534 case lm_h: u = va_arg(ap, uint); break;
7535 case lm_l: u = va_arg(ap, ulong); break;
7536 case lm_ll: u = va_arg(ap, quint64); break;
7537 case lm_t: u = va_arg(ap, size_t); break;
7538 case lm_z: u = va_arg(ap, size_t); break;
7539 default: u = 0; break;
7540 }
7541
7542 if (isAsciiUpper(*c))
7543 flags |= QLocaleData::CapitalEorX;
7544
7545 int base = 10;
7546 switch (QtMiscUtils::toAsciiLower(*c)) {
7547 case 'o':
7548 base = 8; break;
7549 case 'u':
7550 base = 10; break;
7551 case 'x':
7552 base = 16; break;
7553 default: break;
7554 }
7555 subst = QLocaleData::c()->unsLongLongToString(u, precision, base, width, flags);
7556 ++c;
7557 break;
7558 }
7559 case 'E':
7560 case 'e':
7561 case 'F':
7562 case 'f':
7563 case 'G':
7564 case 'g':
7565 case 'A':
7566 case 'a': {
7567 double d;
7568 if (length_mod == lm_L)
7569 d = va_arg(ap, long double); // not supported - converted to a double
7570 else
7571 d = va_arg(ap, double);
7572
7573 if (isAsciiUpper(*c))
7574 flags |= QLocaleData::CapitalEorX;
7575
7576 QLocaleData::DoubleForm form = QLocaleData::DFDecimal;
7577 switch (QtMiscUtils::toAsciiLower(*c)) {
7578 case 'e': form = QLocaleData::DFExponent; break;
7579 case 'a': // not supported - decimal form used instead
7580 case 'f': form = QLocaleData::DFDecimal; break;
7581 case 'g': form = QLocaleData::DFSignificantDigits; break;
7582 default: break;
7583 }
7584 subst = QLocaleData::c()->doubleToString(d, precision, form, width, flags);
7585 ++c;
7586 break;
7587 }
7588 case 'c': {
7589 if (length_mod == lm_l)
7590 subst = QChar::fromUcs2(va_arg(ap, int));
7591 else
7592 subst = QLatin1Char((uchar) va_arg(ap, int));
7593 ++c;
7594 break;
7595 }
7596 case 's': {
7597 if (length_mod == lm_l) {
7598 const char16_t *buff = va_arg(ap, const char16_t*);
7599 const auto *ch = buff;
7600 while (precision != 0 && *ch != 0) {
7601 ++ch;
7602 --precision;
7603 }
7604 subst.setUtf16(buff, ch - buff);
7605 } else if (precision == -1) {
7606 subst = QString::fromUtf8(va_arg(ap, const char*));
7607 } else {
7608 const char *buff = va_arg(ap, const char*);
7609 subst = QString::fromUtf8(buff, qstrnlen(buff, precision));
7610 }
7611 ++c;
7612 break;
7613 }
7614 case 'p': {
7615 void *arg = va_arg(ap, void*);
7616 const quint64 i = reinterpret_cast<quintptr>(arg);
7617 flags |= QLocaleData::ShowBase;
7618 subst = QLocaleData::c()->unsLongLongToString(i, precision, 16, width, flags);
7619 ++c;
7620 break;
7621 }
7622 case 'n':
7623 switch (length_mod) {
7624 case lm_hh: {
7625 signed char *n = va_arg(ap, signed char*);
7626 *n = result.size();
7627 break;
7628 }
7629 case lm_h: {
7630 short int *n = va_arg(ap, short int*);
7631 *n = result.size();
7632 break;
7633 }
7634 case lm_l: {
7635 long int *n = va_arg(ap, long int*);
7636 *n = result.size();
7637 break;
7638 }
7639 case lm_ll: {
7640 qint64 *n = va_arg(ap, qint64*);
7641 *n = result.size();
7642 break;
7643 }
7644 default: {
7645 int *n = va_arg(ap, int*);
7646 *n = int(result.size());
7647 break;
7648 }
7649 }
7650 ++c;
7651 break;
7652
7653 default: // bad escape, treat as non-escape text
7654 for (const char *cc = escape_start; cc != c; ++cc)
7655 result.append(QLatin1Char(*cc));
7656 continue;
7657 }
7658
7659 if (flags & QLocaleData::LeftAdjusted)
7660 result.append(subst.leftJustified(width));
7661 else
7662 result.append(subst.rightJustified(width));
7663 }
7664
7665 return result;
7666}
7667
7668/*!
7669 \fn QString::toLongLong(bool *ok, int base) const
7670
7671 Returns the string converted to a \c{long long} using base \a
7672 base, which is 10 by default and must be between 2 and 36, or 0.
7673 Returns 0 if the conversion fails.
7674
7675 If \a ok is not \nullptr, failure is reported by setting *\a{ok}
7676 to \c false, and success by setting *\a{ok} to \c true.
7677
7678 If \a base is 0, the C language convention is used: if the string begins
7679 with "0x", base 16 is used; otherwise, if the string begins with "0b", base
7680 2 is used; otherwise, if the string begins with "0", base 8 is used;
7681 otherwise, base 10 is used.
7682
7683 The string conversion will always happen in the 'C' locale. For
7684 locale-dependent conversion use QLocale::toLongLong()
7685
7686 Example:
7687
7688 \snippet qstring/main.cpp 74
7689
7690 This function ignores leading and trailing whitespace.
7691
7692 \note Support for the "0b" prefix was added in Qt 6.4.
7693
7694 \sa number(), toULongLong(), toInt(), QLocale::toLongLong()
7695*/
7696
7697template <typename Int>
7698static Int toIntegral(QStringView string, bool *ok, int base)
7699{
7700#if defined(QT_CHECK_RANGE)
7701 if (base != 0 && (base < 2 || base > 36)) {
7702 qWarning("QString::toIntegral: Invalid base (%d)", base);
7703 base = 10;
7704 }
7705#endif
7706
7707 QVarLengthArray<uchar> latin1(string.size());
7708 qt_to_latin1(latin1.data(), string.utf16(), string.size());
7709 QSimpleParsedNumber<Int> r;
7710 if constexpr (std::is_signed_v<Int>)
7711 r = QLocaleData::bytearrayToLongLong(latin1, base);
7712 else
7713 r = QLocaleData::bytearrayToUnsLongLong(latin1, base);
7714 if (ok)
7715 *ok = r.ok();
7716 return r.result;
7717}
7718
7719qlonglong QString::toIntegral_helper(QStringView string, bool *ok, int base)
7720{
7721 return toIntegral<qlonglong>(string, ok, base);
7722}
7723
7724/*!
7725 \fn QString::toULongLong(bool *ok, int base) const
7726
7727 Returns the string converted to an \c{unsigned long long} using base \a
7728 base, which is 10 by default and must be between 2 and 36, or 0.
7729 Returns 0 if the conversion fails.
7730
7731 If \a ok is not \nullptr, failure is reported by setting *\a{ok}
7732 to \c false, and success by setting *\a{ok} to \c true.
7733
7734 If \a base is 0, the C language convention is used: if the string begins
7735 with "0x", base 16 is used; otherwise, if the string begins with "0b", base
7736 2 is used; otherwise, if the string begins with "0", base 8 is used;
7737 otherwise, base 10 is used.
7738
7739 The string conversion will always happen in the 'C' locale. For
7740 locale-dependent conversion use QLocale::toULongLong()
7741
7742 Example:
7743
7744 \snippet qstring/main.cpp 79
7745
7746 This function ignores leading and trailing whitespace.
7747
7748 \note Support for the "0b" prefix was added in Qt 6.4.
7749
7750 \sa number(), toLongLong(), QLocale::toULongLong()
7751*/
7752
7753qulonglong QString::toIntegral_helper(QStringView string, bool *ok, uint base)
7754{
7755 return toIntegral<qulonglong>(string, ok, base);
7756}
7757
7758/*!
7759 \fn long QString::toLong(bool *ok, int base) const
7760
7761 Returns the string converted to a \c long using base \a
7762 base, which is 10 by default and must be between 2 and 36, or 0.
7763 Returns 0 if the conversion fails.
7764
7765 If \a ok is not \nullptr, failure is reported by setting *\a{ok}
7766 to \c false, and success by setting *\a{ok} to \c true.
7767
7768 If \a base is 0, the C language convention is used: if the string begins
7769 with "0x", base 16 is used; otherwise, if the string begins with "0b", base
7770 2 is used; otherwise, if the string begins with "0", base 8 is used;
7771 otherwise, base 10 is used.
7772
7773 The string conversion will always happen in the 'C' locale. For
7774 locale-dependent conversion use QLocale::toLongLong()
7775
7776 Example:
7777
7778 \snippet qstring/main.cpp 73
7779
7780 This function ignores leading and trailing whitespace.
7781
7782 \note Support for the "0b" prefix was added in Qt 6.4.
7783
7784 \sa number(), toULong(), toInt(), QLocale::toInt()
7785*/
7786
7787/*!
7788 \fn ulong QString::toULong(bool *ok, int base) const
7789
7790 Returns the string converted to an \c{unsigned long} using base \a
7791 base, which is 10 by default and must be between 2 and 36, or 0.
7792 Returns 0 if the conversion fails.
7793
7794 If \a ok is not \nullptr, failure is reported by setting *\a{ok}
7795 to \c false, and success by setting *\a{ok} to \c true.
7796
7797 If \a base is 0, the C language convention is used: if the string begins
7798 with "0x", base 16 is used; otherwise, if the string begins with "0b", base
7799 2 is used; otherwise, if the string begins with "0", base 8 is used;
7800 otherwise, base 10 is used.
7801
7802 The string conversion will always happen in the 'C' locale. For
7803 locale-dependent conversion use QLocale::toULongLong()
7804
7805 Example:
7806
7807 \snippet qstring/main.cpp 78
7808
7809 This function ignores leading and trailing whitespace.
7810
7811 \note Support for the "0b" prefix was added in Qt 6.4.
7812
7813 \sa number(), QLocale::toUInt()
7814*/
7815
7816/*!
7817 \fn int QString::toInt(bool *ok, int base) const
7818 Returns the string converted to an \c int using base \a
7819 base, which is 10 by default and must be between 2 and 36, or 0.
7820 Returns 0 if the conversion fails.
7821
7822 If \a ok is not \nullptr, failure is reported by setting *\a{ok}
7823 to \c false, and success by setting *\a{ok} to \c true.
7824
7825 If \a base is 0, the C language convention is used: if the string begins
7826 with "0x", base 16 is used; otherwise, if the string begins with "0b", base
7827 2 is used; otherwise, if the string begins with "0", base 8 is used;
7828 otherwise, base 10 is used.
7829
7830 The string conversion will always happen in the 'C' locale. For
7831 locale-dependent conversion use QLocale::toInt()
7832
7833 Example:
7834
7835 \snippet qstring/main.cpp 72
7836
7837 This function ignores leading and trailing whitespace.
7838
7839 \note Support for the "0b" prefix was added in Qt 6.4.
7840
7841 \sa number(), toUInt(), toDouble(), QLocale::toInt()
7842*/
7843
7844/*!
7845 \fn uint QString::toUInt(bool *ok, int base) const
7846 Returns the string converted to an \c{unsigned int} using base \a
7847 base, which is 10 by default and must be between 2 and 36, or 0.
7848 Returns 0 if the conversion fails.
7849
7850 If \a ok is not \nullptr, failure is reported by setting *\a{ok}
7851 to \c false, and success by setting *\a{ok} to \c true.
7852
7853 If \a base is 0, the C language convention is used: if the string begins
7854 with "0x", base 16 is used; otherwise, if the string begins with "0b", base
7855 2 is used; otherwise, if the string begins with "0", base 8 is used;
7856 otherwise, base 10 is used.
7857
7858 The string conversion will always happen in the 'C' locale. For
7859 locale-dependent conversion use QLocale::toUInt()
7860
7861 Example:
7862
7863 \snippet qstring/main.cpp 77
7864
7865 This function ignores leading and trailing whitespace.
7866
7867 \note Support for the "0b" prefix was added in Qt 6.4.
7868
7869 \sa number(), toInt(), QLocale::toUInt()
7870*/
7871
7872/*!
7873 \fn short QString::toShort(bool *ok, int base) const
7874
7875 Returns the string converted to a \c short using base \a
7876 base, which is 10 by default and must be between 2 and 36, or 0.
7877 Returns 0 if the conversion fails.
7878
7879 If \a ok is not \nullptr, failure is reported by setting *\a{ok}
7880 to \c false, and success by setting *\a{ok} to \c true.
7881
7882 If \a base is 0, the C language convention is used: if the string begins
7883 with "0x", base 16 is used; otherwise, if the string begins with "0b", base
7884 2 is used; otherwise, if the string begins with "0", base 8 is used;
7885 otherwise, base 10 is used.
7886
7887 The string conversion will always happen in the 'C' locale. For
7888 locale-dependent conversion use QLocale::toShort()
7889
7890 Example:
7891
7892 \snippet qstring/main.cpp 76
7893
7894 This function ignores leading and trailing whitespace.
7895
7896 \note Support for the "0b" prefix was added in Qt 6.4.
7897
7898 \sa number(), toUShort(), toInt(), QLocale::toShort()
7899*/
7900
7901/*!
7902 \fn ushort QString::toUShort(bool *ok, int base) const
7903
7904 Returns the string converted to an \c{unsigned short} using base \a
7905 base, which is 10 by default and must be between 2 and 36, or 0.
7906 Returns 0 if the conversion fails.
7907
7908 If \a ok is not \nullptr, failure is reported by setting *\a{ok}
7909 to \c false, and success by setting *\a{ok} to \c true.
7910
7911 If \a base is 0, the C language convention is used: if the string begins
7912 with "0x", base 16 is used; otherwise, if the string begins with "0b", base
7913 2 is used; otherwise, if the string begins with "0", base 8 is used;
7914 otherwise, base 10 is used.
7915
7916 The string conversion will always happen in the 'C' locale. For
7917 locale-dependent conversion use QLocale::toUShort()
7918
7919 Example:
7920
7921 \snippet qstring/main.cpp 80
7922
7923 This function ignores leading and trailing whitespace.
7924
7925 \note Support for the "0b" prefix was added in Qt 6.4.
7926
7927 \sa number(), toShort(), QLocale::toUShort()
7928*/
7929
7930/*!
7931 Returns the string converted to a \c double value.
7932
7933 Returns an infinity if the conversion overflows or 0.0 if the
7934 conversion fails for other reasons (e.g. underflow).
7935
7936 If \a ok is not \nullptr, failure is reported by setting *\a{ok}
7937 to \c false, and success by setting *\a{ok} to \c true.
7938
7939 \snippet qstring/main.cpp 66
7940
7941 \warning The QString content may only contain valid numerical characters,
7942 which include the plus/minus sign, the character e used in scientific
7943 notation, and the decimal point. Including the unit or additional characters
7944 leads to a conversion error.
7945
7946 \snippet qstring/main.cpp 67
7947
7948 The string conversion will always happen in the 'C' locale. For
7949 locale-dependent conversion use QLocale::toDouble()
7950
7951 \snippet qstring/main.cpp 68
7952
7953 For historical reasons, this function does not handle
7954 thousands group separators. If you need to convert such numbers,
7955 use QLocale::toDouble().
7956
7957 \snippet qstring/main.cpp 69
7958
7959 This function ignores leading and trailing whitespace.
7960
7961 \sa number(), QLocale::setDefault(), QLocale::toDouble(), trimmed()
7962*/
7963
7964double QString::toDouble(bool *ok) const
7965{
7966 return QStringView(*this).toDouble(ok);
7967}
7968
7969double QStringView::toDouble(bool *ok) const
7970{
7971 QStringView string = qt_trimmed(*this);
7972 QVarLengthArray<uchar> latin1(string.size());
7973 qt_to_latin1(latin1.data(), string.utf16(), string.size());
7974 auto r = qt_asciiToDouble(reinterpret_cast<const char *>(latin1.data()), string.size());
7975 if (ok != nullptr)
7976 *ok = r.ok();
7977 return r.result;
7978}
7979
7980/*!
7981 Returns the string converted to a \c float value.
7982
7983 Returns an infinity if the conversion overflows or 0.0 if the
7984 conversion fails for other reasons (e.g. underflow).
7985
7986 If \a ok is not \nullptr, failure is reported by setting *\a{ok}
7987 to \c false, and success by setting *\a{ok} to \c true.
7988
7989 \warning The QString content may only contain valid numerical characters,
7990 which include the plus/minus sign, the character e used in scientific
7991 notation, and the decimal point. Including the unit or additional characters
7992 leads to a conversion error.
7993
7994 The string conversion will always happen in the 'C' locale. For
7995 locale-dependent conversion use QLocale::toFloat()
7996
7997 For historical reasons, this function does not handle
7998 thousands group separators. If you need to convert such numbers,
7999 use QLocale::toFloat().
8000
8001 Example:
8002
8003 \snippet qstring/main.cpp 71
8004
8005 This function ignores leading and trailing whitespace.
8006
8007 \sa number(), toDouble(), toInt(), QLocale::toFloat(), trimmed()
8008*/
8009
8010float QString::toFloat(bool *ok) const
8011{
8012 return QLocaleData::convertDoubleToFloat(toDouble(ok), ok);
8013}
8014
8015float QStringView::toFloat(bool *ok) const
8016{
8017 return QLocaleData::convertDoubleToFloat(toDouble(ok), ok);
8018}
8019
8020/*! \fn QString &QString::setNum(int n, int base)
8021
8022 Sets the string to the printed value of \a n in the specified \a
8023 base, and returns a reference to the string.
8024
8025 The base is 10 by default and must be between 2 and 36.
8026
8027 \snippet qstring/main.cpp 56
8028
8029 The formatting always uses QLocale::C, i.e., English/UnitedStates.
8030 To get a localized string representation of a number, use
8031 QLocale::toString() with the appropriate locale.
8032
8033 \sa number()
8034*/
8035
8036/*! \fn QString &QString::setNum(uint n, int base)
8037
8038 \overload
8039*/
8040
8041/*! \fn QString &QString::setNum(long n, int base)
8042
8043 \overload
8044*/
8045
8046/*! \fn QString &QString::setNum(ulong n, int base)
8047
8048 \overload
8049*/
8050
8051/*!
8052 \overload
8053*/
8054QString &QString::setNum(qlonglong n, int base)
8055{
8056 return *this = number(n, base);
8057}
8058
8059/*!
8060 \overload
8061*/
8062QString &QString::setNum(qulonglong n, int base)
8063{
8064 return *this = number(n, base);
8065}
8066
8067/*! \fn QString &QString::setNum(short n, int base)
8068
8069 \overload
8070*/
8071
8072/*! \fn QString &QString::setNum(ushort n, int base)
8073
8074 \overload
8075*/
8076
8077/*!
8078 \overload
8079
8080 Sets the string to the printed value of \a n, formatted according to the
8081 given \a format and \a precision, and returns a reference to the string.
8082
8083 \sa number(), QLocale::FloatingPointPrecisionOption, {Number formats}
8084*/
8085
8086QString &QString::setNum(double n, char format, int precision)
8087{
8088 return *this = number(n, format, precision);
8089}
8090
8091/*!
8092 \fn QString &QString::setNum(float n, char format, int precision)
8093 \overload
8094
8095 Sets the string to the printed value of \a n, formatted according
8096 to the given \a format and \a precision, and returns a reference
8097 to the string.
8098
8099 The formatting always uses QLocale::C, i.e., English/UnitedStates.
8100 To get a localized string representation of a number, use
8101 QLocale::toString() with the appropriate locale.
8102
8103 \sa number()
8104*/
8105
8106
8107/*!
8108 \fn QString QString::number(long n, int base)
8109
8110 Returns a string equivalent of the number \a n according to the
8111 specified \a base.
8112
8113 The base is 10 by default and must be between 2
8114 and 36. For bases other than 10, \a n is treated as an
8115 unsigned integer.
8116
8117 The formatting always uses QLocale::C, i.e., English/UnitedStates.
8118 To get a localized string representation of a number, use
8119 QLocale::toString() with the appropriate locale.
8120
8121 \snippet qstring/main.cpp 35
8122
8123 \sa setNum()
8124*/
8125
8126QString QString::number(long n, int base)
8127{
8128 return number(qlonglong(n), base);
8129}
8130
8131/*!
8132 \fn QString QString::number(ulong n, int base)
8133
8134 \overload
8135*/
8136QString QString::number(ulong n, int base)
8137{
8138 return number(qulonglong(n), base);
8139}
8140
8141/*!
8142 \overload
8143*/
8144QString QString::number(int n, int base)
8145{
8146 return number(qlonglong(n), base);
8147}
8148
8149/*!
8150 \overload
8151*/
8152QString QString::number(uint n, int base)
8153{
8154 return number(qulonglong(n), base);
8155}
8156
8157/*!
8158 \overload
8159*/
8160QString QString::number(qlonglong n, int base)
8161{
8162#if defined(QT_CHECK_RANGE)
8163 if (base < 2 || base > 36) {
8164 qWarning("QString::setNum: Invalid base (%d)", base);
8165 base = 10;
8166 }
8167#endif
8168 bool negative = n < 0;
8169 /*
8170 Negating std::numeric_limits<qlonglong>::min() hits undefined behavior, so
8171 taking an absolute value has to take a slight detour.
8172 */
8173 return qulltoBasicLatin(negative ? 1u + qulonglong(-(n + 1)) : qulonglong(n), base, negative);
8174}
8175
8176/*!
8177 \overload
8178*/
8179QString QString::number(qulonglong n, int base)
8180{
8181#if defined(QT_CHECK_RANGE)
8182 if (base < 2 || base > 36) {
8183 qWarning("QString::setNum: Invalid base (%d)", base);
8184 base = 10;
8185 }
8186#endif
8187 return qulltoBasicLatin(n, base, false);
8188}
8189
8190
8191/*!
8192 Returns a string representing the floating-point number \a n.
8193
8194 Returns a string that represents \a n, formatted according to the specified
8195 \a format and \a precision.
8196
8197 For formats with an exponent, the exponent will show its sign and have at
8198 least two digits, left-padding the exponent with zero if needed.
8199
8200 \sa setNum(), QLocale::toString(), QLocale::FloatingPointPrecisionOption, {Number formats}
8201*/
8202QString QString::number(double n, char format, int precision)
8203{
8204 QLocaleData::DoubleForm form = QLocaleData::DFDecimal;
8205
8206 switch (QtMiscUtils::toAsciiLower(format)) {
8207 case 'f':
8208 form = QLocaleData::DFDecimal;
8209 break;
8210 case 'e':
8211 form = QLocaleData::DFExponent;
8212 break;
8213 case 'g':
8214 form = QLocaleData::DFSignificantDigits;
8215 break;
8216 default:
8217#if defined(QT_CHECK_RANGE)
8218 qWarning("QString::setNum: Invalid format char '%c'", format);
8219#endif
8220 break;
8221 }
8222
8223 return qdtoBasicLatin(n, form, precision, isAsciiUpper(format));
8224}
8225
8226namespace {
8227template<class ResultList, class StringSource>
8228static ResultList splitString(const StringSource &source, QStringView sep,
8229 Qt::SplitBehavior behavior, Qt::CaseSensitivity cs)
8230{
8231 ResultList list;
8232 typename StringSource::size_type start = 0;
8233 typename StringSource::size_type end;
8234 typename StringSource::size_type extra = 0;
8235 while ((end = QtPrivate::findString(QStringView(source.constData(), source.size()), start + extra, sep, cs)) != -1) {
8236 if (start != end || behavior == Qt::KeepEmptyParts)
8237 list.append(source.sliced(start, end - start));
8238 start = end + sep.size();
8239 extra = (sep.size() == 0 ? 1 : 0);
8240 }
8241 if (start != source.size() || behavior == Qt::KeepEmptyParts)
8242 list.append(source.sliced(start));
8243 return list;
8244}
8245
8246} // namespace
8247
8248/*!
8249 Splits the string into substrings wherever \a sep occurs, and
8250 returns the list of those strings. If \a sep does not match
8251 anywhere in the string, split() returns a single-element list
8252 containing this string.
8253
8254 \a cs specifies whether \a sep should be matched case
8255 sensitively or case insensitively.
8256
8257 If \a behavior is Qt::SkipEmptyParts, empty entries don't
8258 appear in the result. By default, empty entries are kept.
8259
8260 Example:
8261
8262 \snippet qstring/main.cpp 62
8263
8264 If \a sep is empty, split() returns an empty string, followed
8265 by each of the string's characters, followed by another empty string:
8266
8267 \snippet qstring/main.cpp 62-empty
8268
8269 To understand this behavior, recall that the empty string matches
8270 everywhere, so the above is qualitatively the same as:
8271
8272 \snippet qstring/main.cpp 62-slashes
8273
8274 \sa QStringList::join(), section()
8275
8276 \since 5.14
8277*/
8278QStringList QString::split(const QString &sep, Qt::SplitBehavior behavior, Qt::CaseSensitivity cs) const
8279{
8280 return splitString<QStringList>(*this, sep, behavior, cs);
8281}
8282
8283/*!
8284 \overload
8285 \since 5.14
8286*/
8287QStringList QString::split(QChar sep, Qt::SplitBehavior behavior, Qt::CaseSensitivity cs) const
8288{
8289 return splitString<QStringList>(*this, QStringView(&sep, 1), behavior, cs);
8290}
8291
8292/*!
8293 \fn QList<QStringView> QStringView::split(QChar sep, Qt::SplitBehavior behavior, Qt::CaseSensitivity cs) const
8294 \fn QList<QStringView> QStringView::split(QStringView sep, Qt::SplitBehavior behavior, Qt::CaseSensitivity cs) const
8295
8296
8297 Splits the view into substring views wherever \a sep occurs, and
8298 returns the list of those string views.
8299
8300 See QString::split() for how \a sep, \a behavior and \a cs interact to form
8301 the result.
8302
8303 \note All the returned views are valid as long as the data referenced by
8304 this string view is valid. Destroying the data will cause all views to
8305 become dangling.
8306
8307 \since 6.0
8308*/
8309QList<QStringView> QStringView::split(QStringView sep, Qt::SplitBehavior behavior, Qt::CaseSensitivity cs) const
8310{
8311 return splitString<QList<QStringView>>(QStringView(*this), sep, behavior, cs);
8312}
8313
8314QList<QStringView> QStringView::split(QChar sep, Qt::SplitBehavior behavior, Qt::CaseSensitivity cs) const
8315{
8316 return split(QStringView(&sep, 1), behavior, cs);
8317}
8318
8319#if QT_CONFIG(regularexpression)
8320namespace {
8321template<class ResultList, typename String, typename MatchingFunction>
8322static ResultList splitString(const String &source, const QRegularExpression &re,
8323 MatchingFunction matchingFunction,
8324 Qt::SplitBehavior behavior)
8325{
8326 ResultList list;
8327 if (!re.isValid()) {
8328 qtWarnAboutInvalidRegularExpression(re, "QString", "split");
8329 return list;
8330 }
8331
8332 qsizetype start = 0;
8333 qsizetype end = 0;
8334 QRegularExpressionMatchIterator iterator = (re.*matchingFunction)(source, 0, QRegularExpression::NormalMatch, QRegularExpression::NoMatchOption);
8335 while (iterator.hasNext()) {
8336 QRegularExpressionMatch match = iterator.next();
8337 end = match.capturedStart();
8338 if (start != end || behavior == Qt::KeepEmptyParts)
8339 list.append(source.sliced(start, end - start));
8340 start = match.capturedEnd();
8341 }
8342
8343 if (start != source.size() || behavior == Qt::KeepEmptyParts)
8344 list.append(source.sliced(start));
8345
8346 return list;
8347}
8348} // namespace
8349
8350/*!
8351 \overload
8352 \since 5.14
8353
8354 Splits the string into substrings wherever the regular expression
8355 \a re matches, and returns the list of those strings. If \a re
8356 does not match anywhere in the string, split() returns a
8357 single-element list containing this string.
8358
8359 Here is an example where we extract the words in a sentence
8360 using one or more whitespace characters as the separator:
8361
8362 \snippet qstring/main.cpp 90
8363
8364 Here is a similar example, but this time we use any sequence of
8365 non-word characters as the separator:
8366
8367 \snippet qstring/main.cpp 91
8368
8369 Here is a third example where we use a zero-length assertion,
8370 \b{\\b} (word boundary), to split the string into an
8371 alternating sequence of non-word and word tokens:
8372
8373 \snippet qstring/main.cpp 92
8374
8375 \sa QStringList::join(), section()
8376*/
8377QStringList QString::split(const QRegularExpression &re, Qt::SplitBehavior behavior) const
8378{
8379#if QT_VERSION < QT_VERSION_CHECK(7, 0, 0)
8380 const auto matchingFunction = qOverload<const QString &, qsizetype, QRegularExpression::MatchType, QRegularExpression::MatchOptions>(&QRegularExpression::globalMatch);
8381#else
8382 const auto matchingFunction = &QRegularExpression::globalMatch;
8383#endif
8384 return splitString<QStringList>(*this,
8385 re,
8386 matchingFunction,
8387 behavior);
8388}
8389
8390/*!
8391 \overload
8392 \since 6.0
8393
8394 Splits the view into substring views wherever the regular expression \a re
8395 matches, and returns the list of those views. If \a re does not match
8396 anywhere in the view, split() returns a single-element list containing
8397 this view.
8398
8399 \note The views in the returned list are sub-views of this view; as such,
8400 they reference the same data as it and only remain valid for as long as that
8401 data remains live.
8402*/
8403QList<QStringView> QStringView::split(const QRegularExpression &re, Qt::SplitBehavior behavior) const
8404{
8405 return splitString<QList<QStringView>>(*this, re, &QRegularExpression::globalMatchView, behavior);
8406}
8407
8408#endif // QT_CONFIG(regularexpression)
8409
8410/*!
8411 \enum QString::NormalizationForm
8412
8413 This enum describes the various normalized forms of Unicode text.
8414
8415 \value NormalizationForm_D Canonical Decomposition
8416 \value NormalizationForm_C Canonical Decomposition followed by Canonical Composition
8417 \value NormalizationForm_KD Compatibility Decomposition
8418 \value NormalizationForm_KC Compatibility Decomposition followed by Canonical Composition
8419
8420 \sa normalized(),
8421 {https://www.unicode.org/reports/tr15/}{Unicode Standard Annex #15}
8422*/
8423
8424/*!
8425 \since 4.5
8426
8427 Returns a copy of this string repeated the specified number of \a times.
8428
8429 If \a times is less than 1, an empty string is returned.
8430
8431 Example:
8432
8433 \snippet code/src_corelib_text_qstring.cpp 8
8434*/
8435QString QString::repeated(qsizetype times) const
8436{
8437 if (d.size == 0)
8438 return *this;
8439
8440 if (times <= 1) {
8441 if (times == 1)
8442 return *this;
8443 return QString();
8444 }
8445
8446 const qsizetype resultSize = times * d.size;
8447
8448 QString result;
8449 result.reserve(resultSize);
8450 if (result.capacity() != resultSize)
8451 return QString(); // not enough memory
8452
8453 memcpy(result.d.data(), d.data(), d.size * sizeof(QChar));
8454
8455 qsizetype sizeSoFar = d.size;
8456 char16_t *end = result.d.data() + sizeSoFar;
8457
8458 const qsizetype halfResultSize = resultSize >> 1;
8459 while (sizeSoFar <= halfResultSize) {
8460 memcpy(end, result.d.data(), sizeSoFar * sizeof(QChar));
8461 end += sizeSoFar;
8462 sizeSoFar <<= 1;
8463 }
8464 memcpy(end, result.d.data(), (resultSize - sizeSoFar) * sizeof(QChar));
8465 result.d.data()[resultSize] = '\0';
8466 result.d.size = resultSize;
8467 return result;
8468}
8469
8470void qt_string_normalize(QString *data, QString::NormalizationForm mode, QChar::UnicodeVersion version, qsizetype from)
8471{
8472 {
8473 // check if it's fully ASCII first, because then we have no work
8474 auto start = reinterpret_cast<const char16_t *>(data->constData());
8475 const char16_t *p = start + from;
8476 if (isAscii_helper(p, p + data->size() - from))
8477 return;
8478 if (p > start + from)
8479 from = p - start - 1; // need one before the non-ASCII to perform NFC
8480 }
8481
8482 if (version == QChar::Unicode_Unassigned) {
8483 version = QChar::currentUnicodeVersion();
8484 } else if (int(version) <= NormalizationCorrectionsVersionMax) {
8485 const QString &s = *data;
8486 QChar *d = nullptr;
8488 if (n.version > version) {
8489 qsizetype pos = from;
8490 if (QChar::requiresSurrogates(n.ucs4)) {
8491 char16_t ucs4High = QChar::highSurrogate(n.ucs4);
8492 char16_t ucs4Low = QChar::lowSurrogate(n.ucs4);
8493
8494 // scan for this codepoint
8495 for ( ; pos < s.size() - 1; ++pos) {
8496 if (s.at(pos).unicode() == ucs4High && s.at(pos + 1).unicode() == ucs4Low)
8497 break;
8498 }
8499 if (pos == s.size())
8500 continue; // no correction necessary
8501
8502 // detach if necessary
8503 if (!d)
8504 d = data->data();
8505 if (QChar::requiresSurrogates(n.old_mapping)) {
8506 // no shrinking
8507 char16_t oldHigh = QChar::highSurrogate(n.old_mapping);
8508 char16_t oldLow = QChar::lowSurrogate(n.old_mapping);
8509 while (pos < s.size() - 1) {
8510 if (s.at(pos).unicode() == ucs4High && s.at(pos + 1).unicode() == ucs4Low) {
8511 d[pos] = QChar(oldHigh);
8512 d[++pos] = QChar(oldLow);
8513 }
8514 ++pos;
8515 }
8516 } else {
8517 // shrinking, so a little harder
8518 char16_t old = char16_t(n.old_mapping);
8519 qsizetype outpos = pos;
8520 for ( ; pos < s.size(); ++outpos, ++pos) {
8521 if (pos < s.size() - 1 && s.at(pos).unicode() == ucs4High
8522 && s.at(pos + 1).unicode() == ucs4Low) {
8523 d[outpos] = QChar(old);
8524 ++pos;
8525 }
8526 }
8527 data->truncate(outpos);
8528 d = nullptr;
8529 }
8530 } else {
8531 Q_ASSERT(!QChar::requiresSurrogates(n.old_mapping)); // BMP maps to BMP
8532 while (pos < s.size()) {
8533 if (s.at(pos).unicode() == n.ucs4) {
8534 if (!d)
8535 d = data->data();
8536 d[pos] = QChar(n.old_mapping);
8537 }
8538 ++pos;
8539 }
8540 }
8541 }
8542 }
8543 }
8544
8545 if (normalizationQuickCheckHelper(data, mode, from, &from))
8546 return;
8547
8548 decomposeHelper(data, mode < QString::NormalizationForm_KD, version, from);
8549
8550 canonicalOrderHelper(data, version, from);
8551
8552 if (mode == QString::NormalizationForm_D || mode == QString::NormalizationForm_KD)
8553 return;
8554
8555 composeHelper(data, version, from);
8556}
8557
8558/*!
8559 Returns the string in the given Unicode normalization \a mode,
8560 according to the given \a version of the Unicode standard.
8561*/
8562QString QString::normalized(QString::NormalizationForm mode, QChar::UnicodeVersion version) const
8563{
8564 QString copy = *this;
8565 qt_string_normalize(&copy, mode, version, 0);
8566 return copy;
8567}
8568
8569#if QT_VERSION < QT_VERSION_CHECK(7, 0, 0) && !defined(QT_BOOTSTRAPPED)
8570static void checkArgEscape(QStringView s)
8571{
8572 // If we're in here, it means that qArgDigitValue has accepted the
8573 // digit. We can skip the check in case we already know it will
8574 // succeed.
8575 if (!supportUnicodeDigitValuesInArg())
8576 return;
8577
8578 const auto isNonAsciiDigit = [](QChar c) {
8579 return c.unicode() < u'0' || c.unicode() > u'9';
8580 };
8581
8582 if (std::any_of(s.begin(), s.end(), isNonAsciiDigit)) {
8583 const auto accumulateDigit = [](int partial, QChar digit) {
8584 return partial * 10 + digit.digitValue();
8585 };
8586 const int parsedNumber = std::accumulate(s.begin(), s.end(), 0, accumulateDigit);
8587
8588 qWarning("QString::arg(): the replacement \"%%%ls\" contains non-ASCII digits;\n"
8589 " it is currently being interpreted as the %d-th substitution.\n"
8590 " This is deprecated; support for non-ASCII digits will be dropped\n"
8591 " in a future version of Qt.",
8592 qUtf16Printable(s.toString()),
8593 parsedNumber);
8594 }
8595}
8596#endif
8597
8599{
8600 int min_escape; // lowest escape sequence number
8601 qsizetype occurrences; // number of occurrences of the lowest escape sequence number
8602 qsizetype locale_occurrences; // number of occurrences of the lowest escape sequence number that
8603 // contain 'L'
8604 qsizetype escape_len; // total length of escape sequences which will be replaced
8605};
8606
8607static ArgEscapeData findArgEscapes(QStringView s)
8608{
8609 const QChar *uc_begin = s.begin();
8610 const QChar *uc_end = s.end();
8611
8612 ArgEscapeData d;
8613
8614 d.min_escape = INT_MAX;
8615 d.occurrences = 0;
8616 d.escape_len = 0;
8617 d.locale_occurrences = 0;
8618
8619 const QChar *c = uc_begin;
8620 while (c != uc_end) {
8621 while (c != uc_end && c->unicode() != '%')
8622 ++c;
8623
8624 if (c == uc_end)
8625 break;
8626 const QChar *escape_start = c;
8627 if (++c == uc_end)
8628 break;
8629
8630 bool locale_arg = false;
8631 if (c->unicode() == 'L') {
8632 locale_arg = true;
8633 if (++c == uc_end)
8634 break;
8635 }
8636
8637 int escape = qArgDigitValue(*c);
8638 if (escape == -1)
8639 continue;
8640
8641 // ### Qt 7: do not allow anything but ASCII digits
8642 // in arg()'s replacements.
8643#if QT_VERSION <= QT_VERSION_CHECK(7, 0, 0) && !defined(QT_BOOTSTRAPPED)
8644 const QChar *escapeBegin = c;
8645 const QChar *escapeEnd = escapeBegin + 1;
8646#endif
8647
8648 ++c;
8649
8650 if (c != uc_end) {
8651 const int next_escape = qArgDigitValue(*c);
8652 if (next_escape != -1) {
8653 escape = (10 * escape) + next_escape;
8654 ++c;
8655#if QT_VERSION <= QT_VERSION_CHECK(7, 0, 0) && !defined(QT_BOOTSTRAPPED)
8656 ++escapeEnd;
8657#endif
8658 }
8659 }
8660
8661#if QT_VERSION <= QT_VERSION_CHECK(7, 0, 0) && !defined(QT_BOOTSTRAPPED)
8662 checkArgEscape(QStringView(escapeBegin, escapeEnd));
8663#endif
8664
8665 if (escape > d.min_escape)
8666 continue;
8667
8668 if (escape < d.min_escape) {
8669 d.min_escape = escape;
8670 d.occurrences = 0;
8671 d.escape_len = 0;
8672 d.locale_occurrences = 0;
8673 }
8674
8675 ++d.occurrences;
8676 if (locale_arg)
8677 ++d.locale_occurrences;
8678 d.escape_len += c - escape_start;
8679 }
8680 return d;
8681}
8682
8683static QString replaceArgEscapes(QStringView s, const ArgEscapeData &d, qsizetype field_width,
8684 QStringView arg, QStringView larg, QChar fillChar)
8685{
8686 // Negative field-width for right-padding, positive for left-padding:
8687 const qsizetype abs_field_width = qAbs(field_width);
8688 const qsizetype result_len =
8689 s.size() - d.escape_len
8690 + (d.occurrences - d.locale_occurrences) * qMax(abs_field_width, arg.size())
8691 + d.locale_occurrences * qMax(abs_field_width, larg.size());
8692
8693 QString result(result_len, Qt::Uninitialized);
8694 QChar *rc = const_cast<QChar *>(result.unicode());
8695 QChar *const result_end = rc + result_len;
8696 qsizetype repl_cnt = 0;
8697
8698 const QChar *c = s.begin();
8699 const QChar *const uc_end = s.end();
8700 while (c != uc_end) {
8701 Q_ASSERT(d.occurrences > repl_cnt);
8702 /* We don't have to check increments of c against uc_end because, as
8703 long as d.occurrences > repl_cnt, we KNOW there are valid escape
8704 sequences remaining. */
8705
8706 const QChar *text_start = c;
8707 while (c->unicode() != '%')
8708 ++c;
8709
8710 const QChar *escape_start = c++;
8711 const bool localize = c->unicode() == 'L';
8712 if (localize)
8713 ++c;
8714
8715 int escape = qArgDigitValue(*c);
8716 if (escape != -1 && c + 1 != uc_end) {
8717 const int digit = qArgDigitValue(c[1]);
8718 if (digit != -1) {
8719 ++c;
8720 escape = 10 * escape + digit;
8721 }
8722 }
8723
8724 if (escape != d.min_escape) {
8725 memcpy(rc, text_start, (c - text_start) * sizeof(QChar));
8726 rc += c - text_start;
8727 } else {
8728 ++c;
8729
8730 memcpy(rc, text_start, (escape_start - text_start) * sizeof(QChar));
8731 rc += escape_start - text_start;
8732
8733 const QStringView use = localize ? larg : arg;
8734 const qsizetype pad_chars = abs_field_width - use.size();
8735 // (If negative, relevant loops are no-ops: no need to check.)
8736
8737 if (field_width > 0) { // left padded
8738 rc = std::fill_n(rc, pad_chars, fillChar);
8739 }
8740
8741 if (use.size())
8742 memcpy(rc, use.data(), use.size() * sizeof(QChar));
8743 rc += use.size();
8744
8745 if (field_width < 0) { // right padded
8746 rc = std::fill_n(rc, pad_chars, fillChar);
8747 }
8748
8749 if (++repl_cnt == d.occurrences) {
8750 memcpy(rc, c, (uc_end - c) * sizeof(QChar));
8751 rc += uc_end - c;
8752 Q_ASSERT(rc == result_end);
8753 c = uc_end;
8754 }
8755 }
8756 }
8757 Q_ASSERT(rc == result_end);
8758
8759 return result;
8760}
8761
8762/*!
8763 \fn template <typename T, QString::if_string_like<T> = true> QString QString::arg(const T &a, int fieldWidth, QChar fillChar) const
8764
8765 Returns a copy of this string with the lowest-numbered place-marker
8766 (one of \c %1, \c %2, ..., \c %99) replaced by string \a a.
8767
8768 \a fieldWidth specifies the minimum amount of space that \a a
8769 shall occupy. If \a a requires less space than \a fieldWidth, it
8770 is padded to \a fieldWidth with character \a fillChar. A positive
8771 \a fieldWidth produces right-aligned text. A negative \a fieldWidth
8772 produces left-aligned text.
8773
8774 This example shows how we might create a \c status string for
8775 reporting progress while processing a list of files:
8776
8777 \snippet qstring/main.cpp 11-qstringview
8778
8779 First, \c arg(i) replaces \c %1. Then \c arg(total) replaces \c
8780 %2. Finally, \c arg(fileName) replaces \c %3.
8781
8782 One advantage of using arg() over asprintf() is that the order of the
8783 numbered place markers can change, if the application's strings are
8784 translated into other languages, but each arg() will still replace
8785 the lowest-numbered unreplaced place-marker, no matter where it
8786 appears. Also, if place-marker \c %i appears more than once in the
8787 string, arg() replaces all of them.
8788
8789 If there is no unreplaced place-marker remaining, a warning message
8790 is printed and the result is undefined. Place-marker numbers must be
8791 in the range 1 to 99.
8792
8793 \note In Qt versions prior to 6.9, this function was overloaded on
8794 \c{char}, QChar, QString, QStringView, and QLatin1StringView and in some
8795 cases, \c{wchar_t} and \c{char16_t} arguments would resolve to the integer
8796 overloads. In Qt versions prior to 5.10, this function lacked the
8797 QStringView and QLatin1StringView overloads.
8798*/
8799QString QString::arg_impl(QAnyStringView a, int fieldWidth, QChar fillChar) const
8800{
8801 ArgEscapeData d = findArgEscapes(*this);
8802
8803 if (Q_UNLIKELY(d.occurrences == 0)) {
8804 qWarning("QString::arg: Argument missing: \"%ls\", \"%ls\"", qUtf16Printable(*this),
8805 qUtf16Printable(a.toString()));
8806 return *this;
8807 }
8808 struct {
8809 QVarLengthArray<char16_t> out;
8810 QStringView operator()(QStringView in) noexcept { return in; }
8811 QStringView operator()(QLatin1StringView in)
8812 {
8813 out.resize(in.size());
8814 qt_from_latin1(out.data(), in.data(), size_t(in.size()));
8815 return out;
8816 }
8817 QStringView operator()(QUtf8StringView in)
8818 {
8819 out.resize(in.size());
8820 return QStringView{out.data(), QUtf8::convertToUnicode(out.data(), in)};
8821 }
8822 } convert;
8823
8824 QStringView sv = a.visit(std::ref(convert));
8825 return replaceArgEscapes(*this, d, fieldWidth, sv, sv, fillChar);
8826}
8827
8828/*!
8829 \fn template <typename T, QString::if_integral_non_char<T> = true> QString QString::arg(T a, int fieldWidth, int base, QChar fillChar) const
8830 \overload arg()
8831
8832 The \a a argument is expressed in base \a base, which is 10 by
8833 default and must be between 2 and 36. For bases other than 10, \a a
8834 is treated as an unsigned integer.
8835
8836 \a fieldWidth specifies the minimum amount of space that \a a is
8837 padded to and filled with the character \a fillChar. A positive
8838 value produces right-aligned text; a negative value produces
8839 left-aligned text.
8840
8841 The '%' can be followed by an 'L', in which case the sequence is
8842 replaced with a localized representation of \a a. The conversion
8843 uses the default locale, set by QLocale::setDefault(). If no default
8844 locale was specified, the system locale is used. The 'L' flag is
8845 ignored if \a base is not 10.
8846
8847 \snippet qstring/main.cpp 12
8848 \snippet qstring/main.cpp 14
8849
8850 \note In Qt versions prior to 6.10.1, this function accepted arguments of
8851 types that implicitly convert to integral types. This is no longer supported,
8852 except for (unscoped) enums, because it also accepted types convertible to
8853 floating-point types, losing precision when those were printed as integers. A
8854 backwards-compatible fix is to cast such types to a C++ type whose displayed
8855 form matches your intent (\c int, \c float, ...).
8856
8857 \note In Qt versions prior to 6.9, this function was overloaded on various
8858 integral types and sometimes incorrectly accepted \c char and \c char16_t
8859 arguments.
8860
8861 \sa {Number formats}
8862*/
8863QString QString::arg_impl(qlonglong a, int fieldWidth, int base, QChar fillChar) const
8864{
8865 ArgEscapeData d = findArgEscapes(*this);
8866
8867 if (d.occurrences == 0) {
8868 qWarning("QString::arg: Argument missing: \"%ls\", %llu", qUtf16Printable(*this), a);
8869 return *this;
8870 }
8871
8872 unsigned flags = QLocaleData::NoFlags;
8873 // ZeroPadded sorts out left-padding when the fill is zero, to the right of sign:
8874 if (fillChar == u'0')
8875 flags = QLocaleData::ZeroPadded;
8876
8877 QString arg;
8878 if (d.occurrences > d.locale_occurrences) {
8879 arg = QLocaleData::c()->longLongToString(a, -1, base, fieldWidth, flags);
8880 Q_ASSERT(fillChar != u'0' || fieldWidth <= arg.size());
8881 }
8882
8883 QString localeArg;
8884 if (d.locale_occurrences > 0) {
8885 QLocale locale;
8886 if (!(locale.numberOptions() & QLocale::OmitGroupSeparator))
8887 flags |= QLocaleData::GroupDigits;
8888 localeArg = locale.d->m_data->longLongToString(a, -1, base, fieldWidth, flags);
8889 Q_ASSERT(fillChar != u'0' || fieldWidth <= localeArg.size());
8890 }
8891
8892 return replaceArgEscapes(*this, d, fieldWidth, arg, localeArg, fillChar);
8893}
8894
8895QString QString::arg_impl(qulonglong a, int fieldWidth, int base, QChar fillChar) const
8896{
8897 ArgEscapeData d = findArgEscapes(*this);
8898
8899 if (d.occurrences == 0) {
8900 qWarning("QString::arg: Argument missing: \"%ls\", %lld", qUtf16Printable(*this), a);
8901 return *this;
8902 }
8903
8904 unsigned flags = QLocaleData::NoFlags;
8905 // ZeroPadded sorts out left-padding when the fill is zero, to the right of sign:
8906 if (fillChar == u'0')
8907 flags = QLocaleData::ZeroPadded;
8908
8909 QString arg;
8910 if (d.occurrences > d.locale_occurrences) {
8911 arg = QLocaleData::c()->unsLongLongToString(a, -1, base, fieldWidth, flags);
8912 Q_ASSERT(fillChar != u'0' || fieldWidth <= arg.size());
8913 }
8914
8915 QString localeArg;
8916 if (d.locale_occurrences > 0) {
8917 QLocale locale;
8918 if (!(locale.numberOptions() & QLocale::OmitGroupSeparator))
8919 flags |= QLocaleData::GroupDigits;
8920 localeArg = locale.d->m_data->unsLongLongToString(a, -1, base, fieldWidth, flags);
8921 Q_ASSERT(fillChar != u'0' || fieldWidth <= localeArg.size());
8922 }
8923
8924 return replaceArgEscapes(*this, d, fieldWidth, arg, localeArg, fillChar);
8925}
8926
8927/*!
8928 \fn template <typename T, QString::if_floating_point<T> = true> QString QString::arg(T a, int fieldWidth, char format, int precision, QChar fillChar) const
8929 \overload arg()
8930
8931 Argument \a a is formatted according to the specified \a format and
8932 \a precision. See \l{Floating-point Formats} for details.
8933
8934 \a fieldWidth specifies the minimum amount of space that \a a is
8935 padded to and filled with the character \a fillChar. A positive
8936 value produces right-aligned text; a negative value produces
8937 left-aligned text.
8938
8939 \snippet code/src_corelib_text_qstring.cpp 2
8940
8941 \note In Qt versions prior to 6.9, this function was a regular function
8942 taking \c double. As a consequence of being a template function now, it no
8943 longer accepts arguments that merely implicitly convert to floating-point
8944 types. A backwards-compatible fix is to cast such types to one of the C++
8945 floating-point types.
8946
8947 \sa QLocale::toString(), QLocale::FloatingPointPrecisionOption, {Number formats}
8948*/
8949QString QString::arg_impl(double a, int fieldWidth, char format, int precision, QChar fillChar) const
8950{
8951 ArgEscapeData d = findArgEscapes(*this);
8952
8953 if (d.occurrences == 0) {
8954 qWarning("QString::arg: Argument missing: \"%ls\", %g", qUtf16Printable(*this), a);
8955 return *this;
8956 }
8957
8958 unsigned flags = QLocaleData::NoFlags;
8959 // ZeroPadded sorts out left-padding when the fill is zero, to the right of sign:
8960 if (fillChar == u'0')
8961 flags |= QLocaleData::ZeroPadded;
8962
8963 if (isAsciiUpper(format))
8964 flags |= QLocaleData::CapitalEorX;
8965
8966 QLocaleData::DoubleForm form = QLocaleData::DFDecimal;
8967 switch (QtMiscUtils::toAsciiLower(format)) {
8968 case 'f':
8969 form = QLocaleData::DFDecimal;
8970 break;
8971 case 'e':
8972 form = QLocaleData::DFExponent;
8973 break;
8974 case 'g':
8975 form = QLocaleData::DFSignificantDigits;
8976 break;
8977 default:
8978#if defined(QT_CHECK_RANGE)
8979 qWarning("QString::arg: Invalid format char '%c'", format);
8980#endif
8981 break;
8982 }
8983
8984 QString arg;
8985 if (d.occurrences > d.locale_occurrences) {
8986 arg = QLocaleData::c()->doubleToString(a, precision, form, fieldWidth,
8987 flags | QLocaleData::ZeroPadExponent);
8988 Q_ASSERT(fillChar != u'0' || !qt_is_finite(a)
8989 || fieldWidth <= arg.size());
8990 }
8991
8992 QString localeArg;
8993 if (d.locale_occurrences > 0) {
8994 QLocale locale;
8995
8996 const QLocale::NumberOptions numberOptions = locale.numberOptions();
8997 if (!(numberOptions & QLocale::OmitGroupSeparator))
8998 flags |= QLocaleData::GroupDigits;
8999 if (!(numberOptions & QLocale::OmitLeadingZeroInExponent))
9000 flags |= QLocaleData::ZeroPadExponent;
9001 if (numberOptions & QLocale::IncludeTrailingZeroesAfterDot)
9002 flags |= QLocaleData::AddTrailingZeroes;
9003 localeArg = locale.d->m_data->doubleToString(a, precision, form, fieldWidth, flags);
9004 Q_ASSERT(fillChar != u'0' || !qt_is_finite(a)
9005 || fieldWidth <= localeArg.size());
9006 }
9007
9008 return replaceArgEscapes(*this, d, fieldWidth, arg, localeArg, fillChar);
9009}
9010
9011static inline char16_t to_unicode(const QChar c) { return c.unicode(); }
9012static inline char16_t to_unicode(const char c) { return QLatin1Char{c}.unicode(); }
9013
9014template <typename Char>
9015static int getEscape(const Char *uc, qsizetype *pos, qsizetype len)
9016{
9017 qsizetype i = *pos;
9018 ++i;
9019 if (i < len && uc[i] == u'L')
9020 ++i;
9021 if (i < len) {
9022 int escape = to_unicode(uc[i]) - '0';
9023 if (uint(escape) >= 10U)
9024 return -1;
9025 ++i;
9026 if (i < len) {
9027 // there's a second digit
9028 int digit = to_unicode(uc[i]) - '0';
9029 if (uint(digit) < 10U) {
9030 escape = (escape * 10) + digit;
9031 ++i;
9032 }
9033 }
9034 *pos = i;
9035 return escape;
9036 }
9037 return -1;
9038}
9039
9040/*
9041 Algorithm for multiArg:
9042
    1. Parse the string as a sequence of verbatim text and placeholders (%L?\d{1,2}).
9044 The L is parsed and accepted for compatibility with non-multi-arg, but since
9045 multiArg only accepts strings as replacements, the localization request can
9046 be safely ignored.
9047 2. The result of step (1) is a list of (string-ref,int)-tuples. The string-ref
9048 either points at text to be copied verbatim (in which case the int is -1),
9049 or, initially, at the textual representation of the placeholder. In that case,
9050 the int contains the numerical number as parsed from the placeholder.
9051 3. Next, collect all the non-negative ints found, sort them in ascending order and
9052 remove duplicates.
9053 3a. If the result has more entries than multiArg() was given replacement strings,
9054 we have found placeholders we can't satisfy with replacement strings. That is
9055 fine (there could be another .arg() call coming after this one), so just
9056 truncate the result to the number of actual multiArg() replacement strings.
    3b. If the result has fewer entries than multiArg() was given replacement strings,
        the string is missing placeholders. This is an error that the user should be
        warned about.
    4. The result of step (3) is a mapping from the index of any replacement string to
       placeholder number. This is the wrong way around, but since placeholder
       numbers can be as large as 99, while we typically don't have more than 9
       replacement strings, we avoid a sparsely-used direct lookup table and instead do
       a reverse lookup each time we need to map a placeholder number to a replacement
       string index (that's a linear search; but still *much* faster than using an
       associative container).
9066 5. Next, for each of the tuples found in step (1), do the following:
9067 5a. If the int is negative, do nothing.
9068 5b. Otherwise, if the int is found in the result of step (3) at index I, replace
9069 the string-ref with a string-ref for the (complete) I'th replacement string.
9070 5c. Otherwise, do nothing.
9071 6. Concatenate all string refs into a single result string.
9072*/
9073
9074namespace {
9075struct Part
9076{
9077 Part() = default; // for QVarLengthArray; do not use
9078 constexpr Part(QAnyStringView s, int num = -1)
9079 : string{s}, number{num} {}
9080
9081 void reset(QAnyStringView s) noexcept { *this = {s, number}; }
9082
9083 QAnyStringView string;
9084 int number;
9085};
9086} // unnamed namespace
9087
9089
9090namespace {
9091
9092enum { ExpectedParts = 32 };
9093
9094typedef QVarLengthArray<Part, ExpectedParts> ParseResult;
9095typedef QVarLengthArray<int, ExpectedParts/2> ArgIndexToPlaceholderMap;
9096
9097template <typename StringView>
9098static ParseResult parseMultiArgFormatString_impl(StringView s)
9099{
9100 ParseResult result;
9101
9102 const auto uc = s.data();
9103 const auto len = s.size();
9104 const auto end = len - 1;
9105 qsizetype i = 0;
9106 qsizetype last = 0;
9107
9108 while (i < end) {
9109 if (uc[i] == u'%') {
9110 qsizetype percent = i;
9111 int number = getEscape(uc, &i, len);
9112 if (number != -1) {
9113 if (last != percent)
9114 result.push_back(Part{s.sliced(last, percent - last)}); // literal text (incl. failed placeholders)
9115 result.push_back(Part{s.sliced(percent, i - percent), number}); // parsed placeholder
9116 last = i;
9117 continue;
9118 }
9119 }
9120 ++i;
9121 }
9122
9123 if (last < len)
9124 result.push_back(Part{s.sliced(last, len - last)}); // trailing literal text
9125
9126 return result;
9127}
9128
9129static ParseResult parseMultiArgFormatString(QAnyStringView s)
9130{
9131 return s.visit([] (auto s) { return parseMultiArgFormatString_impl(s); });
9132}
9133
9134static ArgIndexToPlaceholderMap makeArgIndexToPlaceholderMap(const ParseResult &parts)
9135{
9136 ArgIndexToPlaceholderMap result;
9137
9138 for (const Part &part : parts) {
9139 if (part.number >= 0)
9140 result.push_back(part.number);
9141 }
9142
9143 std::sort(result.begin(), result.end());
9144 result.erase(std::unique(result.begin(), result.end()),
9145 result.end());
9146
9147 return result;
9148}
9149
9150static qsizetype resolveStringRefsAndReturnTotalSize(ParseResult &parts, const ArgIndexToPlaceholderMap &argIndexToPlaceholderMap, const QtPrivate::ArgBase *args[])
9151{
9152 using namespace QtPrivate;
9153 qsizetype totalSize = 0;
9154 for (Part &part : parts) {
9155 if (part.number != -1) {
9156 const auto it = std::find(argIndexToPlaceholderMap.begin(), argIndexToPlaceholderMap.end(), part.number);
9157 if (it != argIndexToPlaceholderMap.end()) {
9158 const auto &arg = *args[it - argIndexToPlaceholderMap.begin()];
9159 switch (arg.tag) {
9160 case ArgBase::L1:
9161 part.reset(static_cast<const QLatin1StringArg&>(arg).string);
9162 break;
9163 case ArgBase::Any:
9164 part.reset(static_cast<const QAnyStringArg&>(arg).string);
9165 break;
9166 case ArgBase::U16:
9167 part.reset(static_cast<const QStringViewArg&>(arg).string);
9168 break;
9169 }
9170 }
9171 }
9172 totalSize += part.string.size();
9173 }
9174 return totalSize;
9175}
9176
9177} // unnamed namespace
9178
9179QString QtPrivate::argToQString(QAnyStringView pattern, size_t numArgs, const ArgBase **args)
9180{
9181 // Step 1-2 above
9182 ParseResult parts = parseMultiArgFormatString(pattern);
9183
9184 // 3-4
9185 ArgIndexToPlaceholderMap argIndexToPlaceholderMap = makeArgIndexToPlaceholderMap(parts);
9186
9187 if (static_cast<size_t>(argIndexToPlaceholderMap.size()) > numArgs) // 3a
9188 argIndexToPlaceholderMap.resize(qsizetype(numArgs));
9189 else if (Q_UNLIKELY(static_cast<size_t>(argIndexToPlaceholderMap.size()) < numArgs)) // 3b
9190 qWarning("QString::arg: %d argument(s) missing in %ls",
9191 int(numArgs - argIndexToPlaceholderMap.size()), qUtf16Printable(pattern.toString()));
9192
9193 // 5
9194 const qsizetype totalSize = resolveStringRefsAndReturnTotalSize(parts, argIndexToPlaceholderMap, args);
9195
9196 // 6:
9197 QString result(totalSize, Qt::Uninitialized);
9198 auto out = const_cast<QChar*>(result.constData());
9199
9200 struct Concatenate {
9201 QChar *out;
9202 QChar *operator()(QLatin1String part) noexcept
9203 {
9204 if (part.size()) {
9205 qt_from_latin1(reinterpret_cast<char16_t*>(out),
9206 part.data(), part.size());
9207 }
9208 return out + part.size();
9209 }
9210 QChar *operator()(QUtf8StringView part) noexcept
9211 {
9212 return QUtf8::convertToUnicode(out, part);
9213 }
9214 QChar *operator()(QStringView part) noexcept
9215 {
9216 if (part.size())
9217 memcpy(out, part.data(), part.size() * sizeof(QChar));
9218 return out + part.size();
9219 }
9220 };
9221
9222 for (const Part &part : parts)
9223 out = part.string.visit(Concatenate{out});
9224
9225 // UTF-8 decoding may have caused an overestimate of totalSize - correct it:
9226 result.truncate(out - result.cbegin());
9227
9228 return result;
9229}
9230
9231/*! \fn bool QString::isRightToLeft() const
9232
9233 Returns \c true if the string is read right to left.
9234
9235 \sa QStringView::isRightToLeft()
9236*/
9237bool QString::isRightToLeft() const
9238{
9239 return QtPrivate::isRightToLeft(QStringView(*this));
9240}
9241
9242/*!
9243 \fn bool QString::isValidUtf16() const noexcept
9244 \since 5.15
9245
9246 Returns \c true if the string contains valid UTF-16 encoded data,
9247 or \c false otherwise.
9248
9249 Note that this function does not perform any special validation of the
9250 data; it merely checks if it can be successfully decoded from UTF-16.
9251 The data is assumed to be in host byte order; the presence of a BOM
9252 is meaningless.
9253
9254 \sa QStringView::isValidUtf16()
9255*/
9256
9257/*! \fn QChar *QString::data()
9258
9259 Returns a pointer to the data stored in the QString. The pointer
9260 can be used to access and modify the characters that compose the
9261 string.
9262
9263 Unlike constData() and unicode(), the returned data is always
9264 '\\0'-terminated.
9265
9266 Example:
9267
9268 \snippet qstring/main.cpp 19
9269
9270 Note that the pointer remains valid only as long as the string is
9271 not modified by other means. For read-only access, constData() is
9272 faster because it never causes a \l{deep copy} to occur.
9273
9274 \sa constData(), operator[]()
9275*/
9276
9277/*! \fn const QChar *QString::data() const
9278
9279 \overload
9280
9281 \note The returned string may not be '\\0'-terminated.
9282 Use size() to determine the length of the array.
9283
9284 \sa fromRawData()
9285*/
9286
9287/*! \fn const QChar *QString::constData() const
9288
9289 Returns a pointer to the data stored in the QString. The pointer
9290 can be used to access the characters that compose the string.
9291
9292 Note that the pointer remains valid only as long as the string is
9293 not modified.
9294
9295 \note The returned string may not be '\\0'-terminated.
9296 Use size() to determine the length of the array.
9297
9298 \sa data(), operator[](), fromRawData()
9299*/
9300
9301/*! \fn void QString::push_front(const QString &other)
9302
9303 This function is provided for STL compatibility, prepending the
9304 given \a other string to the beginning of this string. It is
9305 equivalent to \c prepend(other).
9306
9307 \sa prepend()
9308*/
9309
9310/*! \fn void QString::push_front(QChar ch)
9311
9312 \overload
9313
9314 Prepends the given \a ch character to the beginning of this string.
9315*/
9316
9317/*! \fn void QString::push_back(const QString &other)
9318
9319 This function is provided for STL compatibility, appending the
9320 given \a other string onto the end of this string. It is
9321 equivalent to \c append(other).
9322
9323 \sa append()
9324*/
9325
9326/*! \fn void QString::push_back(QChar ch)
9327
9328 \overload
9329
9330 Appends the given \a ch character onto the end of this string.
9331*/
9332
9333/*!
9334 \since 6.1
9335
9336 Removes from the string the characters in the half-open range
9337 [ \a first , \a last ). Returns an iterator to the character
9338 immediately after the last erased character (i.e. the character
9339 referred to by \a last before the erase).
9340*/
9341QString::iterator QString::erase(QString::const_iterator first, QString::const_iterator last)
9342{
9343 const auto start = std::distance(cbegin(), first);
9344 const auto len = std::distance(first, last);
9345 remove(start, len);
9346 return begin() + start;
9347}
9348
9349/*!
9350 \fn QString::iterator QString::erase(QString::const_iterator it)
9351
9352 \overload
9353 \since 6.5
9354
9355 Removes the character denoted by \c it from the string.
9356 Returns an iterator to the character immediately after the
9357 erased character.
9358
9359 \code
9360 QString c = "abcdefg";
9361 auto it = c.erase(c.cbegin()); // c is now "bcdefg"; "it" points to "b"
9362 \endcode
9363*/
9364
9365/*! \fn void QString::shrink_to_fit()
9366 \since 5.10
9367
9368 This function is provided for STL compatibility. It is
9369 equivalent to squeeze().
9370
9371 \sa squeeze()
9372*/
9373
9374/*!
9375 \fn std::string QString::toStdString() const
9376
9377 Returns a std::string object with the data contained in this
9378 QString. The Unicode data is converted into 8-bit characters using
9379 the toUtf8() function.
9380
9381 This method is mostly useful to pass a QString to a function
9382 that accepts a std::string object.
9383
9384 \sa toLatin1(), toUtf8(), toLocal8Bit(), QByteArray::toStdString()
9385*/
9386std::string QString::toStdString() const
9387{
9388 std::string result;
9389 if (isEmpty())
9390 return result;
9391
9392 auto writeToBuffer = [this](char *out, size_t) {
9393 char *last = QUtf8::convertFromUnicode(out, *this);
9394 return last - out;
9395 };
9396 size_t maxSize = size() * 3; // worst case for UTF-8
9397#ifdef __cpp_lib_string_resize_and_overwrite
9398 // C++23
9399 result.resize_and_overwrite(maxSize, writeToBuffer);
9400#else
9401 result.resize(maxSize);
9402 result.resize(writeToBuffer(result.data(), result.size()));
9403#endif
9404 return result;
9405}
9406
9407/*!
9408 \fn QString QString::fromRawData(const char16_t *unicode, qsizetype size)
9409 \since 6.10
9410
9411 Constructs a QString that uses the first \a size Unicode characters
9412 in the array \a unicode. The data in \a unicode is \e not
9413 copied. The caller must be able to guarantee that \a unicode will
9414 not be deleted or modified as long as the QString (or an
9415 unmodified copy of it) exists.
9416
9417 Any attempts to modify the QString or copies of it will cause it
9418 to create a deep copy of the data, ensuring that the raw data
9419 isn't modified.
9420
9421 Here is an example of how we can use a QRegularExpression on raw data in
9422 memory without requiring to copy the data into a QString:
9423
9424 \snippet qstring/main.cpp 22
9425 \snippet qstring/main.cpp 23
9426
9427 \warning A string created with fromRawData() is \e not
9428 '\\0'-terminated, unless the raw data contains a '\\0' character
9429 at position \a size. This means unicode() will \e not return a
9430 '\\0'-terminated string (although utf16() does, at the cost of
9431 copying the raw data).
9432
9433 \sa fromUtf16(), setRawData(), data(), constData(),
9434 nullTerminate(), nullTerminated()
9435*/
9436
9437/*!
9438 \fn QString QString::fromRawData(const QChar *unicode, qsizetype size)
9439 \overload
9440*/
9441
9442/*!
9443 \since 4.7
9444
9445 Resets the QString to use the first \a size Unicode characters
9446 in the array \a unicode. The data in \a unicode is \e not
9447 copied. The caller must be able to guarantee that \a unicode will
9448 not be deleted or modified as long as the QString (or an
9449 unmodified copy of it) exists.
9450
    This function can be used instead of fromRawData() to re-use
    existing QString objects to save memory re-allocations.
9453
9454 \sa fromRawData(), nullTerminate(), nullTerminated()
9455*/
9456QString &QString::setRawData(const QChar *unicode, qsizetype size)
9457{
9458 if (!unicode || !size) {
9459 clear();
9460 }
9461 *this = fromRawData(unicode, size);
9462 return *this;
9463}
9464
9465/*! \fn QString QString::fromStdU16String(const std::u16string &str)
9466 \since 5.5
9467
9468 \include qstring.cpp {from-std-string} {UTF-16} {fromUtf16()}
9469
9470 \sa fromUtf16(), fromStdWString(), fromStdU32String()
9471*/
9472
9473/*!
9474 \fn std::u16string QString::toStdU16String() const
9475 \since 5.5
9476
9477 Returns a std::u16string object with the data contained in this
9478 QString. The Unicode data is the same as returned by the utf16()
9479 method.
9480
9481 \sa utf16(), toStdWString(), toStdU32String()
9482*/
9483
9484/*! \fn QString QString::fromStdU32String(const std::u32string &str)
9485 \since 5.5
9486
9487 \include qstring.cpp {from-std-string} {UTF-32} {fromUcs4()}
9488
9489 \sa fromUcs4(), fromStdWString(), fromStdU16String()
9490*/
9491
9492/*!
9493 \fn std::u32string QString::toStdU32String() const
9494 \since 5.5
9495
9496 Returns a std::u32string object with the data contained in this
9497 QString. The Unicode data is the same as returned by the toUcs4()
9498 method.
9499
9500 \sa toUcs4(), toStdWString(), toStdU16String()
9501*/
9502
9503#if !defined(QT_NO_DATASTREAM)
9504/*!
9505 \fn QDataStream &operator<<(QDataStream &stream, const QString &string)
9506 \relates QString
9507
9508 Writes the given \a string to the specified \a stream.
9509
9510 \sa {Serializing Qt Data Types}
9511*/
9512
9513QDataStream &operator<<(QDataStream &out, const QString &str)
9514{
9515 if (out.version() == 1) {
9516 out << str.toLatin1();
9517 } else {
9518 if (!str.isNull() || out.version() < 3) {
9519 if ((out.byteOrder() == QDataStream::BigEndian) == (QSysInfo::ByteOrder == QSysInfo::BigEndian)) {
9520 out.writeBytes(reinterpret_cast<const char *>(str.unicode()),
9521 static_cast<qsizetype>(sizeof(QChar) * str.size()));
9522 } else {
9523 QVarLengthArray<char16_t> buffer(str.size());
9524 qbswap<sizeof(char16_t)>(str.constData(), str.size(), buffer.data());
9525 out.writeBytes(reinterpret_cast<const char *>(buffer.data()),
9526 static_cast<qsizetype>(sizeof(char16_t) * buffer.size()));
9527 }
9528 } else {
9529 QDataStream::writeQSizeType(out, -1); // write null marker
9530 }
9531 }
9532 return out;
9533}
9534
9535/*!
9536 \fn QDataStream &operator>>(QDataStream &stream, QString &string)
9537 \relates QString
9538
9539 Reads a string from the specified \a stream into the given \a string.
9540
9541 \sa {Serializing Qt Data Types}
9542*/
9543
9544QDataStream &operator>>(QDataStream &in, QString &str)
9545{
9546 if (in.version() == 1) {
9547 QByteArray l;
9548 in >> l;
9549 str = QString::fromLatin1(l);
9550 } else {
9551 qint64 size = QDataStream::readQSizeType(in);
9552 qsizetype bytes = size;
9553 if (size != bytes || size < -1) {
9554 str.clear();
9555 in.setStatus(QDataStream::SizeLimitExceeded);
9556 return in;
9557 }
9558 if (bytes == -1) { // null string
9559 str = QString();
9560 } else if (bytes > 0) {
9561 if (bytes & 0x1) {
9562 str.clear();
9563 in.setStatus(QDataStream::ReadCorruptData);
9564 return in;
9565 }
9566
9567 const qsizetype Step = 1024 * 1024;
9568 qsizetype len = bytes / 2;
9569 qsizetype allocated = 0;
9570
9571 while (allocated < len) {
9572 int blockSize = qMin(Step, len - allocated);
9573 str.resize(allocated + blockSize);
9574 if (in.readRawData(reinterpret_cast<char *>(str.data()) + allocated * 2,
9575 blockSize * 2) != blockSize * 2) {
9576 str.clear();
9577 in.setStatus(QDataStream::ReadPastEnd);
9578 return in;
9579 }
9580 allocated += blockSize;
9581 }
9582
9583 if ((in.byteOrder() == QDataStream::BigEndian)
9584 != (QSysInfo::ByteOrder == QSysInfo::BigEndian)) {
9585 char16_t *data = reinterpret_cast<char16_t *>(str.data());
9586 qbswap<sizeof(*data)>(data, len, data);
9587 }
9588 } else {
9589 str = QString(QLatin1StringView(""));
9590 }
9591 }
9592 return in;
9593}
9594#endif // QT_NO_DATASTREAM
9595
9596/*!
9597 \typedef QString::Data
9598 \internal
9599*/
9600
9601/*!
9602 \typedef QString::DataPtr
9603 \internal
9604*/
9605
9606/*!
9607 \fn DataPtr & QString::data_ptr()
9608 \internal
9609*/
9610
9611/*!
9612 \since 5.11
9613 \internal
9614 \relates QStringView
9615
9616 Returns \c true if the string is read right to left.
9617
9618 \sa QString::isRightToLeft()
9619*/
9620bool QtPrivate::isRightToLeft(QStringView string) noexcept
9621{
9622 int isolateLevel = 0;
9623
9624 for (QStringIterator i(string); i.hasNext();) {
9625 const char32_t c = i.next();
9626
9627 switch (QChar::direction(c)) {
9628 case QChar::DirRLI:
9629 case QChar::DirLRI:
9630 case QChar::DirFSI:
9631 ++isolateLevel;
9632 break;
9633 case QChar::DirPDI:
9634 if (isolateLevel)
9635 --isolateLevel;
9636 break;
9637 case QChar::DirL:
9638 if (isolateLevel)
9639 break;
9640 return false;
9641 case QChar::DirR:
9642 case QChar::DirAL:
9643 if (isolateLevel)
9644 break;
9645 return true;
9646 case QChar::DirEN:
9647 case QChar::DirES:
9648 case QChar::DirET:
9649 case QChar::DirAN:
9650 case QChar::DirCS:
9651 case QChar::DirB:
9652 case QChar::DirS:
9653 case QChar::DirWS:
9654 case QChar::DirON:
9655 case QChar::DirLRE:
9656 case QChar::DirLRO:
9657 case QChar::DirRLE:
9658 case QChar::DirRLO:
9659 case QChar::DirPDF:
9660 case QChar::DirNSM:
9661 case QChar::DirBN:
9662 break;
9663 }
9664 }
9665 return false;
9666}
9667
9668qsizetype QtPrivate::count(QStringView haystack, QStringView needle, Qt::CaseSensitivity cs) noexcept
9669{
9670 qsizetype num = 0;
9671 qsizetype i = -1;
9672 if (haystack.size() > 500 && needle.size() > 5) {
9673 QStringMatcher matcher(needle, cs);
9674 while ((i = matcher.indexIn(haystack, i + 1)) != -1)
9675 ++num;
9676 } else {
9677 while ((i = QtPrivate::findString(haystack, i + 1, needle, cs)) != -1)
9678 ++num;
9679 }
9680 return num;
9681}
9682
9683qsizetype QtPrivate::count(QStringView haystack, QChar needle, Qt::CaseSensitivity cs) noexcept
9684{
9685 if (cs == Qt::CaseSensitive)
9686 return std::count(haystack.cbegin(), haystack.cend(), needle);
9687
9688 needle = foldCase(needle);
9689 return std::count_if(haystack.cbegin(), haystack.cend(),
9690 [needle](const QChar c) { return foldAndCompare(c, needle); });
9691}
9692
9693qsizetype QtPrivate::count(QLatin1StringView haystack, QLatin1StringView needle, Qt::CaseSensitivity cs)
9694{
9695 qsizetype num = 0;
9696 qsizetype i = -1;
9697
9698 QLatin1StringMatcher matcher(needle, cs);
9699 while ((i = matcher.indexIn(haystack, i + 1)) != -1)
9700 ++num;
9701
9702 return num;
9703}
9704
9705qsizetype QtPrivate::count(QLatin1StringView haystack, QStringView needle, Qt::CaseSensitivity cs)
9706{
9707 if (haystack.size() < needle.size())
9708 return 0;
9709
9710 if (!QtPrivate::isLatin1(needle)) // won't find non-L1 UTF-16 needles in a L1 haystack!
9711 return 0;
9712
9713 qsizetype num = 0;
9714 qsizetype i = -1;
9715
9716 QVarLengthArray<uchar> s(needle.size());
9717 qt_to_latin1_unchecked(s.data(), needle.utf16(), needle.size());
9718
9719 QLatin1StringMatcher matcher(QLatin1StringView(reinterpret_cast<char *>(s.data()), s.size()),
9720 cs);
9721 while ((i = matcher.indexIn(haystack, i + 1)) != -1)
9722 ++num;
9723
9724 return num;
9725}
9726
9727qsizetype QtPrivate::count(QStringView haystack, QLatin1StringView needle, Qt::CaseSensitivity cs)
9728{
9729 if (haystack.size() < needle.size())
9730 return -1;
9731
9732 QVarLengthArray<char16_t> s = qt_from_latin1_to_qvla(needle);
9733 return QtPrivate::count(haystack, QStringView(s.data(), s.size()), cs);
9734}
9735
9736qsizetype QtPrivate::count(QLatin1StringView haystack, QChar needle, Qt::CaseSensitivity cs) noexcept
9737{
9738 // non-L1 needles cannot possibly match in L1-only haystacks
9739 if (needle.unicode() > 0xff)
9740 return 0;
9741
9742 if (cs == Qt::CaseSensitive) {
9743 return std::count(haystack.cbegin(), haystack.cend(), needle.toLatin1());
9744 } else {
9745 return std::count_if(haystack.cbegin(), haystack.cend(),
9746 CaseInsensitiveL1::matcher(needle.toLatin1()));
9747 }
9748}
9749
9750/*!
9751 \fn bool QtPrivate::startsWith(QStringView haystack, QStringView needle, Qt::CaseSensitivity cs)
9752 \since 5.10
9753 \fn bool QtPrivate::startsWith(QStringView haystack, QLatin1StringView needle, Qt::CaseSensitivity cs)
9754 \since 5.10
9755 \fn bool QtPrivate::startsWith(QLatin1StringView haystack, QStringView needle, Qt::CaseSensitivity cs)
9756 \since 5.10
9757 \fn bool QtPrivate::startsWith(QLatin1StringView haystack, QLatin1StringView needle, Qt::CaseSensitivity cs)
9758 \since 5.10
9759 \internal
9760 \relates QStringView
9761
9762 Returns \c true if \a haystack starts with \a needle,
9763 otherwise returns \c false.
9764
9765 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
9766
9767 \sa QtPrivate::endsWith(), QString::endsWith(), QStringView::endsWith(), QLatin1StringView::endsWith()
9768*/
9769
9770bool QtPrivate::startsWith(QStringView haystack, QStringView needle, Qt::CaseSensitivity cs) noexcept
9771{
9772 return qt_starts_with_impl(haystack, needle, cs);
9773}
9774
9775bool QtPrivate::startsWith(QStringView haystack, QLatin1StringView needle, Qt::CaseSensitivity cs) noexcept
9776{
9777 return qt_starts_with_impl(haystack, needle, cs);
9778}
9779
9780bool QtPrivate::startsWith(QLatin1StringView haystack, QStringView needle, Qt::CaseSensitivity cs) noexcept
9781{
9782 return qt_starts_with_impl(haystack, needle, cs);
9783}
9784
9785bool QtPrivate::startsWith(QLatin1StringView haystack, QLatin1StringView needle, Qt::CaseSensitivity cs) noexcept
9786{
9787 return qt_starts_with_impl(haystack, needle, cs);
9788}
9789
9790/*!
9791 \fn bool QtPrivate::endsWith(QStringView haystack, QStringView needle, Qt::CaseSensitivity cs)
9792 \since 5.10
9793 \fn bool QtPrivate::endsWith(QStringView haystack, QLatin1StringView needle, Qt::CaseSensitivity cs)
9794 \since 5.10
9795 \fn bool QtPrivate::endsWith(QLatin1StringView haystack, QStringView needle, Qt::CaseSensitivity cs)
9796 \since 5.10
9797 \fn bool QtPrivate::endsWith(QLatin1StringView haystack, QLatin1StringView needle, Qt::CaseSensitivity cs)
9798 \since 5.10
9799 \internal
9800 \relates QStringView
9801
9802 Returns \c true if \a haystack ends with \a needle,
9803 otherwise returns \c false.
9804
9805 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
9806
9807 \sa QtPrivate::startsWith(), QString::endsWith(), QStringView::endsWith(), QLatin1StringView::endsWith()
9808*/
9809
9810bool QtPrivate::endsWith(QStringView haystack, QStringView needle, Qt::CaseSensitivity cs) noexcept
9811{
9812 return qt_ends_with_impl(haystack, needle, cs);
9813}
9814
9815bool QtPrivate::endsWith(QStringView haystack, QLatin1StringView needle, Qt::CaseSensitivity cs) noexcept
9816{
9817 return qt_ends_with_impl(haystack, needle, cs);
9818}
9819
9820bool QtPrivate::endsWith(QLatin1StringView haystack, QStringView needle, Qt::CaseSensitivity cs) noexcept
9821{
9822 return qt_ends_with_impl(haystack, needle, cs);
9823}
9824
9825bool QtPrivate::endsWith(QLatin1StringView haystack, QLatin1StringView needle, Qt::CaseSensitivity cs) noexcept
9826{
9827 return qt_ends_with_impl(haystack, needle, cs);
9828}
9829
9830qsizetype QtPrivate::findString(QStringView haystack0, qsizetype from, QStringView needle0, Qt::CaseSensitivity cs) noexcept
9831{
9832 const qsizetype l = haystack0.size();
9833 const qsizetype sl = needle0.size();
9834 if (sl == 1)
9835 return findString(haystack0, from, needle0[0], cs);
9836 if (from < 0)
9837 from += l;
9838 if (std::size_t(sl + from) > std::size_t(l))
9839 return -1;
9840 if (!sl)
9841 return from;
9842 if (!l)
9843 return -1;
9844
9845 /*
9846 We use the Boyer-Moore algorithm in cases where the overhead
9847 for the skip table should pay off, otherwise we use a simple
9848 hash function.
9849 */
9850 if (l > 500 && sl > 5)
9851 return qFindStringBoyerMoore(haystack0, from, needle0, cs);
9852
9853 auto sv = [sl](const char16_t *v) { return QStringView(v, sl); };
9854 /*
9855 We use some hashing for efficiency's sake. Instead of
9856 comparing strings, we compare the hash value of str with that
9857 of a part of this QString. Only if that matches, we call
9858 qt_string_compare().
9859 */
9860 const char16_t *needle = needle0.utf16();
9861 const char16_t *haystack = haystack0.utf16() + from;
9862 const char16_t *end = haystack0.utf16() + (l - sl);
9863 const qregisteruint sl_minus_1 = sl - 1;
9864 qregisteruint hashNeedle = 0, hashHaystack = 0;
9865 qsizetype idx;
9866
9867 if (cs == Qt::CaseSensitive) {
9868 for (idx = 0; idx < sl; ++idx) {
9869 hashNeedle = ((hashNeedle<<1) + needle[idx]);
9870 hashHaystack = ((hashHaystack<<1) + haystack[idx]);
9871 }
9872 hashHaystack -= haystack[sl_minus_1];
9873
9874 while (haystack <= end) {
9875 hashHaystack += haystack[sl_minus_1];
9876 if (hashHaystack == hashNeedle
9877 && QtPrivate::compareStrings(needle0, sv(haystack), Qt::CaseSensitive) == 0)
9878 return haystack - haystack0.utf16();
9879
9880 REHASH(*haystack);
9881 ++haystack;
9882 }
9883 } else {
9884 const char16_t *haystack_start = haystack0.utf16();
9885 for (idx = 0; idx < sl; ++idx) {
9886 hashNeedle = (hashNeedle<<1) + foldCase(needle + idx, needle);
9887 hashHaystack = (hashHaystack<<1) + foldCase(haystack + idx, haystack_start);
9888 }
9889 hashHaystack -= foldCase(haystack + sl_minus_1, haystack_start);
9890
9891 while (haystack <= end) {
9892 hashHaystack += foldCase(haystack + sl_minus_1, haystack_start);
9893 if (hashHaystack == hashNeedle
9894 && QtPrivate::compareStrings(needle0, sv(haystack), Qt::CaseInsensitive) == 0)
9895 return haystack - haystack0.utf16();
9896
9897 REHASH(foldCase(haystack, haystack_start));
9898 ++haystack;
9899 }
9900 }
9901 return -1;
9902}
9903
9904qsizetype QtPrivate::findString(QStringView haystack, qsizetype from, QLatin1StringView needle, Qt::CaseSensitivity cs) noexcept
9905{
9906 if (haystack.size() < needle.size())
9907 return -1;
9908
9909 QVarLengthArray<char16_t> s = qt_from_latin1_to_qvla(needle);
9910 return QtPrivate::findString(haystack, from, QStringView(reinterpret_cast<const QChar*>(s.constData()), s.size()), cs);
9911}
9912
9913qsizetype QtPrivate::findString(QLatin1StringView haystack, qsizetype from, QStringView needle, Qt::CaseSensitivity cs) noexcept
9914{
9915 if (haystack.size() < needle.size())
9916 return -1;
9917
9918 if (!QtPrivate::isLatin1(needle)) // won't find non-L1 UTF-16 needles in a L1 haystack!
9919 return -1;
9920
9921 if (needle.size() == 1) {
9922 const char n = needle.front().toLatin1();
9923 return QtPrivate::findString(haystack, from, QLatin1StringView(&n, 1), cs);
9924 }
9925
9926 QVarLengthArray<char> s(needle.size());
9927 qt_to_latin1_unchecked(reinterpret_cast<uchar *>(s.data()), needle.utf16(), needle.size());
9928 return QtPrivate::findString(haystack, from, QLatin1StringView(s.data(), s.size()), cs);
9929}
9930
9931qsizetype QtPrivate::findString(QLatin1StringView haystack, qsizetype from, QLatin1StringView needle, Qt::CaseSensitivity cs) noexcept
9932{
9933 if (from < 0)
9934 from += haystack.size();
9935 if (from < 0)
9936 return -1;
9937 qsizetype adjustedSize = haystack.size() - from;
9938 if (adjustedSize < needle.size())
9939 return -1;
9940 if (needle.size() == 0)
9941 return from;
9942
9943 if (cs == Qt::CaseSensitive) {
9944
9945 if (needle.size() == 1) {
9946 Q_ASSERT(haystack.data() != nullptr); // see size check above
9947 if (auto it = memchr(haystack.data() + from, needle.front().toLatin1(), adjustedSize))
9948 return static_cast<const char *>(it) - haystack.data();
9949 return -1;
9950 }
9951
9952 const QLatin1StringMatcher matcher(needle, Qt::CaseSensitivity::CaseSensitive);
9953 return matcher.indexIn(haystack, from);
9954 }
9955
9956 // If the needle is sufficiently small we simply iteratively search through
9957 // the haystack. When the needle is too long we use a boyer-moore searcher
9958 // from the standard library, if available. If it is not available then the
9959 // QLatin1Strings are converted to QString and compared as such. Though
9960 // initialization is slower the boyer-moore search it employs still makes up
9961 // for it when haystack and needle are sufficiently long.
9962 // The needle size was chosen by testing various lengths using the
9963 // qstringtokenizer benchmark with the
9964 // "tokenize_qlatin1string_qlatin1string" test.
9965#ifdef Q_CC_MSVC
9966 const qsizetype threshold = 1;
9967#else
9968 const qsizetype threshold = 13;
9969#endif
9970 if (needle.size() <= threshold) {
9971 const auto begin = haystack.begin();
9972 const auto end = haystack.end() - needle.size() + 1;
9973 auto ciMatch = CaseInsensitiveL1::matcher(needle[0].toLatin1());
9974 const qsizetype nlen1 = needle.size() - 1;
9975 for (auto it = std::find_if(begin + from, end, ciMatch); it != end;
9976 it = std::find_if(it + 1, end, ciMatch)) {
9977 // In this comparison we skip the first character because we know it's a match
9978 if (!nlen1 || QLatin1StringView(it + 1, nlen1).compare(needle.sliced(1), cs) == 0)
9979 return std::distance(begin, it);
9980 }
9981 return -1;
9982 }
9983
9984 QLatin1StringMatcher matcher(needle, Qt::CaseSensitivity::CaseInsensitive);
9985 return matcher.indexIn(haystack, from);
9986}
9987
9988qsizetype QtPrivate::lastIndexOf(QStringView haystack, qsizetype from, char16_t needle, Qt::CaseSensitivity cs) noexcept
9989{
9990 return qLastIndexOf(haystack, QChar(needle), from, cs);
9991}
9992
9993qsizetype QtPrivate::lastIndexOf(QStringView haystack, qsizetype from, QStringView needle, Qt::CaseSensitivity cs) noexcept
9994{
9995 return qLastIndexOf(haystack, from, needle, cs);
9996}
9997
9998qsizetype QtPrivate::lastIndexOf(QStringView haystack, qsizetype from, QLatin1StringView needle, Qt::CaseSensitivity cs) noexcept
9999{
10000 return qLastIndexOf(haystack, from, needle, cs);
10001}
10002
10003qsizetype QtPrivate::lastIndexOf(QLatin1StringView haystack, qsizetype from, QStringView needle, Qt::CaseSensitivity cs) noexcept
10004{
10005 return qLastIndexOf(haystack, from, needle, cs);
10006}
10007
10008qsizetype QtPrivate::lastIndexOf(QLatin1StringView haystack, qsizetype from, QLatin1StringView needle, Qt::CaseSensitivity cs) noexcept
10009{
10010 return qLastIndexOf(haystack, from, needle, cs);
10011}
10012
10013#if QT_CONFIG(regularexpression)
10014qsizetype QtPrivate::indexOf(QStringView viewHaystack, const QString *stringHaystack, const QRegularExpression &re, qsizetype from, QRegularExpressionMatch *rmatch)
10015{
10016 if (!re.isValid()) {
10017 qtWarnAboutInvalidRegularExpression(re, "QString(View)", "indexOf");
10018 return -1;
10019 }
10020
10021 QRegularExpressionMatch match = stringHaystack
10022 ? re.match(*stringHaystack, from)
10023 : re.matchView(viewHaystack, from);
10024 if (match.hasMatch()) {
10025 const qsizetype ret = match.capturedStart();
10026 if (rmatch)
10027 *rmatch = std::move(match);
10028 return ret;
10029 }
10030
10031 return -1;
10032}
10033
10034qsizetype QtPrivate::indexOf(QStringView haystack, const QRegularExpression &re, qsizetype from, QRegularExpressionMatch *rmatch)
10035{
10036 return indexOf(haystack, nullptr, re, from, rmatch);
10037}
10038
10039qsizetype QtPrivate::lastIndexOf(QStringView viewHaystack, const QString *stringHaystack, const QRegularExpression &re, qsizetype from, QRegularExpressionMatch *rmatch)
10040{
10041 if (!re.isValid()) {
10042 qtWarnAboutInvalidRegularExpression(re, "QString(View)", "lastIndexOf");
10043 return -1;
10044 }
10045
10046 qsizetype endpos = (from < 0) ? (viewHaystack.size() + from + 1) : (from + 1);
10047 QRegularExpressionMatchIterator iterator = stringHaystack
10048 ? re.globalMatch(*stringHaystack)
10049 : re.globalMatchView(viewHaystack);
10050 qsizetype lastIndex = -1;
10051 while (iterator.hasNext()) {
10052 QRegularExpressionMatch match = iterator.next();
10053 qsizetype start = match.capturedStart();
10054 if (start < endpos) {
10055 lastIndex = start;
10056 if (rmatch)
10057 *rmatch = std::move(match);
10058 } else {
10059 break;
10060 }
10061 }
10062
10063 return lastIndex;
10064}
10065
10066qsizetype QtPrivate::lastIndexOf(QStringView haystack, const QRegularExpression &re, qsizetype from, QRegularExpressionMatch *rmatch)
10067{
10068 return lastIndexOf(haystack, nullptr, re, from, rmatch);
10069}
10070
10071bool QtPrivate::contains(QStringView viewHaystack, const QString *stringHaystack, const QRegularExpression &re, QRegularExpressionMatch *rmatch)
10072{
10073 if (!re.isValid()) {
10074 qtWarnAboutInvalidRegularExpression(re, "QString(View)", "contains");
10075 return false;
10076 }
10077 QRegularExpressionMatch m = stringHaystack
10078 ? re.match(*stringHaystack)
10079 : re.matchView(viewHaystack);
10080 bool hasMatch = m.hasMatch();
10081 if (hasMatch && rmatch)
10082 *rmatch = std::move(m);
10083 return hasMatch;
10084}
10085
10086bool QtPrivate::contains(QStringView haystack, const QRegularExpression &re, QRegularExpressionMatch *rmatch)
10087{
10088 return contains(haystack, nullptr, re, rmatch);
10089}
10090
10091qsizetype QtPrivate::count(QStringView haystack, const QRegularExpression &re)
10092{
10093 if (!re.isValid()) {
10094 qtWarnAboutInvalidRegularExpression(re, "QString(View)", "count");
10095 return 0;
10096 }
10097 qsizetype count = 0;
10098 qsizetype index = -1;
10099 qsizetype len = haystack.size();
10100 while (index <= len - 1) {
10101 QRegularExpressionMatch match = re.matchView(haystack, index + 1);
10102 if (!match.hasMatch())
10103 break;
10104 count++;
10105
10106 // Search again, from the next character after the beginning of this
10107 // capture. If the capture starts with a surrogate pair, both together
10108 // count as "one character".
10109 index = match.capturedStart();
10110 if (index < len && haystack[index].isHighSurrogate())
10111 ++index;
10112 }
10113 return count;
10114}
10115
10116#endif // QT_CONFIG(regularexpression)
10117
10118/*!
10119 \since 5.0
10120
10121 Converts a plain text string to an HTML string with
10122 HTML metacharacters \c{<}, \c{>}, \c{&}, and \c{"} replaced by HTML
10123 entities.
10124
10125 Example:
10126
10127 \snippet code/src_corelib_text_qstring.cpp 7
10128*/
10129QString QString::toHtmlEscaped() const
10130{
10131 const auto pos = std::u16string_view(*this).find_first_of(u"<>&\"");
10132 if (pos == std::u16string_view::npos)
10133 return *this;
10134 QString rich;
10135 const qsizetype len = size();
10136 rich.reserve(qsizetype(len * 1.1));
10137 rich += qToStringViewIgnoringNull(*this).first(pos);
10138 for (auto ch : qToStringViewIgnoringNull(*this).sliced(pos)) {
10139 if (ch == u'<')
10140 rich += "&lt;"_L1;
10141 else if (ch == u'>')
10142 rich += "&gt;"_L1;
10143 else if (ch == u'&')
10144 rich += "&amp;"_L1;
10145 else if (ch == u'"')
10146 rich += "&quot;"_L1;
10147 else
10148 rich += ch;
10149 }
10150 rich.squeeze();
10151 return rich;
10152}
10153
10154/*!
10155 \macro QStringLiteral(str)
10156 \relates QString
10157
10158 The macro generates the data for a QString out of the string literal \a str
10159 at compile time. Creating a QString from it is free in this case, and the
10160 generated string data is stored in the read-only segment of the compiled
10161 object file.
10162
10163 If you have code that looks like this:
10164
10165 \snippet code/src_corelib_text_qstring.cpp 9
10166
10167 then a temporary QString will be created to be passed as the \c{hasAttribute}
10168 function parameter. This can be quite expensive, as it involves a memory
10169 allocation and the copy/conversion of the data into QString's internal
10170 encoding.
10171
10172 This cost can be avoided by using QStringLiteral instead:
10173
10174 \snippet code/src_corelib_text_qstring.cpp 10
10175
10176 In this case, QString's internal data will be generated at compile time; no
10177 conversion or allocation will occur at runtime.
10178
10179 Using QStringLiteral instead of a double quoted plain C++ string literal can
10180 significantly speed up creation of QString instances from data known at
10181 compile time.
10182
10183 \note QLatin1StringView can still be more efficient than QStringLiteral
10184 when the string is passed to a function that has an overload taking
10185 QLatin1StringView and this overload avoids conversion to QString. For
10186 instance, QString::operator==() can compare to a QLatin1StringView
10187 directly:
10188
10189 \snippet code/src_corelib_text_qstring.cpp 11
10190
10191 \note Some compilers have bugs encoding strings containing characters outside
10192 the US-ASCII character set. Make sure you prefix your string with \c{u} in
10193 those cases. It is optional otherwise.
10194
10195 \note QStringLiteral is interchangeable with \l operator""_s. The latter saves
10196 typing when many string literals are present in the code.
10197
10198 \sa QByteArrayLiteral
10199*/
10200
10201#if QT_DEPRECATED_SINCE(6, 8)
10202/*!
10203 \fn QtLiterals::operator""_qs(const char16_t *str, size_t size)
10204
10205 \relates QString
10206 \since 6.2
10207 \deprecated [6.8] Use \c _s from Qt::StringLiterals namespace instead.
10208
10209 Literal operator that creates a QString out of the first \a size characters in
10210 the char16_t string literal \a str.
10211
10212 The QString is created at compile time, and the generated string data is stored
10213 in the read-only segment of the compiled object file. Duplicate literals may
10214 share the same read-only memory. This functionality is interchangeable with
10215 QStringLiteral, but saves typing when many string literals are present in the
10216 code.
10217
10218 The following code creates a QString:
10219 \code
10220 auto str = u"hello"_qs;
10221 \endcode
10222
10223 \sa QStringLiteral, QtLiterals::operator""_qba(const char *str, size_t size)
10224*/
10225#endif // QT_DEPRECATED_SINCE(6, 8)
10226
10227/*!
10228 \fn Qt::Literals::StringLiterals::operator""_s(const char16_t *str, size_t size)
10229
10230 \relates QString
10231 \since 6.4
10232
10233 Literal operator that creates a QString out of the first \a size characters in
10234 the char16_t string literal \a str.
10235
10236 The QString is created at compile time, and the generated string data is stored
10237 in the read-only segment of the compiled object file. Duplicate literals may
10238 share the same read-only memory. This functionality is interchangeable with
10239 QStringLiteral, but saves typing when many string literals are present in the
10240 code.
10241
10242 The following code creates a QString:
10243 \code
10244 using namespace Qt::StringLiterals;
10245
10246 auto str = u"hello"_s;
10247 \endcode
10248
10249 \sa Qt::Literals::StringLiterals
10250*/
10251
10252/*!
10253 \internal
10254 */
10255void QAbstractConcatenable::appendLatin1To(QLatin1StringView in, QChar *out) noexcept
10256{
10257 qt_from_latin1(reinterpret_cast<char16_t *>(out), in.data(), size_t(in.size()));
10258}
10259
10260/*!
10261 \fn template <typename T> qsizetype erase(QString &s, const T &t)
10262 \relates QString
10263 \since 6.1
10264
10265 Removes all elements that compare equal to \a t from the
10266 string \a s. Returns the number of elements removed, if any.
10267
10268 \sa erase_if
10269*/
10270
10271/*!
10272 \fn template <typename Predicate> qsizetype erase_if(QString &s, Predicate pred)
10273 \relates QString
10274 \since 6.1
10275
10276 Removes all elements for which the predicate \a pred returns true
10277 from the string \a s. Returns the number of elements removed, if
10278 any.
10279
10280 \sa erase
10281*/
10282
10283/*!
10284 \macro const char *qPrintable(const QString &str)
10285 \relates QString
10286
10287 Returns \a str as a \c{const char *}. This is equivalent to
10288 \a{str}.toLocal8Bit().\l{QByteArray::}{constData()}.
10289
10290 The char pointer will be invalid after the statement in which
10291 qPrintable() is used. This is because the array returned by
10292 QString::toLocal8Bit() will fall out of scope.
10293
10294 \note qDebug(), qInfo(), qWarning(), qCritical(), qFatal() expect
10295 %s arguments to be UTF-8 encoded, while qPrintable() converts to
10296 local 8-bit encoding. Therefore qUtf8Printable() should be used
10297 for logging strings instead of qPrintable().
10298
10299 \sa qUtf8Printable()
10300*/
10301
10302/*!
10303 \macro const char *qUtf8Printable(const QString &str)
10304 \relates QString
10305 \since 5.4
10306
10307 Returns \a str as a \c{const char *}. This is equivalent to
10308 \a{str}.toUtf8().\l{QByteArray::}{constData()}.
10309
10310 The char pointer will be invalid after the statement in which
10311 qUtf8Printable() is used. This is because the array returned by
10312 QString::toUtf8() will fall out of scope.
10313
10314 Example:
10315
10316 \snippet code/src_corelib_text_qstring.cpp qUtf8Printable
10317
10318 \sa qPrintable(), qDebug(), qInfo(), qWarning(), qCritical(), qFatal()
10319*/
10320
10321/*!
10322 \macro const wchar_t *qUtf16Printable(const QString &str)
10323 \relates QString
10324 \since 5.7
10325
10326 Returns \a str as a \c{const ushort *}, but cast to a \c{const wchar_t *}
10327 to avoid warnings. This is equivalent to \a{str}.utf16() plus some casting.
10328
10329 The only useful thing you can do with the return value of this macro is to
10330 pass it to QString::asprintf() for use in a \c{%ls} conversion. In particular,
10331 the return value is \e{not} a valid \c{const wchar_t*}!
10332
10333 In general, the pointer will be invalid after the statement in which
10334 qUtf16Printable() is used. This is because the pointer may have been
10335 obtained from a temporary expression, which will fall out of scope.
10336
10337 Example:
10338
10339 \snippet code/src_corelib_text_qstring.cpp qUtf16Printable
10340
10341 \sa qPrintable(), qDebug(), qInfo(), qWarning(), qCritical(), qFatal()
10342*/
10343
10344QT_END_NAMESPACE
10345
10346#undef REHASH
QString convertToQString(QAnyStringView string)
Definition qstring.cpp:5579
Definition qlist.h:81
char32_t next(char32_t invalidAs=QChar::ReplacementCharacter)
bool hasNext() const
\inmodule QtCore
QList< uint > convertToUcs4(QStringView string)
Definition qstring.cpp:5835
QByteArray convertToUtf8(QStringView string)
Definition qstring.cpp:5780
QByteArray convertToLocal8Bit(QStringView string)
Definition qstring.cpp:5737
QByteArray convertToLatin1(QStringView string)
Definition qstring.cpp:5596
Combined button and popup list for selecting options.
static QString convertCase(T &str, QUnicodeTables::Case which)
Definition qstring.cpp:7209
static constexpr NormalizationCorrection uc_normalization_corrections[]
Q_CORE_EXPORT Q_DECL_PURE_FUNCTION bool startsWith(QStringView haystack, QStringView needle, Qt::CaseSensitivity cs=Qt::CaseSensitive) noexcept
Definition qstring.cpp:9770
Q_CORE_EXPORT Q_DECL_PURE_FUNCTION bool endsWith(QStringView haystack, QStringView needle, Qt::CaseSensitivity cs=Qt::CaseSensitive) noexcept
Definition qstring.cpp:9810
Q_CORE_EXPORT Q_DECL_PURE_FUNCTION bool isLower(QStringView s) noexcept
Definition qstring.cpp:5516
const QString & asString(const QString &s)
Definition qstring.h:1678
Q_CORE_EXPORT Q_DECL_PURE_FUNCTION bool isValidUtf16(QStringView s) noexcept
Definition qstring.cpp:906
Q_CORE_EXPORT Q_DECL_PURE_FUNCTION bool equalStrings(QStringView lhs, QStringView rhs) noexcept
Definition qstring.cpp:1374
qsizetype findString(QStringView str, qsizetype from, QChar needle, Qt::CaseSensitivity cs=Qt::CaseSensitive) noexcept
Q_CORE_EXPORT Q_DECL_PURE_FUNCTION bool isRightToLeft(QStringView string) noexcept
Q_CORE_EXPORT Q_DECL_PURE_FUNCTION int compareStrings(QStringView lhs, QStringView rhs, Qt::CaseSensitivity cs=Qt::CaseSensitive) noexcept
Q_CORE_EXPORT Q_DECL_PURE_FUNCTION bool isAscii(QLatin1StringView s) noexcept
Definition qstring.cpp:851
constexpr bool isLatin1(QLatin1StringView s) noexcept
Definition qstring.h:77
Q_CORE_EXPORT Q_DECL_PURE_FUNCTION const char16_t * qustrcasechr(QStringView str, char16_t ch) noexcept
Definition qstring.cpp:776
Q_CORE_EXPORT Q_DECL_PURE_FUNCTION bool isUpper(QStringView s) noexcept
Definition qstring.cpp:5521
Q_CORE_EXPORT Q_DECL_PURE_FUNCTION const char16_t * qustrchr(QStringView str, char16_t ch) noexcept
Definition qstring.cpp:688
void qt_to_latin1_unchecked(uchar *dst, const char16_t *uc, qsizetype len)
Definition qstring.cpp:1189
static char16_t foldCase(char16_t ch) noexcept
Definition qchar.cpp:1696
#define __has_feature(x)
uint QT_FASTCALL fetch1Pixel< QPixelLayout::BPP1LSB >(const uchar *src, int index)
bool comparesEqual(const QFileInfo &lhs, const QFileInfo &rhs)
static bool isAscii_helper(const char16_t *&ptr, const char16_t *end)
Definition qstring.cpp:859
static Int toIntegral(QStringView string, bool *ok, int base)
Definition qstring.cpp:7698
void qt_to_latin1(uchar *dst, const char16_t *src, qsizetype length)
Definition qstring.cpp:1184
Qt::strong_ordering compareThreeWay(const QByteArray &lhs, const QChar &rhs) noexcept
Definition qstring.cpp:6753
static void append_utf8(QString &qs, const char *cs, qsizetype len)
Definition qstring.cpp:7332
#define ATTRIBUTE_NO_SANITIZE
Definition qstring.cpp:367
bool qt_is_ascii(const char *&ptr, const char *end) noexcept
Definition qstring.cpp:787
static bool checkCase(QStringView s, QUnicodeTables::Case c) noexcept
Definition qstring.cpp:5505
static void replace_helper(QString &str, QSpan< qsizetype > indices, qsizetype blen, QStringView after)
Definition qstring.cpp:3693
Q_CORE_EXPORT void qt_from_latin1(char16_t *dst, const char *str, size_t size) noexcept
Definition qstring.cpp:921
static int ucstrcmp(const char16_t *a, size_t alen, const Char2 *b, size_t blen)
Definition qstring.cpp:1347
bool comparesEqual(const QByteArray &lhs, char16_t rhs) noexcept
Definition qstring.cpp:6759
Q_DECLARE_TYPEINFO(Part, Q_PRIMITIVE_TYPE)
static void removeStringImpl(QString &s, const T &needle, Qt::CaseSensitivity cs)
Definition qstring.cpp:3502
static bool needsReallocate(const QString &str, qsizetype newSize)
Definition qstring.cpp:2638
static int qArgDigitValue(QChar ch) noexcept
Definition qstring.cpp:1614
bool comparesEqual(const QByteArray &lhs, const QChar &rhs) noexcept
Definition qstring.cpp:6748
#define REHASH(a)
Definition qstring.cpp:66
bool comparesEqual(const QByteArrayView &lhs, char16_t rhs) noexcept
Definition qstring.cpp:6737
static int ucstrncmp(const char16_t *a, const char16_t *b, size_t l)
Definition qstring.cpp:1265
static Q_NEVER_INLINE int ucstricmp(qsizetype alen, const char16_t *a, qsizetype blen, const char *b)
Definition qstring.cpp:1220
static QByteArray qt_convert_to_latin1(QStringView string)
Definition qstring.cpp:5602
static bool ucstreq(const char16_t *a, size_t alen, const Char2 *b)
Definition qstring.cpp:1340
static QList< uint > qt_convert_to_ucs4(QStringView string)
Definition qstring.cpp:5807
qsizetype qFindStringBoyerMoore(QStringView haystack, qsizetype from, QStringView needle, Qt::CaseSensitivity cs)
static QByteArray qt_convert_to_local_8bit(QStringView string)
Definition qstring.cpp:5714
static LengthMod parse_length_modifier(const char *&c) noexcept
Definition qstring.cpp:7388
static ArgEscapeData findArgEscapes(QStringView s)
Definition qstring.cpp:8607
static QByteArray qt_convert_to_utf8(QStringView str)
Definition qstring.cpp:5760
static void qt_to_latin1_internal(uchar *dst, const char16_t *src, qsizetype length)
Definition qstring.cpp:1005
QtPrivate::QCaseInsensitiveLatin1Hash CaseInsensitiveL1
Definition qstring.cpp:1354
LengthMod
Definition qstring.cpp:7377
@ lm_z
Definition qstring.cpp:7377
@ lm_none
Definition qstring.cpp:7377
@ lm_t
Definition qstring.cpp:7377
@ lm_l
Definition qstring.cpp:7377
@ lm_ll
Definition qstring.cpp:7377
@ lm_hh
Definition qstring.cpp:7377
@ lm_L
Definition qstring.cpp:7377
@ lm_h
Definition qstring.cpp:7377
@ lm_j
Definition qstring.cpp:7377
static void insert_helper(QString &str, qsizetype i, const T &toInsert)
Definition qstring.cpp:2977
static int latin1nicmp(const char *lhsChar, qsizetype lSize, const char *rhsChar, qsizetype rSize)
Definition qstring.cpp:1356
Qt::strong_ordering compareThreeWay(const QByteArrayView &lhs, const QChar &rhs) noexcept
Definition qstring.cpp:6731
static char16_t to_unicode(const char c)
Definition qstring.cpp:9012
Qt::strong_ordering compareThreeWay(const QByteArray &lhs, char16_t rhs) noexcept
Definition qstring.cpp:6764
static QString replaceArgEscapes(QStringView s, const ArgEscapeData &d, qsizetype field_width, QStringView arg, QStringView larg, QChar fillChar)
Definition qstring.cpp:8683
static QVarLengthArray< char16_t > qt_from_latin1_to_qvla(QLatin1StringView str)
Definition qstring.cpp:996
static Q_NEVER_INLINE int ucstricmp8(const char *utf8, const char *utf8end, const QChar *utf16, const QChar *utf16end)
Definition qstring.cpp:1238
void qt_string_normalize(QString *data, QString::NormalizationForm mode, QChar::UnicodeVersion version, qsizetype from)
Definition qstring.cpp:8470
static uint parse_flag_characters(const char *&c) noexcept
Definition qstring.cpp:7340
static Q_NEVER_INLINE int ucstricmp(qsizetype alen, const char16_t *a, qsizetype blen, const char16_t *b)
Definition qstring.cpp:1195
static char16_t to_unicode(const QChar c)
Definition qstring.cpp:9011
QDataStream & operator>>(QDataStream &in, QString &str)
Definition qstring.cpp:9544
static int getEscape(const Char *uc, qsizetype *pos, qsizetype len)
Definition qstring.cpp:9015
static int ucstrncmp(const char16_t *a, const char *b, size_t l)
Definition qstring.cpp:1318
static bool can_consume(const char *&c, char ch) noexcept
Definition qstring.cpp:7379
static int parse_field_width(const char *&c, qsizetype size)
Definition qstring.cpp:7360
Qt::strong_ordering compareThreeWay(const QByteArrayView &lhs, char16_t rhs) noexcept
Definition qstring.cpp:6742
#define qUtf16Printable(string)
Definition qstring.h:1695
qsizetype occurrences
Definition qstring.cpp:8601
qsizetype escape_len
Definition qstring.cpp:8604
qsizetype locale_occurrences
Definition qstring.cpp:8602
\inmodule QtCore \reentrant
Definition qchar.h:18
constexpr char16_t unicode() const noexcept
Converts a Latin-1 character to an 16-bit-encoded Unicode representation of the character.
Definition qchar.h:22
constexpr QLatin1Char(char c) noexcept
Constructs a Latin-1 character for c.
Definition qchar.h:20
@ BlankBeforePositive
Definition qlocale_p.h:270
@ AddTrailingZeroes
Definition qlocale_p.h:267
static int difference(char lhs, char rhs)