Qt
Internal/Contributor docs for the Qt SDK. Note: These are NOT official API docs; those are found at https://doc.qt.io/
Loading...
Searching...
No Matches
qstring.cpp
Go to the documentation of this file.
1// Copyright (C) 2021 The Qt Company Ltd.
2// Copyright (C) 2022 Intel Corporation.
3// Copyright (C) 2019 Mail.ru Group.
4// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
5// Qt-Security score:critical reason:data-parser
6
7#include "qstringlist.h"
8#if QT_CONFIG(regularexpression)
9#include "qregularexpression.h"
10#endif
12#include <private/qstringconverter_p.h>
13#include <private/qtools_p.h>
15#include "private/qsimd_p.h"
16#include <qnumeric.h>
17#include <qdatastream.h>
18#include <qlist.h>
19#include "qlocale.h"
20#include "qlocale_p.h"
21#include "qspan.h"
22#include "qstringbuilder.h"
23#include "qstringmatcher.h"
25#include "qdebug.h"
26#include "qendian.h"
27#include "qcollator.h"
28#include "qttypetraits.h"
29
30#ifdef Q_OS_DARWIN
31#include <private/qcore_mac_p.h>
32#endif
33
34#include <private/qfunctions_p.h>
35
36#include <limits.h>
37#include <string.h>
38#include <stdlib.h>
39#include <stdio.h>
40#include <stdarg.h>
41#include <wchar.h>
42
43#include "qchar.cpp"
48#include "qthreadstorage.h"
49
50#include <algorithm>
51#include <functional>
52
53#ifdef Q_OS_WIN
54# include <qt_windows.h>
55# if !defined(QT_BOOTSTRAPPED) && (defined(QT_NO_CAST_FROM_ASCII) || defined(QT_NO_CAST_TO_ASCII))
56// MSVC requires this, but let's apply it to MinGW compilers too, just in case
57# error "This file cannot be compiled with QT_NO_CAST_{TO,FROM}_ASCII, "
58 "otherwise some QString functions will not get exported."
59# endif
60#endif
61
62#ifdef truncate
63# undef truncate
64#endif
65
// Rolling-hash step used by the backward substring search in qLastIndexOf().
// The expansion relies on two names being in scope at the point of use:
// hashHaystack (the running hash of the current window) and sl_minus_1 (the
// needle length minus one). The term contributed by the code unit that leaves
// the window, (a) << sl_minus_1, is subtracted only while that shift is
// narrower than the hash type: a wider shift would be undefined, and in that
// case the term has been shifted out of the hash already. Afterwards the hash
// is shifted by one bit to make room for the code unit that enters the window.
#define REHASH(a) \
    if (sl_minus_1 < sizeof(sl_minus_1) * CHAR_BIT) \
        hashHaystack -= decltype(hashHaystack)(a) << sl_minus_1; \
    hashHaystack <<= 1
70
72
73using namespace Qt::StringLiterals;
74using namespace QtMiscUtils;
75
76const char16_t QString::_empty = 0;
77
78// in qstringmatcher.cpp
79qsizetype qFindStringBoyerMoore(QStringView haystack, qsizetype from, QStringView needle, Qt::CaseSensitivity cs);
80
81namespace {
// Selects what a comparison helper has to compute: a plain equal/different
// answer, or a value whose sign gives the ordering of the two strings.
enum StringComparisonMode {
    CompareStringsForEquality,
    CompareStringsForOrdering,
};
86
87template <typename Pointer>
88char32_t foldCaseHelper(Pointer ch, Pointer start) = delete;
89
90template <>
91char32_t foldCaseHelper<const QChar*>(const QChar* ch, const QChar* start)
92{
93 return foldCase(reinterpret_cast<const char16_t*>(ch),
94 reinterpret_cast<const char16_t*>(start));
95}
96
97template <>
98char32_t foldCaseHelper<const char*>(const char* ch, const char*)
99{
100 return foldCase(char16_t(uchar(*ch)));
101}
102
103template <typename T>
104char16_t valueTypeToUtf16(T t) = delete;
105
106template <>
107char16_t valueTypeToUtf16<QChar>(QChar t)
108{
109 return t.unicode();
110}
111
112template <>
113char16_t valueTypeToUtf16<char>(char t)
114{
115 return char16_t{uchar(t)};
116}
117
// Returns true if the case-folded form of \a a equals \a b. Note that \a b is
// compared as-is, i.e. the caller is expected to pass it already folded.
template <typename T>
static inline bool foldAndCompare(const T a, const T b)
{
    const auto folded = foldCase(a);
    return folded == b;
}
123
124/*!
125 \internal
126
127 Returns the index position of the first occurrence of the
128 character \a ch in the string given by \a str and \a len,
129 searching forward from index
130 position \a from. Returns -1 if \a ch could not be found.
131*/
132template <typename Haystack>
133static inline qsizetype qLastIndexOf(Haystack haystack, QChar needle,
134 qsizetype from, Qt::CaseSensitivity cs) noexcept
135{
136 if (haystack.size() == 0)
137 return -1;
138 if (from < 0)
139 from += haystack.size();
140 else if (std::size_t(from) > std::size_t(haystack.size()))
141 from = haystack.size() - 1;
142 if (from >= 0) {
143 char16_t c = needle.unicode();
144 const auto b = haystack.data();
145 auto n = b + from;
146 if (cs == Qt::CaseSensitive) {
147 for (; n >= b; --n)
148 if (valueTypeToUtf16(*n) == c)
149 return n - b;
150 } else {
151 c = foldCase(c);
152 for (; n >= b; --n)
153 if (foldCase(valueTypeToUtf16(*n)) == c)
154 return n - b;
155 }
156 }
157 return -1;
158}
159template <> qsizetype
160qLastIndexOf(QString, QChar, qsizetype, Qt::CaseSensitivity) noexcept = delete; // unwanted, would detach
161
162template<typename Haystack, typename Needle>
163static qsizetype qLastIndexOf(Haystack haystack0, qsizetype from,
164 Needle needle0, Qt::CaseSensitivity cs) noexcept
165{
166 const qsizetype sl = needle0.size();
167 if (sl == 1)
168 return qLastIndexOf(haystack0, needle0.front(), from, cs);
169
170 const qsizetype l = haystack0.size();
171 if (from < 0)
172 from += l;
173 if (from == l && sl == 0)
174 return from;
175 const qsizetype delta = l - sl;
176 if (std::size_t(from) > std::size_t(l) || delta < 0)
177 return -1;
178 if (from > delta)
179 from = delta;
180
181 auto sv = [sl](const typename Haystack::value_type *v) { return Haystack(v, sl); };
182
183 auto haystack = haystack0.data();
184 const auto needle = needle0.data();
185 const auto *end = haystack;
186 haystack += from;
187 const qregisteruint sl_minus_1 = sl ? sl - 1 : 0;
188 const auto *n = needle + sl_minus_1;
189 const auto *h = haystack + sl_minus_1;
190 qregisteruint hashNeedle = 0, hashHaystack = 0;
191
192 if (cs == Qt::CaseSensitive) {
193 for (qsizetype idx = 0; idx < sl; ++idx) {
194 hashNeedle = (hashNeedle << 1) + valueTypeToUtf16(*(n - idx));
195 hashHaystack = (hashHaystack << 1) + valueTypeToUtf16(*(h - idx));
196 }
197 hashHaystack -= valueTypeToUtf16(*haystack);
198
199 while (haystack >= end) {
200 hashHaystack += valueTypeToUtf16(*haystack);
201 if (hashHaystack == hashNeedle
202 && QtPrivate::compareStrings(needle0, sv(haystack), Qt::CaseSensitive) == 0)
203 return haystack - end;
204 --haystack;
205 REHASH(valueTypeToUtf16(haystack[sl]));
206 }
207 } else {
208 for (qsizetype idx = 0; idx < sl; ++idx) {
209 hashNeedle = (hashNeedle << 1) + foldCaseHelper(n - idx, needle);
210 hashHaystack = (hashHaystack << 1) + foldCaseHelper(h - idx, end);
211 }
212 hashHaystack -= foldCaseHelper(haystack, end);
213
214 while (haystack >= end) {
215 hashHaystack += foldCaseHelper(haystack, end);
216 if (hashHaystack == hashNeedle
217 && QtPrivate::compareStrings(sv(haystack), needle0, Qt::CaseInsensitive) == 0)
218 return haystack - end;
219 --haystack;
220 REHASH(foldCaseHelper(haystack + sl, end));
221 }
222 }
223 return -1;
224}
225
226template <typename Haystack, typename Needle>
227bool qt_starts_with_impl(Haystack haystack, Needle needle, Qt::CaseSensitivity cs) noexcept
228{
229 if (haystack.isNull())
230 return needle.isNull();
231 const auto haystackLen = haystack.size();
232 const auto needleLen = needle.size();
233 if (haystackLen == 0)
234 return needleLen == 0;
235 if (needleLen > haystackLen)
236 return false;
237
238 return QtPrivate::compareStrings(haystack.first(needleLen), needle, cs) == 0;
239}
240
241template <typename Haystack, typename Needle>
242bool qt_ends_with_impl(Haystack haystack, Needle needle, Qt::CaseSensitivity cs) noexcept
243{
244 if (haystack.isNull())
245 return needle.isNull();
246 const auto haystackLen = haystack.size();
247 const auto needleLen = needle.size();
248 if (haystackLen == 0)
249 return needleLen == 0;
250 if (haystackLen < needleLen)
251 return false;
252
253 return QtPrivate::compareStrings(haystack.last(needleLen), needle, cs) == 0;
254}
255
256template <typename T>
257static void append_helper(QString &self, T view)
258{
259 const auto strData = view.data();
260 const qsizetype strSize = view.size();
261 auto &d = self.data_ptr();
262 if (strData && strSize > 0) {
263 // the number of UTF-8 code units is always at a minimum equal to the number
264 // of equivalent UTF-16 code units
265 d.detachAndGrow(QArrayData::GrowsAtEnd, strSize, nullptr, nullptr);
266 Q_CHECK_PTR(d.data());
267 Q_ASSERT(strSize <= d.freeSpaceAtEnd());
268
269 auto dst = std::next(d.data(), d.size);
270 if constexpr (std::is_same_v<T, QUtf8StringView>) {
271 dst = QUtf8::convertToUnicode(dst, view);
272 } else if constexpr (std::is_same_v<T, QLatin1StringView>) {
273 QLatin1::convertToUnicode(dst, view);
274 dst += strSize;
275 } else {
276 static_assert(QtPrivate::type_dependent_false<T>(),
277 "Can only operate on UTF-8 and Latin-1");
278 }
279 self.resize(std::distance(d.begin(), dst));
280 } else if (d.isNull() && !view.isNull()) { // special case
281 self = QLatin1StringView("");
282 }
283}
284
// Compile-time unrolled loop for short tails: runs at most MaxCount
// iterations, each one being a separate template instantiation.
template <uint MaxCount> struct UnrollTailLoop
{
    template <typename RetType, typename Functor1, typename Functor2, typename Number>
    static inline RetType exec(Number count, RetType returnIfExited, Functor1 loopCheck, Functor2 returnIfFailed, Number i = 0)
    {
        /* equivalent to (for count <= MaxCount):
         *   for ( ; count--; ++i) {
         *       if (loopCheck(i))
         *           return returnIfFailed(i);
         *   }
         *   return returnIfExited;
         */

        if (count) {
            if (loopCheck(i))
                return returnIfFailed(i);

            // next iteration: one fewer to go, one index further
            return UnrollTailLoop<MaxCount - 1>::exec(count - 1, returnIfExited, loopCheck, returnIfFailed, i + 1);
        }
        return returnIfExited;
    }

    template <typename Functor, typename Number>
    static inline void exec(Number count, Functor code)
    {
        /* equivalent to (for count <= MaxCount):
         *   for (Number i = 0; i < count; ++i)
         *       code(i);
         */
        const auto runAndContinue = [=](Number i) -> bool { code(i); return false; };
        const auto neverCalled = [](Number) { return 0; };
        exec(count, 0, runAndContinue, neverCalled);
    }
};
// End of the recursion: no iterations are left, whatever count says.
template <> template <typename RetType, typename Functor1, typename Functor2, typename Number>
inline RetType UnrollTailLoop<0>::exec(Number, RetType returnIfExited, Functor1, Functor2, Number)
{
    return returnIfExited;
}
323} // unnamed namespace
324
325/*
326 * Note on the use of SIMD in qstring.cpp:
327 *
328 * Several operations with strings are improved with the use of SIMD code,
329 * since they are repetitive. For MIPS, we have hand-written assembly code
330 * outside of qstring.cpp targeting MIPS DSP and MIPS DSPr2. For ARM and for
331 * x86, we can only use intrinsics and therefore everything is contained in
332 * qstring.cpp. We need to use intrinsics only for those platforms due to the
333 * different compilers and toolchains used, which have different syntax for
334 * assembly sources.
335 *
336 * ** SSE notes: **
337 *
338 * Whenever multiple alternatives are equivalent or near so, we prefer the one
339 * using instructions from SSE2, since SSE2 is guaranteed to be enabled for all
340 * 64-bit builds and we enable it for 32-bit builds by default. Use of higher
341 * SSE versions should be done when there is a clear performance benefit and
342 * requires fallback code to SSE2, if it exists.
343 *
344 * Performance measurement in the past shows that most strings are short in
345 * size and, therefore, do not benefit from alignment prologues. That is,
346 * trying to find a 16-byte-aligned boundary to operate on is often more
347 * expensive than executing the unaligned operation directly. In addition, note
348 * that the QString private data is designed so that the data is stored on
349 * 16-byte boundaries if the system malloc() returns 16-byte aligned pointers
350 * on its own (64-bit glibc on Linux does; 32-bit glibc on Linux returns them
351 * 50% of the time), so skipping the alignment prologue is actually optimizing
352 * for the common case.
353 */
354
355#if defined(__mips_dsp)
356// From qstring_mips_dsp_asm.S
357extern "C" void qt_fromlatin1_mips_asm_unroll4 (char16_t*, const char*, uint);
358extern "C" void qt_fromlatin1_mips_asm_unroll8 (char16_t*, const char*, uint);
359extern "C" void qt_toLatin1_mips_dsp_asm(uchar *dst, const char16_t *src, int length);
360#endif
361
362#if defined(__SSE2__) && defined(Q_CC_GNU)
363// We may overrun the buffer, but that's a false positive:
364// this won't crash nor produce incorrect results
365# define ATTRIBUTE_NO_SANITIZE __attribute__((__no_sanitize_address__, __no_sanitize_thread__))
366#else
367# define ATTRIBUTE_NO_SANITIZE
368#endif
369
370#ifdef __SSE2__
371static constexpr bool UseSse4_1 = bool(qCompilerCpuFeatures & CpuFeatureSSE4_1);
372static constexpr bool UseAvx2 = UseSse4_1 &&
373 (qCompilerCpuFeatures & CpuFeatureArchHaswell) == CpuFeatureArchHaswell;
374
375[[maybe_unused]]
376Q_ALWAYS_INLINE static __m128i mm_load8_zero_extend(const void *ptr)
377{
378 const __m128i *dataptr = static_cast<const __m128i *>(ptr);
379 if constexpr (UseSse4_1) {
380 // use a MOVQ followed by PMOVZXBW
381 // if AVX2 is present, these should combine into a single VPMOVZXBW instruction
382 __m128i data = _mm_loadl_epi64(dataptr);
383 return _mm_cvtepu8_epi16(data);
384 }
385
386 // use MOVQ followed by PUNPCKLBW
387 __m128i data = _mm_loadl_epi64(dataptr);
388 return _mm_unpacklo_epi8(data, _mm_setzero_si128());
389}
390
391[[maybe_unused]] ATTRIBUTE_NO_SANITIZE
392static qsizetype qustrlen_sse2(const char16_t *str) noexcept
393{
394 // find the 16-byte alignment immediately prior or equal to str
395 quintptr misalignment = quintptr(str) & 0xf;
396 Q_ASSERT((misalignment & 1) == 0);
397 const char16_t *ptr = str - (misalignment / 2);
398
399 // load 16 bytes and see if we have a null
400 // (aligned loads can never segfault)
401 const __m128i zeroes = _mm_setzero_si128();
402 __m128i data = _mm_load_si128(reinterpret_cast<const __m128i *>(ptr));
403 __m128i comparison = _mm_cmpeq_epi16(data, zeroes);
404 uint mask = _mm_movemask_epi8(comparison);
405
406 // ignore the result prior to the beginning of str
407 mask >>= misalignment;
408
409 // Have we found something in the first block? Need to handle it now
410 // because of the left shift above.
411 if (mask)
412 return qCountTrailingZeroBits(mask) / sizeof(char16_t);
413
414 constexpr qsizetype Step = sizeof(__m128i) / sizeof(char16_t);
415 qsizetype size = Step - misalignment / sizeof(char16_t);
416
417 size -= Step;
418 do {
419 size += Step;
420 data = _mm_load_si128(reinterpret_cast<const __m128i *>(str + size));
421
422 comparison = _mm_cmpeq_epi16(data, zeroes);
423 mask = _mm_movemask_epi8(comparison);
424 } while (mask == 0);
425
426 // found a null
427 return size + qCountTrailingZeroBits(mask) / sizeof(char16_t);
428}
429
430// Scans from \a ptr to \a end until \a maskval is non-zero. Returns true if
431// the no non-zero was found. Returns false and updates \a ptr to point to the
432// first 16-bit word that has any bit set (note: if the input is 8-bit, \a ptr
433// may be updated to one byte short).
434static bool simdTestMask(const char *&ptr, const char *end, quint32 maskval)
435{
436 auto updatePtr = [&](uint result) {
437 // found a character matching the mask
438 uint idx = qCountTrailingZeroBits(~result);
439 ptr += idx;
440 return false;
441 };
442
443 if constexpr (UseSse4_1) {
444# ifndef Q_OS_QNX // compiler fails in the code below
445 __m128i mask;
446 auto updatePtrSimd = [&](__m128i data) -> bool {
447 __m128i masked = _mm_and_si128(mask, data);
448 __m128i comparison = _mm_cmpeq_epi16(masked, _mm_setzero_si128());
449 uint result = _mm_movemask_epi8(comparison);
450 return updatePtr(result);
451 };
452
453 if constexpr (UseAvx2) {
454 // AVX2 implementation: test 32 bytes at a time
455 const __m256i mask256 = _mm256_broadcastd_epi32(_mm_cvtsi32_si128(maskval));
456 while (ptr + 32 <= end) {
457 __m256i data = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(ptr));
458 if (!_mm256_testz_si256(mask256, data)) {
459 // found a character matching the mask
460 __m256i masked256 = _mm256_and_si256(mask256, data);
461 __m256i comparison256 = _mm256_cmpeq_epi16(masked256, _mm256_setzero_si256());
462 return updatePtr(_mm256_movemask_epi8(comparison256));
463 }
464 ptr += 32;
465 }
466
467 mask = _mm256_castsi256_si128(mask256);
468 } else {
469 // SSE 4.1 implementation: test 32 bytes at a time (two 16-byte
470 // comparisons, unrolled)
471 mask = _mm_set1_epi32(maskval);
472 while (ptr + 32 <= end) {
473 __m128i data1 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(ptr));
474 __m128i data2 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(ptr + 16));
475 if (!_mm_testz_si128(mask, data1))
476 return updatePtrSimd(data1);
477
478 ptr += 16;
479 if (!_mm_testz_si128(mask, data2))
480 return updatePtrSimd(data2);
481 ptr += 16;
482 }
483 }
484
485 // AVX2 and SSE4.1: final 16-byte comparison
486 if (ptr + 16 <= end) {
487 __m128i data1 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(ptr));
488 if (!_mm_testz_si128(mask, data1))
489 return updatePtrSimd(data1);
490 ptr += 16;
491 }
492
493 // and final 8-byte comparison
494 if (ptr + 8 <= end) {
495 __m128i data1 = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(ptr));
496 if (!_mm_testz_si128(mask, data1))
497 return updatePtrSimd(data1);
498 ptr += 8;
499 }
500
501 return true;
502# endif // QNX
503 }
504
505 // SSE2 implementation: test 16 bytes at a time.
506 const __m128i mask = _mm_set1_epi32(maskval);
507 while (ptr + 16 <= end) {
508 __m128i data = _mm_loadu_si128(reinterpret_cast<const __m128i *>(ptr));
509 __m128i masked = _mm_and_si128(mask, data);
510 __m128i comparison = _mm_cmpeq_epi16(masked, _mm_setzero_si128());
511 quint16 result = _mm_movemask_epi8(comparison);
512 if (result != 0xffff)
513 return updatePtr(result);
514 ptr += 16;
515 }
516
517 // and one 8-byte comparison
518 if (ptr + 8 <= end) {
519 __m128i data = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(ptr));
520 __m128i masked = _mm_and_si128(mask, data);
521 __m128i comparison = _mm_cmpeq_epi16(masked, _mm_setzero_si128());
522 quint8 result = _mm_movemask_epi8(comparison);
523 if (result != 0xff)
524 return updatePtr(result);
525 ptr += 8;
526 }
527
528 return true;
529}
530
531template <StringComparisonMode Mode, typename Char> [[maybe_unused]]
532static int ucstrncmp_sse2(const char16_t *a, const Char *b, size_t l)
533{
534 static_assert(std::is_unsigned_v<Char>);
535
536 // Using the PMOVMSKB instruction, we get two bits for each UTF-16 character
537 // we compare. This lambda helps extract the code unit.
538 static const auto codeUnitAt = [](const auto *n, qptrdiff idx) -> int {
539 constexpr int Stride = 2;
540 // this is the same as:
541 // return n[idx / Stride];
542 // but using pointer arithmetic to avoid the compiler dividing by two
543 // and multiplying by two in the case of char16_t (we know idx is even,
544 // but the compiler does not). This is not UB.
545
546 auto ptr = reinterpret_cast<const uchar *>(n);
547 ptr += idx / (Stride / sizeof(*n));
548 return *reinterpret_cast<decltype(n)>(ptr);
549 };
550 auto difference = [a, b](uint mask, qptrdiff offset) {
551 if (Mode == CompareStringsForEquality)
552 return 1;
553 uint idx = qCountTrailingZeroBits(mask);
554 return codeUnitAt(a + offset, idx) - codeUnitAt(b + offset, idx);
555 };
556
557 static const auto load8Chars = [](const auto *ptr) {
558 if (sizeof(*ptr) == 2)
559 return _mm_loadu_si128(reinterpret_cast<const __m128i *>(ptr));
560 __m128i chunk = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(ptr));
561 return _mm_unpacklo_epi8(chunk, _mm_setzero_si128());
562 };
563 static const auto load4Chars = [](const auto *ptr) {
564 if (sizeof(*ptr) == 2)
565 return _mm_loadl_epi64(reinterpret_cast<const __m128i *>(ptr));
566 __m128i chunk = _mm_cvtsi32_si128(qFromUnaligned<quint32>(ptr));
567 return _mm_unpacklo_epi8(chunk, _mm_setzero_si128());
568 };
569
570 // we're going to read a[0..15] and b[0..15] (32 bytes)
571 auto processChunk16Chars = [a, b](qptrdiff offset) -> uint {
572 if constexpr (UseAvx2) {
573 __m256i a_data = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(a + offset));
574 __m256i b_data;
575 if (sizeof(Char) == 1) {
576 // expand to UTF-16 via zero-extension
577 __m128i chunk = _mm_loadu_si128(reinterpret_cast<const __m128i *>(b + offset));
578 b_data = _mm256_cvtepu8_epi16(chunk);
579 } else {
580 b_data = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(b + offset));
581 }
582 __m256i result = _mm256_cmpeq_epi16(a_data, b_data);
583 return _mm256_movemask_epi8(result);
584 }
585
586 __m128i a_data1 = load8Chars(a + offset);
587 __m128i a_data2 = load8Chars(a + offset + 8);
588 __m128i b_data1, b_data2;
589 if (sizeof(Char) == 1) {
590 // expand to UTF-16 via unpacking
591 __m128i b_data = _mm_loadu_si128(reinterpret_cast<const __m128i *>(b + offset));
592 b_data1 = _mm_unpacklo_epi8(b_data, _mm_setzero_si128());
593 b_data2 = _mm_unpackhi_epi8(b_data, _mm_setzero_si128());
594 } else {
595 b_data1 = load8Chars(b + offset);
596 b_data2 = load8Chars(b + offset + 8);
597 }
598 __m128i result1 = _mm_cmpeq_epi16(a_data1, b_data1);
599 __m128i result2 = _mm_cmpeq_epi16(a_data2, b_data2);
600 return _mm_movemask_epi8(result1) | _mm_movemask_epi8(result2) << 16;
601 };
602
603 if (l >= sizeof(__m256i) / sizeof(char16_t)) {
604 qptrdiff offset = 0;
605 for ( ; l >= offset + sizeof(__m256i) / sizeof(char16_t); offset += sizeof(__m256i) / sizeof(char16_t)) {
606 uint mask = ~processChunk16Chars(offset);
607 if (mask)
608 return difference(mask, offset);
609 }
610
611 // maybe overlap the last 32 bytes
612 if (size_t(offset) < l) {
613 offset = l - sizeof(__m256i) / sizeof(char16_t);
614 uint mask = ~processChunk16Chars(offset);
615 return mask ? difference(mask, offset) : 0;
616 }
617 } else if (l >= 4) {
618 __m128i a_data1, b_data1;
619 __m128i a_data2, b_data2;
620 int width;
621 if (l >= 8) {
622 width = 8;
623 a_data1 = load8Chars(a);
624 b_data1 = load8Chars(b);
625 a_data2 = load8Chars(a + l - width);
626 b_data2 = load8Chars(b + l - width);
627 } else {
628 // we're going to read a[0..3] and b[0..3] (8 bytes)
629 width = 4;
630 a_data1 = load4Chars(a);
631 b_data1 = load4Chars(b);
632 a_data2 = load4Chars(a + l - width);
633 b_data2 = load4Chars(b + l - width);
634 }
635
636 __m128i result = _mm_cmpeq_epi16(a_data1, b_data1);
637 ushort mask = ~_mm_movemask_epi8(result);
638 if (mask)
639 return difference(mask, 0);
640
641 result = _mm_cmpeq_epi16(a_data2, b_data2);
642 mask = ~_mm_movemask_epi8(result);
643 if (mask)
644 return difference(mask, l - width);
645 } else {
646 // reset l
647 l &= 3;
648
649 const auto lambda = [=](size_t i) -> int {
650 return a[i] - b[i];
651 };
652 return UnrollTailLoop<3>::exec(l, 0, lambda, lambda);
653 }
654 return 0;
655}
656#endif
657
658Q_NEVER_INLINE
659qsizetype QtPrivate::qustrlen(const char16_t *str) noexcept
660{
661#if defined(__SSE2__) && !(defined(__SANITIZE_ADDRESS__) || __has_feature(address_sanitizer)) && !(defined(__SANITIZE_THREAD__) || __has_feature(thread_sanitizer))
662 return qustrlen_sse2(str);
663#endif
664
665 if (sizeof(wchar_t) == sizeof(char16_t))
666 return wcslen(reinterpret_cast<const wchar_t *>(str));
667
668 qsizetype result = 0;
669 while (*str++)
670 ++result;
671 return result;
672}
673
674qsizetype QtPrivate::qustrnlen(const char16_t *str, qsizetype maxlen) noexcept
675{
676 return qustrchr({ str, maxlen }, u'\0') - str;
677}
678
679/*!
680 * \internal
681 *
682 * Searches for character \a c in the string \a str and returns a pointer to
683 * it. Unlike strchr() and wcschr() (but like glibc's strchrnul()), if the
684 * character is not found, this function returns a pointer to the end of the
685 * string -- that is, \c{str.end()}.
686 */
688const char16_t *QtPrivate::qustrchr(QStringView str, char16_t c) noexcept
689{
690 const char16_t *n = str.utf16();
691 const char16_t *e = n + str.size();
692
693#ifdef __SSE2__
694 bool loops = true;
695 // Using the PMOVMSKB instruction, we get two bits for each character
696 // we compare.
697 __m128i mch;
698 if constexpr (UseAvx2) {
699 // we're going to read n[0..15] (32 bytes)
700 __m256i mch256 = _mm256_set1_epi32(c | (c << 16));
701 for (const char16_t *next = n + 16; next <= e; n = next, next += 16) {
702 __m256i data = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(n));
703 __m256i result = _mm256_cmpeq_epi16(data, mch256);
704 uint mask = uint(_mm256_movemask_epi8(result));
705 if (mask) {
706 uint idx = qCountTrailingZeroBits(mask);
707 return n + idx / 2;
708 }
709 }
710 loops = false;
711 mch = _mm256_castsi256_si128(mch256);
712 } else {
713 mch = _mm_set1_epi32(c | (c << 16));
714 }
715
716 auto hasMatch = [mch, &n](__m128i data, ushort validityMask) {
717 __m128i result = _mm_cmpeq_epi16(data, mch);
718 uint mask = uint(_mm_movemask_epi8(result));
719 if ((mask & validityMask) == 0)
720 return false;
721 uint idx = qCountTrailingZeroBits(mask);
722 n += idx / 2;
723 return true;
724 };
725
726 // we're going to read n[0..7] (16 bytes)
727 for (const char16_t *next = n + 8; next <= e; n = next, next += 8) {
728 __m128i data = _mm_loadu_si128(reinterpret_cast<const __m128i *>(n));
729 if (hasMatch(data, 0xffff))
730 return n;
731
732 if (!loops) {
733 n += 8;
734 break;
735 }
736 }
737
738# if !defined(__OPTIMIZE_SIZE__)
739 // we're going to read n[0..3] (8 bytes)
740 if (e - n > 3) {
741 __m128i data = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(n));
742 if (hasMatch(data, 0xff))
743 return n;
744
745 n += 4;
746 }
747
748 return UnrollTailLoop<3>::exec(e - n, e,
749 [=](qsizetype i) { return n[i] == c; },
750 [=](qsizetype i) { return n + i; });
751# endif
752#elif defined(__ARM_NEON__)
753 const uint16x8_t vmask = qvsetq_n_u16(1, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6, 1 << 7);
754 const uint16x8_t ch_vec = vdupq_n_u16(c);
755 for (const char16_t *next = n + 8; next <= e; n = next, next += 8) {
756 uint16x8_t data = vld1q_u16(reinterpret_cast<const uint16_t *>(n));
757 uint mask = vaddvq_u16(vandq_u16(vceqq_u16(data, ch_vec), vmask));
758 if (ushort(mask)) {
759 // found a match
760 return n + qCountTrailingZeroBits(mask);
761 }
762 }
763#endif // aarch64
764
765 return std::find(n, e, c);
766}
767
768/*!
769 * \internal
770 *
771 * Searches case-insensitively for character \a c in the string \a str and
772 * returns a pointer to it. Iif the character is not found, this function
773 * returns a pointer to the end of the string -- that is, \c{str.end()}.
774 */
776const char16_t *QtPrivate::qustrcasechr(QStringView str, char16_t c) noexcept
777{
778 const QChar *n = str.begin();
779 const QChar *e = str.end();
780 c = foldCase(c);
781 auto it = std::find_if(n, e, [c](auto ch) { return foldAndCompare(ch, QChar(c)); });
782 return reinterpret_cast<const char16_t *>(it);
783}
784
785// Note: ptr on output may be off by one and point to a preceding US-ASCII
786// character. Usually harmless.
787bool qt_is_ascii(const char *&ptr, const char *end) noexcept
788{
789#if defined(__SSE2__)
790 // Testing for the high bit can be done efficiently with just PMOVMSKB
791 bool loops = true;
792 if constexpr (UseAvx2) {
793 while (ptr + 32 <= end) {
794 __m256i data = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(ptr));
795 quint32 mask = _mm256_movemask_epi8(data);
796 if (mask) {
797 uint idx = qCountTrailingZeroBits(mask);
798 ptr += idx;
799 return false;
800 }
801 ptr += 32;
802 }
803 loops = false;
804 }
805
806 while (ptr + 16 <= end) {
807 __m128i data = _mm_loadu_si128(reinterpret_cast<const __m128i *>(ptr));
808 quint32 mask = _mm_movemask_epi8(data);
809 if (mask) {
810 uint idx = qCountTrailingZeroBits(mask);
811 ptr += idx;
812 return false;
813 }
814 ptr += 16;
815
816 if (!loops)
817 break;
818 }
819 if (ptr + 8 <= end) {
820 __m128i data = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(ptr));
821 quint8 mask = _mm_movemask_epi8(data);
822 if (mask) {
823 uint idx = qCountTrailingZeroBits(mask);
824 ptr += idx;
825 return false;
826 }
827 ptr += 8;
828 }
829#endif
830
831 while (ptr + 4 <= end) {
832 quint32 data = qFromUnaligned<quint32>(ptr);
833 if (data &= 0x80808080U) {
834 uint idx = QSysInfo::ByteOrder == QSysInfo::BigEndian
835 ? qCountLeadingZeroBits(data)
836 : qCountTrailingZeroBits(data);
837 ptr += idx / 8;
838 return false;
839 }
840 ptr += 4;
841 }
842
843 while (ptr != end) {
844 if (quint8(*ptr) & 0x80)
845 return false;
846 ++ptr;
847 }
848 return true;
849}
850
851bool QtPrivate::isAscii(QLatin1StringView s) noexcept
852{
853 const char *ptr = s.begin();
854 const char *end = s.end();
855
856 return qt_is_ascii(ptr, end);
857}
858
859static bool isAscii_helper(const char16_t *&ptr, const char16_t *end)
860{
861#ifdef __SSE2__
862 const char *ptr8 = reinterpret_cast<const char *>(ptr);
863 const char *end8 = reinterpret_cast<const char *>(end);
864 bool ok = simdTestMask(ptr8, end8, 0xff80ff80);
865 ptr = reinterpret_cast<const char16_t *>(ptr8);
866 if (!ok)
867 return false;
868#endif
869
870 while (ptr != end) {
871 if (*ptr & 0xff80)
872 return false;
873 ++ptr;
874 }
875 return true;
876}
877
878bool QtPrivate::isAscii(QStringView s) noexcept
879{
880 const char16_t *ptr = s.utf16();
881 const char16_t *end = ptr + s.size();
882
883 return isAscii_helper(ptr, end);
884}
885
886bool QtPrivate::isLatin1(QStringView s) noexcept
887{
888 const char16_t *ptr = s.utf16();
889 const char16_t *end = ptr + s.size();
890
891#ifdef __SSE2__
892 const char *ptr8 = reinterpret_cast<const char *>(ptr);
893 const char *end8 = reinterpret_cast<const char *>(end);
894 if (!simdTestMask(ptr8, end8, 0xff00ff00))
895 return false;
896 ptr = reinterpret_cast<const char16_t *>(ptr8);
897#endif
898
899 while (ptr != end) {
900 if (*ptr++ > 0xff)
901 return false;
902 }
903 return true;
904}
905
906bool QtPrivate::isValidUtf16(QStringView s) noexcept
907{
908 constexpr char32_t InvalidCodePoint = UINT_MAX;
909
910 QStringIterator i(s);
911 while (i.hasNext()) {
912 const char32_t c = i.next(InvalidCodePoint);
913 if (c == InvalidCodePoint)
914 return false;
915 }
916
917 return true;
918}
919
920// conversion between Latin 1 and UTF-16
921Q_CORE_EXPORT void qt_from_latin1(char16_t *dst, const char *str, size_t size) noexcept
922{
923 /* SIMD:
924 * Unpacking with SSE has been shown to improve performance on recent CPUs
925 * The same method gives no improvement with NEON. On Aarch64, clang will do the vectorization
926 * itself in exactly the same way as one would do it with intrinsics.
927 */
928#if defined(__SSE2__)
929 // we're going to read str[offset..offset+15] (16 bytes)
930 const __m128i nullMask = _mm_setzero_si128();
931 auto processOneChunk = [=](qptrdiff offset) {
932 const __m128i chunk = _mm_loadu_si128((const __m128i*)(str + offset)); // load
933 if constexpr (UseAvx2) {
934 // zero extend to an YMM register
935 const __m256i extended = _mm256_cvtepu8_epi16(chunk);
936
937 // store
938 _mm256_storeu_si256((__m256i*)(dst + offset), extended);
939 } else {
940 // unpack the first 8 bytes, padding with zeros
941 const __m128i firstHalf = _mm_unpacklo_epi8(chunk, nullMask);
942 _mm_storeu_si128((__m128i*)(dst + offset), firstHalf); // store
943
944 // unpack the last 8 bytes, padding with zeros
945 const __m128i secondHalf = _mm_unpackhi_epi8 (chunk, nullMask);
946 _mm_storeu_si128((__m128i*)(dst + offset + 8), secondHalf); // store
947 }
948 };
949
950 const char *e = str + size;
951 if (size >= sizeof(__m128i)) {
952 qptrdiff offset = 0;
953 for ( ; str + offset + sizeof(__m128i) <= e; offset += sizeof(__m128i))
954 processOneChunk(offset);
955 if (str + offset < e)
956 processOneChunk(size - sizeof(__m128i));
957 return;
958 }
959
960# if !defined(__OPTIMIZE_SIZE__)
961 if (size >= 4) {
962 // two overlapped loads & stores, of either 64-bit or of 32-bit
963 if (size >= 8) {
964 const __m128i unpacked1 = mm_load8_zero_extend(str);
965 const __m128i unpacked2 = mm_load8_zero_extend(str + size - 8);
966 _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), unpacked1);
967 _mm_storeu_si128(reinterpret_cast<__m128i *>(dst + size - 8), unpacked2);
968 } else {
969 const __m128i chunk1 = _mm_cvtsi32_si128(qFromUnaligned<quint32>(str));
970 const __m128i chunk2 = _mm_cvtsi32_si128(qFromUnaligned<quint32>(str + size - 4));
971 const __m128i unpacked1 = _mm_unpacklo_epi8(chunk1, nullMask);
972 const __m128i unpacked2 = _mm_unpacklo_epi8(chunk2, nullMask);
973 _mm_storel_epi64(reinterpret_cast<__m128i *>(dst), unpacked1);
974 _mm_storel_epi64(reinterpret_cast<__m128i *>(dst + size - 4), unpacked2);
975 }
976 return;
977 } else {
978 size = size % 4;
979 return UnrollTailLoop<3>::exec(qsizetype(size), [=](qsizetype i) { dst[i] = uchar(str[i]); });
980 }
981# endif
982#endif
983#if defined(__mips_dsp)
984 static_assert(sizeof(qsizetype) == sizeof(int),
985 "oops, the assembler implementation needs to be called in a loop");
986 if (size > 20)
987 qt_fromlatin1_mips_asm_unroll8(dst, str, size);
988 else
989 qt_fromlatin1_mips_asm_unroll4(dst, str, size);
990#else
991 while (size--)
992 *dst++ = (uchar)*str++;
993#endif
994}
995
996static QVarLengthArray<char16_t> qt_from_latin1_to_qvla(QLatin1StringView str)
997{
998 const qsizetype len = str.size();
999 QVarLengthArray<char16_t> arr(len);
1000 qt_from_latin1(arr.data(), str.data(), len);
1001 return arr;
1002}
1003
// Narrows \a length UTF-16 code units from \a src into bytes at \a dst.
// When \c Checked is true, every code unit above 0xff is replaced by '?'.
// When \c Checked is false no replacement is done; presumably the caller
// guarantees that all code units fit in Latin-1 (TODO confirm).
// Uses SSE2/SSE4.1/AVX2 or NEON paths where available, then falls back to
// the MIPS DSP assembly routine or a plain scalar loop.
1004template <bool Checked>
1005static void qt_to_latin1_internal(uchar *dst, const char16_t *src, qsizetype length)
1006{
1007#if defined(__SSE2__)
 // 256-bit constants are only built when AVX2 is in use; otherwise these
 // lambdas yield a dummy int (0).
1008 auto questionMark256 = []() {
1009 if constexpr (UseAvx2)
1010 return _mm256_broadcastw_epi16(_mm_cvtsi32_si128('?'));
1011 else
1012 return 0;
1013 }();
1014 auto outOfRange256 = []() {
1015 if constexpr (UseAvx2)
1016 return _mm256_broadcastw_epi16(_mm_cvtsi32_si128(0x100));
1017 else
1018 return 0;
1019 }();
 // 128-bit versions of the same two constants: '?' and 0x100 in every 16-bit lane
1020 __m128i questionMark, outOfRange;
1021 if constexpr (UseAvx2) {
1022 questionMark = _mm256_castsi256_si128(questionMark256);
1023 outOfRange = _mm256_castsi256_si128(outOfRange256);
1024 } else {
1025 questionMark = _mm_set1_epi16('?');
1026 outOfRange = _mm_set1_epi16(0x100);
1027 }
1028
 // Replaces every 16-bit lane above 0xff with '?' (identity when !Checked)
1029 auto mergeQuestionMarks = [=](__m128i chunk) {
1030 if (!Checked)
1031 return chunk;
1032
1033 // SSE has no compare instruction for unsigned comparison.
1034 if constexpr (UseSse4_1) {
1035 // We use an unsigned uc = qMin(uc, 0x100) and then compare for equality.
1036 chunk = _mm_min_epu16(chunk, outOfRange);
1037 const __m128i offLimitMask = _mm_cmpeq_epi16(chunk, outOfRange);
1038 chunk = _mm_blendv_epi8(chunk, questionMark, offLimitMask);
1039 return chunk;
1040 }
1041 // Without SSE4.1: the values must be shifted by 0x8000 so that the signed
 // comparison below orders them like an unsigned one
1042 const __m128i signedBitOffset = _mm_set1_epi16(short(0x8000));
1043 const __m128i thresholdMask = _mm_set1_epi16(short(0xff + 0x8000));
1044
1045 const __m128i signedChunk = _mm_add_epi16(chunk, signedBitOffset);
1046 const __m128i offLimitMask = _mm_cmpgt_epi16(signedChunk, thresholdMask);
1047
1048 // offLimitQuestionMark contains '?' for each 16 bits that was off-limit
1049 // the 16 bits that were correct contains zeros
1050 const __m128i offLimitQuestionMark = _mm_and_si128(offLimitMask, questionMark);
1051
1052 // correctBytes contains the bytes that were in limit
1053 // the 16 bits that were off limits contains zeros
1054 const __m128i correctBytes = _mm_andnot_si128(offLimitMask, chunk);
1055
1056 // merge offLimitQuestionMark and correctBytes to have the result
1057 chunk = _mm_or_si128(correctBytes, offLimitQuestionMark);
1058
1059 Q_UNUSED(outOfRange);
1060 return chunk;
1061 };
1062
1063 // Loads 16 code units starting at src[offset] (src[offset..offset+15]) and
 // returns them packed into 16 bytes
1064 auto loadChunkAt = [=](qptrdiff offset) {
1065 __m128i chunk1, chunk2;
1066 if constexpr (UseAvx2) {
1067 __m256i chunk = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(src + offset));
1068 if (Checked) {
1069 // See mergeQuestionMarks lambda above for details
1070 chunk = _mm256_min_epu16(chunk, outOfRange256);
1071 const __m256i offLimitMask = _mm256_cmpeq_epi16(chunk, outOfRange256);
1072 chunk = _mm256_blendv_epi8(chunk, questionMark256, offLimitMask);
1073 }
1074
 // split the 256-bit register into its high and low 128-bit halves
1075 chunk2 = _mm256_extracti128_si256(chunk, 1);
1076 chunk1 = _mm256_castsi256_si128(chunk);
1077 } else {
1078 chunk1 = _mm_loadu_si128((const __m128i*)(src + offset)); // load
1079 chunk1 = mergeQuestionMarks(chunk1);
1080
1081 chunk2 = _mm_loadu_si128((const __m128i*)(src + offset + 8)); // load
1082 chunk2 = mergeQuestionMarks(chunk2);
1083 }
1084
1085 // pack the two vector to 16 x 8bits elements
1086 return _mm_packus_epi16(chunk1, chunk2);
1087 };
1088
 // 16 or more code units: full 16-unit chunks plus one overlapping final chunk
1089 if (size_t(length) >= sizeof(__m128i)) {
1090 // because of possible overlapping, we won't process the last chunk in the loop
1091 qptrdiff offset = 0;
1092 for ( ; offset + 2 * sizeof(__m128i) < size_t(length); offset += sizeof(__m128i))
1093 _mm_storeu_si128(reinterpret_cast<__m128i *>(dst + offset), loadChunkAt(offset));
1094
1095 // overlapped conversion of the last full chunk and the tail
 // (both loads are done before either store)
1096 __m128i last1 = loadChunkAt(offset);
1097 __m128i last2 = loadChunkAt(length - sizeof(__m128i));
1098 _mm_storeu_si128(reinterpret_cast<__m128i *>(dst + offset), last1);
1099 _mm_storeu_si128(reinterpret_cast<__m128i *>(dst + length - sizeof(__m128i)), last2);
1100 return;
1101 }
1102
1103# if !defined(__OPTIMIZE_SIZE__)
 // 4 to 15 code units: two possibly overlapping loads and stores
1104 if (length >= 4) {
1105 // this code is fine even for in-place conversion because we load both
1106 // before any store
1107 if (length >= 8) {
1108 __m128i chunk1 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(src));
1109 __m128i chunk2 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(src + length - 8));
1110 chunk1 = mergeQuestionMarks(chunk1);
1111 chunk2 = mergeQuestionMarks(chunk2);
1112
1113 // pack, where the upper half is ignored
1114 const __m128i result1 = _mm_packus_epi16(chunk1, chunk1);
1115 const __m128i result2 = _mm_packus_epi16(chunk2, chunk2);
1116 _mm_storel_epi64(reinterpret_cast<__m128i *>(dst), result1);
1117 _mm_storel_epi64(reinterpret_cast<__m128i *>(dst + length - 8), result2);
1118 } else {
1119 __m128i chunk1 = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(src));
1120 __m128i chunk2 = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(src + length - 4));
1121 chunk1 = mergeQuestionMarks(chunk1);
1122 chunk2 = mergeQuestionMarks(chunk2);
1123
1124 // pack, we'll zero the upper three quarters
1125 const __m128i result1 = _mm_packus_epi16(chunk1, chunk1);
1126 const __m128i result2 = _mm_packus_epi16(chunk2, chunk2);
1127 qToUnaligned(_mm_cvtsi128_si32(result1), dst);
1128 qToUnaligned(_mm_cvtsi128_si32(result2), dst + length - 4);
1129 }
1130 return;
1131 }
1132
 // length is already below 4 here (larger inputs returned above)
1133 length = length % 4;
1134 return UnrollTailLoop<3>::exec(length, [=](qsizetype i) {
1135 if (Checked)
1136 dst[i] = (src[i]>0xff) ? '?' : (uchar) src[i];
1137 else
1138 dst[i] = src[i];
1139 });
1140# else
 // length is already below 16 here; the generic loop at the end handles it
1141 length = length % 16;
1142# endif // optimize size
1143#elif defined(__ARM_NEON__)
1144 // Refer to the documentation of the SSE2 implementation.
1145 // This uses exactly the same method as for SSE except:
1146 // 1) neon has unsigned comparison
1147 // 2) packing is done to 64 bits (8 x 8bits component).
1148 if (length >= 16) {
1149 const qsizetype chunkCount = length >> 3; // divided by 8
1150 const uint16x8_t questionMark = vdupq_n_u16('?'); // set
1151 const uint16x8_t thresholdMask = vdupq_n_u16(0xff); // set
1152 for (qsizetype i = 0; i < chunkCount; ++i) {
1153 uint16x8_t chunk = vld1q_u16((uint16_t *)src); // load
1154 src += 8;
1155
1156 if (Checked) {
1157 const uint16x8_t offLimitMask = vcgtq_u16(chunk, thresholdMask); // chunk > thresholdMask
1158 const uint16x8_t offLimitQuestionMark = vandq_u16(offLimitMask, questionMark); // offLimitMask & questionMark
1159 const uint16x8_t correctBytes = vbicq_u16(chunk, offLimitMask); // !offLimitMask & chunk
1160 chunk = vorrq_u16(correctBytes, offLimitQuestionMark); // correctBytes | offLimitQuestionMark
1161 }
1162 const uint8x8_t result = vmovn_u16(chunk); // narrowing move->packing
1163 vst1_u8(dst, result); // store
1164 dst += 8;
1165 }
 // src and dst were advanced in the loop; only the remainder is left
1166 length = length % 8;
1167 }
1168#endif
1169#if defined(__mips_dsp)
1170 static_assert(sizeof(qsizetype) == sizeof(int),
1171 "oops, the assembler implementation needs to be called in a loop");
1172 qt_toLatin1_mips_dsp_asm(dst, src, length);
1173#else
 // generic scalar conversion of whatever the vector code left over
1174 while (length--) {
1175 if (Checked)
1176 *dst++ = (*src>0xff) ? '?' : (uchar) *src;
1177 else
1178 *dst++ = *src;
1179 ++src;
1180 }
1181#endif
1182}
1183
1184void qt_to_latin1(uchar *dst, const char16_t *src, qsizetype length)
1185{
1186 qt_to_latin1_internal<true>(dst, src, length);
1187}
1188
1189void qt_to_latin1_unchecked(uchar *dst, const char16_t *src, qsizetype length)
1190{
1191 qt_to_latin1_internal<false>(dst, src, length);
1192}
1193
1194// Unicode case-insensitive comparison (argument order matches QStringView)
1195Q_NEVER_INLINE static int ucstricmp(qsizetype alen, const char16_t *a, qsizetype blen, const char16_t *b)
1196{
1197 if (a == b)
1198 return qt_lencmp(alen, blen);
1199
1200 qsizetype l = qMin(alen, blen);
1201 qsizetype i;
1202 for (i = 0; i < l; ++i) {
1203// qDebug() << Qt::hex << alast << blast;
1204// qDebug() << Qt::hex << "*a=" << *a << "alast=" << alast << "folded=" << foldCase (*a, alast);
1205// qDebug() << Qt::hex << "*b=" << *b << "blast=" << blast << "folded=" << foldCase (*b, blast);
1206 int diff = foldCase(a + i, a) - foldCase(b + i, b);
1207 if ((diff))
1208 return diff;
1209 }
1210 if (i == alen) {
1211 if (i == blen)
1212 return 0;
1213 return -1;
1214 }
1215 return 1;
1216}
1217
1218// Case-insensitive comparison between a QStringView and a QLatin1StringView
1219// (argument order matches those types)
1220Q_NEVER_INLINE static int ucstricmp(qsizetype alen, const char16_t *a, qsizetype blen, const char *b)
1221{
1222 qsizetype l = qMin(alen, blen);
1223 qsizetype i;
1224 for (i = 0; i < l; ++i) {
1225 int diff = foldCase(a[i]) - foldCase(char16_t{uchar(b[i])});
1226 if ((diff))
1227 return diff;
1228 }
1229 if (i == alen) {
1230 if (i == blen)
1231 return 0;
1232 return -1;
1233 }
1234 return 1;
1235}
1236
1237// Case-insensitive comparison between a Unicode string and a UTF-8 string
1238Q_NEVER_INLINE static int ucstricmp8(const char *utf8, const char *utf8end, const QChar *utf16, const QChar *utf16end)
1239{
1240 auto src1 = reinterpret_cast<const qchar8_t *>(utf8);
1241 auto end1 = reinterpret_cast<const qchar8_t *>(utf8end);
1242 QStringIterator src2(utf16, utf16end);
1243
1244 while (src1 < end1 && src2.hasNext()) {
1245 char32_t uc1 = QChar::toCaseFolded(QUtf8Functions::nextUcs4FromUtf8(src1, end1));
1246 char32_t uc2 = QChar::toCaseFolded(src2.next());
1247 int diff = uc1 - uc2; // can't underflow
1248 if (diff)
1249 return diff;
1250 }
1251
1252 // the shorter string sorts first
1253 return (end1 > src1) - int(src2.hasNext());
1254}
1255
1256#if defined(__mips_dsp)
1257// Assembly routine from qstring_mips_dsp_asm.S. ucstrncmp() calls it to compare
// two UTF-16 sequences of \a len code units when at least 8 are to be compared.
1258extern "C" int qt_ucstrncmp_mips_dsp_asm(const char16_t *a,
1259 const char16_t *b,
1260 unsigned len);
1261#endif
1262
1263// Unicode case-sensitive compare two same-sized strings
1264template <StringComparisonMode Mode>
1265static int ucstrncmp(const char16_t *a, const char16_t *b, size_t l)
1266{
1267 // This function isn't memcmp() because that can return the wrong sorting
1268 // result in little-endian architectures: 0x00ff must sort before 0x0100,
1269 // but the bytes in memory are FF 00 and 00 01.
1270
1271#ifndef __OPTIMIZE_SIZE__
1272# if defined(__mips_dsp)
1273 static_assert(sizeof(uint) == sizeof(size_t));
1274 if (l >= 8) {
1275 return qt_ucstrncmp_mips_dsp_asm(a, b, l);
1276 }
1277# elif defined(__SSE2__)
1278 return ucstrncmp_sse2<Mode>(a, b, l);
1279# elif defined(__ARM_NEON__)
1280 if (l >= 8) {
1281 const char16_t *end = a + l;
1282 const uint16x8_t mask = qvsetq_n_u16( 1, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6, 1 << 7 );
1283 while (end - a > 7) {
1284 uint16x8_t da = vld1q_u16(reinterpret_cast<const uint16_t *>(a));
1285 uint16x8_t db = vld1q_u16(reinterpret_cast<const uint16_t *>(b));
1286
1287 uint8_t r = ~(uint8_t)vaddvq_u16(vandq_u16(vceqq_u16(da, db), mask));
1288 if (r) {
1289 // found a different QChar
1290 if (Mode == CompareStringsForEquality)
1291 return 1;
1292 uint idx = qCountTrailingZeroBits(r);
1293 return a[idx] - b[idx];
1294 }
1295 a += 8;
1296 b += 8;
1297 }
1298 l &= 7;
1299 }
1300 const auto lambda = [=](size_t i) -> int {
1301 return a[i] - b[i];
1302 };
1303 return UnrollTailLoop<7>::exec(l, 0, lambda, lambda);
1304# endif // MIPS DSP or __SSE2__ or __ARM_NEON__
1305#endif // __OPTIMIZE_SIZE__
1306
1307 if (Mode == CompareStringsForEquality || QSysInfo::ByteOrder == QSysInfo::BigEndian)
1308 return memcmp(a, b, l * sizeof(char16_t));
1309
1310 for (size_t i = 0; i < l; ++i) {
1311 if (int diff = a[i] - b[i])
1312 return diff;
1313 }
1314 return 0;
1315}
1316
1317template <StringComparisonMode Mode>
1318static int ucstrncmp(const char16_t *a, const char *b, size_t l)
1319{
1320 const uchar *c = reinterpret_cast<const uchar *>(b);
1321 const char16_t *uc = a;
1322 const char16_t *e = uc + l;
1323
1324#if defined(__SSE2__) && !defined(__OPTIMIZE_SIZE__)
1325 return ucstrncmp_sse2<Mode>(uc, c, l);
1326#endif
1327
1328 while (uc < e) {
1329 int diff = *uc - *c;
1330 if (diff)
1331 return diff;
1332 uc++, c++;
1333 }
1334
1335 return 0;
1336}
1337
1338// Unicode case-sensitive equality
1339template <typename Char2>
1340static bool ucstreq(const char16_t *a, size_t alen, const Char2 *b)
1341{
1342 return ucstrncmp<CompareStringsForEquality>(a, b, alen) == 0;
1343}
1344
1345// Unicode case-sensitive comparison
1346template <typename Char2>
1347static int ucstrcmp(const char16_t *a, size_t alen, const Char2 *b, size_t blen)
1348{
1349 const size_t l = qMin(alen, blen);
1350 int cmp = ucstrncmp<CompareStringsForOrdering>(a, b, l);
1351 return cmp ? cmp : qt_lencmp(alen, blen);
1352}
1353
1355
1356static int latin1nicmp(const char *lhsChar, qsizetype lSize, const char *rhsChar, qsizetype rSize)
1357{
1358 // We're called with QLatin1StringView's .data() and .size():
1359 Q_ASSERT(lSize >= 0 && rSize >= 0);
1360 if (!lSize)
1361 return rSize ? -1 : 0;
1362 if (!rSize)
1363 return 1;
1364 const qsizetype size = std::min(lSize, rSize);
1365
1366 Q_ASSERT(lhsChar && rhsChar); // since both lSize and rSize are positive
1367 for (qsizetype i = 0; i < size; i++) {
1368 if (int res = CaseInsensitiveL1::difference(lhsChar[i], rhsChar[i]))
1369 return res;
1370 }
1371 return qt_lencmp(lSize, rSize);
1372}
1373
1374bool QtPrivate::equalStrings(QStringView lhs, QStringView rhs) noexcept
1375{
1376 Q_ASSERT(lhs.size() == rhs.size());
1377 return ucstreq(lhs.utf16(), lhs.size(), rhs.utf16());
1378}
1379
1380bool QtPrivate::equalStrings(QStringView lhs, QLatin1StringView rhs) noexcept
1381{
1382 Q_ASSERT(lhs.size() == rhs.size());
1383 return ucstreq(lhs.utf16(), lhs.size(), rhs.latin1());
1384}
1385
1386bool QtPrivate::equalStrings(QLatin1StringView lhs, QStringView rhs) noexcept
1387{
1388 return QtPrivate::equalStrings(rhs, lhs);
1389}
1390
1391bool QtPrivate::equalStrings(QLatin1StringView lhs, QLatin1StringView rhs) noexcept
1392{
1393 Q_ASSERT(lhs.size() == rhs.size());
1394 return (!lhs.size() || memcmp(lhs.data(), rhs.data(), lhs.size()) == 0);
1395}
1396
1397bool QtPrivate::equalStrings(QBasicUtf8StringView<false> lhs, QStringView rhs) noexcept
1398{
1399 return QUtf8::compareUtf8(lhs, rhs) == 0;
1400}
1401
1402bool QtPrivate::equalStrings(QStringView lhs, QBasicUtf8StringView<false> rhs) noexcept
1403{
1404 return QtPrivate::equalStrings(rhs, lhs);
1405}
1406
1407bool QtPrivate::equalStrings(QLatin1StringView lhs, QBasicUtf8StringView<false> rhs) noexcept
1408{
1409 return QUtf8::compareUtf8(QByteArrayView(rhs), lhs) == 0;
1410}
1411
1412bool QtPrivate::equalStrings(QBasicUtf8StringView<false> lhs, QLatin1StringView rhs) noexcept
1413{
1414 return QtPrivate::equalStrings(rhs, lhs);
1415}
1416
1417bool QtPrivate::equalStrings(QBasicUtf8StringView<false> lhs, QBasicUtf8StringView<false> rhs) noexcept
1418{
1419#if QT_VERSION >= QT_VERSION_CHECK(7, 0, 0) || defined(QT_BOOTSTRAPPED) || defined(QT_STATIC)
1420 Q_ASSERT(lhs.size() == rhs.size());
1421#else
1422 // operator== didn't enforce size prior to Qt 6.2
1423 if (lhs.size() != rhs.size())
1424 return false;
1425#endif
1426 return (!lhs.size() || memcmp(lhs.data(), rhs.data(), lhs.size()) == 0);
1427}
1428
1429bool QAnyStringView::equal(QAnyStringView lhs, QAnyStringView rhs) noexcept
1430{
1431 if (lhs.size() != rhs.size() && lhs.isUtf8() == rhs.isUtf8())
1432 return false;
1433 return lhs.visit([rhs](auto lhs) {
1434 return rhs.visit([lhs](auto rhs) {
1435 return QtPrivate::equalStrings(lhs, rhs);
1436 });
1437 });
1438}
1439
1440/*!
1441 \relates QStringView
1442 \internal
1443 \since 5.10
1444
1445 Returns an integer that compares to 0 as \a lhs compares to \a rhs.
1446
1447 \include qstring.qdocinc {search-comparison-case-sensitivity} {comparison}
1448
1449 Case-sensitive comparison is based exclusively on the numeric Unicode values
1450 of the characters and is very fast, but is not what a human would expect.
1451 Consider sorting user-visible strings with QString::localeAwareCompare().
1452
1453 \sa {Comparing Strings}
1454*/
1455int QtPrivate::compareStrings(QStringView lhs, QStringView rhs, Qt::CaseSensitivity cs) noexcept
1456{
1457 if (cs == Qt::CaseSensitive)
1458 return ucstrcmp(lhs.utf16(), lhs.size(), rhs.utf16(), rhs.size());
1459 return ucstricmp(lhs.size(), lhs.utf16(), rhs.size(), rhs.utf16());
1460}
1461
1462/*!
1463 \relates QStringView
1464 \internal
1465 \since 5.10
1466 \overload
1467
1468 Returns an integer that compares to 0 as \a lhs compares to \a rhs.
1469
1470 \include qstring.qdocinc {search-comparison-case-sensitivity} {comparison}
1471
1472 Case-sensitive comparison is based exclusively on the numeric Unicode values
1473 of the characters and is very fast, but is not what a human would expect.
1474 Consider sorting user-visible strings with QString::localeAwareCompare().
1475
1476 \sa {Comparing Strings}
1477*/
1478int QtPrivate::compareStrings(QStringView lhs, QLatin1StringView rhs, Qt::CaseSensitivity cs) noexcept
1479{
1480 if (cs == Qt::CaseSensitive)
1481 return ucstrcmp(lhs.utf16(), lhs.size(), rhs.latin1(), rhs.size());
1482 return ucstricmp(lhs.size(), lhs.utf16(), rhs.size(), rhs.latin1());
1483}
1484
1485/*!
1486 \relates QStringView
1487 \internal
1488 \since 6.0
1489 \overload
1490*/
1491int QtPrivate::compareStrings(QStringView lhs, QBasicUtf8StringView<false> rhs, Qt::CaseSensitivity cs) noexcept
1492{
1493 return -compareStrings(rhs, lhs, cs);
1494}
1495
1496/*!
1497 \relates QStringView
1498 \internal
1499 \since 5.10
1500 \overload
1501*/
1502int QtPrivate::compareStrings(QLatin1StringView lhs, QStringView rhs, Qt::CaseSensitivity cs) noexcept
1503{
1504 return -compareStrings(rhs, lhs, cs);
1505}
1506
1507/*!
1508 \relates QStringView
1509 \internal
1510 \since 5.10
1511 \overload
1512
1513 Returns an integer that compares to 0 as \a lhs compares to \a rhs.
1514
1515 \include qstring.qdocinc {search-comparison-case-sensitivity} {comparison}
1516
1517 Case-sensitive comparison is based exclusively on the numeric Latin-1 values
1518 of the characters and is very fast, but is not what a human would expect.
1519 Consider sorting user-visible strings with QString::localeAwareCompare().
1520
1521 \sa {Comparing Strings}
1522*/
1523int QtPrivate::compareStrings(QLatin1StringView lhs, QLatin1StringView rhs, Qt::CaseSensitivity cs) noexcept
1524{
1525 if (lhs.isEmpty())
1526 return qt_lencmp(qsizetype(0), rhs.size());
1527 if (rhs.isEmpty())
1528 return qt_lencmp(lhs.size(), qsizetype(0));
1529 if (cs == Qt::CaseInsensitive)
1530 return latin1nicmp(lhs.data(), lhs.size(), rhs.data(), rhs.size());
1531 const auto l = std::min(lhs.size(), rhs.size());
1532 int r = memcmp(lhs.data(), rhs.data(), l);
1533 return r ? r : qt_lencmp(lhs.size(), rhs.size());
1534}
1535
1536/*!
1537 \relates QStringView
1538 \internal
1539 \since 6.0
1540 \overload
1541*/
1542int QtPrivate::compareStrings(QLatin1StringView lhs, QBasicUtf8StringView<false> rhs, Qt::CaseSensitivity cs) noexcept
1543{
1544 return -QUtf8::compareUtf8(QByteArrayView(rhs), lhs, cs);
1545}
1546
1547/*!
1548 \relates QStringView
1549 \internal
1550 \since 6.0
1551 \overload
1552*/
1553int QtPrivate::compareStrings(QBasicUtf8StringView<false> lhs, QStringView rhs, Qt::CaseSensitivity cs) noexcept
1554{
1555 if (cs == Qt::CaseSensitive)
1556 return QUtf8::compareUtf8(lhs, rhs);
1557 return ucstricmp8(lhs.begin(), lhs.end(), rhs.begin(), rhs.end());
1558}
1559
1560/*!
1561 \relates QStringView
1562 \internal
1563 \since 6.0
1564 \overload
1565*/
1566int QtPrivate::compareStrings(QBasicUtf8StringView<false> lhs, QLatin1StringView rhs, Qt::CaseSensitivity cs) noexcept
1567{
1568 return -compareStrings(rhs, lhs, cs);
1569}
1570
1571/*!
1572 \relates QStringView
1573 \internal
1574 \since 6.0
1575 \overload
1576*/
1577int QtPrivate::compareStrings(QBasicUtf8StringView<false> lhs, QBasicUtf8StringView<false> rhs, Qt::CaseSensitivity cs) noexcept
1578{
1579 return QUtf8::compareUtf8(QByteArrayView(lhs), QByteArrayView(rhs), cs);
1580}
1581
1582int QAnyStringView::compare(QAnyStringView lhs, QAnyStringView rhs, Qt::CaseSensitivity cs) noexcept
1583{
1584 return lhs.visit([rhs, cs](auto lhs) {
1585 return rhs.visit([lhs, cs](auto rhs) {
1586 return QtPrivate::compareStrings(lhs, rhs, cs);
1587 });
1588 });
1589}
1590
1591// ### Qt 7: do not allow anything but ASCII digits
1592// in arg()'s replacements.
1593#if QT_VERSION < QT_VERSION_CHECK(7, 0, 0) && !defined(QT_BOOTSTRAPPED)
1594static bool supportUnicodeDigitValuesInArg()
1595{
1596 static const bool result = []() {
1597 static const char supportUnicodeDigitValuesEnvVar[]
1598 = "QT_USE_UNICODE_DIGIT_VALUES_IN_STRING_ARG";
1599
1600 if (qEnvironmentVariableIsSet(supportUnicodeDigitValuesEnvVar))
1601 return qEnvironmentVariableIntValue(supportUnicodeDigitValuesEnvVar) != 0;
1602
1603#if QT_VERSION < QT_VERSION_CHECK(6, 6, 0) // keep it in sync with the test
1604 return true;
1605#else
1606 return false;
1607#endif
1608 }();
1609
1610 return result;
1611}
1612#endif
1613
1614static int qArgDigitValue(QChar ch) noexcept
1615{
1616#if QT_VERSION < QT_VERSION_CHECK(7, 0, 0) && !defined(QT_BOOTSTRAPPED)
1617 if (supportUnicodeDigitValuesInArg())
1618 return ch.digitValue();
1619#endif
1620 if (ch >= u'0' && ch <= u'9')
1621 return int(ch.unicode() - u'0');
1622 return -1;
1623}
1624
1625#if QT_CONFIG(regularexpression)
1626Q_DECL_COLD_FUNCTION
1627static void qtWarnAboutInvalidRegularExpression(const QRegularExpression &re, const char *cls, const char *method)
1628{
1629 extern void qtWarnAboutInvalidRegularExpression(const QString &pattern, const char *cls, const char *method);
1630 qtWarnAboutInvalidRegularExpression(re.pattern(), cls, method);
1631}
1632#endif
1633
1634/*!
1635 \macro QT_RESTRICTED_CAST_FROM_ASCII
1636 \relates QString
1637
1638 Disables most automatic conversions from source literals and 8-bit data
1639 to unicode QStrings, but allows the use of
1640 the \c{QChar(char)} and \c{QString(const char (&ch)[N])} constructors,
1641 and the \c{QString::operator=(const char (&ch)[N])} assignment operator.
1642 This gives most of the type-safety benefits of \l QT_NO_CAST_FROM_ASCII
1643 but does not require user code to wrap character and string literals
1644 with QLatin1Char, QLatin1StringView or similar.
1645
1646 Using this macro together with source strings outside the 7-bit range,
1647 non-literals, or literals with embedded NUL characters is undefined.
1648
1649 \sa QT_NO_CAST_FROM_ASCII, QT_NO_CAST_TO_ASCII
1650*/
1651
1652/*!
1653 \macro QT_NO_CAST_FROM_ASCII
1654 \relates QString
1655 \relates QChar
1656
1657 Disables automatic conversions from 8-bit strings (\c{char *}) to Unicode
1658 QStrings, as well as from 8-bit \c{char} types (\c{char} and
1659 \c{unsigned char}) to QChar.
1660
1661 \sa QT_NO_CAST_TO_ASCII, QT_RESTRICTED_CAST_FROM_ASCII,
1662 QT_NO_CAST_FROM_BYTEARRAY
1663*/
1664
1665/*!
1666 \macro QT_NO_CAST_TO_ASCII
1667 \relates QString
1668
1669 Disables automatic conversion from QString to 8-bit strings (\c{char *}).
1670
1671 \sa QT_NO_CAST_FROM_ASCII, QT_RESTRICTED_CAST_FROM_ASCII,
1672 QT_NO_CAST_FROM_BYTEARRAY
1673*/
1674
1675/*!
1676 \macro QT_ASCII_CAST_WARNINGS
1677 \internal
1678 \relates QString
1679
1680 This macro can be defined to force a warning whenever a function is
1681 called that automatically converts between unicode and 8-bit encodings.
1682
1683 Note: This only works for compilers that support warnings for
1684 deprecated API.
1685
1686 \sa QT_NO_CAST_TO_ASCII, QT_NO_CAST_FROM_ASCII, QT_RESTRICTED_CAST_FROM_ASCII
1687*/
1688
1689/*!
1690 \class QString
1691 \inmodule QtCore
1692 \reentrant
1693
1694 \brief The QString class provides a Unicode character string.
1695
1696 \ingroup tools
1697 \ingroup shared
1698 \ingroup string-processing
1699
1700 \compares strong
1701 \compareswith strong QChar QLatin1StringView {const char16_t *} \
1702 QStringView QUtf8StringView
1703 \endcompareswith
1704 \compareswith strong QByteArray QByteArrayView {const char *}
1705 When comparing with byte arrays, their content is interpreted as UTF-8.
1706 \endcompareswith
1707
1708 QString stores a string of 16-bit \l{QChar}s, where each QChar
1709 corresponds to one UTF-16 code unit. (Unicode characters
1710 with code values above 65535 are stored using surrogate pairs,
1711 that is, two consecutive \l{QChar}s.)
1712
1713 \l{Unicode} is an international standard that supports most of the
1714 writing systems in use today. It is a superset of US-ASCII (ANSI
1715 X3.4-1986) and Latin-1 (ISO 8859-1), and all the US-ASCII/Latin-1
1716 characters are available at the same code positions.
1717
1718 Behind the scenes, QString uses \l{implicit sharing}
1719 (copy-on-write) to reduce memory usage and to avoid the needless
1720 copying of data. This also helps reduce the inherent overhead of
1721 storing 16-bit characters instead of 8-bit characters.
1722
1723 In addition to QString, Qt also provides the QByteArray class to
1724 store raw bytes and traditional 8-bit '\\0'-terminated strings.
1725 For most purposes, QString is the class you want to use. It is
1726 used throughout the Qt API, and the Unicode support ensures that
1727 your applications are easy to translate if you want to expand
1728 your application's market at some point. Two prominent cases
1729 where QByteArray is appropriate are when you need to store raw
1730 binary data, and when memory conservation is critical (like in
1731 embedded systems).
1732
1733 \section1 Initializing a string
1734
1735 One way to initialize a QString is to pass a \c{const char
1736 *} to its constructor. For example, the following code creates a
1737 QString of size 5 containing the data "Hello":
1738
1739 \snippet qstring/main.cpp 0
1740
1741 QString converts the \c{const char *} data into Unicode using the
1742 fromUtf8() function.
1743
1744 In all of the QString functions that take \c{const char *}
1745 parameters, the \c{const char *} is interpreted as a classic
1746 C-style \c{'\\0'}-terminated string. Except where the function's
1747 name overtly indicates some other encoding, such \c{const char *}
1748 parameters are assumed to be encoded in UTF-8.
1749
1750 Since Qt 6.4, it is also possible to initialize QStrings using
1751 the \l {Qt::Literals::StringLiterals::operator""_s()} and
1752 \l {Qt::Literals::StringLiterals::operator""_L1()} literal
1753 operators. In many cases, using the literals results in
1754 \l{More efficient string construction}{more efficient string construction}.
1755
1756
1757 You can also provide string data as an array of \l{QChar}s:
1758
1759 \snippet qstring/main.cpp 1
1760
1761 QString makes a deep copy of the QChar data, so you can modify it
1762 later without experiencing side effects. You can avoid taking a
1763 deep copy of the character data by using QStringView or
1764 QString::fromRawData() instead.
1765
1766 Another approach is to set the size of the string using resize()
1767 and to initialize the data character per character. QString uses
1768 0-based indexes, just like C++ arrays. To access the character at
1769 a particular index position, you can use \l operator[](). On
1770 non-\c{const} strings, \l operator[]() returns a reference to a
1771 character that can be used on the left side of an assignment. For
1772 example:
1773
1774 \snippet qstring/main.cpp 2
1775
1776 For read-only access, an alternative syntax is to use the at()
1777 function:
1778
1779 \snippet qstring/main.cpp 3
1780
1781 The at() function can be faster than \l operator[]() because it
1782 never causes a \l{deep copy} to occur. Alternatively, use the
1783 first(), last(), or sliced() functions to extract several characters
1784 at a time.
1785
1786 A QString can embed '\\0' characters (QChar::Null). The size()
1787 function always returns the size of the whole string, including
1788 embedded '\\0' characters.
1789
1790 After a call to the resize() function, newly allocated characters
1791 have undefined values. To set all the characters in the string to
1792 a particular value, use the fill() function.
1793
1794 QString provides dozens of overloads designed to simplify string
1795 usage. For example, if you want to compare a QString with a string
1796 literal, you can write code like this and it will work as expected:
1797
1798 \snippet qstring/main.cpp 4
1799
1800 You can also pass string literals to functions that take QStrings
1801 as arguments, invoking the QString(const char *)
1802 constructor. Similarly, you can pass a QString to a function that
1803 takes a \c{const char *} argument using the \l qPrintable() macro,
1804 which returns the given QString as a \c{const char *}. This is
1805 equivalent to calling toLocal8Bit().\l{QByteArray::}{constData()}
1806 on the QString.
1807
1808 \section1 Manipulating string data
1809
1810 QString provides the following basic functions for modifying the
1811 character data: append(), prepend(), insert(), replace(), and
1812 remove(). For example:
1813
1814 \snippet qstring/main.cpp 5
1815
1816 In the above example, the replace() function's first two arguments are the
1817 position from which to start replacing and the number of characters that
1818 should be replaced.
1819
1820 When data-modifying functions increase the size of the string,
1821 QString may reallocate the memory in which it holds its data. When
1822 this happens, QString expands by more than it immediately needs so as
1823 to have space for further expansion without reallocation until the size
1824 of the string has significantly increased.
1825
1826 The insert(), remove(), and, when replacing a sub-string with one of
1827 different size, replace() functions can be slow (\l{linear time}) for
1828 large strings because they require moving many characters in the string
1829 by at least one position in memory.
1830
1831 If you are building a QString gradually and know in advance
1832 approximately how many characters the QString will contain, you
1833 can call reserve(), asking QString to preallocate a certain amount
1834 of memory. You can also call capacity() to find out how much
1835 memory the QString actually has allocated.
1836
1837 QString provides \l{STL-style iterators} (QString::const_iterator and
1838 QString::iterator). In practice, iterators are handy when working with
1839 generic algorithms provided by the C++ standard library.
1840
1841 \note Iterators over a QString, and references to individual characters
1842 within one, cannot be relied on to remain valid when any non-\c{const}
1843 method of the QString is called. Accessing such an iterator or reference
1844 after the call to a non-\c{const} method leads to undefined behavior. When
1845 stability for iterator-like functionality is required, you should use
1846 indexes instead of iterators, as they are not tied to QString's internal
1847 state and thus do not get invalidated.
1848
1849 \note Due to \l{implicit sharing}, the first non-\c{const} operator or
1850 function used on a given QString may cause it to internally perform a deep
1851 copy of its data. This invalidates all iterators over the string and
1852 references to individual characters within it. Do not call non-const
1853 functions while keeping iterators. Accessing an iterator or reference
1854 after it has been invalidated leads to undefined behavior. See the
1855 \l{Implicit sharing iterator problem} section for more information.
1856
1857 A frequent requirement is to remove or simplify the spacing between
1858 visible characters in a string. The characters that make up that spacing
1859 are those for which \l {QChar::}{isSpace()} returns \c true, such as
1860 the simple space \c{' '}, the horizontal tab \c{'\\t'} and the newline \c{'\\n'}.
1861 To obtain a copy of a string leaving out any spacing from its start and end,
1862 use \l trimmed(). To also replace each sequence of spacing characters within
1863 the string with a simple space, \c{' '}, use \l simplified().
1864
1865 If you want to find all occurrences of a particular character or
1866 substring in a QString, use the indexOf() or lastIndexOf()
1867 functions. The former searches forward, the latter searches backward.
1868 Either can be told an index position from which to start their search.
1869 Each returns the index position of the character or substring if they
1870 find it; otherwise, they return -1. For example, here is a typical loop
1871 that finds all occurrences of a particular substring:
1872
1873 \snippet qstring/main.cpp 6
1874
1875 QString provides many functions for converting numbers into
1876 strings and strings into numbers. See the arg() functions, the
1877 setNum() functions, the number() static functions, and the
1878 toInt(), toDouble(), and similar functions.
1879
1880 To get an uppercase or lowercase version of a string, use toUpper() or
1881 toLower().
1882
1883 Lists of strings are handled by the QStringList class. You can
1884 split a string into a list of strings using the split() function,
1885 and join a list of strings into a single string with an optional
1886 separator using QStringList::join(). You can obtain a filtered list
1887 from a string list by selecting the entries in it that contain a
1888 particular substring or match a particular QRegularExpression.
1889 See QStringList::filter() for details.
1890
1891 \section1 Querying string data
1892
1893 To see if a QString starts or ends with a particular substring, use
1894 startsWith() or endsWith(). To check whether a QString contains a
1895 specific character or substring, use the contains() function. To
1896 find out how many times a particular character or substring occurs
1897 in a string, use count().
1898
1899 To obtain a pointer to the actual character data, call data() or
1900 constData(). These functions return a pointer to the beginning of
1901 the QChar data. The pointer is guaranteed to remain valid until a
1902 non-\c{const} function is called on the QString.
1903
1904 \section2 Comparing strings
1905
1906 QStrings can be compared using overloaded operators such as \l
1907 operator<(), \l operator<=(), \l operator==(), \l operator>=(),
1908 and so on. The comparison is based exclusively on the lexicographical
1909 order of the two strings, seen as sequences of UTF-16 code units.
1910 It is very fast but is not what a human would expect; the
1911 QString::localeAwareCompare() function is usually a better choice for
1912 sorting user-interface strings, when such a comparison is available.
1913
1914 When Qt is linked with the ICU library (which it usually is), its
1915 locale-aware sorting is used. Otherwise, platform-specific solutions
1916 are used:
1917 \list
1918 \li On Windows, localeAwareCompare() uses the current user locale,
1919 as set in the \uicontrol{regional} and \uicontrol{language}
1920 options portion of \uicontrol{Control Panel}.
1921 \li On \macos and iOS, \l localeAwareCompare() compares according
1922 to the \uicontrol{Order for sorted lists} setting in the
1923 \uicontrol{International preferences} panel.
1924 \li On other Unix-like systems, the comparison falls back to the
1925 system library's \c strcoll().
1926 \endlist
1927
1928 \section1 Converting between encoded string data and QString
1929
1930 QString provides the following functions that return a
1931 \c{const char *} version of the string as QByteArray: toUtf8(),
1932 toLatin1(), and toLocal8Bit().
1933
1934 \list
1935 \li toLatin1() returns a Latin-1 (ISO 8859-1) encoded 8-bit string.
1936 \li toUtf8() returns a UTF-8 encoded 8-bit string. UTF-8 is a
1937 superset of US-ASCII (ANSI X3.4-1986) that supports the entire
1938 Unicode character set through multibyte sequences.
1939 \li toLocal8Bit() returns an 8-bit string using the system's local
1940 encoding. This is the same as toUtf8() on Unix systems.
1941 \endlist
1942
1943 To convert from one of these encodings, QString provides
1944 fromLatin1(), fromUtf8(), and fromLocal8Bit(). Other
1945 encodings are supported through the QStringEncoder and QStringDecoder
1946 classes.
1947
1948 As mentioned above, QString provides a lot of functions and
1949 operators that make it easy to interoperate with \c{const char *}
1950 strings. But this functionality is a double-edged sword: It makes
1951 QString more convenient to use if all strings are US-ASCII or
1952 Latin-1, but there is always the risk that an implicit conversion
1953 from or to \c{const char *} is done using the wrong 8-bit
1954 encoding. To minimize these risks, you can turn off these implicit
1955 conversions by defining some of the following preprocessor symbols:
1956
1957 \list
1958 \li \l QT_NO_CAST_FROM_ASCII disables automatic conversions from
1959 C string literals and pointers to Unicode.
1960 \li \l QT_RESTRICTED_CAST_FROM_ASCII allows automatic conversions
1961 from C characters and character arrays but disables automatic
1962 conversions from character pointers to Unicode.
1963 \li \l QT_NO_CAST_TO_ASCII disables automatic conversion from QString
1964 to C strings.
1965 \endlist
1966
1967 You then need to explicitly call fromUtf8(), fromLatin1(),
1968 or fromLocal8Bit() to construct a QString from an
1969 8-bit string, or use the lightweight QLatin1StringView class. For
1970 example:
1971
1972 \snippet code/src_corelib_text_qstring.cpp 1
1973
1974 Similarly, you must call toLatin1(), toUtf8(), or
1975 toLocal8Bit() explicitly to convert the QString to an 8-bit
1976 string.
1977
1978 \table 100 %
1979 \header
1980 \li Note for C Programmers
1981
1982 \row
1983 \li
1984 Due to C++'s type system and the fact that QString is
1985 \l{implicitly shared}, QStrings may be treated like \c{int}s or
1986 other basic types. For example:
1987
1988 \snippet qstring/main.cpp 7
1989
1990 The \c result variable is a normal variable allocated on the
1991 stack. When \c return is called, and because we're returning by
1992 value, the copy constructor is called and a copy of the string is
1993 returned. No actual copying takes place thanks to the implicit
1994 sharing.
1995
1996 \endtable
1997
1998 \section1 Distinction between null and empty strings
1999
2000 For historical reasons, QString distinguishes between null
2001 and empty strings. A \e null string is a string that is
2002 initialized using QString's default constructor or by passing
2003 \nullptr to the constructor. An \e empty string is any
2004 string with size 0. A null string is always empty, but an empty
2005 string isn't necessarily null:
2006
2007 \snippet qstring/main.cpp 8
2008
2009 All functions except isNull() treat null strings the same as empty
2010 strings. For example, toUtf8().\l{QByteArray::}{constData()} returns a valid pointer
2011 (not \nullptr) to a '\\0' character for a null string. We
2012 recommend that you always use the isEmpty() function and avoid isNull().
2013
2014 \section1 Number formats
2015
2016 When a QString::arg() \c{'%'} format specifier includes the \c{'L'} locale
2017 qualifier, and the base is ten (its default), the default locale is
2018 used. This can be set using \l{QLocale::setDefault()}. For more refined
2019 control of localized string representations of numbers, see
2020 QLocale::toString(). All other number formatting done by QString follows the
2021 C locale's representation of numbers.
2022
2023 When QString::arg() applies left-padding to numbers, the fill character
2024 \c{'0'} is treated specially. If the number is negative, its minus sign
2025 appears before the zero-padding. If the field is localized, the
2026 locale-appropriate zero character is used in place of \c{'0'}. For
2027 floating-point numbers, this special treatment only applies if the number is
2028 finite.
2029
2030 \section2 Floating-point formats
2031
2032 In member functions (for example, arg() and number()) that format floating-point
2033 numbers (\c float or \c double) as strings, the representation used can be
2034 controlled by a choice of \e format and \e precision, whose meanings are as
2035 for \l {QLocale::toString(double, char, int)}.
2036
2037 If the selected \e format includes an exponent, localized forms follow the
2038 locale's convention on digits in the exponent. For non-localized formatting,
2039 the exponent shows its sign and includes at least two digits, left-padding
2040 with zero if needed.
2041
2042 \section1 More efficient string construction
2043
2044 Many strings are known at compile time. The QString constructor from
2045 C++ string literals will copy the contents of the string,
2046 treating the contents as UTF-8. This requires memory allocation and
2047 re-encoding string data, operations that will happen at runtime.
2048 If the string data is known at compile time, you can use the QStringLiteral
2049 macro or similarly \c{operator""_s} to create QString's payload at compile
2050 time instead.
2051
2052 Using the QString \c{'+'} operator, it is easy to construct a
2053 complex string from multiple substrings. You will often write code
2054 like this:
2055
2056 \snippet qstring/stringbuilder.cpp 0
2057
2058 There is nothing wrong with either of these string constructions,
2059 but there are a few hidden inefficiencies:
2060
2061 First, repeated use of the \c{'+'} operator may lead to
2062 multiple memory allocations. When concatenating \e{n} substrings,
2063 where \e{n > 2}, there can be as many as \e{n - 1} calls to the
2064 memory allocator.
2065
2066 These allocations can be optimized by an internal class
2067 \c{QStringBuilder}. This class is marked
2068 internal and does not appear in the documentation, because you
2069 aren't meant to instantiate it in your code. Its use will be
2070 automatic, as described below.
2071
2072 \c{QStringBuilder} uses expression templates and reimplements the
2073 \c{'%'} operator so that when you use \c{'%'} for string
2074 concatenation instead of \c{'+'}, multiple substring
2075 concatenations will be postponed until the final result is about
2076 to be assigned to a QString. At this point, the amount of memory
2077 required for the final result is known. The memory allocator is
2078 then called \e{once} to get the required space, and the substrings
2079 are copied into it one by one.
2080
2081 Additional efficiency is gained by inlining and reducing reference
2082 counting (the QString created from a \c{QStringBuilder}
2083 has a ref count of 1, whereas QString::append() needs an extra
2084 test).
2085
2086 There are two ways you can access this improved method of string
2087 construction. The straightforward way is to include
2088 \c{QStringBuilder} wherever you want to use it and use the
2089 \c{'%'} operator instead of \c{'+'} when concatenating strings:
2090
2091 \snippet qstring/stringbuilder.cpp 5
2092
2093 A more global approach, which is more convenient but not entirely
2094 source-compatible, is to define \c QT_USE_QSTRINGBUILDER (by adding
2095 it to the compiler flags) at build time. This will make concatenating
2096 strings with \c{'+'} work the same way as \c{QStringBuilder}'s \c{'%'}.
2097
2098 \note Using automatic type deduction (for example, by using the \c
2099 auto keyword) with the result of string concatenation when QStringBuilder
2100 is enabled will show that the concatenation is indeed an object of a
2101 QStringBuilder specialization:
2102
2103 \snippet qstring/stringbuilder.cpp 6
2104
2105 This does not cause any harm, as QStringBuilder will implicitly convert to
2106 QString when required. If this is undesirable, then one should specify
2107 the necessary types instead of having the compiler deduce them:
2108
2109 \snippet qstring/stringbuilder.cpp 7
2110
2111 \section1 Maximum size and out-of-memory conditions
2112
2113 The maximum size of QString depends on the architecture. Most 64-bit
2114 systems can allocate more than 2 GB of memory, with a typical limit
2115 of 2^63 bytes. The actual value also depends on the overhead required for
2116 managing the data block. As a result, you can expect a maximum size
2117 of 2 GB minus overhead on 32-bit platforms and 2^63 bytes minus overhead
2118 on 64-bit platforms. The number of elements that can be stored in a
2119 QString is this maximum size divided by the size of QChar.
2120
2121 When memory allocation fails, QString throws a \c std::bad_alloc
2122 exception if the application was compiled with exception support.
2123 Out-of-memory conditions in Qt containers are the only cases where Qt
2124 will throw exceptions. If exceptions are disabled, then running out of
2125 memory is undefined behavior.
2126
2127 \note Target operating systems may impose limits on how much memory an
2128 application can allocate, in total, or on the size of individual allocations.
2129 This may further restrict the size of string a QString can hold.
2130 Mitigating or controlling the behavior these limits cause is beyond the
2131 scope of the Qt API.
2132
2133 \sa {Which string class to use?}, fromRawData(), QChar, QStringView,
2134 QLatin1StringView, QByteArray
2135*/
2136
2137/*! \typedef QString::ConstIterator
2138
2139 Qt-style synonym for QString::const_iterator.
2140*/
2141
2142/*! \typedef QString::Iterator
2143
2144 Qt-style synonym for QString::iterator.
2145*/
2146
2147/*! \typedef QString::const_iterator
2148
2149 \sa QString::iterator
2150*/
2151
2152/*! \typedef QString::iterator
2153
2154 \sa QString::const_iterator
2155*/
2156
2157/*! \typedef QString::const_reverse_iterator
2158 \since 5.6
2159
2160 \sa QString::reverse_iterator, QString::const_iterator
2161*/
2162
2163/*! \typedef QString::reverse_iterator
2164 \since 5.6
2165
2166 \sa QString::const_reverse_iterator, QString::iterator
2167*/
2168
2169/*!
2170 \typedef QString::size_type
2171*/
2172
2173/*!
2174 \typedef QString::difference_type
2175*/
2176
2177/*!
2178 \typedef QString::const_reference
2179*/
2180/*!
2181 \typedef QString::reference
2182*/
2183
2184/*!
2185 \typedef QString::const_pointer
2186
2187 The QString::const_pointer typedef provides an STL-style
2188 const pointer to a QString element (QChar).
2189*/
2190/*!
2191 \typedef QString::pointer
2192
2193 The QString::pointer typedef provides an STL-style
2194 pointer to a QString element (QChar).
2195*/
2196
2197/*!
2198 \typedef QString::value_type
2199*/
2200
2201/*! \fn QString::iterator QString::begin()
2202
2203 Returns an \l{STL-style iterators}{STL-style iterator} pointing to the
2204 first character in the string.
2205
2206//! [iterator-invalidation-func-desc]
2207 \warning The returned iterator is invalidated on detachment or when the
2208 QString is modified.
2209//! [iterator-invalidation-func-desc]
2210
2211 \sa constBegin(), end()
2212*/
2213
2214/*! \fn QString::const_iterator QString::begin() const
2215
2216 \overload begin()
2217*/
2218
2219/*! \fn QString::const_iterator QString::cbegin() const
2220 \since 5.0
2221
2222 Returns a const \l{STL-style iterators}{STL-style iterator} pointing to the
2223 first character in the string.
2224
2225 \include qstring.cpp iterator-invalidation-func-desc
2226
2227 \sa begin(), cend()
2228*/
2229
2230/*! \fn QString::const_iterator QString::constBegin() const
2231
2232 Returns a const \l{STL-style iterators}{STL-style iterator} pointing to the
2233 first character in the string.
2234
2235 \include qstring.cpp iterator-invalidation-func-desc
2236
2237 \sa begin(), constEnd()
2238*/
2239
2240/*! \fn QString::iterator QString::end()
2241
2242 Returns an \l{STL-style iterators}{STL-style iterator} pointing just after
2243 the last character in the string.
2244
2245 \include qstring.cpp iterator-invalidation-func-desc
2246
2247 \sa begin(), constEnd()
2248*/
2249
2250/*! \fn QString::const_iterator QString::end() const
2251
2252 \overload end()
2253*/
2254
2255/*! \fn QString::const_iterator QString::cend() const
2256 \since 5.0
2257
2258 Returns a const \l{STL-style iterators}{STL-style iterator} pointing just
2259 after the last character in the string.
2260
2261 \include qstring.cpp iterator-invalidation-func-desc
2262
2263 \sa cbegin(), end()
2264*/
2265
2266/*! \fn QString::const_iterator QString::constEnd() const
2267
2268 Returns a const \l{STL-style iterators}{STL-style iterator} pointing just
2269 after the last character in the string.
2270
2271 \include qstring.cpp iterator-invalidation-func-desc
2272
2273 \sa constBegin(), end()
2274*/
2275
2276/*! \fn QString::reverse_iterator QString::rbegin()
2277 \since 5.6
2278
2279 Returns a \l{STL-style iterators}{STL-style} reverse iterator pointing to
2280 the first character in the string, in reverse order.
2281
2282 \include qstring.cpp iterator-invalidation-func-desc
2283
2284 \sa begin(), crbegin(), rend()
2285*/
2286
2287/*! \fn QString::const_reverse_iterator QString::rbegin() const
2288 \since 5.6
2289 \overload
2290*/
2291
2292/*! \fn QString::const_reverse_iterator QString::crbegin() const
2293 \since 5.6
2294
2295 Returns a const \l{STL-style iterators}{STL-style} reverse iterator
2296 pointing to the first character in the string, in reverse order.
2297
2298 \include qstring.cpp iterator-invalidation-func-desc
2299
2300 \sa begin(), rbegin(), rend()
2301*/
2302
2303/*! \fn QString::reverse_iterator QString::rend()
2304 \since 5.6
2305
2306 Returns a \l{STL-style iterators}{STL-style} reverse iterator pointing just
2307 after the last character in the string, in reverse order.
2308
2309 \include qstring.cpp iterator-invalidation-func-desc
2310
2311 \sa end(), crend(), rbegin()
2312*/
2313
2314/*! \fn QString::const_reverse_iterator QString::rend() const
2315 \since 5.6
2316 \overload
2317*/
2318
2319/*! \fn QString::const_reverse_iterator QString::crend() const
2320 \since 5.6
2321
2322 Returns a const \l{STL-style iterators}{STL-style} reverse iterator
2323 pointing just after the last character in the string, in reverse order.
2324
2325 \include qstring.cpp iterator-invalidation-func-desc
2326
2327 \sa end(), rend(), rbegin()
2328*/
2329
2330/*!
2331 \fn QString::QString()
2332
2333 Constructs a null string. Null strings are also considered empty.
2334
2335 \sa isEmpty(), isNull(), {Distinction Between Null and Empty Strings}
2336*/
2337
2338/*!
2339 \fn QString::QString(QString &&other)
2340
2341 Move-constructs a QString instance, making it point at the same
2342 object that \a other was pointing to.
2343
2344 \since 5.2
2345*/
2346
2347/*! \fn QString::QString(const char *str)
2348
2349 Constructs a string initialized with the 8-bit string \a str. The
2350 given const char pointer is converted to Unicode using the
2351 fromUtf8() function.
2352
2353 You can disable this constructor by defining
2354 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
2355 can be useful if you want to ensure that all user-visible strings
2356 go through QObject::tr(), for example.
2357
2358 \note Defining \l QT_RESTRICTED_CAST_FROM_ASCII also disables
2359 this constructor, but enables a \c{QString(const char (&ch)[N])}
2360 constructor instead. Using non-literal input, or input with
2361 embedded NUL characters, or non-7-bit characters is undefined
2362 in this case.
2363
2364 \sa fromLatin1(), fromLocal8Bit(), fromUtf8()
2365*/
2366
2367/*! \fn QString::QString(const char8_t *str)
2368
2369 Constructs a string initialized with the UTF-8 string \a str. The
2370 given const char8_t pointer is converted to Unicode using the
2371 fromUtf8() function.
2372
2373 \since 6.1
2374 \sa fromLatin1(), fromLocal8Bit(), fromUtf8()
2375*/
2376
2377/*!
2378 \fn QString::QString(QStringView sv)
2379
2380 Constructs a string initialized with the string view's data.
2381
2382 The QString will be null if and only if \a sv is null.
2383
2384 \since 6.8
2385
2386 \sa fromUtf16()
2387*/
2388
2389/*
2390//! [from-std-string]
2391Returns a copy of the \a str string. The given string is assumed to be
2392encoded in \1, and is converted to QString using the \2 function.
2393//! [from-std-string]
2394*/
2395
2396/*! \fn QString QString::fromStdString(const std::string &str)
2397
2398 \include qstring.cpp {from-std-string} {UTF-8} {fromUtf8()}
2399
2400 \sa fromLatin1(), fromLocal8Bit(), fromUtf8(), QByteArray::fromStdString()
2401*/
2402
2403/*! \fn QString QString::fromStdWString(const std::wstring &str)
2404
2405 Returns a copy of the \a str string. The given string is assumed
2406 to be encoded in UTF-16 if the size of wchar_t is 2 bytes (e.g. on
2407 Windows) and UCS-4 if the size of wchar_t is 4 bytes (most Unix
2408 systems).
2409
2410 \sa fromUtf16(), fromLatin1(), fromLocal8Bit(), fromUtf8(), fromUcs4(),
2411 fromStdU16String(), fromStdU32String()
2412*/
2413
2414/*! \fn QString QString::fromWCharArray(const wchar_t *string, qsizetype size)
2415 \since 4.2
2416
2417 Reads the first \a size code units of the \c wchar_t array to whose start
2418 \a string points, converting them to Unicode and returning the result as
2419 a QString. The encoding used by \c wchar_t is assumed to be UTF-32 if the
2420 type's size is four bytes or UTF-16 if its size is two bytes.
2421
2422 If \a size is -1 (default), the \a string must be '\\0'-terminated.
2423
2424 \sa fromUtf16(), fromLatin1(), fromLocal8Bit(), fromUtf8(), fromUcs4(),
2425 fromStdWString()
2426*/
2427
2428/*! \fn std::wstring QString::toStdWString() const
2429
2430 Returns a std::wstring object with the data contained in this
2431 QString. The std::wstring is encoded in UTF-16 on platforms where
2432 wchar_t is 2 bytes wide (for example, Windows) and in UTF-32 on platforms
2433 where wchar_t is 4 bytes wide (most Unix systems).
2434
2435 This method is mostly useful to pass a QString to a function
2436 that accepts a std::wstring object.
2437
2438 \sa utf16(), toLatin1(), toUtf8(), toLocal8Bit(), toStdU16String(),
2439 toStdU32String()
2440*/
2441
2442qsizetype QString::toUcs4_helper(const char16_t *uc, qsizetype length, char32_t *out)
2443{
2444 qsizetype count = 0;
2445
2446 QStringIterator i(QStringView(uc, length));
2447 while (i.hasNext())
2448 out[count++] = i.next();
2449
2450 return count;
2451}
2452
2453/*! \fn qsizetype QString::toWCharArray(wchar_t *array) const
2454 \since 4.2
2455
2456 Fills the \a array with the data contained in this QString object.
2457 The array is encoded in UTF-16 on platforms where
2458 wchar_t is 2 bytes wide (e.g. Windows) and in UTF-32 on platforms
2459 where wchar_t is 4 bytes wide (most Unix systems).
2460
2461 \a array has to be allocated by the caller and contain enough space to
2462 hold the complete string (allocating the array with the same length as the
2463 string is always sufficient).
2464
2465 This function returns the actual length of the string in \a array.
2466
2467 \note This function does not append a null character to the array.
2468
2469 \sa utf16(), toUcs4(), toLatin1(), toUtf8(), toLocal8Bit(), toStdWString(),
2470 QStringView::toWCharArray()
2471*/
2472
2473/*! \fn QString::QString(const QString &other)
2474
2475 Constructs a copy of \a other.
2476
2477 This operation takes \l{constant time}, because QString is
2478 \l{implicitly shared}. This makes returning a QString from a
2479 function very fast. If a shared instance is modified, it will be
2480 copied (copy-on-write), and that takes \l{linear time}.
2481
2482 \sa operator=()
2483*/
2484
2485/*!
2486 Constructs a string initialized with the first \a size characters
2487 of the QChar array \a unicode.
2488
2489 If \a unicode is \nullptr, a null string is constructed.
2490
2491 If \a size is negative, \a unicode is assumed to point to a '\\0'-terminated
2492 array and its length is determined dynamically. The terminating
2493 null character is not considered part of the string.
2494
2495 QString makes a deep copy of the string data. The unicode data is copied as
2496 is and the Byte Order Mark is preserved if present.
2497
2498 \sa fromRawData()
2499*/
2500QString::QString(const QChar *unicode, qsizetype size)
2501{
2502 if (!unicode) {
2503 d.clear();
2504 } else {
2505 if (size < 0)
2506 size = QtPrivate::qustrlen(reinterpret_cast<const char16_t *>(unicode));
2507 if (!size) {
2508 d = DataPointer::fromRawData(&_empty, 0);
2509 } else {
2510 d = DataPointer(size, size);
2511 Q_CHECK_PTR(d.data());
2512 memcpy(d.data(), unicode, size * sizeof(QChar));
2513 d.data()[size] = '\0';
2514 }
2515 }
2516}
2517
2518/*!
2519 Constructs a string of the given \a size with every character set
2520 to \a ch.
2521
2522 \sa fill()
2523*/
2524QString::QString(qsizetype size, QChar ch)
2525{
2526 if (size <= 0) {
2527 d = DataPointer::fromRawData(&_empty, 0);
2528 } else {
2529 d = DataPointer(size, size);
2530 Q_CHECK_PTR(d.data());
2531 d.data()[size] = '\0';
2532 char16_t *b = d.data();
2533 char16_t *e = d.data() + size;
2534 const char16_t value = ch.unicode();
2535 std::fill(b, e, value);
2536 }
2537}
2538
2539/*! \fn QString::QString(qsizetype size, Qt::Initialization)
2540 \internal
2541
2542 Constructs a string of the given \a size without initializing the
2543 characters. This is only used in \c QStringBuilder::toString().
2544*/
2545QString::QString(qsizetype size, Qt::Initialization)
2546{
2547 if (size <= 0) {
2548 d = DataPointer::fromRawData(&_empty, 0);
2549 } else {
2550 d = DataPointer(size, size);
2551 Q_CHECK_PTR(d.data());
2552 d.data()[size] = '\0';
2553 }
2554}
2555
2556/*! \fn QString::QString(QLatin1StringView str)
2557
2558 Constructs a copy of the Latin-1 string viewed by \a str.
2559
2560 \sa fromLatin1()
2561*/
2562
2563/*!
2564 Constructs a string of size 1 containing the character \a ch.
2565*/
2566QString::QString(QChar ch)
2567{
2568 d = DataPointer(1, 1);
2569 Q_CHECK_PTR(d.data());
2570 d.data()[0] = ch.unicode();
2571 d.data()[1] = '\0';
2572}
2573
2574/*! \fn QString::QString(const QByteArray &ba)
2575
2576 Constructs a string initialized with the byte array \a ba. The
2577 given byte array is converted to Unicode using fromUtf8().
2578
2579 You can disable this constructor by defining
2580 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
2581 can be useful if you want to ensure that all user-visible strings
2582 go through QObject::tr(), for example.
2583
2584 \note Any null ('\\0') bytes in the byte array will be included in this
2585 string, converted to Unicode null characters (U+0000). This behavior is
2586 different from Qt 5.x.
2587
2588 \sa fromLatin1(), fromLocal8Bit(), fromUtf8()
2589*/
2590
2591/*! \fn QString::QString(const Null &)
2592 \internal
2593*/
2594
2595/*! \fn QString::QString(QStringPrivate)
2596 \internal
2597*/
2598
2599/*! \fn QString &QString::operator=(const QString::Null &)
2600 \internal
2601*/
2602
2603/*!
2604 \fn QString::~QString()
2605
2606 Destroys the string.
2607*/
2608
2609
2610/*! \fn void QString::swap(QString &other)
2611 \since 4.8
2612 \memberswap{string}
2613*/
2614
2615/*! \fn void QString::detach()
2616
2617 \internal
2618*/
2619
2620/*! \fn bool QString::isDetached() const
2621
2622 \internal
2623*/
2624
2625/*! \fn bool QString::isSharedWith(const QString &other) const
2626
2627 \internal
2628*/
2629
2630/*! \fn QString::operator std::u16string_view() const
2631 \since 6.7
2632
2633 Converts this QString object to a \c{std::u16string_view} object.
2634*/
2635
2636static bool needsReallocate(const QString &str, qsizetype newSize)
2637{
2638 const auto capacityAtEnd = str.capacity() - str.data_ptr().freeSpaceAtBegin();
2639 return newSize > capacityAtEnd;
2640}
2641
2642/*!
2643 Sets the size of the string to \a size characters.
2644
2645 If \a size is greater than the current size, the string is
2646 extended to make it \a size characters long with the extra
2647 characters added to the end. The new characters are uninitialized.
2648
2649 If \a size is less than the current size, characters beyond position
2650 \a size are excluded from the string.
2651
2652 \note While resize() will grow the capacity if needed, it never shrinks
2653 capacity. To shed excess capacity, use squeeze().
2654
2655 Example:
2656
2657 \snippet qstring/main.cpp 45
2658
2659 If you want to append a certain number of identical characters to
2660 the string, use the \l {QString::}{resize(qsizetype, QChar)} overload.
2661
2662 If you want to expand the string so that it reaches a certain
2663 width and fill the new positions with a particular character, use
2664 the leftJustified() function:
2665
2666 \snippet qstring/main.cpp 47
2667
2668 If \a size is negative, it is equivalent to passing zero.
2669
2670 \sa truncate(), reserve(), squeeze()
2671*/
2672
2673void QString::resize(qsizetype size)
2674{
2675 if (size < 0)
2676 size = 0;
2677
2678 if (d->needsDetach() || needsReallocate(*this, size))
2679 reallocData(size, QArrayData::Grow);
2680 d.size = size;
2681 if (d->allocatedCapacity())
2682 d.data()[size] = u'\0';
2683}
2684
2685/*!
2686 \overload
2687 \since 5.7
2688
2689 Unlike \l {QString::}{resize(qsizetype)}, this overload
2690 initializes the new characters to \a fillChar:
2691
2692 \snippet qstring/main.cpp 46
2693*/
2694
2695void QString::resize(qsizetype newSize, QChar fillChar)
2696{
2697 const qsizetype oldSize = size();
2698 resize(newSize);
2699 const qsizetype difference = size() - oldSize;
2700 if (difference > 0)
2701 std::fill_n(d.data() + oldSize, difference, fillChar.unicode());
2702}
2703
2704
2705/*!
2706 \since 6.8
2707
2708 Sets the size of the string to \a size characters. If the size of
2709 the string grows, the new characters are uninitialized.
2710
2711 The behavior is identical to \c{resize(size)}.
2712
2713 \sa resize()
2714*/
2715
2716void QString::resizeForOverwrite(qsizetype size)
2717{
2718 resize(size);
2719}
2720
2721
2722/*! \fn qsizetype QString::capacity() const
2723
2724 Returns the maximum number of characters that can be stored in
2725 the string without forcing a reallocation.
2726
2727 The sole purpose of this function is to provide a means of fine
2728 tuning QString's memory usage. In general, you will rarely ever
2729 need to call this function. If you want to know how many
2730 characters are in the string, call size().
2731
2732 \note A statically allocated string will report a capacity of 0,
2733 even if it's not empty.
2734
2735 \note The free space position in the allocated memory block is undefined. In
2736 other words, one should not assume that the free memory is always located
2737 after the initialized elements.
2738
2739 \sa reserve(), squeeze()
2740*/
2741
2742/*!
2743 \fn void QString::reserve(qsizetype size)
2744
2745 Ensures the string has space for at least \a size characters.
2746
2747 If you know in advance how large a string will be, you can call this
2748 function to save repeated reallocation while building it.
2749 This can improve performance when building a string incrementally.
2750 A long sequence of operations that add to a string may trigger several
2751 reallocations, the last of which may leave you with significantly more
2752 space than you need. This is less efficient than doing a single
2753 allocation of the right size at the start.
2754
2755 If in doubt about how much space shall be needed, it is usually better to
2756 use an upper bound as \a size, or a high estimate of the most likely size,
2757 if a strict upper bound would be much bigger than this. If \a size is an
2758 underestimate, the string will grow as needed once the reserved size is
2759 exceeded, which may lead to a larger allocation than your best
2760 overestimate would have and will slow the operation that triggers it.
2761
2762 \warning reserve() reserves memory but does not change the size of the
2763 string. Accessing data beyond the end of the string is undefined behavior.
2764 If you need to access memory beyond the current end of the string,
2765 use resize().
2766
2767 This function is useful for code that needs to build up a long
2768 string and wants to avoid repeated reallocation. In this example,
2769 we want to add to the string until some condition is \c true, and
2770 we're fairly sure that size is large enough to make a call to
2771 reserve() worthwhile:
2772
2773 \snippet qstring/main.cpp 44
2774
2775 \sa squeeze(), capacity(), resize()
2776*/
2777
2778/*!
2779 \fn void QString::squeeze()
2780
2781 Releases any memory not required to store the character data.
2782
2783 The sole purpose of this function is to provide a means of fine
2784 tuning QString's memory usage. In general, you will rarely ever
2785 need to call this function.
2786
2787 \sa reserve(), capacity()
2788*/
2789
2790void QString::reallocData(qsizetype alloc, QArrayData::AllocationOption option)
2791{
2792 if (!alloc) {
2793 d = DataPointer::fromRawData(&_empty, 0);
2794 return;
2795 }
2796
2797 // don't use reallocate path when reducing capacity and there's free space
2798 // at the beginning: might shift data pointer outside of allocated space
2799 const bool cannotUseReallocate = d.freeSpaceAtBegin() > 0;
2800
2801 if (d->needsDetach() || cannotUseReallocate) {
2802 DataPointer dd(alloc, qMin(alloc, d.size), option);
2803 Q_CHECK_PTR(dd.data());
2804 if (dd.size > 0)
2805 ::memcpy(dd.data(), d.data(), dd.size * sizeof(QChar));
2806 dd.data()[dd.size] = 0;
2807 d.swap(dd);
2808 } else {
2809 d->reallocate(alloc, option);
2810 }
2811}
2812
2813void QString::reallocGrowData(qsizetype n)
2814{
2815 if (!n) // expected to always allocate
2816 n = 1;
2817
2818 if (d->needsDetach()) {
2819 DataPointer dd(DataPointer::allocateGrow(d, n, QArrayData::GrowsAtEnd));
2820 Q_CHECK_PTR(dd.data());
2821 dd->copyAppend(d.data(), d.data() + d.size);
2822 dd.data()[dd.size] = 0;
2823 d.swap(dd);
2824 } else {
2825 d->reallocate(d.constAllocatedCapacity() + n, QArrayData::Grow);
2826 }
2827}
2828
2829/*! \fn void QString::clear()
2830
2831 Clears the contents of the string and makes it null.
2832
2833 \sa resize(), isNull()
2834*/
2835
2836/*! \fn QString &QString::operator=(const QString &other)
2837
2838 Assigns \a other to this string and returns a reference to this
2839 string.
2840*/
2841
2842QString &QString::operator=(const QString &other) noexcept
2843{
2844 d = other.d;
2845 return *this;
2846}
2847
2848/*!
2849 \fn QString &QString::operator=(QString &&other)
2850
2851 Move-assigns \a other to this QString instance.
2852
2853 \since 5.2
2854*/
2855
2856/*! \fn QString &QString::operator=(QLatin1StringView str)
2857
2858 \overload operator=()
2859
2860 Assigns the Latin-1 string viewed by \a str to this string.
2861*/
2862QString &QString::operator=(QLatin1StringView other)
2863{
2864 const qsizetype capacityAtEnd = capacity() - d.freeSpaceAtBegin();
2865 if (isDetached() && other.size() <= capacityAtEnd) { // assumes d->alloc == 0 -> !isDetached() (sharedNull)
2866 d.size = other.size();
2867 d.data()[other.size()] = 0;
2868 qt_from_latin1(d.data(), other.latin1(), other.size());
2869 } else {
2870 *this = fromLatin1(other.latin1(), other.size());
2871 }
2872 return *this;
2873}
2874
2875/*! \fn QString &QString::operator=(const QByteArray &ba)
2876
2877 \overload operator=()
2878
2879 Assigns \a ba to this string. The byte array is converted to Unicode
2880 using the fromUtf8() function.
2881
2882 You can disable this operator by defining
2883 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
2884 can be useful if you want to ensure that all user-visible strings
2885 go through QObject::tr(), for example.
2886*/
2887
2888/*! \fn QString &QString::operator=(const char *str)
2889
2890 \overload operator=()
2891
2892 Assigns \a str to this string. The const char pointer is converted
2893 to Unicode using the fromUtf8() function.
2894
2895 You can disable this operator by defining \l QT_NO_CAST_FROM_ASCII
2896 or \l QT_RESTRICTED_CAST_FROM_ASCII when you compile your applications.
2897 This can be useful if you want to ensure that all user-visible strings
2898 go through QObject::tr(), for example.
2899*/
2900
2901/*!
2902 \overload operator=()
2903
2904 Sets the string to contain the single character \a ch.
2905*/
2906QString &QString::operator=(QChar ch)
2907{
2908 return assign(1, ch);
2909}
2910
2911/*!
2912 \fn QString& QString::insert(qsizetype position, const QString &str)
2913
2914 Inserts the string \a str at the given index \a position and
2915 returns a reference to this string.
2916
2917 Example:
2918
2919 \snippet qstring/main.cpp 26
2920
2921//! [string-grow-at-insertion]
2922 This string grows to accommodate the insertion. If \a position is beyond
2923 the end of the string, space characters are appended to the string to reach
2924 this \a position, followed by \a str.
2925//! [string-grow-at-insertion]
2926
2927 \sa append(), prepend(), replace(), remove()
2928*/
2929
2930/*!
2931 \fn QString& QString::insert(qsizetype position, QStringView str)
2932 \since 6.0
2933 \overload insert()
2934
2935 Inserts the string view \a str at the given index \a position and
2936 returns a reference to this string.
2937
2938 \include qstring.cpp string-grow-at-insertion
2939*/
2940
2941
2942/*!
2943 \fn QString& QString::insert(qsizetype position, const char *str)
2944 \since 5.5
2945 \overload insert()
2946
2947 Inserts the C string \a str at the given index \a position and
2948 returns a reference to this string.
2949
2950 \include qstring.cpp string-grow-at-insertion
2951
2952 This function is not available when \l QT_NO_CAST_FROM_ASCII is
2953 defined.
2954*/
2955
2956/*!
2957 \fn QString& QString::insert(qsizetype position, const QByteArray &str)
2958 \since 5.5
2959 \overload insert()
2960
2961 Interprets the contents of \a str as UTF-8, inserts the Unicode string
2962 it encodes at the given index \a position and returns a reference to
2963 this string.
2964
2965 \include qstring.cpp string-grow-at-insertion
2966
2967 This function is not available when \l QT_NO_CAST_FROM_ASCII is
2968 defined.
2969*/
2970
/*! \internal
  T is a view or a container on/of QChar, char16_t, or char

  Inserts \a toInsert into \a str at index \a i. If \a i is greater than the
  current size of \a str, the gap between the old end and \a i is filled with
  space characters (u' ') before \a toInsert is placed at \a i.

  The callers in this file reject negative \a i before calling this helper.
  NOTE(review): the in-place path reads \a toInsert after \a str has been
  resized and its tail moved, so \a toInsert presumably must not alias the
  buffer of \a str in that case; the QChar overload of QString::insert()
  copies aliasing input before calling here.
*/
template <typename T>
static void insert_helper(QString &str, qsizetype i, const T &toInsert)
{
    auto &str_d = str.data_ptr();
    // Number of padding characters needed when inserting past the current end.
    qsizetype difference = 0;
    if (Q_UNLIKELY(i > str_d.size))
        difference = i - str_d.size;
    const qsizetype oldSize = str_d.size;
    const qsizetype insert_size = toInsert.size();
    const qsizetype newSize = str_d.size + difference + insert_size;
    // Inserting at index 0 requests growth at the beginning, anything else at the end.
    const auto side = i == 0 ? QArrayData::GrowsAtBeginning : QArrayData::GrowsAtEnd;

    if (str_d.needsDetach() || needsReallocate(str, newSize)) {
        // Build the result in a separate string and swap it in at the end.
        const auto cbegin = str.cbegin();
        const auto cend = str.cend();
        // Split point in the old contents; when padding is needed everything
        // in the old string goes before the inserted data.
        const auto insert_start = difference == 0 ? std::next(cbegin, i) : cend;
        QString other;
        // Using detachAndGrow() so that prepend optimization works and QStringBuilder
        // unittests pass
        other.data_ptr().detachAndGrow(side, newSize, nullptr, nullptr);
        other.append(QStringView(cbegin, insert_start));
        // Pads with spaces up to i; other already holds i characters when
        // difference == 0.
        other.resize(i, u' ');
        other.append(toInsert);
        other.append(QStringView(insert_start, cend));
        str.swap(other);
        return;
    }

    // In-place path: make room, then shuffle the existing characters.
    str_d.detachAndGrow(side, difference + insert_size, nullptr, nullptr);
    Q_CHECK_PTR(str_d.data());
    str.resize(newSize);

    auto begin = str_d.begin();
    auto old_end = std::next(begin, oldSize);
    // Fill the gap between the old end and i (no-op when difference == 0).
    std::fill_n(old_end, difference, u' ');
    auto insert_start = std::next(begin, i);
    // Only when inserting inside the old contents is there a tail to shift
    // towards the new end.
    if (difference == 0)
        std::move_backward(insert_start, old_end, str_d.end());

    // Write the new characters into the opening, dispatching on the element
    // type of T: UTF-16 data is copied, char data is widened from Latin-1.
    using Char = std::remove_cv_t<typename T::value_type>;
    if constexpr(std::is_same_v<Char, QChar>)
        std::copy_n(reinterpret_cast<const char16_t *>(toInsert.data()), insert_size, insert_start);
    else if constexpr (std::is_same_v<Char, char16_t>)
        std::copy_n(toInsert.data(), insert_size, insert_start);
    else if constexpr (std::is_same_v<Char, char>)
        qt_from_latin1(insert_start, toInsert.data(), insert_size);
}
3021
3022/*!
3023 \fn QString &QString::insert(qsizetype position, QLatin1StringView str)
3024 \overload insert()
3025
3026 Inserts the Latin-1 string viewed by \a str at the given index \a position.
3027
3028 \include qstring.cpp string-grow-at-insertion
3029*/
3030QString &QString::insert(qsizetype i, QLatin1StringView str)
3031{
3032 const char *s = str.latin1();
3033 if (i < 0 || !s || !(*s))
3034 return *this;
3035
3036 insert_helper(*this, i, str);
3037 return *this;
3038}
3039
3040/*!
3041 \fn QString &QString::insert(qsizetype position, QUtf8StringView str)
3042 \overload insert()
3043 \since 6.5
3044
3045 Inserts the UTF-8 string view \a str at the given index \a position.
3046
3047 \note Inserting variable-width UTF-8-encoded string data is conceptually slower
3048 than inserting fixed-width string data such as UTF-16 (QStringView) or Latin-1
3049 (QLatin1StringView) and should thus be used sparingly.
3050
3051 \include qstring.cpp string-grow-at-insertion
3052*/
QString &QString::insert(qsizetype i, QUtf8StringView s)
{
    // Length of the input in UTF-8 code units; it is also used below as the
    // size estimate for the decoded data.
    auto insert_size = s.size();
    // A negative index or an empty view leaves the string untouched.
    if (i < 0 || insert_size <= 0)
        return *this;

    // Number of padding characters needed when inserting past the current end.
    qsizetype difference = 0;
    if (Q_UNLIKELY(i > d.size))
        difference = i - d.size;

    const qsizetype newSize = d.size + difference + insert_size;

    if (d.needsDetach() || needsReallocate(*this, newSize)) {
        // Build the result in a separate string and swap it in at the end.
        const auto cbegin = this->cbegin();
        // Split point in the old contents; when padding is needed everything
        // in the old string goes before the inserted data.
        const auto insert_start = difference == 0 ? std::next(cbegin, i) : cend();
        QString other;
        other.reserve(newSize);
        other.append(QStringView(cbegin, insert_start));
        if (difference > 0)
            other.resize(i, u' ');
        other.append(s);
        other.append(QStringView(insert_start, cend()));
        swap(other);
        return *this;
    }

    if (i >= d.size) {
        // Inserting at or past the end: pad with spaces if needed, then
        // append the UTF-8 data.
        d.detachAndGrow(QArrayData::GrowsAtEnd, difference + insert_size, nullptr, nullptr);
        Q_CHECK_PTR(d.data());

        if (difference > 0)
            resize(i, u' ');
        append(s);
    } else {
        // Optimal insertion of Utf8 data is at the end, anywhere else could
        // potentially lead to moving characters twice if Utf8 data size
        // (variable-width) is less than the equivalent Utf16 data size
        QVarLengthArray<char16_t> buffer(insert_size); // ### optimize (QTBUG-108546)
        // Decode into the temporary buffer first; b is the end of the decoded data.
        char16_t *b = QUtf8::convertToUnicode(buffer.data(), s);
        insert_helper(*this, i, QStringView(buffer.data(), b));
    }

    return *this;
}
3097
3098/*!
3099 \fn QString& QString::insert(qsizetype position, const QChar *unicode, qsizetype size)
3100 \overload insert()
3101
3102 Inserts the first \a size characters of the QChar array \a unicode
3103 at the given index \a position in the string.
3104
3105 This string grows to accommodate the insertion. If \a position is beyond
3106 the end of the string, space characters are appended to the string to reach
3107 this \a position, followed by \a size characters of the QChar array
3108 \a unicode.
3109*/
3110QString& QString::insert(qsizetype i, const QChar *unicode, qsizetype size)
3111{
3112 if (i < 0 || size <= 0)
3113 return *this;
3114
3115 // In case when data points into "this"
3116 if (!d->needsDetach() && QtPrivate::q_points_into_range(unicode, *this)) {
3117 QVarLengthArray copy(unicode, unicode + size);
3118 insert(i, copy.data(), size);
3119 } else {
3120 insert_helper(*this, i, QStringView(unicode, size));
3121 }
3122
3123 return *this;
3124}
3125
3126/*!
3127 \fn QString& QString::insert(qsizetype position, QChar ch)
3128 \overload insert()
3129
3130 Inserts \a ch at the given index \a position in the string.
3131
3132 This string grows to accommodate the insertion. If \a position is beyond
3133 the end of the string, space characters are appended to the string to reach
3134 this \a position, followed by \a ch.
3135*/
3136
3137QString& QString::insert(qsizetype i, QChar ch)
3138{
3139 if (i < 0)
3140 i += d.size;
3141 return insert(i, &ch, 1);
3142}
3143
3144/*!
3145 Appends the string \a str onto the end of this string.
3146
3147 Example:
3148
3149 \snippet qstring/main.cpp 9
3150
3151 This is the same as using the insert() function:
3152
3153 \snippet qstring/main.cpp 10
3154
3155 The append() function is typically very fast (\l{constant time}),
3156 because QString preallocates extra space at the end of the string
3157 data so it can grow without reallocating the entire string each
3158 time.
3159
3160 \sa operator+=(), prepend(), insert()
3161*/
3162QString &QString::append(const QString &str)
3163{
3164 if (!str.isNull()) {
3165 if (isNull()) {
3166 if (Q_UNLIKELY(!str.d.isMutable()))
3167 assign(str); // fromRawData, so we do a deep copy
3168 else
3169 operator=(str);
3170 } else if (str.size()) {
3171 append(str.constData(), str.size());
3172 }
3173 }
3174 return *this;
3175}
3176
3177/*!
3178 \fn QString &QString::append(QStringView v)
3179 \overload append()
3180 \since 6.0
3181
3182 Appends the given string view \a v to this string and returns the result.
3183*/
3184
3185/*!
3186 \overload append()
3187 \since 5.0
3188
3189 Appends \a len characters from the QChar array \a str to this string.
3190*/
3191QString &QString::append(const QChar *str, qsizetype len)
3192{
3193 if (str && len > 0) {
3194 static_assert(sizeof(QChar) == sizeof(char16_t), "Unexpected difference in sizes");
3195 // the following should be safe as QChar uses char16_t as underlying data
3196 const char16_t *char16String = reinterpret_cast<const char16_t *>(str);
3197 d->growAppend(char16String, char16String + len);
3198 d.data()[d.size] = u'\0';
3199 }
3200 return *this;
3201}
3202
3203/*!
3204 \overload append()
3205
3206 Appends the Latin-1 string viewed by \a str to this string.
3207*/
3208QString &QString::append(QLatin1StringView str)
3209{
3210 append_helper(*this, str);
3211 return *this;
3212}
3213
3214/*!
3215 \overload append()
3216 \since 6.5
3217
3218 Appends the UTF-8 string view \a str to this string.
3219*/
3220QString &QString::append(QUtf8StringView str)
3221{
3222 append_helper(*this, str);
3223 return *this;
3224}
3225
3226/*! \fn QString &QString::append(const QByteArray &ba)
3227
3228 \overload append()
3229
3230 Appends the byte array \a ba to this string. The given byte array
3231 is converted to Unicode using the fromUtf8() function.
3232
3233 You can disable this function by defining \l QT_NO_CAST_FROM_ASCII
3234 when you compile your applications. This can be useful if you want
3235 to ensure that all user-visible strings go through QObject::tr(),
3236 for example.
3237*/
3238
3239/*! \fn QString &QString::append(const char *str)
3240
3241 \overload append()
3242
3243 Appends the string \a str to this string. The given const char
3244 pointer is converted to Unicode using the fromUtf8() function.
3245
3246 You can disable this function by defining \l QT_NO_CAST_FROM_ASCII
3247 when you compile your applications. This can be useful if you want
3248 to ensure that all user-visible strings go through QObject::tr(),
3249 for example.
3250*/
3251
3252/*!
3253 \overload append()
3254
3255 Appends the character \a ch to this string.
3256*/
3257QString &QString::append(QChar ch)
3258{
3259 d.detachAndGrow(QArrayData::GrowsAtEnd, 1, nullptr, nullptr);
3260 d->copyAppend(1, ch.unicode());
3261 d.data()[d.size] = '\0';
3262 return *this;
3263}
3264
3265/*! \fn QString &QString::prepend(const QString &str)
3266
3267 Prepends the string \a str to the beginning of this string and
3268 returns a reference to this string.
3269
3270 This operation is typically very fast (\l{constant time}), because
3271 QString preallocates extra space at the beginning of the string data,
3272 so it can grow without reallocating the entire string each time.
3273
3274 Example:
3275
3276 \snippet qstring/main.cpp 36
3277
3278 \sa append(), insert()
3279*/
3280
3281/*! \fn QString &QString::prepend(QLatin1StringView str)
3282
3283 \overload prepend()
3284
3285 Prepends the Latin-1 string viewed by \a str to this string.
3286*/
3287
3288/*! \fn QString &QString::prepend(QUtf8StringView str)
3289 \since 6.5
3290 \overload prepend()
3291
3292 Prepends the UTF-8 string view \a str to this string.
3293*/
3294
3295/*! \fn QString &QString::prepend(const QChar *str, qsizetype len)
3296 \since 5.5
3297 \overload prepend()
3298
3299 Prepends \a len characters from the QChar array \a str to this string and
3300 returns a reference to this string.
3301*/
3302
3303/*! \fn QString &QString::prepend(QStringView str)
3304 \since 6.0
3305 \overload prepend()
3306
3307 Prepends the string view \a str to the beginning of this string and
3308 returns a reference to this string.
3309*/
3310
3311/*! \fn QString &QString::prepend(const QByteArray &ba)
3312
3313 \overload prepend()
3314
3315 Prepends the byte array \a ba to this string. The byte array is
3316 converted to Unicode using the fromUtf8() function.
3317
3318 You can disable this function by defining
3319 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
3320 can be useful if you want to ensure that all user-visible strings
3321 go through QObject::tr(), for example.
3322*/
3323
3324/*! \fn QString &QString::prepend(const char *str)
3325
3326 \overload prepend()
3327
3328 Prepends the string \a str to this string. The const char pointer
3329 is converted to Unicode using the fromUtf8() function.
3330
3331 You can disable this function by defining
3332 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
3333 can be useful if you want to ensure that all user-visible strings
3334 go through QObject::tr(), for example.
3335*/
3336
3337/*! \fn QString &QString::prepend(QChar ch)
3338
3339 \overload prepend()
3340
3341 Prepends the character \a ch to this string.
3342*/
3343
3344/*!
3345 \fn QString &QString::assign(QAnyStringView v)
3346 \since 6.6
3347
3348 Replaces the contents of this string with a copy of \a v and returns a
3349 reference to this string.
3350
3351 The size of this string will be equal to the size of \a v, converted to
3352 UTF-16 as if by \c{v.toString()}. Unlike QAnyStringView::toString(), however,
3353 this function only allocates memory if the estimated size exceeds the capacity
3354 of this string or this string is shared.
3355
3356 \sa QAnyStringView::toString()
3357*/
3358
3359/*!
3360 \fn QString &QString::assign(qsizetype n, QChar c)
3361 \since 6.6
3362
3363 Replaces the contents of this string with \a n copies of \a c and
3364 returns a reference to this string.
3365
3366 The size of this string will be equal to \a n, which has to be non-negative.
3367
3368 This function will only allocate memory if \a n exceeds the capacity of this
3369 string or this string is shared.
3370
3371 \sa fill()
3372*/
3373
3374/*!
3375 \fn template <typename InputIterator, QString::if_compatible_iterator<InputIterator>> QString &QString::assign(InputIterator first, InputIterator last)
3376 \since 6.6
3377
3378 Replaces the contents of this string with a copy of the elements in the
3379 iterator range [\a first, \a last) and returns a reference to this string.
3380
3381 The size of this string will be equal to the decoded length of the elements
3382 in the range [\a first, \a last), which need not be the same as the length of
3383 the range itself, because this function transparently recodes the input
3384 character set to UTF-16.
3385
3386 This function will only allocate memory if the number of elements in the
3387 range, or, for non-UTF-16-encoded input, the maximum possible size of the
3388 resulting string, exceeds the capacity of this string, or if this string is
3389 shared.
3390
3391 \note The behavior is undefined if either argument is an iterator into *this or
3392 [\a first, \a last) is not a valid range.
3393
3394 \constraints
3395 \c InputIterator meets the requirements of a
3396 \l {https://en.cppreference.com/w/cpp/named_req/InputIterator} {LegacyInputIterator}
3397 and the \c{value_type} of \c InputIterator is one of the following character types:
3398 \list
3399 \li QChar
3400 \li QLatin1Char
3401 \li \c {char}
3402 \li \c {unsigned char}
3403 \li \c {signed char}
3404 \li \c {char8_t}
3405 \li \c char16_t
3406 \li (on platforms, such as Windows, where it is a 16-bit type) \c wchar_t
3407 \li \c char32_t
3408 \endlist
3409*/
3410
3411QString &QString::assign(QAnyStringView s)
3412{
3413 if (s.size() <= capacity() && isDetached()) {
3414 const auto offset = d.freeSpaceAtBegin();
3415 if (offset)
3416 d.setBegin(d.begin() - offset);
3417 resize(0);
3418 s.visit([this](auto input) {
3419 this->append(input);
3420 });
3421 } else {
3422 *this = s.toString();
3423 }
3424 return *this;
3425}
3426
3427#ifndef QT_BOOTSTRAPPED
3428QString &QString::assign_helper(const char32_t *data, qsizetype len)
3429{
3430 // worst case: each char32_t requires a surrogate pair, so
3431 const auto requiredCapacity = len * 2;
3432 if (requiredCapacity <= capacity() && isDetached()) {
3433 const auto offset = d.freeSpaceAtBegin();
3434 if (offset)
3435 d.setBegin(d.begin() - offset);
3436 auto begin = reinterpret_cast<QChar *>(d.begin());
3437 auto ba = QByteArrayView(reinterpret_cast<const std::byte*>(data), len * sizeof(char32_t));
3438 QStringConverter::State state;
3439 const auto end = QUtf32::convertToUnicode(begin, ba, &state, DetectEndianness);
3440 d.size = end - begin;
3441 d.data()[d.size] = u'\0';
3442 } else {
3443 *this = QString::fromUcs4(data, len);
3444 }
3445 return *this;
3446}
3447#endif
3448
3449/*!
3450 \fn QString &QString::remove(qsizetype position, qsizetype n)
3451
3452 Removes \a n characters from the string, starting at the given \a
3453 position index, and returns a reference to the string.
3454
3455 If the specified \a position index is within the string, but \a
3456 position + \a n is beyond the end of the string, the string is
3457 truncated at the specified \a position.
3458
3459 If \a n is <= 0 nothing is changed.
3460
3461 \snippet qstring/main.cpp 37
3462
3463//! [shrinking-erase]
3464 Element removal will preserve the string's capacity and not reduce the
3465 amount of allocated memory. To shed extra capacity and free as much memory
3466 as possible, call squeeze() after the last change to the string's size.
3467//! [shrinking-erase]
3468
3469 \sa insert(), replace()
3470*/
3471QString &QString::remove(qsizetype pos, qsizetype len)
3472{
3473 if (pos < 0) // count from end of string
3474 pos += size();
3475
3476 if (size_t(pos) >= size_t(size()) || len <= 0)
3477 return *this;
3478
3479 len = std::min(len, size() - pos);
3480
3481 if (!d->isShared()) {
3482 d->erase(d.begin() + pos, len);
3483 d.data()[d.size] = u'\0';
3484 } else {
3485 // TODO: either reserve "size()", which is bigger than needed, or
3486 // modify the shrinking-erase docs of this method (since the size
3487 // of "copy" won't have any extra capacity any more)
3488 const qsizetype sz = size() - len;
3489 QString copy{sz, Qt::Uninitialized};
3490 auto begin = d.begin();
3491 auto toRemove_start = d.begin() + pos;
3492 copy.d->copyRanges({{begin, toRemove_start},
3493 {toRemove_start + len, d.end()}});
3494 swap(copy);
3495 }
3496 return *this;
3497}
3498
// Removes every occurrence of needle from s, searching according to cs.
// Does nothing when needle is empty or does not occur in s.
// T is whatever QString::indexOf() accepts as a needle and provides size().
template<typename T>
static void removeStringImpl(QString &s, const T &needle, Qt::CaseSensitivity cs)
{
    const auto needleSize = needle.size();
    if (!needleSize)
        return;

    // avoid detach if nothing to do:
    qsizetype i = s.indexOf(needle, 0, cs);
    if (i < 0)
        return;

    QString::DataPointer &dptr = s.data_ptr();
    auto begin = dptr.begin();
    auto end = dptr.end();

    // Copies the stretches between consecutive hits to dst, starting right
    // after the hit at index i, and returns the end of the written output.
    // It updates i (captured by reference) as it searches. The search always
    // starts at src, which is ahead of the characters written so far when
    // dst lies in the same buffer.
    auto copyFunc = [&](auto &dst) {
        auto src = begin + i + needleSize;
        while (src < end) {
            i = s.indexOf(needle, std::distance(begin, src), cs);
            // No further hit: copy everything up to the end.
            auto hit = i == -1 ? end : begin + i;
            dst = std::copy(src, hit, dst);
            // Skip over the needle that was found.
            src = hit + needleSize;
        }
        return dst;
    };

    if (!dptr->needsDetach()) {
        // Compact in place: characters before the first hit stay where they are.
        auto dst = begin + i;
        dst = copyFunc(dst);
        s.truncate(std::distance(begin, dst));
    } else {
        // Write the result into a new string instead of detaching; it is
        // allocated with the old size and cut down to the real length afterwards.
        QString copy{s.size(), Qt::Uninitialized};
        auto copy_begin = copy.begin();
        auto dst = std::copy(begin, begin + i, copy_begin); // Chunk before the first hit
        dst = copyFunc(dst);
        copy.resize(std::distance(copy_begin, dst));
        s.swap(copy);
    }
}
3539
3540/*!
3541 Removes every occurrence of the given \a str string in this
3542 string, and returns a reference to this string.
3543
3544 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
3545
3546 This is the same as \c replace(str, "", cs).
3547
3548 \include qstring.cpp shrinking-erase
3549
3550 \sa replace()
3551*/
3552QString &QString::remove(const QString &str, Qt::CaseSensitivity cs)
3553{
3554 const auto s = str.d.data();
3555 if (QtPrivate::q_points_into_range(s, d))
3556 removeStringImpl(*this, QStringView{QVarLengthArray(s, s + str.size())}, cs);
3557 else
3558 removeStringImpl(*this, qToStringViewIgnoringNull(str), cs);
3559 return *this;
3560}
3561
3562/*!
3563 \since 5.11
3564 \overload
3565
3566 Removes every occurrence of the given Latin-1 string viewed by \a str
3567 from this string, and returns a reference to this string.
3568
3569 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
3570
3571 This is the same as \c replace(str, "", cs).
3572
3573 \include qstring.cpp shrinking-erase
3574
3575 \sa replace()
3576*/
3577QString &QString::remove(QLatin1StringView str, Qt::CaseSensitivity cs)
3578{
3579 removeStringImpl(*this, str, cs);
3580 return *this;
3581}
3582
3583/*!
3584 \fn QString &QString::removeAt(qsizetype pos)
3585
3586 \since 6.5
3587
3588 Removes the character at index \a pos. If \a pos is out of bounds
3589 (i.e. \a pos >= size()), this function does nothing.
3590
3591 \sa remove()
3592*/
3593
3594/*!
3595 \fn QString &QString::removeFirst()
3596
3597 \since 6.5
3598
3599 Removes the first character in this string. If the string is empty,
3600 this function does nothing.
3601
3602 \sa remove()
3603*/
3604
3605/*!
3606 \fn QString &QString::removeLast()
3607
3608 \since 6.5
3609
3610 Removes the last character in this string. If the string is empty,
3611 this function does nothing.
3612
3613 \sa remove()
3614*/
3615
3616/*!
3617 Removes every occurrence of the character \a ch in this string, and
3618 returns a reference to this string.
3619
3620 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
3621
3622 Example:
3623
3624 \snippet qstring/main.cpp 38
3625
3626 This is the same as \c replace(ch, "", cs).
3627
3628 \include qstring.cpp shrinking-erase
3629
3630 \sa replace()
3631*/
3632QString &QString::remove(QChar ch, Qt::CaseSensitivity cs)
3633{
3634 const qsizetype idx = indexOf(ch, 0, cs);
3635 if (idx == -1)
3636 return *this;
3637
3638 const bool isCase = cs == Qt::CaseSensitive;
3639 ch = isCase ? ch : ch.toCaseFolded();
3640 auto match = [ch, isCase](QChar x) {
3641 return ch == (isCase ? x : x.toCaseFolded());
3642 };
3643
3644
3645 auto begin = d.begin();
3646 auto first_match = begin + idx;
3647 auto end = d.end();
3648 if (!d->isShared()) {
3649 auto it = std::remove_if(first_match, end, match);
3650 d->erase(it, std::distance(it, end));
3651 d.data()[d.size] = u'\0';
3652 } else {
3653 // Instead of detaching, create a new string and copy all characters except for
3654 // the ones we're removing
3655 // TODO: size() is more than the needed since "copy" would be shorter
3656 QString copy{size(), Qt::Uninitialized};
3657 auto dst = copy.d.begin();
3658 auto it = std::copy(begin, first_match, dst); // Chunk before idx
3659 it = std::remove_copy_if(first_match + 1, end, it, match);
3660 copy.d.size = std::distance(dst, it);
3661 copy.d.data()[copy.d.size] = u'\0';
3662 *this = std::move(copy);
3663 }
3664 return *this;
3665}
3666
3667/*!
3668 \fn QString &QString::remove(const QRegularExpression &re)
3669 \since 5.0
3670
3671 Removes every occurrence of the regular expression \a re in the
3672 string, and returns a reference to the string. For example:
3673
3674 \snippet qstring/main.cpp 96
3675
3676 \include qstring.cpp shrinking-erase
3677
3678 \sa indexOf(), lastIndexOf(), replace()
3679*/
3680
3681/*!
3682 \fn template <typename Predicate> QString &QString::removeIf(Predicate pred)
3683 \since 6.1
3684
3685 Removes all elements for which the predicate \a pred returns true
3686 from the string. Returns a reference to the string.
3687
3688 \sa remove()
3689*/
3690
3691static void replace_helper(QString &str, QSpan<qsizetype> indices, qsizetype blen, QStringView after)
3692{
3693 const qsizetype oldSize = str.data_ptr().size;
3694 const qsizetype adjust = indices.size() * (after.size() - blen);
3695 const qsizetype newSize = oldSize + adjust;
3696 using A = QStringAlgorithms<QString>;
3697 if (str.data_ptr().needsDetach() || needsReallocate(str, newSize)) {
3698 A::replace_helper(str, blen, after, indices);
3699 return;
3700 }
3701
3702 if (QtPrivate::q_points_into_range(after.begin(), str)) {
3703 // Copy after if it lies inside our own d.b area (which we could
3704 // possibly invalidate via a realloc or modify by replacement)
3705 A::replace_helper(str, blen, QVarLengthArray(after.begin(), after.end()), indices);
3706 } else {
3707 A::replace_helper(str, blen, after, indices);
3708 }
3709}
3710
3711/*!
3712 \fn QString &QString::replace(qsizetype position, qsizetype n, const QString &after)
3713
3714 Replaces \a n characters beginning at index \a position with
3715 the string \a after and returns a reference to this string.
3716
3717 \note If the specified \a position index is within the string,
3718 but \a position + \a n goes outside the string's range,
3719 then \a n will be adjusted to stop at the end of the string.
3720
3721 Example:
3722
3723 \snippet qstring/main.cpp 40
3724
3725 \sa insert(), remove()
3726*/
3727QString &QString::replace(qsizetype pos, qsizetype len, const QString &after)
3728{
3729 return replace(pos, len, after.constData(), after.size());
3730}
3731
3732/*!
3733 \fn QString &QString::replace(qsizetype position, qsizetype n, const QChar *after, qsizetype alen)
3734 \overload replace()
3735 Replaces \a n characters beginning at index \a position with the
3736 first \a alen characters of the QChar array \a after and returns a
3737 reference to this string.
3738
3739 \a n must not be negative.
3740*/
3741QString &QString::replace(qsizetype pos, qsizetype len, const QChar *after, qsizetype alen)
3742{
3743 Q_PRE(len >= 0);
3744
3745 if (size_t(pos) > size_t(this->size()))
3746 return *this;
3747 if (len > this->size() - pos)
3748 len = this->size() - pos;
3749
3750 qsizetype indices[] = {pos};
3751 replace_helper(*this, indices, len, QStringView{after, alen});
3752 return *this;
3753}
3754
3755/*!
3756 \fn QString &QString::replace(qsizetype position, qsizetype n, QChar after)
3757 \overload replace()
3758
3759 Replaces \a n characters beginning at index \a position with the
3760 character \a after and returns a reference to this string.
3761*/
3762QString &QString::replace(qsizetype pos, qsizetype len, QChar after)
3763{
3764 return replace(pos, len, &after, 1);
3765}
3766
3767/*!
3768 \overload replace()
3769 Replaces every occurrence of the string \a before with the string \a
3770 after and returns a reference to this string.
3771
3772 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
3773
3774 Example:
3775
3776 \snippet qstring/main.cpp 41
3777
3778 \note The replacement text is not rescanned after it is inserted.
3779
3780 Example:
3781
3782 \snippet qstring/main.cpp 86
3783
3784//! [empty-before-arg-in-replace]
3785 \note If you use an empty \a before argument, the \a after argument will be
3786 inserted \e {before and after} each character of the string.
3787//! [empty-before-arg-in-replace]
3788
3789*/
3790QString &QString::replace(const QString &before, const QString &after, Qt::CaseSensitivity cs)
3791{
3792 return replace(before.constData(), before.size(), after.constData(), after.size(), cs);
3793}
3794
3795/*!
3796 \since 4.5
3797 \overload replace()
3798
3799 Replaces each occurrence in this string of the first \a blen
3800 characters of \a before with the first \a alen characters of \a
3801 after and returns a reference to this string.
3802
3803 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
3804
3805 \note If \a before points to an \e empty string (that is, \a blen == 0),
3806 the string pointed to by \a after will be inserted \e {before and after}
3807 each character in this string.
3808*/
3809QString &QString::replace(const QChar *before, qsizetype blen,
3810 const QChar *after, qsizetype alen,
3811 Qt::CaseSensitivity cs)
3812{
3813 if (isEmpty()) {
3814 if (blen)
3815 return *this;
3816 } else {
3817 if (cs == Qt::CaseSensitive && before == after && blen == alen)
3818 return *this;
3819 }
3820 if (alen == 0 && blen == 0)
3821 return *this;
3822 if (alen == 1 && blen == 1)
3823 return replace(*before, *after, cs);
3824
3825 QStringMatcher matcher(before, blen, cs);
3826
3827 qsizetype index = 0;
3828
3829 QVarLengthArray<qsizetype> indices;
3830 while ((index = matcher.indexIn(*this, index)) != -1) {
3831 indices.push_back(index);
3832 if (blen) // Step over before:
3833 index += blen;
3834 else // Only count one instance of empty between any two characters:
3835 index++;
3836 }
3837 if (indices.isEmpty())
3838 return *this;
3839
3840 replace_helper(*this, indices, blen, QStringView{after, alen});
3841 return *this;
3842}
3843
3844/*!
3845 \overload replace()
3846 Replaces every occurrence of the character \a ch in the string with
3847 \a after and returns a reference to this string.
3848
3849 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
3850*/
3851QString& QString::replace(QChar ch, const QString &after, Qt::CaseSensitivity cs)
3852{
3853 if (after.size() == 0)
3854 return remove(ch, cs);
3855
3856 if (after.size() == 1)
3857 return replace(ch, after.front(), cs);
3858
3859 if (size() == 0)
3860 return *this;
3861
3862 const char16_t cc = (cs == Qt::CaseSensitive ? ch.unicode() : ch.toCaseFolded().unicode());
3863
3864 QVarLengthArray<qsizetype> indices;
3865 if (cs == Qt::CaseSensitive) {
3866 const char16_t *begin = d.begin();
3867 const char16_t *end = d.end();
3868 QStringView view(begin, end);
3869 const char16_t *hit = nullptr;
3870 while ((hit = QtPrivate::qustrchr(view, cc)) != end) {
3871 indices.push_back(std::distance(begin, hit));
3872 view = QStringView(std::next(hit), end);
3873 }
3874 } else {
3875 for (qsizetype i = 0; i < d.size; ++i)
3876 if (QChar::toCaseFolded(d.data()[i]) == cc)
3877 indices.push_back(i);
3878 }
3879 if (indices.isEmpty())
3880 return *this;
3881
3882 replace_helper(*this, indices, 1, after);
3883 return *this;
3884}
3885
3886/*!
3887 \overload replace()
3888 Replaces every occurrence of the character \a before with the
3889 character \a after and returns a reference to this string.
3890
3891 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
3892*/
3893QString& QString::replace(QChar before, QChar after, Qt::CaseSensitivity cs)
3894{
3895 const qsizetype idx = indexOf(before, 0, cs);
3896 if (idx == -1)
3897 return *this;
3898
3899 const char16_t achar = after.unicode();
3900 char16_t bchar = before.unicode();
3901
3902 auto matchesCIS = [](char16_t beforeChar) {
3903 return [beforeChar](char16_t ch) { return foldAndCompare(ch, beforeChar); };
3904 };
3905
3906 auto hit = d.begin() + idx;
3907 if (!d.needsDetach()) {
3908 *hit++ = achar;
3909 if (cs == Qt::CaseSensitive) {
3910 std::replace(hit, d.end(), bchar, achar);
3911 } else {
3912 bchar = foldCase(bchar);
3913 std::replace_if(hit, d.end(), matchesCIS(bchar), achar);
3914 }
3915 } else {
3916 QString other{ d.size, Qt::Uninitialized };
3917 auto dest = std::copy(d.begin(), hit, other.d.begin());
3918 *dest++ = achar;
3919 ++hit;
3920 if (cs == Qt::CaseSensitive) {
3921 std::replace_copy(hit, d.end(), dest, bchar, achar);
3922 } else {
3923 bchar = foldCase(bchar);
3924 std::replace_copy_if(hit, d.end(), dest, matchesCIS(bchar), achar);
3925 }
3926
3927 swap(other);
3928 }
3929 return *this;
3930}
3931
3932/*!
3933 \since 4.5
3934 \overload replace()
3935
3936 Replaces every occurrence in this string of the Latin-1 string viewed
3937 by \a before with the Latin-1 string viewed by \a after, and returns a
3938 reference to this string.
3939
3940 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
3941
3942 \note The text is not rescanned after a replacement.
3943
3944 \include qstring.cpp empty-before-arg-in-replace
3945*/
3946QString &QString::replace(QLatin1StringView before, QLatin1StringView after, Qt::CaseSensitivity cs)
3947{
3948 const qsizetype alen = after.size();
3949 const qsizetype blen = before.size();
3950 if (blen == 1 && alen == 1)
3951 return replace(before.front(), after.front(), cs);
3952
3953 QVarLengthArray<char16_t> a = qt_from_latin1_to_qvla(after);
3954 QVarLengthArray<char16_t> b = qt_from_latin1_to_qvla(before);
3955 return replace((const QChar *)b.data(), blen, (const QChar *)a.data(), alen, cs);
3956}
3957
3958/*!
3959 \since 4.5
3960 \overload replace()
3961
3962 Replaces every occurrence in this string of the Latin-1 string viewed
3963 by \a before with the string \a after, and returns a reference to this
3964 string.
3965
3966 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
3967
3968 \note The text is not rescanned after a replacement.
3969
3970 \include qstring.cpp empty-before-arg-in-replace
3971*/
3972QString &QString::replace(QLatin1StringView before, const QString &after, Qt::CaseSensitivity cs)
3973{
3974 const qsizetype blen = before.size();
3975 if (blen == 1 && after.size() == 1)
3976 return replace(before.front(), after.front(), cs);
3977
3978 QVarLengthArray<char16_t> b = qt_from_latin1_to_qvla(before);
3979 return replace((const QChar *)b.data(), blen, after.constData(), after.d.size, cs);
3980}
3981
3982/*!
3983 \since 4.5
3984 \overload replace()
3985
3986 Replaces every occurrence of the string \a before with the string \a
3987 after and returns a reference to this string.
3988
3989 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
3990
3991 \note The text is not rescanned after a replacement.
3992
3993 \include qstring.cpp empty-before-arg-in-replace
3994*/
3995QString &QString::replace(const QString &before, QLatin1StringView after, Qt::CaseSensitivity cs)
3996{
3997 const qsizetype alen = after.size();
3998 if (before.size() == 1 && alen == 1)
3999 return replace(before.front(), after.front(), cs);
4000
4001 QVarLengthArray<char16_t> a = qt_from_latin1_to_qvla(after);
4002 return replace(before.constData(), before.d.size, (const QChar *)a.data(), alen, cs);
4003}
4004
4005/*!
4006 \since 4.5
4007 \overload replace()
4008
4009 Replaces every occurrence of the character \a c with the string \a
4010 after and returns a reference to this string.
4011
4012 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4013
4014 \note The text is not rescanned after a replacement.
4015*/
4016QString &QString::replace(QChar c, QLatin1StringView after, Qt::CaseSensitivity cs)
4017{
4018 const qsizetype alen = after.size();
4019 if (alen == 1)
4020 return replace(c, after.front(), cs);
4021
4022 QVarLengthArray<char16_t> a = qt_from_latin1_to_qvla(after);
4023 return replace(&c, 1, (const QChar *)a.data(), alen, cs);
4024}
4025
4026/*!
4027 \fn bool QString::operator==(const QString &lhs, const QString &rhs)
4028 \overload operator==()
4029
4030 Returns \c true if string \a lhs is equal to string \a rhs; otherwise
4031 returns \c false.
4032
4033 \include qstring.cpp compare-isNull-vs-isEmpty
4034
4035 \sa {Comparing Strings}
4036*/
4037
4038/*!
4039 \fn bool QString::operator==(const QString &lhs, const QLatin1StringView &rhs)
4040
4041 \overload operator==()
4042
4043 Returns \c true if \a lhs is equal to \a rhs; otherwise
4044 returns \c false.
4045*/
4046
4047/*!
4048 \fn bool QString::operator==(const QLatin1StringView &lhs, const QString &rhs)
4049
4050 \overload operator==()
4051
4052 Returns \c true if \a lhs is equal to \a rhs; otherwise
4053 returns \c false.
4054*/
4055
4056/*! \fn bool QString::operator==(const QString &lhs, const QByteArray &rhs)
4057
4058 \overload operator==()
4059
4060 The \a rhs byte array is converted to a QUtf8StringView.
4061
4062 You can disable this operator by defining
4063 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
4064 can be useful if you want to ensure that all user-visible strings
4065 go through QObject::tr(), for example.
4066
4067 Returns \c true if string \a lhs is lexically equal to \a rhs.
4068 Otherwise returns \c false.
4069*/
4070
4071/*! \fn bool QString::operator==(const QString &lhs, const char * const &rhs)
4072
4073 \overload operator==()
4074
4075 The \a rhs const char pointer is converted to a QUtf8StringView.
4076
4077 You can disable this operator by defining
4078 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
4079 can be useful if you want to ensure that all user-visible strings
4080 go through QObject::tr(), for example.
4081*/
4082
4083/*!
4084 \fn bool QString::operator<(const QString &lhs, const QString &rhs)
4085
4086 \overload operator<()
4087
4088 Returns \c true if string \a lhs is lexically less than string
4089 \a rhs; otherwise returns \c false.
4090
4091 \sa {Comparing Strings}
4092*/
4093
4094/*!
4095 \fn bool QString::operator<(const QString &lhs, const QLatin1StringView &rhs)
4096
4097 \overload operator<()
4098
4099 Returns \c true if \a lhs is lexically less than \a rhs;
4100 otherwise returns \c false.
4101*/
4102
4103/*!
4104 \fn bool QString::operator<(const QLatin1StringView &lhs, const QString &rhs)
4105
4106 \overload operator<()
4107
4108 Returns \c true if \a lhs is lexically less than \a rhs;
4109 otherwise returns \c false.
4110*/
4111
4112/*! \fn bool QString::operator<(const QString &lhs, const QByteArray &rhs)
4113
4114 \overload operator<()
4115
4116 The \a rhs byte array is converted to a QUtf8StringView.
4117 If any NUL characters ('\\0') are embedded in the byte array, they will be
4118 included in the transformation.
4119
4120 You can disable this operator by defining
4121 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
4122 can be useful if you want to ensure that all user-visible strings
4123 go through QObject::tr(), for example.
4124*/
4125
4126/*! \fn bool QString::operator<(const QString &lhs, const char * const &rhs)
4127
4128 Returns \c true if string \a lhs is lexically less than string \a rhs.
4129 Otherwise returns \c false.
4130
4131 \overload operator<()
4132
4133 The \a rhs const char pointer is converted to a QUtf8StringView.
4134
4135 You can disable this operator by defining
4136 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
4137 can be useful if you want to ensure that all user-visible strings
4138 go through QObject::tr(), for example.
4139*/
4140
4141/*! \fn bool QString::operator<=(const QString &lhs, const QString &rhs)
4142
4143 Returns \c true if string \a lhs is lexically less than or equal to
4144 string \a rhs; otherwise returns \c false.
4145
4146 \sa {Comparing Strings}
4147*/
4148
4149/*!
4150 \fn bool QString::operator<=(const QString &lhs, const QLatin1StringView &rhs)
4151
4152 \overload operator<=()
4153
4154 Returns \c true if \a lhs is lexically less than or equal to \a rhs;
4155 otherwise returns \c false.
4156*/
4157
4158/*!
4159 \fn bool QString::operator<=(const QLatin1StringView &lhs, const QString &rhs)
4160
4161 \overload operator<=()
4162
4163 Returns \c true if \a lhs is lexically less than or equal to \a rhs;
4164 otherwise returns \c false.
4165*/
4166
4167/*! \fn bool QString::operator<=(const QString &lhs, const QByteArray &rhs)
4168
4169 \overload operator<=()
4170
4171 The \a rhs byte array is converted to a QUtf8StringView.
4172 If any NUL characters ('\\0') are embedded in the byte array, they will be
4173 included in the transformation.
4174
4175 You can disable this operator by defining
4176 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
4177 can be useful if you want to ensure that all user-visible strings
4178 go through QObject::tr(), for example.
4179*/
4180
4181/*! \fn bool QString::operator<=(const QString &lhs, const char * const &rhs)
4182
4183 \overload operator<=()
4184
4185 The \a rhs const char pointer is converted to a QUtf8StringView.
4186
4187 You can disable this operator by defining
4188 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
4189 can be useful if you want to ensure that all user-visible strings
4190 go through QObject::tr(), for example.
4191*/
4192
4193/*! \fn bool QString::operator>(const QString &lhs, const QString &rhs)
4194
4195 Returns \c true if string \a lhs is lexically greater than string \a rhs;
4196 otherwise returns \c false.
4197
4198 \sa {Comparing Strings}
4199*/
4200
4201/*!
4202 \fn bool QString::operator>(const QString &lhs, const QLatin1StringView &rhs)
4203
4204 \overload operator>()
4205
4206 Returns \c true if \a lhs is lexically greater than \a rhs;
4207 otherwise returns \c false.
4208*/
4209
4210/*!
4211 \fn bool QString::operator>(const QLatin1StringView &lhs, const QString &rhs)
4212
4213 \overload operator>()
4214
4215 Returns \c true if \a lhs is lexically greater than \a rhs;
4216 otherwise returns \c false.
4217*/
4218
4219/*! \fn bool QString::operator>(const QString &lhs, const QByteArray &rhs)
4220
4221 \overload operator>()
4222
4223 The \a rhs byte array is converted to a QUtf8StringView.
4224 If any NUL characters ('\\0') are embedded in the byte array, they will be
4225 included in the transformation.
4226
4227 You can disable this operator by defining
4228 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
4229 can be useful if you want to ensure that all user-visible strings
4230 go through QObject::tr(), for example.
4231*/
4232
4233/*! \fn bool QString::operator>(const QString &lhs, const char * const &rhs)
4234
4235 \overload operator>()
4236
4237 The \a rhs const char pointer is converted to a QUtf8StringView.
4238
4239 You can disable this operator by defining \l QT_NO_CAST_FROM_ASCII
4240 when you compile your applications. This can be useful if you want
4241 to ensure that all user-visible strings go through QObject::tr(),
4242 for example.
4243*/
4244
4245/*! \fn bool QString::operator>=(const QString &lhs, const QString &rhs)
4246
4247 Returns \c true if string \a lhs is lexically greater than or equal to
4248 string \a rhs; otherwise returns \c false.
4249
4250 \sa {Comparing Strings}
4251*/
4252
4253/*!
4254 \fn bool QString::operator>=(const QString &lhs, const QLatin1StringView &rhs)
4255
4256 \overload operator>=()
4257
4258 Returns \c true if \a lhs is lexically greater than or equal to \a rhs;
4259 otherwise returns \c false.
4260*/
4261
4262/*!
4263 \fn bool QString::operator>=(const QLatin1StringView &lhs, const QString &rhs)
4264
4265 \overload operator>=()
4266
4267 Returns \c true if \a lhs is lexically greater than or equal to \a rhs;
4268 otherwise returns \c false.
4269*/
4270
4271/*! \fn bool QString::operator>=(const QString &lhs, const QByteArray &rhs)
4272
4273 \overload operator>=()
4274
4275 The \a rhs byte array is converted to a QUtf8StringView.
4276 If any NUL characters ('\\0') are embedded in the byte array, they will be
4277 included in the transformation.
4278
4279 You can disable this operator by defining \l QT_NO_CAST_FROM_ASCII
4280 when you compile your applications. This can be useful if you want
4281 to ensure that all user-visible strings go through QObject::tr(),
4282 for example.
4283*/
4284
4285/*! \fn bool QString::operator>=(const QString &lhs, const char * const &rhs)
4286
4287 \overload operator>=()
4288
4289 The \a rhs const char pointer is converted to a QUtf8StringView.
4290
4291 You can disable this operator by defining \l QT_NO_CAST_FROM_ASCII
4292 when you compile your applications. This can be useful if you want
4293 to ensure that all user-visible strings go through QObject::tr(),
4294 for example.
4295*/
4296
4297/*! \fn bool QString::operator!=(const QString &lhs, const QString &rhs)
4298
4299 Returns \c true if string \a lhs is not equal to string \a rhs;
4300 otherwise returns \c false.
4301
4302 \sa {Comparing Strings}
4303*/
4304
4305/*! \fn bool QString::operator!=(const QString &lhs, const QLatin1StringView &rhs)
4306
4307 Returns \c true if string \a lhs is not equal to string \a rhs.
4308 Otherwise returns \c false.
4309
4310 \overload operator!=()
4311*/
4312
4313/*! \fn bool QString::operator!=(const QString &lhs, const QByteArray &rhs)
4314
4315 \overload operator!=()
4316
4317 The \a rhs byte array is converted to a QUtf8StringView.
4318 If any NUL characters ('\\0') are embedded in the byte array, they will be
4319 included in the transformation.
4320
4321 You can disable this operator by defining \l QT_NO_CAST_FROM_ASCII
4322 when you compile your applications. This can be useful if you want
4323 to ensure that all user-visible strings go through QObject::tr(),
4324 for example.
4325*/
4326
4327/*! \fn bool QString::operator!=(const QString &lhs, const char * const &rhs)
4328
4329 \overload operator!=()
4330
4331 The \a rhs const char pointer is converted to a QUtf8StringView.
4332
4333 You can disable this operator by defining
4334 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
4335 can be useful if you want to ensure that all user-visible strings
4336 go through QObject::tr(), for example.
4337*/
4338
4339/*! \fn bool QString::operator==(const QByteArray &lhs, const QString &rhs)
4340
4341 Returns \c true if byte array \a lhs is equal to the UTF-8 encoding of
4342 \a rhs; otherwise returns \c false.
4343
4344 The comparison is case sensitive.
4345
4346 You can disable this operator by defining \c
4347 QT_NO_CAST_FROM_ASCII when you compile your applications. You
4348 then need to call QString::fromUtf8(), QString::fromLatin1(),
4349 or QString::fromLocal8Bit() explicitly if you want to convert the byte
4350 array to a QString before doing the comparison.
4351*/
4352
4353/*! \fn bool QString::operator!=(const QByteArray &lhs, const QString &rhs)
4354
4355 Returns \c true if byte array \a lhs is not equal to the UTF-8 encoding of
4356 \a rhs; otherwise returns \c false.
4357
4358 The comparison is case sensitive.
4359
4360 You can disable this operator by defining \c
4361 QT_NO_CAST_FROM_ASCII when you compile your applications. You
4362 then need to call QString::fromUtf8(), QString::fromLatin1(),
4363 or QString::fromLocal8Bit() explicitly if you want to convert the byte
4364 array to a QString before doing the comparison.
4365*/
4366
4367/*! \fn bool QString::operator<(const QByteArray &lhs, const QString &rhs)
4368
4369 Returns \c true if byte array \a lhs is lexically less than the UTF-8 encoding
4370 of \a rhs; otherwise returns \c false.
4371
4372 The comparison is case sensitive.
4373
4374 You can disable this operator by defining \c
4375 QT_NO_CAST_FROM_ASCII when you compile your applications. You
4376 then need to call QString::fromUtf8(), QString::fromLatin1(),
4377 or QString::fromLocal8Bit() explicitly if you want to convert the byte
4378 array to a QString before doing the comparison.
4379*/
4380
4381/*! \fn bool QString::operator>(const QByteArray &lhs, const QString &rhs)
4382
4383 Returns \c true if byte array \a lhs is lexically greater than the UTF-8
4384 encoding of \a rhs; otherwise returns \c false.
4385
4386 The comparison is case sensitive.
4387
4388 You can disable this operator by defining \c
4389 QT_NO_CAST_FROM_ASCII when you compile your applications. You
4390 then need to call QString::fromUtf8(), QString::fromLatin1(),
4391 or QString::fromLocal8Bit() explicitly if you want to convert the byte
4392 array to a QString before doing the comparison.
4393*/
4394
4395/*! \fn bool QString::operator<=(const QByteArray &lhs, const QString &rhs)
4396
4397 Returns \c true if byte array \a lhs is lexically less than or equal to the
4398 UTF-8 encoding of \a rhs; otherwise returns \c false.
4399
4400 The comparison is case sensitive.
4401
4402 You can disable this operator by defining \c
4403 QT_NO_CAST_FROM_ASCII when you compile your applications. You
4404 then need to call QString::fromUtf8(), QString::fromLatin1(),
4405 or QString::fromLocal8Bit() explicitly if you want to convert the byte
4406 array to a QString before doing the comparison.
4407*/
4408
4409/*! \fn bool QString::operator>=(const QByteArray &lhs, const QString &rhs)
4410
4411 Returns \c true if byte array \a lhs is lexically greater than or equal to the UTF-8
4412 encoding of \a rhs; otherwise returns \c false.
4413
4414 The comparison is case sensitive.
4415
4416 You can disable this operator by defining \c
4417 QT_NO_CAST_FROM_ASCII when you compile your applications. You
4418 then need to call QString::fromUtf8(), QString::fromLatin1(),
4419 or QString::fromLocal8Bit() explicitly if you want to convert the byte
4420 array to a QString before doing the comparison.
4421*/
4422
4423/*!
4424 \include qstring.qdocinc {qstring-first-index-of} {string} {str}
4425
4426 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4427
4428 Example:
4429
4430 \snippet qstring/main.cpp 24
4431
4432 \include qstring.qdocinc negative-index-start-search-from-end
4433
4434 \sa lastIndexOf(), contains(), count()
4435*/
4436qsizetype QString::indexOf(const QString &str, qsizetype from, Qt::CaseSensitivity cs) const
4437{
4438 return QtPrivate::findString(QStringView(unicode(), size()), from, QStringView(str.unicode(), str.size()), cs);
4439}
4440
4441/*!
4442 \fn qsizetype QString::indexOf(QStringView str, qsizetype from, Qt::CaseSensitivity cs) const
4443 \since 5.14
4444 \overload indexOf()
4445
4446 \include qstring.qdocinc {qstring-first-index-of} {string view} {str}
4447
4448 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4449
4450 \include qstring.qdocinc negative-index-start-search-from-end
4451
4452 \sa QStringView::indexOf(), lastIndexOf(), contains(), count()
4453*/
4454
4455/*!
4456 \since 4.5
4457
4458 \include {qstring.qdocinc} {qstring-first-index-of} {Latin-1 string viewed by} {str}
4459
4460 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4461
4462 Example:
4463
4464 \snippet qstring/main.cpp 24
4465
4466 \include qstring.qdocinc negative-index-start-search-from-end
4467
4468 \sa lastIndexOf(), contains(), count()
4469*/
4470
4471qsizetype QString::indexOf(QLatin1StringView str, qsizetype from, Qt::CaseSensitivity cs) const
4472{
4473 return QtPrivate::findString(QStringView(unicode(), size()), from, str, cs);
4474}
4475
4476/*!
4477 \fn qsizetype QString::indexOf(QChar ch, qsizetype from, Qt::CaseSensitivity cs) const
4478 \overload indexOf()
4479
4480 \include qstring.qdocinc {qstring-first-index-of} {character} {ch}
4481*/
4482
4483/*!
4484 \include qstring.qdocinc {qstring-last-index-of} {string} {str}
4485
4486 \include qstring.qdocinc negative-index-start-search-from-end
4487
4488 Returns -1 if \a str is not found.
4489
4490 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4491
4492 Example:
4493
4494 \snippet qstring/main.cpp 29
4495
4496 \note When searching for a 0-length \a str, the match at the end of
4497 the data is excluded from the search by a negative \a from, even
4498 though \c{-1} is normally thought of as searching from the end of the
4499 string: the match at the end is \e after the last character, so it is
4500 excluded. To include such a final empty match, either give a positive
4501 value for \a from or omit the \a from parameter entirely.
4502
4503 \sa indexOf(), contains(), count()
4504*/
4505qsizetype QString::lastIndexOf(const QString &str, qsizetype from, Qt::CaseSensitivity cs) const
4506{
4507 return QtPrivate::lastIndexOf(QStringView(*this), from, str, cs);
4508}
4509
4510/*!
4511 \fn qsizetype QString::lastIndexOf(const QString &str, Qt::CaseSensitivity cs = Qt::CaseSensitive) const
4512 \since 6.2
4513 \overload lastIndexOf()
4514
4515 Returns the index position of the last occurrence of the string \a
4516 str in this string. Returns -1 if \a str is not found.
4517
4518 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4519
4520 Example:
4521
4522 \snippet qstring/main.cpp 29
4523
4524 \sa indexOf(), contains(), count()
4525*/
4526
4527
4528/*!
4529 \since 4.5
4530 \overload lastIndexOf()
4531
4532 \include qstring.qdocinc {qstring-last-index-of} {Latin-1 string viewed by} {str}
4533
4534 \include qstring.qdocinc negative-index-start-search-from-end
4535
4536 Returns -1 if \a str is not found.
4537
4538 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4539
4540 Example:
4541
4542 \snippet qstring/main.cpp 29
4543
4544 \note When searching for a 0-length \a str, the match at the end of
4545 the data is excluded from the search by a negative \a from, even
4546 though \c{-1} is normally thought of as searching from the end of the
4547 string: the match at the end is \e after the last character, so it is
4548 excluded. To include such a final empty match, either give a positive
4549 value for \a from or omit the \a from parameter entirely.
4550
4551 \sa indexOf(), contains(), count()
4552*/
4553qsizetype QString::lastIndexOf(QLatin1StringView str, qsizetype from, Qt::CaseSensitivity cs) const
4554{
4555 return QtPrivate::lastIndexOf(*this, from, str, cs);
4556}
4557
4558/*!
4559 \fn qsizetype QString::lastIndexOf(QLatin1StringView str, Qt::CaseSensitivity cs = Qt::CaseSensitive) const
4560 \since 6.2
4561 \overload lastIndexOf()
4562
4563 Returns the index position of the last occurrence of the string \a
4564 str in this string. Returns -1 if \a str is not found.
4565
4566 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4567
4568 Example:
4569
4570 \snippet qstring/main.cpp 29
4571
4572 \sa indexOf(), contains(), count()
4573*/
4574
4575/*!
4576 \fn qsizetype QString::lastIndexOf(QChar ch, qsizetype from, Qt::CaseSensitivity cs) const
4577 \overload lastIndexOf()
4578
4579 \include qstring.qdocinc {qstring-last-index-of} {character} {ch}
4580*/
4581
4582/*!
4583 \fn qsizetype QString::lastIndexOf(QChar ch, Qt::CaseSensitivity) const
4584 \since 6.3
4585 \overload lastIndexOf()
4586*/
4587
4588/*!
4589 \fn qsizetype QString::lastIndexOf(QStringView str, qsizetype from, Qt::CaseSensitivity cs) const
4590 \since 5.14
4591 \overload lastIndexOf()
4592
4593 \include qstring.qdocinc {qstring-last-index-of} {string view} {str}
4594
4595 \include qstring.qdocinc negative-index-start-search-from-end
4596
4597 Returns -1 if \a str is not found.
4598
4599 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4600
4601 \note When searching for a 0-length \a str, the match at the end of
4602 the data is excluded from the search by a negative \a from, even
4603 though \c{-1} is normally thought of as searching from the end of the
4604 string: the match at the end is \e after the last character, so it is
4605 excluded. To include such a final empty match, either give a positive
4606 value for \a from or omit the \a from parameter entirely.
4607
4608 \sa indexOf(), contains(), count()
4609*/
4610
4611/*!
4612 \fn qsizetype QString::lastIndexOf(QStringView str, Qt::CaseSensitivity cs = Qt::CaseSensitive) const
4613 \since 6.2
4614 \overload lastIndexOf()
4615
4616 Returns the index position of the last occurrence of the string view \a
4617 str in this string. Returns -1 if \a str is not found.
4618
4619 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4620
4621 \sa indexOf(), contains(), count()
4622*/
4623
4624#if QT_CONFIG(regularexpression)
// Describes one backreference (such as \1) found in the replacement text of
// QString::replace(const QRegularExpression &, const QString &): where it is
// located in that text, how long it is, and which capture group it names.
4625struct QStringCapture
4626{
4627 qsizetype pos; // index of the backslash within the replacement text
4628 qsizetype len; // characters the reference occupies: 2, or 3 with a second digit
4629 int no; // number of the capturing group being referenced
4630};
4631Q_DECLARE_TYPEINFO(QStringCapture, Q_PRIMITIVE_TYPE);
4632
4633/*!
4634 \overload replace()
4635 \since 5.0
4636
4637 Replaces every occurrence of the regular expression \a re in the
4638 string with \a after. Returns a reference to the string. For
4639 example:
4640
4641 \snippet qstring/main.cpp 87
4642
4643 For regular expressions containing capturing groups,
4644 occurrences of \b{\\1}, \b{\\2}, ..., in \a after are replaced
4645 with the string captured by the corresponding capturing group.
4646
4647 \snippet qstring/main.cpp 88
4648
4649 \sa indexOf(), lastIndexOf(), remove(), QRegularExpression, QRegularExpressionMatch
4650*/
4651QString &QString::replace(const QRegularExpression &re, const QString &after)
4652{
4653 if (!re.isValid()) {
4654 qtWarnAboutInvalidRegularExpression(re, "QString", "replace");
4655 return *this;
4656 }
4657
4658 const QString copy(*this);
4659 QRegularExpressionMatchIterator iterator = re.globalMatch(copy);
4660 if (!iterator.hasNext()) // no matches at all
4661 return *this;
4662
4663 reallocData(d.size, QArrayData::KeepSize);
4664
4665 qsizetype numCaptures = re.captureCount();
4666
4667 // 1. build the backreferences list, holding where the backreferences
4668 // are in the replacement string
4669 QVarLengthArray<QStringCapture> backReferences;
4670 const qsizetype al = after.size();
4671 const QChar *ac = after.unicode();
4672
4673 for (qsizetype i = 0; i < al - 1; i++) {
4674 if (ac[i] == u'\\') {
4675 int no = ac[i + 1].digitValue();
4676 if (no > 0 && no <= numCaptures) {
4677 QStringCapture backReference;
4678 backReference.pos = i;
4679 backReference.len = 2;
4680
4681 if (i < al - 2) {
4682 int secondDigit = ac[i + 2].digitValue();
4683 if (secondDigit != -1 && ((no * 10) + secondDigit) <= numCaptures) {
4684 no = (no * 10) + secondDigit;
4685 ++backReference.len;
4686 }
4687 }
4688
4689 backReference.no = no;
4690 backReferences.append(backReference);
4691 }
4692 }
4693 }
4694
4695 // 2. iterate on the matches. For every match, copy in chunks
4696 // - the part before the match
4697 // - the after string, with the proper replacements for the backreferences
4698
4699 qsizetype newLength = 0; // length of the new string, with all the replacements
4700 qsizetype lastEnd = 0;
4701 QVarLengthArray<QStringView> chunks;
4702 const QStringView copyView{ copy }, afterView{ after };
4703 while (iterator.hasNext()) {
4704 QRegularExpressionMatch match = iterator.next();
4705 qsizetype len;
4706 // add the part before the match
4707 len = match.capturedStart() - lastEnd;
4708 if (len > 0) {
4709 chunks << copyView.mid(lastEnd, len);
4710 newLength += len;
4711 }
4712
4713 lastEnd = 0;
4714 // add the after string, with replacements for the backreferences
4715 for (const QStringCapture &backReference : std::as_const(backReferences)) {
4716 // part of "after" before the backreference
4717 len = backReference.pos - lastEnd;
4718 if (len > 0) {
4719 chunks << afterView.mid(lastEnd, len);
4720 newLength += len;
4721 }
4722
4723 // backreference itself
4724 len = match.capturedLength(backReference.no);
4725 if (len > 0) {
4726 chunks << copyView.mid(match.capturedStart(backReference.no), len);
4727 newLength += len;
4728 }
4729
4730 lastEnd = backReference.pos + backReference.len;
4731 }
4732
4733 // add the last part of the after string
4734 len = afterView.size() - lastEnd;
4735 if (len > 0) {
4736 chunks << afterView.mid(lastEnd, len);
4737 newLength += len;
4738 }
4739
4740 lastEnd = match.capturedEnd();
4741 }
4742
4743 // 3. trailing string after the last match
4744 if (copyView.size() > lastEnd) {
4745 chunks << copyView.mid(lastEnd);
4746 newLength += copyView.size() - lastEnd;
4747 }
4748
4749 // 4. assemble the chunks together
4750 resize(newLength);
4751 qsizetype i = 0;
4752 QChar *uc = data();
4753 for (const QStringView &chunk : std::as_const(chunks)) {
4754 qsizetype len = chunk.size();
4755 memcpy(uc + i, chunk.constData(), len * sizeof(QChar));
4756 i += len;
4757 }
4758
4759 return *this;
4760}
4761#endif // QT_CONFIG(regularexpression)
4762
4763/*!
4764 Returns the number of (potentially overlapping) occurrences of
4765 the string \a str in this string.
4766
4767 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4768
4769 \sa contains(), indexOf()
4770*/
4771
4772qsizetype QString::count(const QString &str, Qt::CaseSensitivity cs) const
4773{
4774 return QtPrivate::count(QStringView(unicode(), size()), QStringView(str.unicode(), str.size()), cs);
4775}
4776
4777/*!
4778 \overload count()
4779
4780 Returns the number of occurrences of character \a ch in the string.
4781
4782 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4783
4784 \sa contains(), indexOf()
4785*/
4786
4787qsizetype QString::count(QChar ch, Qt::CaseSensitivity cs) const
4788{
4789 return QtPrivate::count(QStringView(unicode(), size()), ch, cs);
4790}
4791
4792/*!
4793 \since 6.0
4794 \overload count()
4795 Returns the number of (potentially overlapping) occurrences of the
4796 string view \a str in this string.
4797
4798 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4799
4800 \sa contains(), indexOf()
4801*/
4802qsizetype QString::count(QStringView str, Qt::CaseSensitivity cs) const
4803{
4804 return QtPrivate::count(*this, str, cs);
4805}
4806
4807/*! \fn bool QString::contains(const QString &str, Qt::CaseSensitivity cs = Qt::CaseSensitive) const
4808
4809 Returns \c true if this string contains an occurrence of the string
4810 \a str; otherwise returns \c false.
4811
4812 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4813
4814 Example:
4815 \snippet qstring/main.cpp 17
4816
4817 \sa indexOf(), count()
4818*/
4819
4820/*! \fn bool QString::contains(QLatin1StringView str, Qt::CaseSensitivity cs = Qt::CaseSensitive) const
4821 \since 5.3
4822
4823 \overload contains()
4824
4825 Returns \c true if this string contains an occurrence of the latin-1 string
4826 \a str; otherwise returns \c false.
4827*/
4828
4829/*! \fn bool QString::contains(QChar ch, Qt::CaseSensitivity cs = Qt::CaseSensitive) const
4830
4831 \overload contains()
4832
4833 Returns \c true if this string contains an occurrence of the
4834 character \a ch; otherwise returns \c false.
4835*/
4836
4837/*! \fn bool QString::contains(QStringView str, Qt::CaseSensitivity cs = Qt::CaseSensitive) const
4838 \since 5.14
4839 \overload contains()
4840
4841 Returns \c true if this string contains an occurrence of the string view
4842 \a str; otherwise returns \c false.
4843
4844 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4845
4846 \sa indexOf(), count()
4847*/
4848
4849#if QT_CONFIG(regularexpression)
4850/*!
4851 \since 5.5
4852
4853 Returns the index position of the first match of the regular
4854 expression \a re in the string, searching forward from index
4855 position \a from. Returns -1 if \a re didn't match anywhere.
4856
4857 If the match is successful and \a rmatch is not \nullptr, it also
4858 writes the results of the match into the QRegularExpressionMatch object
4859 pointed to by \a rmatch.
4860
4861 Example:
4862
4863 \snippet qstring/main.cpp 93
4864*/
4865qsizetype QString::indexOf(const QRegularExpression &re, qsizetype from, QRegularExpressionMatch *rmatch) const
4866{
4867 return QtPrivate::indexOf(QStringView(*this), this, re, from, rmatch);
4868}
4869
4870/*!
4871 \since 5.5
4872
4873 Returns the index position of the last match of the regular
4874 expression \a re in the string, which starts before the index
4875 position \a from.
4876
4877 \include qstring.qdocinc negative-index-start-search-from-end
4878
4879 Returns -1 if \a re didn't match anywhere.
4880
4881 If the match is successful and \a rmatch is not \nullptr, it also
4882 writes the results of the match into the QRegularExpressionMatch object
4883 pointed to by \a rmatch.
4884
4885 Example:
4886
4887 \snippet qstring/main.cpp 94
4888
4889 \note Due to how the regular expression matching algorithm works,
4890 this function will actually match repeatedly from the beginning of
4891 the string until the position \a from is reached.
4892
4893 \note When searching for a regular expression \a re that may match
4894 0 characters, the match at the end of the data is excluded from the
4895 search by a negative \a from, even though \c{-1} is normally
4896 thought of as searching from the end of the string: the match at
4897 the end is \e after the last character, so it is excluded. To
4898 include such a final empty match, either give a positive value for
4899 \a from or omit the \a from parameter entirely.
4900*/
4901qsizetype QString::lastIndexOf(const QRegularExpression &re, qsizetype from, QRegularExpressionMatch *rmatch) const
4902{
4903 return QtPrivate::lastIndexOf(QStringView(*this), this, re, from, rmatch);
4904}
4905
4906/*!
4907 \fn qsizetype QString::lastIndexOf(const QRegularExpression &re, QRegularExpressionMatch *rmatch = nullptr) const
4908 \since 6.2
4909 \overload lastIndexOf()
4910
4911 Returns the index position of the last match of the regular
4912 expression \a re in the string. Returns -1 if \a re didn't match anywhere.
4913
4914 If the match is successful and \a rmatch is not \nullptr, it also
4915 writes the results of the match into the QRegularExpressionMatch object
4916 pointed to by \a rmatch.
4917
4918 Example:
4919
4920 \snippet qstring/main.cpp 94
4921
4922 \note Due to how the regular expression matching algorithm works,
4923 this function will actually match repeatedly from the beginning of
4924 the string until the end of the string is reached.
4925*/
4926
4927/*!
4928 \since 5.1
4929
4930 Returns \c true if the regular expression \a re matches somewhere in this
4931 string; otherwise returns \c false.
4932
4933 If the match is successful and \a rmatch is not \nullptr, it also
4934 writes the results of the match into the QRegularExpressionMatch object
4935 pointed to by \a rmatch.
4936
4937 \sa QRegularExpression::match()
4938*/
4939
4940bool QString::contains(const QRegularExpression &re, QRegularExpressionMatch *rmatch) const
4941{
4942 return QtPrivate::contains(QStringView(*this), this, re, rmatch);
4943}
4944
4945/*!
4946 \overload count()
4947 \since 5.0
4948
4949 Returns the number of times the regular expression \a re matches
4950 in the string.
4951
4952 For historical reasons, this function counts overlapping matches,
4953 so in the example below, there are four instances of "ana" or
4954 "ama":
4955
4956 \snippet qstring/main.cpp 95
4957
4958 This behavior is different from simply iterating over the matches
4959 in the string using QRegularExpressionMatchIterator.
4960
4961 \sa QRegularExpression::globalMatch()
4962*/
4963qsizetype QString::count(const QRegularExpression &re) const
4964{
4965 return QtPrivate::count(QStringView(*this), re);
4966}
4967#endif // QT_CONFIG(regularexpression)
4968
4969#if QT_DEPRECATED_SINCE(6, 4)
4970/*! \fn qsizetype QString::count() const
4971 \deprecated [6.4] Use size() or length() instead.
4972 \overload count()
4973
4974 Same as size().
4975*/
4976#endif
4977
4978/*!
4979 \enum QString::SectionFlag
4980
4981 This enum specifies flags that can be used to affect various
4982 aspects of the section() function's behavior with respect to
4983 separators and empty fields.
4984
4985 \value SectionDefault Empty fields are counted, leading and
4986 trailing separators are not included, and the separator is
4987 compared case sensitively.
4988
4989 \value SectionSkipEmpty Treat empty fields as if they don't exist,
4990 i.e. they are not considered as far as \e start and \e end are
4991 concerned.
4992
4993 \value SectionIncludeLeadingSep Include the leading separator (if
4994 any) in the result string.
4995
4996 \value SectionIncludeTrailingSep Include the trailing separator
4997 (if any) in the result string.
4998
4999 \value SectionCaseInsensitiveSeps Compare the separator
5000 case-insensitively.
5001
5002 \sa section()
5003*/
5004
5005/*!
5006 \fn QString QString::section(QChar sep, qsizetype start, qsizetype end = -1, SectionFlags flags) const
5007
5008 This function returns a section of the string.
5009
5010 This string is treated as a sequence of fields separated by the
5011 character, \a sep. The returned string consists of the fields from
5012 position \a start to position \a end inclusive. If \a end is not
5013 specified, all fields from position \a start to the end of the
5014 string are included. Fields are numbered 0, 1, 2, etc., counting
5015 from the left, and -1, -2, etc., counting from right to left.
5016
5017 The \a flags argument can be used to affect some aspects of the
5018 function's behavior, e.g. whether to be case sensitive, whether
5019 to skip empty fields and how to deal with leading and trailing
5020 separators; see \l{SectionFlags}.
5021
5022 \snippet qstring/main.cpp 52
5023
    If \a start or \a end is negative, we count fields from the right
    of the string, the right-most field being -1, the one to its left
    being -2, and so on.
5027
5028 \snippet qstring/main.cpp 53
5029
5030 \sa split()
5031*/
5032
5033/*!
5034 \overload section()
5035
5036 \snippet qstring/main.cpp 51
5037 \snippet qstring/main.cpp 54
5038
5039 \sa split()
5040*/
5041
5042QString QString::section(const QString &sep, qsizetype start, qsizetype end, SectionFlags flags) const
5043{
5044 const QList<QStringView> sections = QStringView{ *this }.split(
5045 sep, Qt::KeepEmptyParts, (flags & SectionCaseInsensitiveSeps) ? Qt::CaseInsensitive : Qt::CaseSensitive);
5046 const qsizetype sectionsSize = sections.size();
5047 if (!(flags & SectionSkipEmpty)) {
5048 if (start < 0)
5049 start += sectionsSize;
5050 if (end < 0)
5051 end += sectionsSize;
5052 } else {
5053 qsizetype skip = 0;
5054 for (qsizetype k = 0; k < sectionsSize; ++k) {
5055 if (sections.at(k).isEmpty())
5056 skip++;
5057 }
5058 if (start < 0)
5059 start += sectionsSize - skip;
5060 if (end < 0)
5061 end += sectionsSize - skip;
5062 }
5063 if (start >= sectionsSize || end < 0 || start > end)
5064 return QString();
5065
5066 QString ret;
5067 qsizetype first_i = start, last_i = end;
5068 for (qsizetype x = 0, i = 0; x <= end && i < sectionsSize; ++i) {
5069 const QStringView &section = sections.at(i);
5070 const bool empty = section.isEmpty();
5071 if (x >= start) {
5072 if (x == start)
5073 first_i = i;
5074 if (x == end)
5075 last_i = i;
5076 if (x > start && i > 0)
5077 ret += sep;
5078 ret += section;
5079 }
5080 if (!empty || !(flags & SectionSkipEmpty))
5081 x++;
5082 }
5083 if ((flags & SectionIncludeLeadingSep) && first_i > 0)
5084 ret.prepend(sep);
5085 if ((flags & SectionIncludeTrailingSep) && last_i < sectionsSize - 1)
5086 ret += sep;
5087 return ret;
5088}
5089
5090#if QT_CONFIG(regularexpression)
// One piece of a string split by a regular-expression separator, as built by
// QString::section(const QRegularExpression &, ...).
struct qt_section_chunk
{
    // Length of the separator match at the start of `string` (0 for the first chunk).
    qsizetype length;
    // The leading separator followed by the field text up to the next separator.
    QStringView string;
};
Q_DECLARE_TYPEINFO(qt_section_chunk, Q_RELOCATABLE_TYPE);
5097
5098static QString extractSections(QSpan<qt_section_chunk> sections, qsizetype start, qsizetype end,
5099 QString::SectionFlags flags)
5100{
5101 const qsizetype sectionsSize = sections.size();
5102
5103 if (!(flags & QString::SectionSkipEmpty)) {
5104 if (start < 0)
5105 start += sectionsSize;
5106 if (end < 0)
5107 end += sectionsSize;
5108 } else {
5109 qsizetype skip = 0;
5110 for (qsizetype k = 0; k < sectionsSize; ++k) {
5111 const qt_section_chunk &section = sections[k];
5112 if (section.length == section.string.size())
5113 skip++;
5114 }
5115 if (start < 0)
5116 start += sectionsSize - skip;
5117 if (end < 0)
5118 end += sectionsSize - skip;
5119 }
5120 if (start >= sectionsSize || end < 0 || start > end)
5121 return QString();
5122
5123 QString ret;
5124 qsizetype x = 0;
5125 qsizetype first_i = start, last_i = end;
5126 for (qsizetype i = 0; x <= end && i < sectionsSize; ++i) {
5127 const qt_section_chunk &section = sections[i];
5128 const bool empty = (section.length == section.string.size());
5129 if (x >= start) {
5130 if (x == start)
5131 first_i = i;
5132 if (x == end)
5133 last_i = i;
5134 if (x != start)
5135 ret += section.string;
5136 else
5137 ret += section.string.mid(section.length);
5138 }
5139 if (!empty || !(flags & QString::SectionSkipEmpty))
5140 x++;
5141 }
5142
5143 if ((flags & QString::SectionIncludeLeadingSep) && first_i >= 0) {
5144 const qt_section_chunk &section = sections[first_i];
5145 ret.prepend(section.string.left(section.length));
5146 }
5147
5148 if ((flags & QString::SectionIncludeTrailingSep)
5149 && last_i < sectionsSize - 1) {
5150 const qt_section_chunk &section = sections[last_i + 1];
5151 ret += section.string.left(section.length);
5152 }
5153
5154 return ret;
5155}
5156
5157/*!
5158 \overload section()
5159 \since 5.0
5160
5161 This string is treated as a sequence of fields separated by the
5162 regular expression, \a re.
5163
5164 \snippet qstring/main.cpp 89
5165
5166 \warning Using this QRegularExpression version is much more expensive than
5167 the overloaded string and character versions.
5168
5169 \sa split(), simplified()
5170*/
5171QString QString::section(const QRegularExpression &re, qsizetype start, qsizetype end, SectionFlags flags) const
5172{
5173 if (!re.isValid()) {
5174 qtWarnAboutInvalidRegularExpression(re, "QString", "section");
5175 return QString();
5176 }
5177
5178 const QChar *uc = unicode();
5179 if (!uc)
5180 return QString();
5181
5182 QRegularExpression sep(re);
5183 if (flags & SectionCaseInsensitiveSeps)
5184 sep.setPatternOptions(sep.patternOptions() | QRegularExpression::CaseInsensitiveOption);
5185
5186 QVarLengthArray<qt_section_chunk> sections;
5187 qsizetype n = size(), m = 0, last_m = 0, last_len = 0;
5188 QRegularExpressionMatchIterator iterator = sep.globalMatch(*this);
5189 while (iterator.hasNext()) {
5190 QRegularExpressionMatch match = iterator.next();
5191 m = match.capturedStart();
5192 sections.append(qt_section_chunk{last_len, QStringView{*this}.sliced(last_m, m - last_m)});
5193 last_m = m;
5194 last_len = match.capturedLength();
5195 }
5196 sections.append(qt_section_chunk{last_len, QStringView{*this}.sliced(last_m, n - last_m)});
5197
5198 return extractSections(sections, start, end, flags);
5199}
5200#endif // QT_CONFIG(regularexpression)
5201
5202/*!
5203 \fn QString QString::left(qsizetype n) const &
5204 \fn QString QString::left(qsizetype n) &&
5205
5206 Returns a substring that contains the \a n leftmost characters of
5207 this string (that is, from the beginning of this string up to, but not
5208 including, the element at index position \a n).
5209
5210 If you know that \a n cannot be out of bounds, use first() instead in new
5211 code, because it is faster.
5212
5213 The entire string is returned if \a n is greater than or equal
5214 to size(), or less than zero.
5215
5216 \sa first(), last(), startsWith(), chopped(), chop(), truncate()
5217*/
5218
5219/*!
5220 \fn QString QString::right(qsizetype n) const &
5221 \fn QString QString::right(qsizetype n) &&
5222
5223 Returns a substring that contains the \a n rightmost characters
5224 of the string.
5225
5226 If you know that \a n cannot be out of bounds, use last() instead in new
5227 code, because it is faster.
5228
5229 The entire string is returned if \a n is greater than or equal
5230 to size(), or less than zero.
5231
5232 \sa endsWith(), last(), first(), sliced(), chopped(), chop(), truncate(), slice()
5233*/
5234
5235/*!
5236 \fn QString QString::mid(qsizetype position, qsizetype n) const &
5237 \fn QString QString::mid(qsizetype position, qsizetype n) &&
5238
5239 Returns a string that contains \a n characters of this string, starting
5240 at the specified \a position index up to, but not including, the element
5241 at index position \c {\a position + n}.
5242
5243 If you know that \a position and \a n cannot be out of bounds, use sliced()
5244 instead in new code, because it is faster.
5245
5246 Returns a null string if the \a position index exceeds the
    length of the string. If there are fewer than \a n characters
5248 available in the string starting at the given \a position, or if
5249 \a n is -1 (default), the function returns all characters that
5250 are available from the specified \a position.
5251
5252 \sa first(), last(), sliced(), chopped(), chop(), truncate(), slice()
5253*/
5254QString QString::mid(qsizetype position, qsizetype n) const &
5255{
5256 qsizetype p = position;
5257 qsizetype l = n;
5258 using namespace QtPrivate;
5259 switch (QContainerImplHelper::mid(size(), &p, &l)) {
5260 case QContainerImplHelper::Null:
5261 return QString();
5262 case QContainerImplHelper::Empty:
5263 return QString(DataPointer::fromRawData(&_empty, 0));
5264 case QContainerImplHelper::Full:
5265 return *this;
5266 case QContainerImplHelper::Subset:
5267 return sliced(p, l);
5268 }
5269 Q_UNREACHABLE_RETURN(QString());
5270}
5271
5272QString QString::mid(qsizetype position, qsizetype n) &&
5273{
5274 qsizetype p = position;
5275 qsizetype l = n;
5276 using namespace QtPrivate;
5277 switch (QContainerImplHelper::mid(size(), &p, &l)) {
5278 case QContainerImplHelper::Null:
5279 return QString();
5280 case QContainerImplHelper::Empty:
5281 resize(0); // keep capacity if we've reserve()d
5282 [[fallthrough]];
5283 case QContainerImplHelper::Full:
5284 return std::move(*this);
5285 case QContainerImplHelper::Subset:
5286 return std::move(*this).sliced(p, l);
5287 }
5288 Q_UNREACHABLE_RETURN(QString());
5289}
5290
5291/*!
5292 \fn QString QString::first(qsizetype n) const &
5293 \fn QString QString::first(qsizetype n) &&
5294 \since 6.0
5295
5296 Returns a string that contains the first \a n characters of this string,
5297 (that is, from the beginning of this string up to, but not including,
5298 the element at index position \a n).
5299
5300 \note The behavior is undefined when \a n < 0 or \a n > size().
5301
5302 \snippet qstring/main.cpp 31
5303
5304 \sa last(), sliced(), startsWith(), chopped(), chop(), truncate(), slice()
5305*/
5306
5307/*!
5308 \fn QString QString::last(qsizetype n) const &
5309 \fn QString QString::last(qsizetype n) &&
5310 \since 6.0
5311
5312 Returns the string that contains the last \a n characters of this string.
5313
5314 \note The behavior is undefined when \a n < 0 or \a n > size().
5315
5316 \snippet qstring/main.cpp 48
5317
5318 \sa first(), sliced(), endsWith(), chopped(), chop(), truncate(), slice()
5319*/
5320
5321/*!
5322 \fn QString QString::sliced(qsizetype pos, qsizetype n) const &
5323 \fn QString QString::sliced(qsizetype pos, qsizetype n) &&
5324 \since 6.0
5325
5326 Returns a string that contains \a n characters of this string, starting
5327 at position \a pos up to, but not including, the element at index position
5328 \c {\a pos + n}.
5329
5330 \note The behavior is undefined when \a pos < 0, \a n < 0,
5331 or \a pos + \a n > size().
5332
5333 \snippet qstring/main.cpp 34
5334
5335 \sa first(), last(), chopped(), chop(), truncate(), slice()
5336*/
5337QString QString::sliced_helper(QString &str, qsizetype pos, qsizetype n)
5338{
5339 if (n == 0)
5340 return QString(DataPointer::fromRawData(&_empty, 0));
5341 DataPointer d = std::move(str.d).sliced(pos, n);
5342 d.data()[n] = 0;
5343 return QString(std::move(d));
5344}
5345
5346/*!
5347 \fn QString QString::sliced(qsizetype pos) const &
5348 \fn QString QString::sliced(qsizetype pos) &&
5349 \since 6.0
5350 \overload
5351
5352 Returns a string that contains the portion of this string starting at
5353 position \a pos and extending to its end.
5354
5355 \note The behavior is undefined when \a pos < 0 or \a pos > size().
5356
5357 \sa first(), last(), chopped(), chop(), truncate(), slice()
5358*/
5359
5360/*!
5361 \fn QString &QString::slice(qsizetype pos, qsizetype n)
5362 \since 6.8
5363
5364 Modifies this string to start at position \a pos, up to, but not including,
5365 the character (code point) at index position \c {\a pos + n}; and returns
5366 a reference to this string.
5367
5368 \note The behavior is undefined if \a pos < 0, \a n < 0,
5369 or \a pos + \a n > size().
5370
5371 \snippet qstring/main.cpp slice97
5372
5373 \sa sliced(), first(), last(), chopped(), chop(), truncate()
5374*/
5375
5376/*!
5377 \fn QString &QString::slice(qsizetype pos)
5378 \since 6.8
5379 \overload
5380
5381 Modifies this string to start at position \a pos and extending to its end,
5382 and returns a reference to this string.
5383
5384 \note The behavior is undefined if \a pos < 0 or \a pos > size().
5385
5386 \sa sliced(), first(), last(), chopped(), chop(), truncate()
5387*/
5388
5389/*!
5390 \fn QString QString::chopped(qsizetype len) const &
5391 \fn QString QString::chopped(qsizetype len) &&
5392 \since 5.10
5393
5394 Returns a string that contains the size() - \a len leftmost characters
5395 of this string.
5396
5397 \note The behavior is undefined if \a len is negative or greater than size().
5398
5399 \sa endsWith(), first(), last(), sliced(), chop(), truncate(), slice()
5400*/
5401
5402/*!
5403 Returns \c true if the string starts with \a s; otherwise returns
5404 \c false.
5405
5406 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
5407
5408 \snippet qstring/main.cpp 65
5409
5410 \sa endsWith()
5411*/
5412bool QString::startsWith(const QString& s, Qt::CaseSensitivity cs) const
5413{
5414 return qt_starts_with_impl(QStringView(*this), QStringView(s), cs);
5415}
5416
5417/*!
5418 \overload startsWith()
5419 */
5420bool QString::startsWith(QLatin1StringView s, Qt::CaseSensitivity cs) const
5421{
5422 return qt_starts_with_impl(QStringView(*this), s, cs);
5423}
5424
5425/*!
5426 \overload startsWith()
5427
5428 Returns \c true if the string starts with \a c; otherwise returns
5429 \c false.
5430*/
5431bool QString::startsWith(QChar c, Qt::CaseSensitivity cs) const
5432{
5433 if (!size())
5434 return false;
5435 if (cs == Qt::CaseSensitive)
5436 return at(0) == c;
5437 return foldCase(at(0)) == foldCase(c);
5438}
5439
5440/*!
5441 \fn bool QString::startsWith(QStringView str, Qt::CaseSensitivity cs) const
5442 \since 5.10
5443 \overload
5444
5445 Returns \c true if the string starts with the string view \a str;
5446 otherwise returns \c false.
5447
5448 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
5449
5450 \sa endsWith()
5451*/
5452
5453/*!
5454 Returns \c true if the string ends with \a s; otherwise returns
5455 \c false.
5456
5457 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
5458
5459 \snippet qstring/main.cpp 20
5460
5461 \sa startsWith()
5462*/
5463bool QString::endsWith(const QString &s, Qt::CaseSensitivity cs) const
5464{
5465 return qt_ends_with_impl(QStringView(*this), QStringView(s), cs);
5466}
5467
5468/*!
5469 \fn bool QString::endsWith(QStringView str, Qt::CaseSensitivity cs) const
5470 \since 5.10
5471 \overload endsWith()
5472 Returns \c true if the string ends with the string view \a str;
5473 otherwise returns \c false.
5474
5475 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
5476
5477 \sa startsWith()
5478*/
5479
5480/*!
5481 \overload endsWith()
5482*/
5483bool QString::endsWith(QLatin1StringView s, Qt::CaseSensitivity cs) const
5484{
5485 return qt_ends_with_impl(QStringView(*this), s, cs);
5486}
5487
5488/*!
5489 Returns \c true if the string ends with \a c; otherwise returns
5490 \c false.
5491
5492 \overload endsWith()
5493 */
5494bool QString::endsWith(QChar c, Qt::CaseSensitivity cs) const
5495{
5496 if (!size())
5497 return false;
5498 if (cs == Qt::CaseSensitive)
5499 return at(size() - 1) == c;
5500 return foldCase(at(size() - 1)) == foldCase(c);
5501}
5502
5503static bool checkCase(QStringView s, QUnicodeTables::Case c) noexcept
5504{
5505 QStringIterator it(s);
5506 while (it.hasNext()) {
5507 const char32_t uc = it.next();
5508 if (caseConversion(uc)[c].diff)
5509 return false;
5510 }
5511 return true;
5512}
5513
5514bool QtPrivate::isLower(QStringView s) noexcept
5515{
5516 return checkCase(s, QUnicodeTables::LowerCase);
5517}
5518
5519bool QtPrivate::isUpper(QStringView s) noexcept
5520{
5521 return checkCase(s, QUnicodeTables::UpperCase);
5522}
5523
5524/*!
5525 Returns \c true if the string is uppercase, that is, it's identical
5526 to its toUpper() folding.
5527
5528 Note that this does \e not mean that the string does not contain
    lowercase letters (some lowercase letters do not have an uppercase
5530 folding; they are left unchanged by toUpper()).
5531 For more information, refer to the Unicode standard, section 3.13.
5532
5533 \since 5.12
5534
5535 \sa QChar::toUpper(), isLower()
5536*/
5537bool QString::isUpper() const
5538{
5539 return QtPrivate::isUpper(qToStringViewIgnoringNull(*this));
5540}
5541
5542/*!
5543 Returns \c true if the string is lowercase, that is, it's identical
5544 to its toLower() folding.
5545
5546 Note that this does \e not mean that the string does not contain
5547 uppercase letters (some uppercase letters do not have a lowercase
5548 folding; they are left unchanged by toLower()).
5549 For more information, refer to the Unicode standard, section 3.13.
5550
5551 \since 5.12
5552
5553 \sa QChar::toLower(), isUpper()
5554 */
5555bool QString::isLower() const
5556{
5557 return QtPrivate::isLower(qToStringViewIgnoringNull(*this));
5558}
5559
5560static QByteArray qt_convert_to_latin1(QStringView string);
5561
5562QByteArray QString::toLatin1_helper(const QString &string)
5563{
5564 return qt_convert_to_latin1(string);
5565}
5566
5567/*!
5568 \since 6.0
5569 \internal
5570 \relates QAnyStringView
5571
5572 Returns a UTF-16 representation of \a string as a QString.
5573
5574 \sa QString::toLatin1(), QStringView::toLatin1(), QtPrivate::convertToUtf8(),
5575 QtPrivate::convertToLocal8Bit(), QtPrivate::convertToUcs4()
5576*/
5577QString QtPrivate::convertToQString(QAnyStringView string)
5578{
5579 return string.visit([] (auto string) { return string.toString(); });
5580}
5581
5582/*!
5583 \since 5.10
5584 \internal
5585 \relates QStringView
5586
5587 Returns a Latin-1 representation of \a string as a QByteArray.
5588
5589 The behavior is undefined if \a string contains non-Latin1 characters.
5590
5591 \sa QString::toLatin1(), QStringView::toLatin1(), QtPrivate::convertToUtf8(),
5592 QtPrivate::convertToLocal8Bit(), QtPrivate::convertToUcs4()
5593*/
5595{
5596 return qt_convert_to_latin1(string);
5597}
5598
5599Q_NEVER_INLINE
5600static QByteArray qt_convert_to_latin1(QStringView string)
5601{
5602 if (Q_UNLIKELY(string.isNull()))
5603 return QByteArray();
5604
5605 QByteArray ba(string.size(), Qt::Uninitialized);
5606
5607 // since we own the only copy, we're going to const_cast the constData;
5608 // that avoids an unnecessary call to detach() and expansion code that will never get used
5609 qt_to_latin1(reinterpret_cast<uchar *>(const_cast<char *>(ba.constData())),
5610 string.utf16(), string.size());
5611 return ba;
5612}
5613
5614QByteArray QString::toLatin1_helper_inplace(QString &s)
5615{
5616 if (!s.isDetached())
5617 return qt_convert_to_latin1(s);
5618
5619 // We can return our own buffer to the caller.
5620 // Conversion to Latin-1 always shrinks the buffer by half.
5621 // This relies on the fact that we use QArrayData for everything behind the scenes
5622
5623 // First, do the in-place conversion. Since isDetached() == true, the data
5624 // was allocated by QArrayData, so the null terminator must be there.
5625 qsizetype length = s.size();
5626 char16_t *sdata = s.d->data();
5627 Q_ASSERT(sdata[length] == u'\0');
5628 qt_to_latin1(reinterpret_cast<uchar *>(sdata), sdata, length + 1);
5629
5630 // Move the internals over to the byte array.
5631 // Kids, avert your eyes. Don't try this at home.
5632 auto ba_d = std::move(s.d).reinterpreted<char>();
5633
5634 // Some sanity checks
5635 Q_ASSERT(ba_d.d->allocatedCapacity() >= ba_d.size);
5636 Q_ASSERT(s.isNull());
5637 Q_ASSERT(s.isEmpty());
5638 Q_ASSERT(s.constData() == QString().constData());
5639
5640 return QByteArray(std::move(ba_d));
5641}
5642
5643/*!
5644 \since 6.9
5645 \internal
5646 \relates QLatin1StringView
5647
5648 Returns a UTF-8 representation of \a string as a QByteArray.
5649*/
5650QByteArray QtPrivate::convertToUtf8(QLatin1StringView string)
5651{
5652 if (Q_UNLIKELY(string.isNull()))
5653 return QByteArray();
5654
5655 // create a QByteArray with the worst case scenario size
5656 QByteArray ba(string.size() * 2, Qt::Uninitialized);
5657 const qsizetype sz = QUtf8::convertFromLatin1(ba.data(), string) - ba.data();
5658 ba.truncate(sz);
5659
5660 return ba;
5661}
5662
5663// QLatin1 methods that use helpers from qstring.cpp
5664char16_t *QLatin1::convertToUnicode(char16_t *out, QLatin1StringView in) noexcept
5665{
5666 const qsizetype len = in.size();
5667 qt_from_latin1(out, in.data(), len);
5668 return std::next(out, len);
5669}
5670
5671char *QLatin1::convertFromUnicode(char *out, QStringView in) noexcept
5672{
5673 const qsizetype len = in.size();
5674 qt_to_latin1(reinterpret_cast<uchar *>(out), in.utf16(), len);
5675 return out + len;
5676}
5677
5678/*!
5679 \fn QByteArray QString::toLatin1() const
5680
5681 Returns a Latin-1 representation of the string as a QByteArray.
5682
5683 The returned byte array is undefined if the string contains non-Latin1
5684 characters. Those characters may be suppressed or replaced with a
5685 question mark.
5686
5687 \sa fromLatin1(), toUtf8(), toLocal8Bit(), QStringEncoder
5688*/
5689
5690static QByteArray qt_convert_to_local_8bit(QStringView string);
5691
5692/*!
5693 \fn QByteArray QString::toLocal8Bit() const
5694
5695 Returns the local 8-bit representation of the string as a
5696 QByteArray.
5697
5698 \include qstring.qdocinc {qstring-local-8-bit-equivalent} {toUtf8}
5699
5700 If this string contains any characters that cannot be encoded in the
5701 local 8-bit encoding, the returned byte array is undefined. Those
5702 characters may be suppressed or replaced by another.
5703
5704 \sa fromLocal8Bit(), toLatin1(), toUtf8(), QStringEncoder
5705*/
5706
5707QByteArray QString::toLocal8Bit_helper(const QChar *data, qsizetype size)
5708{
5709 return qt_convert_to_local_8bit(QStringView(data, size));
5710}
5711
5712static QByteArray qt_convert_to_local_8bit(QStringView string)
5713{
5714 if (string.isNull())
5715 return QByteArray();
5716 QStringEncoder fromUtf16(QStringEncoder::System, QStringEncoder::Flag::Stateless);
5717 return fromUtf16(string);
5718}
5719
5720/*!
5721 \since 5.10
5722 \internal
5723 \relates QStringView
5724
5725 Returns a local 8-bit representation of \a string as a QByteArray.
5726
    On Unix systems this is equivalent to toUtf8(); on Windows the system's
    current code page is used.
5729
5730 The behavior is undefined if \a string contains characters not
5731 supported by the locale's 8-bit encoding.
5732
5733 \sa QString::toLocal8Bit(), QStringView::toLocal8Bit()
5734*/
5736{
5737 return qt_convert_to_local_8bit(string);
5738}
5739
5740static QByteArray qt_convert_to_utf8(QStringView str);
5741
5742/*!
5743 \fn QByteArray QString::toUtf8() const
5744
5745 Returns a UTF-8 representation of the string as a QByteArray.
5746
5747 UTF-8 is a Unicode codec and can represent all characters in a Unicode
5748 string like QString.
5749
5750 \sa fromUtf8(), toLatin1(), toLocal8Bit(), QStringEncoder
5751*/
5752
5753QByteArray QString::toUtf8_helper(const QString &str)
5754{
5755 return qt_convert_to_utf8(str);
5756}
5757
5758static QByteArray qt_convert_to_utf8(QStringView str)
5759{
5760 if (str.isNull())
5761 return QByteArray();
5762
5763 return QUtf8::convertFromUnicode(str);
5764}
5765
5766/*!
5767 \since 5.10
5768 \internal
5769 \relates QStringView
5770
5771 Returns a UTF-8 representation of \a string as a QByteArray.
5772
5773 UTF-8 is a Unicode codec and can represent all characters in a Unicode
5774 string like QStringView.
5775
5776 \sa QString::toUtf8(), QStringView::toUtf8()
5777*/
5779{
5780 return qt_convert_to_utf8(string);
5781}
5782
5783static QList<uint> qt_convert_to_ucs4(QStringView string);
5784
5785/*!
5786 \since 4.2
5787
5788 Returns a UCS-4/UTF-32 representation of the string as a QList<uint>.
5789
5790 UTF-32 is a Unicode codec and therefore it is lossless. All characters from
5791 this string will be encoded in UTF-32. Any invalid sequence of code units in
5792 this string is replaced by the Unicode replacement character
5793 (QChar::ReplacementCharacter, which corresponds to \c{U+FFFD}).
5794
5795 The returned list is not 0-terminated.
5796
5797 \sa fromUtf8(), toUtf8(), toLatin1(), toLocal8Bit(), QStringEncoder,
5798 fromUcs4(), toWCharArray()
5799*/
5800QList<uint> QString::toUcs4() const
5801{
5802 return qt_convert_to_ucs4(*this);
5803}
5804
5805static QList<uint> qt_convert_to_ucs4(QStringView string)
5806{
5807 QList<uint> v(string.size());
5808 uint *a = const_cast<uint*>(v.constData());
5809 QStringIterator it(string);
5810 while (it.hasNext())
5811 *a++ = it.next();
5812 v.resize(a - v.constData());
5813 return v;
5814}
5815
5816/*!
5817 \since 5.10
5818 \internal
5819 \relates QStringView
5820
5821 Returns a UCS-4/UTF-32 representation of \a string as a QList<uint>.
5822
5823 UTF-32 is a Unicode codec and therefore it is lossless. All characters from
5824 this string will be encoded in UTF-32. Any invalid sequence of code units in
5825 this string is replaced by the Unicode replacement character
5826 (QChar::ReplacementCharacter, which corresponds to \c{U+FFFD}).
5827
5828 The returned list is not 0-terminated.
5829
5830 \sa QString::toUcs4(), QStringView::toUcs4(), QtPrivate::convertToLatin1(),
5831 QtPrivate::convertToLocal8Bit(), QtPrivate::convertToUtf8()
5832*/
5833QList<uint> QtPrivate::convertToUcs4(QStringView string)
5834{
5835 return qt_convert_to_ucs4(string);
5836}
5837
5838/*!
5839 \fn QString QString::fromLatin1(QByteArrayView str)
5840 \overload
5841 \since 6.0
5842
5843 Returns a QString initialized with the Latin-1 string \a str.
5844
5845 \note: any null ('\\0') bytes in the byte array will be included in this
5846 string, converted to Unicode null characters (U+0000).
5847*/
5848QString QString::fromLatin1(QByteArrayView ba)
5849{
5850 DataPointer d;
5851 if (!ba.data()) {
5852 // nothing to do
5853 } else if (ba.size() == 0) {
5854 d = DataPointer::fromRawData(&_empty, 0);
5855 } else {
5856 d = DataPointer(ba.size(), ba.size());
5857 Q_CHECK_PTR(d.data());
5858 d.data()[ba.size()] = '\0';
5859 char16_t *dst = d.data();
5860
5861 qt_from_latin1(dst, ba.data(), size_t(ba.size()));
5862 }
5863 return QString(std::move(d));
5864}
5865
5866/*!
5867 \fn QString QString::fromLatin1(const char *str, qsizetype size)
5868 Returns a QString initialized with the first \a size characters
5869 of the Latin-1 string \a str.
5870
5871 If \a size is \c{-1}, \c{strlen(str)} is used instead.
5872
5873 \sa toLatin1(), fromUtf8(), fromLocal8Bit()
5874*/
5875
5876/*!
5877 \fn QString QString::fromLatin1(const QByteArray &str)
5878 \overload
5879 \since 5.0
5880
5881 Returns a QString initialized with the Latin-1 string \a str.
5882
5883 \note: any null ('\\0') bytes in the byte array will be included in this
5884 string, converted to Unicode null characters (U+0000). This behavior is
5885 different from Qt 5.x.
5886*/
5887
5888/*!
5889 \fn QString QString::fromLocal8Bit(const char *str, qsizetype size)
5890 Returns a QString initialized with the first \a size characters
5891 of the 8-bit string \a str.
5892
5893 If \a size is \c{-1}, \c{strlen(str)} is used instead.
5894
5895 \include qstring.qdocinc {qstring-local-8-bit-equivalent} {fromUtf8}
5896
5897 \sa toLocal8Bit(), fromLatin1(), fromUtf8()
5898*/
5899
5900/*!
5901 \fn QString QString::fromLocal8Bit(const QByteArray &str)
5902 \overload
5903 \since 5.0
5904
5905 Returns a QString initialized with the 8-bit string \a str.
5906
5907 \include qstring.qdocinc {qstring-local-8-bit-equivalent} {fromUtf8}
5908
5909 \note: any null ('\\0') bytes in the byte array will be included in this
5910 string, converted to Unicode null characters (U+0000). This behavior is
5911 different from Qt 5.x.
5912*/
5913
5914/*!
5915 \fn QString QString::fromLocal8Bit(QByteArrayView str)
5916 \overload
5917 \since 6.0
5918
5919 Returns a QString initialized with the 8-bit string \a str.
5920
5921 \include qstring.qdocinc {qstring-local-8-bit-equivalent} {fromUtf8}
5922
5923 \note: any null ('\\0') bytes in the byte array will be included in this
5924 string, converted to Unicode null characters (U+0000).
5925*/
5926QString QString::fromLocal8Bit(QByteArrayView ba)
5927{
5928 if (ba.isNull())
5929 return QString();
5930 if (ba.isEmpty())
5931 return QString(DataPointer::fromRawData(&_empty, 0));
5932 QStringDecoder toUtf16(QStringDecoder::System, QStringDecoder::Flag::Stateless);
5933 return toUtf16(ba);
5934}
5935
5936/*! \fn QString QString::fromUtf8(const char *str, qsizetype size)
5937 Returns a QString initialized with the first \a size bytes
5938 of the UTF-8 string \a str.
5939
5940 If \a size is \c{-1}, \c{strlen(str)} is used instead.
5941
5942 UTF-8 is a Unicode codec and can represent all characters in a Unicode
5943 string like QString. However, invalid sequences are possible with UTF-8
5944 and, if any such are found, they will be replaced with one or more
5945 "replacement characters", or suppressed. These include non-Unicode
5946 sequences, non-characters, overlong sequences or surrogate codepoints
5947 encoded into UTF-8.
5948
5949 This function can be used to process incoming data incrementally as long as
5950 all UTF-8 characters are terminated within the incoming data. Any
5951 unterminated characters at the end of the string will be replaced or
5952 suppressed. In order to do stateful decoding, please use \l QStringDecoder.
5953
5954 \sa toUtf8(), fromLatin1(), fromLocal8Bit()
5955*/
5956
5957/*!
5958 \fn QString QString::fromUtf8(const char8_t *str)
5959 \overload
5960 \since 6.1
5961
5962 This overload is only available when compiling in C++20 mode.
5963*/
5964
5965/*!
5966 \fn QString QString::fromUtf8(const char8_t *str, qsizetype size)
5967 \overload
5968 \since 6.0
5969
5970 This overload is only available when compiling in C++20 mode.
5971*/
5972
5973/*!
5974 \fn QString QString::fromUtf8(const QByteArray &str)
5975 \overload
5976 \since 5.0
5977
5978 Returns a QString initialized with the UTF-8 string \a str.
5979
5980 \note: any null ('\\0') bytes in the byte array will be included in this
5981 string, converted to Unicode null characters (U+0000). This behavior is
5982 different from Qt 5.x.
5983*/
5984
5985/*!
5986 \fn QString QString::fromUtf8(QByteArrayView str)
5987 \overload
5988 \since 6.0
5989
5990 Returns a QString initialized with the UTF-8 string \a str.
5991
5992 \note: any null ('\\0') bytes in the byte array will be included in this
5993 string, converted to Unicode null characters (U+0000).
5994*/
5995QString QString::fromUtf8(QByteArrayView ba)
5996{
5997 if (ba.isNull())
5998 return QString();
5999 if (ba.isEmpty())
6000 return QString(DataPointer::fromRawData(&_empty, 0));
6001 return QUtf8::convertToUnicode(ba);
6002}
6003
6004#ifndef QT_BOOTSTRAPPED
6005/*!
6006 \since 5.3
6007 Returns a QString initialized with the first \a size characters
6008 of the Unicode string \a unicode (ISO-10646-UTF-16 encoded).
6009
6010 If \a size is -1 (default), \a unicode must be '\\0'-terminated.
6011
6012 This function checks for a Byte Order Mark (BOM). If it is missing,
6013 host byte order is assumed.
6014
6015 This function is slow compared to the other Unicode conversions.
6016 Use QString(const QChar *, qsizetype) or QString(const QChar *) if possible.
6017
6018 QString makes a deep copy of the Unicode data.
6019
6020 \sa utf16(), setUtf16(), fromStdU16String()
6021*/
6022QString QString::fromUtf16(const char16_t *unicode, qsizetype size)
6023{
6024 if (!unicode)
6025 return QString();
6026 if (size < 0)
6027 size = QtPrivate::qustrlen(unicode);
6028 QStringDecoder toUtf16(QStringDecoder::Utf16, QStringDecoder::Flag::Stateless);
6029 return toUtf16(QByteArrayView(reinterpret_cast<const char *>(unicode), size * 2));
6030}
6031
6032/*!
6033 \fn QString QString::fromUtf16(const ushort *str, qsizetype size)
6034 \deprecated [6.0] Use the \c char16_t overload instead.
6035*/
6036
6037/*!
6038 \fn QString QString::fromUcs4(const uint *str, qsizetype size)
6039 \since 4.2
6040 \deprecated [6.0] Use the \c char32_t overload instead.
6041*/
6042
6043/*!
6044 \since 5.3
6045
6046 Returns a QString initialized with the first \a size characters
6047 of the Unicode string \a unicode (encoded as UTF-32).
6048
6049 If \a size is -1 (default), \a unicode must be '\\0'-terminated.
6050
6051 \sa toUcs4(), fromUtf16(), utf16(), setUtf16(), fromWCharArray(),
6052 fromStdU32String()
6053*/
6054QString QString::fromUcs4(const char32_t *unicode, qsizetype size)
6055{
6056 if (!unicode)
6057 return QString();
6058 if (size < 0) {
6059 if constexpr (sizeof(char32_t) == sizeof(wchar_t))
6060 size = wcslen(reinterpret_cast<const wchar_t *>(unicode));
6061 else
6062 size = std::char_traits<char32_t>::length(unicode);
6063 }
6064 QStringDecoder toUtf16(QStringDecoder::Utf32, QStringDecoder::Flag::Stateless);
6065 return toUtf16(QByteArrayView(reinterpret_cast<const char *>(unicode), size * 4));
6066}
6067#endif // !QT_BOOTSTRAPPED
6068
6069/*!
6070 Resizes the string to \a size characters and copies \a unicode
6071 into the string.
6072
6073 If \a unicode is \nullptr, nothing is copied, but the string is still
6074 resized to \a size.
6075
6076 \sa unicode(), setUtf16()
6077*/
6078QString& QString::setUnicode(const QChar *unicode, qsizetype size)
6079{
6080 resize(size);
6081 if (unicode && size)
6082 memcpy(d.data(), unicode, size * sizeof(QChar));
6083 return *this;
6084}
6085
6086/*!
6087 \fn QString::setUnicode(const char16_t *unicode, qsizetype size)
6088 \overload
6089 \since 6.9
6090
6091 \sa unicode(), setUtf16()
6092*/
6093
6094/*!
6095 \fn QString::setUtf16(const char16_t *unicode, qsizetype size)
6096 \since 6.9
6097
6098 Resizes the string to \a size characters and copies \a unicode
6099 into the string.
6100
6101 If \a unicode is \nullptr, nothing is copied, but the string is still
6102 resized to \a size.
6103
6104 Note that unlike fromUtf16(), this function does not consider BOMs and
6105 possibly differing byte ordering.
6106
6107 \sa utf16(), setUnicode()
6108*/
6109
6110/*!
6111 \fn QString &QString::setUtf16(const ushort *unicode, qsizetype size)
    \deprecated Use the \c char16_t overload instead.
6113*/
6114
6115/*!
6116 \fn QString QString::simplified() const
6117
6118 Returns a string that has whitespace removed from the start
6119 and the end, and that has each sequence of internal whitespace
6120 replaced with a single space.
6121
6122 Whitespace means any character for which QChar::isSpace() returns
6123 \c true. This includes the ASCII characters '\\t', '\\n', '\\v',
6124 '\\f', '\\r', and ' '.
6125
6126 Example:
6127
6128 \snippet qstring/main.cpp 57
6129
6130 \sa trimmed()
6131*/
6132QString QString::simplified_helper(const QString &str)
6133{
6134 return QStringAlgorithms<const QString>::simplified_helper(str);
6135}
6136
6137QString QString::simplified_helper(QString &str)
6138{
6139 return QStringAlgorithms<QString>::simplified_helper(str);
6140}
6141
6142namespace {
6143 template <typename StringView>
6144 StringView qt_trimmed(StringView s) noexcept
6145 {
6146 const auto [begin, end] = QStringAlgorithms<const StringView>::trimmed_helper_positions(s);
6147 return StringView{begin, end};
6148 }
6149}
6150
6151/*!
6152 \fn QStringView QtPrivate::trimmed(QStringView s)
6153 \fn QLatin1StringView QtPrivate::trimmed(QLatin1StringView s)
6154 \internal
6155 \relates QStringView
6156 \since 5.10
6157
6158 Returns \a s with whitespace removed from the start and the end.
6159
6160 Whitespace means any character for which QChar::isSpace() returns
6161 \c true. This includes the ASCII characters '\\t', '\\n', '\\v',
6162 '\\f', '\\r', and ' '.
6163
6164 \sa QString::trimmed(), QStringView::trimmed(), QLatin1StringView::trimmed()
6165*/
6166QStringView QtPrivate::trimmed(QStringView s) noexcept
6167{
6168 return qt_trimmed(s);
6169}
6170
6171QLatin1StringView QtPrivate::trimmed(QLatin1StringView s) noexcept
6172{
6173 return qt_trimmed(s);
6174}
6175
6176/*!
6177 \fn QString QString::trimmed() const
6178
6179 Returns a string that has whitespace removed from the start and
6180 the end.
6181
6182 Whitespace means any character for which QChar::isSpace() returns
6183 \c true. This includes the ASCII characters '\\t', '\\n', '\\v',
6184 '\\f', '\\r', and ' '.
6185
6186 Example:
6187
6188 \snippet qstring/main.cpp 82
6189
6190 Unlike simplified(), trimmed() leaves internal whitespace alone.
6191
6192 \sa simplified()
6193*/
6194QString QString::trimmed_helper(const QString &str)
6195{
6196 return QStringAlgorithms<const QString>::trimmed_helper(str);
6197}
6198
6199QString QString::trimmed_helper(QString &str)
6200{
6201 return QStringAlgorithms<QString>::trimmed_helper(str);
6202}
6203
6204/*! \fn const QChar QString::at(qsizetype position) const
6205
6206 Returns the character at the given index \a position in the
6207 string.
6208
6209 The \a position must be a valid index position in the string
6210 (i.e., 0 <= \a position < size()).
6211
6212 \sa operator[]()
6213*/
6214
6215/*!
6216 \fn QChar &QString::operator[](qsizetype position)
6217
6218 Returns the character at the specified \a position in the string as a
6219 modifiable reference.
6220
6221 Example:
6222
6223 \snippet qstring/main.cpp 85
6224
6225 \sa at()
6226*/
6227
6228/*!
6229 \fn const QChar QString::operator[](qsizetype position) const
6230
6231 \overload operator[]()
6232*/
6233
6234/*!
6235 \fn QChar QString::front() const
6236 \since 5.10
6237
6238 Returns the first character in the string.
6239 Same as \c{at(0)}.
6240
6241 This function is provided for STL compatibility.
6242
6243 \warning Calling this function on an empty string constitutes
6244 undefined behavior.
6245
6246 \sa back(), at(), operator[]()
6247*/
6248
6249/*!
6250 \fn QChar QString::back() const
6251 \since 5.10
6252
6253 Returns the last character in the string.
6254 Same as \c{at(size() - 1)}.
6255
6256 This function is provided for STL compatibility.
6257
6258 \warning Calling this function on an empty string constitutes
6259 undefined behavior.
6260
6261 \sa front(), at(), operator[]()
6262*/
6263
6264/*!
6265 \fn QChar &QString::front()
6266 \since 5.10
6267
6268 Returns a reference to the first character in the string.
6269 Same as \c{operator[](0)}.
6270
6271 This function is provided for STL compatibility.
6272
6273 \warning Calling this function on an empty string constitutes
6274 undefined behavior.
6275
6276 \sa back(), at(), operator[]()
6277*/
6278
6279/*!
6280 \fn QChar &QString::back()
6281 \since 5.10
6282
6283 Returns a reference to the last character in the string.
6284 Same as \c{operator[](size() - 1)}.
6285
6286 This function is provided for STL compatibility.
6287
6288 \warning Calling this function on an empty string constitutes
6289 undefined behavior.
6290
6291 \sa front(), at(), operator[]()
6292*/
6293
6294/*!
6295 \fn void QString::truncate(qsizetype position)
6296
6297 Truncates the string starting from, and including, the element at index
6298 \a position.
6299
6300 If the specified \a position index is beyond the end of the
6301 string, nothing happens.
6302
6303 Example:
6304
6305 \snippet qstring/main.cpp 83
6306
6307 If \a position is negative, it is equivalent to passing zero.
6308
6309 \sa chop(), resize(), first(), QStringView::truncate()
6310*/
6311
6312void QString::truncate(qsizetype pos)
6313{
6314 if (pos < size())
6315 resize(pos);
6316}
6317
6318
6319/*!
6320 Removes \a n characters from the end of the string.
6321
6322 If \a n is greater than or equal to size(), the result is an
6323 empty string; if \a n is negative, it is equivalent to passing zero.
6324
6325 Example:
6326 \snippet qstring/main.cpp 15
6327
6328 If you want to remove characters from the \e beginning of the
6329 string, use remove() instead.
6330
6331 \sa truncate(), resize(), remove(), QStringView::chop()
6332*/
6333void QString::chop(qsizetype n)
6334{
6335 if (n > 0)
6336 resize(d.size - n);
6337}
6338
6339/*!
6340 Sets every character in the string to character \a ch. If \a size
6341 is different from -1 (default), the string is resized to \a
6342 size beforehand.
6343
6344 Example:
6345
6346 \snippet qstring/main.cpp 21
6347
6348 \sa resize()
6349*/
6350
6351QString& QString::fill(QChar ch, qsizetype size)
6352{
6353 resize(size < 0 ? d.size : size);
6354 if (d.size)
6355 std::fill(d.data(), d.data() + d.size, ch.unicode());
6356 return *this;
6357}
6358
6359/*!
6360 \fn qsizetype QString::length() const
6361
6362 Returns the number of characters in this string. Equivalent to
6363 size().
6364
6365 \sa resize()
6366*/
6367
6368/*!
6369 \fn qsizetype QString::size() const
6370
6371 Returns the number of characters in this string.
6372
6373 The last character in the string is at position size() - 1.
6374
6375 Example:
6376 \snippet qstring/main.cpp 58
6377
6378 \sa isEmpty(), resize()
6379*/
6380
6381/*!
6382 \fn qsizetype QString::max_size() const
6383 \fn qsizetype QString::maxSize()
6384 \since 6.8
6385
6386 It returns the maximum number of elements that the string can
6387 theoretically hold. In practice, the number can be much smaller,
6388 limited by the amount of memory available to the system.
6389*/
6390
6391/*! \fn bool QString::isNull() const
6392
6393 Returns \c true if this string is null; otherwise returns \c false.
6394
6395 Example:
6396
6397 \snippet qstring/main.cpp 28
6398
6399 Qt makes a distinction between null strings and empty strings for
6400 historical reasons. For most applications, what matters is
6401 whether or not a string contains any data, and this can be
6402 determined using the isEmpty() function.
6403
6404 \sa isEmpty()
6405*/
6406
6407/*! \fn bool QString::isEmpty() const
6408
6409 Returns \c true if the string has no characters; otherwise returns
6410 \c false.
6411
6412 Example:
6413
6414 \snippet qstring/main.cpp 27
6415
6416 \sa size()
6417*/
6418
6419/*! \fn QString &QString::operator+=(const QString &other)
6420
6421 Appends the string \a other onto the end of this string and
6422 returns a reference to this string.
6423
6424 Example:
6425
6426 \snippet qstring/main.cpp 84
6427
6428 This operation is typically very fast (\l{constant time}),
6429 because QString preallocates extra space at the end of the string
6430 data so it can grow without reallocating the entire string each
6431 time.
6432
6433 \sa append(), prepend()
6434*/
6435
6436/*! \fn QString &QString::operator+=(QLatin1StringView str)
6437
6438 \overload operator+=()
6439
6440 Appends the Latin-1 string viewed by \a str to this string.
6441*/
6442
6443/*! \fn QString &QString::operator+=(QUtf8StringView str)
6444 \since 6.5
6445 \overload operator+=()
6446
6447 Appends the UTF-8 string view \a str to this string.
6448*/
6449
6450/*! \fn QString &QString::operator+=(const QByteArray &ba)
6451
6452 \overload operator+=()
6453
6454 Appends the byte array \a ba to this string. The byte array is converted
6455 to Unicode using the fromUtf8() function. If any NUL characters ('\\0')
6456 are embedded in the \a ba byte array, they will be included in the
6457 transformation.
6458
6459 You can disable this function by defining
6460 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
6461 can be useful if you want to ensure that all user-visible strings
6462 go through QObject::tr(), for example.
6463*/
6464
6465/*! \fn QString &QString::operator+=(const char *str)
6466
6467 \overload operator+=()
6468
6469 Appends the string \a str to this string. The const char pointer
6470 is converted to Unicode using the fromUtf8() function.
6471
6472 You can disable this function by defining \l QT_NO_CAST_FROM_ASCII
6473 when you compile your applications. This can be useful if you want
6474 to ensure that all user-visible strings go through QObject::tr(),
6475 for example.
6476*/
6477
6478/*! \fn QString &QString::operator+=(QStringView str)
6479 \since 6.0
6480 \overload operator+=()
6481
6482 Appends the string view \a str to this string.
6483*/
6484
6485/*! \fn QString &QString::operator+=(QChar ch)
6486
6487 \overload operator+=()
6488
6489 Appends the character \a ch to the string.
6490*/
6491
6492/*!
6493 \fn bool QString::operator==(const char * const &lhs, const QString &rhs)
6494
6495 \overload operator==()
6496
6497 Returns \c true if \a lhs is equal to \a rhs; otherwise returns \c false.
6498 Note that no string is equal to \a lhs being 0.
6499
6500 Equivalent to \c {lhs != 0 && compare(lhs, rhs) == 0}.
6501*/
6502
6503/*!
6504 \fn bool QString::operator!=(const char * const &lhs, const QString &rhs)
6505
6506 Returns \c true if \a lhs is not equal to \a rhs; otherwise returns
6507 \c false.
6508
6509 For \a lhs != 0, this is equivalent to \c {compare(} \a lhs, \a rhs
6510 \c {) != 0}. Note that no string is equal to \a lhs being 0.
6511*/
6512
6513/*!
6514 \fn bool QString::operator<(const char * const &lhs, const QString &rhs)
6515
6516 Returns \c true if \a lhs is lexically less than \a rhs; otherwise
6517 returns \c false. For \a lhs != 0, this is equivalent to \c
6518 {compare(lhs, rhs) < 0}.
6519
6520 \sa {Comparing Strings}
6521*/
6522
6523/*!
6524 \fn bool QString::operator<=(const char * const &lhs, const QString &rhs)
6525
6526 Returns \c true if \a lhs is lexically less than or equal to \a rhs;
6527 otherwise returns \c false. For \a lhs != 0, this is equivalent to \c
6528 {compare(lhs, rhs) <= 0}.
6529
6530 \sa {Comparing Strings}
6531*/
6532
6533/*!
6534 \fn bool QString::operator>(const char * const &lhs, const QString &rhs)
6535
6536 Returns \c true if \a lhs is lexically greater than \a rhs; otherwise
    returns \c false. For \a lhs != 0, this is equivalent to \c
    {compare(lhs, rhs) > 0}.
6538
6539 \sa {Comparing Strings}
6540*/
6541
6542/*!
6543 \fn bool QString::operator>=(const char * const &lhs, const QString &rhs)
6544
6545 Returns \c true if \a lhs is lexically greater than or equal to \a rhs;
6546 otherwise returns \c false. For \a lhs != 0, this is equivalent to \c
6547 {compare(lhs, rhs) >= 0}.
6548
6549 \sa {Comparing Strings}
6550*/
6551
6552/*!
6553 \fn QString operator+(const QString &s1, const QString &s2)
6554 \fn QString operator+(QString &&s1, const QString &s2)
6555 \relates QString
6556
6557 Returns a string which is the result of concatenating \a s1 and \a
6558 s2.
6559*/
6560
6561/*!
6562 \fn QString operator+(const QString &s1, const char *s2)
6563 \relates QString
6564
6565 Returns a string which is the result of concatenating \a s1 and \a
6566 s2 (\a s2 is converted to Unicode using the QString::fromUtf8()
6567 function).
6568
6569 \sa QString::fromUtf8()
6570*/
6571
6572/*!
6573 \fn QString operator+(const char *s1, const QString &s2)
6574 \relates QString
6575
6576 Returns a string which is the result of concatenating \a s1 and \a
6577 s2 (\a s1 is converted to Unicode using the QString::fromUtf8()
6578 function).
6579
6580 \sa QString::fromUtf8()
6581*/
6582
6583/*!
6584 \fn QString operator+(QStringView lhs, const QString &rhs)
6585 \fn QString operator+(const QString &lhs, QStringView rhs)
6586
6587 \relates QString
6588 \since 6.9
6589
6590 Returns a string that is the result of concatenating \a lhs and \a rhs.
6591*/
6592
6593/*!
6594 \fn int QString::compare(const QString &s1, const QString &s2, Qt::CaseSensitivity cs)
6595 \since 4.2
6596
6597 Compares the string \a s1 with the string \a s2 and returns a negative integer
6598 if \a s1 is less than \a s2, a positive integer if it is greater than \a s2,
6599 and zero if they are equal.
6600
6601 \include qstring.qdocinc {search-comparison-case-sensitivity} {comparison}
6602
6603 Case sensitive comparison is based exclusively on the numeric
6604 Unicode values of the characters and is very fast, but is not what
6605 a human would expect. Consider sorting user-visible strings with
6606 localeAwareCompare().
6607
6608 \snippet qstring/main.cpp 16
6609
6610//! [compare-isNull-vs-isEmpty]
6611 \note This function treats null strings the same as empty strings,
6612 for more details see \l {Distinction Between Null and Empty Strings}.
6613//! [compare-isNull-vs-isEmpty]
6614
6615 \sa operator==(), operator<(), operator>(), {Comparing Strings}
6616*/
6617
6618/*!
6619 \fn int QString::compare(const QString &s1, QLatin1StringView s2, Qt::CaseSensitivity cs)
6620 \since 4.2
6621 \overload compare()
6622
6623 Performs a comparison of \a s1 and \a s2, using the case
6624 sensitivity setting \a cs.
6625*/
6626
6627/*!
6628 \fn int QString::compare(QLatin1StringView s1, const QString &s2, Qt::CaseSensitivity cs = Qt::CaseSensitive)
6629
6630 \since 4.2
6631 \overload compare()
6632
6633 Performs a comparison of \a s1 and \a s2, using the case
6634 sensitivity setting \a cs.
6635*/
6636
6637/*!
6638 \fn int QString::compare(QStringView s, Qt::CaseSensitivity cs = Qt::CaseSensitive) const
6639
6640 \since 5.12
6641 \overload compare()
6642
6643 Performs a comparison of this with \a s, using the case
6644 sensitivity setting \a cs.
6645*/
6646
6647/*!
6648 \fn int QString::compare(QChar ch, Qt::CaseSensitivity cs = Qt::CaseSensitive) const
6649
6650 \since 5.14
6651 \overload compare()
6652
6653 Performs a comparison of this with \a ch, using the case
6654 sensitivity setting \a cs.
6655*/
6656
6657/*!
6658 \overload compare()
6659 \since 4.2
6660
6661 Lexically compares this string with the string \a other and returns
6662 a negative integer if this string is less than \a other, a positive
6663 integer if it is greater than \a other, and zero if they are equal.
6664
6665 Same as compare(*this, \a other, \a cs).
6666*/
6667int QString::compare(const QString &other, Qt::CaseSensitivity cs) const noexcept
6668{
6669 return QtPrivate::compareStrings(*this, other, cs);
6670}
6671
6672/*!
6673 \internal
6674 \since 4.5
6675*/
6676int QString::compare_helper(const QChar *data1, qsizetype length1, const QChar *data2, qsizetype length2,
6677 Qt::CaseSensitivity cs) noexcept
6678{
6679 Q_ASSERT(length1 >= 0);
6680 Q_ASSERT(length2 >= 0);
6681 Q_ASSERT(data1 || length1 == 0);
6682 Q_ASSERT(data2 || length2 == 0);
6683 return QtPrivate::compareStrings(QStringView(data1, length1), QStringView(data2, length2), cs);
6684}
6685
6686/*!
6687 \overload compare()
6688 \since 4.2
6689
6690 Same as compare(*this, \a other, \a cs).
6691*/
6692int QString::compare(QLatin1StringView other, Qt::CaseSensitivity cs) const noexcept
6693{
6694 return QtPrivate::compareStrings(*this, other, cs);
6695}
6696
6697/*!
6698 \internal
6699 \since 5.0
6700*/
6701int QString::compare_helper(const QChar *data1, qsizetype length1, const char *data2, qsizetype length2,
6702 Qt::CaseSensitivity cs) noexcept
6703{
6704 Q_ASSERT(length1 >= 0);
6705 Q_ASSERT(data1 || length1 == 0);
6706 if (!data2)
6707 return qt_lencmp(length1, 0);
6708 if (Q_UNLIKELY(length2 < 0))
6709 length2 = qsizetype(strlen(data2));
6710 return QtPrivate::compareStrings(QStringView(data1, length1),
6711 QUtf8StringView(data2, length2), cs);
6712}
6713
6714/*!
6715 \fn int QString::compare(const QString &s1, QStringView s2, Qt::CaseSensitivity cs = Qt::CaseSensitive)
6716 \overload compare()
6717*/
6718
6719/*!
6720 \fn int QString::compare(QStringView s1, const QString &s2, Qt::CaseSensitivity cs = Qt::CaseSensitive)
6721 \overload compare()
6722*/
6723
6724bool comparesEqual(const QByteArrayView &lhs, const QChar &rhs) noexcept
6725{
6726 return QtPrivate::equalStrings(QUtf8StringView(lhs), QStringView(&rhs, 1));
6727}
6728
6729Qt::strong_ordering compareThreeWay(const QByteArrayView &lhs, const QChar &rhs) noexcept
6730{
6731 const int res = QtPrivate::compareStrings(QUtf8StringView(lhs), QStringView(&rhs, 1));
6732 return Qt::compareThreeWay(res, 0);
6733}
6734
6735bool comparesEqual(const QByteArrayView &lhs, char16_t rhs) noexcept
6736{
6737 return QtPrivate::equalStrings(QUtf8StringView(lhs), QStringView(&rhs, 1));
6738}
6739
6740Qt::strong_ordering compareThreeWay(const QByteArrayView &lhs, char16_t rhs) noexcept
6741{
6742 const int res = QtPrivate::compareStrings(QUtf8StringView(lhs), QStringView(&rhs, 1));
6743 return Qt::compareThreeWay(res, 0);
6744}
6745
6746bool comparesEqual(const QByteArray &lhs, const QChar &rhs) noexcept
6747{
6748 return QtPrivate::equalStrings(QUtf8StringView(lhs), QStringView(&rhs, 1));
6749}
6750
6751Qt::strong_ordering compareThreeWay(const QByteArray &lhs, const QChar &rhs) noexcept
6752{
6753 const int res = QtPrivate::compareStrings(QUtf8StringView(lhs), QStringView(&rhs, 1));
6754 return Qt::compareThreeWay(res, 0);
6755}
6756
6757bool comparesEqual(const QByteArray &lhs, char16_t rhs) noexcept
6758{
6759 return QtPrivate::equalStrings(QUtf8StringView(lhs), QStringView(&rhs, 1));
6760}
6761
6762Qt::strong_ordering compareThreeWay(const QByteArray &lhs, char16_t rhs) noexcept
6763{
6764 const int res = QtPrivate::compareStrings(QUtf8StringView(lhs), QStringView(&rhs, 1));
6765 return Qt::compareThreeWay(res, 0);
6766}
6767
6768/*!
6769 \internal
6770 \since 6.8
6771*/
6772bool QT_FASTCALL QChar::equal_helper(QChar lhs, const char *rhs) noexcept
6773{
6774 return QtPrivate::equalStrings(QStringView(&lhs, 1), QUtf8StringView(rhs));
6775}
6776
6777int QT_FASTCALL QChar::compare_helper(QChar lhs, const char *rhs) noexcept
6778{
6779 return QtPrivate::compareStrings(QStringView(&lhs, 1), QUtf8StringView(rhs));
6780}
6781
6782/*!
6783 \internal
6784 \since 6.8
6785*/
6786bool QStringView::equal_helper(QStringView sv, const char *data, qsizetype len)
6787{
6788 Q_ASSERT(len >= 0);
6789 Q_ASSERT(data || len == 0);
6790 return QtPrivate::equalStrings(sv, QUtf8StringView(data, len));
6791}
6792
6793/*!
6794 \internal
6795 \since 6.8
6796*/
6797int QStringView::compare_helper(QStringView sv, const char *data, qsizetype len)
6798{
6799 Q_ASSERT(len >= 0);
6800 Q_ASSERT(data || len == 0);
6801 return QtPrivate::compareStrings(sv, QUtf8StringView(data, len));
6802}
6803
6804/*!
6805 \internal
6806 \since 6.8
6807*/
6808bool QLatin1StringView::equal_helper(QLatin1StringView s1, const char *s2, qsizetype len) noexcept
6809{
6810 // because qlatin1stringview.h can't include qutf8stringview.h
6811 Q_ASSERT(len >= 0);
6812 Q_ASSERT(s2 || len == 0);
6813 return QtPrivate::equalStrings(s1, QUtf8StringView(s2, len));
6814}
6815
6816/*!
6817 \internal
6818 \since 6.6
6819*/
6820int QLatin1StringView::compare_helper(const QLatin1StringView &s1, const char *s2, qsizetype len) noexcept
6821{
6822 // because qlatin1stringview.h can't include qutf8stringview.h
6823 Q_ASSERT(len >= 0);
6824 Q_ASSERT(s2 || len == 0);
6825 return QtPrivate::compareStrings(s1, QUtf8StringView(s2, len));
6826}
6827
6828/*!
6829 \internal
6830 \since 4.5
6831*/
6832int QLatin1StringView::compare_helper(const QChar *data1, qsizetype length1, QLatin1StringView s2,
6833 Qt::CaseSensitivity cs) noexcept
6834{
6835 Q_ASSERT(length1 >= 0);
6836 Q_ASSERT(data1 || length1 == 0);
6837 return QtPrivate::compareStrings(QStringView(data1, length1), s2, cs);
6838}
6839
6840/*!
6841 \fn int QString::localeAwareCompare(const QString & s1, const QString & s2)
6842
6843 Compares \a s1 with \a s2 and returns an integer less than, equal
6844 to, or greater than zero if \a s1 is less than, equal to, or
6845 greater than \a s2.
6846
6847 The comparison is performed in a locale- and also
6848 platform-dependent manner. Use this function to present sorted
6849 lists of strings to the user.
6850
6851 \sa compare(), QLocale, {Comparing Strings}
6852*/
6853
6854/*!
6855 \fn int QString::localeAwareCompare(QStringView other) const
6856 \since 6.0
6857 \overload localeAwareCompare()
6858
6859 Compares this string with the \a other string and returns an
6860 integer less than, equal to, or greater than zero if this string
6861 is less than, equal to, or greater than the \a other string.
6862
6863 The comparison is performed in a locale- and also
6864 platform-dependent manner. Use this function to present sorted
6865 lists of strings to the user.
6866
6867 Same as \c {localeAwareCompare(*this, other)}.
6868
6869 \sa {Comparing Strings}
6870*/
6871
6872/*!
6873 \fn int QString::localeAwareCompare(QStringView s1, QStringView s2)
6874 \since 6.0
6875 \overload localeAwareCompare()
6876
6877 Compares \a s1 with \a s2 and returns an integer less than, equal
6878 to, or greater than zero if \a s1 is less than, equal to, or
6879 greater than \a s2.
6880
6881 The comparison is performed in a locale- and also
6882 platform-dependent manner. Use this function to present sorted
6883 lists of strings to the user.
6884
6885 \sa {Comparing Strings}
6886*/
6887
6888
// Fallback values for the CSTR_* result codes, defined only when
// CSTR_LESS_THAN is not already provided. localeAwareCompare_helper()
// switches on them to interpret the CompareStringEx() result.
6889#if !defined(CSTR_LESS_THAN)
6890#define CSTR_LESS_THAN 1
6891#define CSTR_EQUAL 2
6892#define CSTR_GREATER_THAN 3
6893#endif
6894
6895/*!
6896 \overload localeAwareCompare()
6897
6898 Compares this string with the \a other string and returns an
6899 integer less than, equal to, or greater than zero if this string
6900 is less than, equal to, or greater than the \a other string.
6901
6902 The comparison is performed in a locale- and also
6903 platform-dependent manner. Use this function to present sorted
6904 lists of strings to the user.
6905
6906 Same as \c {localeAwareCompare(*this, other)}.
6907
6908 \sa {Comparing Strings}
6909*/
6910int QString::localeAwareCompare(const QString &other) const
6911{
6912 return localeAwareCompare_helper(constData(), size(), other.constData(), other.size());
6913}
6914
6915/*!
6916 \internal
6917 \since 4.5
6918*/
6919int QString::localeAwareCompare_helper(const QChar *data1, qsizetype length1,
6920 const QChar *data2, qsizetype length2)
6921{
6922 Q_ASSERT(length1 >= 0);
6923 Q_ASSERT(data1 || length1 == 0);
6924 Q_ASSERT(length2 >= 0);
6925 Q_ASSERT(data2 || length2 == 0);
6926
6927 // do the right thing for null and empty
6928 if (length1 == 0 || length2 == 0)
6929 return QtPrivate::compareStrings(QStringView(data1, length1), QStringView(data2, length2),
6930 Qt::CaseSensitive);
6931
6932#if QT_CONFIG(icu)
6933 return QCollator::defaultCompare(QStringView(data1, length1), QStringView(data2, length2));
6934#else
6935 const QString lhs = QString::fromRawData(data1, length1).normalized(QString::NormalizationForm_C);
6936 const QString rhs = QString::fromRawData(data2, length2).normalized(QString::NormalizationForm_C);
6937# if defined(Q_OS_WIN)
6938 int res = CompareStringEx(LOCALE_NAME_USER_DEFAULT, 0, (LPWSTR)lhs.constData(), lhs.length(), (LPWSTR)rhs.constData(), rhs.length(), NULL, NULL, 0);
6939
6940 switch (res) {
6941 case CSTR_LESS_THAN:
6942 return -1;
6943 case CSTR_GREATER_THAN:
6944 return 1;
6945 default:
6946 return 0;
6947 }
6948# elif defined (Q_OS_DARWIN)
6949 // Use CFStringCompare for comparing strings on Mac. This makes Qt order
6950 // strings the same way as native applications do, and also respects
6951 // the "Order for sorted lists" setting in the International preferences
6952 // panel.
6953 const CFStringRef thisString =
6954 CFStringCreateWithCharactersNoCopy(kCFAllocatorDefault,
6955 reinterpret_cast<const UniChar *>(lhs.constData()), lhs.length(), kCFAllocatorNull);
6956 const CFStringRef otherString =
6957 CFStringCreateWithCharactersNoCopy(kCFAllocatorDefault,
6958 reinterpret_cast<const UniChar *>(rhs.constData()), rhs.length(), kCFAllocatorNull);
6959
6960 const int result = CFStringCompare(thisString, otherString, kCFCompareLocalized);
6961 CFRelease(thisString);
6962 CFRelease(otherString);
6963 return result;
6964# elif defined(Q_OS_UNIX)
6965 // declared in <string.h> (no better than QtPrivate::compareStrings() on Android, sadly)
6966 return strcoll(lhs.toLocal8Bit().constData(), rhs.toLocal8Bit().constData());
6967# else
6968# error "This case shouldn't happen"
6969 return QtPrivate::compareStrings(lhs, rhs, Qt::CaseSensitive);
6970# endif
6971#endif // !QT_CONFIG(icu)
6972}
6973
6974
6975/*!
6976 \fn const QChar *QString::unicode() const
6977
6978 Returns a Unicode representation of the string.
6979 The result remains valid until the string is modified.
6980
6981 \note The returned string may not be '\\0'-terminated.
6982 Use size() to determine the length of the array.
6983
6984 \sa utf16(), fromRawData()
6985*/
6986
6987/*!
6988 \fn const ushort *QString::utf16() const
6989 \obsolete [6.11] Use nullTerminate() and cast data() to \c{const char16_t *}
6990
6991 Returns the QString as a '\\0'-terminated array of unsigned
6992 shorts. The result remains valid until the string is modified.
6993
6994 The returned string is in host byte order.
6995
6996 \sa unicode()
6997*/
6998
6999const ushort *QString::utf16() const
7000{
7001 if (!d->isMutable()) {
7002 // ensure '\0'-termination for ::fromRawData strings
7003 const_cast<QString*>(this)->reallocData(d.size, QArrayData::KeepSize);
7004 }
7005 return reinterpret_cast<const ushort *>(d.data());
7006}
7007
7008/*!
7009 \fn QString &QString::nullTerminate()
7010 \since 6.10
7011
7012 If this string data isn't null-terminated, this method will make a deep
7013 copy of the data and make it null-terminated.
7014
7015 A QString is null-terminated by default, however in some cases (e.g.
7016 when using fromRawData()), the string data doesn't necessarily end
7017 with a \c {\0} character, which could be a problem when calling methods
7018 that expect a null-terminated string.
7019
7020 \sa nullTerminated(), fromRawData(), setRawData()
7021*/
7022QString &QString::nullTerminate()
7023{
7024 // ensure '\0'-termination for ::fromRawData strings
7025 if (!d->isMutable())
7026 *this = QString{constData(), size()};
7027 return *this;
7028}
7029
7030/*!
7031 \fn QString QString::nullTerminated() const &
7032 \fn QString QString::nullTerminated() &&
7033 \since 6.10
7034
7035 Returns a copy of this string that is always null-terminated.
7036
7037 \sa nullTerminate(), fromRawData(), setRawData()
7038*/
7039QString QString::nullTerminated() const &
7040{
7041 // ensure '\0'-termination for ::fromRawData strings
7042 if (!d->isMutable())
7043 return QString{constData(), size()};
7044 return *this;
7045}
7046
7047QString QString::nullTerminated() &&
7048{
7049 nullTerminate();
7050 return std::move(*this);
7051}
7052
7053/*!
7054 Returns a string of size \a width that contains this string
7055 padded by the \a fill character.
7056
7057 If \a truncate is \c false and the size() of the string is more than
7058 \a width, then the returned string is a copy of the string.
7059
7060 \snippet qstring/main.cpp 32
7061
7062 If \a truncate is \c true and the size() of the string is more than
7063 \a width, then any characters in a copy of the string after
7064 position \a width are removed, and the copy is returned.
7065
7066 \snippet qstring/main.cpp 33
7067
7068 \sa rightJustified()
7069*/
7070
7071QString QString::leftJustified(qsizetype width, QChar fill, bool truncate) const
7072{
7073 QString result;
7074 qsizetype len = size();
7075 qsizetype padlen = width - len;
7076 if (padlen > 0) {
7077 result.resize(len+padlen);
7078 if (len)
7079 memcpy(result.d.data(), d.data(), sizeof(QChar)*len);
7080 QChar *uc = (QChar*)result.d.data() + len;
7081 while (padlen--)
7082 * uc++ = fill;
7083 } else {
7084 if (truncate)
7085 result = left(width);
7086 else
7087 result = *this;
7088 }
7089 return result;
7090}
7091
7092/*!
7093 Returns a string of size() \a width that contains the \a fill
7094 character followed by the string. For example:
7095
7096 \snippet qstring/main.cpp 49
7097
7098 If \a truncate is \c false and the size() of the string is more than
7099 \a width, then the returned string is a copy of the string.
7100
7101 If \a truncate is \c true and the size() of the string is more than
7102 \a width, then the resulting string is truncated at position \a
7103 width.
7104
7105 \snippet qstring/main.cpp 50
7106
7107 \sa leftJustified()
7108*/
7109
7110QString QString::rightJustified(qsizetype width, QChar fill, bool truncate) const
7111{
7112 QString result;
7113 qsizetype len = size();
7114 qsizetype padlen = width - len;
7115 if (padlen > 0) {
7116 result.resize(len+padlen);
7117 QChar *uc = (QChar*)result.d.data();
7118 while (padlen--)
7119 * uc++ = fill;
7120 if (len)
7121 memcpy(static_cast<void *>(uc), static_cast<const void *>(d.data()), sizeof(QChar)*len);
7122 } else {
7123 if (truncate)
7124 result = left(width);
7125 else
7126 result = *this;
7127 }
7128 return result;
7129}
7130
7131/*!
7132 \fn QString QString::toLower() const
7133
7134 Returns a lowercase copy of the string.
7135
7136 \snippet qstring/main.cpp 75
7137
7138 The case conversion will always happen in the 'C' locale. For
7139 locale-dependent case folding use QLocale::toLower().
7140
7141 \sa toUpper(), QLocale::toLower()
7142*/
7143
7144namespace QUnicodeTables {
7145/*
7146 \internal
7147 Converts the \a str string starting from the position pointed to by the \a
7148 it iterator, using the Unicode case traits \c Traits, and returns the
7149 result. The input string must not be empty (the convertCase function below
7150 guarantees that).
7151
7152 The string type \c{T} is also a template and is either \c{const QString} or
7153 \c{QString}. This function can do both copy-conversion and in-place
7154 conversion depending on the state of the \a str parameter:
7155 \list
7156 \li \c{T} is \c{const QString}: copy-convert
7157 \li \c{T} is \c{QString} and its refcount != 1: copy-convert
7158 \li \c{T} is \c{QString} and its refcount == 1: in-place convert
7159 \endlist
7160
7161 In copy-convert mode, the local variable \c{s} is detached from the input
7162 \a str. In the in-place convert mode, \a str is in moved-from state and
7163 \c{s} contains the only copy of the string, without reallocation (thus,
7164 \a it is still valid).
7165
7166 There is one pathological case left: when the in-place conversion needs to
7167 reallocate memory to grow the buffer. In that case, we need to adjust the \a
7168 it pointer.
7169 */
7170template <typename T>
7171Q_NEVER_INLINE
7173{
7174 Q_ASSERT(!str.isEmpty());
7175 QString s = std::move(str); // will copy if T is const QString
7176 QChar *pp = s.begin() + it.index(); // will detach if necessary
7177
7178 do {
7179 const auto folded = fullConvertCase(it.next(), which);
7180 if (Q_UNLIKELY(folded.size() > 1)) {
7181 if (folded.chars[0] == *pp && folded.size() == 2) {
7182 // special case: only second actually changed (e.g. surrogate pairs),
7183 // avoid slow case
7184 ++pp;
7185 *pp++ = folded.chars[1];
7186 } else {
7187 // slow path: the string is growing
7188 qsizetype inpos = it.index() - 1;
7190
7191 s.replace(outpos, 1, reinterpret_cast<const QChar *>(folded.data()), folded.size());
7192 pp = const_cast<QChar *>(s.constBegin()) + outpos + folded.size();
7193
7194 // Adjust the input iterator if we are performing an in-place conversion
7195 if constexpr (!std::is_const<T>::value)
7197 }
7198 } else {
7199 *pp++ = folded.chars[0];
7200 }
7201 } while (it.hasNext());
7202
7203 return s;
7204}
7205
7206template <typename T>
7207static QString convertCase(T &str, QUnicodeTables::Case which)
7208{
7209 const QChar *p = str.constBegin();
7210 const QChar *e = p + str.size();
7211
7212 // this avoids out of bounds check in the loop
7213 while (e != p && e[-1].isHighSurrogate())
7214 --e;
7215
7216 QStringIterator it(p, e);
7217 while (it.hasNext()) {
7218 const char32_t uc = it.next();
7219 if (caseConversion(uc)[which].diff) {
7220 it.recede();
7221 return detachAndConvertCase(str, it, which);
7222 }
7223 }
7224 return std::move(str);
7225}
7226} // namespace QUnicodeTables
7227
7228QString QString::toLower_helper(const QString &str)
7229{
7230 return QUnicodeTables::convertCase(str, QUnicodeTables::LowerCase);
7231}
7232
7233QString QString::toLower_helper(QString &str)
7234{
7235 return QUnicodeTables::convertCase(str, QUnicodeTables::LowerCase);
7236}
7237
7238/*!
7239 \fn QString QString::toCaseFolded() const
7240
7241 Returns the case folded equivalent of the string. For most Unicode
7242 characters this is the same as toLower().
7243*/
7244
7245QString QString::toCaseFolded_helper(const QString &str)
7246{
7247 return QUnicodeTables::convertCase(str, QUnicodeTables::CaseFold);
7248}
7249
7250QString QString::toCaseFolded_helper(QString &str)
7251{
7252 return QUnicodeTables::convertCase(str, QUnicodeTables::CaseFold);
7253}
7254
7255/*!
7256 \fn QString QString::toUpper() const
7257
7258 Returns an uppercase copy of the string.
7259
7260 \snippet qstring/main.cpp 81
7261
7262 The case conversion will always happen in the 'C' locale. For
7263 locale-dependent case folding use QLocale::toUpper().
7264
7265 \note In some cases the uppercase form of a string may be longer than the
7266 original.
7267
7268 \note Since 2024, the German language officially prefers to uppercase ß
7269 (U+00DF LATIN SMALL LETTER SHARP S) as ẞ (U+1E9E LATIN CAPITAL LETTER SHARP S).
7270 Qt's implementation follows Unicode, which still mandates the use of "SS".
7271 If you need to implement the new German rules, you need to manually do
7272 \c{replace(u'ß', u'ẞ')} \e{before} calling this function.
7273
7274 \sa toLower(), QLocale::toLower()
7275*/
7276
7277QString QString::toUpper_helper(const QString &str)
7278{
7279 return QUnicodeTables::convertCase(str, QUnicodeTables::UpperCase);
7280}
7281
7282QString QString::toUpper_helper(QString &str)
7283{
7284 return QUnicodeTables::convertCase(str, QUnicodeTables::UpperCase);
7285}
7286
7287/*!
7288 \since 5.5
7289
7290 Safely builds a formatted string from the format string \a cformat
7291 and an arbitrary list of arguments.
7292
7293 The format string supports the conversion specifiers, length modifiers,
7294 and flags provided by printf() in the standard C++ library. The \a cformat
7295 string and \c{%s} arguments must be UTF-8 encoded.
7296
7297 \note The \c{%lc} escape sequence expects a unicode character of type
7298 \c char16_t (as returned by QChar::unicode()), or \c ushort.
7299 The \c{%ls} escape sequence expects a pointer to a zero-terminated array
7300 of unicode characters of type \c char16_t, or \c ushort (as returned by
7301 QString::utf16()). This is at odds with the printf() in the standard C++
7302 library, which defines \c {%lc} to print a wchar_t and \c{%ls} to print
7303 a \c{wchar_t*}, and might also produce compiler warnings on platforms
7304 where the size of \c {wchar_t} is not 16 bits.
7305
7306 \warning We do not recommend using QString::asprintf() in new Qt
7307 code. Instead, consider using QTextStream or arg(), both of
7308 which support Unicode strings seamlessly and are type-safe.
7309 Here is an example that uses QTextStream:
7310
7311 \snippet qstring/main.cpp 64
7312
7313 For \l {QObject::tr()}{translations}, especially if the strings
7314 contains more than one escape sequence, you should consider using
7315 the arg() function instead. This allows the order of the
7316 replacements to be controlled by the translator.
7317
7318 \sa arg()
7319*/
7320
7321QString QString::asprintf(const char *cformat, ...)
7322{
7323 va_list ap;
7324 va_start(ap, cformat);
7325 QString s = vasprintf(cformat, ap);
7326 va_end(ap);
7327 return s;
7328}
7329
7330static void append_utf8(QString &qs, const char *cs, qsizetype len)
7331{
7332 const qsizetype oldSize = qs.size();
7333 qs.resize(oldSize + len);
7334 const QChar *newEnd = QUtf8::convertToUnicode(qs.data() + oldSize, QByteArrayView(cs, len));
7335 qs.resize(newEnd - qs.constData());
7336}
7337
7338static uint parse_flag_characters(const char * &c) noexcept
7339{
7340 uint flags = QLocaleData::ZeroPadExponent;
7341 while (true) {
7342 switch (*c) {
7343 case '#':
7346 break;
7347 case '0': flags |= QLocaleData::ZeroPadded; break;
7348 case '-': flags |= QLocaleData::LeftAdjusted; break;
7349 case ' ': flags |= QLocaleData::BlankBeforePositive; break;
7350 case '+': flags |= QLocaleData::AlwaysShowSign; break;
7351 case '\'': flags |= QLocaleData::GroupDigits; break;
7352 default: return flags;
7353 }
7354 ++c;
7355 }
7356}
7357
7358static int parse_field_width(const char *&c, qsizetype size)
7359{
7360 Q_ASSERT(isAsciiDigit(*c));
7361 const char *const stop = c + size;
7362
7363 // can't be negative - started with a digit
7364 // contains at least one digit
7365 auto [result, used] = qstrntoull(c, size, 10);
7366 c += used;
7367 if (used <= 0)
7368 return false;
7369 // preserve Qt 5.5 behavior of consuming all digits, no matter how many
7370 while (c < stop && isAsciiDigit(*c))
7371 ++c;
7372 return result < qulonglong(std::numeric_limits<int>::max()) ? int(result) : 0;
7373}
7374
7376
// Advances c past its current character when that character equals ch.
// Returns whether the character matched.
static inline bool can_consume(const char * &c, char ch) noexcept
{
    const bool matched = (*c == ch);
    if (matched)
        ++c;
    return matched;
}
7385
7386static LengthMod parse_length_modifier(const char * &c) noexcept
7387{
7388 switch (*c++) {
7389 case 'h': return can_consume(c, 'h') ? lm_hh : lm_h;
7390 case 'l': return can_consume(c, 'l') ? lm_ll : lm_l;
7391 case 'L': return lm_L;
7392 case 'j': return lm_j;
7393 case 'z':
7394 case 'Z': return lm_z;
7395 case 't': return lm_t;
7396 }
7397 --c; // don't consume *c - it wasn't a flag
7398 return lm_none;
7399}
7400
7401/*!
7402 \fn QString QString::vasprintf(const char *cformat, va_list ap)
7403 \since 5.5
7404
7405 Equivalent method to asprintf(), but takes a va_list \a ap
7406 instead of a list of variable arguments. See the asprintf()
7407 documentation for an explanation of \a cformat.
7408
7409 This method does not call the va_end macro, the caller
7410 is responsible to call va_end on \a ap.
7411
7412 \sa asprintf()
7413*/
7414
// Builds the formatted string in a single pass over cformat. Literal runs are
// appended as UTF-8; each '%' escape is parsed as flags, field width,
// precision, length modifier and conversion specifier (in that order), with
// arguments read from ap as they are needed. Incomplete or unknown escapes
// are copied to the output as plain text.
7415QString QString::vasprintf(const char *cformat, va_list ap)
7416{
7417 if (!cformat || !*cformat) {
7418 // Qt 1.x compat
7419 return fromLatin1("");
7420 }
7421
7422 // Parse cformat
7423
7424 QString result;
7425 const char *c = cformat;
 // End of the format string; bounds the digit scan in parse_field_width().
7426 const char *formatEnd = cformat + qstrlen(cformat);
7427 for (;;) {
7428 // Copy non-escape chars to result
7429 const char *cb = c;
7430 while (*c != '\0' && *c != '%')
7431 c++;
7432 append_utf8(result, cb, qsizetype(c - cb));
7433
7434 if (*c == '\0')
7435 break;
7436
7437 // Found '%'
 // Remembered so that a malformed escape can be echoed verbatim.
7438 const char *escape_start = c;
7439 ++c;
7440
7441 if (*c == '\0') {
7442 result.append(u'%'); // a % at the end of the string - treat as non-escape text
7443 break;
7444 }
7445 if (*c == '%') {
7446 result.append(u'%'); // %%
7447 ++c;
7448 continue;
7449 }
7450
7451 uint flags = parse_flag_characters(c);
7452
7453 if (*c == '\0') {
7454 result.append(QLatin1StringView(escape_start)); // incomplete escape, treat as non-escape text
7455 break;
7456 }
7457
7458 // Parse field width
7459 int width = -1; // -1 means unspecified
7460 if (isAsciiDigit(*c)) {
7461 width = parse_field_width(c, formatEnd - c);
7462 } else if (*c == '*') { // can't parse this in another function, not portably, at least
7463 width = va_arg(ap, int);
7464 if (width < 0)
7465 width = -1; // treat all negative numbers as unspecified
7466 ++c;
7467 }
7468
7469 if (*c == '\0') {
7470 result.append(QLatin1StringView(escape_start)); // incomplete escape, treat as non-escape text
7471 break;
7472 }
7473
7474 // Parse precision
7475 int precision = -1; // -1 means unspecified
7476 if (*c == '.') {
7477 ++c;
 // A bare '.' with no digits or '*' after it means precision 0.
7478 precision = 0;
7479 if (isAsciiDigit(*c)) {
7480 precision = parse_field_width(c, formatEnd - c);
7481 } else if (*c == '*') { // can't parse this in another function, not portably, at least
7482 precision = va_arg(ap, int);
7483 if (precision < 0)
7484 precision = -1; // treat all negative numbers as unspecified
7485 ++c;
7486 }
7487 }
7488
7489 if (*c == '\0') {
7490 result.append(QLatin1StringView(escape_start)); // incomplete escape, treat as non-escape text
7491 break;
7492 }
7493
7494 const LengthMod length_mod = parse_length_modifier(c);
7495
7496 if (*c == '\0') {
7497 result.append(QLatin1StringView(escape_start)); // incomplete escape, treat as non-escape text
7498 break;
7499 }
7500
7501 // Parse the conversion specifier and do the conversion
 // subst receives the converted text; it is padded to width and appended
 // to result after the switch.
7502 QString subst;
7503 switch (*c) {
7504 case 'd':
7505 case 'i': {
7506 qint64 i;
7507 switch (length_mod) {
 // hh and h arguments are read as int.
7508 case lm_none: i = va_arg(ap, int); break;
7509 case lm_hh: i = va_arg(ap, int); break;
7510 case lm_h: i = va_arg(ap, int); break;
7511 case lm_l: i = va_arg(ap, long int); break;
7512 case lm_ll: i = va_arg(ap, qint64); break;
 // NOTE(review): C printf defines 'j' as intmax_t, but a long int is
 // read here - confirm the two have the same size on every target.
7513 case lm_j: i = va_arg(ap, long int); break;
7514
7515 /* ptrdiff_t actually, but it should be the same for us */
7516 case lm_z: i = va_arg(ap, qsizetype); break;
7517 case lm_t: i = va_arg(ap, qsizetype); break;
 // Any other modifier (e.g. lm_L): no argument is read from ap.
7518 default: i = 0; break;
7519 }
7520 subst = QLocaleData::c()->longLongToString(i, precision, 10, width, flags);
7521 ++c;
7522 break;
7523 }
7524 case 'o':
7525 case 'u':
7526 case 'x':
7527 case 'X': {
7528 quint64 u;
7529 switch (length_mod) {
7530 case lm_none: u = va_arg(ap, uint); break;
7531 case lm_hh: u = va_arg(ap, uint); break;
7532 case lm_h: u = va_arg(ap, uint); break;
7533 case lm_l: u = va_arg(ap, ulong); break;
7534 case lm_ll: u = va_arg(ap, quint64); break;
7535 case lm_t: u = va_arg(ap, size_t); break;
7536 case lm_z: u = va_arg(ap, size_t); break;
 // NOTE(review): unlike the signed branch there is no lm_j case, so 'j'
 // lands here and yields 0 without reading an argument from ap.
7537 default: u = 0; break;
7538 }
7539
 // 'X' requests upper-case output.
7540 if (isAsciiUpper(*c))
7541 flags |= QLocaleData::CapitalEorX;
7542
7543 int base = 10;
7544 switch (QtMiscUtils::toAsciiLower(*c)) {
7545 case 'o':
7546 base = 8; break;
7547 case 'u':
7548 base = 10; break;
7549 case 'x':
7550 base = 16; break;
7551 default: break;
7552 }
7553 subst = QLocaleData::c()->unsLongLongToString(u, precision, base, width, flags);
7554 ++c;
7555 break;
7556 }
7557 case 'E':
7558 case 'e':
7559 case 'F':
7560 case 'f':
7561 case 'G':
7562 case 'g':
7563 case 'A':
7564 case 'a': {
7565 double d;
7566 if (length_mod == lm_L)
7567 d = va_arg(ap, long double); // not supported - converted to a double
7568 else
7569 d = va_arg(ap, double);
7570
7571 if (isAsciiUpper(*c))
7572 flags |= QLocaleData::CapitalEorX;
7573
7574 QLocaleData::DoubleForm form = QLocaleData::DFDecimal;
7575 switch (QtMiscUtils::toAsciiLower(*c)) {
7576 case 'e': form = QLocaleData::DFExponent; break;
7577 case 'a': // not supported - decimal form used instead
7578 case 'f': form = QLocaleData::DFDecimal; break;
7579 case 'g': form = QLocaleData::DFSignificantDigits; break;
7580 default: break;
7581 }
7582 subst = QLocaleData::c()->doubleToString(d, precision, form, width, flags);
7583 ++c;
7584 break;
7585 }
7586 case 'c': {
 // %lc takes a UCS-2 code unit; plain %c takes a Latin-1 character.
7587 if (length_mod == lm_l)
7588 subst = QChar::fromUcs2(va_arg(ap, int));
7589 else
7590 subst = QLatin1Char((uchar) va_arg(ap, int));
7591 ++c;
7592 break;
7593 }
7594 case 's': {
7595 if (length_mod == lm_l) {
 // %ls: zero-terminated UTF-16. Scan to the terminator, stopping early
 // once `precision` code units have been counted.
7596 const char16_t *buff = va_arg(ap, const char16_t*);
7597 const auto *ch = buff;
7598 while (precision != 0 && *ch != 0) {
7599 ++ch;
7600 --precision;
7601 }
7602 subst.setUtf16(buff, ch - buff);
7603 } else if (precision == -1) {
 // %s without precision: the whole zero-terminated UTF-8 string.
7604 subst = QString::fromUtf8(va_arg(ap, const char*));
7605 } else {
 // %s with precision: at most `precision` bytes of UTF-8 input.
7606 const char *buff = va_arg(ap, const char*);
7607 subst = QString::fromUtf8(buff, qstrnlen(buff, precision));
7608 }
7609 ++c;
7610 break;
7611 }
7612 case 'p': {
 // Pointers are printed as base-16 integers with the ShowBase flag set.
7613 void *arg = va_arg(ap, void*);
7614 const quint64 i = reinterpret_cast<quintptr>(arg);
7615 flags |= QLocaleData::ShowBase;
7616 subst = QLocaleData::c()->unsLongLongToString(i, precision, 16, width, flags);
7617 ++c;
7618 break;
7619 }
7620 case 'n':
 // %n: store the current length of result through the pointer argument,
 // whose pointee type is selected by the length modifier. subst stays
 // empty, so only the width padding below is appended.
7621 switch (length_mod) {
7622 case lm_hh: {
7623 signed char *n = va_arg(ap, signed char*);
7624 *n = result.size();
7625 break;
7626 }
7627 case lm_h: {
7628 short int *n = va_arg(ap, short int*);
7629 *n = result.size();
7630 break;
7631 }
7632 case lm_l: {
7633 long int *n = va_arg(ap, long int*);
7634 *n = result.size();
7635 break;
7636 }
7637 case lm_ll: {
7638 qint64 *n = va_arg(ap, qint64*);
7639 *n = result.size();
7640 break;
7641 }
7642 default: {
7643 int *n = va_arg(ap, int*);
7644 *n = int(result.size());
7645 break;
7646 }
7647 }
7648 ++c;
7649 break;
7650
7651 default: // bad escape, treat as non-escape text
 // Echo everything from '%' up to (not including) the offending
 // character. c is not advanced, so the next loop iteration re-examines
 // that character.
7652 for (const char *cc = escape_start; cc != c; ++cc)
7653 result.append(QLatin1Char(*cc));
7654 continue;
7655 }
7656
 // Pad the converted text to the field width: on the right when the '-'
 // flag was given, otherwise on the left.
7657 if (flags & QLocaleData::LeftAdjusted)
7658 result.append(subst.leftJustified(width));
7659 else
7660 result.append(subst.rightJustified(width));
7661 }
7662
7663 return result;
7664}
7665
7666/*!
7667 \fn QString::toLongLong(bool *ok, int base) const
7668
7669 Returns the string converted to a \c{long long} using base \a
7670 base, which is 10 by default and must be between 2 and 36, or 0.
7671 Returns 0 if the conversion fails.
7672
7673 If \a ok is not \nullptr, failure is reported by setting *\a{ok}
7674 to \c false, and success by setting *\a{ok} to \c true.
7675
7676 If \a base is 0, the C language convention is used: if the string begins
7677 with "0x", base 16 is used; otherwise, if the string begins with "0b", base
7678 2 is used; otherwise, if the string begins with "0", base 8 is used;
7679 otherwise, base 10 is used.
7680
7681 The string conversion will always happen in the 'C' locale. For
7682 locale-dependent conversion use QLocale::toLongLong()
7683
7684 Example:
7685
7686 \snippet qstring/main.cpp 74
7687
7688 This function ignores leading and trailing whitespace.
7689
7690 \note Support for the "0b" prefix was added in Qt 6.4.
7691
7692 \sa number(), toULongLong(), toInt(), QLocale::toLongLong()
7693*/
7694
7695template <typename Int>
7696static Int toIntegral(QStringView string, bool *ok, int base)
7697{
7698#if defined(QT_CHECK_RANGE)
7699 if (base != 0 && (base < 2 || base > 36)) {
7700 qWarning("QString::toIntegral: Invalid base (%d)", base);
7701 base = 10;
7702 }
7703#endif
7704
7705 QVarLengthArray<uchar> latin1(string.size());
7706 qt_to_latin1(latin1.data(), string.utf16(), string.size());
7707 QSimpleParsedNumber<Int> r;
7708 if constexpr (std::is_signed_v<Int>)
7709 r = QLocaleData::bytearrayToLongLong(latin1, base);
7710 else
7711 r = QLocaleData::bytearrayToUnsLongLong(latin1, base);
7712 if (ok)
7713 *ok = r.ok();
7714 return r.result;
7715}
7716
7717qlonglong QString::toIntegral_helper(QStringView string, bool *ok, int base)
7718{
7719 return toIntegral<qlonglong>(string, ok, base);
7720}
7721
7722/*!
7723 \fn QString::toULongLong(bool *ok, int base) const
7724
7725 Returns the string converted to an \c{unsigned long long} using base \a
7726 base, which is 10 by default and must be between 2 and 36, or 0.
7727 Returns 0 if the conversion fails.
7728
7729 If \a ok is not \nullptr, failure is reported by setting *\a{ok}
7730 to \c false, and success by setting *\a{ok} to \c true.
7731
7732 If \a base is 0, the C language convention is used: if the string begins
7733 with "0x", base 16 is used; otherwise, if the string begins with "0b", base
7734 2 is used; otherwise, if the string begins with "0", base 8 is used;
7735 otherwise, base 10 is used.
7736
7737 The string conversion will always happen in the 'C' locale. For
7738 locale-dependent conversion use QLocale::toULongLong()
7739
7740 Example:
7741
7742 \snippet qstring/main.cpp 79
7743
7744 This function ignores leading and trailing whitespace.
7745
7746 \note Support for the "0b" prefix was added in Qt 6.4.
7747
7748 \sa number(), toLongLong(), QLocale::toULongLong()
7749*/
7750
7751qulonglong QString::toIntegral_helper(QStringView string, bool *ok, uint base)
7752{
7753 return toIntegral<qulonglong>(string, ok, base);
7754}
7755
7756/*!
7757 \fn long QString::toLong(bool *ok, int base) const
7758
7759 Returns the string converted to a \c long using base \a
7760 base, which is 10 by default and must be between 2 and 36, or 0.
7761 Returns 0 if the conversion fails.
7762
7763 If \a ok is not \nullptr, failure is reported by setting *\a{ok}
7764 to \c false, and success by setting *\a{ok} to \c true.
7765
7766 If \a base is 0, the C language convention is used: if the string begins
7767 with "0x", base 16 is used; otherwise, if the string begins with "0b", base
7768 2 is used; otherwise, if the string begins with "0", base 8 is used;
7769 otherwise, base 10 is used.
7770
7771 The string conversion will always happen in the 'C' locale. For
7772 locale-dependent conversion use QLocale::toLongLong()
7773
7774 Example:
7775
7776 \snippet qstring/main.cpp 73
7777
7778 This function ignores leading and trailing whitespace.
7779
7780 \note Support for the "0b" prefix was added in Qt 6.4.
7781
7782 \sa number(), toULong(), toInt(), QLocale::toInt()
7783*/
7784
7785/*!
7786 \fn ulong QString::toULong(bool *ok, int base) const
7787
7788 Returns the string converted to an \c{unsigned long} using base \a
7789 base, which is 10 by default and must be between 2 and 36, or 0.
7790 Returns 0 if the conversion fails.
7791
7792 If \a ok is not \nullptr, failure is reported by setting *\a{ok}
7793 to \c false, and success by setting *\a{ok} to \c true.
7794
7795 If \a base is 0, the C language convention is used: if the string begins
7796 with "0x", base 16 is used; otherwise, if the string begins with "0b", base
7797 2 is used; otherwise, if the string begins with "0", base 8 is used;
7798 otherwise, base 10 is used.
7799
7800 The string conversion will always happen in the 'C' locale. For
7801 locale-dependent conversion use QLocale::toULongLong()
7802
7803 Example:
7804
7805 \snippet qstring/main.cpp 78
7806
7807 This function ignores leading and trailing whitespace.
7808
7809 \note Support for the "0b" prefix was added in Qt 6.4.
7810
7811 \sa number(), QLocale::toUInt()
7812*/
7813
7814/*!
7815 \fn int QString::toInt(bool *ok, int base) const
7816 Returns the string converted to an \c int using base \a
7817 base, which is 10 by default and must be between 2 and 36, or 0.
7818 Returns 0 if the conversion fails.
7819
7820 If \a ok is not \nullptr, failure is reported by setting *\a{ok}
7821 to \c false, and success by setting *\a{ok} to \c true.
7822
7823 If \a base is 0, the C language convention is used: if the string begins
7824 with "0x", base 16 is used; otherwise, if the string begins with "0b", base
7825 2 is used; otherwise, if the string begins with "0", base 8 is used;
7826 otherwise, base 10 is used.
7827
7828 The string conversion will always happen in the 'C' locale. For
7829 locale-dependent conversion use QLocale::toInt()
7830
7831 Example:
7832
7833 \snippet qstring/main.cpp 72
7834
7835 This function ignores leading and trailing whitespace.
7836
7837 \note Support for the "0b" prefix was added in Qt 6.4.
7838
7839 \sa number(), toUInt(), toDouble(), QLocale::toInt()
7840*/
7841
7842/*!
7843 \fn uint QString::toUInt(bool *ok, int base) const
7844 Returns the string converted to an \c{unsigned int} using base \a
7845 base, which is 10 by default and must be between 2 and 36, or 0.
7846 Returns 0 if the conversion fails.
7847
7848 If \a ok is not \nullptr, failure is reported by setting *\a{ok}
7849 to \c false, and success by setting *\a{ok} to \c true.
7850
7851 If \a base is 0, the C language convention is used: if the string begins
7852 with "0x", base 16 is used; otherwise, if the string begins with "0b", base
7853 2 is used; otherwise, if the string begins with "0", base 8 is used;
7854 otherwise, base 10 is used.
7855
7856 The string conversion will always happen in the 'C' locale. For
7857 locale-dependent conversion use QLocale::toUInt()
7858
7859 Example:
7860
7861 \snippet qstring/main.cpp 77
7862
7863 This function ignores leading and trailing whitespace.
7864
7865 \note Support for the "0b" prefix was added in Qt 6.4.
7866
7867 \sa number(), toInt(), QLocale::toUInt()
7868*/
7869
7870/*!
7871 \fn short QString::toShort(bool *ok, int base) const
7872
7873 Returns the string converted to a \c short using base \a
7874 base, which is 10 by default and must be between 2 and 36, or 0.
7875 Returns 0 if the conversion fails.
7876
7877 If \a ok is not \nullptr, failure is reported by setting *\a{ok}
7878 to \c false, and success by setting *\a{ok} to \c true.
7879
7880 If \a base is 0, the C language convention is used: if the string begins
7881 with "0x", base 16 is used; otherwise, if the string begins with "0b", base
7882 2 is used; otherwise, if the string begins with "0", base 8 is used;
7883 otherwise, base 10 is used.
7884
7885 The string conversion will always happen in the 'C' locale. For
7886 locale-dependent conversion use QLocale::toShort()
7887
7888 Example:
7889
7890 \snippet qstring/main.cpp 76
7891
7892 This function ignores leading and trailing whitespace.
7893
7894 \note Support for the "0b" prefix was added in Qt 6.4.
7895
7896 \sa number(), toUShort(), toInt(), QLocale::toShort()
7897*/
7898
7899/*!
7900 \fn ushort QString::toUShort(bool *ok, int base) const
7901
7902 Returns the string converted to an \c{unsigned short} using base \a
7903 base, which is 10 by default and must be between 2 and 36, or 0.
7904 Returns 0 if the conversion fails.
7905
7906 If \a ok is not \nullptr, failure is reported by setting *\a{ok}
7907 to \c false, and success by setting *\a{ok} to \c true.
7908
7909 If \a base is 0, the C language convention is used: if the string begins
7910 with "0x", base 16 is used; otherwise, if the string begins with "0b", base
7911 2 is used; otherwise, if the string begins with "0", base 8 is used;
7912 otherwise, base 10 is used.
7913
7914 The string conversion will always happen in the 'C' locale. For
7915 locale-dependent conversion use QLocale::toUShort()
7916
7917 Example:
7918
7919 \snippet qstring/main.cpp 80
7920
7921 This function ignores leading and trailing whitespace.
7922
7923 \note Support for the "0b" prefix was added in Qt 6.4.
7924
7925 \sa number(), toShort(), QLocale::toUShort()
7926*/
7927
7928/*!
7929 Returns the string converted to a \c double value.
7930
7931 Returns an infinity if the conversion overflows or 0.0 if the
7932 conversion fails for other reasons (e.g. underflow).
7933
7934 If \a ok is not \nullptr, failure is reported by setting *\a{ok}
7935 to \c false, and success by setting *\a{ok} to \c true.
7936
7937 \snippet qstring/main.cpp 66
7938
    \warning The QString content may only contain valid numerical characters,
    which include the plus/minus sign, the character e used in scientific
    notation, and the decimal point. Including a unit or any other characters
    leads to a conversion error.
7943
7944 \snippet qstring/main.cpp 67
7945
7946 The string conversion will always happen in the 'C' locale. For
7947 locale-dependent conversion use QLocale::toDouble()
7948
7949 \snippet qstring/main.cpp 68
7950
7951 For historical reasons, this function does not handle
7952 thousands group separators. If you need to convert such numbers,
7953 use QLocale::toDouble().
7954
7955 \snippet qstring/main.cpp 69
7956
7957 This function ignores leading and trailing whitespace.
7958
7959 \sa number(), QLocale::setDefault(), QLocale::toDouble(), trimmed()
7960*/
7961
7962double QString::toDouble(bool *ok) const
7963{
7964 return QStringView(*this).toDouble(ok);
7965}
7966
7967double QStringView::toDouble(bool *ok) const
7968{
7969 QStringView string = qt_trimmed(*this);
7970 QVarLengthArray<uchar> latin1(string.size());
7971 qt_to_latin1(latin1.data(), string.utf16(), string.size());
7972 auto r = qt_asciiToDouble(reinterpret_cast<const char *>(latin1.data()), string.size());
7973 if (ok != nullptr)
7974 *ok = r.ok();
7975 return r.result;
7976}
7977
7978/*!
7979 Returns the string converted to a \c float value.
7980
7981 Returns an infinity if the conversion overflows or 0.0 if the
7982 conversion fails for other reasons (e.g. underflow).
7983
7984 If \a ok is not \nullptr, failure is reported by setting *\a{ok}
7985 to \c false, and success by setting *\a{ok} to \c true.
7986
    \warning The QString content may only contain valid numerical characters,
    which include the plus/minus sign, the character e used in scientific
    notation, and the decimal point. Including a unit or any other characters
    leads to a conversion error.
7991
7992 The string conversion will always happen in the 'C' locale. For
7993 locale-dependent conversion use QLocale::toFloat()
7994
7995 For historical reasons, this function does not handle
7996 thousands group separators. If you need to convert such numbers,
7997 use QLocale::toFloat().
7998
7999 Example:
8000
8001 \snippet qstring/main.cpp 71
8002
8003 This function ignores leading and trailing whitespace.
8004
8005 \sa number(), toDouble(), toInt(), QLocale::toFloat(), trimmed()
8006*/
8007
8008float QString::toFloat(bool *ok) const
8009{
8010 return QLocaleData::convertDoubleToFloat(toDouble(ok), ok);
8011}
8012
8013float QStringView::toFloat(bool *ok) const
8014{
8015 return QLocaleData::convertDoubleToFloat(toDouble(ok), ok);
8016}
8017
8018/*! \fn QString &QString::setNum(int n, int base)
8019
8020 Sets the string to the printed value of \a n in the specified \a
8021 base, and returns a reference to the string.
8022
8023 The base is 10 by default and must be between 2 and 36.
8024
8025 \snippet qstring/main.cpp 56
8026
8027 The formatting always uses QLocale::C, i.e., English/UnitedStates.
8028 To get a localized string representation of a number, use
8029 QLocale::toString() with the appropriate locale.
8030
8031 \sa number()
8032*/
8033
8034/*! \fn QString &QString::setNum(uint n, int base)
8035
8036 \overload
8037*/
8038
8039/*! \fn QString &QString::setNum(long n, int base)
8040
8041 \overload
8042*/
8043
8044/*! \fn QString &QString::setNum(ulong n, int base)
8045
8046 \overload
8047*/
8048
8049/*!
8050 \overload
8051*/
8052QString &QString::setNum(qlonglong n, int base)
8053{
8054 return *this = number(n, base);
8055}
8056
8057/*!
8058 \overload
8059*/
8060QString &QString::setNum(qulonglong n, int base)
8061{
8062 return *this = number(n, base);
8063}
8064
8065/*! \fn QString &QString::setNum(short n, int base)
8066
8067 \overload
8068*/
8069
8070/*! \fn QString &QString::setNum(ushort n, int base)
8071
8072 \overload
8073*/
8074
8075/*!
8076 \overload
8077
8078 Sets the string to the printed value of \a n, formatted according to the
8079 given \a format and \a precision, and returns a reference to the string.
8080
8081 \sa number(), QLocale::FloatingPointPrecisionOption, {Number Formats}
8082*/
8083
8084QString &QString::setNum(double n, char format, int precision)
8085{
8086 return *this = number(n, format, precision);
8087}
8088
8089/*!
8090 \fn QString &QString::setNum(float n, char format, int precision)
8091 \overload
8092
8093 Sets the string to the printed value of \a n, formatted according
8094 to the given \a format and \a precision, and returns a reference
8095 to the string.
8096
8097 The formatting always uses QLocale::C, i.e., English/UnitedStates.
8098 To get a localized string representation of a number, use
8099 QLocale::toString() with the appropriate locale.
8100
8101 \sa number()
8102*/
8103
8104
8105/*!
8106 \fn QString QString::number(long n, int base)
8107
8108 Returns a string equivalent of the number \a n according to the
8109 specified \a base.
8110
8111 The base is 10 by default and must be between 2
8112 and 36. For bases other than 10, \a n is treated as an
8113 unsigned integer.
8114
8115 The formatting always uses QLocale::C, i.e., English/UnitedStates.
8116 To get a localized string representation of a number, use
8117 QLocale::toString() with the appropriate locale.
8118
8119 \snippet qstring/main.cpp 35
8120
8121 \sa setNum()
8122*/
8123
8124QString QString::number(long n, int base)
8125{
8126 return number(qlonglong(n), base);
8127}
8128
8129/*!
8130 \fn QString QString::number(ulong n, int base)
8131
8132 \overload
8133*/
8134QString QString::number(ulong n, int base)
8135{
8136 return number(qulonglong(n), base);
8137}
8138
8139/*!
8140 \overload
8141*/
8142QString QString::number(int n, int base)
8143{
8144 return number(qlonglong(n), base);
8145}
8146
8147/*!
8148 \overload
8149*/
8150QString QString::number(uint n, int base)
8151{
8152 return number(qulonglong(n), base);
8153}
8154
8155/*!
8156 \overload
8157*/
8158QString QString::number(qlonglong n, int base)
8159{
8160#if defined(QT_CHECK_RANGE)
8161 if (base < 2 || base > 36) {
8162 qWarning("QString::setNum: Invalid base (%d)", base);
8163 base = 10;
8164 }
8165#endif
8166 bool negative = n < 0;
8167 /*
8168 Negating std::numeric_limits<qlonglong>::min() hits undefined behavior, so
8169 taking an absolute value has to take a slight detour.
8170 */
8171 return qulltoBasicLatin(negative ? 1u + qulonglong(-(n + 1)) : qulonglong(n), base, negative);
8172}
8173
8174/*!
8175 \overload
8176*/
8177QString QString::number(qulonglong n, int base)
8178{
8179#if defined(QT_CHECK_RANGE)
8180 if (base < 2 || base > 36) {
8181 qWarning("QString::setNum: Invalid base (%d)", base);
8182 base = 10;
8183 }
8184#endif
8185 return qulltoBasicLatin(n, base, false);
8186}
8187
8188
8189/*!
8190 Returns a string representing the floating-point number \a n.
8191
8192 Returns a string that represents \a n, formatted according to the specified
8193 \a format and \a precision.
8194
8195 For formats with an exponent, the exponent will show its sign and have at
8196 least two digits, left-padding the exponent with zero if needed.
8197
8198 \sa setNum(), QLocale::toString(), QLocale::FloatingPointPrecisionOption, {Number Formats}
8199*/
8200QString QString::number(double n, char format, int precision)
8201{
8202 QLocaleData::DoubleForm form = QLocaleData::DFDecimal;
8203
8204 switch (QtMiscUtils::toAsciiLower(format)) {
8205 case 'f':
8206 form = QLocaleData::DFDecimal;
8207 break;
8208 case 'e':
8209 form = QLocaleData::DFExponent;
8210 break;
8211 case 'g':
8212 form = QLocaleData::DFSignificantDigits;
8213 break;
8214 default:
8215#if defined(QT_CHECK_RANGE)
8216 qWarning("QString::setNum: Invalid format char '%c'", format);
8217#endif
8218 break;
8219 }
8220
8221 return qdtoBasicLatin(n, form, precision, isAsciiUpper(format));
8222}
8223
8224namespace {
8225template<class ResultList, class StringSource>
8226static ResultList splitString(const StringSource &source, QStringView sep,
8227 Qt::SplitBehavior behavior, Qt::CaseSensitivity cs)
8228{
8229 ResultList list;
8230 typename StringSource::size_type start = 0;
8231 typename StringSource::size_type end;
8232 typename StringSource::size_type extra = 0;
8233 while ((end = QtPrivate::findString(QStringView(source.constData(), source.size()), start + extra, sep, cs)) != -1) {
8234 if (start != end || behavior == Qt::KeepEmptyParts)
8235 list.append(source.sliced(start, end - start));
8236 start = end + sep.size();
8237 extra = (sep.size() == 0 ? 1 : 0);
8238 }
8239 if (start != source.size() || behavior == Qt::KeepEmptyParts)
8240 list.append(source.sliced(start));
8241 return list;
8242}
8243
8244} // namespace
8245
8246/*!
8247 Splits the string into substrings wherever \a sep occurs, and
8248 returns the list of those strings. If \a sep does not match
8249 anywhere in the string, split() returns a single-element list
8250 containing this string.
8251
8252 \a cs specifies whether \a sep should be matched case
8253 sensitively or case insensitively.
8254
8255 If \a behavior is Qt::SkipEmptyParts, empty entries don't
8256 appear in the result. By default, empty entries are kept.
8257
8258 Example:
8259
8260 \snippet qstring/main.cpp 62
8261
8262 If \a sep is empty, split() returns an empty string, followed
8263 by each of the string's characters, followed by another empty string:
8264
8265 \snippet qstring/main.cpp 62-empty
8266
8267 To understand this behavior, recall that the empty string matches
8268 everywhere, so the above is qualitatively the same as:
8269
8270 \snippet qstring/main.cpp 62-slashes
8271
8272 \sa QStringList::join(), section()
8273
8274 \since 5.14
8275*/
8276QStringList QString::split(const QString &sep, Qt::SplitBehavior behavior, Qt::CaseSensitivity cs) const
8277{
8278 return splitString<QStringList>(*this, sep, behavior, cs);
8279}
8280
8281/*!
8282 \overload
8283 \since 5.14
8284*/
8285QStringList QString::split(QChar sep, Qt::SplitBehavior behavior, Qt::CaseSensitivity cs) const
8286{
8287 return splitString<QStringList>(*this, QStringView(&sep, 1), behavior, cs);
8288}
8289
8290/*!
8291 \fn QList<QStringView> QStringView::split(QChar sep, Qt::SplitBehavior behavior, Qt::CaseSensitivity cs) const
8292 \fn QList<QStringView> QStringView::split(QStringView sep, Qt::SplitBehavior behavior, Qt::CaseSensitivity cs) const
8293
8294
8295 Splits the view into substring views wherever \a sep occurs, and
8296 returns the list of those string views.
8297
8298 See QString::split() for how \a sep, \a behavior and \a cs interact to form
8299 the result.
8300
8301 \note All the returned views are valid as long as the data referenced by
8302 this string view is valid. Destroying the data will cause all views to
8303 become dangling.
8304
8305 \since 6.0
8306*/
8307QList<QStringView> QStringView::split(QStringView sep, Qt::SplitBehavior behavior, Qt::CaseSensitivity cs) const
8308{
8309 return splitString<QList<QStringView>>(QStringView(*this), sep, behavior, cs);
8310}
8311
8312QList<QStringView> QStringView::split(QChar sep, Qt::SplitBehavior behavior, Qt::CaseSensitivity cs) const
8313{
8314 return split(QStringView(&sep, 1), behavior, cs);
8315}
8316
8317#if QT_CONFIG(regularexpression)
8318namespace {
8319template<class ResultList, typename String, typename MatchingFunction>
8320static ResultList splitString(const String &source, const QRegularExpression &re,
8321 MatchingFunction matchingFunction,
8322 Qt::SplitBehavior behavior)
8323{
8324 ResultList list;
8325 if (!re.isValid()) {
8326 qtWarnAboutInvalidRegularExpression(re, "QString", "split");
8327 return list;
8328 }
8329
8330 qsizetype start = 0;
8331 qsizetype end = 0;
8332 QRegularExpressionMatchIterator iterator = (re.*matchingFunction)(source, 0, QRegularExpression::NormalMatch, QRegularExpression::NoMatchOption);
8333 while (iterator.hasNext()) {
8334 QRegularExpressionMatch match = iterator.next();
8335 end = match.capturedStart();
8336 if (start != end || behavior == Qt::KeepEmptyParts)
8337 list.append(source.sliced(start, end - start));
8338 start = match.capturedEnd();
8339 }
8340
8341 if (start != source.size() || behavior == Qt::KeepEmptyParts)
8342 list.append(source.sliced(start));
8343
8344 return list;
8345}
8346} // namespace
8347
8348/*!
8349 \overload
8350 \since 5.14
8351
8352 Splits the string into substrings wherever the regular expression
8353 \a re matches, and returns the list of those strings. If \a re
8354 does not match anywhere in the string, split() returns a
8355 single-element list containing this string.
8356
8357 Here is an example where we extract the words in a sentence
8358 using one or more whitespace characters as the separator:
8359
8360 \snippet qstring/main.cpp 90
8361
8362 Here is a similar example, but this time we use any sequence of
8363 non-word characters as the separator:
8364
8365 \snippet qstring/main.cpp 91
8366
8367 Here is a third example where we use a zero-length assertion,
8368 \b{\\b} (word boundary), to split the string into an
8369 alternating sequence of non-word and word tokens:
8370
8371 \snippet qstring/main.cpp 92
8372
8373 \sa QStringList::join(), section()
8374*/
8375QStringList QString::split(const QRegularExpression &re, Qt::SplitBehavior behavior) const
8376{
8377#if QT_VERSION < QT_VERSION_CHECK(7, 0, 0)
8378 const auto matchingFunction = qOverload<const QString &, qsizetype, QRegularExpression::MatchType, QRegularExpression::MatchOptions>(&QRegularExpression::globalMatch);
8379#else
8380 const auto matchingFunction = &QRegularExpression::globalMatch;
8381#endif
8382 return splitString<QStringList>(*this,
8383 re,
8384 matchingFunction,
8385 behavior);
8386}
8387
8388/*!
8389 \overload
8390 \since 6.0
8391
    Splits the view into substring views wherever the regular expression \a re
    matches, and returns the list of those views. If \a re does not match
    anywhere in the view, split() returns a single-element list containing
    this view.
8396
8397 \note The views in the returned list are sub-views of this view; as such,
8398 they reference the same data as it and only remain valid for as long as that
8399 data remains live.
8400*/
8401QList<QStringView> QStringView::split(const QRegularExpression &re, Qt::SplitBehavior behavior) const
8402{
8403 return splitString<QList<QStringView>>(*this, re, &QRegularExpression::globalMatchView, behavior);
8404}
8405
8406#endif // QT_CONFIG(regularexpression)
8407
8408/*!
8409 \enum QString::NormalizationForm
8410
8411 This enum describes the various normalized forms of Unicode text.
8412
8413 \value NormalizationForm_D Canonical Decomposition
8414 \value NormalizationForm_C Canonical Decomposition followed by Canonical Composition
8415 \value NormalizationForm_KD Compatibility Decomposition
8416 \value NormalizationForm_KC Compatibility Decomposition followed by Canonical Composition
8417
8418 \sa normalized(),
8419 {https://www.unicode.org/reports/tr15/}{Unicode Standard Annex #15}
8420*/
8421
8422/*!
8423 \since 4.5
8424
8425 Returns a copy of this string repeated the specified number of \a times.
8426
8427 If \a times is less than 1, an empty string is returned.
8428
8429 Example:
8430
8431 \snippet code/src_corelib_text_qstring.cpp 8
8432*/
8433QString QString::repeated(qsizetype times) const
8434{
8435 if (d.size == 0)
8436 return *this;
8437
8438 if (times <= 1) {
8439 if (times == 1)
8440 return *this;
8441 return QString();
8442 }
8443
8444 const qsizetype resultSize = times * d.size;
8445
8446 QString result;
8447 result.reserve(resultSize);
8448 if (result.capacity() != resultSize)
8449 return QString(); // not enough memory
8450
8451 memcpy(result.d.data(), d.data(), d.size * sizeof(QChar));
8452
8453 qsizetype sizeSoFar = d.size;
8454 char16_t *end = result.d.data() + sizeSoFar;
8455
8456 const qsizetype halfResultSize = resultSize >> 1;
8457 while (sizeSoFar <= halfResultSize) {
8458 memcpy(end, result.d.data(), sizeSoFar * sizeof(QChar));
8459 end += sizeSoFar;
8460 sizeSoFar <<= 1;
8461 }
8462 memcpy(end, result.d.data(), (resultSize - sizeSoFar) * sizeof(QChar));
8463 result.d.data()[resultSize] = '\0';
8464 result.d.size = resultSize;
8465 return result;
8466}
8467
8468void qt_string_normalize(QString *data, QString::NormalizationForm mode, QChar::UnicodeVersion version, qsizetype from)
8469{
8470 {
8471 // check if it's fully ASCII first, because then we have no work
8472 auto start = reinterpret_cast<const char16_t *>(data->constData());
8473 const char16_t *p = start + from;
8474 if (isAscii_helper(p, p + data->size() - from))
8475 return;
8476 if (p > start + from)
8477 from = p - start - 1; // need one before the non-ASCII to perform NFC
8478 }
8479
8480 if (version == QChar::Unicode_Unassigned) {
8481 version = QChar::currentUnicodeVersion();
8482 } else if (int(version) <= NormalizationCorrectionsVersionMax) {
8483 const QString &s = *data;
8484 QChar *d = nullptr;
8486 if (n.version > version) {
8487 qsizetype pos = from;
8488 if (QChar::requiresSurrogates(n.ucs4)) {
8489 char16_t ucs4High = QChar::highSurrogate(n.ucs4);
8490 char16_t ucs4Low = QChar::lowSurrogate(n.ucs4);
8491
8492 // scan for this codepoint
8493 for ( ; pos < s.size() - 1; ++pos) {
8494 if (s.at(pos).unicode() == ucs4High && s.at(pos + 1).unicode() == ucs4Low)
8495 break;
8496 }
8497 if (pos == s.size())
8498 continue; // no correction necessary
8499
8500 // detach if necessary
8501 if (!d)
8502 d = data->data();
8503 if (QChar::requiresSurrogates(n.old_mapping)) {
8504 // no shrinking
8505 char16_t oldHigh = QChar::highSurrogate(n.old_mapping);
8506 char16_t oldLow = QChar::lowSurrogate(n.old_mapping);
8507 while (pos < s.size() - 1) {
8508 if (s.at(pos).unicode() == ucs4High && s.at(pos + 1).unicode() == ucs4Low) {
8509 d[pos] = QChar(oldHigh);
8510 d[++pos] = QChar(oldLow);
8511 }
8512 ++pos;
8513 }
8514 } else {
8515 // shrinking, so a little harder
8516 char16_t old = char16_t(n.old_mapping);
8517 qsizetype outpos = pos;
8518 for ( ; pos < s.size(); ++outpos, ++pos) {
8519 if (pos < s.size() - 1 && s.at(pos).unicode() == ucs4High
8520 && s.at(pos + 1).unicode() == ucs4Low) {
8521 d[outpos] = QChar(old);
8522 ++pos;
8523 }
8524 }
8525 data->truncate(outpos);
8526 d = nullptr;
8527 }
8528 } else {
8529 Q_ASSERT(!QChar::requiresSurrogates(n.old_mapping)); // BMP maps to BMP
8530 while (pos < s.size()) {
8531 if (s.at(pos).unicode() == n.ucs4) {
8532 if (!d)
8533 d = data->data();
8534 d[pos] = QChar(n.old_mapping);
8535 }
8536 ++pos;
8537 }
8538 }
8539 }
8540 }
8541 }
8542
8543 if (normalizationQuickCheckHelper(data, mode, from, &from))
8544 return;
8545
8546 decomposeHelper(data, mode < QString::NormalizationForm_KD, version, from);
8547
8548 canonicalOrderHelper(data, version, from);
8549
8550 if (mode == QString::NormalizationForm_D || mode == QString::NormalizationForm_KD)
8551 return;
8552
8553 composeHelper(data, version, from);
8554}
8555
8556/*!
8557 Returns the string in the given Unicode normalization \a mode,
8558 according to the given \a version of the Unicode standard.
8559*/
8560QString QString::normalized(QString::NormalizationForm mode, QChar::UnicodeVersion version) const
8561{
8562 QString copy = *this;
8563 qt_string_normalize(&copy, mode, version, 0);
8564 return copy;
8565}
8566
8567#if QT_VERSION < QT_VERSION_CHECK(7, 0, 0) && !defined(QT_BOOTSTRAPPED)
8568static void checkArgEscape(QStringView s)
8569{
8570 // If we're in here, it means that qArgDigitValue has accepted the
8571 // digit. We can skip the check in case we already know it will
8572 // succeed.
8573 if (!supportUnicodeDigitValuesInArg())
8574 return;
8575
8576 const auto isNonAsciiDigit = [](QChar c) {
8577 return c.unicode() < u'0' || c.unicode() > u'9';
8578 };
8579
8580 if (std::any_of(s.begin(), s.end(), isNonAsciiDigit)) {
8581 const auto accumulateDigit = [](int partial, QChar digit) {
8582 return partial * 10 + digit.digitValue();
8583 };
8584 const int parsedNumber = std::accumulate(s.begin(), s.end(), 0, accumulateDigit);
8585
8586 qWarning("QString::arg(): the replacement \"%%%ls\" contains non-ASCII digits;\n"
8587 " it is currently being interpreted as the %d-th substitution.\n"
8588 " This is deprecated; support for non-ASCII digits will be dropped\n"
8589 " in a future version of Qt.",
8590 qUtf16Printable(s.toString()),
8591 parsedNumber);
8592 }
8593}
8594#endif
8595
8597{
8598 int min_escape; // lowest escape sequence number
8599 qsizetype occurrences; // number of occurrences of the lowest escape sequence number
8600 qsizetype locale_occurrences; // number of occurrences of the lowest escape sequence number that
8601 // contain 'L'
8602 qsizetype escape_len; // total length of escape sequences which will be replaced
8603};
8604
8605static ArgEscapeData findArgEscapes(QStringView s)
8606{
8607 const QChar *uc_begin = s.begin();
8608 const QChar *uc_end = s.end();
8609
8610 ArgEscapeData d;
8611
8612 d.min_escape = INT_MAX;
8613 d.occurrences = 0;
8614 d.escape_len = 0;
8615 d.locale_occurrences = 0;
8616
8617 const QChar *c = uc_begin;
8618 while (c != uc_end) {
8619 while (c != uc_end && c->unicode() != '%')
8620 ++c;
8621
8622 if (c == uc_end)
8623 break;
8624 const QChar *escape_start = c;
8625 if (++c == uc_end)
8626 break;
8627
8628 bool locale_arg = false;
8629 if (c->unicode() == 'L') {
8630 locale_arg = true;
8631 if (++c == uc_end)
8632 break;
8633 }
8634
8635 int escape = qArgDigitValue(*c);
8636 if (escape == -1)
8637 continue;
8638
8639 // ### Qt 7: do not allow anything but ASCII digits
8640 // in arg()'s replacements.
8641#if QT_VERSION <= QT_VERSION_CHECK(7, 0, 0) && !defined(QT_BOOTSTRAPPED)
8642 const QChar *escapeBegin = c;
8643 const QChar *escapeEnd = escapeBegin + 1;
8644#endif
8645
8646 ++c;
8647
8648 if (c != uc_end) {
8649 const int next_escape = qArgDigitValue(*c);
8650 if (next_escape != -1) {
8651 escape = (10 * escape) + next_escape;
8652 ++c;
8653#if QT_VERSION <= QT_VERSION_CHECK(7, 0, 0) && !defined(QT_BOOTSTRAPPED)
8654 ++escapeEnd;
8655#endif
8656 }
8657 }
8658
8659#if QT_VERSION <= QT_VERSION_CHECK(7, 0, 0) && !defined(QT_BOOTSTRAPPED)
8660 checkArgEscape(QStringView(escapeBegin, escapeEnd));
8661#endif
8662
8663 if (escape > d.min_escape)
8664 continue;
8665
8666 if (escape < d.min_escape) {
8667 d.min_escape = escape;
8668 d.occurrences = 0;
8669 d.escape_len = 0;
8670 d.locale_occurrences = 0;
8671 }
8672
8673 ++d.occurrences;
8674 if (locale_arg)
8675 ++d.locale_occurrences;
8676 d.escape_len += c - escape_start;
8677 }
8678 return d;
8679}
8680
8681static QString replaceArgEscapes(QStringView s, const ArgEscapeData &d, qsizetype field_width,
8682 QStringView arg, QStringView larg, QChar fillChar)
8683{
8684 // Negative field-width for right-padding, positive for left-padding:
8685 const qsizetype abs_field_width = qAbs(field_width);
8686 const qsizetype result_len =
8687 s.size() - d.escape_len
8688 + (d.occurrences - d.locale_occurrences) * qMax(abs_field_width, arg.size())
8689 + d.locale_occurrences * qMax(abs_field_width, larg.size());
8690
8691 QString result(result_len, Qt::Uninitialized);
8692 QChar *rc = const_cast<QChar *>(result.unicode());
8693 QChar *const result_end = rc + result_len;
8694 qsizetype repl_cnt = 0;
8695
8696 const QChar *c = s.begin();
8697 const QChar *const uc_end = s.end();
8698 while (c != uc_end) {
8699 Q_ASSERT(d.occurrences > repl_cnt);
8700 /* We don't have to check increments of c against uc_end because, as
8701 long as d.occurrences > repl_cnt, we KNOW there are valid escape
8702 sequences remaining. */
8703
8704 const QChar *text_start = c;
8705 while (c->unicode() != '%')
8706 ++c;
8707
8708 const QChar *escape_start = c++;
8709 const bool localize = c->unicode() == 'L';
8710 if (localize)
8711 ++c;
8712
8713 int escape = qArgDigitValue(*c);
8714 if (escape != -1 && c + 1 != uc_end) {
8715 const int digit = qArgDigitValue(c[1]);
8716 if (digit != -1) {
8717 ++c;
8718 escape = 10 * escape + digit;
8719 }
8720 }
8721
8722 if (escape != d.min_escape) {
8723 memcpy(rc, text_start, (c - text_start) * sizeof(QChar));
8724 rc += c - text_start;
8725 } else {
8726 ++c;
8727
8728 memcpy(rc, text_start, (escape_start - text_start) * sizeof(QChar));
8729 rc += escape_start - text_start;
8730
8731 const QStringView use = localize ? larg : arg;
8732 const qsizetype pad_chars = abs_field_width - use.size();
8733 // (If negative, relevant loops are no-ops: no need to check.)
8734
8735 if (field_width > 0) { // left padded
8736 rc = std::fill_n(rc, pad_chars, fillChar);
8737 }
8738
8739 if (use.size())
8740 memcpy(rc, use.data(), use.size() * sizeof(QChar));
8741 rc += use.size();
8742
8743 if (field_width < 0) { // right padded
8744 rc = std::fill_n(rc, pad_chars, fillChar);
8745 }
8746
8747 if (++repl_cnt == d.occurrences) {
8748 memcpy(rc, c, (uc_end - c) * sizeof(QChar));
8749 rc += uc_end - c;
8750 Q_ASSERT(rc == result_end);
8751 c = uc_end;
8752 }
8753 }
8754 }
8755 Q_ASSERT(rc == result_end);
8756
8757 return result;
8758}
8759
8760/*!
8761 \fn template <typename T, QString::if_string_like<T> = true> QString QString::arg(const T &a, int fieldWidth, QChar fillChar) const
8762
8763 Returns a copy of this string with the lowest-numbered place-marker
8764 replaced by string \a a, i.e., \c %1, \c %2, ..., \c %99.
8765
8766 \a fieldWidth specifies the minimum amount of space that \a a
8767 shall occupy. If \a a requires less space than \a fieldWidth, it
8768 is padded to \a fieldWidth with character \a fillChar. A positive
8769 \a fieldWidth produces right-aligned text. A negative \a fieldWidth
8770 produces left-aligned text.
8771
8772 This example shows how we might create a \c status string for
8773 reporting progress while processing a list of files:
8774
8775 \snippet qstring/main.cpp 11-qstringview
8776
8777 First, \c arg(i) replaces \c %1. Then \c arg(total) replaces \c
8778 %2. Finally, \c arg(fileName) replaces \c %3.
8779
8780 One advantage of using arg() over asprintf() is that the order of the
8781 numbered place markers can change, if the application's strings are
8782 translated into other languages, but each arg() will still replace
8783 the lowest-numbered unreplaced place-marker, no matter where it
8784 appears. Also, if place-marker \c %i appears more than once in the
8785 string, arg() replaces all of them.
8786
8787 If there is no unreplaced place-marker remaining, a warning message
8788 is printed and the result is undefined. Place-marker numbers must be
8789 in the range 1 to 99.
8790
8791 \note In Qt versions prior to 6.9, this function was overloaded on
8792 \c{char}, QChar, QString, QStringView, and QLatin1StringView and in some
8793 cases, \c{wchar_t} and \c{char16_t} arguments would resolve to the integer
8794 overloads. In Qt versions prior to 5.10, this function lacked the
8795 QStringView and QLatin1StringView overloads.
8796*/
8797QString QString::arg_impl(QAnyStringView a, int fieldWidth, QChar fillChar) const
8798{
8799 ArgEscapeData d = findArgEscapes(*this);
8800
8801 if (Q_UNLIKELY(d.occurrences == 0)) {
8802 qWarning("QString::arg: Argument missing: \"%ls\", \"%ls\"", qUtf16Printable(*this),
8803 qUtf16Printable(a.toString()));
8804 return *this;
8805 }
8806 struct {
8807 QVarLengthArray<char16_t> out;
8808 QStringView operator()(QStringView in) noexcept { return in; }
8809 QStringView operator()(QLatin1StringView in)
8810 {
8811 out.resize(in.size());
8812 qt_from_latin1(out.data(), in.data(), size_t(in.size()));
8813 return out;
8814 }
8815 QStringView operator()(QUtf8StringView in)
8816 {
8817 out.resize(in.size());
8818 return QStringView{out.data(), QUtf8::convertToUnicode(out.data(), in)};
8819 }
8820 } convert;
8821
8822 QStringView sv = a.visit(std::ref(convert));
8823 return replaceArgEscapes(*this, d, fieldWidth, sv, sv, fillChar);
8824}
8825
8826/*!
8827 \fn template <typename T, QString::if_integral_non_char<T> = true> QString QString::arg(T a, int fieldWidth, int base, QChar fillChar) const
8828 \overload arg()
8829
8830 The \a a argument is expressed in base \a base, which is 10 by
8831 default and must be between 2 and 36. For bases other than 10, \a a
8832 is treated as an unsigned integer.
8833
8834 \a fieldWidth specifies the minimum amount of space that \a a is
8835 padded to and filled with the character \a fillChar. A positive
8836 value produces right-aligned text; a negative value produces
8837 left-aligned text.
8838
8839 The '%' can be followed by an 'L', in which case the sequence is
8840 replaced with a localized representation of \a a. The conversion
8841 uses the default locale, set by QLocale::setDefault(). If no default
8842 locale was specified, the system locale is used. The 'L' flag is
8843 ignored if \a base is not 10.
8844
8845 \snippet qstring/main.cpp 12
8846 \snippet qstring/main.cpp 14
8847
8848 \note In Qt versions prior to 6.10.1, this function accepted arguments of
8849 types that implicitly convert to integral types. This is no longer supported,
8850 except for (unscoped) enums, because it also accepted types convertible to
8851 floating-point types, losing precision when those were printed as integers. A
8852 backwards-compatible fix is to cast such types to a C++ type whose displayed
8853 form matches your intent (\c int, \c float, ...).
8854
8855 \note In Qt versions prior to 6.9, this function was overloaded on various
8856 integral types and sometimes incorrectly accepted \c char and \c char16_t
8857 arguments.
8858
8859 \sa {Number Formats}
8860*/
8861QString QString::arg_impl(qlonglong a, int fieldWidth, int base, QChar fillChar) const
8862{
8863 ArgEscapeData d = findArgEscapes(*this);
8864
8865 if (d.occurrences == 0) {
8866 qWarning("QString::arg: Argument missing: \"%ls\", %llu", qUtf16Printable(*this), a);
8867 return *this;
8868 }
8869
8870 unsigned flags = QLocaleData::NoFlags;
8871 // ZeroPadded sorts out left-padding when the fill is zero, to the right of sign:
8872 if (fillChar == u'0')
8873 flags = QLocaleData::ZeroPadded;
8874
8875 QString arg;
8876 if (d.occurrences > d.locale_occurrences) {
8877 arg = QLocaleData::c()->longLongToString(a, -1, base, fieldWidth, flags);
8878 Q_ASSERT(fillChar != u'0' || fieldWidth <= arg.size());
8879 }
8880
8881 QString localeArg;
8882 if (d.locale_occurrences > 0) {
8883 QLocale locale;
8884 if (!(locale.numberOptions() & QLocale::OmitGroupSeparator))
8885 flags |= QLocaleData::GroupDigits;
8886 localeArg = locale.d->m_data->longLongToString(a, -1, base, fieldWidth, flags);
8887 Q_ASSERT(fillChar != u'0' || fieldWidth <= localeArg.size());
8888 }
8889
8890 return replaceArgEscapes(*this, d, fieldWidth, arg, localeArg, fillChar);
8891}
8892
8893QString QString::arg_impl(qulonglong a, int fieldWidth, int base, QChar fillChar) const
8894{
8895 ArgEscapeData d = findArgEscapes(*this);
8896
8897 if (d.occurrences == 0) {
8898 qWarning("QString::arg: Argument missing: \"%ls\", %lld", qUtf16Printable(*this), a);
8899 return *this;
8900 }
8901
8902 unsigned flags = QLocaleData::NoFlags;
8903 // ZeroPadded sorts out left-padding when the fill is zero, to the right of sign:
8904 if (fillChar == u'0')
8905 flags = QLocaleData::ZeroPadded;
8906
8907 QString arg;
8908 if (d.occurrences > d.locale_occurrences) {
8909 arg = QLocaleData::c()->unsLongLongToString(a, -1, base, fieldWidth, flags);
8910 Q_ASSERT(fillChar != u'0' || fieldWidth <= arg.size());
8911 }
8912
8913 QString localeArg;
8914 if (d.locale_occurrences > 0) {
8915 QLocale locale;
8916 if (!(locale.numberOptions() & QLocale::OmitGroupSeparator))
8917 flags |= QLocaleData::GroupDigits;
8918 localeArg = locale.d->m_data->unsLongLongToString(a, -1, base, fieldWidth, flags);
8919 Q_ASSERT(fillChar != u'0' || fieldWidth <= localeArg.size());
8920 }
8921
8922 return replaceArgEscapes(*this, d, fieldWidth, arg, localeArg, fillChar);
8923}
8924
8925/*!
8926 \fn template <typename T, QString::if_floating_point<T> = true> QString QString::arg(T a, int fieldWidth, char format, int precision, QChar fillChar) const
8927 \overload arg()
8928
8929 Argument \a a is formatted according to the specified \a format and
8930 \a precision. See \l{Floating-point Formats} for details.
8931
8932 \a fieldWidth specifies the minimum amount of space that \a a is
8933 padded to and filled with the character \a fillChar. A positive
8934 value produces right-aligned text; a negative value produces
8935 left-aligned text.
8936
8937 \snippet code/src_corelib_text_qstring.cpp 2
8938
8939 \note In Qt versions prior to 6.9, this function was a regular function
8940 taking \c double. As a consequence of being a template function now, it no
8941 longer accepts arguments that merely implicitly convert to floating-point
8942 types. A backwards-compatible fix is to cast such types to one of the C++
8943 floating-point types.
8944
8945 \sa QLocale::toString(), QLocale::FloatingPointPrecisionOption, {Number Formats}
8946*/
8947QString QString::arg_impl(double a, int fieldWidth, char format, int precision, QChar fillChar) const
8948{
8949 ArgEscapeData d = findArgEscapes(*this);
8950
8951 if (d.occurrences == 0) {
8952 qWarning("QString::arg: Argument missing: \"%ls\", %g", qUtf16Printable(*this), a);
8953 return *this;
8954 }
8955
8956 unsigned flags = QLocaleData::NoFlags;
8957 // ZeroPadded sorts out left-padding when the fill is zero, to the right of sign:
8958 if (fillChar == u'0')
8959 flags |= QLocaleData::ZeroPadded;
8960
8961 if (isAsciiUpper(format))
8962 flags |= QLocaleData::CapitalEorX;
8963
8964 QLocaleData::DoubleForm form = QLocaleData::DFDecimal;
8965 switch (QtMiscUtils::toAsciiLower(format)) {
8966 case 'f':
8967 form = QLocaleData::DFDecimal;
8968 break;
8969 case 'e':
8970 form = QLocaleData::DFExponent;
8971 break;
8972 case 'g':
8973 form = QLocaleData::DFSignificantDigits;
8974 break;
8975 default:
8976#if defined(QT_CHECK_RANGE)
8977 qWarning("QString::arg: Invalid format char '%c'", format);
8978#endif
8979 break;
8980 }
8981
8982 QString arg;
8983 if (d.occurrences > d.locale_occurrences) {
8984 arg = QLocaleData::c()->doubleToString(a, precision, form, fieldWidth,
8985 flags | QLocaleData::ZeroPadExponent);
8986 Q_ASSERT(fillChar != u'0' || !qt_is_finite(a)
8987 || fieldWidth <= arg.size());
8988 }
8989
8990 QString localeArg;
8991 if (d.locale_occurrences > 0) {
8992 QLocale locale;
8993
8994 const QLocale::NumberOptions numberOptions = locale.numberOptions();
8995 if (!(numberOptions & QLocale::OmitGroupSeparator))
8996 flags |= QLocaleData::GroupDigits;
8997 if (!(numberOptions & QLocale::OmitLeadingZeroInExponent))
8998 flags |= QLocaleData::ZeroPadExponent;
8999 if (numberOptions & QLocale::IncludeTrailingZeroesAfterDot)
9000 flags |= QLocaleData::AddTrailingZeroes;
9001 localeArg = locale.d->m_data->doubleToString(a, precision, form, fieldWidth, flags);
9002 Q_ASSERT(fillChar != u'0' || !qt_is_finite(a)
9003 || fieldWidth <= localeArg.size());
9004 }
9005
9006 return replaceArgEscapes(*this, d, fieldWidth, arg, localeArg, fillChar);
9007}
9008
9009static inline char16_t to_unicode(const QChar c) { return c.unicode(); }
9010static inline char16_t to_unicode(const char c) { return QLatin1Char{c}.unicode(); }
9011
9012template <typename Char>
9013static int getEscape(const Char *uc, qsizetype *pos, qsizetype len)
9014{
9015 qsizetype i = *pos;
9016 ++i;
9017 if (i < len && uc[i] == u'L')
9018 ++i;
9019 if (i < len) {
9020 int escape = to_unicode(uc[i]) - '0';
9021 if (uint(escape) >= 10U)
9022 return -1;
9023 ++i;
9024 if (i < len) {
9025 // there's a second digit
9026 int digit = to_unicode(uc[i]) - '0';
9027 if (uint(digit) < 10U) {
9028 escape = (escape * 10) + digit;
9029 ++i;
9030 }
9031 }
9032 *pos = i;
9033 return escape;
9034 }
9035 return -1;
9036}
9037
9038/*
9039 Algorithm for multiArg:
9040
    1. Parse the string as a sequence of verbatim text and placeholders (%L?\d{1,2}).
9042 The L is parsed and accepted for compatibility with non-multi-arg, but since
9043 multiArg only accepts strings as replacements, the localization request can
9044 be safely ignored.
9045 2. The result of step (1) is a list of (string-ref,int)-tuples. The string-ref
9046 either points at text to be copied verbatim (in which case the int is -1),
9047 or, initially, at the textual representation of the placeholder. In that case,
9048 the int contains the numerical number as parsed from the placeholder.
9049 3. Next, collect all the non-negative ints found, sort them in ascending order and
9050 remove duplicates.
9051 3a. If the result has more entries than multiArg() was given replacement strings,
9052 we have found placeholders we can't satisfy with replacement strings. That is
9053 fine (there could be another .arg() call coming after this one), so just
9054 truncate the result to the number of actual multiArg() replacement strings.
9055 3b. If the result has less entries than multiArg() was given replacement strings,
9056 the string is missing placeholders. This is an error that the user should be
9057 warned about.
    4. The result of step (3) is a mapping from the index of any replacement string to
       placeholder number. This is the wrong way around, but since placeholder
       numbers can get as large as 99, while we typically don't have more than 9
       replacement strings, we avoid a sparsely-used table indexed by placeholder
       number and instead do a reverse lookup each time we need to map a placeholder
       number to a replacement string index (that's a linear search; but still *much*
       faster than using an associative container).
9064 5. Next, for each of the tuples found in step (1), do the following:
9065 5a. If the int is negative, do nothing.
9066 5b. Otherwise, if the int is found in the result of step (3) at index I, replace
9067 the string-ref with a string-ref for the (complete) I'th replacement string.
9068 5c. Otherwise, do nothing.
9069 6. Concatenate all string refs into a single result string.
9070*/
9071
9072namespace {
9073struct Part
9074{
9075 Part() = default; // for QVarLengthArray; do not use
9076 constexpr Part(QAnyStringView s, int num = -1)
9077 : string{s}, number{num} {}
9078
9079 void reset(QAnyStringView s) noexcept { *this = {s, number}; }
9080
9081 QAnyStringView string;
9082 int number;
9083};
9084} // unnamed namespace
9085
9087
9088namespace {
9089
9090enum { ExpectedParts = 32 };
9091
9092typedef QVarLengthArray<Part, ExpectedParts> ParseResult;
9093typedef QVarLengthArray<int, ExpectedParts/2> ArgIndexToPlaceholderMap;
9094
9095template <typename StringView>
9096static ParseResult parseMultiArgFormatString_impl(StringView s)
9097{
9098 ParseResult result;
9099
9100 const auto uc = s.data();
9101 const auto len = s.size();
9102 const auto end = len - 1;
9103 qsizetype i = 0;
9104 qsizetype last = 0;
9105
9106 while (i < end) {
9107 if (uc[i] == u'%') {
9108 qsizetype percent = i;
9109 int number = getEscape(uc, &i, len);
9110 if (number != -1) {
9111 if (last != percent)
9112 result.push_back(Part{s.sliced(last, percent - last)}); // literal text (incl. failed placeholders)
9113 result.push_back(Part{s.sliced(percent, i - percent), number}); // parsed placeholder
9114 last = i;
9115 continue;
9116 }
9117 }
9118 ++i;
9119 }
9120
9121 if (last < len)
9122 result.push_back(Part{s.sliced(last, len - last)}); // trailing literal text
9123
9124 return result;
9125}
9126
9127static ParseResult parseMultiArgFormatString(QAnyStringView s)
9128{
9129 return s.visit([] (auto s) { return parseMultiArgFormatString_impl(s); });
9130}
9131
9132static ArgIndexToPlaceholderMap makeArgIndexToPlaceholderMap(const ParseResult &parts)
9133{
9134 ArgIndexToPlaceholderMap result;
9135
9136 for (const Part &part : parts) {
9137 if (part.number >= 0)
9138 result.push_back(part.number);
9139 }
9140
9141 std::sort(result.begin(), result.end());
9142 result.erase(std::unique(result.begin(), result.end()),
9143 result.end());
9144
9145 return result;
9146}
9147
9148static qsizetype resolveStringRefsAndReturnTotalSize(ParseResult &parts, const ArgIndexToPlaceholderMap &argIndexToPlaceholderMap, const QtPrivate::ArgBase *args[])
9149{
9150 using namespace QtPrivate;
9151 qsizetype totalSize = 0;
9152 for (Part &part : parts) {
9153 if (part.number != -1) {
9154 const auto it = std::find(argIndexToPlaceholderMap.begin(), argIndexToPlaceholderMap.end(), part.number);
9155 if (it != argIndexToPlaceholderMap.end()) {
9156 const auto &arg = *args[it - argIndexToPlaceholderMap.begin()];
9157 switch (arg.tag) {
9158 case ArgBase::L1:
9159 part.reset(static_cast<const QLatin1StringArg&>(arg).string);
9160 break;
9161 case ArgBase::Any:
9162 part.reset(static_cast<const QAnyStringArg&>(arg).string);
9163 break;
9164 case ArgBase::U16:
9165 part.reset(static_cast<const QStringViewArg&>(arg).string);
9166 break;
9167 }
9168 }
9169 }
9170 totalSize += part.string.size();
9171 }
9172 return totalSize;
9173}
9174
9175} // unnamed namespace
9176
9177QString QtPrivate::argToQString(QAnyStringView pattern, size_t numArgs, const ArgBase **args)
9178{
9179 // Step 1-2 above
9180 ParseResult parts = parseMultiArgFormatString(pattern);
9181
9182 // 3-4
9183 ArgIndexToPlaceholderMap argIndexToPlaceholderMap = makeArgIndexToPlaceholderMap(parts);
9184
9185 if (static_cast<size_t>(argIndexToPlaceholderMap.size()) > numArgs) // 3a
9186 argIndexToPlaceholderMap.resize(qsizetype(numArgs));
9187 else if (Q_UNLIKELY(static_cast<size_t>(argIndexToPlaceholderMap.size()) < numArgs)) // 3b
9188 qWarning("QString::arg: %d argument(s) missing in %ls",
9189 int(numArgs - argIndexToPlaceholderMap.size()), qUtf16Printable(pattern.toString()));
9190
9191 // 5
9192 const qsizetype totalSize = resolveStringRefsAndReturnTotalSize(parts, argIndexToPlaceholderMap, args);
9193
9194 // 6:
9195 QString result(totalSize, Qt::Uninitialized);
9196 auto out = const_cast<QChar*>(result.constData());
9197
9198 struct Concatenate {
9199 QChar *out;
9200 QChar *operator()(QLatin1String part) noexcept
9201 {
9202 if (part.size()) {
9203 qt_from_latin1(reinterpret_cast<char16_t*>(out),
9204 part.data(), part.size());
9205 }
9206 return out + part.size();
9207 }
9208 QChar *operator()(QUtf8StringView part) noexcept
9209 {
9210 return QUtf8::convertToUnicode(out, part);
9211 }
9212 QChar *operator()(QStringView part) noexcept
9213 {
9214 if (part.size())
9215 memcpy(out, part.data(), part.size() * sizeof(QChar));
9216 return out + part.size();
9217 }
9218 };
9219
9220 for (const Part &part : parts)
9221 out = part.string.visit(Concatenate{out});
9222
9223 // UTF-8 decoding may have caused an overestimate of totalSize - correct it:
9224 result.truncate(out - result.cbegin());
9225
9226 return result;
9227}
9228
9229/*! \fn bool QString::isRightToLeft() const
9230
9231 Returns \c true if the string is read right to left.
9232
9233 \sa QStringView::isRightToLeft()
9234*/
9235bool QString::isRightToLeft() const
9236{
9237 return QtPrivate::isRightToLeft(QStringView(*this));
9238}
9239
9240/*!
9241 \fn bool QString::isValidUtf16() const noexcept
9242 \since 5.15
9243
9244 Returns \c true if the string contains valid UTF-16 encoded data,
9245 or \c false otherwise.
9246
9247 Note that this function does not perform any special validation of the
9248 data; it merely checks if it can be successfully decoded from UTF-16.
9249 The data is assumed to be in host byte order; the presence of a BOM
9250 is meaningless.
9251
9252 \sa QStringView::isValidUtf16()
9253*/
9254
9255/*! \fn QChar *QString::data()
9256
9257 Returns a pointer to the data stored in the QString. The pointer
9258 can be used to access and modify the characters that compose the
9259 string.
9260
9261 Unlike constData() and unicode(), the returned data is always
9262 '\\0'-terminated.
9263
9264 Example:
9265
9266 \snippet qstring/main.cpp 19
9267
9268 Note that the pointer remains valid only as long as the string is
9269 not modified by other means. For read-only access, constData() is
9270 faster because it never causes a \l{deep copy} to occur.
9271
9272 \sa constData(), operator[]()
9273*/
9274
9275/*! \fn const QChar *QString::data() const
9276
9277 \overload
9278
9279 \note The returned string may not be '\\0'-terminated.
9280 Use size() to determine the length of the array.
9281
9282 \sa fromRawData()
9283*/
9284
9285/*! \fn const QChar *QString::constData() const
9286
9287 Returns a pointer to the data stored in the QString. The pointer
9288 can be used to access the characters that compose the string.
9289
9290 Note that the pointer remains valid only as long as the string is
9291 not modified.
9292
9293 \note The returned string may not be '\\0'-terminated.
9294 Use size() to determine the length of the array.
9295
9296 \sa data(), operator[](), fromRawData()
9297*/
9298
9299/*! \fn void QString::push_front(const QString &other)
9300
9301 This function is provided for STL compatibility, prepending the
9302 given \a other string to the beginning of this string. It is
9303 equivalent to \c prepend(other).
9304
9305 \sa prepend()
9306*/
9307
9308/*! \fn void QString::push_front(QChar ch)
9309
9310 \overload
9311
9312 Prepends the given \a ch character to the beginning of this string.
9313*/
9314
9315/*! \fn void QString::push_back(const QString &other)
9316
9317 This function is provided for STL compatibility, appending the
9318 given \a other string onto the end of this string. It is
9319 equivalent to \c append(other).
9320
9321 \sa append()
9322*/
9323
9324/*! \fn void QString::push_back(QChar ch)
9325
9326 \overload
9327
9328 Appends the given \a ch character onto the end of this string.
9329*/
9330
9331/*!
9332 \since 6.1
9333
9334 Removes from the string the characters in the half-open range
9335 [ \a first , \a last ). Returns an iterator to the character
9336 immediately after the last erased character (i.e. the character
9337 referred to by \a last before the erase).
9338*/
9339QString::iterator QString::erase(QString::const_iterator first, QString::const_iterator last)
9340{
9341 const auto start = std::distance(cbegin(), first);
9342 const auto len = std::distance(first, last);
9343 remove(start, len);
9344 return begin() + start;
9345}
9346
9347/*!
9348 \fn QString::iterator QString::erase(QString::const_iterator it)
9349
9350 \overload
9351 \since 6.5
9352
    Removes the character denoted by \a it from the string.
9354 Returns an iterator to the character immediately after the
9355 erased character.
9356
9357 \code
9358 QString c = "abcdefg";
9359 auto it = c.erase(c.cbegin()); // c is now "bcdefg"; "it" points to "b"
9360 \endcode
9361*/
9362
9363/*! \fn void QString::shrink_to_fit()
9364 \since 5.10
9365
9366 This function is provided for STL compatibility. It is
9367 equivalent to squeeze().
9368
9369 \sa squeeze()
9370*/
9371
9372/*!
9373 \fn std::string QString::toStdString() const
9374
9375 Returns a std::string object with the data contained in this
9376 QString. The Unicode data is converted into 8-bit characters using
9377 the toUtf8() function.
9378
9379 This method is mostly useful to pass a QString to a function
9380 that accepts a std::string object.
9381
9382 \sa toLatin1(), toUtf8(), toLocal8Bit(), QByteArray::toStdString()
9383*/
9384std::string QString::toStdString() const
9385{
9386 std::string result;
9387 if (isEmpty())
9388 return result;
9389
9390 auto writeToBuffer = [this](char *out, size_t) {
9391 char *last = QUtf8::convertFromUnicode(out, *this);
9392 return last - out;
9393 };
9394 size_t maxSize = size() * 3; // worst case for UTF-8
9395#ifdef __cpp_lib_string_resize_and_overwrite
9396 // C++23
9397 result.resize_and_overwrite(maxSize, writeToBuffer);
9398#else
9399 result.resize(maxSize);
9400 result.resize(writeToBuffer(result.data(), result.size()));
9401#endif
9402 return result;
9403}
9404
9405/*!
9406 \fn QString QString::fromRawData(const char16_t *unicode, qsizetype size)
9407 \since 6.10
9408
9409 Constructs a QString that uses the first \a size Unicode characters
9410 in the array \a unicode. The data in \a unicode is \e not
9411 copied. The caller must be able to guarantee that \a unicode will
9412 not be deleted or modified as long as the QString (or an
9413 unmodified copy of it) exists.
9414
9415 Any attempts to modify the QString or copies of it will cause it
9416 to create a deep copy of the data, ensuring that the raw data
9417 isn't modified.
9418
9419 Here is an example of how we can use a QRegularExpression on raw data in
9420 memory without requiring to copy the data into a QString:
9421
9422 \snippet qstring/main.cpp 22
9423 \snippet qstring/main.cpp 23
9424
9425 \warning A string created with fromRawData() is \e not
9426 '\\0'-terminated, unless the raw data contains a '\\0' character
9427 at position \a size. This means unicode() will \e not return a
9428 '\\0'-terminated string (although utf16() does, at the cost of
9429 copying the raw data).
9430
9431 \sa fromUtf16(), setRawData(), data(), constData(),
9432 nullTerminate(), nullTerminated()
9433*/
9434
9435/*!
9436 \fn QString QString::fromRawData(const QChar *unicode, qsizetype size)
9437 \overload
9438*/
9439
9440/*!
9441 \since 4.7
9442
9443 Resets the QString to use the first \a size Unicode characters
9444 in the array \a unicode. The data in \a unicode is \e not
9445 copied. The caller must be able to guarantee that \a unicode will
9446 not be deleted or modified as long as the QString (or an
9447 unmodified copy of it) exists.
9448
9449 This function can be used instead of fromRawData() to re-use
9450 existings QString objects to save memory re-allocations.
9451
9452 \sa fromRawData(), nullTerminate(), nullTerminated()
9453*/
9454QString &QString::setRawData(const QChar *unicode, qsizetype size)
9455{
9456 if (!unicode || !size) {
9457 clear();
9458 }
9459 *this = fromRawData(unicode, size);
9460 return *this;
9461}
9462
9463/*! \fn QString QString::fromStdU16String(const std::u16string &str)
9464 \since 5.5
9465
9466 \include qstring.cpp {from-std-string} {UTF-16} {fromUtf16()}
9467
9468 \sa fromUtf16(), fromStdWString(), fromStdU32String()
9469*/
9470
9471/*!
9472 \fn std::u16string QString::toStdU16String() const
9473 \since 5.5
9474
9475 Returns a std::u16string object with the data contained in this
9476 QString. The Unicode data is the same as returned by the utf16()
9477 method.
9478
9479 \sa utf16(), toStdWString(), toStdU32String()
9480*/
9481
9482/*! \fn QString QString::fromStdU32String(const std::u32string &str)
9483 \since 5.5
9484
9485 \include qstring.cpp {from-std-string} {UTF-32} {fromUcs4()}
9486
9487 \sa fromUcs4(), fromStdWString(), fromStdU16String()
9488*/
9489
9490/*!
9491 \fn std::u32string QString::toStdU32String() const
9492 \since 5.5
9493
9494 Returns a std::u32string object with the data contained in this
9495 QString. The Unicode data is the same as returned by the toUcs4()
9496 method.
9497
9498 \sa toUcs4(), toStdWString(), toStdU16String()
9499*/
9500
9501#if !defined(QT_NO_DATASTREAM)
9502/*!
9503 \fn QDataStream &operator<<(QDataStream &stream, const QString &string)
9504 \relates QString
9505
9506 Writes the given \a string to the specified \a stream.
9507
9508 \sa {Serializing Qt Data Types}
9509*/
9510
9511QDataStream &operator<<(QDataStream &out, const QString &str)
9512{
9513 if (out.version() == 1) {
9514 out << str.toLatin1();
9515 } else {
9516 if (!str.isNull() || out.version() < 3) {
9517 if ((out.byteOrder() == QDataStream::BigEndian) == (QSysInfo::ByteOrder == QSysInfo::BigEndian)) {
9518 out.writeBytes(reinterpret_cast<const char *>(str.unicode()),
9519 static_cast<qsizetype>(sizeof(QChar) * str.size()));
9520 } else {
9521 QVarLengthArray<char16_t> buffer(str.size());
9522 qbswap<sizeof(char16_t)>(str.constData(), str.size(), buffer.data());
9523 out.writeBytes(reinterpret_cast<const char *>(buffer.data()),
9524 static_cast<qsizetype>(sizeof(char16_t) * buffer.size()));
9525 }
9526 } else {
9527 QDataStream::writeQSizeType(out, -1); // write null marker
9528 }
9529 }
9530 return out;
9531}
9532
9533/*!
9534 \fn QDataStream &operator>>(QDataStream &stream, QString &string)
9535 \relates QString
9536
9537 Reads a string from the specified \a stream into the given \a string.
9538
9539 \sa {Serializing Qt Data Types}
9540*/
9541
9542QDataStream &operator>>(QDataStream &in, QString &str)
9543{
9544 if (in.version() == 1) {
9545 QByteArray l;
9546 in >> l;
9547 str = QString::fromLatin1(l);
9548 } else {
9549 qint64 size = QDataStream::readQSizeType(in);
9550 qsizetype bytes = size;
9551 if (size != bytes || size < -1) {
9552 str.clear();
9553 in.setStatus(QDataStream::SizeLimitExceeded);
9554 return in;
9555 }
9556 if (bytes == -1) { // null string
9557 str = QString();
9558 } else if (bytes > 0) {
9559 if (bytes & 0x1) {
9560 str.clear();
9561 in.setStatus(QDataStream::ReadCorruptData);
9562 return in;
9563 }
9564
9565 const qsizetype Step = 1024 * 1024;
9566 qsizetype len = bytes / 2;
9567 qsizetype allocated = 0;
9568
9569 while (allocated < len) {
9570 int blockSize = qMin(Step, len - allocated);
9571 str.resize(allocated + blockSize);
9572 if (in.readRawData(reinterpret_cast<char *>(str.data()) + allocated * 2,
9573 blockSize * 2) != blockSize * 2) {
9574 str.clear();
9575 in.setStatus(QDataStream::ReadPastEnd);
9576 return in;
9577 }
9578 allocated += blockSize;
9579 }
9580
9581 if ((in.byteOrder() == QDataStream::BigEndian)
9582 != (QSysInfo::ByteOrder == QSysInfo::BigEndian)) {
9583 char16_t *data = reinterpret_cast<char16_t *>(str.data());
9584 qbswap<sizeof(*data)>(data, len, data);
9585 }
9586 } else {
9587 str = QString(QLatin1StringView(""));
9588 }
9589 }
9590 return in;
9591}
9592#endif // QT_NO_DATASTREAM
9593
9594/*!
9595 \typedef QString::Data
9596 \internal
9597*/
9598
9599/*!
9600 \typedef QString::DataPtr
9601 \internal
9602*/
9603
9604/*!
9605 \fn DataPtr & QString::data_ptr()
9606 \internal
9607*/
9608
9609/*!
9610 \since 5.11
9611 \internal
9612 \relates QStringView
9613
9614 Returns \c true if the string is read right to left.
9615
9616 \sa QString::isRightToLeft()
9617*/
9618bool QtPrivate::isRightToLeft(QStringView string) noexcept
9619{
9620 int isolateLevel = 0;
9621
9622 for (QStringIterator i(string); i.hasNext();) {
9623 const char32_t c = i.next();
9624
9625 switch (QChar::direction(c)) {
9626 case QChar::DirRLI:
9627 case QChar::DirLRI:
9628 case QChar::DirFSI:
9629 ++isolateLevel;
9630 break;
9631 case QChar::DirPDI:
9632 if (isolateLevel)
9633 --isolateLevel;
9634 break;
9635 case QChar::DirL:
9636 if (isolateLevel)
9637 break;
9638 return false;
9639 case QChar::DirR:
9640 case QChar::DirAL:
9641 if (isolateLevel)
9642 break;
9643 return true;
9644 case QChar::DirEN:
9645 case QChar::DirES:
9646 case QChar::DirET:
9647 case QChar::DirAN:
9648 case QChar::DirCS:
9649 case QChar::DirB:
9650 case QChar::DirS:
9651 case QChar::DirWS:
9652 case QChar::DirON:
9653 case QChar::DirLRE:
9654 case QChar::DirLRO:
9655 case QChar::DirRLE:
9656 case QChar::DirRLO:
9657 case QChar::DirPDF:
9658 case QChar::DirNSM:
9659 case QChar::DirBN:
9660 break;
9661 }
9662 }
9663 return false;
9664}
9665
9666qsizetype QtPrivate::count(QStringView haystack, QStringView needle, Qt::CaseSensitivity cs) noexcept
9667{
9668 qsizetype num = 0;
9669 qsizetype i = -1;
9670 if (haystack.size() > 500 && needle.size() > 5) {
9671 QStringMatcher matcher(needle, cs);
9672 while ((i = matcher.indexIn(haystack, i + 1)) != -1)
9673 ++num;
9674 } else {
9675 while ((i = QtPrivate::findString(haystack, i + 1, needle, cs)) != -1)
9676 ++num;
9677 }
9678 return num;
9679}
9680
9681qsizetype QtPrivate::count(QStringView haystack, QChar needle, Qt::CaseSensitivity cs) noexcept
9682{
9683 if (cs == Qt::CaseSensitive)
9684 return std::count(haystack.cbegin(), haystack.cend(), needle);
9685
9686 needle = foldCase(needle);
9687 return std::count_if(haystack.cbegin(), haystack.cend(),
9688 [needle](const QChar c) { return foldAndCompare(c, needle); });
9689}
9690
9691qsizetype QtPrivate::count(QLatin1StringView haystack, QLatin1StringView needle, Qt::CaseSensitivity cs)
9692{
9693 qsizetype num = 0;
9694 qsizetype i = -1;
9695
9696 QLatin1StringMatcher matcher(needle, cs);
9697 while ((i = matcher.indexIn(haystack, i + 1)) != -1)
9698 ++num;
9699
9700 return num;
9701}
9702
9703qsizetype QtPrivate::count(QLatin1StringView haystack, QStringView needle, Qt::CaseSensitivity cs)
9704{
9705 if (haystack.size() < needle.size())
9706 return 0;
9707
9708 if (!QtPrivate::isLatin1(needle)) // won't find non-L1 UTF-16 needles in a L1 haystack!
9709 return 0;
9710
9711 qsizetype num = 0;
9712 qsizetype i = -1;
9713
9714 QVarLengthArray<uchar> s(needle.size());
9715 qt_to_latin1_unchecked(s.data(), needle.utf16(), needle.size());
9716
9717 QLatin1StringMatcher matcher(QLatin1StringView(reinterpret_cast<char *>(s.data()), s.size()),
9718 cs);
9719 while ((i = matcher.indexIn(haystack, i + 1)) != -1)
9720 ++num;
9721
9722 return num;
9723}
9724
9725qsizetype QtPrivate::count(QStringView haystack, QLatin1StringView needle, Qt::CaseSensitivity cs)
9726{
9727 if (haystack.size() < needle.size())
9728 return -1;
9729
9730 QVarLengthArray<char16_t> s = qt_from_latin1_to_qvla(needle);
9731 return QtPrivate::count(haystack, QStringView(s.data(), s.size()), cs);
9732}
9733
9734qsizetype QtPrivate::count(QLatin1StringView haystack, QChar needle, Qt::CaseSensitivity cs) noexcept
9735{
9736 // non-L1 needles cannot possibly match in L1-only haystacks
9737 if (needle.unicode() > 0xff)
9738 return 0;
9739
9740 if (cs == Qt::CaseSensitive) {
9741 return std::count(haystack.cbegin(), haystack.cend(), needle.toLatin1());
9742 } else {
9743 return std::count_if(haystack.cbegin(), haystack.cend(),
9744 CaseInsensitiveL1::matcher(needle.toLatin1()));
9745 }
9746}
9747
9748/*!
9749 \fn bool QtPrivate::startsWith(QStringView haystack, QStringView needle, Qt::CaseSensitivity cs)
9750 \since 5.10
9751 \fn bool QtPrivate::startsWith(QStringView haystack, QLatin1StringView needle, Qt::CaseSensitivity cs)
9752 \since 5.10
9753 \fn bool QtPrivate::startsWith(QLatin1StringView haystack, QStringView needle, Qt::CaseSensitivity cs)
9754 \since 5.10
9755 \fn bool QtPrivate::startsWith(QLatin1StringView haystack, QLatin1StringView needle, Qt::CaseSensitivity cs)
9756 \since 5.10
9757 \internal
9758 \relates QStringView
9759
9760 Returns \c true if \a haystack starts with \a needle,
9761 otherwise returns \c false.
9762
9763 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
9764
9765 \sa QtPrivate::endsWith(), QString::endsWith(), QStringView::endsWith(), QLatin1StringView::endsWith()
9766*/
9767
9768bool QtPrivate::startsWith(QStringView haystack, QStringView needle, Qt::CaseSensitivity cs) noexcept
9769{
9770 return qt_starts_with_impl(haystack, needle, cs);
9771}
9772
9773bool QtPrivate::startsWith(QStringView haystack, QLatin1StringView needle, Qt::CaseSensitivity cs) noexcept
9774{
9775 return qt_starts_with_impl(haystack, needle, cs);
9776}
9777
9778bool QtPrivate::startsWith(QLatin1StringView haystack, QStringView needle, Qt::CaseSensitivity cs) noexcept
9779{
9780 return qt_starts_with_impl(haystack, needle, cs);
9781}
9782
9783bool QtPrivate::startsWith(QLatin1StringView haystack, QLatin1StringView needle, Qt::CaseSensitivity cs) noexcept
9784{
9785 return qt_starts_with_impl(haystack, needle, cs);
9786}
9787
9788/*!
9789 \fn bool QtPrivate::endsWith(QStringView haystack, QStringView needle, Qt::CaseSensitivity cs)
9790 \since 5.10
9791 \fn bool QtPrivate::endsWith(QStringView haystack, QLatin1StringView needle, Qt::CaseSensitivity cs)
9792 \since 5.10
9793 \fn bool QtPrivate::endsWith(QLatin1StringView haystack, QStringView needle, Qt::CaseSensitivity cs)
9794 \since 5.10
9795 \fn bool QtPrivate::endsWith(QLatin1StringView haystack, QLatin1StringView needle, Qt::CaseSensitivity cs)
9796 \since 5.10
9797 \internal
9798 \relates QStringView
9799
9800 Returns \c true if \a haystack ends with \a needle,
9801 otherwise returns \c false.
9802
9803 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
9804
9805 \sa QtPrivate::startsWith(), QString::endsWith(), QStringView::endsWith(), QLatin1StringView::endsWith()
9806*/
9807
9808bool QtPrivate::endsWith(QStringView haystack, QStringView needle, Qt::CaseSensitivity cs) noexcept
9809{
9810 return qt_ends_with_impl(haystack, needle, cs);
9811}
9812
9813bool QtPrivate::endsWith(QStringView haystack, QLatin1StringView needle, Qt::CaseSensitivity cs) noexcept
9814{
9815 return qt_ends_with_impl(haystack, needle, cs);
9816}
9817
9818bool QtPrivate::endsWith(QLatin1StringView haystack, QStringView needle, Qt::CaseSensitivity cs) noexcept
9819{
9820 return qt_ends_with_impl(haystack, needle, cs);
9821}
9822
9823bool QtPrivate::endsWith(QLatin1StringView haystack, QLatin1StringView needle, Qt::CaseSensitivity cs) noexcept
9824{
9825 return qt_ends_with_impl(haystack, needle, cs);
9826}
9827
9828qsizetype QtPrivate::findString(QStringView haystack0, qsizetype from, QStringView needle0, Qt::CaseSensitivity cs) noexcept
9829{
9830 const qsizetype l = haystack0.size();
9831 const qsizetype sl = needle0.size();
9832 if (sl == 1)
9833 return findString(haystack0, from, needle0[0], cs);
9834 if (from < 0)
9835 from += l;
9836 if (std::size_t(sl + from) > std::size_t(l))
9837 return -1;
9838 if (!sl)
9839 return from;
9840 if (!l)
9841 return -1;
9842
9843 /*
9844 We use the Boyer-Moore algorithm in cases where the overhead
9845 for the skip table should pay off, otherwise we use a simple
9846 hash function.
9847 */
9848 if (l > 500 && sl > 5)
9849 return qFindStringBoyerMoore(haystack0, from, needle0, cs);
9850
9851 auto sv = [sl](const char16_t *v) { return QStringView(v, sl); };
9852 /*
9853 We use some hashing for efficiency's sake. Instead of
9854 comparing strings, we compare the hash value of str with that
9855 of a part of this QString. Only if that matches, we call
9856 qt_string_compare().
9857 */
9858 const char16_t *needle = needle0.utf16();
9859 const char16_t *haystack = haystack0.utf16() + from;
9860 const char16_t *end = haystack0.utf16() + (l - sl);
9861 const qregisteruint sl_minus_1 = sl - 1;
9862 qregisteruint hashNeedle = 0, hashHaystack = 0;
9863 qsizetype idx;
9864
9865 if (cs == Qt::CaseSensitive) {
9866 for (idx = 0; idx < sl; ++idx) {
9867 hashNeedle = ((hashNeedle<<1) + needle[idx]);
9868 hashHaystack = ((hashHaystack<<1) + haystack[idx]);
9869 }
9870 hashHaystack -= haystack[sl_minus_1];
9871
9872 while (haystack <= end) {
9873 hashHaystack += haystack[sl_minus_1];
9874 if (hashHaystack == hashNeedle
9875 && QtPrivate::compareStrings(needle0, sv(haystack), Qt::CaseSensitive) == 0)
9876 return haystack - haystack0.utf16();
9877
9878 REHASH(*haystack);
9879 ++haystack;
9880 }
9881 } else {
9882 const char16_t *haystack_start = haystack0.utf16();
9883 for (idx = 0; idx < sl; ++idx) {
9884 hashNeedle = (hashNeedle<<1) + foldCase(needle + idx, needle);
9885 hashHaystack = (hashHaystack<<1) + foldCase(haystack + idx, haystack_start);
9886 }
9887 hashHaystack -= foldCase(haystack + sl_minus_1, haystack_start);
9888
9889 while (haystack <= end) {
9890 hashHaystack += foldCase(haystack + sl_minus_1, haystack_start);
9891 if (hashHaystack == hashNeedle
9892 && QtPrivate::compareStrings(needle0, sv(haystack), Qt::CaseInsensitive) == 0)
9893 return haystack - haystack0.utf16();
9894
9895 REHASH(foldCase(haystack, haystack_start));
9896 ++haystack;
9897 }
9898 }
9899 return -1;
9900}
9901
9902qsizetype QtPrivate::findString(QStringView haystack, qsizetype from, QLatin1StringView needle, Qt::CaseSensitivity cs) noexcept
9903{
9904 if (haystack.size() < needle.size())
9905 return -1;
9906
9907 QVarLengthArray<char16_t> s = qt_from_latin1_to_qvla(needle);
9908 return QtPrivate::findString(haystack, from, QStringView(reinterpret_cast<const QChar*>(s.constData()), s.size()), cs);
9909}
9910
9911qsizetype QtPrivate::findString(QLatin1StringView haystack, qsizetype from, QStringView needle, Qt::CaseSensitivity cs) noexcept
9912{
9913 if (haystack.size() < needle.size())
9914 return -1;
9915
9916 if (!QtPrivate::isLatin1(needle)) // won't find non-L1 UTF-16 needles in a L1 haystack!
9917 return -1;
9918
9919 if (needle.size() == 1) {
9920 const char n = needle.front().toLatin1();
9921 return QtPrivate::findString(haystack, from, QLatin1StringView(&n, 1), cs);
9922 }
9923
9924 QVarLengthArray<char> s(needle.size());
9925 qt_to_latin1_unchecked(reinterpret_cast<uchar *>(s.data()), needle.utf16(), needle.size());
9926 return QtPrivate::findString(haystack, from, QLatin1StringView(s.data(), s.size()), cs);
9927}
9928
9929qsizetype QtPrivate::findString(QLatin1StringView haystack, qsizetype from, QLatin1StringView needle, Qt::CaseSensitivity cs) noexcept
9930{
9931 if (from < 0)
9932 from += haystack.size();
9933 if (from < 0)
9934 return -1;
9935 qsizetype adjustedSize = haystack.size() - from;
9936 if (adjustedSize < needle.size())
9937 return -1;
9938 if (needle.size() == 0)
9939 return from;
9940
9941 if (cs == Qt::CaseSensitive) {
9942
9943 if (needle.size() == 1) {
9944 Q_ASSERT(haystack.data() != nullptr); // see size check above
9945 if (auto it = memchr(haystack.data() + from, needle.front().toLatin1(), adjustedSize))
9946 return static_cast<const char *>(it) - haystack.data();
9947 return -1;
9948 }
9949
9950 const QLatin1StringMatcher matcher(needle, Qt::CaseSensitivity::CaseSensitive);
9951 return matcher.indexIn(haystack, from);
9952 }
9953
9954 // If the needle is sufficiently small we simply iteratively search through
9955 // the haystack. When the needle is too long we use a boyer-moore searcher
9956 // from the standard library, if available. If it is not available then the
9957 // QLatin1Strings are converted to QString and compared as such. Though
9958 // initialization is slower the boyer-moore search it employs still makes up
9959 // for it when haystack and needle are sufficiently long.
9960 // The needle size was chosen by testing various lengths using the
9961 // qstringtokenizer benchmark with the
9962 // "tokenize_qlatin1string_qlatin1string" test.
9963#ifdef Q_CC_MSVC
9964 const qsizetype threshold = 1;
9965#else
9966 const qsizetype threshold = 13;
9967#endif
9968 if (needle.size() <= threshold) {
9969 const auto begin = haystack.begin();
9970 const auto end = haystack.end() - needle.size() + 1;
9971 auto ciMatch = CaseInsensitiveL1::matcher(needle[0].toLatin1());
9972 const qsizetype nlen1 = needle.size() - 1;
9973 for (auto it = std::find_if(begin + from, end, ciMatch); it != end;
9974 it = std::find_if(it + 1, end, ciMatch)) {
9975 // In this comparison we skip the first character because we know it's a match
9976 if (!nlen1 || QLatin1StringView(it + 1, nlen1).compare(needle.sliced(1), cs) == 0)
9977 return std::distance(begin, it);
9978 }
9979 return -1;
9980 }
9981
9982 QLatin1StringMatcher matcher(needle, Qt::CaseSensitivity::CaseInsensitive);
9983 return matcher.indexIn(haystack, from);
9984}
9985
9986qsizetype QtPrivate::lastIndexOf(QStringView haystack, qsizetype from, char16_t needle, Qt::CaseSensitivity cs) noexcept
9987{
9988 return qLastIndexOf(haystack, QChar(needle), from, cs);
9989}
9990
9991qsizetype QtPrivate::lastIndexOf(QStringView haystack, qsizetype from, QStringView needle, Qt::CaseSensitivity cs) noexcept
9992{
9993 return qLastIndexOf(haystack, from, needle, cs);
9994}
9995
9996qsizetype QtPrivate::lastIndexOf(QStringView haystack, qsizetype from, QLatin1StringView needle, Qt::CaseSensitivity cs) noexcept
9997{
9998 return qLastIndexOf(haystack, from, needle, cs);
9999}
10000
10001qsizetype QtPrivate::lastIndexOf(QLatin1StringView haystack, qsizetype from, QStringView needle, Qt::CaseSensitivity cs) noexcept
10002{
10003 return qLastIndexOf(haystack, from, needle, cs);
10004}
10005
10006qsizetype QtPrivate::lastIndexOf(QLatin1StringView haystack, qsizetype from, QLatin1StringView needle, Qt::CaseSensitivity cs) noexcept
10007{
10008 return qLastIndexOf(haystack, from, needle, cs);
10009}
10010
10011#if QT_CONFIG(regularexpression)
10012qsizetype QtPrivate::indexOf(QStringView viewHaystack, const QString *stringHaystack, const QRegularExpression &re, qsizetype from, QRegularExpressionMatch *rmatch)
10013{
10014 if (!re.isValid()) {
10015 qtWarnAboutInvalidRegularExpression(re, "QString(View)", "indexOf");
10016 return -1;
10017 }
10018
10019 QRegularExpressionMatch match = stringHaystack
10020 ? re.match(*stringHaystack, from)
10021 : re.matchView(viewHaystack, from);
10022 if (match.hasMatch()) {
10023 const qsizetype ret = match.capturedStart();
10024 if (rmatch)
10025 *rmatch = std::move(match);
10026 return ret;
10027 }
10028
10029 return -1;
10030}
10031
10032qsizetype QtPrivate::indexOf(QStringView haystack, const QRegularExpression &re, qsizetype from, QRegularExpressionMatch *rmatch)
10033{
10034 return indexOf(haystack, nullptr, re, from, rmatch);
10035}
10036
10037qsizetype QtPrivate::lastIndexOf(QStringView viewHaystack, const QString *stringHaystack, const QRegularExpression &re, qsizetype from, QRegularExpressionMatch *rmatch)
10038{
10039 if (!re.isValid()) {
10040 qtWarnAboutInvalidRegularExpression(re, "QString(View)", "lastIndexOf");
10041 return -1;
10042 }
10043
10044 qsizetype endpos = (from < 0) ? (viewHaystack.size() + from + 1) : (from + 1);
10045 QRegularExpressionMatchIterator iterator = stringHaystack
10046 ? re.globalMatch(*stringHaystack)
10047 : re.globalMatchView(viewHaystack);
10048 qsizetype lastIndex = -1;
10049 while (iterator.hasNext()) {
10050 QRegularExpressionMatch match = iterator.next();
10051 qsizetype start = match.capturedStart();
10052 if (start < endpos) {
10053 lastIndex = start;
10054 if (rmatch)
10055 *rmatch = std::move(match);
10056 } else {
10057 break;
10058 }
10059 }
10060
10061 return lastIndex;
10062}
10063
10064qsizetype QtPrivate::lastIndexOf(QStringView haystack, const QRegularExpression &re, qsizetype from, QRegularExpressionMatch *rmatch)
10065{
10066 return lastIndexOf(haystack, nullptr, re, from, rmatch);
10067}
10068
10069bool QtPrivate::contains(QStringView viewHaystack, const QString *stringHaystack, const QRegularExpression &re, QRegularExpressionMatch *rmatch)
10070{
10071 if (!re.isValid()) {
10072 qtWarnAboutInvalidRegularExpression(re, "QString(View)", "contains");
10073 return false;
10074 }
10075 QRegularExpressionMatch m = stringHaystack
10076 ? re.match(*stringHaystack)
10077 : re.matchView(viewHaystack);
10078 bool hasMatch = m.hasMatch();
10079 if (hasMatch && rmatch)
10080 *rmatch = std::move(m);
10081 return hasMatch;
10082}
10083
10084bool QtPrivate::contains(QStringView haystack, const QRegularExpression &re, QRegularExpressionMatch *rmatch)
10085{
10086 return contains(haystack, nullptr, re, rmatch);
10087}
10088
10089qsizetype QtPrivate::count(QStringView haystack, const QRegularExpression &re)
10090{
10091 if (!re.isValid()) {
10092 qtWarnAboutInvalidRegularExpression(re, "QString(View)", "count");
10093 return 0;
10094 }
10095 qsizetype count = 0;
10096 qsizetype index = -1;
10097 qsizetype len = haystack.size();
10098 while (index <= len - 1) {
10099 QRegularExpressionMatch match = re.matchView(haystack, index + 1);
10100 if (!match.hasMatch())
10101 break;
10102 count++;
10103
10104 // Search again, from the next character after the beginning of this
10105 // capture. If the capture starts with a surrogate pair, both together
10106 // count as "one character".
10107 index = match.capturedStart();
10108 if (index < len && haystack[index].isHighSurrogate())
10109 ++index;
10110 }
10111 return count;
10112}
10113
10114#endif // QT_CONFIG(regularexpression)
10115
10116/*!
10117 \since 5.0
10118
10119 Converts a plain text string to an HTML string with
10120 HTML metacharacters \c{<}, \c{>}, \c{&}, and \c{"} replaced by HTML
10121 entities.
10122
10123 Example:
10124
10125 \snippet code/src_corelib_text_qstring.cpp 7
10126*/
10127QString QString::toHtmlEscaped() const
10128{
10129 const auto pos = std::u16string_view(*this).find_first_of(u"<>&\"");
10130 if (pos == std::u16string_view::npos)
10131 return *this;
10132 QString rich;
10133 const qsizetype len = size();
10134 rich.reserve(qsizetype(len * 1.1));
10135 rich += qToStringViewIgnoringNull(*this).first(pos);
10136 for (auto ch : qToStringViewIgnoringNull(*this).sliced(pos)) {
10137 if (ch == u'<')
10138 rich += "&lt;"_L1;
10139 else if (ch == u'>')
10140 rich += "&gt;"_L1;
10141 else if (ch == u'&')
10142 rich += "&amp;"_L1;
10143 else if (ch == u'"')
10144 rich += "&quot;"_L1;
10145 else
10146 rich += ch;
10147 }
10148 rich.squeeze();
10149 return rich;
10150}
10151
10152/*!
10153 \macro QStringLiteral(str)
10154 \relates QString
10155
10156 The macro generates the data for a QString out of the string literal \a str
10157 at compile time. Creating a QString from it is free in this case, and the
10158 generated string data is stored in the read-only segment of the compiled
10159 object file.
10160
10161 If you have code that looks like this:
10162
10163 \snippet code/src_corelib_text_qstring.cpp 9
10164
10165 then a temporary QString will be created to be passed as the \c{hasAttribute}
10166 function parameter. This can be quite expensive, as it involves a memory
10167 allocation and the copy/conversion of the data into QString's internal
10168 encoding.
10169
10170 This cost can be avoided by using QStringLiteral instead:
10171
10172 \snippet code/src_corelib_text_qstring.cpp 10
10173
10174 In this case, QString's internal data will be generated at compile time; no
10175 conversion or allocation will occur at runtime.
10176
10177 Using QStringLiteral instead of a double quoted plain C++ string literal can
10178 significantly speed up creation of QString instances from data known at
10179 compile time.
10180
10181 \note QLatin1StringView can still be more efficient than QStringLiteral
10182 when the string is passed to a function that has an overload taking
10183 QLatin1StringView and this overload avoids conversion to QString. For
10184 instance, QString::operator==() can compare to a QLatin1StringView
10185 directly:
10186
10187 \snippet code/src_corelib_text_qstring.cpp 11
10188
10189 \note Some compilers have bugs encoding strings containing characters outside
10190 the US-ASCII character set. Make sure you prefix your string with \c{u} in
10191 those cases. It is optional otherwise.
10192
10193 \note QStringLiteral is interchangeable with \l operator""_s. The latter saves
10194 typing when many string literals are present in the code.
10195
10196 \sa QByteArrayLiteral
10197*/
10198
10199#if QT_DEPRECATED_SINCE(6, 8)
10200/*!
10201 \fn QtLiterals::operator""_qs(const char16_t *str, size_t size)
10202
10203 \relates QString
10204 \since 6.2
10205 \deprecated [6.8] Use \c _s from Qt::StringLiterals namespace instead.
10206
10207 Literal operator that creates a QString out of the first \a size characters in
10208 the char16_t string literal \a str.
10209
10210 The QString is created at compile time, and the generated string data is stored
10211 in the read-only segment of the compiled object file. Duplicate literals may
10212 share the same read-only memory. This functionality is interchangeable with
10213 QStringLiteral, but saves typing when many string literals are present in the
10214 code.
10215
10216 The following code creates a QString:
10217 \code
10218 auto str = u"hello"_qs;
10219 \endcode
10220
10221 \sa QStringLiteral, QtLiterals::operator""_qba(const char *str, size_t size)
10222*/
10223#endif // QT_DEPRECATED_SINCE(6, 8)
10224
10225/*!
10226 \fn Qt::Literals::StringLiterals::operator""_s(const char16_t *str, size_t size)
10227
10228 \relates QString
10229 \since 6.4
10230
10231 Literal operator that creates a QString out of the first \a size characters in
10232 the char16_t string literal \a str.
10233
10234 The QString is created at compile time, and the generated string data is stored
10235 in the read-only segment of the compiled object file. Duplicate literals may
10236 share the same read-only memory. This functionality is interchangeable with
10237 QStringLiteral, but saves typing when many string literals are present in the
10238 code.
10239
10240 The following code creates a QString:
10241 \code
10242 using namespace Qt::StringLiterals;
10243
10244 auto str = u"hello"_s;
10245 \endcode
10246
10247 \sa Qt::Literals::StringLiterals
10248*/
10249
10250/*!
10251 \internal
10252 */
10253void QAbstractConcatenable::appendLatin1To(QLatin1StringView in, QChar *out) noexcept
10254{
10255 qt_from_latin1(reinterpret_cast<char16_t *>(out), in.data(), size_t(in.size()));
10256}
10257
10258/*!
10259 \fn template <typename T> qsizetype erase(QString &s, const T &t)
10260 \relates QString
10261 \since 6.1
10262
10263 Removes all elements that compare equal to \a t from the
10264 string \a s. Returns the number of elements removed, if any.
10265
10266 \sa erase_if
10267*/
10268
10269/*!
10270 \fn template <typename Predicate> qsizetype erase_if(QString &s, Predicate pred)
10271 \relates QString
10272 \since 6.1
10273
10274 Removes all elements for which the predicate \a pred returns true
10275 from the string \a s. Returns the number of elements removed, if
10276 any.
10277
10278 \sa erase
10279*/
10280
10281/*!
10282 \macro const char *qPrintable(const QString &str)
10283 \relates QString
10284
10285 Returns \a str as a \c{const char *}. This is equivalent to
10286 \a{str}.toLocal8Bit().\l{QByteArray::}{constData()}.
10287
10288 The char pointer will be invalid after the statement in which
10289 qPrintable() is used. This is because the array returned by
10290 QString::toLocal8Bit() will fall out of scope.
10291
10292 \note qDebug(), qInfo(), qWarning(), qCritical(), qFatal() expect
10293 %s arguments to be UTF-8 encoded, while qPrintable() converts to
10294 local 8-bit encoding. Therefore qUtf8Printable() should be used
10295 for logging strings instead of qPrintable().
10296
10297 \sa qUtf8Printable()
10298*/
10299
10300/*!
10301 \macro const char *qUtf8Printable(const QString &str)
10302 \relates QString
10303 \since 5.4
10304
10305 Returns \a str as a \c{const char *}. This is equivalent to
10306 \a{str}.toUtf8().\l{QByteArray::}{constData()}.
10307
10308 The char pointer will be invalid after the statement in which
10309 qUtf8Printable() is used. This is because the array returned by
10310 QString::toUtf8() will fall out of scope.
10311
10312 Example:
10313
10314 \snippet code/src_corelib_text_qstring.cpp qUtf8Printable
10315
10316 \sa qPrintable(), qDebug(), qInfo(), qWarning(), qCritical(), qFatal()
10317*/
10318
10319/*!
10320 \macro const wchar_t *qUtf16Printable(const QString &str)
10321 \relates QString
10322 \since 5.7
10323
10324 Returns \a str as a \c{const ushort *}, but cast to a \c{const wchar_t *}
10325 to avoid warnings. This is equivalent to \a{str}.utf16() plus some casting.
10326
10327 The only useful thing you can do with the return value of this macro is to
10328 pass it to QString::asprintf() for use in a \c{%ls} conversion. In particular,
10329 the return value is \e{not} a valid \c{const wchar_t*}!
10330
10331 In general, the pointer will be invalid after the statement in which
10332 qUtf16Printable() is used. This is because the pointer may have been
10333 obtained from a temporary expression, which will fall out of scope.
10334
10335 Example:
10336
10337 \snippet code/src_corelib_text_qstring.cpp qUtf16Printable
10338
10339 \sa qPrintable(), qDebug(), qInfo(), qWarning(), qCritical(), qFatal()
10340*/
10341
10342QT_END_NAMESPACE
10343
10344#undef REHASH
QString convertToQString(QAnyStringView string)
Definition qstring.cpp:5577
Definition qlist.h:81
char32_t next(char32_t invalidAs=QChar::ReplacementCharacter)
bool hasNext() const
\inmodule QtCore
QList< uint > convertToUcs4(QStringView string)
Definition qstring.cpp:5833
QByteArray convertToUtf8(QStringView string)
Definition qstring.cpp:5778
QByteArray convertToLocal8Bit(QStringView string)
Definition qstring.cpp:5735
QByteArray convertToLatin1(QStringView string)
Definition qstring.cpp:5594
Combined button and popup list for selecting options.
static QString convertCase(T &str, QUnicodeTables::Case which)
Definition qstring.cpp:7207
static constexpr NormalizationCorrection uc_normalization_corrections[]
Q_CORE_EXPORT Q_DECL_PURE_FUNCTION bool startsWith(QStringView haystack, QStringView needle, Qt::CaseSensitivity cs=Qt::CaseSensitive) noexcept
Definition qstring.cpp:9768
Q_CORE_EXPORT Q_DECL_PURE_FUNCTION bool endsWith(QStringView haystack, QStringView needle, Qt::CaseSensitivity cs=Qt::CaseSensitive) noexcept
Definition qstring.cpp:9808
Q_CORE_EXPORT Q_DECL_PURE_FUNCTION bool isLower(QStringView s) noexcept
Definition qstring.cpp:5514
const QString & asString(const QString &s)
Definition qstring.h:1678
Q_CORE_EXPORT Q_DECL_PURE_FUNCTION bool isValidUtf16(QStringView s) noexcept
Definition qstring.cpp:906
Q_CORE_EXPORT Q_DECL_PURE_FUNCTION bool equalStrings(QStringView lhs, QStringView rhs) noexcept
Definition qstring.cpp:1374
qsizetype findString(QStringView str, qsizetype from, QChar needle, Qt::CaseSensitivity cs=Qt::CaseSensitive) noexcept
Q_CORE_EXPORT Q_DECL_PURE_FUNCTION bool isRightToLeft(QStringView string) noexcept
Q_CORE_EXPORT Q_DECL_PURE_FUNCTION int compareStrings(QStringView lhs, QStringView rhs, Qt::CaseSensitivity cs=Qt::CaseSensitive) noexcept
Q_CORE_EXPORT Q_DECL_PURE_FUNCTION bool isAscii(QLatin1StringView s) noexcept
Definition qstring.cpp:851
constexpr bool isLatin1(QLatin1StringView s) noexcept
Definition qstring.h:77
Q_CORE_EXPORT Q_DECL_PURE_FUNCTION const char16_t * qustrcasechr(QStringView str, char16_t ch) noexcept
Definition qstring.cpp:776
Q_CORE_EXPORT Q_DECL_PURE_FUNCTION bool isUpper(QStringView s) noexcept
Definition qstring.cpp:5519
Q_CORE_EXPORT Q_DECL_PURE_FUNCTION const char16_t * qustrchr(QStringView str, char16_t ch) noexcept
Definition qstring.cpp:688
void qt_to_latin1_unchecked(uchar *dst, const char16_t *uc, qsizetype len)
Definition qstring.cpp:1189
static char16_t foldCase(char16_t ch) noexcept
Definition qchar.cpp:1696
#define __has_feature(x)
uint QT_FASTCALL fetch1Pixel< QPixelLayout::BPP1LSB >(const uchar *src, int index)
bool comparesEqual(const QFileInfo &lhs, const QFileInfo &rhs)
static bool isAscii_helper(const char16_t *&ptr, const char16_t *end)
Definition qstring.cpp:859
static Int toIntegral(QStringView string, bool *ok, int base)
Definition qstring.cpp:7696
void qt_to_latin1(uchar *dst, const char16_t *src, qsizetype length)
Definition qstring.cpp:1184
Qt::strong_ordering compareThreeWay(const QByteArray &lhs, const QChar &rhs) noexcept
Definition qstring.cpp:6751
static void append_utf8(QString &qs, const char *cs, qsizetype len)
Definition qstring.cpp:7330
#define ATTRIBUTE_NO_SANITIZE
Definition qstring.cpp:367
bool qt_is_ascii(const char *&ptr, const char *end) noexcept
Definition qstring.cpp:787
static bool checkCase(QStringView s, QUnicodeTables::Case c) noexcept
Definition qstring.cpp:5503
static void replace_helper(QString &str, QSpan< qsizetype > indices, qsizetype blen, QStringView after)
Definition qstring.cpp:3691
Q_CORE_EXPORT void qt_from_latin1(char16_t *dst, const char *str, size_t size) noexcept
Definition qstring.cpp:921
static int ucstrcmp(const char16_t *a, size_t alen, const Char2 *b, size_t blen)
Definition qstring.cpp:1347
bool comparesEqual(const QByteArray &lhs, char16_t rhs) noexcept
Definition qstring.cpp:6757
Q_DECLARE_TYPEINFO(Part, Q_PRIMITIVE_TYPE)
static void removeStringImpl(QString &s, const T &needle, Qt::CaseSensitivity cs)
Definition qstring.cpp:3500
static bool needsReallocate(const QString &str, qsizetype newSize)
Definition qstring.cpp:2636
static int qArgDigitValue(QChar ch) noexcept
Definition qstring.cpp:1614
bool comparesEqual(const QByteArray &lhs, const QChar &rhs) noexcept
Definition qstring.cpp:6746
#define REHASH(a)
Definition qstring.cpp:66
bool comparesEqual(const QByteArrayView &lhs, char16_t rhs) noexcept
Definition qstring.cpp:6735
static int ucstrncmp(const char16_t *a, const char16_t *b, size_t l)
Definition qstring.cpp:1265
static Q_NEVER_INLINE int ucstricmp(qsizetype alen, const char16_t *a, qsizetype blen, const char *b)
Definition qstring.cpp:1220
static QByteArray qt_convert_to_latin1(QStringView string)
Definition qstring.cpp:5600
static bool ucstreq(const char16_t *a, size_t alen, const Char2 *b)
Definition qstring.cpp:1340
static QList< uint > qt_convert_to_ucs4(QStringView string)
Definition qstring.cpp:5805
qsizetype qFindStringBoyerMoore(QStringView haystack, qsizetype from, QStringView needle, Qt::CaseSensitivity cs)
static QByteArray qt_convert_to_local_8bit(QStringView string)
Definition qstring.cpp:5712
static LengthMod parse_length_modifier(const char *&c) noexcept
Definition qstring.cpp:7386
static ArgEscapeData findArgEscapes(QStringView s)
Definition qstring.cpp:8605
static QByteArray qt_convert_to_utf8(QStringView str)
Definition qstring.cpp:5758
static void qt_to_latin1_internal(uchar *dst, const char16_t *src, qsizetype length)
Definition qstring.cpp:1005
QtPrivate::QCaseInsensitiveLatin1Hash CaseInsensitiveL1
Definition qstring.cpp:1354
LengthMod
Definition qstring.cpp:7375
@ lm_z
Definition qstring.cpp:7375
@ lm_none
Definition qstring.cpp:7375
@ lm_t
Definition qstring.cpp:7375
@ lm_l
Definition qstring.cpp:7375
@ lm_ll
Definition qstring.cpp:7375
@ lm_hh
Definition qstring.cpp:7375
@ lm_L
Definition qstring.cpp:7375
@ lm_h
Definition qstring.cpp:7375
@ lm_j
Definition qstring.cpp:7375
static void insert_helper(QString &str, qsizetype i, const T &toInsert)
Definition qstring.cpp:2975
static int latin1nicmp(const char *lhsChar, qsizetype lSize, const char *rhsChar, qsizetype rSize)
Definition qstring.cpp:1356
Qt::strong_ordering compareThreeWay(const QByteArrayView &lhs, const QChar &rhs) noexcept
Definition qstring.cpp:6729
static char16_t to_unicode(const char c)
Definition qstring.cpp:9010
Qt::strong_ordering compareThreeWay(const QByteArray &lhs, char16_t rhs) noexcept
Definition qstring.cpp:6762
static QString replaceArgEscapes(QStringView s, const ArgEscapeData &d, qsizetype field_width, QStringView arg, QStringView larg, QChar fillChar)
Definition qstring.cpp:8681
static QVarLengthArray< char16_t > qt_from_latin1_to_qvla(QLatin1StringView str)
Definition qstring.cpp:996
static Q_NEVER_INLINE int ucstricmp8(const char *utf8, const char *utf8end, const QChar *utf16, const QChar *utf16end)
Definition qstring.cpp:1238
void qt_string_normalize(QString *data, QString::NormalizationForm mode, QChar::UnicodeVersion version, qsizetype from)
Definition qstring.cpp:8468
static uint parse_flag_characters(const char *&c) noexcept
Definition qstring.cpp:7338
static Q_NEVER_INLINE int ucstricmp(qsizetype alen, const char16_t *a, qsizetype blen, const char16_t *b)
Definition qstring.cpp:1195
static char16_t to_unicode(const QChar c)
Definition qstring.cpp:9009
QDataStream & operator>>(QDataStream &in, QString &str)
Definition qstring.cpp:9542
static int getEscape(const Char *uc, qsizetype *pos, qsizetype len)
Definition qstring.cpp:9013
static int ucstrncmp(const char16_t *a, const char *b, size_t l)
Definition qstring.cpp:1318
static bool can_consume(const char *&c, char ch) noexcept
Definition qstring.cpp:7377
static int parse_field_width(const char *&c, qsizetype size)
Definition qstring.cpp:7358
Qt::strong_ordering compareThreeWay(const QByteArrayView &lhs, char16_t rhs) noexcept
Definition qstring.cpp:6740
#define qUtf16Printable(string)
Definition qstring.h:1695
qsizetype occurrences
Definition qstring.cpp:8599
qsizetype escape_len
Definition qstring.cpp:8602
qsizetype locale_occurrences
Definition qstring.cpp:8600
\inmodule QtCore \reentrant
Definition qchar.h:18
constexpr char16_t unicode() const noexcept
Converts a Latin-1 character to an 16-bit-encoded Unicode representation of the character.
Definition qchar.h:22
constexpr QLatin1Char(char c) noexcept
Constructs a Latin-1 character for c.
Definition qchar.h:20
@ BlankBeforePositive
Definition qlocale_p.h:270
@ AddTrailingZeroes
Definition qlocale_p.h:267
static int difference(char lhs, char rhs)