Qt
Internal/Contributor docs for the Qt SDK. Note: These are NOT official API docs; those are found at https://doc.qt.io/
Loading...
Searching...
No Matches
qstring.cpp
Go to the documentation of this file.
1// Copyright (C) 2021 The Qt Company Ltd.
2// Copyright (C) 2022 Intel Corporation.
3// Copyright (C) 2019 Mail.ru Group.
4// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
5// Qt-Security score:critical reason:data-parser
6
7#include "qstringlist.h"
8#if QT_CONFIG(regularexpression)
9#include "qregularexpression.h"
10#endif
12#include <private/qstringconverter_p.h>
13#include <private/qtools_p.h>
15#include "private/qsimd_p.h"
16#include <qnumeric.h>
17#include <qdatastream.h>
18#include <qlist.h>
19#include "qlocale.h"
20#include "qlocale_p.h"
21#include "qspan.h"
22#include "qstringbuilder.h"
23#include "qstringmatcher.h"
25#include "qdebug.h"
26#include "qendian.h"
27#include "qcollator.h"
28#include "qttypetraits.h"
29
30#ifdef Q_OS_DARWIN
31#include <private/qcore_mac_p.h>
32#endif
33
34#include <private/qfunctions_p.h>
35
36#include <limits.h>
37#include <string.h>
38#include <stdlib.h>
39#include <stdio.h>
40#include <stdarg.h>
41#include <wchar.h>
42
43#include "qchar.cpp"
48#include "qthreadstorage.h"
49
50#include <algorithm>
51#include <functional>
52
53#ifdef Q_OS_WIN
54# include <qt_windows.h>
55# if !defined(QT_BOOTSTRAPPED) && (defined(QT_NO_CAST_FROM_ASCII) || defined(QT_NO_CAST_TO_ASCII))
56// MSVC requires this, but let's apply it to MinGW compilers too, just in case
57# error "This file cannot be compiled with QT_NO_CAST_{TO,FROM}_ASCII, "
58 "otherwise some QString functions will not get exported."
59# endif
60#endif
61
62#ifdef truncate
63# undef truncate
64#endif
65
// Rolling-hash step used by the backward search loops in qLastIndexOf().
//
// Expects two locals to be in scope at the point of use:
//   - sl_minus_1:   needle length minus one (unsigned)
//   - hashHaystack: the running hash of the current haystack window
//
// It removes the contribution of code unit \a a, which carries the weight
// 2^sl_minus_1 in the hash, and then doubles the hash to make room for the
// next code unit. The subtraction is skipped when the shift count would be
// as wide as the type, because such a shift is undefined.
//
// Note: this expands to two statements (an if and a shift) without a trailing
// semicolon, so it must not be used as the body of an unbraced if/else/loop.
#define REHASH(a) \
    if (sl_minus_1 < sizeof(sl_minus_1) * CHAR_BIT)  \
        hashHaystack -= decltype(hashHaystack)(a) << sl_minus_1; \
    hashHaystack <<= 1
70
72
// Make the names from Qt::StringLiterals and QtMiscUtils usable unqualified in this file.
73using namespace Qt::StringLiterals;
74using namespace QtMiscUtils;
75
// Out-of-class definition of the static member QString::_empty, initialised to 0.
76const char16_t QString::_empty = 0;
77
78// in qstringmatcher.cpp
// Declared here because the definition lives in another translation unit (see above).
79qsizetype qFindStringBoyerMoore(QStringView haystack, qsizetype from, QStringView needle, Qt::CaseSensitivity cs);
80
81namespace {
// Selects what a comparison helper has to compute. In this file,
// ucstrncmp_sse2() returns 1 for a SIMD-detected mismatch when instantiated
// with CompareStringsForEquality, instead of computing the difference between
// the mismatching code units.
82enum StringComparisonMode {
83 CompareStringsForEquality,
84 CompareStringsForOrdering
85};
86
87template <typename Pointer>
88char32_t foldCaseHelper(Pointer ch, Pointer start) = delete;
89
90template <>
91char32_t foldCaseHelper<const QChar*>(const QChar* ch, const QChar* start)
92{
93 return foldCase(reinterpret_cast<const char16_t*>(ch),
94 reinterpret_cast<const char16_t*>(start));
95}
96
97template <>
98char32_t foldCaseHelper<const char*>(const char* ch, const char*)
99{
100 return foldCase(char16_t(uchar(*ch)));
101}
102
103template <typename T>
104char16_t valueTypeToUtf16(T t) = delete;
105
106template <>
107char16_t valueTypeToUtf16<QChar>(QChar t)
108{
109 return t.unicode();
110}
111
112template <>
113char16_t valueTypeToUtf16<char>(char t)
114{
115 return char16_t{uchar(t)};
116}
117
// Returns true if the case-folded form of \a a equals \a b.
// Only \a a is folded; \a b is compared as passed in.
template <typename T>
static inline bool foldAndCompare(const T a, const T b)
{
    const auto folded = foldCase(a);
    return folded == b;
}
123
124/*!
125 \internal
126
127 Returns the index position of the first occurrence of the
128 character \a ch in the string given by \a str and \a len,
129 searching forward from index
130 position \a from. Returns -1 if \a ch could not be found.
131*/
132template <typename Haystack>
133static inline qsizetype qLastIndexOf(Haystack haystack, QChar needle,
134 qsizetype from, Qt::CaseSensitivity cs) noexcept
135{
136 if (haystack.size() == 0)
137 return -1;
138 if (from < 0)
139 from += haystack.size();
140 else if (std::size_t(from) > std::size_t(haystack.size()))
141 from = haystack.size() - 1;
142 if (from >= 0) {
143 char16_t c = needle.unicode();
144 const auto b = haystack.data();
145 auto n = b + from;
146 if (cs == Qt::CaseSensitive) {
147 for (; n >= b; --n)
148 if (valueTypeToUtf16(*n) == c)
149 return n - b;
150 } else {
151 c = foldCase(c);
152 for (; n >= b; --n)
153 if (foldCase(valueTypeToUtf16(*n)) == c)
154 return n - b;
155 }
156 }
157 return -1;
158}
159template <> qsizetype
160qLastIndexOf(QString, QChar, qsizetype, Qt::CaseSensitivity) noexcept = delete; // unwanted, would detach
161
162template<typename Haystack, typename Needle>
163static qsizetype qLastIndexOf(Haystack haystack0, qsizetype from,
164 Needle needle0, Qt::CaseSensitivity cs) noexcept
165{
166 const qsizetype sl = needle0.size();
167 if (sl == 1)
168 return qLastIndexOf(haystack0, needle0.front(), from, cs);
169
170 const qsizetype l = haystack0.size();
171 if (from < 0)
172 from += l;
173 if (from == l && sl == 0)
174 return from;
175 const qsizetype delta = l - sl;
176 if (std::size_t(from) > std::size_t(l) || delta < 0)
177 return -1;
178 if (from > delta)
179 from = delta;
180
181 auto sv = [sl](const typename Haystack::value_type *v) { return Haystack(v, sl); };
182
183 auto haystack = haystack0.data();
184 const auto needle = needle0.data();
185 const auto *end = haystack;
186 haystack += from;
187 const qregisteruint sl_minus_1 = sl ? sl - 1 : 0;
188 const auto *n = needle + sl_minus_1;
189 const auto *h = haystack + sl_minus_1;
190 qregisteruint hashNeedle = 0, hashHaystack = 0;
191
192 if (cs == Qt::CaseSensitive) {
193 for (qsizetype idx = 0; idx < sl; ++idx) {
194 hashNeedle = (hashNeedle << 1) + valueTypeToUtf16(*(n - idx));
195 hashHaystack = (hashHaystack << 1) + valueTypeToUtf16(*(h - idx));
196 }
197 hashHaystack -= valueTypeToUtf16(*haystack);
198
199 while (haystack >= end) {
200 hashHaystack += valueTypeToUtf16(*haystack);
201 if (hashHaystack == hashNeedle
202 && QtPrivate::compareStrings(needle0, sv(haystack), Qt::CaseSensitive) == 0)
203 return haystack - end;
204 --haystack;
205 REHASH(valueTypeToUtf16(haystack[sl]));
206 }
207 } else {
208 for (qsizetype idx = 0; idx < sl; ++idx) {
209 hashNeedle = (hashNeedle << 1) + foldCaseHelper(n - idx, needle);
210 hashHaystack = (hashHaystack << 1) + foldCaseHelper(h - idx, end);
211 }
212 hashHaystack -= foldCaseHelper(haystack, end);
213
214 while (haystack >= end) {
215 hashHaystack += foldCaseHelper(haystack, end);
216 if (hashHaystack == hashNeedle
217 && QtPrivate::compareStrings(sv(haystack), needle0, Qt::CaseInsensitive) == 0)
218 return haystack - end;
219 --haystack;
220 REHASH(foldCaseHelper(haystack + sl, end));
221 }
222 }
223 return -1;
224}
225
226template <typename Haystack, typename Needle>
227bool qt_starts_with_impl(Haystack haystack, Needle needle, Qt::CaseSensitivity cs) noexcept
228{
229 if (haystack.isNull())
230 return needle.isNull();
231 const auto haystackLen = haystack.size();
232 const auto needleLen = needle.size();
233 if (haystackLen == 0)
234 return needleLen == 0;
235 if (needleLen > haystackLen)
236 return false;
237
238 return QtPrivate::compareStrings(haystack.first(needleLen), needle, cs) == 0;
239}
240
241template <typename Haystack, typename Needle>
242bool qt_ends_with_impl(Haystack haystack, Needle needle, Qt::CaseSensitivity cs) noexcept
243{
244 if (haystack.isNull())
245 return needle.isNull();
246 const auto haystackLen = haystack.size();
247 const auto needleLen = needle.size();
248 if (haystackLen == 0)
249 return needleLen == 0;
250 if (haystackLen < needleLen)
251 return false;
252
253 return QtPrivate::compareStrings(haystack.last(needleLen), needle, cs) == 0;
254}
255
256template <typename T>
257static void append_helper(QString &self, T view)
258{
259 const auto strData = view.data();
260 const qsizetype strSize = view.size();
261 auto &d = self.data_ptr();
262 if (strData && strSize > 0) {
263 // the number of UTF-8 code units is always at a minimum equal to the number
264 // of equivalent UTF-16 code units
265 d.detachAndGrow(QArrayData::GrowsAtEnd, strSize, nullptr, nullptr);
266 Q_CHECK_PTR(d.data());
267 Q_ASSERT(strSize <= d.freeSpaceAtEnd());
268
269 auto dst = std::next(d.data(), d.size);
270 if constexpr (std::is_same_v<T, QUtf8StringView>) {
271 dst = QUtf8::convertToUnicode(dst, view);
272 } else if constexpr (std::is_same_v<T, QLatin1StringView>) {
273 QLatin1::convertToUnicode(dst, view);
274 dst += strSize;
275 } else {
276 static_assert(QtPrivate::type_dependent_false<T>(),
277 "Can only operate on UTF-8 and Latin-1");
278 }
279 self.resize(std::distance(d.begin(), dst));
280 } else if (d.isNull() && !view.isNull()) { // special case
281 self = QLatin1StringView("");
282 }
283}
284
// Compile-time unrolled loop of at most MaxCount iterations, implemented by
// recursing into UnrollTailLoop<MaxCount - 1> until the <0> specialisation
// below terminates the chain.
template <uint MaxCount> struct UnrollTailLoop
{
    // Runs loopCheck(i), loopCheck(i + 1), ... for up to \a count iterations
    // (never more than MaxCount). As soon as one check yields true, returns
    // returnIfFailed() of that index; otherwise returns \a returnIfExited.
    //
    // Roughly equivalent to:
    //     for (; count; --count, ++i) {
    //         if (loopCheck(i))
    //             return returnIfFailed(i);
    //     }
    //     return returnIfExited;
    template <typename RetType, typename Functor1, typename Functor2, typename Number>
    static inline RetType exec(Number count, RetType returnIfExited, Functor1 loopCheck, Functor2 returnIfFailed, Number i = 0)
    {
        if (count) {
            const bool failed = loopCheck(i);
            if (failed)
                return returnIfFailed(i);

            using Remaining = UnrollTailLoop<MaxCount - 1>;
            return Remaining::exec(count - 1, returnIfExited, loopCheck, returnIfFailed, i + 1);
        }
        return returnIfExited;
    }

    // Calls code(0), code(1), ... for up to \a count iterations (never more
    // than MaxCount). Built on the checked overload with a check that runs
    // \a code and never reports failure.
    template <typename Functor, typename Number>
    static inline void exec(Number count, Functor code)
    {
        const auto runAndContinue = [=](Number i) -> bool { code(i); return false; };
        const auto neverCalled = [](Number) { return 0; };
        exec(count, 0, runAndContinue, neverCalled);
    }
};
// End of the recursion: no iterations are left, whatever \a count says.
template <> template <typename RetType, typename Functor1, typename Functor2, typename Number>
inline RetType UnrollTailLoop<0>::exec(Number, RetType returnIfExited, Functor1, Functor2, Number)
{
    return returnIfExited;
}
323} // unnamed namespace
324
325/*
326 * Note on the use of SIMD in qstring.cpp:
327 *
328 * Several operations with strings are improved with the use of SIMD code,
329 * since they are repetitive. For MIPS, we have hand-written assembly code
330 * outside of qstring.cpp targeting MIPS DSP and MIPS DSPr2. For ARM and for
331 * x86, we can only use intrinsics and therefore everything is contained in
332 * qstring.cpp. We need to use intrinsics only for those platforms due to the
333 * different compilers and toolchains used, which have different syntax for
334 * assembly sources.
335 *
336 * ** SSE notes: **
337 *
338 * Whenever multiple alternatives are equivalent or near so, we prefer the one
339 * using instructions from SSE2, since SSE2 is guaranteed to be enabled for all
340 * 64-bit builds and we enable it for 32-bit builds by default. Use of higher
341 * SSE versions should be done when there is a clear performance benefit and
342 * requires fallback code to SSE2, if it exists.
343 *
344 * Performance measurement in the past shows that most strings are short in
345 * size and, therefore, do not benefit from alignment prologues. That is,
346 * trying to find a 16-byte-aligned boundary to operate on is often more
347 * expensive than executing the unaligned operation directly. In addition, note
348 * that the QString private data is designed so that the data is stored on
349 * 16-byte boundaries if the system malloc() returns 16-byte aligned pointers
350 * on its own (64-bit glibc on Linux does; 32-bit glibc on Linux returns them
351 * 50% of the time), so skipping the alignment prologue is actually optimizing
352 * for the common case.
353 */
354
355#if defined(__mips_dsp)
356// From qstring_mips_dsp_asm.S
357extern "C" void qt_fromlatin1_mips_asm_unroll4 (char16_t*, const char*, uint);
358extern "C" void qt_fromlatin1_mips_asm_unroll8 (char16_t*, const char*, uint);
359extern "C" void qt_toLatin1_mips_dsp_asm(uchar *dst, const char16_t *src, int length);
360#endif
361
362#if defined(__SSE2__) && defined(Q_CC_GNU)
363// We may overrun the buffer, but that's a false positive:
364// this won't crash nor produce incorrect results
365# define ATTRIBUTE_NO_SANITIZE __attribute__((__no_sanitize_address__, __no_sanitize_thread__))
366#else
367# define ATTRIBUTE_NO_SANITIZE
368#endif
369
370#ifdef __SSE2__
// Compile-time switches for the SIMD code paths in this file, computed from
// qCompilerCpuFeatures. UseSse4_1 is true when the CpuFeatureSSE4_1 bit is set.
371static constexpr bool UseSse4_1 = bool(qCompilerCpuFeatures & CpuFeatureSSE4_1);
// UseAvx2 additionally requires every bit of CpuFeatureArchHaswell to be set.
372static constexpr bool UseAvx2 = UseSse4_1 &&
373 (qCompilerCpuFeatures & CpuFeatureArchHaswell) == CpuFeatureArchHaswell;
374
375[[maybe_unused]]
376Q_ALWAYS_INLINE static __m128i mm_load8_zero_extend(const void *ptr)
377{
378 const __m128i *dataptr = static_cast<const __m128i *>(ptr);
379 if constexpr (UseSse4_1) {
380 // use a MOVQ followed by PMOVZXBW
381 // if AVX2 is present, these should combine into a single VPMOVZXBW instruction
382 __m128i data = _mm_loadl_epi64(dataptr);
383 return _mm_cvtepu8_epi16(data);
384 }
385
386 // use MOVQ followed by PUNPCKLBW
387 __m128i data = _mm_loadl_epi64(dataptr);
388 return _mm_unpacklo_epi8(data, _mm_setzero_si128());
389}
390
391[[maybe_unused]] ATTRIBUTE_NO_SANITIZE
392static qsizetype qustrlen_sse2(const char16_t *str) noexcept
393{
394 // find the 16-byte alignment immediately prior or equal to str
395 quintptr misalignment = quintptr(str) & 0xf;
396 Q_ASSERT((misalignment & 1) == 0);
397 const char16_t *ptr = str - (misalignment / 2);
398
399 // load 16 bytes and see if we have a null
400 // (aligned loads can never segfault)
401 const __m128i zeroes = _mm_setzero_si128();
402 __m128i data = _mm_load_si128(reinterpret_cast<const __m128i *>(ptr));
403 __m128i comparison = _mm_cmpeq_epi16(data, zeroes);
404 uint mask = _mm_movemask_epi8(comparison);
405
406 // ignore the result prior to the beginning of str
407 mask >>= misalignment;
408
409 // Have we found something in the first block? Need to handle it now
410 // because of the left shift above.
411 if (mask)
412 return qCountTrailingZeroBits(mask) / sizeof(char16_t);
413
414 constexpr qsizetype Step = sizeof(__m128i) / sizeof(char16_t);
415 qsizetype size = Step - misalignment / sizeof(char16_t);
416
417 size -= Step;
418 do {
419 size += Step;
420 data = _mm_load_si128(reinterpret_cast<const __m128i *>(str + size));
421
422 comparison = _mm_cmpeq_epi16(data, zeroes);
423 mask = _mm_movemask_epi8(comparison);
424 } while (mask == 0);
425
426 // found a null
427 return size + qCountTrailingZeroBits(mask) / sizeof(char16_t);
428}
429
430// Scans from \a ptr to \a end for a 16-bit word that has any bit of \a maskval
431// set. Returns true if no such word was found. Returns false and updates \a ptr to point to the
432// first 16-bit word that has any bit set (note: if the input is 8-bit, \a ptr
433// may be updated to one byte short).
// Data is consumed in 32-, 16- and 8-byte steps only: when true is returned,
// fewer than 8 bytes before \a end may be left unexamined and \a ptr points at
// the first of them (the callers in this file finish with a scalar loop).
434static bool simdTestMask(const char *&ptr, const char *end, quint32 maskval)
435{
 // \a result has one bit per byte, set when the 16-bit word containing that
 // byte is zero after masking. The first clear bit is therefore the byte
 // offset of the first matching word. Always returns false.
436 auto updatePtr = [&](uint result) {
437 // found a character matching the mask
438 uint idx = qCountTrailingZeroBits(~result);
439 ptr += idx;
440 return false;
441 };
442
443 if constexpr (UseSse4_1) {
 // When Q_OS_QNX is defined this block is empty and the SSE2 code further
 // down is used instead.
444# ifndef Q_OS_QNX // compiler fails in the code below
445 __m128i mask;
 // Recomputes the per-byte "word is zero after masking" bits for a 16-byte
 // vector that is already known to contain a match, then advances ptr.
446 auto updatePtrSimd = [&](__m128i data) -> bool {
447 __m128i masked = _mm_and_si128(mask, data);
448 __m128i comparison = _mm_cmpeq_epi16(masked, _mm_setzero_si128());
449 uint result = _mm_movemask_epi8(comparison);
450 return updatePtr(result);
451 };
452
453 if constexpr (UseAvx2) {
454 // AVX2 implementation: test 32 bytes at a time
455 const __m256i mask256 = _mm256_broadcastd_epi32(_mm_cvtsi32_si128(maskval));
456 while (ptr + 32 <= end) {
457 __m256i data = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(ptr));
458 if (!_mm256_testz_si256(mask256, data)) {
459 // found a character matching the mask
460 __m256i masked256 = _mm256_and_si256(mask256, data);
461 __m256i comparison256 = _mm256_cmpeq_epi16(masked256, _mm256_setzero_si256());
462 return updatePtr(_mm256_movemask_epi8(comparison256));
463 }
464 ptr += 32;
465 }
466
 // reuse the low half of the broadcast mask for the 16- and 8-byte tails
467 mask = _mm256_castsi256_si128(mask256);
468 } else {
469 // SSE 4.1 implementation: test 32 bytes at a time (two 16-byte
470 // comparisons, unrolled)
471 mask = _mm_set1_epi32(maskval);
472 while (ptr + 32 <= end) {
473 __m128i data1 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(ptr));
474 __m128i data2 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(ptr + 16));
475 if (!_mm_testz_si128(mask, data1))
476 return updatePtrSimd(data1);
477
 // ptr must already point at the second half before it is reported
478 ptr += 16;
479 if (!_mm_testz_si128(mask, data2))
480 return updatePtrSimd(data2);
481 ptr += 16;
482 }
483 }
484
485 // AVX2 and SSE4.1: final 16-byte comparison
486 if (ptr + 16 <= end) {
487 __m128i data1 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(ptr));
488 if (!_mm_testz_si128(mask, data1))
489 return updatePtrSimd(data1);
490 ptr += 16;
491 }
492
493 // and final 8-byte comparison
494 if (ptr + 8 <= end) {
495 __m128i data1 = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(ptr));
496 if (!_mm_testz_si128(mask, data1))
497 return updatePtrSimd(data1);
498 ptr += 8;
499 }
500
501 return true;
502# endif // QNX
503 }
504
505 // SSE2 implementation: test 16 bytes at a time.
506 const __m128i mask = _mm_set1_epi32(maskval);
507 while (ptr + 16 <= end) {
508 __m128i data = _mm_loadu_si128(reinterpret_cast<const __m128i *>(ptr));
509 __m128i masked = _mm_and_si128(mask, data);
510 __m128i comparison = _mm_cmpeq_epi16(masked, _mm_setzero_si128());
511 quint16 result = _mm_movemask_epi8(comparison);
 // 0xffff means all eight words were zero after masking
512 if (result != 0xffff)
513 return updatePtr(result);
514 ptr += 16;
515 }
516
517 // and one 8-byte comparison
518 if (ptr + 8 <= end) {
519 __m128i data = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(ptr));
520 __m128i masked = _mm_and_si128(mask, data);
521 __m128i comparison = _mm_cmpeq_epi16(masked, _mm_setzero_si128());
 // only the low 8 bits describe the 8 bytes that were loaded
522 quint8 result = _mm_movemask_epi8(comparison);
523 if (result != 0xff)
524 return updatePtr(result);
525 ptr += 8;
526 }
527
528 return true;
529}
530
// Compares the first \a l characters of the UTF-16 string \a a with those of
// \a b, whose elements are either 16-bit or 8-bit (zero-extended) depending on
// Char. Returns 0 if they are all equal and a non-zero value otherwise. When a
// mismatch is found by the SIMD paths, the value is 1 in
// CompareStringsForEquality mode and otherwise the difference between the
// first mismatching code units (a's minus b's). For fewer than 4 characters
// the difference is returned in both modes.
531template <StringComparisonMode Mode, typename Char> [[maybe_unused]]
532static int ucstrncmp_sse2(const char16_t *a, const Char *b, size_t l)
533{
534 static_assert(std::is_unsigned_v<Char>);
535
536 // Using the PMOVMSKB instruction, we get two bits for each UTF-16 character
537 // we compare. This lambda helps extract the code unit.
538 static const auto codeUnitAt = [](const auto *n, qptrdiff idx) -> int {
539 constexpr int Stride = 2;
540 // this is the same as:
541 // return n[idx / Stride];
542 // but using pointer arithmetic to avoid the compiler dividing by two
543 // and multiplying by two in the case of char16_t (we know idx is even,
544 // but the compiler does not). This is not UB.
545
546 auto ptr = reinterpret_cast<const uchar *>(n);
547 ptr += idx / (Stride / sizeof(*n));
548 return *reinterpret_cast<decltype(n)>(ptr);
549 };
 // \a mask has bits set for the bytes of mismatching characters in the chunk
 // that starts \a offset characters into both strings.
550 auto difference = [a, b](uint mask, qptrdiff offset) {
551 if (Mode == CompareStringsForEquality)
552 return 1;
553 uint idx = qCountTrailingZeroBits(mask);
554 return codeUnitAt(a + offset, idx) - codeUnitAt(b + offset, idx);
555 };
556
 // Loads 8 characters into eight 16-bit lanes: directly for 16-bit input,
 // by unpacking against zero for 8-bit input.
557 static const auto load8Chars = [](const auto *ptr) {
558 if (sizeof(*ptr) == 2)
559 return _mm_loadu_si128(reinterpret_cast<const __m128i *>(ptr));
560 __m128i chunk = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(ptr));
561 return _mm_unpacklo_epi8(chunk, _mm_setzero_si128());
562 };
 // Same as load8Chars, but for 4 characters in the low four 16-bit lanes.
563 static const auto load4Chars = [](const auto *ptr) {
564 if (sizeof(*ptr) == 2)
565 return _mm_loadl_epi64(reinterpret_cast<const __m128i *>(ptr));
566 __m128i chunk = _mm_cvtsi32_si128(qFromUnaligned<quint32>(ptr));
567 return _mm_unpacklo_epi8(chunk, _mm_setzero_si128());
568 };
569
570 // we're going to read a[0..15] and b[0..15] (32 bytes)
 // Returns a 32-bit mask with two bits set for every character that is EQUAL.
571 auto processChunk16Chars = [a, b](qptrdiff offset) -> uint {
572 if constexpr (UseAvx2) {
573 __m256i a_data = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(a + offset));
574 __m256i b_data;
575 if (sizeof(Char) == 1) {
576 // expand to UTF-16 via zero-extension
577 __m128i chunk = _mm_loadu_si128(reinterpret_cast<const __m128i *>(b + offset));
578 b_data = _mm256_cvtepu8_epi16(chunk);
579 } else {
580 b_data = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(b + offset));
581 }
582 __m256i result = _mm256_cmpeq_epi16(a_data, b_data);
583 return _mm256_movemask_epi8(result);
584 }
585
586 __m128i a_data1 = load8Chars(a + offset);
587 __m128i a_data2 = load8Chars(a + offset + 8);
588 __m128i b_data1, b_data2;
589 if (sizeof(Char) == 1) {
590 // expand to UTF-16 via unpacking
591 __m128i b_data = _mm_loadu_si128(reinterpret_cast<const __m128i *>(b + offset));
592 b_data1 = _mm_unpacklo_epi8(b_data, _mm_setzero_si128());
593 b_data2 = _mm_unpackhi_epi8(b_data, _mm_setzero_si128());
594 } else {
595 b_data1 = load8Chars(b + offset);
596 b_data2 = load8Chars(b + offset + 8);
597 }
598 __m128i result1 = _mm_cmpeq_epi16(a_data1, b_data1);
599 __m128i result2 = _mm_cmpeq_epi16(a_data2, b_data2);
 // first 8 characters in the low 16 bits, next 8 in the high 16 bits
600 return _mm_movemask_epi8(result1) | _mm_movemask_epi8(result2) << 16;
601 };
602
 // 16 or more characters: compare whole 16-character chunks
603 if (l >= sizeof(__m256i) / sizeof(char16_t)) {
604 qptrdiff offset = 0;
605 for ( ; l >= offset + sizeof(__m256i) / sizeof(char16_t); offset += sizeof(__m256i) / sizeof(char16_t)) {
 // invert so that set bits mark mismatches
606 uint mask = ~processChunk16Chars(offset);
607 if (mask)
608 return difference(mask, offset);
609 }
610
611 // maybe overlap the last 32 bytes
612 if (size_t(offset) < l) {
613 offset = l - sizeof(__m256i) / sizeof(char16_t);
614 uint mask = ~processChunk16Chars(offset);
615 return mask ? difference(mask, offset) : 0;
616 }
617 } else if (l >= 4) {
 // 4 to 15 characters: two loads of `width` characters covering the start
 // and the end of the strings; the two loads may overlap
618 __m128i a_data1, b_data1;
619 __m128i a_data2, b_data2;
620 int width;
621 if (l >= 8) {
622 width = 8;
623 a_data1 = load8Chars(a);
624 b_data1 = load8Chars(b);
625 a_data2 = load8Chars(a + l - width);
626 b_data2 = load8Chars(b + l - width);
627 } else {
628 // we're going to read a[0..3] and b[0..3] (8 bytes)
629 width = 4;
630 a_data1 = load4Chars(a);
631 b_data1 = load4Chars(b);
632 a_data2 = load4Chars(a + l - width);
633 b_data2 = load4Chars(b + l - width);
634 }
635
636 __m128i result = _mm_cmpeq_epi16(a_data1, b_data1);
637 ushort mask = ~_mm_movemask_epi8(result);
638 if (mask)
639 return difference(mask, 0);
640
641 result = _mm_cmpeq_epi16(a_data2, b_data2);
642 mask = ~_mm_movemask_epi8(result);
643 if (mask)
644 return difference(mask, l - width);
645 } else {
 // fewer than 4 characters: unrolled scalar comparison
646 // reset l
647 l &= 3;
648
 // used both as the check (non-zero difference converts to true) and as
 // the result to return for the mismatching index
649 const auto lambda = [=](size_t i) -> int {
650 return a[i] - b[i];
651 };
652 return UnrollTailLoop<3>::exec(l, 0, lambda, lambda);
653 }
654 return 0;
655}
656#endif
657
658Q_NEVER_INLINE
659qsizetype QtPrivate::qustrlen(const char16_t *str) noexcept
660{
661#if defined(__SSE2__) && !(defined(__SANITIZE_ADDRESS__) || __has_feature(address_sanitizer)) && !(defined(__SANITIZE_THREAD__) || __has_feature(thread_sanitizer))
662 return qustrlen_sse2(str);
663#endif
664
665 if (sizeof(wchar_t) == sizeof(char16_t))
666 return wcslen(reinterpret_cast<const wchar_t *>(str));
667
668 qsizetype result = 0;
669 while (*str++)
670 ++result;
671 return result;
672}
673
674qsizetype QtPrivate::qustrnlen(const char16_t *str, qsizetype maxlen) noexcept
675{
676 return qustrchr({ str, maxlen }, u'\0') - str;
677}
678
679/*!
680 * \internal
681 *
682 * Searches for character \a c in the string \a str and returns a pointer to
683 * it. Unlike strchr() and wcschr() (but like glibc's strchrnul()), if the
684 * character is not found, this function returns a pointer to the end of the
685 * string -- that is, \c{str.end()}.
686 */
// Implementation note: the SIMD paths advance n over the part of the string
// they have ruled out; whatever remains is searched by the unrolled tail loop
// or by std::find() at the end, both of which yield e when c is not found.
688const char16_t *QtPrivate::qustrchr(QStringView str, char16_t c) noexcept
689{
690 const char16_t *n = str.utf16();
691 const char16_t *e = n + str.size();
692
693#ifdef __SSE2__
 // Set to false after the AVX2 loop has consumed every full 32-byte chunk:
 // the 8-character loop below then handles a single chunk only.
694 bool loops = true;
695 // Using the PMOVMSKB instruction, we get two bits for each character
696 // we compare.
697 __m128i mch;
698 if constexpr (UseAvx2) {
699 // we're going to read n[0..15] (32 bytes)
700 __m256i mch256 = _mm256_set1_epi32(c | (c << 16));
701 for (const char16_t *next = n + 16; next <= e; n = next, next += 16) {
702 __m256i data = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(n));
703 __m256i result = _mm256_cmpeq_epi16(data, mch256);
704 uint mask = uint(_mm256_movemask_epi8(result));
705 if (mask) {
706 uint idx = qCountTrailingZeroBits(mask);
707 return n + idx / 2;
708 }
709 }
710 loops = false;
711 mch = _mm256_castsi256_si128(mch256);
712 } else {
713 mch = _mm_set1_epi32(c | (c << 16));
714 }
715
 // Compares \a data with the search character. \a validityMask selects the
 // movemask bits to consider (0xffff for a 16-byte load, 0xff for an 8-byte
 // load). On a match, advances n to the matching character and returns true.
716 auto hasMatch = [mch, &n](__m128i data, ushort validityMask) {
717 __m128i result = _mm_cmpeq_epi16(data, mch);
718 uint mask = uint(_mm_movemask_epi8(result));
719 if ((mask & validityMask) == 0)
720 return false;
721 uint idx = qCountTrailingZeroBits(mask);
722 n += idx / 2;
723 return true;
724 };
725
726 // we're going to read n[0..7] (16 bytes)
727 for (const char16_t *next = n + 8; next <= e; n = next, next += 8) {
728 __m128i data = _mm_loadu_si128(reinterpret_cast<const __m128i *>(n));
729 if (hasMatch(data, 0xffff))
730 return n;
731
732 if (!loops) {
 // break skips the loop's own "n = next", so advance n by hand
733 n += 8;
734 break;
735 }
736 }
737
 // When __OPTIMIZE_SIZE__ is defined, the 4-character step and the unrolled
 // tail are left out and std::find() at the end searches the remainder.
738# if !defined(__OPTIMIZE_SIZE__)
739 // we're going to read n[0..3] (8 bytes)
740 if (e - n > 3) {
741 __m128i data = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(n));
742 if (hasMatch(data, 0xff))
743 return n;
744
745 n += 4;
746 }
747
 // at most 3 characters are left: check them one by one, returning e if
 // none of them matches
748 return UnrollTailLoop<3>::exec(e - n, e,
749 [=](qsizetype i) { return n[i] == c; },
750 [=](qsizetype i) { return n + i; });
751# endif
752#elif defined(__ARM_NEON__)
 // vmask gives every lane its own bit, so the horizontal add of the masked
 // comparison result yields one bit per matching character
753 const uint16x8_t vmask = qvsetq_n_u16(1, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6, 1 << 7);
754 const uint16x8_t ch_vec = vdupq_n_u16(c);
755 for (const char16_t *next = n + 8; next <= e; n = next, next += 8) {
756 uint16x8_t data = vld1q_u16(reinterpret_cast<const uint16_t *>(n));
757 uint mask = vaddvq_u16(vandq_u16(vceqq_u16(data, ch_vec), vmask));
758 if (ushort(mask)) {
759 // found a match
760 return n + qCountTrailingZeroBits(mask);
761 }
762 }
763#endif // aarch64
764
 // generic fallback for whatever the code above has not searched
765 return std::find(n, e, c);
766}
767
768/*!
769 * \internal
770 *
771 * Searches case-insensitively for character \a c in the string \a str and
772 * returns a pointer to it. If the character is not found, this function
773 * returns a pointer to the end of the string -- that is, \c{str.end()}.
774 */
776const char16_t *QtPrivate::qustrcasechr(QStringView str, char16_t c) noexcept
777{
778 const QChar *n = str.begin();
779 const QChar *e = str.end();
780 c = foldCase(c);
781 auto it = std::find_if(n, e, [c](auto ch) { return foldAndCompare(ch, QChar(c)); });
782 return reinterpret_cast<const char16_t *>(it);
783}
784
785// Note: ptr on output may be off by one and point to a preceding US-ASCII
786// character. Usually harmless.
787bool qt_is_ascii(const char *&ptr, const char *end) noexcept
788{
789#if defined(__SSE2__)
790 // Testing for the high bit can be done efficiently with just PMOVMSKB
791 bool loops = true;
792 if constexpr (UseAvx2) {
793 while (ptr + 32 <= end) {
794 __m256i data = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(ptr));
795 quint32 mask = _mm256_movemask_epi8(data);
796 if (mask) {
797 uint idx = qCountTrailingZeroBits(mask);
798 ptr += idx;
799 return false;
800 }
801 ptr += 32;
802 }
803 loops = false;
804 }
805
806 while (ptr + 16 <= end) {
807 __m128i data = _mm_loadu_si128(reinterpret_cast<const __m128i *>(ptr));
808 quint32 mask = _mm_movemask_epi8(data);
809 if (mask) {
810 uint idx = qCountTrailingZeroBits(mask);
811 ptr += idx;
812 return false;
813 }
814 ptr += 16;
815
816 if (!loops)
817 break;
818 }
819 if (ptr + 8 <= end) {
820 __m128i data = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(ptr));
821 quint8 mask = _mm_movemask_epi8(data);
822 if (mask) {
823 uint idx = qCountTrailingZeroBits(mask);
824 ptr += idx;
825 return false;
826 }
827 ptr += 8;
828 }
829#endif
830
831 while (ptr + 4 <= end) {
832 quint32 data = qFromUnaligned<quint32>(ptr);
833 if (data &= 0x80808080U) {
834 uint idx = QSysInfo::ByteOrder == QSysInfo::BigEndian
835 ? qCountLeadingZeroBits(data)
836 : qCountTrailingZeroBits(data);
837 ptr += idx / 8;
838 return false;
839 }
840 ptr += 4;
841 }
842
843 while (ptr != end) {
844 if (quint8(*ptr) & 0x80)
845 return false;
846 ++ptr;
847 }
848 return true;
849}
850
851bool QtPrivate::isAscii(QLatin1StringView s) noexcept
852{
853 const char *ptr = s.begin();
854 const char *end = s.end();
855
856 return qt_is_ascii(ptr, end);
857}
858
859static bool isAscii_helper(const char16_t *&ptr, const char16_t *end)
860{
861#ifdef __SSE2__
862 const char *ptr8 = reinterpret_cast<const char *>(ptr);
863 const char *end8 = reinterpret_cast<const char *>(end);
864 bool ok = simdTestMask(ptr8, end8, 0xff80ff80);
865 ptr = reinterpret_cast<const char16_t *>(ptr8);
866 if (!ok)
867 return false;
868#endif
869
870 while (ptr != end) {
871 if (*ptr & 0xff80)
872 return false;
873 ++ptr;
874 }
875 return true;
876}
877
878bool QtPrivate::isAscii(QStringView s) noexcept
879{
880 const char16_t *ptr = s.utf16();
881 const char16_t *end = ptr + s.size();
882
883 return isAscii_helper(ptr, end);
884}
885
886bool QtPrivate::isLatin1(QStringView s) noexcept
887{
888 const char16_t *ptr = s.utf16();
889 const char16_t *end = ptr + s.size();
890
891#ifdef __SSE2__
892 const char *ptr8 = reinterpret_cast<const char *>(ptr);
893 const char *end8 = reinterpret_cast<const char *>(end);
894 if (!simdTestMask(ptr8, end8, 0xff00ff00))
895 return false;
896 ptr = reinterpret_cast<const char16_t *>(ptr8);
897#endif
898
899 while (ptr != end) {
900 if (*ptr++ > 0xff)
901 return false;
902 }
903 return true;
904}
905
906bool QtPrivate::isValidUtf16(QStringView s) noexcept
907{
908 constexpr char32_t InvalidCodePoint = UINT_MAX;
909
910 QStringIterator i(s);
911 while (i.hasNext()) {
912 const char32_t c = i.next(InvalidCodePoint);
913 if (c == InvalidCodePoint)
914 return false;
915 }
916
917 return true;
918}
919
920// conversion between Latin 1 and UTF-16
// Converts \a size Latin-1 bytes starting at \a str into UTF-16 code units
// stored at \a dst, by zero-extending every byte to 16 bits.
921Q_CORE_EXPORT void qt_from_latin1(char16_t *dst, const char *str, size_t size) noexcept
922{
923 /* SIMD:
924 * Unpacking with SSE has been shown to improve performance on recent CPUs
925 * The same method gives no improvement with NEON. On Aarch64, clang will do the vectorization
926 * itself in exactly the same way as one would do it with intrinsics.
927 */
928#if defined(__SSE2__)
929 // we're going to read str[offset..offset+15] (16 bytes)
930 const __m128i nullMask = _mm_setzero_si128();
 // converts the 16 bytes at str + offset into 16 code units at dst + offset
931 auto processOneChunk = [=](qptrdiff offset) {
932 const __m128i chunk = _mm_loadu_si128((const __m128i*)(str + offset)); // load
933 if constexpr (UseAvx2) {
934 // zero extend to an YMM register
935 const __m256i extended = _mm256_cvtepu8_epi16(chunk);
936
937 // store
938 _mm256_storeu_si256((__m256i*)(dst + offset), extended);
939 } else {
940 // unpack the first 8 bytes, padding with zeros
941 const __m128i firstHalf = _mm_unpacklo_epi8(chunk, nullMask);
942 _mm_storeu_si128((__m128i*)(dst + offset), firstHalf); // store
943
944 // unpack the last 8 bytes, padding with zeros
945 const __m128i secondHalf = _mm_unpackhi_epi8 (chunk, nullMask);
946 _mm_storeu_si128((__m128i*)(dst + offset + 8), secondHalf); // store
947 }
948 };
949
950 const char *e = str + size;
 // 16 bytes or more: whole chunks, then one extra chunk for any remainder
951 if (size >= sizeof(__m128i)) {
952 qptrdiff offset = 0;
953 for ( ; str + offset + sizeof(__m128i) <= e; offset += sizeof(__m128i))
954 processOneChunk(offset);
 // the remainder is covered by converting the last 16 bytes of the input,
 // which overlaps data that has already been converted
955 if (str + offset < e)
956 processOneChunk(size - sizeof(__m128i));
957 return;
958 }
959
 // When __OPTIMIZE_SIZE__ is defined, inputs shorter than 16 bytes fall
 // through to the code after the SSE2 section.
960# if !defined(__OPTIMIZE_SIZE__)
961 if (size >= 4) {
962 // two overlapped loads & stores, of either 64-bit or of 32-bit
963 if (size >= 8) {
964 const __m128i unpacked1 = mm_load8_zero_extend(str);
965 const __m128i unpacked2 = mm_load8_zero_extend(str + size - 8);
966 _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), unpacked1);
967 _mm_storeu_si128(reinterpret_cast<__m128i *>(dst + size - 8), unpacked2);
968 } else {
969 const __m128i chunk1 = _mm_cvtsi32_si128(qFromUnaligned<quint32>(str));
970 const __m128i chunk2 = _mm_cvtsi32_si128(qFromUnaligned<quint32>(str + size - 4));
971 const __m128i unpacked1 = _mm_unpacklo_epi8(chunk1, nullMask);
972 const __m128i unpacked2 = _mm_unpacklo_epi8(chunk2, nullMask);
973 _mm_storel_epi64(reinterpret_cast<__m128i *>(dst), unpacked1);
974 _mm_storel_epi64(reinterpret_cast<__m128i *>(dst + size - 4), unpacked2);
975 }
976 return;
977 } else {
 // fewer than 4 bytes: convert them one by one with the unrolled loop
978 size = size % 4;
979 return UnrollTailLoop<3>::exec(qsizetype(size), [=](qsizetype i) { dst[i] = uchar(str[i]); });
980 }
981# endif
982#endif
983#if defined(__mips_dsp)
984 static_assert(sizeof(qsizetype) == sizeof(int),
985 "oops, the assembler implementation needs to be called in a loop");
 // pick one of the two external assembly routines depending on the length
986 if (size > 20)
987 qt_fromlatin1_mips_asm_unroll8(dst, str, size);
988 else
989 qt_fromlatin1_mips_asm_unroll4(dst, str, size);
990#else
 // generic fallback: the cast to uchar keeps bytes above 0x7f from being
 // sign-extended
991 while (size--)
992 *dst++ = (uchar)*str++;
993#endif
994}
995
996static QVarLengthArray<char16_t> qt_from_latin1_to_qvla(QLatin1StringView str)
997{
998 const qsizetype len = str.size();
999 QVarLengthArray<char16_t> arr(len);
1000 qt_from_latin1(arr.data(), str.data(), len);
1001 return arr;
1002}
1003
1004template <bool Checked>
1005static void qt_to_latin1_internal(uchar *dst, const char16_t *src, qsizetype length)
1006{
1007#if defined(__SSE2__)
1008 auto questionMark256 = []() {
1009 if constexpr (UseAvx2)
1010 return _mm256_broadcastw_epi16(_mm_cvtsi32_si128('?'));
1011 else
1012 return 0;
1013 }();
1014 auto outOfRange256 = []() {
1015 if constexpr (UseAvx2)
1016 return _mm256_broadcastw_epi16(_mm_cvtsi32_si128(0x100));
1017 else
1018 return 0;
1019 }();
1020 __m128i questionMark, outOfRange;
1021 if constexpr (UseAvx2) {
1022 questionMark = _mm256_castsi256_si128(questionMark256);
1023 outOfRange = _mm256_castsi256_si128(outOfRange256);
1024 } else {
1025 questionMark = _mm_set1_epi16('?');
1026 outOfRange = _mm_set1_epi16(0x100);
1027 }
1028
1029 auto mergeQuestionMarks = [=](__m128i chunk) {
1030 if (!Checked)
1031 return chunk;
1032
1033 // SSE has no compare instruction for unsigned comparison.
1034 if constexpr (UseSse4_1) {
1035 // We use an unsigned uc = qMin(uc, 0x100) and then compare for equality.
1036 chunk = _mm_min_epu16(chunk, outOfRange);
1037 const __m128i offLimitMask = _mm_cmpeq_epi16(chunk, outOfRange);
1038 chunk = _mm_blendv_epi8(chunk, questionMark, offLimitMask);
1039 return chunk;
1040 }
1041 // The variables must be shiffted + 0x8000 to be compared
1042 const __m128i signedBitOffset = _mm_set1_epi16(short(0x8000));
1043 const __m128i thresholdMask = _mm_set1_epi16(short(0xff + 0x8000));
1044
1045 const __m128i signedChunk = _mm_add_epi16(chunk, signedBitOffset);
1046 const __m128i offLimitMask = _mm_cmpgt_epi16(signedChunk, thresholdMask);
1047
1048 // offLimitQuestionMark contains '?' for each 16 bits that was off-limit
1049 // the 16 bits that were correct contains zeros
1050 const __m128i offLimitQuestionMark = _mm_and_si128(offLimitMask, questionMark);
1051
1052 // correctBytes contains the bytes that were in limit
1053 // the 16 bits that were off limits contains zeros
1054 const __m128i correctBytes = _mm_andnot_si128(offLimitMask, chunk);
1055
1056 // merge offLimitQuestionMark and correctBytes to have the result
1057 chunk = _mm_or_si128(correctBytes, offLimitQuestionMark);
1058
1059 Q_UNUSED(outOfRange);
1060 return chunk;
1061 };
1062
1063 // we're going to read to src[offset..offset+15] (16 bytes)
1064 auto loadChunkAt = [=](qptrdiff offset) {
1065 __m128i chunk1, chunk2;
1066 if constexpr (UseAvx2) {
1067 __m256i chunk = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(src + offset));
1068 if (Checked) {
1069 // See mergeQuestionMarks lambda above for details
1070 chunk = _mm256_min_epu16(chunk, outOfRange256);
1071 const __m256i offLimitMask = _mm256_cmpeq_epi16(chunk, outOfRange256);
1072 chunk = _mm256_blendv_epi8(chunk, questionMark256, offLimitMask);
1073 }
1074
1075 chunk2 = _mm256_extracti128_si256(chunk, 1);
1076 chunk1 = _mm256_castsi256_si128(chunk);
1077 } else {
1078 chunk1 = _mm_loadu_si128((const __m128i*)(src + offset)); // load
1079 chunk1 = mergeQuestionMarks(chunk1);
1080
1081 chunk2 = _mm_loadu_si128((const __m128i*)(src + offset + 8)); // load
1082 chunk2 = mergeQuestionMarks(chunk2);
1083 }
1084
1085 // pack the two vector to 16 x 8bits elements
1086 return _mm_packus_epi16(chunk1, chunk2);
1087 };
1088
1089 if (size_t(length) >= sizeof(__m128i)) {
1090 // because of possible overlapping, we won't process the last chunk in the loop
1091 qptrdiff offset = 0;
1092 for ( ; offset + 2 * sizeof(__m128i) < size_t(length); offset += sizeof(__m128i))
1093 _mm_storeu_si128(reinterpret_cast<__m128i *>(dst + offset), loadChunkAt(offset));
1094
1095 // overlapped conversion of the last full chunk and the tail
1096 __m128i last1 = loadChunkAt(offset);
1097 __m128i last2 = loadChunkAt(length - sizeof(__m128i));
1098 _mm_storeu_si128(reinterpret_cast<__m128i *>(dst + offset), last1);
1099 _mm_storeu_si128(reinterpret_cast<__m128i *>(dst + length - sizeof(__m128i)), last2);
1100 return;
1101 }
1102
1103# if !defined(__OPTIMIZE_SIZE__)
1104 if (length >= 4) {
1105 // this code is fine even for in-place conversion because we load both
1106 // before any store
1107 if (length >= 8) {
1108 __m128i chunk1 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(src));
1109 __m128i chunk2 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(src + length - 8));
1110 chunk1 = mergeQuestionMarks(chunk1);
1111 chunk2 = mergeQuestionMarks(chunk2);
1112
1113 // pack, where the upper half is ignored
1114 const __m128i result1 = _mm_packus_epi16(chunk1, chunk1);
1115 const __m128i result2 = _mm_packus_epi16(chunk2, chunk2);
1116 _mm_storel_epi64(reinterpret_cast<__m128i *>(dst), result1);
1117 _mm_storel_epi64(reinterpret_cast<__m128i *>(dst + length - 8), result2);
1118 } else {
1119 __m128i chunk1 = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(src));
1120 __m128i chunk2 = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(src + length - 4));
1121 chunk1 = mergeQuestionMarks(chunk1);
1122 chunk2 = mergeQuestionMarks(chunk2);
1123
1124 // pack, we'll zero the upper three quarters
1125 const __m128i result1 = _mm_packus_epi16(chunk1, chunk1);
1126 const __m128i result2 = _mm_packus_epi16(chunk2, chunk2);
1127 qToUnaligned(_mm_cvtsi128_si32(result1), dst);
1128 qToUnaligned(_mm_cvtsi128_si32(result2), dst + length - 4);
1129 }
1130 return;
1131 }
1132
1133 length = length % 4;
1134 return UnrollTailLoop<3>::exec(length, [=](qsizetype i) {
1135 if (Checked)
1136 dst[i] = (src[i]>0xff) ? '?' : (uchar) src[i];
1137 else
1138 dst[i] = src[i];
1139 });
1140# else
1141 length = length % 16;
1142# endif // optimize size
1143#elif defined(__ARM_NEON__)
1144 // Refer to the documentation of the SSE2 implementation.
1145 // This uses exactly the same method as for SSE except:
1146 // 1) neon has unsigned comparison
1147 // 2) packing is done to 64 bits (8 x 8bits component).
1148 if (length >= 16) {
1149 const qsizetype chunkCount = length >> 3; // divided by 8
1150 const uint16x8_t questionMark = vdupq_n_u16('?'); // set
1151 const uint16x8_t thresholdMask = vdupq_n_u16(0xff); // set
1152 for (qsizetype i = 0; i < chunkCount; ++i) {
1153 uint16x8_t chunk = vld1q_u16((uint16_t *)src); // load
1154 src += 8;
1155
1156 if (Checked) {
1157 const uint16x8_t offLimitMask = vcgtq_u16(chunk, thresholdMask); // chunk > thresholdMask
1158 const uint16x8_t offLimitQuestionMark = vandq_u16(offLimitMask, questionMark); // offLimitMask & questionMark
1159 const uint16x8_t correctBytes = vbicq_u16(chunk, offLimitMask); // !offLimitMask & chunk
1160 chunk = vorrq_u16(correctBytes, offLimitQuestionMark); // correctBytes | offLimitQuestionMark
1161 }
1162 const uint8x8_t result = vmovn_u16(chunk); // narrowing move->packing
1163 vst1_u8(dst, result); // store
1164 dst += 8;
1165 }
1166 length = length % 8;
1167 }
1168#endif
1169#if defined(__mips_dsp)
1170 static_assert(sizeof(qsizetype) == sizeof(int),
1171 "oops, the assembler implementation needs to be called in a loop");
1172 qt_toLatin1_mips_dsp_asm(dst, src, length);
1173#else
1174 while (length--) {
1175 if (Checked)
1176 *dst++ = (*src>0xff) ? '?' : (uchar) *src;
1177 else
1178 *dst++ = *src;
1179 ++src;
1180 }
1181#endif
1182}
1183
1184void qt_to_latin1(uchar *dst, const char16_t *src, qsizetype length)
1185{
1186 qt_to_latin1_internal<true>(dst, src, length);
1187}
1188
1189void qt_to_latin1_unchecked(uchar *dst, const char16_t *src, qsizetype length)
1190{
1191 qt_to_latin1_internal<false>(dst, src, length);
1192}
1193
1194// Unicode case-insensitive comparison (argument order matches QStringView)
1195Q_NEVER_INLINE static int ucstricmp(qsizetype alen, const char16_t *a, qsizetype blen, const char16_t *b)
1196{
1197 if (a == b)
1198 return qt_lencmp(alen, blen);
1199
1200 qsizetype l = qMin(alen, blen);
1201 qsizetype i;
1202 for (i = 0; i < l; ++i) {
1203// qDebug() << Qt::hex << alast << blast;
1204// qDebug() << Qt::hex << "*a=" << *a << "alast=" << alast << "folded=" << foldCase (*a, alast);
1205// qDebug() << Qt::hex << "*b=" << *b << "blast=" << blast << "folded=" << foldCase (*b, blast);
1206 int diff = foldCase(a + i, a) - foldCase(b + i, b);
1207 if ((diff))
1208 return diff;
1209 }
1210 if (i == alen) {
1211 if (i == blen)
1212 return 0;
1213 return -1;
1214 }
1215 return 1;
1216}
1217
1218// Case-insensitive comparison between a QStringView and a QLatin1StringView
1219// (argument order matches those types)
1220Q_NEVER_INLINE static int ucstricmp(qsizetype alen, const char16_t *a, qsizetype blen, const char *b)
1221{
1222 qsizetype l = qMin(alen, blen);
1223 qsizetype i;
1224 for (i = 0; i < l; ++i) {
1225 int diff = foldCase(a[i]) - foldCase(char16_t{uchar(b[i])});
1226 if ((diff))
1227 return diff;
1228 }
1229 if (i == alen) {
1230 if (i == blen)
1231 return 0;
1232 return -1;
1233 }
1234 return 1;
1235}
1236
1237// Case-insensitive comparison between a Unicode string and a UTF-8 string
1238Q_NEVER_INLINE static int ucstricmp8(const char *utf8, const char *utf8end, const QChar *utf16, const QChar *utf16end)
1239{
1240 auto src1 = reinterpret_cast<const qchar8_t *>(utf8);
1241 auto end1 = reinterpret_cast<const qchar8_t *>(utf8end);
1242 QStringIterator src2(utf16, utf16end);
1243
1244 while (src1 < end1 && src2.hasNext()) {
1245 char32_t uc1 = QChar::toCaseFolded(QUtf8Functions::nextUcs4FromUtf8(src1, end1));
1246 char32_t uc2 = QChar::toCaseFolded(src2.next());
1247 int diff = uc1 - uc2; // can't underflow
1248 if (diff)
1249 return diff;
1250 }
1251
1252 // the shorter string sorts first
1253 return (end1 > src1) - int(src2.hasNext());
1254}
1255
#if defined(__mips_dsp)
// Assembler routine; its implementation is in qstring_mips_dsp_asm.S
extern "C" int qt_ucstrncmp_mips_dsp_asm(const char16_t *a, const char16_t *b, unsigned len);
#endif
1262
1263// Unicode case-sensitive compare two same-sized strings
1264template <StringComparisonMode Mode>
1265static int ucstrncmp(const char16_t *a, const char16_t *b, size_t l)
1266{
1267 // This function isn't memcmp() because that can return the wrong sorting
1268 // result in little-endian architectures: 0x00ff must sort before 0x0100,
1269 // but the bytes in memory are FF 00 and 00 01.
1270
1271#ifndef __OPTIMIZE_SIZE__
1272# if defined(__mips_dsp)
1273 static_assert(sizeof(uint) == sizeof(size_t));
1274 if (l >= 8) {
1275 return qt_ucstrncmp_mips_dsp_asm(a, b, l);
1276 }
1277# elif defined(__SSE2__)
1278 return ucstrncmp_sse2<Mode>(a, b, l);
1279# elif defined(__ARM_NEON__)
1280 if (l >= 8) {
1281 const char16_t *end = a + l;
1282 const uint16x8_t mask = qvsetq_n_u16( 1, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6, 1 << 7 );
1283 while (end - a > 7) {
1284 uint16x8_t da = vld1q_u16(reinterpret_cast<const uint16_t *>(a));
1285 uint16x8_t db = vld1q_u16(reinterpret_cast<const uint16_t *>(b));
1286
1287 uint8_t r = ~(uint8_t)vaddvq_u16(vandq_u16(vceqq_u16(da, db), mask));
1288 if (r) {
1289 // found a different QChar
1290 if (Mode == CompareStringsForEquality)
1291 return 1;
1292 uint idx = qCountTrailingZeroBits(r);
1293 return a[idx] - b[idx];
1294 }
1295 a += 8;
1296 b += 8;
1297 }
1298 l &= 7;
1299 }
1300 const auto lambda = [=](size_t i) -> int {
1301 return a[i] - b[i];
1302 };
1303 return UnrollTailLoop<7>::exec(l, 0, lambda, lambda);
1304# endif // MIPS DSP or __SSE2__ or __ARM_NEON__
1305#endif // __OPTIMIZE_SIZE__
1306
1307 if (Mode == CompareStringsForEquality || QSysInfo::ByteOrder == QSysInfo::BigEndian)
1308 return memcmp(a, b, l * sizeof(char16_t));
1309
1310 for (size_t i = 0; i < l; ++i) {
1311 if (int diff = a[i] - b[i])
1312 return diff;
1313 }
1314 return 0;
1315}
1316
1317template <StringComparisonMode Mode>
1318static int ucstrncmp(const char16_t *a, const char *b, size_t l)
1319{
1320 const uchar *c = reinterpret_cast<const uchar *>(b);
1321 const char16_t *uc = a;
1322 const char16_t *e = uc + l;
1323
1324#if defined(__SSE2__) && !defined(__OPTIMIZE_SIZE__)
1325 return ucstrncmp_sse2<Mode>(uc, c, l);
1326#endif
1327
1328 while (uc < e) {
1329 int diff = *uc - *c;
1330 if (diff)
1331 return diff;
1332 uc++, c++;
1333 }
1334
1335 return 0;
1336}
1337
1338// Unicode case-sensitive equality
1339template <typename Char2>
1340static bool ucstreq(const char16_t *a, size_t alen, const Char2 *b)
1341{
1342 return ucstrncmp<CompareStringsForEquality>(a, b, alen) == 0;
1343}
1344
1345// Unicode case-sensitive comparison
1346template <typename Char2>
1347static int ucstrcmp(const char16_t *a, size_t alen, const Char2 *b, size_t blen)
1348{
1349 const size_t l = qMin(alen, blen);
1350 int cmp = ucstrncmp<CompareStringsForOrdering>(a, b, l);
1351 return cmp ? cmp : qt_lencmp(alen, blen);
1352}
1353
1355
1356static int latin1nicmp(const char *lhsChar, qsizetype lSize, const char *rhsChar, qsizetype rSize)
1357{
1358 // We're called with QLatin1StringView's .data() and .size():
1359 Q_ASSERT(lSize >= 0 && rSize >= 0);
1360 if (!lSize)
1361 return rSize ? -1 : 0;
1362 if (!rSize)
1363 return 1;
1364 const qsizetype size = std::min(lSize, rSize);
1365
1366 Q_ASSERT(lhsChar && rhsChar); // since both lSize and rSize are positive
1367 for (qsizetype i = 0; i < size; i++) {
1368 if (int res = CaseInsensitiveL1::difference(lhsChar[i], rhsChar[i]))
1369 return res;
1370 }
1371 return qt_lencmp(lSize, rSize);
1372}
1373
1374bool QtPrivate::equalStrings(QStringView lhs, QStringView rhs) noexcept
1375{
1376 Q_ASSERT(lhs.size() == rhs.size());
1377 return ucstreq(lhs.utf16(), lhs.size(), rhs.utf16());
1378}
1379
1380bool QtPrivate::equalStrings(QStringView lhs, QLatin1StringView rhs) noexcept
1381{
1382 Q_ASSERT(lhs.size() == rhs.size());
1383 return ucstreq(lhs.utf16(), lhs.size(), rhs.latin1());
1384}
1385
1386bool QtPrivate::equalStrings(QLatin1StringView lhs, QStringView rhs) noexcept
1387{
1388 return QtPrivate::equalStrings(rhs, lhs);
1389}
1390
1391bool QtPrivate::equalStrings(QLatin1StringView lhs, QLatin1StringView rhs) noexcept
1392{
1393 Q_ASSERT(lhs.size() == rhs.size());
1394 return (!lhs.size() || memcmp(lhs.data(), rhs.data(), lhs.size()) == 0);
1395}
1396
1397bool QtPrivate::equalStrings(QBasicUtf8StringView<false> lhs, QStringView rhs) noexcept
1398{
1399 return QUtf8::compareUtf8(lhs, rhs) == 0;
1400}
1401
1402bool QtPrivate::equalStrings(QStringView lhs, QBasicUtf8StringView<false> rhs) noexcept
1403{
1404 return QtPrivate::equalStrings(rhs, lhs);
1405}
1406
1407bool QtPrivate::equalStrings(QLatin1StringView lhs, QBasicUtf8StringView<false> rhs) noexcept
1408{
1409 return QUtf8::compareUtf8(QByteArrayView(rhs), lhs) == 0;
1410}
1411
1412bool QtPrivate::equalStrings(QBasicUtf8StringView<false> lhs, QLatin1StringView rhs) noexcept
1413{
1414 return QtPrivate::equalStrings(rhs, lhs);
1415}
1416
1417bool QtPrivate::equalStrings(QBasicUtf8StringView<false> lhs, QBasicUtf8StringView<false> rhs) noexcept
1418{
1419#if QT_VERSION >= QT_VERSION_CHECK(7, 0, 0) || defined(QT_BOOTSTRAPPED) || defined(QT_STATIC)
1420 Q_ASSERT(lhs.size() == rhs.size());
1421#else
1422 // operator== didn't enforce size prior to Qt 6.2
1423 if (lhs.size() != rhs.size())
1424 return false;
1425#endif
1426 return (!lhs.size() || memcmp(lhs.data(), rhs.data(), lhs.size()) == 0);
1427}
1428
1429bool QAnyStringView::equal(QAnyStringView lhs, QAnyStringView rhs) noexcept
1430{
1431 if (lhs.size() != rhs.size() && lhs.isUtf8() == rhs.isUtf8())
1432 return false;
1433 return lhs.visit([rhs](auto lhs) {
1434 return rhs.visit([lhs](auto rhs) {
1435 return QtPrivate::equalStrings(lhs, rhs);
1436 });
1437 });
1438}
1439
1440/*!
1441 \relates QStringView
1442 \internal
1443 \since 5.10
1444
1445 Returns an integer that compares to 0 as \a lhs compares to \a rhs.
1446
1447 \include qstring.qdocinc {search-comparison-case-sensitivity} {comparison}
1448
1449 Case-sensitive comparison is based exclusively on the numeric Unicode values
1450 of the characters and is very fast, but is not what a human would expect.
1451 Consider sorting user-visible strings with QString::localeAwareCompare().
1452
1453 \sa {Comparing Strings}
1454*/
1455int QtPrivate::compareStrings(QStringView lhs, QStringView rhs, Qt::CaseSensitivity cs) noexcept
1456{
1457 if (cs == Qt::CaseSensitive)
1458 return ucstrcmp(lhs.utf16(), lhs.size(), rhs.utf16(), rhs.size());
1459 return ucstricmp(lhs.size(), lhs.utf16(), rhs.size(), rhs.utf16());
1460}
1461
1462/*!
1463 \relates QStringView
1464 \internal
1465 \since 5.10
1466 \overload
1467
1468 Returns an integer that compares to 0 as \a lhs compares to \a rhs.
1469
1470 \include qstring.qdocinc {search-comparison-case-sensitivity} {comparison}
1471
1472 Case-sensitive comparison is based exclusively on the numeric Unicode values
1473 of the characters and is very fast, but is not what a human would expect.
1474 Consider sorting user-visible strings with QString::localeAwareCompare().
1475
1476 \sa {Comparing Strings}
1477*/
1478int QtPrivate::compareStrings(QStringView lhs, QLatin1StringView rhs, Qt::CaseSensitivity cs) noexcept
1479{
1480 if (cs == Qt::CaseSensitive)
1481 return ucstrcmp(lhs.utf16(), lhs.size(), rhs.latin1(), rhs.size());
1482 return ucstricmp(lhs.size(), lhs.utf16(), rhs.size(), rhs.latin1());
1483}
1484
1485/*!
1486 \relates QStringView
1487 \internal
1488 \since 6.0
1489 \overload
1490*/
1491int QtPrivate::compareStrings(QStringView lhs, QBasicUtf8StringView<false> rhs, Qt::CaseSensitivity cs) noexcept
1492{
1493 return -compareStrings(rhs, lhs, cs);
1494}
1495
1496/*!
1497 \relates QStringView
1498 \internal
1499 \since 5.10
1500 \overload
1501*/
1502int QtPrivate::compareStrings(QLatin1StringView lhs, QStringView rhs, Qt::CaseSensitivity cs) noexcept
1503{
1504 return -compareStrings(rhs, lhs, cs);
1505}
1506
1507/*!
1508 \relates QStringView
1509 \internal
1510 \since 5.10
1511 \overload
1512
1513 Returns an integer that compares to 0 as \a lhs compares to \a rhs.
1514
1515 \include qstring.qdocinc {search-comparison-case-sensitivity} {comparison}
1516
1517 Case-sensitive comparison is based exclusively on the numeric Latin-1 values
1518 of the characters and is very fast, but is not what a human would expect.
1519 Consider sorting user-visible strings with QString::localeAwareCompare().
1520
1521 \sa {Comparing Strings}
1522*/
1523int QtPrivate::compareStrings(QLatin1StringView lhs, QLatin1StringView rhs, Qt::CaseSensitivity cs) noexcept
1524{
1525 if (lhs.isEmpty())
1526 return qt_lencmp(qsizetype(0), rhs.size());
1527 if (rhs.isEmpty())
1528 return qt_lencmp(lhs.size(), qsizetype(0));
1529 if (cs == Qt::CaseInsensitive)
1530 return latin1nicmp(lhs.data(), lhs.size(), rhs.data(), rhs.size());
1531 const auto l = std::min(lhs.size(), rhs.size());
1532 int r = memcmp(lhs.data(), rhs.data(), l);
1533 return r ? r : qt_lencmp(lhs.size(), rhs.size());
1534}
1535
1536/*!
1537 \relates QStringView
1538 \internal
1539 \since 6.0
1540 \overload
1541*/
1542int QtPrivate::compareStrings(QLatin1StringView lhs, QBasicUtf8StringView<false> rhs, Qt::CaseSensitivity cs) noexcept
1543{
1544 return -QUtf8::compareUtf8(QByteArrayView(rhs), lhs, cs);
1545}
1546
1547/*!
1548 \relates QStringView
1549 \internal
1550 \since 6.0
1551 \overload
1552*/
1553int QtPrivate::compareStrings(QBasicUtf8StringView<false> lhs, QStringView rhs, Qt::CaseSensitivity cs) noexcept
1554{
1555 if (cs == Qt::CaseSensitive)
1556 return QUtf8::compareUtf8(lhs, rhs);
1557 return ucstricmp8(lhs.begin(), lhs.end(), rhs.begin(), rhs.end());
1558}
1559
1560/*!
1561 \relates QStringView
1562 \internal
1563 \since 6.0
1564 \overload
1565*/
1566int QtPrivate::compareStrings(QBasicUtf8StringView<false> lhs, QLatin1StringView rhs, Qt::CaseSensitivity cs) noexcept
1567{
1568 return -compareStrings(rhs, lhs, cs);
1569}
1570
1571/*!
1572 \relates QStringView
1573 \internal
1574 \since 6.0
1575 \overload
1576*/
1577int QtPrivate::compareStrings(QBasicUtf8StringView<false> lhs, QBasicUtf8StringView<false> rhs, Qt::CaseSensitivity cs) noexcept
1578{
1579 return QUtf8::compareUtf8(QByteArrayView(lhs), QByteArrayView(rhs), cs);
1580}
1581
1582int QAnyStringView::compare(QAnyStringView lhs, QAnyStringView rhs, Qt::CaseSensitivity cs) noexcept
1583{
1584 return lhs.visit([rhs, cs](auto lhs) {
1585 return rhs.visit([lhs, cs](auto rhs) {
1586 return QtPrivate::compareStrings(lhs, rhs, cs);
1587 });
1588 });
1589}
1590
1591// ### Qt 7: do not allow anything but ASCII digits
1592// in arg()'s replacements.
1593#if QT_VERSION < QT_VERSION_CHECK(7, 0, 0) && !defined(QT_BOOTSTRAPPED)
1594static bool supportUnicodeDigitValuesInArg()
1595{
1596 static const bool result = []() {
1597 static const char supportUnicodeDigitValuesEnvVar[]
1598 = "QT_USE_UNICODE_DIGIT_VALUES_IN_STRING_ARG";
1599
1600 if (qEnvironmentVariableIsSet(supportUnicodeDigitValuesEnvVar))
1601 return qEnvironmentVariableIntValue(supportUnicodeDigitValuesEnvVar) != 0;
1602
1603#if QT_VERSION < QT_VERSION_CHECK(6, 6, 0) // keep it in sync with the test
1604 return true;
1605#else
1606 return false;
1607#endif
1608 }();
1609
1610 return result;
1611}
1612#endif
1613
1614static int qArgDigitValue(QChar ch) noexcept
1615{
1616#if QT_VERSION < QT_VERSION_CHECK(7, 0, 0) && !defined(QT_BOOTSTRAPPED)
1617 if (supportUnicodeDigitValuesInArg())
1618 return ch.digitValue();
1619#endif
1620 if (ch >= u'0' && ch <= u'9')
1621 return int(ch.unicode() - u'0');
1622 return -1;
1623}
1624
1625#if QT_CONFIG(regularexpression)
1626Q_DECL_COLD_FUNCTION
1627static void qtWarnAboutInvalidRegularExpression(const QRegularExpression &re, const char *cls, const char *method)
1628{
1629 extern void qtWarnAboutInvalidRegularExpression(const QString &pattern, const char *cls, const char *method);
1630 qtWarnAboutInvalidRegularExpression(re.pattern(), cls, method);
1631}
1632#endif
1633
1634/*!
1635 \macro QT_RESTRICTED_CAST_FROM_ASCII
1636 \relates QString
1637
1638 Disables most automatic conversions from source literals and 8-bit data
1639 to unicode QStrings, but allows the use of
1640 the \c{QChar(char)} and \c{QString(const char (&ch)[N])} constructors,
1641 and the \c{QString::operator=(const char (&ch)[N])} assignment operator.
1642 This gives most of the type-safety benefits of \l QT_NO_CAST_FROM_ASCII
1643 but does not require user code to wrap character and string literals
1644 with QLatin1Char, QLatin1StringView or similar.
1645
1646 Using this macro together with source strings outside the 7-bit range,
1647 non-literals, or literals with embedded NUL characters is undefined.
1648
1649 \sa QT_NO_CAST_FROM_ASCII, QT_NO_CAST_TO_ASCII
1650*/
1651
1652/*!
1653 \macro QT_NO_CAST_FROM_ASCII
1654 \relates QString
1655 \relates QChar
1656
1657 Disables automatic conversions from 8-bit strings (\c{char *}) to Unicode
1658 QStrings, as well as from 8-bit \c{char} types (\c{char} and
1659 \c{unsigned char}) to QChar.
1660
1661 \sa QT_NO_CAST_TO_ASCII, QT_RESTRICTED_CAST_FROM_ASCII,
1662 QT_NO_CAST_FROM_BYTEARRAY
1663*/
1664
1665/*!
1666 \macro QT_NO_CAST_TO_ASCII
1667 \relates QString
1668
1669 Disables automatic conversion from QString to 8-bit strings (\c{char *}).
1670
1671 \sa QT_NO_CAST_FROM_ASCII, QT_RESTRICTED_CAST_FROM_ASCII,
1672 QT_NO_CAST_FROM_BYTEARRAY
1673*/
1674
1675/*!
1676 \macro QT_ASCII_CAST_WARNINGS
1677 \internal
1678 \relates QString
1679
1680 This macro can be defined to force a warning whenever a function is
1681 called that automatically converts between unicode and 8-bit encodings.
1682
1683 Note: This only works for compilers that support warnings for
1684 deprecated API.
1685
1686 \sa QT_NO_CAST_TO_ASCII, QT_NO_CAST_FROM_ASCII, QT_RESTRICTED_CAST_FROM_ASCII
1687*/
1688
1689/*!
1690 \class QString
1691 \inmodule QtCore
1692 \reentrant
1693
1694 \brief The QString class provides a Unicode character string.
1695
1696 \ingroup tools
1697 \ingroup shared
1698 \ingroup string-processing
1699
1700 \compares strong
1701 \compareswith strong QChar QLatin1StringView {const char16_t *} \
1702 QStringView QUtf8StringView
1703 \endcompareswith
1704 \compareswith strong QByteArray QByteArrayView {const char *}
1705 When comparing with byte arrays, their content is interpreted as UTF-8.
1706 \endcompareswith
1707
1708 QString stores a string of 16-bit \l{QChar}s, where each QChar
1709 corresponds to one UTF-16 code unit. (Unicode characters
1710 with code values above 65535 are stored using surrogate pairs,
1711 that is, two consecutive \l{QChar}s.)
1712
1713 \l{Unicode} is an international standard that supports most of the
1714 writing systems in use today. It is a superset of US-ASCII (ANSI
1715 X3.4-1986) and Latin-1 (ISO 8859-1), and all the US-ASCII/Latin-1
1716 characters are available at the same code positions.
1717
1718 Behind the scenes, QString uses \l{implicit sharing}
1719 (copy-on-write) to reduce memory usage and to avoid the needless
1720 copying of data. This also helps reduce the inherent overhead of
1721 storing 16-bit characters instead of 8-bit characters.
1722
1723 In addition to QString, Qt also provides the QByteArray class to
1724 store raw bytes and traditional 8-bit '\\0'-terminated strings.
1725 For most purposes, QString is the class you want to use. It is
1726 used throughout the Qt API, and the Unicode support ensures that
1727 your applications are easy to translate if you want to expand
1728 your application's market at some point. Two prominent cases
1729 where QByteArray is appropriate are when you need to store raw
1730 binary data, and when memory conservation is critical (like in
1731 embedded systems).
1732
1733 \section1 Initializing a string
1734
1735 One way to initialize a QString is to pass a \c{const char
1736 *} to its constructor. For example, the following code creates a
1737 QString of size 5 containing the data "Hello":
1738
1739 \snippet qstring/main.cpp 0
1740
1741 QString converts the \c{const char *} data into Unicode using the
1742 fromUtf8() function.
1743
1744 In all of the QString functions that take \c{const char *}
1745 parameters, the \c{const char *} is interpreted as a classic
1746 C-style \c{'\\0'}-terminated string. Except where the function's
1747 name overtly indicates some other encoding, such \c{const char *}
1748 parameters are assumed to be encoded in UTF-8.
1749
1750 Since Qt 6.4, it is also possible to initialize QStrings using
1751 the \l {Qt::Literals::StringLiterals::operator""_s()} and
1752 \l {Qt::Literals::StringLiterals::operator""_L1()} literal
1753 operators. In many cases, using the literals results in
1754 \l{More efficient string construction}{more efficient string construction}.
1755
1756
1757 You can also provide string data as an array of \l{QChar}s:
1758
1759 \snippet qstring/main.cpp 1
1760
1761 QString makes a deep copy of the QChar data, so you can modify it
1762 later without experiencing side effects. You can avoid taking a
1763 deep copy of the character data by using QStringView or
1764 QString::fromRawData() instead.
1765
1766 Another approach is to set the size of the string using resize()
1767 and to initialize the data character per character. QString uses
1768 0-based indexes, just like C++ arrays. To access the character at
1769 a particular index position, you can use \l operator[](). On
1770 non-\c{const} strings, \l operator[]() returns a reference to a
1771 character that can be used on the left side of an assignment. For
1772 example:
1773
1774 \snippet qstring/main.cpp 2
1775
1776 For read-only access, an alternative syntax is to use the at()
1777 function:
1778
1779 \snippet qstring/main.cpp 3
1780
1781 The at() function can be faster than \l operator[]() because it
1782 never causes a \l{deep copy} to occur. Alternatively, use the
1783 first(), last(), or sliced() functions to extract several characters
1784 at a time.
1785
1786 A QString can embed '\\0' characters (QChar::Null). The size()
1787 function always returns the size of the whole string, including
1788 embedded '\\0' characters.
1789
1790 After a call to the resize() function, newly allocated characters
1791 have undefined values. To set all the characters in the string to
1792 a particular value, use the fill() function.
1793
1794 QString provides dozens of overloads designed to simplify string
1795 usage. For example, if you want to compare a QString with a string
1796 literal, you can write code like this and it will work as expected:
1797
1798 \snippet qstring/main.cpp 4
1799
1800 You can also pass string literals to functions that take QStrings
1801 as arguments, invoking the QString(const char *)
1802 constructor. Similarly, you can pass a QString to a function that
1803 takes a \c{const char *} argument using the \l qPrintable() macro,
1804 which returns the given QString as a \c{const char *}. This is
1805 equivalent to calling toLocal8Bit().\l{QByteArray::}{constData()}
1806 on the QString.
1807
1808 \section1 Manipulating string data
1809
1810 QString provides the following basic functions for modifying the
1811 character data: append(), prepend(), insert(), replace(), and
1812 remove(). For example:
1813
1814 \snippet qstring/main.cpp 5
1815
1816 In the above example, the replace() function's first two arguments are the
1817 position from which to start replacing and the number of characters that
1818 should be replaced.
1819
1820 When data-modifying functions increase the size of the string,
1821 QString may reallocate the memory in which it holds its data. When
1822 this happens, QString expands by more than it immediately needs so as
1823 to have space for further expansion without reallocation until the size
1824 of the string has significantly increased.
1825
1826 The insert(), remove(), and, when replacing a sub-string with one of
1827 different size, replace() functions can be slow (\l{linear time}) for
1828 large strings because they require moving many characters in the string
1829 by at least one position in memory.
1830
1831 If you are building a QString gradually and know in advance
1832 approximately how many characters the QString will contain, you
1833 can call reserve(), asking QString to preallocate a certain amount
1834 of memory. You can also call capacity() to find out how much
1835 memory the QString actually has allocated.
1836
1837 QString provides \l{STL-style iterators} (QString::const_iterator and
1838 QString::iterator). In practice, iterators are handy when working with
1839 generic algorithms provided by the C++ standard library.
1840
1841 \note Iterators over a QString, and references to individual characters
1842 within one, cannot be relied on to remain valid when any non-\c{const}
1843 method of the QString is called. Accessing such an iterator or reference
1844 after the call to a non-\c{const} method leads to undefined behavior. When
1845 stability for iterator-like functionality is required, you should use
1846 indexes instead of iterators, as they are not tied to QString's internal
1847 state and thus do not get invalidated.
1848
1849 \note Due to \l{implicit sharing}, the first non-\c{const} operator or
1850 function used on a given QString may cause it to internally perform a deep
1851 copy of its data. This invalidates all iterators over the string and
1852 references to individual characters within it. Do not call non-const
1853 functions while keeping iterators. Accessing an iterator or reference
1854 after it has been invalidated leads to undefined behavior. See the
1855 \l{Implicit sharing iterator problem} section for more information.
1856
1857 A frequent requirement is to remove or simplify the spacing between
1858 visible characters in a string. The characters that make up that spacing
1859 are those for which \l {QChar::}{isSpace()} returns \c true, such as
1860 the simple space \c{' '}, the horizontal tab \c{'\\t'} and the newline \c{'\\n'}.
1861 To obtain a copy of a string leaving out any spacing from its start and end,
1862 use \l trimmed(). To also replace each sequence of spacing characters within
1863 the string with a simple space, \c{' '}, use \l simplified().
1864
1865 If you want to find all occurrences of a particular character or
1866 substring in a QString, use the indexOf() or lastIndexOf()
1867 functions. The former searches forward, the latter searches backward.
1868 Either can be told an index position from which to start its search.
1869 Each returns the index position of the character or substring if it
1870 finds it; otherwise, it returns -1. For example, here is a typical loop
1871 that finds all occurrences of a particular substring:
1872
1873 \snippet qstring/main.cpp 6
1874
1875 QString provides many functions for converting numbers into
1876 strings and strings into numbers. See the arg() functions, the
1877 setNum() functions, the number() static functions, and the
1878 toInt(), toDouble(), and similar functions.
1879
1880 To get an uppercase or lowercase version of a string, use toUpper() or
1881 toLower().
1882
1883 Lists of strings are handled by the QStringList class. You can
1884 split a string into a list of strings using the split() function,
1885 and join a list of strings into a single string with an optional
1886 separator using QStringList::join(). You can obtain a filtered list
1887 from a string list by selecting the entries in it that contain a
1888 particular substring or match a particular QRegularExpression.
1889 See QStringList::filter() for details.
1890
1891 \section1 Querying string data
1892
1893 To see if a QString starts or ends with a particular substring, use
1894 startsWith() or endsWith(). To check whether a QString contains a
1895 specific character or substring, use the contains() function. To
1896 find out how many times a particular character or substring occurs
1897 in a string, use count().
1898
1899 To obtain a pointer to the actual character data, call data() or
1900 constData(). These functions return a pointer to the beginning of
1901 the QChar data. The pointer is guaranteed to remain valid until a
1902 non-\c{const} function is called on the QString.
1903
1904 \section2 Comparing strings
1905
1906 QStrings can be compared using overloaded operators such as \l
1907 operator<(), \l operator<=(), \l operator==(), \l operator>=(),
1908 and so on. The comparison is based exclusively on the lexicographical
1909 order of the two strings, seen as sequences of UTF-16 code units.
1910 It is very fast but is not what a human would expect; the
1911 QString::localeAwareCompare() function is usually a better choice for
1912 sorting user-interface strings, when such a comparison is available.
1913
1914 When Qt is linked with the ICU library (which it usually is), its
1915 locale-aware sorting is used. Otherwise, platform-specific solutions
1916 are used:
1917 \list
1918 \li On Windows, localeAwareCompare() uses the current user locale,
1919 as set in the \uicontrol{regional} and \uicontrol{language}
1920 options portion of \uicontrol{Control Panel}.
1921 \li On \macos and iOS, \l localeAwareCompare() compares according
1922 to the \uicontrol{Order for sorted lists} setting in the
1923 \uicontrol{International preferences} panel.
1924 \li On other Unix-like systems, the comparison falls back to the
1925 system library's \c strcoll().
1926 \endlist
1927
1928 \section1 Converting between encoded string data and QString
1929
1930 QString provides the following functions that return a
1931 \c{const char *} version of the string as QByteArray: toUtf8(),
1932 toLatin1(), and toLocal8Bit().
1933
1934 \list
1935 \li toLatin1() returns a Latin-1 (ISO 8859-1) encoded 8-bit string.
1936 \li toUtf8() returns a UTF-8 encoded 8-bit string. UTF-8 is a
1937 superset of US-ASCII (ANSI X3.4-1986) that supports the entire
1938 Unicode character set through multibyte sequences.
1939 \li toLocal8Bit() returns an 8-bit string using the system's local
1940 encoding. This is the same as toUtf8() on Unix systems.
1941 \endlist
1942
1943 To convert from one of these encodings, QString provides
1944 fromLatin1(), fromUtf8(), and fromLocal8Bit(). Other
1945 encodings are supported through the QStringEncoder and QStringDecoder
1946 classes.
1947
1948 As mentioned above, QString provides a lot of functions and
1949 operators that make it easy to interoperate with \c{const char *}
1950 strings. But this functionality is a double-edged sword: It makes
1951 QString more convenient to use if all strings are US-ASCII or
1952 Latin-1, but there is always the risk that an implicit conversion
1953 from or to \c{const char *} is done using the wrong 8-bit
1954 encoding. To minimize these risks, you can turn off these implicit
1955 conversions by defining some of the following preprocessor symbols:
1956
1957 \list
1958 \li \l QT_NO_CAST_FROM_ASCII disables automatic conversions from
1959 C string literals and pointers to Unicode.
1960 \li \l QT_RESTRICTED_CAST_FROM_ASCII allows automatic conversions
1961 from C characters and character arrays but disables automatic
1962 conversions from character pointers to Unicode.
1963 \li \l QT_NO_CAST_TO_ASCII disables automatic conversion from QString
1964 to C strings.
1965 \endlist
1966
1967 You then need to explicitly call fromUtf8(), fromLatin1(),
1968 or fromLocal8Bit() to construct a QString from an
1969 8-bit string, or use the lightweight QLatin1StringView class. For
1970 example:
1971
1972 \snippet code/src_corelib_text_qstring.cpp 1
1973
1974 Similarly, you must call toLatin1(), toUtf8(), or
1975 toLocal8Bit() explicitly to convert the QString to an 8-bit
1976 string.
1977
1978 \table 100 %
1979 \header
1980 \li Note for C Programmers
1981
1982 \row
1983 \li
1984 Due to C++'s type system and the fact that QString is
1985 \l{implicitly shared}, QStrings may be treated like \c{int}s or
1986 other basic types. For example:
1987
1988 \snippet qstring/main.cpp 7
1989
1990 The \c result variable is a normal variable allocated on the
1991 stack. When \c return is called, and because we're returning by
1992 value, the copy constructor is called and a copy of the string is
1993 returned. No actual copying takes place thanks to the implicit
1994 sharing.
1995
1996 \endtable
1997
1998 \section1 Distinction between null and empty strings
1999
2000 For historical reasons, QString distinguishes between null
2001 and empty strings. A \e null string is a string that is
2002 initialized using QString's default constructor or by passing
2003 \nullptr to the constructor. An \e empty string is any
2004 string with size 0. A null string is always empty, but an empty
2005 string isn't necessarily null:
2006
2007 \snippet qstring/main.cpp 8
2008
2009 All functions except isNull() treat null strings the same as empty
2010 strings. For example, toUtf8().\l{QByteArray::}{constData()} returns a valid pointer
2011 (not \nullptr) to a '\\0' character for a null string. We
2012 recommend that you always use the isEmpty() function and avoid isNull().
2013
2014 \section1 Number formats
2015
2016 When a QString::arg() \c{'%'} format specifier includes the \c{'L'} locale
2017 qualifier, and the base is ten (its default), the default locale is
2018 used. This can be set using \l{QLocale::setDefault()}. For more refined
2019 control of localized string representations of numbers, see
2020 QLocale::toString(). All other number formatting done by QString follows the
2021 C locale's representation of numbers.
2022
2023 When QString::arg() applies left-padding to numbers, the fill character
2024 \c{'0'} is treated specially. If the number is negative, its minus sign
2025 appears before the zero-padding. If the field is localized, the
2026 locale-appropriate zero character is used in place of \c{'0'}. For
2027 floating-point numbers, this special treatment only applies if the number is
2028 finite.
2029
2030 \section2 Floating-point formats
2031
2032 In member functions (for example, arg() and number()) that format floating-point
2033 numbers (\c float or \c double) as strings, the representation used can be
2034 controlled by a choice of \e format and \e precision, whose meanings are as
2035 for \l {QLocale::toString(double, char, int)}.
2036
2037 If the selected \e format includes an exponent, localized forms follow the
2038 locale's convention on digits in the exponent. For non-localized formatting,
2039 the exponent shows its sign and includes at least two digits, left-padding
2040 with zero if needed.
2041
2042 \section1 More efficient string construction
2043
2044 Many strings are known at compile time. The QString constructor from
2045 C++ string literals will copy the contents of the string,
2046 treating the contents as UTF-8. This requires memory allocation and
2047 re-encoding string data, operations that will happen at runtime.
2048 If the string data is known at compile time, you can use the QStringLiteral
2049 macro or similarly \c{operator""_s} to create QString's payload at compile
2050 time instead.
2051
2052 Using the QString \c{'+'} operator, it is easy to construct a
2053 complex string from multiple substrings. You will often write code
2054 like this:
2055
2056 \snippet qstring/stringbuilder.cpp 0
2057
2058 There is nothing wrong with either of these string constructions,
2059 but there are a few hidden inefficiencies:
2060
2061 First, repeated use of the \c{'+'} operator may lead to
2062 multiple memory allocations. When concatenating \e{n} substrings,
2063 where \e{n > 2}, there can be as many as \e{n - 1} calls to the
2064 memory allocator.
2065
2066 These allocations can be optimized by an internal class
2067 \c{QStringBuilder}. This class is marked
2068 internal and does not appear in the documentation, because you
2069 aren't meant to instantiate it in your code. Its use will be
2070 automatic, as described below.
2071
2072 \c{QStringBuilder} uses expression templates and reimplements the
2073 \c{'%'} operator so that when you use \c{'%'} for string
2074 concatenation instead of \c{'+'}, multiple substring
2075 concatenations will be postponed until the final result is about
2076 to be assigned to a QString. At this point, the amount of memory
2077 required for the final result is known. The memory allocator is
2078 then called \e{once} to get the required space, and the substrings
2079 are copied into it one by one.
2080
2081 Additional efficiency is gained by inlining and reducing reference
2082 counting (the QString created from a \c{QStringBuilder}
2083 has a ref count of 1, whereas QString::append() needs an extra
2084 test).
2085
2086 There are two ways you can access this improved method of string
2087 construction. The straightforward way is to include
2088 \c{QStringBuilder} wherever you want to use it and use the
2089 \c{'%'} operator instead of \c{'+'} when concatenating strings:
2090
2091 \snippet qstring/stringbuilder.cpp 5
2092
2093 A more global approach, which is more convenient but not entirely
2094 source-compatible, is to define \c QT_USE_QSTRINGBUILDER (by adding
2095 it to the compiler flags) at build time. This will make concatenating
2096 strings with \c{'+'} work the same way as \c{QStringBuilder}'s \c{'%'}.
2097
2098 \note Using automatic type deduction (for example, by using the \c
2099 auto keyword) with the result of string concatenation when QStringBuilder
2100 is enabled will show that the concatenation is indeed an object of a
2101 QStringBuilder specialization:
2102
2103 \snippet qstring/stringbuilder.cpp 6
2104
2105 This does not cause any harm, as QStringBuilder will implicitly convert to
2106 QString when required. If this is undesirable, then one should specify
2107 the necessary types instead of having the compiler deduce them:
2108
2109 \snippet qstring/stringbuilder.cpp 7
2110
2111 \section1 Maximum size and out-of-memory conditions
2112
2113 The maximum size of QString depends on the architecture. Most 64-bit
2114 systems can allocate more than 2 GB of memory, with a typical limit
2115 of 2^63 bytes. The actual value also depends on the overhead required for
2116 managing the data block. As a result, you can expect a maximum size
2117 of 2 GB minus overhead on 32-bit platforms and 2^63 bytes minus overhead
2118 on 64-bit platforms. The number of elements that can be stored in a
2119 QString is this maximum size divided by the size of QChar.
2120
2121 When memory allocation fails, QString throws a \c std::bad_alloc
2122 exception if the application was compiled with exception support.
2123 Out-of-memory conditions in Qt containers are the only cases where Qt
2124 will throw exceptions. If exceptions are disabled, then running out of
2125 memory is undefined behavior.
2126
2127 \note Target operating systems may impose limits on how much memory an
2128 application can allocate, in total, or on the size of individual allocations.
2129 This may further restrict the size of string a QString can hold.
2130 Mitigating or controlling the behavior these limits cause is beyond the
2131 scope of the Qt API.
2132
2133 \sa {Which string class to use?}, fromRawData(), QChar, QStringView,
2134 QLatin1StringView, QByteArray
2135*/
2136
2137/*! \typedef QString::ConstIterator
2138
2139 Qt-style synonym for QString::const_iterator.
2140*/
2141
2142/*! \typedef QString::Iterator
2143
2144 Qt-style synonym for QString::iterator.
2145*/
2146
2147/*! \typedef QString::const_iterator
2148
2149 \sa QString::iterator
2150*/
2151
2152/*! \typedef QString::iterator
2153
2154 \sa QString::const_iterator
2155*/
2156
2157/*! \typedef QString::const_reverse_iterator
2158 \since 5.6
2159
2160 \sa QString::reverse_iterator, QString::const_iterator
2161*/
2162
2163/*! \typedef QString::reverse_iterator
2164 \since 5.6
2165
2166 \sa QString::const_reverse_iterator, QString::iterator
2167*/
2168
2169/*!
2170 \typedef QString::size_type
2171*/
2172
2173/*!
2174 \typedef QString::difference_type
2175*/
2176
2177/*!
2178 \typedef QString::const_reference
2179*/
2180/*!
2181 \typedef QString::reference
2182*/
2183
2184/*!
2185 \typedef QString::const_pointer
2186
2187 The QString::const_pointer typedef provides an STL-style
2188 const pointer to a QString element (QChar).
2189*/
2190/*!
2191 \typedef QString::pointer
2192
2193 The QString::pointer typedef provides an STL-style
2194 pointer to a QString element (QChar).
2195*/
2196
2197/*!
2198 \typedef QString::value_type
2199*/
2200
2201/*! \fn QString::iterator QString::begin()
2202
2203 Returns an \l{STL-style iterators}{STL-style iterator} pointing to the
2204 first character in the string.
2205
2206//! [iterator-invalidation-func-desc]
2207 \warning The returned iterator is invalidated on detachment or when the
2208 QString is modified.
2209//! [iterator-invalidation-func-desc]
2210
2211 \sa constBegin(), end()
2212*/
2213
2214/*! \fn QString::const_iterator QString::begin() const
2215
2216 \overload begin()
2217*/
2218
2219/*! \fn QString::const_iterator QString::cbegin() const
2220 \since 5.0
2221
2222 Returns a const \l{STL-style iterators}{STL-style iterator} pointing to the
2223 first character in the string.
2224
2225 \include qstring.cpp iterator-invalidation-func-desc
2226
2227 \sa begin(), cend()
2228*/
2229
2230/*! \fn QString::const_iterator QString::constBegin() const
2231
2232 Returns a const \l{STL-style iterators}{STL-style iterator} pointing to the
2233 first character in the string.
2234
2235 \include qstring.cpp iterator-invalidation-func-desc
2236
2237 \sa begin(), constEnd()
2238*/
2239
2240/*! \fn QString::iterator QString::end()
2241
2242 Returns an \l{STL-style iterators}{STL-style iterator} pointing just after
2243 the last character in the string.
2244
2245 \include qstring.cpp iterator-invalidation-func-desc
2246
2247 \sa begin(), constEnd()
2248*/
2249
2250/*! \fn QString::const_iterator QString::end() const
2251
2252 \overload end()
2253*/
2254
2255/*! \fn QString::const_iterator QString::cend() const
2256 \since 5.0
2257
2258 Returns a const \l{STL-style iterators}{STL-style iterator} pointing just
2259 after the last character in the string.
2260
2261 \include qstring.cpp iterator-invalidation-func-desc
2262
2263 \sa cbegin(), end()
2264*/
2265
2266/*! \fn QString::const_iterator QString::constEnd() const
2267
2268 Returns a const \l{STL-style iterators}{STL-style iterator} pointing just
2269 after the last character in the string.
2270
2271 \include qstring.cpp iterator-invalidation-func-desc
2272
2273 \sa constBegin(), end()
2274*/
2275
2276/*! \fn QString::reverse_iterator QString::rbegin()
2277 \since 5.6
2278
2279 Returns a \l{STL-style iterators}{STL-style} reverse iterator pointing to
2280 the first character in the string, in reverse order.
2281
2282 \include qstring.cpp iterator-invalidation-func-desc
2283
2284 \sa begin(), crbegin(), rend()
2285*/
2286
2287/*! \fn QString::const_reverse_iterator QString::rbegin() const
2288 \since 5.6
2289 \overload
2290*/
2291
2292/*! \fn QString::const_reverse_iterator QString::crbegin() const
2293 \since 5.6
2294
2295 Returns a const \l{STL-style iterators}{STL-style} reverse iterator
2296 pointing to the first character in the string, in reverse order.
2297
2298 \include qstring.cpp iterator-invalidation-func-desc
2299
2300 \sa begin(), rbegin(), rend()
2301*/
2302
2303/*! \fn QString::reverse_iterator QString::rend()
2304 \since 5.6
2305
2306 Returns a \l{STL-style iterators}{STL-style} reverse iterator pointing just
2307 after the last character in the string, in reverse order.
2308
2309 \include qstring.cpp iterator-invalidation-func-desc
2310
2311 \sa end(), crend(), rbegin()
2312*/
2313
2314/*! \fn QString::const_reverse_iterator QString::rend() const
2315 \since 5.6
2316 \overload
2317*/
2318
2319/*! \fn QString::const_reverse_iterator QString::crend() const
2320 \since 5.6
2321
2322 Returns a const \l{STL-style iterators}{STL-style} reverse iterator
2323 pointing just after the last character in the string, in reverse order.
2324
2325 \include qstring.cpp iterator-invalidation-func-desc
2326
2327 \sa end(), rend(), rbegin()
2328*/
2329
2330/*!
2331 \fn QString::QString()
2332
2333 Constructs a null string. Null strings are also considered empty.
2334
2335 \sa isEmpty(), isNull(), {Distinction Between Null and Empty Strings}
2336*/
2337
2338/*!
2339 \fn QString::QString(QString &&other)
2340
2341 Move-constructs a QString instance, making it point at the same
2342 object that \a other was pointing to.
2343
2344 \since 5.2
2345*/
2346
2347/*! \fn QString::QString(const char *str)
2348
2349 Constructs a string initialized with the 8-bit string \a str. The
2350 given const char pointer is converted to Unicode using the
2351 fromUtf8() function.
2352
2353 You can disable this constructor by defining
2354 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
2355 can be useful if you want to ensure that all user-visible strings
2356 go through QObject::tr(), for example.
2357
2358 \note Defining \l QT_RESTRICTED_CAST_FROM_ASCII also disables
2359 this constructor, but enables a \c{QString(const char (&ch)[N])}
2360 constructor instead. Using non-literal input, or input with
2361 embedded NUL characters, or non-7-bit characters is undefined
2362 in this case.
2363
2364 \sa fromLatin1(), fromLocal8Bit(), fromUtf8()
2365*/
2366
2367/*! \fn QString::QString(const char8_t *str)
2368
2369 Constructs a string initialized with the UTF-8 string \a str. The
2370 given const char8_t pointer is converted to Unicode using the
2371 fromUtf8() function.
2372
2373 \since 6.1
2374 \sa fromLatin1(), fromLocal8Bit(), fromUtf8()
2375*/
2376
2377/*!
2378 \fn QString::QString(QStringView sv)
2379
2380 Constructs a string initialized with the string view's data.
2381
2382 The QString will be null if and only if \a sv is null.
2383
2384 \since 6.8
2385
2386 \sa fromUtf16()
2387*/
2388
2389/*
2390//! [from-std-string]
2391Returns a copy of the \a str string. The given string is assumed to be
2392encoded in \1, and is converted to QString using the \2 function.
2393//! [from-std-string]
2394*/
2395
2396/*! \fn QString QString::fromStdString(const std::string &str)
2397
2398 \include qstring.cpp {from-std-string} {UTF-8} {fromUtf8()}
2399
2400 \sa fromLatin1(), fromLocal8Bit(), fromUtf8(), QByteArray::fromStdString()
2401*/
2402
2403/*! \fn QString QString::fromStdWString(const std::wstring &str)
2404
2405 Returns a copy of the \a str string. The given string is assumed
2406 to be encoded in UTF-16 if the size of wchar_t is 2 bytes (for example, on
2407 Windows) and in UTF-32 if the size of wchar_t is 4 bytes (most Unix
2408 systems).
2409
2410 \sa fromUtf16(), fromLatin1(), fromLocal8Bit(), fromUtf8(), fromUcs4(),
2411 fromStdU16String(), fromStdU32String()
2412*/
2413
2414/*! \fn QString QString::fromWCharArray(const wchar_t *string, qsizetype size)
2415 \since 4.2
2416
2417 Reads the first \a size code units of the \c wchar_t array to whose start
2418 \a string points, converting them to Unicode and returning the result as
2419 a QString. The encoding used by \c wchar_t is assumed to be UTF-32 if the
2420 type's size is four bytes or UTF-16 if its size is two bytes.
2421
2422 If \a size is -1 (default), the \a string must be '\\0'-terminated.
2423
2424 \sa fromUtf16(), fromLatin1(), fromLocal8Bit(), fromUtf8(), fromUcs4(),
2425 fromStdWString()
2426*/
2427
2428/*! \fn std::wstring QString::toStdWString() const
2429
2430 Returns a std::wstring object with the data contained in this
2431 QString. The std::wstring is encoded in UTF-16 on platforms where
2432 wchar_t is 2 bytes wide (for example, Windows) and in UTF-32 on platforms
2433 where wchar_t is 4 bytes wide (most Unix systems).
2434
2435 This method is mostly useful to pass a QString to a function
2436 that accepts a std::wstring object.
2437
2438 \sa utf16(), toLatin1(), toUtf8(), toLocal8Bit(), toStdU16String(),
2439 toStdU32String()
2440*/
2441
2442qsizetype QString::toUcs4_helper(const char16_t *uc, qsizetype length, char32_t *out)
2443{
2444 qsizetype count = 0;
2445
2446 QStringIterator i(QStringView(uc, length));
2447 while (i.hasNext())
2448 out[count++] = i.next();
2449
2450 return count;
2451}
2452
2453/*! \fn qsizetype QString::toWCharArray(wchar_t *array) const
2454 \since 4.2
2455
2456 Fills the \a array with the data contained in this QString object.
2457 The array is encoded in UTF-16 on platforms where
2458 wchar_t is 2 bytes wide (for example, Windows) and in UTF-32 on platforms
2459 where wchar_t is 4 bytes wide (most Unix systems).
2460
2461 \a array has to be allocated by the caller and contain enough space to
2462 hold the complete string (allocating the array with the same length as the
2463 string is always sufficient).
2464
2465 This function returns the actual length of the string in \a array.
2466
2467 \note This function does not append a null character to the array.
2468
2469 \sa utf16(), toUcs4(), toLatin1(), toUtf8(), toLocal8Bit(), toStdWString(),
2470 QStringView::toWCharArray()
2471*/
2472
2473/*! \fn QString::QString(const QString &other)
2474
2475 Constructs a copy of \a other.
2476
2477 This operation takes \l{constant time}, because QString is
2478 \l{implicitly shared}. This makes returning a QString from a
2479 function very fast. If a shared instance is modified, it will be
2480 copied (copy-on-write), and that takes \l{linear time}.
2481
2482 \sa operator=()
2483*/
2484
2485/*!
2486 Constructs a string initialized with the first \a size characters
2487 of the QChar array \a unicode.
2488
2489 If \a unicode is \nullptr, a null string is constructed.
2490
2491 If \a size is negative, \a unicode is assumed to point to a '\\0'-terminated
2492 array and its length is determined dynamically. The terminating
2493 null character is not considered part of the string.
2494
2495 QString makes a deep copy of the string data. The unicode data is copied as
2496 is and the Byte Order Mark is preserved if present.
2497
2498 \sa fromRawData()
2499*/
2500QString::QString(const QChar *unicode, qsizetype size)
2501{
2502 if (!unicode) {
2503 d.clear();
2504 } else {
2505 if (size < 0)
2506 size = QtPrivate::qustrlen(reinterpret_cast<const char16_t *>(unicode));
2507 if (!size) {
2508 d = DataPointer::fromRawData(&_empty, 0);
2509 } else {
2510 d = DataPointer(size, size);
2511 Q_CHECK_PTR(d.data());
2512 memcpy(d.data(), unicode, size * sizeof(QChar));
2513 d.data()[size] = '\0';
2514 }
2515 }
2516}
2517
2518/*!
2519 Constructs a string of the given \a size with every character set
2520 to \a ch.
2521
2522 \sa fill()
2523*/
2524QString::QString(qsizetype size, QChar ch)
2525{
2526 if (size <= 0) {
2527 d = DataPointer::fromRawData(&_empty, 0);
2528 } else {
2529 d = DataPointer(size, size);
2530 Q_CHECK_PTR(d.data());
2531 d.data()[size] = '\0';
2532 char16_t *b = d.data();
2533 char16_t *e = d.data() + size;
2534 const char16_t value = ch.unicode();
2535 std::fill(b, e, value);
2536 }
2537}
2538
2539/*! \fn QString::QString(qsizetype size, Qt::Initialization)
2540 \internal
2541
2542 Constructs a string of the given \a size without initializing the
2543 characters. This is only used in \c QStringBuilder::toString().
2544*/
2545QString::QString(qsizetype size, Qt::Initialization)
2546{
2547 if (size <= 0) {
2548 d = DataPointer::fromRawData(&_empty, 0);
2549 } else {
2550 d = DataPointer(size, size);
2551 Q_CHECK_PTR(d.data());
2552 d.data()[size] = '\0';
2553 }
2554}
2555
2556/*! \fn QString::QString(QLatin1StringView str)
2557
2558 Constructs a copy of the Latin-1 string viewed by \a str.
2559
2560 \sa fromLatin1()
2561*/
2562
2563/*!
2564 Constructs a string of size 1 containing the character \a ch.
2565*/
2566QString::QString(QChar ch)
2567{
2568 d = DataPointer(1, 1);
2569 Q_CHECK_PTR(d.data());
2570 d.data()[0] = ch.unicode();
2571 d.data()[1] = '\0';
2572}
2573
2574/*! \fn QString::QString(const QByteArray &ba)
2575
2576 Constructs a string initialized with the byte array \a ba. The
2577 given byte array is converted to Unicode using fromUtf8().
2578
2579 You can disable this constructor by defining
2580 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
2581 can be useful if you want to ensure that all user-visible strings
2582 go through QObject::tr(), for example.
2583
2584 \note Any null ('\\0') bytes in the byte array will be included in this
2585 string, converted to Unicode null characters (U+0000). This behavior is
2586 different from Qt 5.x.
2587
2588 \sa fromLatin1(), fromLocal8Bit(), fromUtf8()
2589*/
2590
2591/*! \fn QString::QString(const Null &)
2592 \internal
2593*/
2594
2595/*! \fn QString::QString(QStringPrivate)
2596 \internal
2597*/
2598
2599/*! \fn QString &QString::operator=(const QString::Null &)
2600 \internal
2601*/
2602
2603/*!
2604 \fn QString::~QString()
2605
2606 Destroys the string.
2607*/
2608
2609
2610/*! \fn void QString::swap(QString &other)
2611 \since 4.8
2612 \memberswap{string}
2613*/
2614
2615/*! \fn void QString::detach()
2616
2617 \internal
2618*/
2619
2620/*! \fn bool QString::isDetached() const
2621
2622 \internal
2623*/
2624
2625/*! \fn bool QString::isSharedWith(const QString &other) const
2626
2627 \internal
2628*/
2629
2630/*! \fn QString::operator std::u16string_view() const
2631 \target qstring-operator-std-u16string_view
2632 \since 6.7
2633
2634 Converts this QString object to a \c{std::u16string_view} object.
2635*/
2636
2637static bool needsReallocate(const QString &str, qsizetype newSize)
2638{
2639 const auto capacityAtEnd = str.capacity() - str.data_ptr().freeSpaceAtBegin();
2640 return newSize > capacityAtEnd;
2641}
2642
2643/*!
2644 Sets the size of the string to \a size characters.
2645
2646 If \a size is greater than the current size, the string is
2647 extended to make it \a size characters long with the extra
2648 characters added to the end. The new characters are uninitialized.
2649
2650 If \a size is less than the current size, characters beyond position
2651 \a size are excluded from the string.
2652
2653 \note While resize() will grow the capacity if needed, it never shrinks
2654 capacity. To shed excess capacity, use squeeze().
2655
2656 Example:
2657
2658 \snippet qstring/main.cpp 45
2659
2660 If you want to append a certain number of identical characters to
2661 the string, use the \l {QString::}{resize(qsizetype, QChar)} overload.
2662
2663 If you want to expand the string so that it reaches a certain
2664 width and fill the new positions with a particular character, use
2665 the leftJustified() function:
2666
2667 \snippet qstring/main.cpp 47
2668
2669 If \a size is negative, it is equivalent to passing zero.
2670
2671 \sa truncate(), reserve(), squeeze()
2672*/
2673
2674void QString::resize(qsizetype size)
2675{
2676 if (size < 0)
2677 size = 0;
2678
2679 if (d->needsDetach() || needsReallocate(*this, size))
2680 reallocData(size, QArrayData::Grow);
2681 d.size = size;
2682 if (d->allocatedCapacity())
2683 d.data()[size] = u'\0';
2684}
2685
2686/*!
2687 \overload
2688 \since 5.7
2689
2690 Unlike \l {QString::}{resize(qsizetype)}, this overload
2691 initializes the new characters to \a fillChar:
2692
2693 \snippet qstring/main.cpp 46
2694*/
2695
2696void QString::resize(qsizetype newSize, QChar fillChar)
2697{
2698 const qsizetype oldSize = size();
2699 resize(newSize);
2700 const qsizetype difference = size() - oldSize;
2701 if (difference > 0)
2702 std::fill_n(d.data() + oldSize, difference, fillChar.unicode());
2703}
2704
2705
2706/*!
2707 \since 6.8
2708
2709 Sets the size of the string to \a size characters. If the size of
2710 the string grows, the new characters are uninitialized.
2711
2712 The behavior is identical to \c{resize(size)}.
2713
2714 \sa resize()
2715*/
2716
2717void QString::resizeForOverwrite(qsizetype size)
2718{
2719 resize(size);
2720}
2721
2722
2723/*! \fn qsizetype QString::capacity() const
2724
2725 Returns the maximum number of characters that can be stored in
2726 the string without forcing a reallocation.
2727
2728 The sole purpose of this function is to provide a means of fine
2729 tuning QString's memory usage. In general, you will rarely ever
2730 need to call this function. If you want to know how many
2731 characters are in the string, call size().
2732
2733 \note A statically allocated string will report a capacity of 0,
2734 even if it's not empty.
2735
2736 \note The free space position in the allocated memory block is undefined. In
2737 other words, one should not assume that the free memory is always located
2738 after the initialized elements.
2739
2740 \sa reserve(), squeeze()
2741*/
2742
2743/*!
2744 \fn void QString::reserve(qsizetype size)
2745
2746 Ensures the string has space for at least \a size characters.
2747
2748 If you know in advance how large a string will be, you can call this
2749 function to save repeated reallocation while building it.
2750 This can improve performance when building a string incrementally.
2751 A long sequence of operations that add to a string may trigger several
2752 reallocations, the last of which may leave you with significantly more
2753 space than you need. This is less efficient than doing a single
2754 allocation of the right size at the start.
2755
2756 If in doubt about how much space shall be needed, it is usually better to
2757 use an upper bound as \a size, or a high estimate of the most likely size,
2758 if a strict upper bound would be much bigger than this. If \a size is an
2759 underestimate, the string will grow as needed once the reserved size is
2760 exceeded, which may lead to a larger allocation than your best
2761 overestimate would have and will slow the operation that triggers it.
2762
2763 \warning reserve() reserves memory but does not change the size of the
2764 string. Accessing data beyond the end of the string is undefined behavior.
2765 If you need to access memory beyond the current end of the string,
2766 use resize().
2767
2768 This function is useful for code that needs to build up a long
2769 string and wants to avoid repeated reallocation. In this example,
2770 we want to add to the string until some condition is \c true, and
2771 we're fairly sure that size is large enough to make a call to
2772 reserve() worthwhile:
2773
2774 \snippet qstring/main.cpp 44
2775
2776 \sa squeeze(), capacity(), resize()
2777*/
2778
2779/*!
2780 \fn void QString::squeeze()
2781
2782 Releases any memory not required to store the character data.
2783
2784 The sole purpose of this function is to provide a means of fine
2785 tuning QString's memory usage. In general, you will rarely ever
2786 need to call this function.
2787
2788 \sa reserve(), capacity()
2789*/
2790
2791void QString::reallocData(qsizetype alloc, QArrayData::AllocationOption option)
2792{
2793 if (!alloc) {
2794 d = DataPointer::fromRawData(&_empty, 0);
2795 return;
2796 }
2797
2798 // don't use reallocate path when reducing capacity and there's free space
2799 // at the beginning: might shift data pointer outside of allocated space
2800 const bool cannotUseReallocate = d.freeSpaceAtBegin() > 0;
2801
2802 if (d->needsDetach() || cannotUseReallocate) {
2803 DataPointer dd(alloc, qMin(alloc, d.size), option);
2804 Q_CHECK_PTR(dd.data());
2805 if (dd.size > 0)
2806 ::memcpy(dd.data(), d.data(), dd.size * sizeof(QChar));
2807 dd.data()[dd.size] = 0;
2808 d.swap(dd);
2809 } else {
2810 d->reallocate(alloc, option);
2811 }
2812}
2813
2814void QString::reallocGrowData(qsizetype n)
2815{
2816 if (!n) // expected to always allocate
2817 n = 1;
2818
2819 if (d->needsDetach()) {
2820 DataPointer dd(DataPointer::allocateGrow(d, n, QArrayData::GrowsAtEnd));
2821 Q_CHECK_PTR(dd.data());
2822 dd->copyAppend(d.data(), d.data() + d.size);
2823 dd.data()[dd.size] = 0;
2824 d.swap(dd);
2825 } else {
2826 d->reallocate(d.constAllocatedCapacity() + n, QArrayData::Grow);
2827 }
2828}
2829
2830/*! \fn void QString::clear()
2831
2832 Clears the contents of the string and makes it null.
2833
2834 \sa resize(), isNull()
2835*/
2836
2837/*! \fn QString &QString::operator=(const QString &other)
2838
2839 Assigns \a other to this string and returns a reference to this
2840 string.
2841*/
2842
2843QString &QString::operator=(const QString &other) noexcept
2844{
2845 d = other.d;
2846 return *this;
2847}
2848
2849/*!
2850 \fn QString &QString::operator=(QString &&other)
2851
2852 Move-assigns \a other to this QString instance.
2853
2854 \since 5.2
2855*/
2856
2857/*! \fn QString &QString::operator=(QLatin1StringView str)
2858
2859 \overload operator=()
2860
2861 Assigns the Latin-1 string viewed by \a str to this string.
2862*/
2863QString &QString::operator=(QLatin1StringView other)
2864{
2865 const qsizetype capacityAtEnd = capacity() - d.freeSpaceAtBegin();
2866 if (isDetached() && other.size() <= capacityAtEnd) { // assumes d->alloc == 0 -> !isDetached() (sharedNull)
2867 d.size = other.size();
2868 d.data()[other.size()] = 0;
2869 qt_from_latin1(d.data(), other.latin1(), other.size());
2870 } else {
2871 *this = fromLatin1(other.latin1(), other.size());
2872 }
2873 return *this;
2874}
2875
2876/*! \fn QString &QString::operator=(const QByteArray &ba)
2877
2878 \overload operator=()
2879
2880 Assigns \a ba to this string. The byte array is converted to Unicode
2881 using the fromUtf8() function.
2882
2883 You can disable this operator by defining
2884 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
2885 can be useful if you want to ensure that all user-visible strings
2886 go through QObject::tr(), for example.
2887*/
2888
2889/*! \fn QString &QString::operator=(const char *str)
2890
2891 \overload operator=()
2892
2893 Assigns \a str to this string. The const char pointer is converted
2894 to Unicode using the fromUtf8() function.
2895
2896 You can disable this operator by defining \l QT_NO_CAST_FROM_ASCII
2897 or \l QT_RESTRICTED_CAST_FROM_ASCII when you compile your applications.
2898 This can be useful if you want to ensure that all user-visible strings
2899 go through QObject::tr(), for example.
2900*/
2901
2902/*!
2903 \overload operator=()
2904
2905 Sets the string to contain the single character \a ch.
2906*/
2907QString &QString::operator=(QChar ch)
2908{
2909 return assign(1, ch);
2910}
2911
2912/*!
2913 \fn QString& QString::insert(qsizetype position, const QString &str)
2914
2915 Inserts the string \a str at the given index \a position and
2916 returns a reference to this string.
2917
2918 Example:
2919
2920 \snippet qstring/main.cpp 26
2921
2922//! [string-grow-at-insertion]
2923 This string grows to accommodate the insertion. If \a position is beyond
2924 the end of the string, space characters are appended to the string to reach
2925 this \a position, followed by \a str.
2926//! [string-grow-at-insertion]
2927
2928 \sa append(), prepend(), replace(), remove()
2929*/
2930
2931/*!
2932 \fn QString& QString::insert(qsizetype position, QStringView str)
2933 \since 6.0
2934 \overload insert()
2935
2936 Inserts the string view \a str at the given index \a position and
2937 returns a reference to this string.
2938
2939 \include qstring.cpp string-grow-at-insertion
2940*/
2941
2942
2943/*!
2944 \fn QString& QString::insert(qsizetype position, const char *str)
2945 \since 5.5
2946 \overload insert()
2947
2948 Inserts the C string \a str at the given index \a position and
2949 returns a reference to this string.
2950
2951 \include qstring.cpp string-grow-at-insertion
2952
2953 This function is not available when \l QT_NO_CAST_FROM_ASCII is
2954 defined.
2955*/
2956
2957/*!
2958 \fn QString& QString::insert(qsizetype position, const QByteArray &str)
2959 \since 5.5
2960 \overload insert()
2961
2962 Interprets the contents of \a str as UTF-8, inserts the Unicode string
2963 it encodes at the given index \a position and returns a reference to
2964 this string.
2965
2966 \include qstring.cpp string-grow-at-insertion
2967
2968 This function is not available when \l QT_NO_CAST_FROM_ASCII is
2969 defined.
2970*/
2971
2972/*! \internal
2973 T is a view or a container on/of QChar, char16_t, or char
2974*/
2975template <typename T>
2976static void insert_helper(QString &str, qsizetype i, const T &toInsert)
2977{
2978 auto &str_d = str.data_ptr();
2979 qsizetype difference = 0;
2980 if (Q_UNLIKELY(i > str_d.size))
2981 difference = i - str_d.size;
2982 const qsizetype oldSize = str_d.size;
2983 const qsizetype insert_size = toInsert.size();
2984 const qsizetype newSize = str_d.size + difference + insert_size;
2985 const auto side = i == 0 ? QArrayData::GrowsAtBeginning : QArrayData::GrowsAtEnd;
2986
2987 if (str_d.needsDetach() || needsReallocate(str, newSize)) {
2988 const auto cbegin = str.cbegin();
2989 const auto cend = str.cend();
2990 const auto insert_start = difference == 0 ? std::next(cbegin, i) : cend;
2991 QString other;
2992 // Using detachAndGrow() so that prepend optimization works and QStringBuilder
2993 // unittests pass
2994 other.data_ptr().detachAndGrow(side, newSize, nullptr, nullptr);
2995 other.append(QStringView(cbegin, insert_start));
2996 other.resize(i, u' ');
2997 other.append(toInsert);
2998 other.append(QStringView(insert_start, cend));
2999 str.swap(other);
3000 return;
3001 }
3002
3003 str_d.detachAndGrow(side, difference + insert_size, nullptr, nullptr);
3004 Q_CHECK_PTR(str_d.data());
3005 str.resize(newSize);
3006
3007 auto begin = str_d.begin();
3008 auto old_end = std::next(begin, oldSize);
3009 std::fill_n(old_end, difference, u' ');
3010 auto insert_start = std::next(begin, i);
3011 if (difference == 0)
3012 std::move_backward(insert_start, old_end, str_d.end());
3013
3014 using Char = std::remove_cv_t<typename T::value_type>;
3015 if constexpr(std::is_same_v<Char, QChar>)
3016 std::copy_n(reinterpret_cast<const char16_t *>(toInsert.data()), insert_size, insert_start);
3017 else if constexpr (std::is_same_v<Char, char16_t>)
3018 std::copy_n(toInsert.data(), insert_size, insert_start);
3019 else if constexpr (std::is_same_v<Char, char>)
3020 qt_from_latin1(insert_start, toInsert.data(), insert_size);
3021}
3022
3023/*!
3024 \fn QString &QString::insert(qsizetype position, QLatin1StringView str)
3025 \overload insert()
3026
3027 Inserts the Latin-1 string viewed by \a str at the given index \a position.
3028
3029 \include qstring.cpp string-grow-at-insertion
3030*/
3031QString &QString::insert(qsizetype i, QLatin1StringView str)
3032{
3033 const char *s = str.latin1();
3034 if (i < 0 || !s || !(*s))
3035 return *this;
3036
3037 insert_helper(*this, i, str);
3038 return *this;
3039}
3040
3041/*!
3042 \fn QString &QString::insert(qsizetype position, QUtf8StringView str)
3043 \overload insert()
3044 \since 6.5
3045
3046 Inserts the UTF-8 string view \a str at the given index \a position.
3047
3048 \note Inserting variable-width UTF-8-encoded string data is conceptually slower
3049 than inserting fixed-width string data such as UTF-16 (QStringView) or Latin-1
3050 (QLatin1StringView) and should thus be used sparingly.
3051
3052 \include qstring.cpp string-grow-at-insertion
3053*/
3054QString &QString::insert(qsizetype i, QUtf8StringView s)
3055{
3056 auto insert_size = s.size();
3057 if (i < 0 || insert_size <= 0)
3058 return *this;
3059
3060 qsizetype difference = 0;
3061 if (Q_UNLIKELY(i > d.size))
3062 difference = i - d.size;
3063
3064 const qsizetype newSize = d.size + difference + insert_size;
3065
3066 if (d.needsDetach() || needsReallocate(*this, newSize)) {
3067 const auto cbegin = this->cbegin();
3068 const auto insert_start = difference == 0 ? std::next(cbegin, i) : cend();
3069 QString other;
3070 other.reserve(newSize);
3071 other.append(QStringView(cbegin, insert_start));
3072 if (difference > 0)
3073 other.resize(i, u' ');
3074 other.append(s);
3075 other.append(QStringView(insert_start, cend()));
3076 swap(other);
3077 return *this;
3078 }
3079
3080 if (i >= d.size) {
3081 d.detachAndGrow(QArrayData::GrowsAtEnd, difference + insert_size, nullptr, nullptr);
3082 Q_CHECK_PTR(d.data());
3083
3084 if (difference > 0)
3085 resize(i, u' ');
3086 append(s);
3087 } else {
3088 // Optimal insertion of Utf8 data is at the end, anywhere else could
3089 // potentially lead to moving characters twice if Utf8 data size
3090 // (variable-width) is less than the equivalent Utf16 data size
3091 QVarLengthArray<char16_t> buffer(insert_size); // ### optimize (QTBUG-108546)
3092 char16_t *b = QUtf8::convertToUnicode(buffer.data(), s);
3093 insert_helper(*this, i, QStringView(buffer.data(), b));
3094 }
3095
3096 return *this;
3097}
3098
3099/*!
3100 \fn QString& QString::insert(qsizetype position, const QChar *unicode, qsizetype size)
3101 \overload insert()
3102
3103 Inserts the first \a size characters of the QChar array \a unicode
3104 at the given index \a position in the string.
3105
3106 This string grows to accommodate the insertion. If \a position is beyond
3107 the end of the string, space characters are appended to the string to reach
3108 this \a position, followed by \a size characters of the QChar array
3109 \a unicode.
3110*/
3111QString& QString::insert(qsizetype i, const QChar *unicode, qsizetype size)
3112{
3113 if (i < 0 || size <= 0)
3114 return *this;
3115
3116 // In case when data points into "this"
3117 if (!d->needsDetach() && QtPrivate::q_points_into_range(unicode, *this)) {
3118 QVarLengthArray copy(unicode, unicode + size);
3119 insert(i, copy.data(), size);
3120 } else {
3121 insert_helper(*this, i, QStringView(unicode, size));
3122 }
3123
3124 return *this;
3125}
3126
3127/*!
3128 \fn QString& QString::insert(qsizetype position, QChar ch)
3129 \overload insert()
3130
3131 Inserts \a ch at the given index \a position in the string.
3132
3133 This string grows to accommodate the insertion. If \a position is beyond
3134 the end of the string, space characters are appended to the string to reach
3135 this \a position, followed by \a ch.
3136*/
3137
3138QString& QString::insert(qsizetype i, QChar ch)
3139{
3140 if (i < 0)
3141 i += d.size;
3142 return insert(i, &ch, 1);
3143}
3144
3145/*!
3146 Appends the string \a str onto the end of this string.
3147
3148 Example:
3149
3150 \snippet qstring/main.cpp 9
3151
3152 This is the same as using the insert() function:
3153
3154 \snippet qstring/main.cpp 10
3155
3156 The append() function is typically very fast (\l{constant time}),
3157 because QString preallocates extra space at the end of the string
3158 data so it can grow without reallocating the entire string each
3159 time.
3160
3161 \sa operator+=(), prepend(), insert()
3162*/
3163QString &QString::append(const QString &str)
3164{
3165 if (!str.isNull()) {
3166 if (isNull()) {
3167 if (Q_UNLIKELY(!str.d.isMutable()))
3168 assign(str); // fromRawData, so we do a deep copy
3169 else
3170 operator=(str);
3171 } else if (str.size()) {
3172 append(str.constData(), str.size());
3173 }
3174 }
3175 return *this;
3176}
3177
3178/*!
3179 \fn QString &QString::append(QStringView v)
3180 \overload append()
3181 \since 6.0
3182
3183 Appends the given string view \a v to this string and returns the result.
3184*/
3185
3186/*!
3187 \overload append()
3188 \since 5.0
3189
3190 Appends \a len characters from the QChar array \a str to this string.
3191*/
3192QString &QString::append(const QChar *str, qsizetype len)
3193{
3194 if (str && len > 0) {
3195 static_assert(sizeof(QChar) == sizeof(char16_t), "Unexpected difference in sizes");
3196 // the following should be safe as QChar uses char16_t as underlying data
3197 const char16_t *char16String = reinterpret_cast<const char16_t *>(str);
3198 d->growAppend(char16String, char16String + len);
3199 d.data()[d.size] = u'\0';
3200 }
3201 return *this;
3202}
3203
3204/*!
3205 \overload append()
3206
3207 Appends the Latin-1 string viewed by \a str to this string.
3208*/
3209QString &QString::append(QLatin1StringView str)
3210{
3211 append_helper(*this, str);
3212 return *this;
3213}
3214
3215/*!
3216 \overload append()
3217 \since 6.5
3218
3219 Appends the UTF-8 string view \a str to this string.
3220*/
3221QString &QString::append(QUtf8StringView str)
3222{
3223 append_helper(*this, str);
3224 return *this;
3225}
3226
3227/*! \fn QString &QString::append(const QByteArray &ba)
3228
3229 \overload append()
3230
3231 Appends the byte array \a ba to this string. The given byte array
3232 is converted to Unicode using the fromUtf8() function.
3233
3234 You can disable this function by defining \l QT_NO_CAST_FROM_ASCII
3235 when you compile your applications. This can be useful if you want
3236 to ensure that all user-visible strings go through QObject::tr(),
3237 for example.
3238*/
3239
3240/*! \fn QString &QString::append(const char *str)
3241
3242 \overload append()
3243
3244 Appends the string \a str to this string. The given const char
3245 pointer is converted to Unicode using the fromUtf8() function.
3246
3247 You can disable this function by defining \l QT_NO_CAST_FROM_ASCII
3248 when you compile your applications. This can be useful if you want
3249 to ensure that all user-visible strings go through QObject::tr(),
3250 for example.
3251*/
3252
3253/*!
3254 \overload append()
3255
3256 Appends the character \a ch to this string.
3257*/
3258QString &QString::append(QChar ch)
3259{
3260 d.detachAndGrow(QArrayData::GrowsAtEnd, 1, nullptr, nullptr);
3261 d->copyAppend(1, ch.unicode());
3262 d.data()[d.size] = '\0';
3263 return *this;
3264}
3265
3266/*! \fn QString &QString::prepend(const QString &str)
3267
3268 Prepends the string \a str to the beginning of this string and
3269 returns a reference to this string.
3270
3271 This operation is typically very fast (\l{constant time}), because
3272 QString preallocates extra space at the beginning of the string data,
3273 so it can grow without reallocating the entire string each time.
3274
3275 Example:
3276
3277 \snippet qstring/main.cpp 36
3278
3279 \sa append(), insert()
3280*/
3281
3282/*! \fn QString &QString::prepend(QLatin1StringView str)
3283
3284 \overload prepend()
3285
3286 Prepends the Latin-1 string viewed by \a str to this string.
3287*/
3288
3289/*! \fn QString &QString::prepend(QUtf8StringView str)
3290 \since 6.5
3291 \overload prepend()
3292
3293 Prepends the UTF-8 string view \a str to this string.
3294*/
3295
3296/*! \fn QString &QString::prepend(const QChar *str, qsizetype len)
3297 \since 5.5
3298 \overload prepend()
3299
3300 Prepends \a len characters from the QChar array \a str to this string and
3301 returns a reference to this string.
3302*/
3303
3304/*! \fn QString &QString::prepend(QStringView str)
3305 \since 6.0
3306 \overload prepend()
3307
3308 Prepends the string view \a str to the beginning of this string and
3309 returns a reference to this string.
3310*/
3311
3312/*! \fn QString &QString::prepend(const QByteArray &ba)
3313
3314 \overload prepend()
3315
3316 Prepends the byte array \a ba to this string. The byte array is
3317 converted to Unicode using the fromUtf8() function.
3318
3319 You can disable this function by defining
3320 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
3321 can be useful if you want to ensure that all user-visible strings
3322 go through QObject::tr(), for example.
3323*/
3324
3325/*! \fn QString &QString::prepend(const char *str)
3326
3327 \overload prepend()
3328
3329 Prepends the string \a str to this string. The const char pointer
3330 is converted to Unicode using the fromUtf8() function.
3331
3332 You can disable this function by defining
3333 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
3334 can be useful if you want to ensure that all user-visible strings
3335 go through QObject::tr(), for example.
3336*/
3337
3338/*! \fn QString &QString::prepend(QChar ch)
3339
3340 \overload prepend()
3341
3342 Prepends the character \a ch to this string.
3343*/
3344
3345/*!
3346 \fn QString &QString::assign(QAnyStringView v)
3347 \since 6.6
3348
3349 Replaces the contents of this string with a copy of \a v and returns a
3350 reference to this string.
3351
3352 The size of this string will be equal to the size of \a v, converted to
3353 UTF-16 as if by \c{v.toString()}. Unlike QAnyStringView::toString(), however,
3354 this function only allocates memory if the estimated size exceeds the capacity
3355 of this string or this string is shared.
3356
3357 \sa QAnyStringView::toString()
3358*/
3359
3360/*!
3361 \fn QString &QString::assign(qsizetype n, QChar c)
3362 \since 6.6
3363
3364 Replaces the contents of this string with \a n copies of \a c and
3365 returns a reference to this string.
3366
3367 The size of this string will be equal to \a n, which has to be non-negative.
3368
3369 This function will only allocate memory if \a n exceeds the capacity of this
3370 string or this string is shared.
3371
3372 \sa fill()
3373*/
3374
3375/*!
3376 \fn template <typename InputIterator, QString::if_compatible_iterator<InputIterator>> QString &QString::assign(InputIterator first, InputIterator last)
3377 \since 6.6
3378
3379 Replaces the contents of this string with a copy of the elements in the
3380 iterator range [\a first, \a last) and returns a reference to this string.
3381
3382 The size of this string will be equal to the decoded length of the elements
3383 in the range [\a first, \a last), which need not be the same as the length of
3384 the range itself, because this function transparently recodes the input
3385 character set to UTF-16.
3386
3387 This function will only allocate memory if the number of elements in the
3388 range, or, for non-UTF-16-encoded input, the maximum possible size of the
3389 resulting string, exceeds the capacity of this string, or if this string is
3390 shared.
3391
3392 \note The behavior is undefined if either argument is an iterator into *this or
3393 [\a first, \a last) is not a valid range.
3394
3395 \constraints
3396 \c InputIterator meets the requirements of a
3397 \l {https://en.cppreference.com/w/cpp/named_req/InputIterator} {LegacyInputIterator}
3398 and the \c{value_type} of \c InputIterator is one of the following character types:
3399 \list
3400 \li QChar
3401 \li QLatin1Char
3402 \li \c {char}
3403 \li \c {unsigned char}
3404 \li \c {signed char}
3405 \li \c {char8_t}
3406 \li \c char16_t
3407 \li (on platforms, such as Windows, where it is a 16-bit type) \c wchar_t
3408 \li \c char32_t
3409 \endlist
3410*/
3411
3412QString &QString::assign(QAnyStringView s)
3413{
3414 if (s.size() <= capacity() && isDetached()) {
3415 const auto offset = d.freeSpaceAtBegin();
3416 if (offset)
3417 d.setBegin(d.begin() - offset);
3418 resize(0);
3419 s.visit([this](auto input) {
3420 this->append(input);
3421 });
3422 } else {
3423 *this = s.toString();
3424 }
3425 return *this;
3426}
3427
3428#ifndef QT_BOOTSTRAPPED
3429QString &QString::assign_helper(const char32_t *data, qsizetype len)
3430{
3431 // worst case: each char32_t requires a surrogate pair, so
3432 const auto requiredCapacity = len * 2;
3433 if (requiredCapacity <= capacity() && isDetached()) {
3434 const auto offset = d.freeSpaceAtBegin();
3435 if (offset)
3436 d.setBegin(d.begin() - offset);
3437 auto begin = reinterpret_cast<QChar *>(d.begin());
3438 auto ba = QByteArrayView(reinterpret_cast<const std::byte*>(data), len * sizeof(char32_t));
3439 QStringConverter::State state;
3440 const auto end = QUtf32::convertToUnicode(begin, ba, &state, DetectEndianness);
3441 d.size = end - begin;
3442 d.data()[d.size] = u'\0';
3443 } else {
3444 *this = QString::fromUcs4(data, len);
3445 }
3446 return *this;
3447}
3448#endif
3449
3450/*!
3451 \fn QString &QString::remove(qsizetype position, qsizetype n)
3452
3453 Removes \a n characters from the string, starting at the given \a
3454 position index, and returns a reference to the string.
3455
3456 If the specified \a position index is within the string, but \a
3457 position + \a n is beyond the end of the string, the string is
3458 truncated at the specified \a position.
3459
3460 If \a n is <= 0 nothing is changed.
3461
3462 \snippet qstring/main.cpp 37
3463
3464//! [shrinking-erase]
3465 Element removal will preserve the string's capacity and not reduce the
3466 amount of allocated memory. To shed extra capacity and free as much memory
3467 as possible, call squeeze() after the last change to the string's size.
3468//! [shrinking-erase]
3469
3470 \sa insert(), replace()
3471*/
3472QString &QString::remove(qsizetype pos, qsizetype len)
3473{
3474 if (pos < 0) // count from end of string
3475 pos += size();
3476
3477 if (size_t(pos) >= size_t(size()) || len <= 0)
3478 return *this;
3479
3480 len = std::min(len, size() - pos);
3481
3482 if (!d->isShared()) {
3483 d->erase(d.begin() + pos, len);
3484 d.data()[d.size] = u'\0';
3485 } else {
3486 // TODO: either reserve "size()", which is bigger than needed, or
3487 // modify the shrinking-erase docs of this method (since the size
3488 // of "copy" won't have any extra capacity any more)
3489 const qsizetype sz = size() - len;
3490 QString copy{sz, Qt::Uninitialized};
3491 auto begin = d.begin();
3492 auto toRemove_start = d.begin() + pos;
3493 copy.d->copyRanges({{begin, toRemove_start},
3494 {toRemove_start + len, d.end()}});
3495 swap(copy);
3496 }
3497 return *this;
3498}
3499
3500template<typename T>
3501static void removeStringImpl(QString &s, const T &needle, Qt::CaseSensitivity cs)
3502{
3503 const auto needleSize = needle.size();
3504 if (!needleSize)
3505 return;
3506
3507 // avoid detach if nothing to do:
3508 qsizetype i = s.indexOf(needle, 0, cs);
3509 if (i < 0)
3510 return;
3511
3512 QString::DataPointer &dptr = s.data_ptr();
3513 auto begin = dptr.begin();
3514 auto end = dptr.end();
3515
3516 auto copyFunc = [&](auto &dst) {
3517 auto src = begin + i + needleSize;
3518 while (src < end) {
3519 i = s.indexOf(needle, std::distance(begin, src), cs);
3520 auto hit = i == -1 ? end : begin + i;
3521 dst = std::copy(src, hit, dst);
3522 src = hit + needleSize;
3523 }
3524 return dst;
3525 };
3526
3527 if (!dptr->needsDetach()) {
3528 auto dst = begin + i;
3529 dst = copyFunc(dst);
3530 s.truncate(std::distance(begin, dst));
3531 } else {
3532 QString copy{s.size(), Qt::Uninitialized};
3533 auto copy_begin = copy.begin();
3534 auto dst = std::copy(begin, begin + i, copy_begin); // Chunk before the first hit
3535 dst = copyFunc(dst);
3536 copy.resize(std::distance(copy_begin, dst));
3537 s.swap(copy);
3538 }
3539}
3540
3541/*!
3542 Removes every occurrence of the given \a str string in this
3543 string, and returns a reference to this string.
3544
3545 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
3546
3547 This is the same as \c replace(str, "", cs).
3548
3549 \include qstring.cpp shrinking-erase
3550
3551 \sa replace()
3552*/
3553QString &QString::remove(const QString &str, Qt::CaseSensitivity cs)
3554{
3555 const auto s = str.d.data();
3556 if (QtPrivate::q_points_into_range(s, d))
3557 removeStringImpl(*this, QStringView{QVarLengthArray(s, s + str.size())}, cs);
3558 else
3559 removeStringImpl(*this, qToStringViewIgnoringNull(str), cs);
3560 return *this;
3561}
3562
3563/*!
3564 \since 5.11
3565 \overload
3566
3567 Removes every occurrence of the given Latin-1 string viewed by \a str
3568 from this string, and returns a reference to this string.
3569
3570 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
3571
3572 This is the same as \c replace(str, "", cs).
3573
3574 \include qstring.cpp shrinking-erase
3575
3576 \sa replace()
3577*/
3578QString &QString::remove(QLatin1StringView str, Qt::CaseSensitivity cs)
3579{
3580 removeStringImpl(*this, str, cs);
3581 return *this;
3582}
3583
3584/*!
3585 \fn QString &QString::removeAt(qsizetype pos)
3586
3587 \since 6.5
3588
3589 Removes the character at index \a pos. If \a pos is out of bounds
3590 (i.e. \a pos >= size()), this function does nothing.
3591
3592 \sa remove()
3593*/
3594
3595/*!
3596 \fn QString &QString::removeFirst()
3597
3598 \since 6.5
3599
3600 Removes the first character in this string. If the string is empty,
3601 this function does nothing.
3602
3603 \sa remove()
3604*/
3605
3606/*!
3607 \fn QString &QString::removeLast()
3608
3609 \since 6.5
3610
3611 Removes the last character in this string. If the string is empty,
3612 this function does nothing.
3613
3614 \sa remove()
3615*/
3616
3617/*!
3618 Removes every occurrence of the character \a ch in this string, and
3619 returns a reference to this string.
3620
3621 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
3622
3623 Example:
3624
3625 \snippet qstring/main.cpp 38
3626
3627 This is the same as \c replace(ch, "", cs).
3628
3629 \include qstring.cpp shrinking-erase
3630
3631 \sa replace()
3632*/
3633QString &QString::remove(QChar ch, Qt::CaseSensitivity cs)
3634{
3635 const qsizetype idx = indexOf(ch, 0, cs);
3636 if (idx == -1)
3637 return *this;
3638
3639 const bool isCase = cs == Qt::CaseSensitive;
3640 ch = isCase ? ch : ch.toCaseFolded();
3641 auto match = [ch, isCase](QChar x) {
3642 return ch == (isCase ? x : x.toCaseFolded());
3643 };
3644
3645
3646 auto begin = d.begin();
3647 auto first_match = begin + idx;
3648 auto end = d.end();
3649 if (!d->isShared()) {
3650 auto it = std::remove_if(first_match, end, match);
3651 d->erase(it, std::distance(it, end));
3652 d.data()[d.size] = u'\0';
3653 } else {
3654 // Instead of detaching, create a new string and copy all characters except for
3655 // the ones we're removing
3656 // TODO: size() is more than the needed since "copy" would be shorter
3657 QString copy{size(), Qt::Uninitialized};
3658 auto dst = copy.d.begin();
3659 auto it = std::copy(begin, first_match, dst); // Chunk before idx
3660 it = std::remove_copy_if(first_match + 1, end, it, match);
3661 copy.d.size = std::distance(dst, it);
3662 copy.d.data()[copy.d.size] = u'\0';
3663 *this = std::move(copy);
3664 }
3665 return *this;
3666}
3667
3668/*!
3669 \fn QString &QString::remove(const QRegularExpression &re)
3670 \since 5.0
3671
3672 Removes every occurrence of the regular expression \a re in the
3673 string, and returns a reference to the string. For example:
3674
3675 \snippet qstring/main.cpp 96
3676
3677 \include qstring.cpp shrinking-erase
3678
3679 \sa indexOf(), lastIndexOf(), replace()
3680*/
3681
3682/*!
3683 \fn template <typename Predicate> QString &QString::removeIf(Predicate pred)
3684 \since 6.1
3685
3686 Removes all elements for which the predicate \a pred returns true
3687 from the string. Returns a reference to the string.
3688
3689 \sa remove()
3690*/
3691
3692static void replace_helper(QString &str, QSpan<qsizetype> indices, qsizetype blen, QStringView after)
3693{
3694 const qsizetype oldSize = str.data_ptr().size;
3695 const qsizetype adjust = indices.size() * (after.size() - blen);
3696 const qsizetype newSize = oldSize + adjust;
3697 using A = QStringAlgorithms<QString>;
3698 if (str.data_ptr().needsDetach() || needsReallocate(str, newSize)) {
3699 A::replace_helper(str, blen, after, indices);
3700 return;
3701 }
3702
3703 if (QtPrivate::q_points_into_range(after.begin(), str)) {
3704 // Copy after if it lies inside our own d.b area (which we could
3705 // possibly invalidate via a realloc or modify by replacement)
3706 A::replace_helper(str, blen, QVarLengthArray(after.begin(), after.end()), indices);
3707 } else {
3708 A::replace_helper(str, blen, after, indices);
3709 }
3710}
3711
3712/*!
3713 \fn QString &QString::replace(qsizetype position, qsizetype n, const QString &after)
3714
3715 Replaces \a n characters beginning at index \a position with
3716 the string \a after and returns a reference to this string.
3717
3718 \note If the specified \a position index is within the string,
3719 but \a position + \a n goes outside the string's range,
3720 then \a n will be adjusted to stop at the end of the string.
3721
3722 Example:
3723
3724 \snippet qstring/main.cpp 40
3725
3726 \sa insert(), remove()
3727*/
3728QString &QString::replace(qsizetype pos, qsizetype len, const QString &after)
3729{
3730 return replace(pos, len, after.constData(), after.size());
3731}
3732
3733/*!
3734 \fn QString &QString::replace(qsizetype position, qsizetype n, const QChar *after, qsizetype alen)
3735 \overload replace()
3736 Replaces \a n characters beginning at index \a position with the
3737 first \a alen characters of the QChar array \a after and returns a
3738 reference to this string.
3739
3740 \a n must not be negative.
3741*/
3742QString &QString::replace(qsizetype pos, qsizetype len, const QChar *after, qsizetype alen)
3743{
3744 Q_PRE(len >= 0);
3745
3746 if (size_t(pos) > size_t(this->size()))
3747 return *this;
3748 if (len > this->size() - pos)
3749 len = this->size() - pos;
3750
3751 qsizetype indices[] = {pos};
3752 replace_helper(*this, indices, len, QStringView{after, alen});
3753 return *this;
3754}
3755
3756/*!
3757 \fn QString &QString::replace(qsizetype position, qsizetype n, QChar after)
3758 \overload replace()
3759
3760 Replaces \a n characters beginning at index \a position with the
3761 character \a after and returns a reference to this string.
3762*/
3763QString &QString::replace(qsizetype pos, qsizetype len, QChar after)
3764{
3765 return replace(pos, len, &after, 1);
3766}
3767
3768/*!
3769 \overload replace()
3770 Replaces every occurrence of the string \a before with the string \a
3771 after and returns a reference to this string.
3772
3773 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
3774
3775 Example:
3776
3777 \snippet qstring/main.cpp 41
3778
3779 \note The replacement text is not rescanned after it is inserted.
3780
3781 Example:
3782
3783 \snippet qstring/main.cpp 86
3784
3785//! [empty-before-arg-in-replace]
3786 \note If you use an empty \a before argument, the \a after argument will be
3787 inserted \e {before and after} each character of the string.
3788//! [empty-before-arg-in-replace]
3789
3790*/
3791QString &QString::replace(const QString &before, const QString &after, Qt::CaseSensitivity cs)
3792{
3793 return replace(before.constData(), before.size(), after.constData(), after.size(), cs);
3794}
3795
3796/*!
3797 \since 4.5
3798 \overload replace()
3799
3800 Replaces each occurrence in this string of the first \a blen
3801 characters of \a before with the first \a alen characters of \a
3802 after and returns a reference to this string.
3803
3804 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
3805
3806 \note If \a before points to an \e empty string (that is, \a blen == 0),
3807 the string pointed to by \a after will be inserted \e {before and after}
3808 each character in this string.
3809*/
3810QString &QString::replace(const QChar *before, qsizetype blen,
3811 const QChar *after, qsizetype alen,
3812 Qt::CaseSensitivity cs)
3813{
3814 if (isEmpty()) {
3815 if (blen)
3816 return *this;
3817 } else {
3818 if (cs == Qt::CaseSensitive && before == after && blen == alen)
3819 return *this;
3820 }
3821 if (alen == 0 && blen == 0)
3822 return *this;
3823 if (alen == 1 && blen == 1)
3824 return replace(*before, *after, cs);
3825
3826 QStringMatcher matcher(before, blen, cs);
3827
3828 qsizetype index = 0;
3829
3830 QVarLengthArray<qsizetype> indices;
3831 while ((index = matcher.indexIn(*this, index)) != -1) {
3832 indices.push_back(index);
3833 if (blen) // Step over before:
3834 index += blen;
3835 else // Only count one instance of empty between any two characters:
3836 index++;
3837 }
3838 if (indices.isEmpty())
3839 return *this;
3840
3841 replace_helper(*this, indices, blen, QStringView{after, alen});
3842 return *this;
3843}
3844
3845/*!
3846 \overload replace()
3847 Replaces every occurrence of the character \a ch in the string with
3848 \a after and returns a reference to this string.
3849
3850 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
3851*/
3852QString& QString::replace(QChar ch, const QString &after, Qt::CaseSensitivity cs)
3853{
3854 if (after.size() == 0)
3855 return remove(ch, cs);
3856
3857 if (after.size() == 1)
3858 return replace(ch, after.front(), cs);
3859
3860 if (size() == 0)
3861 return *this;
3862
3863 const char16_t cc = (cs == Qt::CaseSensitive ? ch.unicode() : ch.toCaseFolded().unicode());
3864
3865 QVarLengthArray<qsizetype> indices;
3866 if (cs == Qt::CaseSensitive) {
3867 const char16_t *begin = d.begin();
3868 const char16_t *end = d.end();
3869 QStringView view(begin, end);
3870 const char16_t *hit = nullptr;
3871 while ((hit = QtPrivate::qustrchr(view, cc)) != end) {
3872 indices.push_back(std::distance(begin, hit));
3873 view = QStringView(std::next(hit), end);
3874 }
3875 } else {
3876 for (qsizetype i = 0; i < d.size; ++i)
3877 if (QChar::toCaseFolded(d.data()[i]) == cc)
3878 indices.push_back(i);
3879 }
3880 if (indices.isEmpty())
3881 return *this;
3882
3883 replace_helper(*this, indices, 1, after);
3884 return *this;
3885}
3886
3887/*!
3888 \overload replace()
3889 Replaces every occurrence of the character \a before with the
3890 character \a after and returns a reference to this string.
3891
3892 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
3893*/
3894QString& QString::replace(QChar before, QChar after, Qt::CaseSensitivity cs)
3895{
3896 const qsizetype idx = indexOf(before, 0, cs);
3897 if (idx == -1)
3898 return *this;
3899
3900 const char16_t achar = after.unicode();
3901 char16_t bchar = before.unicode();
3902
3903 auto matchesCIS = [](char16_t beforeChar) {
3904 return [beforeChar](char16_t ch) { return foldAndCompare(ch, beforeChar); };
3905 };
3906
3907 auto hit = d.begin() + idx;
3908 if (!d.needsDetach()) {
3909 *hit++ = achar;
3910 if (cs == Qt::CaseSensitive) {
3911 std::replace(hit, d.end(), bchar, achar);
3912 } else {
3913 bchar = foldCase(bchar);
3914 std::replace_if(hit, d.end(), matchesCIS(bchar), achar);
3915 }
3916 } else {
3917 QString other{ d.size, Qt::Uninitialized };
3918 auto dest = std::copy(d.begin(), hit, other.d.begin());
3919 *dest++ = achar;
3920 ++hit;
3921 if (cs == Qt::CaseSensitive) {
3922 std::replace_copy(hit, d.end(), dest, bchar, achar);
3923 } else {
3924 bchar = foldCase(bchar);
3925 std::replace_copy_if(hit, d.end(), dest, matchesCIS(bchar), achar);
3926 }
3927
3928 swap(other);
3929 }
3930 return *this;
3931}
3932
3933/*!
3934 \since 4.5
3935 \overload replace()
3936
3937 Replaces every occurrence in this string of the Latin-1 string viewed
3938 by \a before with the Latin-1 string viewed by \a after, and returns a
3939 reference to this string.
3940
3941 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
3942
3943 \note The text is not rescanned after a replacement.
3944
3945 \include qstring.cpp empty-before-arg-in-replace
3946*/
3947QString &QString::replace(QLatin1StringView before, QLatin1StringView after, Qt::CaseSensitivity cs)
3948{
3949 const qsizetype alen = after.size();
3950 const qsizetype blen = before.size();
3951 if (blen == 1 && alen == 1)
3952 return replace(before.front(), after.front(), cs);
3953
3954 QVarLengthArray<char16_t> a = qt_from_latin1_to_qvla(after);
3955 QVarLengthArray<char16_t> b = qt_from_latin1_to_qvla(before);
3956 return replace((const QChar *)b.data(), blen, (const QChar *)a.data(), alen, cs);
3957}
3958
3959/*!
3960 \since 4.5
3961 \overload replace()
3962
3963 Replaces every occurrence in this string of the Latin-1 string viewed
3964 by \a before with the string \a after, and returns a reference to this
3965 string.
3966
3967 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
3968
3969 \note The text is not rescanned after a replacement.
3970
3971 \include qstring.cpp empty-before-arg-in-replace
3972*/
3973QString &QString::replace(QLatin1StringView before, const QString &after, Qt::CaseSensitivity cs)
3974{
3975 const qsizetype blen = before.size();
3976 if (blen == 1 && after.size() == 1)
3977 return replace(before.front(), after.front(), cs);
3978
3979 QVarLengthArray<char16_t> b = qt_from_latin1_to_qvla(before);
3980 return replace((const QChar *)b.data(), blen, after.constData(), after.d.size, cs);
3981}
3982
3983/*!
3984 \since 4.5
3985 \overload replace()
3986
3987 Replaces every occurrence of the string \a before with the string \a
3988 after and returns a reference to this string.
3989
3990 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
3991
3992 \note The text is not rescanned after a replacement.
3993
3994 \include qstring.cpp empty-before-arg-in-replace
3995*/
3996QString &QString::replace(const QString &before, QLatin1StringView after, Qt::CaseSensitivity cs)
3997{
3998 const qsizetype alen = after.size();
3999 if (before.size() == 1 && alen == 1)
4000 return replace(before.front(), after.front(), cs);
4001
4002 QVarLengthArray<char16_t> a = qt_from_latin1_to_qvla(after);
4003 return replace(before.constData(), before.d.size, (const QChar *)a.data(), alen, cs);
4004}
4005
4006/*!
4007 \since 4.5
4008 \overload replace()
4009
4010 Replaces every occurrence of the character \a c with the string \a
4011 after and returns a reference to this string.
4012
4013 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4014
4015 \note The text is not rescanned after a replacement.
4016*/
4017QString &QString::replace(QChar c, QLatin1StringView after, Qt::CaseSensitivity cs)
4018{
4019 const qsizetype alen = after.size();
4020 if (alen == 1)
4021 return replace(c, after.front(), cs);
4022
4023 QVarLengthArray<char16_t> a = qt_from_latin1_to_qvla(after);
4024 return replace(&c, 1, (const QChar *)a.data(), alen, cs);
4025}
4026
4027/*!
4028 \fn bool QString::operator==(const QString &lhs, const QString &rhs)
4029 \overload operator==()
4030
4031 Returns \c true if string \a lhs is equal to string \a rhs; otherwise
4032 returns \c false.
4033
4034 \include qstring.cpp compare-isNull-vs-isEmpty
4035
4036 \sa {Comparing Strings}
4037*/
4038
4039/*!
4040 \fn bool QString::operator==(const QString &lhs, const QLatin1StringView &rhs)
4041
4042 \overload operator==()
4043
4044 Returns \c true if \a lhs is equal to \a rhs; otherwise
4045 returns \c false.
4046*/
4047
4048/*!
4049 \fn bool QString::operator==(const QLatin1StringView &lhs, const QString &rhs)
4050
4051 \overload operator==()
4052
4053 Returns \c true if \a lhs is equal to \a rhs; otherwise
4054 returns \c false.
4055*/
4056
4057/*! \fn bool QString::operator==(const QString &lhs, const QByteArray &rhs)
4058
4059 \overload operator==()
4060
4061 The \a rhs byte array is converted to a QUtf8StringView.
4062
4063 You can disable this operator by defining
4064 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
4065 can be useful if you want to ensure that all user-visible strings
4066 go through QObject::tr(), for example.
4067
4068 Returns \c true if string \a lhs is lexically equal to \a rhs.
4069 Otherwise returns \c false.
4070*/
4071
4072/*! \fn bool QString::operator==(const QString &lhs, const char * const &rhs)
4073
4074 \overload operator==()
4075
4076 The \a rhs const char pointer is converted to a QUtf8StringView.
4077
4078 You can disable this operator by defining
4079 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
4080 can be useful if you want to ensure that all user-visible strings
4081 go through QObject::tr(), for example.
4082*/
4083
4084/*!
4085 \fn bool QString::operator<(const QString &lhs, const QString &rhs)
4086
4087 \overload operator<()
4088
4089 Returns \c true if string \a lhs is lexically less than string
4090 \a rhs; otherwise returns \c false.
4091
4092 \sa {Comparing Strings}
4093*/
4094
4095/*!
4096 \fn bool QString::operator<(const QString &lhs, const QLatin1StringView &rhs)
4097
4098 \overload operator<()
4099
4100 Returns \c true if \a lhs is lexically less than \a rhs;
4101 otherwise returns \c false.
4102*/
4103
4104/*!
4105 \fn bool QString::operator<(const QLatin1StringView &lhs, const QString &rhs)
4106
4107 \overload operator<()
4108
4109 Returns \c true if \a lhs is lexically less than \a rhs;
4110 otherwise returns \c false.
4111*/
4112
4113/*! \fn bool QString::operator<(const QString &lhs, const QByteArray &rhs)
4114
4115 \overload operator<()
4116
4117 The \a rhs byte array is converted to a QUtf8StringView.
4118 If any NUL characters ('\\0') are embedded in the byte array, they will be
4119 included in the transformation.
4120
4121 You can disable this operator by defining
4122 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
4123 can be useful if you want to ensure that all user-visible strings
4124 go through QObject::tr(), for example.
4125*/
4126
4127/*! \fn bool QString::operator<(const QString &lhs, const char * const &rhs)
4128
4129 Returns \c true if string \a lhs is lexically less than string \a rhs.
4130 Otherwise returns \c false.
4131
4132 \overload operator<()
4133
4134 The \a rhs const char pointer is converted to a QUtf8StringView.
4135
4136 You can disable this operator by defining
4137 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
4138 can be useful if you want to ensure that all user-visible strings
4139 go through QObject::tr(), for example.
4140*/
4141
4142/*! \fn bool QString::operator<=(const QString &lhs, const QString &rhs)
4143
4144 Returns \c true if string \a lhs is lexically less than or equal to
4145 string \a rhs; otherwise returns \c false.
4146
4147 \sa {Comparing Strings}
4148*/
4149
4150/*!
4151 \fn bool QString::operator<=(const QString &lhs, const QLatin1StringView &rhs)
4152
4153 \overload operator<=()
4154
4155 Returns \c true if \a lhs is lexically less than or equal to \a rhs;
4156 otherwise returns \c false.
4157*/
4158
4159/*!
4160 \fn bool QString::operator<=(const QLatin1StringView &lhs, const QString &rhs)
4161
4162 \overload operator<=()
4163
4164 Returns \c true if \a lhs is lexically less than or equal to \a rhs;
4165 otherwise returns \c false.
4166*/
4167
4168/*! \fn bool QString::operator<=(const QString &lhs, const QByteArray &rhs)
4169
4170 \overload operator<=()
4171
4172 The \a rhs byte array is converted to a QUtf8StringView.
4173 If any NUL characters ('\\0') are embedded in the byte array, they will be
4174 included in the transformation.
4175
4176 You can disable this operator by defining
4177 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
4178 can be useful if you want to ensure that all user-visible strings
4179 go through QObject::tr(), for example.
4180*/
4181
4182/*! \fn bool QString::operator<=(const QString &lhs, const char * const &rhs)
4183
4184 \overload operator<=()
4185
4186 The \a rhs const char pointer is converted to a QUtf8StringView.
4187
4188 You can disable this operator by defining
4189 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
4190 can be useful if you want to ensure that all user-visible strings
4191 go through QObject::tr(), for example.
4192*/
4193
4194/*! \fn bool QString::operator>(const QString &lhs, const QString &rhs)
4195
4196 Returns \c true if string \a lhs is lexically greater than string \a rhs;
4197 otherwise returns \c false.
4198
4199 \sa {Comparing Strings}
4200*/
4201
4202/*!
4203 \fn bool QString::operator>(const QString &lhs, const QLatin1StringView &rhs)
4204
4205 \overload operator>()
4206
4207 Returns \c true if \a lhs is lexically greater than \a rhs;
4208 otherwise returns \c false.
4209*/
4210
4211/*!
4212 \fn bool QString::operator>(const QLatin1StringView &lhs, const QString &rhs)
4213
4214 \overload operator>()
4215
4216 Returns \c true if \a lhs is lexically greater than \a rhs;
4217 otherwise returns \c false.
4218*/
4219
4220/*! \fn bool QString::operator>(const QString &lhs, const QByteArray &rhs)
4221
4222 \overload operator>()
4223
4224 The \a rhs byte array is converted to a QUtf8StringView.
4225 If any NUL characters ('\\0') are embedded in the byte array, they will be
4226 included in the transformation.
4227
4228 You can disable this operator by defining
4229 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
4230 can be useful if you want to ensure that all user-visible strings
4231 go through QObject::tr(), for example.
4232*/
4233
4234/*! \fn bool QString::operator>(const QString &lhs, const char * const &rhs)
4235
4236 \overload operator>()
4237
4238 The \a rhs const char pointer is converted to a QUtf8StringView.
4239
4240 You can disable this operator by defining \l QT_NO_CAST_FROM_ASCII
4241 when you compile your applications. This can be useful if you want
4242 to ensure that all user-visible strings go through QObject::tr(),
4243 for example.
4244*/
4245
4246/*! \fn bool QString::operator>=(const QString &lhs, const QString &rhs)
4247
4248 Returns \c true if string \a lhs is lexically greater than or equal to
4249 string \a rhs; otherwise returns \c false.
4250
4251 \sa {Comparing Strings}
4252*/
4253
4254/*!
4255 \fn bool QString::operator>=(const QString &lhs, const QLatin1StringView &rhs)
4256
4257 \overload operator>=()
4258
4259 Returns \c true if \a lhs is lexically greater than or equal to \a rhs;
4260 otherwise returns \c false.
4261*/
4262
4263/*!
4264 \fn bool QString::operator>=(const QLatin1StringView &lhs, const QString &rhs)
4265
4266 \overload operator>=()
4267
4268 Returns \c true if \a lhs is lexically greater than or equal to \a rhs;
4269 otherwise returns \c false.
4270*/
4271
4272/*! \fn bool QString::operator>=(const QString &lhs, const QByteArray &rhs)
4273
4274 \overload operator>=()
4275
4276 The \a rhs byte array is converted to a QUtf8StringView.
4277 If any NUL characters ('\\0') are embedded in the byte array, they will be
4278 included in the transformation.
4279
4280 You can disable this operator by defining \l QT_NO_CAST_FROM_ASCII
4281 when you compile your applications. This can be useful if you want
4282 to ensure that all user-visible strings go through QObject::tr(),
4283 for example.
4284*/
4285
4286/*! \fn bool QString::operator>=(const QString &lhs, const char * const &rhs)
4287
4288 \overload operator>=()
4289
4290 The \a rhs const char pointer is converted to a QUtf8StringView.
4291
4292 You can disable this operator by defining \l QT_NO_CAST_FROM_ASCII
4293 when you compile your applications. This can be useful if you want
4294 to ensure that all user-visible strings go through QObject::tr(),
4295 for example.
4296*/
4297
4298/*! \fn bool QString::operator!=(const QString &lhs, const QString &rhs)
4299
4300 Returns \c true if string \a lhs is not equal to string \a rhs;
4301 otherwise returns \c false.
4302
4303 \sa {Comparing Strings}
4304*/
4305
4306/*! \fn bool QString::operator!=(const QString &lhs, const QLatin1StringView &rhs)
4307
4308 Returns \c true if string \a lhs is not equal to string \a rhs.
4309 Otherwise returns \c false.
4310
4311 \overload operator!=()
4312*/
4313
4314/*! \fn bool QString::operator!=(const QString &lhs, const QByteArray &rhs)
4315
4316 \overload operator!=()
4317
4318 The \a rhs byte array is converted to a QUtf8StringView.
4319 If any NUL characters ('\\0') are embedded in the byte array, they will be
4320 included in the transformation.
4321
4322 You can disable this operator by defining \l QT_NO_CAST_FROM_ASCII
4323 when you compile your applications. This can be useful if you want
4324 to ensure that all user-visible strings go through QObject::tr(),
4325 for example.
4326*/
4327
4328/*! \fn bool QString::operator!=(const QString &lhs, const char * const &rhs)
4329
4330 \overload operator!=()
4331
4332 The \a rhs const char pointer is converted to a QUtf8StringView.
4333
4334 You can disable this operator by defining
4335 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
4336 can be useful if you want to ensure that all user-visible strings
4337 go through QObject::tr(), for example.
4338*/
4339
4340/*! \fn bool QString::operator==(const QByteArray &lhs, const QString &rhs)
4341
4342 Returns \c true if byte array \a lhs is equal to the UTF-8 encoding of
4343 \a rhs; otherwise returns \c false.
4344
4345 The comparison is case sensitive.
4346
4347 You can disable this operator by defining \c
4348 QT_NO_CAST_FROM_ASCII when you compile your applications. You
4349 then need to call QString::fromUtf8(), QString::fromLatin1(),
4350 or QString::fromLocal8Bit() explicitly if you want to convert the byte
4351 array to a QString before doing the comparison.
4352*/
4353
4354/*! \fn bool QString::operator!=(const QByteArray &lhs, const QString &rhs)
4355
4356 Returns \c true if byte array \a lhs is not equal to the UTF-8 encoding of
4357 \a rhs; otherwise returns \c false.
4358
4359 The comparison is case sensitive.
4360
4361 You can disable this operator by defining \c
4362 QT_NO_CAST_FROM_ASCII when you compile your applications. You
4363 then need to call QString::fromUtf8(), QString::fromLatin1(),
4364 or QString::fromLocal8Bit() explicitly if you want to convert the byte
4365 array to a QString before doing the comparison.
4366*/
4367
4368/*! \fn bool QString::operator<(const QByteArray &lhs, const QString &rhs)
4369
4370 Returns \c true if byte array \a lhs is lexically less than the UTF-8 encoding
4371 of \a rhs; otherwise returns \c false.
4372
4373 The comparison is case sensitive.
4374
4375 You can disable this operator by defining \c
4376 QT_NO_CAST_FROM_ASCII when you compile your applications. You
4377 then need to call QString::fromUtf8(), QString::fromLatin1(),
4378 or QString::fromLocal8Bit() explicitly if you want to convert the byte
4379 array to a QString before doing the comparison.
4380*/
4381
4382/*! \fn bool QString::operator>(const QByteArray &lhs, const QString &rhs)
4383
4384 Returns \c true if byte array \a lhs is lexically greater than the UTF-8
4385 encoding of \a rhs; otherwise returns \c false.
4386
4387 The comparison is case sensitive.
4388
4389 You can disable this operator by defining \c
4390 QT_NO_CAST_FROM_ASCII when you compile your applications. You
4391 then need to call QString::fromUtf8(), QString::fromLatin1(),
4392 or QString::fromLocal8Bit() explicitly if you want to convert the byte
4393 array to a QString before doing the comparison.
4394*/
4395
4396/*! \fn bool QString::operator<=(const QByteArray &lhs, const QString &rhs)
4397
4398 Returns \c true if byte array \a lhs is lexically less than or equal to the
4399 UTF-8 encoding of \a rhs; otherwise returns \c false.
4400
4401 The comparison is case sensitive.
4402
4403 You can disable this operator by defining \c
4404 QT_NO_CAST_FROM_ASCII when you compile your applications. You
4405 then need to call QString::fromUtf8(), QString::fromLatin1(),
4406 or QString::fromLocal8Bit() explicitly if you want to convert the byte
4407 array to a QString before doing the comparison.
4408*/
4409
4410/*! \fn bool QString::operator>=(const QByteArray &lhs, const QString &rhs)
4411
4412 Returns \c true if byte array \a lhs is lexically greater than or equal to the UTF-8
4413 encoding of \a rhs; otherwise returns \c false.
4414
4415 The comparison is case sensitive.
4416
4417 You can disable this operator by defining \c
4418 QT_NO_CAST_FROM_ASCII when you compile your applications. You
4419 then need to call QString::fromUtf8(), QString::fromLatin1(),
4420 or QString::fromLocal8Bit() explicitly if you want to convert the byte
4421 array to a QString before doing the comparison.
4422*/
4423
4424/*!
4425 \include qstring.qdocinc {qstring-first-index-of} {string} {str}
4426
4427 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4428
4429 Example:
4430
4431 \snippet qstring/main.cpp 24
4432
4433 \include qstring.qdocinc negative-index-start-search-from-end
4434
4435 \sa lastIndexOf(), contains(), count()
4436*/
4437qsizetype QString::indexOf(const QString &str, qsizetype from, Qt::CaseSensitivity cs) const
4438{
4439 return QtPrivate::findString(QStringView(unicode(), size()), from, QStringView(str.unicode(), str.size()), cs);
4440}
4441
4442/*!
4443 \fn qsizetype QString::indexOf(QStringView str, qsizetype from, Qt::CaseSensitivity cs) const
4444 \since 5.14
4445 \overload indexOf()
4446
4447 \include qstring.qdocinc {qstring-first-index-of} {string view} {str}
4448
4449 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4450
4451 \include qstring.qdocinc negative-index-start-search-from-end
4452
4453 \sa QStringView::indexOf(), lastIndexOf(), contains(), count()
4454*/
4455
4456/*!
4457 \since 4.5
4458
4459 \include {qstring.qdocinc} {qstring-first-index-of} {Latin-1 string viewed by} {str}
4460
4461 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4462
4463 Example:
4464
4465 \snippet qstring/main.cpp 24
4466
4467 \include qstring.qdocinc negative-index-start-search-from-end
4468
4469 \sa lastIndexOf(), contains(), count()
4470*/
4471
4472qsizetype QString::indexOf(QLatin1StringView str, qsizetype from, Qt::CaseSensitivity cs) const
4473{
4474 return QtPrivate::findString(QStringView(unicode(), size()), from, str, cs);
4475}
4476
4477/*!
4478 \fn qsizetype QString::indexOf(QChar ch, qsizetype from, Qt::CaseSensitivity cs) const
4479 \overload indexOf()
4480
4481 \include qstring.qdocinc {qstring-first-index-of} {character} {ch}
4482*/
4483
4484/*!
4485 \include qstring.qdocinc {qstring-last-index-of} {string} {str}
4486
4487 \include qstring.qdocinc negative-index-start-search-from-end
4488
4489 Returns -1 if \a str is not found.
4490
4491 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4492
4493 Example:
4494
4495 \snippet qstring/main.cpp 29
4496
4497 \note When searching for a 0-length \a str, the match at the end of
4498 the data is excluded from the search by a negative \a from, even
4499 though \c{-1} is normally thought of as searching from the end of the
4500 string: the match at the end is \e after the last character, so it is
4501 excluded. To include such a final empty match, either give a positive
4502 value for \a from or omit the \a from parameter entirely.
4503
4504 \sa indexOf(), contains(), count()
4505*/
4506qsizetype QString::lastIndexOf(const QString &str, qsizetype from, Qt::CaseSensitivity cs) const
4507{
4508 return QtPrivate::lastIndexOf(QStringView(*this), from, str, cs);
4509}
4510
4511/*!
4512 \fn qsizetype QString::lastIndexOf(const QString &str, Qt::CaseSensitivity cs = Qt::CaseSensitive) const
4513 \since 6.2
4514 \overload lastIndexOf()
4515
4516 Returns the index position of the last occurrence of the string \a
4517 str in this string. Returns -1 if \a str is not found.
4518
4519 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4520
4521 Example:
4522
4523 \snippet qstring/main.cpp 29
4524
4525 \sa indexOf(), contains(), count()
4526*/
4527
4528
4529/*!
4530 \since 4.5
4531 \overload lastIndexOf()
4532
4533 \include qstring.qdocinc {qstring-last-index-of} {Latin-1 string viewed by} {str}
4534
4535 \include qstring.qdocinc negative-index-start-search-from-end
4536
4537 Returns -1 if \a str is not found.
4538
4539 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4540
4541 Example:
4542
4543 \snippet qstring/main.cpp 29
4544
4545 \note When searching for a 0-length \a str, the match at the end of
4546 the data is excluded from the search by a negative \a from, even
4547 though \c{-1} is normally thought of as searching from the end of the
4548 string: the match at the end is \e after the last character, so it is
4549 excluded. To include such a final empty match, either give a positive
4550 value for \a from or omit the \a from parameter entirely.
4551
4552 \sa indexOf(), contains(), count()
4553*/
4554qsizetype QString::lastIndexOf(QLatin1StringView str, qsizetype from, Qt::CaseSensitivity cs) const
4555{
4556 return QtPrivate::lastIndexOf(*this, from, str, cs);
4557}
4558
4559/*!
4560 \fn qsizetype QString::lastIndexOf(QLatin1StringView str, Qt::CaseSensitivity cs = Qt::CaseSensitive) const
4561 \since 6.2
4562 \overload lastIndexOf()
4563
4564 Returns the index position of the last occurrence of the string \a
4565 str in this string. Returns -1 if \a str is not found.
4566
4567 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4568
4569 Example:
4570
4571 \snippet qstring/main.cpp 29
4572
4573 \sa indexOf(), contains(), count()
4574*/
4575
4576/*!
4577 \fn qsizetype QString::lastIndexOf(QChar ch, qsizetype from, Qt::CaseSensitivity cs) const
4578 \overload lastIndexOf()
4579
4580 \include qstring.qdocinc {qstring-last-index-of} {character} {ch}
4581*/
4582
4583/*!
4584 \fn QString::lastIndexOf(QChar ch, Qt::CaseSensitivity) const
4585 \since 6.3
4586 \overload lastIndexOf()
4587*/
4588
4589/*!
4590 \fn qsizetype QString::lastIndexOf(QStringView str, qsizetype from, Qt::CaseSensitivity cs) const
4591 \since 5.14
4592 \overload lastIndexOf()
4593
4594 \include qstring.qdocinc {qstring-last-index-of} {string view} {str}
4595
4596 \include qstring.qdocinc negative-index-start-search-from-end
4597
4598 Returns -1 if \a str is not found.
4599
4600 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4601
4602 \note When searching for a 0-length \a str, the match at the end of
4603 the data is excluded from the search by a negative \a from, even
4604 though \c{-1} is normally thought of as searching from the end of the
4605 string: the match at the end is \e after the last character, so it is
4606 excluded. To include such a final empty match, either give a positive
4607 value for \a from or omit the \a from parameter entirely.
4608
4609 \sa indexOf(), contains(), count()
4610*/
4611
4612/*!
4613 \fn qsizetype QString::lastIndexOf(QStringView str, Qt::CaseSensitivity cs = Qt::CaseSensitive) const
4614 \since 6.2
4615 \overload lastIndexOf()
4616
4617 Returns the index position of the last occurrence of the string view \a
4618 str in this string. Returns -1 if \a str is not found.
4619
4620 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4621
4622 \sa indexOf(), contains(), count()
4623*/
4624
4625#if QT_CONFIG(regularexpression)
// Describes one backreference (such as "\1") found in the replacement string
// passed to QString::replace(const QRegularExpression &, const QString &).
4626struct QStringCapture
4627{
4628 qsizetype pos; // index in the replacement string where the backslash sits
4629 qsizetype len; // characters the backreference occupies there (2, or 3 for two digits)
4630 int no; // number of the capturing group it refers to
4631};
4632Q_DECLARE_TYPEINFO(QStringCapture, Q_PRIMITIVE_TYPE);
4633
4634/*!
4635 \overload replace()
4636 \since 5.0
4637
4638 Replaces every occurrence of the regular expression \a re in the
4639 string with \a after. Returns a reference to the string. For
4640 example:
4641
4642 \snippet qstring/main.cpp 87
4643
4644 For regular expressions containing capturing groups,
4645 occurrences of \b{\\1}, \b{\\2}, ..., in \a after are replaced
4646 with the string captured by the corresponding capturing group.
4647
4648 \snippet qstring/main.cpp 88
4649
4650 \sa indexOf(), lastIndexOf(), remove(), QRegularExpression, QRegularExpressionMatch
4651*/
4652QString &QString::replace(const QRegularExpression &re, const QString &after)
4653{
4654 if (!re.isValid()) {
4655 qtWarnAboutInvalidRegularExpression(re, "QString", "replace");
4656 return *this;
4657 }
4658
4659 const QString copy(*this);
4660 QRegularExpressionMatchIterator iterator = re.globalMatch(copy);
4661 if (!iterator.hasNext()) // no matches at all
4662 return *this;
4663
4664 reallocData(d.size, QArrayData::KeepSize);
4665
4666 qsizetype numCaptures = re.captureCount();
4667
4668 // 1. build the backreferences list, holding where the backreferences
4669 // are in the replacement string
4670 QVarLengthArray<QStringCapture> backReferences;
4671 const qsizetype al = after.size();
4672 const QChar *ac = after.unicode();
4673
4674 for (qsizetype i = 0; i < al - 1; i++) {
4675 if (ac[i] == u'\\') {
4676 int no = ac[i + 1].digitValue();
4677 if (no > 0 && no <= numCaptures) {
4678 QStringCapture backReference;
4679 backReference.pos = i;
4680 backReference.len = 2;
4681
4682 if (i < al - 2) {
4683 int secondDigit = ac[i + 2].digitValue();
4684 if (secondDigit != -1 && ((no * 10) + secondDigit) <= numCaptures) {
4685 no = (no * 10) + secondDigit;
4686 ++backReference.len;
4687 }
4688 }
4689
4690 backReference.no = no;
4691 backReferences.append(backReference);
4692 }
4693 }
4694 }
4695
4696 // 2. iterate on the matches. For every match, copy in chunks
4697 // - the part before the match
4698 // - the after string, with the proper replacements for the backreferences
4699
4700 qsizetype newLength = 0; // length of the new string, with all the replacements
4701 qsizetype lastEnd = 0;
4702 QVarLengthArray<QStringView> chunks;
4703 const QStringView copyView{ copy }, afterView{ after };
4704 while (iterator.hasNext()) {
4705 QRegularExpressionMatch match = iterator.next();
4706 qsizetype len;
4707 // add the part before the match
4708 len = match.capturedStart() - lastEnd;
4709 if (len > 0) {
4710 chunks << copyView.mid(lastEnd, len);
4711 newLength += len;
4712 }
4713
4714 lastEnd = 0;
4715 // add the after string, with replacements for the backreferences
4716 for (const QStringCapture &backReference : std::as_const(backReferences)) {
4717 // part of "after" before the backreference
4718 len = backReference.pos - lastEnd;
4719 if (len > 0) {
4720 chunks << afterView.mid(lastEnd, len);
4721 newLength += len;
4722 }
4723
4724 // backreference itself
4725 len = match.capturedLength(backReference.no);
4726 if (len > 0) {
4727 chunks << copyView.mid(match.capturedStart(backReference.no), len);
4728 newLength += len;
4729 }
4730
4731 lastEnd = backReference.pos + backReference.len;
4732 }
4733
4734 // add the last part of the after string
4735 len = afterView.size() - lastEnd;
4736 if (len > 0) {
4737 chunks << afterView.mid(lastEnd, len);
4738 newLength += len;
4739 }
4740
4741 lastEnd = match.capturedEnd();
4742 }
4743
4744 // 3. trailing string after the last match
4745 if (copyView.size() > lastEnd) {
4746 chunks << copyView.mid(lastEnd);
4747 newLength += copyView.size() - lastEnd;
4748 }
4749
4750 // 4. assemble the chunks together
4751 resize(newLength);
4752 qsizetype i = 0;
4753 QChar *uc = data();
4754 for (const QStringView &chunk : std::as_const(chunks)) {
4755 qsizetype len = chunk.size();
4756 memcpy(uc + i, chunk.constData(), len * sizeof(QChar));
4757 i += len;
4758 }
4759
4760 return *this;
4761}
4762#endif // QT_CONFIG(regularexpression)
4763
4764/*!
4765 Returns the number of (potentially overlapping) occurrences of
4766 the string \a str in this string.
4767
4768 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4769
4770 \sa contains(), indexOf()
4771*/
4772
4773qsizetype QString::count(const QString &str, Qt::CaseSensitivity cs) const
4774{
4775 return QtPrivate::count(QStringView(unicode(), size()), QStringView(str.unicode(), str.size()), cs);
4776}
4777
4778/*!
4779 \overload count()
4780
4781 Returns the number of occurrences of character \a ch in the string.
4782
4783 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4784
4785 \sa contains(), indexOf()
4786*/
4787
4788qsizetype QString::count(QChar ch, Qt::CaseSensitivity cs) const
4789{
4790 return QtPrivate::count(QStringView(unicode(), size()), ch, cs);
4791}
4792
4793/*!
4794 \since 6.0
4795 \overload count()
4796 Returns the number of (potentially overlapping) occurrences of the
4797 string view \a str in this string.
4798
4799 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4800
4801 \sa contains(), indexOf()
4802*/
4803qsizetype QString::count(QStringView str, Qt::CaseSensitivity cs) const
4804{
4805 return QtPrivate::count(*this, str, cs);
4806}
4807
4808/*! \fn bool QString::contains(const QString &str, Qt::CaseSensitivity cs = Qt::CaseSensitive) const
4809
4810 Returns \c true if this string contains an occurrence of the string
4811 \a str; otherwise returns \c false.
4812
4813 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4814
4815 Example:
4816 \snippet qstring/main.cpp 17
4817
4818 \sa indexOf(), count()
4819*/
4820
4821/*! \fn bool QString::contains(QLatin1StringView str, Qt::CaseSensitivity cs = Qt::CaseSensitive) const
4822 \since 5.3
4823
4824 \overload contains()
4825
4826 Returns \c true if this string contains an occurrence of the latin-1 string
4827 \a str; otherwise returns \c false.
4828*/
4829
4830/*! \fn bool QString::contains(QChar ch, Qt::CaseSensitivity cs = Qt::CaseSensitive) const
4831
4832 \overload contains()
4833
4834 Returns \c true if this string contains an occurrence of the
4835 character \a ch; otherwise returns \c false.
4836*/
4837
4838/*! \fn bool QString::contains(QStringView str, Qt::CaseSensitivity cs = Qt::CaseSensitive) const
4839 \since 5.14
4840 \overload contains()
4841
4842 Returns \c true if this string contains an occurrence of the string view
4843 \a str; otherwise returns \c false.
4844
4845 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4846
4847 \sa indexOf(), count()
4848*/
4849
4850#if QT_CONFIG(regularexpression)
4851/*!
4852 \since 5.5
4853
4854 Returns the index position of the first match of the regular
4855 expression \a re in the string, searching forward from index
4856 position \a from. Returns -1 if \a re didn't match anywhere.
4857
4858 If the match is successful and \a rmatch is not \nullptr, it also
4859 writes the results of the match into the QRegularExpressionMatch object
4860 pointed to by \a rmatch.
4861
4862 Example:
4863
4864 \snippet qstring/main.cpp 93
4865*/
4866qsizetype QString::indexOf(const QRegularExpression &re, qsizetype from, QRegularExpressionMatch *rmatch) const
4867{
4868 return QtPrivate::indexOf(QStringView(*this), this, re, from, rmatch);
4869}
4870
4871/*!
4872 \since 5.5
4873
4874 Returns the index position of the last match of the regular
4875 expression \a re in the string, which starts before the index
4876 position \a from.
4877
4878 \include qstring.qdocinc negative-index-start-search-from-end
4879
4880 Returns -1 if \a re didn't match anywhere.
4881
4882 If the match is successful and \a rmatch is not \nullptr, it also
4883 writes the results of the match into the QRegularExpressionMatch object
4884 pointed to by \a rmatch.
4885
4886 Example:
4887
4888 \snippet qstring/main.cpp 94
4889
4890 \note Due to how the regular expression matching algorithm works,
4891 this function will actually match repeatedly from the beginning of
4892 the string until the position \a from is reached.
4893
4894 \note When searching for a regular expression \a re that may match
4895 0 characters, the match at the end of the data is excluded from the
4896 search by a negative \a from, even though \c{-1} is normally
4897 thought of as searching from the end of the string: the match at
4898 the end is \e after the last character, so it is excluded. To
4899 include such a final empty match, either give a positive value for
4900 \a from or omit the \a from parameter entirely.
4901*/
4902qsizetype QString::lastIndexOf(const QRegularExpression &re, qsizetype from, QRegularExpressionMatch *rmatch) const
4903{
4904 return QtPrivate::lastIndexOf(QStringView(*this), this, re, from, rmatch);
4905}
4906
4907/*!
4908 \fn qsizetype QString::lastIndexOf(const QRegularExpression &re, QRegularExpressionMatch *rmatch = nullptr) const
4909 \since 6.2
4910 \overload lastIndexOf()
4911
4912 Returns the index position of the last match of the regular
4913 expression \a re in the string. Returns -1 if \a re didn't match anywhere.
4914
4915 If the match is successful and \a rmatch is not \nullptr, it also
4916 writes the results of the match into the QRegularExpressionMatch object
4917 pointed to by \a rmatch.
4918
4919 Example:
4920
4921 \snippet qstring/main.cpp 94
4922
4923 \note Due to how the regular expression matching algorithm works,
4924 this function will actually match repeatedly from the beginning of
4925 the string until the end of the string is reached.
4926*/
4927
4928/*!
4929 \since 5.1
4930
4931 Returns \c true if the regular expression \a re matches somewhere in this
4932 string; otherwise returns \c false.
4933
4934 If the match is successful and \a rmatch is not \nullptr, it also
4935 writes the results of the match into the QRegularExpressionMatch object
4936 pointed to by \a rmatch.
4937
4938 \sa QRegularExpression::match()
4939*/
4940
4941bool QString::contains(const QRegularExpression &re, QRegularExpressionMatch *rmatch) const
4942{
4943 return QtPrivate::contains(QStringView(*this), this, re, rmatch);
4944}
4945
4946/*!
4947 \overload count()
4948 \since 5.0
4949
4950 Returns the number of times the regular expression \a re matches
4951 in the string.
4952
4953 For historical reasons, this function counts overlapping matches,
4954 so in the example below, there are four instances of "ana" or
4955 "ama":
4956
4957 \snippet qstring/main.cpp 95
4958
4959 This behavior is different from simply iterating over the matches
4960 in the string using QRegularExpressionMatchIterator.
4961
4962 \sa QRegularExpression::globalMatch()
4963*/
4964qsizetype QString::count(const QRegularExpression &re) const
4965{
4966 return QtPrivate::count(QStringView(*this), re);
4967}
4968#endif // QT_CONFIG(regularexpression)
4969
4970#if QT_DEPRECATED_SINCE(6, 4)
4971/*! \fn qsizetype QString::count() const
4972 \deprecated [6.4] Use size() or length() instead.
4973 \overload count()
4974
4975 Same as size().
4976*/
4977#endif
4978
4979/*!
4980 \enum QString::SectionFlag
4981
4982 This enum specifies flags that can be used to affect various
4983 aspects of the section() function's behavior with respect to
4984 separators and empty fields.
4985
4986 \value SectionDefault Empty fields are counted, leading and
4987 trailing separators are not included, and the separator is
4988 compared case sensitively.
4989
4990 \value SectionSkipEmpty Treat empty fields as if they don't exist,
4991 i.e. they are not considered as far as \e start and \e end are
4992 concerned.
4993
4994 \value SectionIncludeLeadingSep Include the leading separator (if
4995 any) in the result string.
4996
4997 \value SectionIncludeTrailingSep Include the trailing separator
4998 (if any) in the result string.
4999
5000 \value SectionCaseInsensitiveSeps Compare the separator
5001 case-insensitively.
5002
5003 \sa section()
5004*/
5005
5006/*!
5007 \fn QString QString::section(QChar sep, qsizetype start, qsizetype end = -1, SectionFlags flags) const
5008
5009 This function returns a section of the string.
5010
5011 This string is treated as a sequence of fields separated by the
5012 character, \a sep. The returned string consists of the fields from
5013 position \a start to position \a end inclusive. If \a end is not
5014 specified, all fields from position \a start to the end of the
5015 string are included. Fields are numbered 0, 1, 2, etc., counting
5016 from the left, and -1, -2, etc., counting from right to left.
5017
5018 The \a flags argument can be used to affect some aspects of the
5019 function's behavior, e.g. whether to be case sensitive, whether
5020 to skip empty fields and how to deal with leading and trailing
5021 separators; see \l{SectionFlags}.
5022
5023 \snippet qstring/main.cpp 52
5024
5025 If \a start or \a end is negative, we count fields from the right
5026 of the string, the right-most field being -1, the field to its
5027 left being -2, and so on.
5028
5029 \snippet qstring/main.cpp 53
5030
5031 \sa split()
5032*/
5033
5034/*!
5035 \overload section()
5036
5037 \snippet qstring/main.cpp 51
5038 \snippet qstring/main.cpp 54
5039
5040 \sa split()
5041*/
5042
5043QString QString::section(const QString &sep, qsizetype start, qsizetype end, SectionFlags flags) const
5044{
5045 const QList<QStringView> sections = QStringView{ *this }.split(
5046 sep, Qt::KeepEmptyParts, (flags & SectionCaseInsensitiveSeps) ? Qt::CaseInsensitive : Qt::CaseSensitive);
5047 const qsizetype sectionsSize = sections.size();
5048 if (!(flags & SectionSkipEmpty)) {
5049 if (start < 0)
5050 start += sectionsSize;
5051 if (end < 0)
5052 end += sectionsSize;
5053 } else {
5054 qsizetype skip = 0;
5055 for (qsizetype k = 0; k < sectionsSize; ++k) {
5056 if (sections.at(k).isEmpty())
5057 skip++;
5058 }
5059 if (start < 0)
5060 start += sectionsSize - skip;
5061 if (end < 0)
5062 end += sectionsSize - skip;
5063 }
5064 if (start >= sectionsSize || end < 0 || start > end)
5065 return QString();
5066
5067 QString ret;
5068 qsizetype first_i = start, last_i = end;
5069 for (qsizetype x = 0, i = 0; x <= end && i < sectionsSize; ++i) {
5070 const QStringView &section = sections.at(i);
5071 const bool empty = section.isEmpty();
5072 if (x >= start) {
5073 if (x == start)
5074 first_i = i;
5075 if (x == end)
5076 last_i = i;
5077 if (x > start && i > 0)
5078 ret += sep;
5079 ret += section;
5080 }
5081 if (!empty || !(flags & SectionSkipEmpty))
5082 x++;
5083 }
5084 if ((flags & SectionIncludeLeadingSep) && first_i > 0)
5085 ret.prepend(sep);
5086 if ((flags & SectionIncludeTrailingSep) && last_i < sectionsSize - 1)
5087 ret += sep;
5088 return ret;
5089}
5090
5091#if QT_CONFIG(regularexpression)
// One piece of the string as cut up by QString::section(const QRegularExpression &, ...):
// `string` begins at the separator match (if any) that precedes the field, and
// `length` is the number of leading characters of `string` taken by that separator.
5092struct qt_section_chunk
5093{
5094 qsizetype length; // size of the separator at the front of `string`
5095 QStringView string; // separator followed by the field text
5096};
5097Q_DECLARE_TYPEINFO(qt_section_chunk, Q_RELOCATABLE_TYPE);
5098
5099static QString extractSections(QSpan<qt_section_chunk> sections, qsizetype start, qsizetype end,
5100 QString::SectionFlags flags)
5101{
5102 const qsizetype sectionsSize = sections.size();
5103
5104 if (!(flags & QString::SectionSkipEmpty)) {
5105 if (start < 0)
5106 start += sectionsSize;
5107 if (end < 0)
5108 end += sectionsSize;
5109 } else {
5110 qsizetype skip = 0;
5111 for (qsizetype k = 0; k < sectionsSize; ++k) {
5112 const qt_section_chunk &section = sections[k];
5113 if (section.length == section.string.size())
5114 skip++;
5115 }
5116 if (start < 0)
5117 start += sectionsSize - skip;
5118 if (end < 0)
5119 end += sectionsSize - skip;
5120 }
5121 if (start >= sectionsSize || end < 0 || start > end)
5122 return QString();
5123
5124 QString ret;
5125 qsizetype x = 0;
5126 qsizetype first_i = start, last_i = end;
5127 for (qsizetype i = 0; x <= end && i < sectionsSize; ++i) {
5128 const qt_section_chunk &section = sections[i];
5129 const bool empty = (section.length == section.string.size());
5130 if (x >= start) {
5131 if (x == start)
5132 first_i = i;
5133 if (x == end)
5134 last_i = i;
5135 if (x != start)
5136 ret += section.string;
5137 else
5138 ret += section.string.mid(section.length);
5139 }
5140 if (!empty || !(flags & QString::SectionSkipEmpty))
5141 x++;
5142 }
5143
5144 if ((flags & QString::SectionIncludeLeadingSep) && first_i >= 0) {
5145 const qt_section_chunk &section = sections[first_i];
5146 ret.prepend(section.string.left(section.length));
5147 }
5148
5149 if ((flags & QString::SectionIncludeTrailingSep)
5150 && last_i < sectionsSize - 1) {
5151 const qt_section_chunk &section = sections[last_i + 1];
5152 ret += section.string.left(section.length);
5153 }
5154
5155 return ret;
5156}
5157
5158/*!
5159 \overload section()
5160 \since 5.0
5161
5162 This string is treated as a sequence of fields separated by the
5163 regular expression, \a re.
5164
5165 \snippet qstring/main.cpp 89
5166
5167 \warning Using this QRegularExpression version is much more expensive than
5168 the overloaded string and character versions.
5169
5170 \sa split(), simplified()
5171*/
5172QString QString::section(const QRegularExpression &re, qsizetype start, qsizetype end, SectionFlags flags) const
5173{
5174 if (!re.isValid()) {
5175 qtWarnAboutInvalidRegularExpression(re, "QString", "section");
5176 return QString();
5177 }
5178
5179 const QChar *uc = unicode();
5180 if (!uc)
5181 return QString();
5182
5183 QRegularExpression sep(re);
5184 if (flags & SectionCaseInsensitiveSeps)
5185 sep.setPatternOptions(sep.patternOptions() | QRegularExpression::CaseInsensitiveOption);
5186
5187 QVarLengthArray<qt_section_chunk> sections;
5188 qsizetype n = size(), m = 0, last_m = 0, last_len = 0;
5189 QRegularExpressionMatchIterator iterator = sep.globalMatch(*this);
5190 while (iterator.hasNext()) {
5191 QRegularExpressionMatch match = iterator.next();
5192 m = match.capturedStart();
5193 sections.append(qt_section_chunk{last_len, QStringView{*this}.sliced(last_m, m - last_m)});
5194 last_m = m;
5195 last_len = match.capturedLength();
5196 }
5197 sections.append(qt_section_chunk{last_len, QStringView{*this}.sliced(last_m, n - last_m)});
5198
5199 return extractSections(sections, start, end, flags);
5200}
5201#endif // QT_CONFIG(regularexpression)
5202
5203/*!
5204 \fn QString QString::left(qsizetype n) const &
5205 \fn QString QString::left(qsizetype n) &&
5206
5207 Returns a substring that contains the \a n leftmost characters of
5208 this string (that is, from the beginning of this string up to, but not
5209 including, the element at index position \a n).
5210
5211 If you know that \a n cannot be out of bounds, use first() instead in new
5212 code, because it is faster.
5213
5214 The entire string is returned if \a n is greater than or equal
5215 to size(), or less than zero.
5216
5217 \sa first(), last(), startsWith(), chopped(), chop(), truncate()
5218*/
5219
5220/*!
5221 \fn QString QString::right(qsizetype n) const &
5222 \fn QString QString::right(qsizetype n) &&
5223
5224 Returns a substring that contains the \a n rightmost characters
5225 of the string.
5226
5227 If you know that \a n cannot be out of bounds, use last() instead in new
5228 code, because it is faster.
5229
5230 The entire string is returned if \a n is greater than or equal
5231 to size(), or less than zero.
5232
5233 \sa endsWith(), last(), first(), sliced(), chopped(), chop(), truncate(), slice()
5234*/
5235
5236/*!
5237 \fn QString QString::mid(qsizetype position, qsizetype n) const &
5238 \fn QString QString::mid(qsizetype position, qsizetype n) &&
5239
5240 Returns a string that contains \a n characters of this string, starting
5241 at the specified \a position index up to, but not including, the element
5242 at index position \c {\a position + n}.
5243
5244 If you know that \a position and \a n cannot be out of bounds, use sliced()
5245 instead in new code, because it is faster.
5246
5247 Returns a null string if the \a position index exceeds the
5248 length of the string. If there are fewer than \a n characters
5249 available in the string starting at the given \a position, or if
5250 \a n is -1 (default), the function returns all characters that
5251 are available from the specified \a position.
5252
5253 \sa first(), last(), sliced(), chopped(), chop(), truncate(), slice()
5254*/
5255QString QString::mid(qsizetype position, qsizetype n) const &
5256{
5257 qsizetype p = position;
5258 qsizetype l = n;
5259 using namespace QtPrivate;
5260 switch (QContainerImplHelper::mid(size(), &p, &l)) {
5261 case QContainerImplHelper::Null:
5262 return QString();
5263 case QContainerImplHelper::Empty:
5264 return QString(DataPointer::fromRawData(&_empty, 0));
5265 case QContainerImplHelper::Full:
5266 return *this;
5267 case QContainerImplHelper::Subset:
5268 return sliced(p, l);
5269 }
5270 Q_UNREACHABLE_RETURN(QString());
5271}
5272
5273QString QString::mid(qsizetype position, qsizetype n) &&
5274{
5275 qsizetype p = position;
5276 qsizetype l = n;
5277 using namespace QtPrivate;
5278 switch (QContainerImplHelper::mid(size(), &p, &l)) {
5279 case QContainerImplHelper::Null:
5280 return QString();
5281 case QContainerImplHelper::Empty:
5282 resize(0); // keep capacity if we've reserve()d
5283 [[fallthrough]];
5284 case QContainerImplHelper::Full:
5285 return std::move(*this);
5286 case QContainerImplHelper::Subset:
5287 return std::move(*this).sliced(p, l);
5288 }
5289 Q_UNREACHABLE_RETURN(QString());
5290}
5291
5292/*!
5293 \fn QString QString::first(qsizetype n) const &
5294 \fn QString QString::first(qsizetype n) &&
5295 \since 6.0
5296
5297 Returns a string that contains the first \a n characters of this string,
5298 (that is, from the beginning of this string up to, but not including,
5299 the element at index position \a n).
5300
5301 \note The behavior is undefined when \a n < 0 or \a n > size().
5302
5303 \snippet qstring/main.cpp 31
5304
5305 \sa last(), sliced(), startsWith(), chopped(), chop(), truncate(), slice()
5306*/
5307
5308/*!
5309 \fn QString QString::last(qsizetype n) const &
5310 \fn QString QString::last(qsizetype n) &&
5311 \since 6.0
5312
5313 Returns the string that contains the last \a n characters of this string.
5314
5315 \note The behavior is undefined when \a n < 0 or \a n > size().
5316
5317 \snippet qstring/main.cpp 48
5318
5319 \sa first(), sliced(), endsWith(), chopped(), chop(), truncate(), slice()
5320*/
5321
5322/*!
5323 \fn QString QString::sliced(qsizetype pos, qsizetype n) const &
5324 \fn QString QString::sliced(qsizetype pos, qsizetype n) &&
5325 \since 6.0
5326
5327 Returns a string that contains \a n characters of this string, starting
5328 at position \a pos up to, but not including, the element at index position
5329 \c {\a pos + n}.
5330
5331 \note The behavior is undefined when \a pos < 0, \a n < 0,
5332 or \a pos + \a n > size().
5333
5334 \snippet qstring/main.cpp 34
5335
5336 \sa first(), last(), chopped(), chop(), truncate(), slice()
5337*/
5338QString QString::sliced_helper(QString &str, qsizetype pos, qsizetype n)
5339{
5340 if (n == 0)
5341 return QString(DataPointer::fromRawData(&_empty, 0));
5342 DataPointer d = std::move(str.d).sliced(pos, n);
5343 d.data()[n] = 0;
5344 return QString(std::move(d));
5345}
5346
5347/*!
5348 \fn QString QString::sliced(qsizetype pos) const &
5349 \fn QString QString::sliced(qsizetype pos) &&
5350 \since 6.0
5351 \overload
5352
5353 Returns a string that contains the portion of this string starting at
5354 position \a pos and extending to its end.
5355
5356 \note The behavior is undefined when \a pos < 0 or \a pos > size().
5357
5358 \sa first(), last(), chopped(), chop(), truncate(), slice()
5359*/
5360
5361/*!
5362 \fn QString &QString::slice(qsizetype pos, qsizetype n)
5363 \since 6.8
5364
5365 Modifies this string to start at position \a pos, up to, but not including,
5366 the character (code point) at index position \c {\a pos + n}; and returns
5367 a reference to this string.
5368
5369 \note The behavior is undefined if \a pos < 0, \a n < 0,
5370 or \a pos + \a n > size().
5371
5372 \snippet qstring/main.cpp slice97
5373
5374 \sa sliced(), first(), last(), chopped(), chop(), truncate()
5375*/
5376
5377/*!
5378 \fn QString &QString::slice(qsizetype pos)
5379 \since 6.8
5380 \overload
5381
5382 Modifies this string to start at position \a pos and extending to its end,
5383 and returns a reference to this string.
5384
5385 \note The behavior is undefined if \a pos < 0 or \a pos > size().
5386
5387 \sa sliced(), first(), last(), chopped(), chop(), truncate()
5388*/
5389
5390/*!
5391 \fn QString QString::chopped(qsizetype len) const &
5392 \fn QString QString::chopped(qsizetype len) &&
5393 \since 5.10
5394
5395 Returns a string that contains the size() - \a len leftmost characters
5396 of this string.
5397
5398 \note The behavior is undefined if \a len is negative or greater than size().
5399
5400 \sa endsWith(), first(), last(), sliced(), chop(), truncate(), slice()
5401*/
5402
5403/*!
5404 Returns \c true if the string starts with \a s; otherwise returns
5405 \c false.
5406
5407 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
5408
5409 \snippet qstring/main.cpp 65
5410
5411 \sa endsWith()
5412*/
5413bool QString::startsWith(const QString& s, Qt::CaseSensitivity cs) const
5414{
5415 return qt_starts_with_impl(QStringView(*this), QStringView(s), cs);
5416}
5417
5418/*!
5419 \overload startsWith()
5420 */
5421bool QString::startsWith(QLatin1StringView s, Qt::CaseSensitivity cs) const
5422{
5423 return qt_starts_with_impl(QStringView(*this), s, cs);
5424}
5425
5426/*!
5427 \overload startsWith()
5428
5429 Returns \c true if the string starts with \a c; otherwise returns
5430 \c false.
5431*/
5432bool QString::startsWith(QChar c, Qt::CaseSensitivity cs) const
5433{
5434 if (!size())
5435 return false;
5436 if (cs == Qt::CaseSensitive)
5437 return at(0) == c;
5438 return foldCase(at(0)) == foldCase(c);
5439}
5440
5441/*!
5442 \fn bool QString::startsWith(QStringView str, Qt::CaseSensitivity cs) const
5443 \since 5.10
5444 \overload
5445
5446 Returns \c true if the string starts with the string view \a str;
5447 otherwise returns \c false.
5448
5449 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
5450
5451 \sa endsWith()
5452*/
5453
5454/*!
5455 Returns \c true if the string ends with \a s; otherwise returns
5456 \c false.
5457
5458 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
5459
5460 \snippet qstring/main.cpp 20
5461
5462 \sa startsWith()
5463*/
5464bool QString::endsWith(const QString &s, Qt::CaseSensitivity cs) const
5465{
5466 return qt_ends_with_impl(QStringView(*this), QStringView(s), cs);
5467}
5468
5469/*!
5470 \fn bool QString::endsWith(QStringView str, Qt::CaseSensitivity cs) const
5471 \since 5.10
5472 \overload endsWith()
5473 Returns \c true if the string ends with the string view \a str;
5474 otherwise returns \c false.
5475
5476 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
5477
5478 \sa startsWith()
5479*/
5480
5481/*!
5482 \overload endsWith()
5483*/
5484bool QString::endsWith(QLatin1StringView s, Qt::CaseSensitivity cs) const
5485{
5486 return qt_ends_with_impl(QStringView(*this), s, cs);
5487}
5488
5489/*!
5490 Returns \c true if the string ends with \a c; otherwise returns
5491 \c false.
5492
5493 \overload endsWith()
5494 */
5495bool QString::endsWith(QChar c, Qt::CaseSensitivity cs) const
5496{
5497 if (!size())
5498 return false;
5499 if (cs == Qt::CaseSensitive)
5500 return at(size() - 1) == c;
5501 return foldCase(at(size() - 1)) == foldCase(c);
5502}
5503
5504static bool checkCase(QStringView s, QUnicodeTables::Case c) noexcept
5505{
5506 QStringIterator it(s);
5507 while (it.hasNext()) {
5508 const char32_t uc = it.next();
5509 if (caseConversion(uc)[c].diff)
5510 return false;
5511 }
5512 return true;
5513}
5514
5515bool QtPrivate::isLower(QStringView s) noexcept
5516{
5517 return checkCase(s, QUnicodeTables::LowerCase);
5518}
5519
5520bool QtPrivate::isUpper(QStringView s) noexcept
5521{
5522 return checkCase(s, QUnicodeTables::UpperCase);
5523}
5524
5525/*!
5526 Returns \c true if the string is uppercase, that is, it's identical
5527 to its toUpper() folding.
5528
5529 Note that this does \e not mean that the string does not contain
5530 lowercase letters (some lowercase letters do not have an uppercase
5531 folding; they are left unchanged by toUpper()).
5532 For more information, refer to the Unicode standard, section 3.13.
5533
5534 \since 5.12
5535
5536 \sa QChar::toUpper(), isLower()
5537*/
5538bool QString::isUpper() const
5539{
5540 return QtPrivate::isUpper(qToStringViewIgnoringNull(*this));
5541}
5542
5543/*!
5544 Returns \c true if the string is lowercase, that is, it's identical
5545 to its toLower() folding.
5546
5547 Note that this does \e not mean that the string does not contain
5548 uppercase letters (some uppercase letters do not have a lowercase
5549 folding; they are left unchanged by toLower()).
5550 For more information, refer to the Unicode standard, section 3.13.
5551
5552 \since 5.12
5553
5554 \sa QChar::toLower(), isUpper()
5555 */
5556bool QString::isLower() const
5557{
5558 return QtPrivate::isLower(qToStringViewIgnoringNull(*this));
5559}
5560
// Forward declaration; the definition follows further down in this file.
5561static QByteArray qt_convert_to_latin1(QStringView string);
5562
5563QByteArray QString::toLatin1_helper(const QString &string)
5564{
5565 return qt_convert_to_latin1(string);
5566}
5567
5568/*!
5569 \since 6.0
5570 \internal
5571 \relates QAnyStringView
5572
5573 Returns a UTF-16 representation of \a string as a QString.
5574
5575 \sa QString::toLatin1(), QStringView::toLatin1(), QtPrivate::convertToUtf8(),
5576 QtPrivate::convertToLocal8Bit(), QtPrivate::convertToUcs4()
5577*/
5578QString QtPrivate::convertToQString(QAnyStringView string)
5579{
5580 return string.visit([] (auto string) { return string.toString(); });
5581}
5582
5583/*!
5584 \since 5.10
5585 \internal
5586 \relates QStringView
5587
5588 Returns a Latin-1 representation of \a string as a QByteArray.
5589
5590 The behavior is undefined if \a string contains non-Latin1 characters.
5591
5592 \sa QString::toLatin1(), QStringView::toLatin1(), QtPrivate::convertToUtf8(),
5593 QtPrivate::convertToLocal8Bit(), QtPrivate::convertToUcs4()
5594*/
5596{
5597 return qt_convert_to_latin1(string);
5598}
5599
5600Q_NEVER_INLINE
5601static QByteArray qt_convert_to_latin1(QStringView string)
5602{
5603 if (Q_UNLIKELY(string.isNull()))
5604 return QByteArray();
5605
5606 QByteArray ba(string.size(), Qt::Uninitialized);
5607
5608 // since we own the only copy, we're going to const_cast the constData;
5609 // that avoids an unnecessary call to detach() and expansion code that will never get used
5610 qt_to_latin1(reinterpret_cast<uchar *>(const_cast<char *>(ba.constData())),
5611 string.utf16(), string.size());
5612 return ba;
5613}
5614
5615QByteArray QString::toLatin1_helper_inplace(QString &s)
5616{
5617 if (!s.isDetached())
5618 return qt_convert_to_latin1(s);
5619
5620 // We can return our own buffer to the caller.
5621 // Conversion to Latin-1 always shrinks the buffer by half.
5622 // This relies on the fact that we use QArrayData for everything behind the scenes
5623
5624 // First, do the in-place conversion. Since isDetached() == true, the data
5625 // was allocated by QArrayData, so the null terminator must be there.
5626 qsizetype length = s.size();
5627 char16_t *sdata = s.d->data();
5628 Q_ASSERT(sdata[length] == u'\0');
5629 qt_to_latin1(reinterpret_cast<uchar *>(sdata), sdata, length + 1);
5630
5631 // Move the internals over to the byte array.
5632 // Kids, avert your eyes. Don't try this at home.
5633 auto ba_d = std::move(s.d).reinterpreted<char>();
5634
5635 // Some sanity checks
5636 Q_ASSERT(ba_d.d->allocatedCapacity() >= ba_d.size);
5637 Q_ASSERT(s.isNull());
5638 Q_ASSERT(s.isEmpty());
5639 Q_ASSERT(s.constData() == QString().constData());
5640
5641 return QByteArray(std::move(ba_d));
5642}
5643
5644/*!
5645 \since 6.9
5646 \internal
5647 \relates QLatin1StringView
5648
5649 Returns a UTF-8 representation of \a string as a QByteArray.
5650*/
5651QByteArray QtPrivate::convertToUtf8(QLatin1StringView string)
5652{
5653 if (Q_UNLIKELY(string.isNull()))
5654 return QByteArray();
5655
5656 // create a QByteArray with the worst case scenario size
5657 QByteArray ba(string.size() * 2, Qt::Uninitialized);
5658 const qsizetype sz = QUtf8::convertFromLatin1(ba.data(), string) - ba.data();
5659 ba.truncate(sz);
5660
5661 return ba;
5662}
5663
5664// QLatin1 methods that use helpers from qstring.cpp
5665char16_t *QLatin1::convertToUnicode(char16_t *out, QLatin1StringView in) noexcept
5666{
5667 const qsizetype len = in.size();
5668 qt_from_latin1(out, in.data(), len);
5669 return std::next(out, len);
5670}
5671
5672char *QLatin1::convertFromUnicode(char *out, QStringView in) noexcept
5673{
5674 const qsizetype len = in.size();
5675 qt_to_latin1(reinterpret_cast<uchar *>(out), in.utf16(), len);
5676 return out + len;
5677}
5678
5679/*!
5680 \fn QByteArray QString::toLatin1() const
5681
5682 Returns a Latin-1 representation of the string as a QByteArray.
5683
5684 The returned byte array is undefined if the string contains non-Latin1
5685 characters. Those characters may be suppressed or replaced with a
5686 question mark.
5687
5688 \sa fromLatin1(), toUtf8(), toLocal8Bit(), QStringEncoder
5689*/
5690
// Forward declaration; the definition follows further down in this file.
5691static QByteArray qt_convert_to_local_8bit(QStringView string);
5692
5693/*!
5694 \fn QByteArray QString::toLocal8Bit() const
5695
5696 Returns the local 8-bit representation of the string as a
5697 QByteArray.
5698
5699 \include qstring.qdocinc {qstring-local-8-bit-equivalent} {toUtf8}
5700
5701 If this string contains any characters that cannot be encoded in the
5702 local 8-bit encoding, the returned byte array is undefined. Those
5703 characters may be suppressed or replaced by another.
5704
5705 \sa fromLocal8Bit(), toLatin1(), toUtf8(), QStringEncoder
5706*/
5707
5708QByteArray QString::toLocal8Bit_helper(const QChar *data, qsizetype size)
5709{
5710 return qt_convert_to_local_8bit(QStringView(data, size));
5711}
5712
5713static QByteArray qt_convert_to_local_8bit(QStringView string)
5714{
5715 if (string.isNull())
5716 return QByteArray();
5717 QStringEncoder fromUtf16(QStringEncoder::System, QStringEncoder::Flag::Stateless);
5718 return fromUtf16(string);
5719}
5720
5721/*!
5722 \since 5.10
5723 \internal
5724 \relates QStringView
5725
5726 Returns a local 8-bit representation of \a string as a QByteArray.
5727
5728 On Unix systems this is equivalent to toUtf8(); on Windows the system's
5729 current code page is used.
5730
5731 The behavior is undefined if \a string contains characters not
5732 supported by the locale's 8-bit encoding.
5733
5734 \sa QString::toLocal8Bit(), QStringView::toLocal8Bit()
5735*/
5737{
5738 return qt_convert_to_local_8bit(string);
5739}
5740
// Forward declaration; the definition follows further down in this file.
5741static QByteArray qt_convert_to_utf8(QStringView str);
5742
5743/*!
5744 \fn QByteArray QString::toUtf8() const
5745
5746 Returns a UTF-8 representation of the string as a QByteArray.
5747
5748 UTF-8 is a Unicode codec and can represent all characters in a Unicode
5749 string like QString.
5750
5751 \sa fromUtf8(), toLatin1(), toLocal8Bit(), QStringEncoder
5752*/
5753
5754QByteArray QString::toUtf8_helper(const QString &str)
5755{
5756 return qt_convert_to_utf8(str);
5757}
5758
5759static QByteArray qt_convert_to_utf8(QStringView str)
5760{
5761 if (str.isNull())
5762 return QByteArray();
5763
5764 return QUtf8::convertFromUnicode(str);
5765}
5766
5767/*!
5768 \since 5.10
5769 \internal
5770 \relates QStringView
5771
5772 Returns a UTF-8 representation of \a string as a QByteArray.
5773
5774 UTF-8 is a Unicode codec and can represent all characters in a Unicode
5775 string like QStringView.
5776
5777 \sa QString::toUtf8(), QStringView::toUtf8()
5778*/
5780{
5781 return qt_convert_to_utf8(string);
5782}
5783
5784static QList<uint> qt_convert_to_ucs4(QStringView string);
5785
5786/*!
5787 \since 4.2
5788
5789 Returns a UCS-4/UTF-32 representation of the string as a QList<uint>.
5790
5791 UTF-32 is a Unicode codec and therefore it is lossless. All characters from
5792 this string will be encoded in UTF-32. Any invalid sequence of code units in
5793 this string is replaced by the Unicode replacement character
5794 (QChar::ReplacementCharacter, which corresponds to \c{U+FFFD}).
5795
5796 The returned list is not 0-terminated.
5797
5798 \sa fromUtf8(), toUtf8(), toLatin1(), toLocal8Bit(), QStringEncoder,
5799 fromUcs4(), toWCharArray()
5800*/
5801QList<uint> QString::toUcs4() const
5802{
5803 return qt_convert_to_ucs4(*this);
5804}
5805
5806static QList<uint> qt_convert_to_ucs4(QStringView string)
5807{
5808 QList<uint> v(string.size());
5809 uint *a = const_cast<uint*>(v.constData());
5810 QStringIterator it(string);
5811 while (it.hasNext())
5812 *a++ = it.next();
5813 v.resize(a - v.constData());
5814 return v;
5815}
5816
5817/*!
5818 \since 5.10
5819 \internal
5820 \relates QStringView
5821
5822 Returns a UCS-4/UTF-32 representation of \a string as a QList<uint>.
5823
5824 UTF-32 is a Unicode codec and therefore it is lossless. All characters from
5825 this string will be encoded in UTF-32. Any invalid sequence of code units in
5826 this string is replaced by the Unicode replacement character
5827 (QChar::ReplacementCharacter, which corresponds to \c{U+FFFD}).
5828
5829 The returned list is not 0-terminated.
5830
5831 \sa QString::toUcs4(), QStringView::toUcs4(), QtPrivate::convertToLatin1(),
5832 QtPrivate::convertToLocal8Bit(), QtPrivate::convertToUtf8()
5833*/
5834QList<uint> QtPrivate::convertToUcs4(QStringView string)
5835{
5836 return qt_convert_to_ucs4(string);
5837}
5838
5839/*!
5840 \fn QString QString::fromLatin1(QByteArrayView str)
5841 \overload
5842 \since 6.0
5843
5844 Returns a QString initialized with the Latin-1 string \a str.
5845
5846 \note: any null ('\\0') bytes in the byte array will be included in this
5847 string, converted to Unicode null characters (U+0000).
5848*/
5849QString QString::fromLatin1(QByteArrayView ba)
5850{
5851 DataPointer d;
5852 if (!ba.data()) {
5853 // nothing to do
5854 } else if (ba.size() == 0) {
5855 d = DataPointer::fromRawData(&_empty, 0);
5856 } else {
5857 d = DataPointer(ba.size(), ba.size());
5858 Q_CHECK_PTR(d.data());
5859 d.data()[ba.size()] = '\0';
5860 char16_t *dst = d.data();
5861
5862 qt_from_latin1(dst, ba.data(), size_t(ba.size()));
5863 }
5864 return QString(std::move(d));
5865}
5866
5867/*!
5868 \fn QString QString::fromLatin1(const char *str, qsizetype size)
5869 Returns a QString initialized with the first \a size characters
5870 of the Latin-1 string \a str.
5871
5872 If \a size is \c{-1}, \c{strlen(str)} is used instead.
5873
5874 \sa toLatin1(), fromUtf8(), fromLocal8Bit()
5875*/
5876
5877/*!
5878 \fn QString QString::fromLatin1(const QByteArray &str)
5879 \overload
5880 \since 5.0
5881
5882 Returns a QString initialized with the Latin-1 string \a str.
5883
5884 \note: any null ('\\0') bytes in the byte array will be included in this
5885 string, converted to Unicode null characters (U+0000). This behavior is
5886 different from Qt 5.x.
5887*/
5888
5889/*!
5890 \fn QString QString::fromLocal8Bit(const char *str, qsizetype size)
5891 Returns a QString initialized with the first \a size characters
5892 of the 8-bit string \a str.
5893
5894 If \a size is \c{-1}, \c{strlen(str)} is used instead.
5895
5896 \include qstring.qdocinc {qstring-local-8-bit-equivalent} {fromUtf8}
5897
5898 \sa toLocal8Bit(), fromLatin1(), fromUtf8()
5899*/
5900
5901/*!
5902 \fn QString QString::fromLocal8Bit(const QByteArray &str)
5903 \overload
5904 \since 5.0
5905
5906 Returns a QString initialized with the 8-bit string \a str.
5907
5908 \include qstring.qdocinc {qstring-local-8-bit-equivalent} {fromUtf8}
5909
5910 \note: any null ('\\0') bytes in the byte array will be included in this
5911 string, converted to Unicode null characters (U+0000). This behavior is
5912 different from Qt 5.x.
5913*/
5914
5915/*!
5916 \fn QString QString::fromLocal8Bit(QByteArrayView str)
5917 \overload
5918 \since 6.0
5919
5920 Returns a QString initialized with the 8-bit string \a str.
5921
5922 \include qstring.qdocinc {qstring-local-8-bit-equivalent} {fromUtf8}
5923
5924 \note: any null ('\\0') bytes in the byte array will be included in this
5925 string, converted to Unicode null characters (U+0000).
5926*/
5927QString QString::fromLocal8Bit(QByteArrayView ba)
5928{
5929 if (ba.isNull())
5930 return QString();
5931 if (ba.isEmpty())
5932 return QString(DataPointer::fromRawData(&_empty, 0));
5933 QStringDecoder toUtf16(QStringDecoder::System, QStringDecoder::Flag::Stateless);
5934 return toUtf16(ba);
5935}
5936
5937/*! \fn QString QString::fromUtf8(const char *str, qsizetype size)
5938 Returns a QString initialized with the first \a size bytes
5939 of the UTF-8 string \a str.
5940
5941 If \a size is \c{-1}, \c{strlen(str)} is used instead.
5942
5943 UTF-8 is a Unicode codec and can represent all characters in a Unicode
5944 string like QString. However, invalid sequences are possible with UTF-8
5945 and, if any such are found, they will be replaced with one or more
5946 "replacement characters", or suppressed. These include non-Unicode
5947 sequences, non-characters, overlong sequences or surrogate codepoints
5948 encoded into UTF-8.
5949
5950 This function can be used to process incoming data incrementally as long as
5951 all UTF-8 characters are terminated within the incoming data. Any
5952 unterminated characters at the end of the string will be replaced or
5953 suppressed. In order to do stateful decoding, please use \l QStringDecoder.
5954
5955 \sa toUtf8(), fromLatin1(), fromLocal8Bit()
5956*/
5957
5958/*!
5959 \fn QString QString::fromUtf8(const char8_t *str)
5960 \overload
5961 \since 6.1
5962
5963 This overload is only available when compiling in C++20 mode.
5964*/
5965
5966/*!
5967 \fn QString QString::fromUtf8(const char8_t *str, qsizetype size)
5968 \overload
5969 \since 6.0
5970
5971 This overload is only available when compiling in C++20 mode.
5972*/
5973
5974/*!
5975 \fn QString QString::fromUtf8(const QByteArray &str)
5976 \overload
5977 \since 5.0
5978
5979 Returns a QString initialized with the UTF-8 string \a str.
5980
5981 \note: any null ('\\0') bytes in the byte array will be included in this
5982 string, converted to Unicode null characters (U+0000). This behavior is
5983 different from Qt 5.x.
5984*/
5985
5986/*!
5987 \fn QString QString::fromUtf8(QByteArrayView str)
5988 \overload
5989 \since 6.0
5990
5991 Returns a QString initialized with the UTF-8 string \a str.
5992
5993 \note: any null ('\\0') bytes in the byte array will be included in this
5994 string, converted to Unicode null characters (U+0000).
5995*/
5996QString QString::fromUtf8(QByteArrayView ba)
5997{
5998 if (ba.isNull())
5999 return QString();
6000 if (ba.isEmpty())
6001 return QString(DataPointer::fromRawData(&_empty, 0));
6002 return QUtf8::convertToUnicode(ba);
6003}
6004
6005#ifndef QT_BOOTSTRAPPED
6006/*!
6007 \since 5.3
6008 Returns a QString initialized with the first \a size characters
6009 of the Unicode string \a unicode (ISO-10646-UTF-16 encoded).
6010
6011 If \a size is -1 (default), \a unicode must be '\\0'-terminated.
6012
6013 This function checks for a Byte Order Mark (BOM). If it is missing,
6014 host byte order is assumed.
6015
6016 This function is slow compared to the other Unicode conversions.
6017 Use QString(const QChar *, qsizetype) or QString(const QChar *) if possible.
6018
6019 QString makes a deep copy of the Unicode data.
6020
6021 \sa utf16(), setUtf16(), fromStdU16String()
6022*/
6023QString QString::fromUtf16(const char16_t *unicode, qsizetype size)
6024{
6025 if (!unicode)
6026 return QString();
6027 if (size < 0)
6028 size = QtPrivate::qustrlen(unicode);
6029 QStringDecoder toUtf16(QStringDecoder::Utf16, QStringDecoder::Flag::Stateless);
6030 return toUtf16(QByteArrayView(reinterpret_cast<const char *>(unicode), size * 2));
6031}
6032
6033/*!
6034 \fn QString QString::fromUtf16(const ushort *str, qsizetype size)
6035 \deprecated [6.0] Use the \c char16_t overload instead.
6036*/
6037
6038/*!
6039 \fn QString QString::fromUcs4(const uint *str, qsizetype size)
6040 \since 4.2
6041 \deprecated [6.0] Use the \c char32_t overload instead.
6042*/
6043
6044/*!
6045 \since 5.3
6046
6047 Returns a QString initialized with the first \a size characters
6048 of the Unicode string \a unicode (encoded as UTF-32).
6049
6050 If \a size is -1 (default), \a unicode must be '\\0'-terminated.
6051
6052 \sa toUcs4(), fromUtf16(), utf16(), setUtf16(), fromWCharArray(),
6053 fromStdU32String()
6054*/
6055QString QString::fromUcs4(const char32_t *unicode, qsizetype size)
6056{
6057 if (!unicode)
6058 return QString();
6059 if (size < 0) {
6060 if constexpr (sizeof(char32_t) == sizeof(wchar_t))
6061 size = wcslen(reinterpret_cast<const wchar_t *>(unicode));
6062 else
6063 size = std::char_traits<char32_t>::length(unicode);
6064 }
6065 QStringDecoder toUtf16(QStringDecoder::Utf32, QStringDecoder::Flag::Stateless);
6066 return toUtf16(QByteArrayView(reinterpret_cast<const char *>(unicode), size * 4));
6067}
6068#endif // !QT_BOOTSTRAPPED
6069
6070/*!
6071 Resizes the string to \a size characters and copies \a unicode
6072 into the string.
6073
6074 If \a unicode is \nullptr, nothing is copied, but the string is still
6075 resized to \a size.
6076
6077 \sa unicode(), setUtf16()
6078*/
6079QString& QString::setUnicode(const QChar *unicode, qsizetype size)
6080{
6081 resize(size);
6082 if (unicode && size)
6083 memcpy(d.data(), unicode, size * sizeof(QChar));
6084 return *this;
6085}
6086
6087/*!
6088 \fn QString::setUnicode(const char16_t *unicode, qsizetype size)
6089 \overload
6090 \since 6.9
6091
6092 \sa unicode(), setUtf16()
6093*/
6094
6095/*!
6096 \fn QString::setUtf16(const char16_t *unicode, qsizetype size)
6097 \since 6.9
6098
6099 Resizes the string to \a size characters and copies \a unicode
6100 into the string.
6101
6102 If \a unicode is \nullptr, nothing is copied, but the string is still
6103 resized to \a size.
6104
6105 Note that unlike fromUtf16(), this function does not consider BOMs and
6106 possibly differing byte ordering.
6107
6108 \sa utf16(), setUnicode()
6109*/
6110
6111/*!
6112 \fn QString &QString::setUtf16(const ushort *unicode, qsizetype size)
6113 \obsolete Use the \c char16_t overload instead.
6114*/
6115
6116/*!
6117 \fn QString QString::simplified() const
6118
6119 Returns a string that has whitespace removed from the start
6120 and the end, and that has each sequence of internal whitespace
6121 replaced with a single space.
6122
6123 Whitespace means any character for which QChar::isSpace() returns
6124 \c true. This includes the ASCII characters '\\t', '\\n', '\\v',
6125 '\\f', '\\r', and ' '.
6126
6127 Example:
6128
6129 \snippet qstring/main.cpp 57
6130
6131 \sa trimmed()
6132*/
6133QString QString::simplified_helper(const QString &str)
6134{
6135 return QStringAlgorithms<const QString>::simplified_helper(str);
6136}
6137
6138QString QString::simplified_helper(QString &str)
6139{
6140 return QStringAlgorithms<QString>::simplified_helper(str);
6141}
6142
6143namespace {
6144 template <typename StringView>
6145 StringView qt_trimmed(StringView s) noexcept
6146 {
6147 const auto [begin, end] = QStringAlgorithms<const StringView>::trimmed_helper_positions(s);
6148 return StringView{begin, end};
6149 }
6150}
6151
6152/*!
6153 \fn QStringView QtPrivate::trimmed(QStringView s)
6154 \fn QLatin1StringView QtPrivate::trimmed(QLatin1StringView s)
6155 \internal
6156 \relates QStringView
6157 \since 5.10
6158
6159 Returns \a s with whitespace removed from the start and the end.
6160
6161 Whitespace means any character for which QChar::isSpace() returns
6162 \c true. This includes the ASCII characters '\\t', '\\n', '\\v',
6163 '\\f', '\\r', and ' '.
6164
6165 \sa QString::trimmed(), QStringView::trimmed(), QLatin1StringView::trimmed()
6166*/
6167QStringView QtPrivate::trimmed(QStringView s) noexcept
6168{
6169 return qt_trimmed(s);
6170}
6171
6172QLatin1StringView QtPrivate::trimmed(QLatin1StringView s) noexcept
6173{
6174 return qt_trimmed(s);
6175}
6176
6177/*!
6178 \fn QString QString::trimmed() const
6179
6180 Returns a string that has whitespace removed from the start and
6181 the end.
6182
6183 Whitespace means any character for which QChar::isSpace() returns
6184 \c true. This includes the ASCII characters '\\t', '\\n', '\\v',
6185 '\\f', '\\r', and ' '.
6186
6187 Example:
6188
6189 \snippet qstring/main.cpp 82
6190
6191 Unlike simplified(), trimmed() leaves internal whitespace alone.
6192
6193 \sa simplified()
6194*/
6195QString QString::trimmed_helper(const QString &str)
6196{
6197 return QStringAlgorithms<const QString>::trimmed_helper(str);
6198}
6199
6200QString QString::trimmed_helper(QString &str)
6201{
6202 return QStringAlgorithms<QString>::trimmed_helper(str);
6203}
6204
6205/*! \fn const QChar QString::at(qsizetype position) const
6206
6207 Returns the character at the given index \a position in the
6208 string.
6209
6210 The \a position must be a valid index position in the string
6211 (i.e., 0 <= \a position < size()).
6212
6213 \sa operator[]()
6214*/
6215
6216/*!
6217 \fn QChar &QString::operator[](qsizetype position)
6218
6219 Returns the character at the specified \a position in the string as a
6220 modifiable reference.
6221
6222 Example:
6223
6224 \snippet qstring/main.cpp 85
6225
6226 \sa at()
6227*/
6228
6229/*!
6230 \fn const QChar QString::operator[](qsizetype position) const
6231
6232 \overload operator[]()
6233*/
6234
6235/*!
6236 \fn QChar QString::front() const
6237 \since 5.10
6238
6239 Returns the first character in the string.
6240 Same as \c{at(0)}.
6241
6242 This function is provided for STL compatibility.
6243
6244 \warning Calling this function on an empty string constitutes
6245 undefined behavior.
6246
6247 \sa back(), at(), operator[]()
6248*/
6249
6250/*!
6251 \fn QChar QString::back() const
6252 \since 5.10
6253
6254 Returns the last character in the string.
6255 Same as \c{at(size() - 1)}.
6256
6257 This function is provided for STL compatibility.
6258
6259 \warning Calling this function on an empty string constitutes
6260 undefined behavior.
6261
6262 \sa front(), at(), operator[]()
6263*/
6264
6265/*!
6266 \fn QChar &QString::front()
6267 \since 5.10
6268
6269 Returns a reference to the first character in the string.
6270 Same as \c{operator[](0)}.
6271
6272 This function is provided for STL compatibility.
6273
6274 \warning Calling this function on an empty string constitutes
6275 undefined behavior.
6276
6277 \sa back(), at(), operator[]()
6278*/
6279
6280/*!
6281 \fn QChar &QString::back()
6282 \since 5.10
6283
6284 Returns a reference to the last character in the string.
6285 Same as \c{operator[](size() - 1)}.
6286
6287 This function is provided for STL compatibility.
6288
6289 \warning Calling this function on an empty string constitutes
6290 undefined behavior.
6291
6292 \sa front(), at(), operator[]()
6293*/
6294
6295/*!
6296 \fn void QString::truncate(qsizetype position)
6297
6298 Truncates the string starting from, and including, the element at index
6299 \a position.
6300
6301 If the specified \a position index is beyond the end of the
6302 string, nothing happens.
6303
6304 Example:
6305
6306 \snippet qstring/main.cpp 83
6307
6308 If \a position is negative, it is equivalent to passing zero.
6309
6310 \sa chop(), resize(), first(), QStringView::truncate()
6311*/
6312
6313void QString::truncate(qsizetype pos)
6314{
6315 if (pos < size())
6316 resize(pos);
6317}
6318
6319
6320/*!
6321 Removes \a n characters from the end of the string.
6322
6323 If \a n is greater than or equal to size(), the result is an
6324 empty string; if \a n is negative, it is equivalent to passing zero.
6325
6326 Example:
6327 \snippet qstring/main.cpp 15
6328
6329 If you want to remove characters from the \e beginning of the
6330 string, use remove() instead.
6331
6332 \sa truncate(), resize(), remove(), QStringView::chop()
6333*/
6334void QString::chop(qsizetype n)
6335{
6336 if (n > 0)
6337 resize(d.size - n);
6338}
6339
6340/*!
6341 Sets every character in the string to character \a ch. If \a size
6342 is different from -1 (default), the string is resized to \a
6343 size beforehand.
6344
6345 Example:
6346
6347 \snippet qstring/main.cpp 21
6348
6349 \sa resize()
6350*/
6351
6352QString& QString::fill(QChar ch, qsizetype size)
6353{
6354 resize(size < 0 ? d.size : size);
6355 if (d.size)
6356 std::fill(d.data(), d.data() + d.size, ch.unicode());
6357 return *this;
6358}
6359
6360/*!
6361 \fn qsizetype QString::length() const
6362
6363 Returns the number of characters in this string. Equivalent to
6364 size().
6365
6366 \sa resize()
6367*/
6368
6369/*!
6370 \fn qsizetype QString::size() const
6371
6372 Returns the number of characters in this string.
6373
6374 The last character in the string is at position size() - 1.
6375
6376 Example:
6377 \snippet qstring/main.cpp 58
6378
6379 \sa isEmpty(), resize()
6380*/
6381
6382/*!
6383 \fn qsizetype QString::max_size() const
6384 \fn qsizetype QString::maxSize()
6385 \since 6.8
6386
6387 It returns the maximum number of elements that the string can
6388 theoretically hold. In practice, the number can be much smaller,
6389 limited by the amount of memory available to the system.
6390*/
6391
6392/*! \fn bool QString::isNull() const
6393
6394 Returns \c true if this string is null; otherwise returns \c false.
6395
6396 Example:
6397
6398 \snippet qstring/main.cpp 28
6399
6400 Qt makes a distinction between null strings and empty strings for
6401 historical reasons. For most applications, what matters is
6402 whether or not a string contains any data, and this can be
6403 determined using the isEmpty() function.
6404
6405 \sa isEmpty()
6406*/
6407
6408/*! \fn bool QString::isEmpty() const
6409
6410 Returns \c true if the string has no characters; otherwise returns
6411 \c false.
6412
6413 Example:
6414
6415 \snippet qstring/main.cpp 27
6416
6417 \sa size()
6418*/
6419
6420/*! \fn QString &QString::operator+=(const QString &other)
6421
6422 Appends the string \a other onto the end of this string and
6423 returns a reference to this string.
6424
6425 Example:
6426
6427 \snippet qstring/main.cpp 84
6428
6429 This operation is typically very fast (\l{constant time}),
6430 because QString preallocates extra space at the end of the string
6431 data so it can grow without reallocating the entire string each
6432 time.
6433
6434 \sa append(), prepend()
6435*/
6436
6437/*! \fn QString &QString::operator+=(QLatin1StringView str)
6438
6439 \overload operator+=()
6440
6441 Appends the Latin-1 string viewed by \a str to this string.
6442*/
6443
6444/*! \fn QString &QString::operator+=(QUtf8StringView str)
6445 \since 6.5
6446 \overload operator+=()
6447
6448 Appends the UTF-8 string view \a str to this string.
6449*/
6450
6451/*! \fn QString &QString::operator+=(const QByteArray &ba)
6452
6453 \overload operator+=()
6454
6455 Appends the byte array \a ba to this string. The byte array is converted
6456 to Unicode using the fromUtf8() function. If any NUL characters ('\\0')
6457 are embedded in the \a ba byte array, they will be included in the
6458 transformation.
6459
6460 You can disable this function by defining
6461 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
6462 can be useful if you want to ensure that all user-visible strings
6463 go through QObject::tr(), for example.
6464*/
6465
6466/*! \fn QString &QString::operator+=(const char *str)
6467
6468 \overload operator+=()
6469
6470 Appends the string \a str to this string. The const char pointer
6471 is converted to Unicode using the fromUtf8() function.
6472
6473 You can disable this function by defining \l QT_NO_CAST_FROM_ASCII
6474 when you compile your applications. This can be useful if you want
6475 to ensure that all user-visible strings go through QObject::tr(),
6476 for example.
6477*/
6478
6479/*! \fn QString &QString::operator+=(QStringView str)
6480 \since 6.0
6481 \overload operator+=()
6482
6483 Appends the string view \a str to this string.
6484*/
6485
6486/*! \fn QString &QString::operator+=(QChar ch)
6487
6488 \overload operator+=()
6489
6490 Appends the character \a ch to the string.
6491*/
6492
6493/*!
6494 \fn bool QString::operator==(const char * const &lhs, const QString &rhs)
6495
6496 \overload operator==()
6497
6498 Returns \c true if \a lhs is equal to \a rhs; otherwise returns \c false.
6499 Note that no string is equal to \a lhs being 0.
6500
6501 Equivalent to \c {lhs != 0 && compare(lhs, rhs) == 0}.
6502*/
6503
6504/*!
6505 \fn bool QString::operator!=(const char * const &lhs, const QString &rhs)
6506
6507 Returns \c true if \a lhs is not equal to \a rhs; otherwise returns
6508 \c false.
6509
6510 For \a lhs != 0, this is equivalent to \c {compare(} \a lhs, \a rhs
6511 \c {) != 0}. Note that no string is equal to \a lhs being 0.
6512*/
6513
6514/*!
6515 \fn bool QString::operator<(const char * const &lhs, const QString &rhs)
6516
6517 Returns \c true if \a lhs is lexically less than \a rhs; otherwise
6518 returns \c false. For \a lhs != 0, this is equivalent to \c
6519 {compare(lhs, rhs) < 0}.
6520
6521 \sa {Comparing Strings}
6522*/
6523
6524/*!
6525 \fn bool QString::operator<=(const char * const &lhs, const QString &rhs)
6526
6527 Returns \c true if \a lhs is lexically less than or equal to \a rhs;
6528 otherwise returns \c false. For \a lhs != 0, this is equivalent to \c
6529 {compare(lhs, rhs) <= 0}.
6530
6531 \sa {Comparing Strings}
6532*/
6533
6534/*!
6535 \fn bool QString::operator>(const char * const &lhs, const QString &rhs)
6536
6537 Returns \c true if \a lhs is lexically greater than \a rhs; otherwise
6538 returns \c false. Equivalent to \c {compare(lhs, rhs) > 0}.
6539
6540 \sa {Comparing Strings}
6541*/
6542
6543/*!
6544 \fn bool QString::operator>=(const char * const &lhs, const QString &rhs)
6545
6546 Returns \c true if \a lhs is lexically greater than or equal to \a rhs;
6547 otherwise returns \c false. For \a lhs != 0, this is equivalent to \c
6548 {compare(lhs, rhs) >= 0}.
6549
6550 \sa {Comparing Strings}
6551*/
6552
6553/*!
6554 \fn QString operator+(const QString &s1, const QString &s2)
6555 \fn QString operator+(QString &&s1, const QString &s2)
6556 \relates QString
6557
6558 Returns a string which is the result of concatenating \a s1 and \a
6559 s2.
6560*/
6561
6562/*!
6563 \fn QString operator+(const QString &s1, const char *s2)
6564 \relates QString
6565
6566 Returns a string which is the result of concatenating \a s1 and \a
6567 s2 (\a s2 is converted to Unicode using the QString::fromUtf8()
6568 function).
6569
6570 \sa QString::fromUtf8()
6571*/
6572
6573/*!
6574 \fn QString operator+(const char *s1, const QString &s2)
6575 \relates QString
6576
6577 Returns a string which is the result of concatenating \a s1 and \a
6578 s2 (\a s1 is converted to Unicode using the QString::fromUtf8()
6579 function).
6580
6581 \sa QString::fromUtf8()
6582*/
6583
6584/*!
6585 \fn QString operator+(QStringView lhs, const QString &rhs)
6586 \fn QString operator+(const QString &lhs, QStringView rhs)
6587
6588 \relates QString
6589 \since 6.9
6590
6591 Returns a string that is the result of concatenating \a lhs and \a rhs.
6592*/
6593
6594/*!
6595 \fn int QString::compare(const QString &s1, const QString &s2, Qt::CaseSensitivity cs)
6596 \since 4.2
6597
6598 Compares the string \a s1 with the string \a s2 and returns a negative integer
6599 if \a s1 is less than \a s2, a positive integer if it is greater than \a s2,
6600 and zero if they are equal.
6601
6602 \include qstring.qdocinc {search-comparison-case-sensitivity} {comparison}
6603
6604 Case sensitive comparison is based exclusively on the numeric
6605 Unicode values of the characters and is very fast, but is not what
6606 a human would expect. Consider sorting user-visible strings with
6607 localeAwareCompare().
6608
6609 \snippet qstring/main.cpp 16
6610
6611//! [compare-isNull-vs-isEmpty]
6612 \note This function treats null strings the same as empty strings,
6613 for more details see \l {Distinction Between Null and Empty Strings}.
6614//! [compare-isNull-vs-isEmpty]
6615
6616 \sa operator==(), operator<(), operator>(), {Comparing Strings}
6617*/
6618
6619/*!
6620 \fn int QString::compare(const QString &s1, QLatin1StringView s2, Qt::CaseSensitivity cs)
6621 \since 4.2
6622 \overload compare()
6623
6624 Performs a comparison of \a s1 and \a s2, using the case
6625 sensitivity setting \a cs.
6626*/
6627
6628/*!
6629 \fn int QString::compare(QLatin1StringView s1, const QString &s2, Qt::CaseSensitivity cs = Qt::CaseSensitive)
6630
6631 \since 4.2
6632 \overload compare()
6633
6634 Performs a comparison of \a s1 and \a s2, using the case
6635 sensitivity setting \a cs.
6636*/
6637
6638/*!
6639 \fn int QString::compare(QStringView s, Qt::CaseSensitivity cs = Qt::CaseSensitive) const
6640
6641 \since 5.12
6642 \overload compare()
6643
6644 Performs a comparison of this with \a s, using the case
6645 sensitivity setting \a cs.
6646*/
6647
6648/*!
6649 \fn int QString::compare(QChar ch, Qt::CaseSensitivity cs = Qt::CaseSensitive) const
6650
6651 \since 5.14
6652 \overload compare()
6653
6654 Performs a comparison of this with \a ch, using the case
6655 sensitivity setting \a cs.
6656*/
6657
6658/*!
6659 \overload compare()
6660 \since 4.2
6661
6662 Lexically compares this string with the string \a other and returns
6663 a negative integer if this string is less than \a other, a positive
6664 integer if it is greater than \a other, and zero if they are equal.
6665
6666 Same as compare(*this, \a other, \a cs).
6667*/
6668int QString::compare(const QString &other, Qt::CaseSensitivity cs) const noexcept
6669{
6670 return QtPrivate::compareStrings(*this, other, cs);
6671}
6672
6673/*!
6674 \internal
6675 \since 4.5
6676*/
6677int QString::compare_helper(const QChar *data1, qsizetype length1, const QChar *data2, qsizetype length2,
6678 Qt::CaseSensitivity cs) noexcept
6679{
6680 Q_ASSERT(length1 >= 0);
6681 Q_ASSERT(length2 >= 0);
6682 Q_ASSERT(data1 || length1 == 0);
6683 Q_ASSERT(data2 || length2 == 0);
6684 return QtPrivate::compareStrings(QStringView(data1, length1), QStringView(data2, length2), cs);
6685}
6686
6687/*!
6688 \overload compare()
6689 \since 4.2
6690
6691 Same as compare(*this, \a other, \a cs).
6692*/
6693int QString::compare(QLatin1StringView other, Qt::CaseSensitivity cs) const noexcept
6694{
6695 return QtPrivate::compareStrings(*this, other, cs);
6696}
6697
6698/*!
6699 \internal
6700 \since 5.0
6701*/
6702int QString::compare_helper(const QChar *data1, qsizetype length1, const char *data2, qsizetype length2,
6703 Qt::CaseSensitivity cs) noexcept
6704{
6705 Q_ASSERT(length1 >= 0);
6706 Q_ASSERT(data1 || length1 == 0);
6707 if (!data2)
6708 return qt_lencmp(length1, 0);
6709 if (Q_UNLIKELY(length2 < 0))
6710 length2 = qsizetype(strlen(data2));
6711 return QtPrivate::compareStrings(QStringView(data1, length1),
6712 QUtf8StringView(data2, length2), cs);
6713}
6714
6715/*!
6716 \fn int QString::compare(const QString &s1, QStringView s2, Qt::CaseSensitivity cs = Qt::CaseSensitive)
6717 \overload compare()
6718*/
6719
6720/*!
6721 \fn int QString::compare(QStringView s1, const QString &s2, Qt::CaseSensitivity cs = Qt::CaseSensitive)
6722 \overload compare()
6723*/
6724
6725bool comparesEqual(const QByteArrayView &lhs, const QChar &rhs) noexcept
6726{
6727 return QtPrivate::equalStrings(QUtf8StringView(lhs), QStringView(&rhs, 1));
6728}
6729
6730Qt::strong_ordering compareThreeWay(const QByteArrayView &lhs, const QChar &rhs) noexcept
6731{
6732 const int res = QtPrivate::compareStrings(QUtf8StringView(lhs), QStringView(&rhs, 1));
6733 return Qt::compareThreeWay(res, 0);
6734}
6735
6736bool comparesEqual(const QByteArrayView &lhs, char16_t rhs) noexcept
6737{
6738 return QtPrivate::equalStrings(QUtf8StringView(lhs), QStringView(&rhs, 1));
6739}
6740
6741Qt::strong_ordering compareThreeWay(const QByteArrayView &lhs, char16_t rhs) noexcept
6742{
6743 const int res = QtPrivate::compareStrings(QUtf8StringView(lhs), QStringView(&rhs, 1));
6744 return Qt::compareThreeWay(res, 0);
6745}
6746
6747bool comparesEqual(const QByteArray &lhs, const QChar &rhs) noexcept
6748{
6749 return QtPrivate::equalStrings(QUtf8StringView(lhs), QStringView(&rhs, 1));
6750}
6751
6752Qt::strong_ordering compareThreeWay(const QByteArray &lhs, const QChar &rhs) noexcept
6753{
6754 const int res = QtPrivate::compareStrings(QUtf8StringView(lhs), QStringView(&rhs, 1));
6755 return Qt::compareThreeWay(res, 0);
6756}
6757
6758bool comparesEqual(const QByteArray &lhs, char16_t rhs) noexcept
6759{
6760 return QtPrivate::equalStrings(QUtf8StringView(lhs), QStringView(&rhs, 1));
6761}
6762
6763Qt::strong_ordering compareThreeWay(const QByteArray &lhs, char16_t rhs) noexcept
6764{
6765 const int res = QtPrivate::compareStrings(QUtf8StringView(lhs), QStringView(&rhs, 1));
6766 return Qt::compareThreeWay(res, 0);
6767}
6768
6769/*!
6770 \internal
6771 \since 6.8
6772*/
6773bool QT_FASTCALL QChar::equal_helper(QChar lhs, const char *rhs) noexcept
6774{
6775 return QtPrivate::equalStrings(QStringView(&lhs, 1), QUtf8StringView(rhs));
6776}
6777
6778int QT_FASTCALL QChar::compare_helper(QChar lhs, const char *rhs) noexcept
6779{
6780 return QtPrivate::compareStrings(QStringView(&lhs, 1), QUtf8StringView(rhs));
6781}
6782
6783/*!
6784 \internal
6785 \since 6.8
6786*/
6787bool QStringView::equal_helper(QStringView sv, const char *data, qsizetype len)
6788{
6789 Q_ASSERT(len >= 0);
6790 Q_ASSERT(data || len == 0);
6791 return QtPrivate::equalStrings(sv, QUtf8StringView(data, len));
6792}
6793
6794/*!
6795 \internal
6796 \since 6.8
6797*/
6798int QStringView::compare_helper(QStringView sv, const char *data, qsizetype len)
6799{
6800 Q_ASSERT(len >= 0);
6801 Q_ASSERT(data || len == 0);
6802 return QtPrivate::compareStrings(sv, QUtf8StringView(data, len));
6803}
6804
6805/*!
6806 \internal
6807 \since 6.8
6808*/
6809bool QLatin1StringView::equal_helper(QLatin1StringView s1, const char *s2, qsizetype len) noexcept
6810{
6811 // because qlatin1stringview.h can't include qutf8stringview.h
6812 Q_ASSERT(len >= 0);
6813 Q_ASSERT(s2 || len == 0);
6814 return QtPrivate::equalStrings(s1, QUtf8StringView(s2, len));
6815}
6816
6817/*!
6818 \internal
6819 \since 6.6
6820*/
6821int QLatin1StringView::compare_helper(const QLatin1StringView &s1, const char *s2, qsizetype len) noexcept
6822{
6823 // because qlatin1stringview.h can't include qutf8stringview.h
6824 Q_ASSERT(len >= 0);
6825 Q_ASSERT(s2 || len == 0);
6826 return QtPrivate::compareStrings(s1, QUtf8StringView(s2, len));
6827}
6828
6829/*!
6830 \internal
6831 \since 4.5
6832*/
6833int QLatin1StringView::compare_helper(const QChar *data1, qsizetype length1, QLatin1StringView s2,
6834 Qt::CaseSensitivity cs) noexcept
6835{
6836 Q_ASSERT(length1 >= 0);
6837 Q_ASSERT(data1 || length1 == 0);
6838 return QtPrivate::compareStrings(QStringView(data1, length1), s2, cs);
6839}
6840
6841/*!
6842 \fn int QString::localeAwareCompare(const QString & s1, const QString & s2)
6843
6844 Compares \a s1 with \a s2 and returns an integer less than, equal
6845 to, or greater than zero if \a s1 is less than, equal to, or
6846 greater than \a s2.
6847
6848 The comparison is performed in a locale- and also
6849 platform-dependent manner. Use this function to present sorted
6850 lists of strings to the user.
6851
6852 \sa compare(), QLocale, {Comparing Strings}
6853*/
6854
6855/*!
6856 \fn int QString::localeAwareCompare(QStringView other) const
6857 \since 6.0
6858 \overload localeAwareCompare()
6859
6860 Compares this string with the \a other string and returns an
6861 integer less than, equal to, or greater than zero if this string
6862 is less than, equal to, or greater than the \a other string.
6863
6864 The comparison is performed in a locale- and also
6865 platform-dependent manner. Use this function to present sorted
6866 lists of strings to the user.
6867
6868 Same as \c {localeAwareCompare(*this, other)}.
6869
6870 \sa {Comparing Strings}
6871*/
6872
6873/*!
6874 \fn int QString::localeAwareCompare(QStringView s1, QStringView s2)
6875 \since 6.0
6876 \overload localeAwareCompare()
6877
6878 Compares \a s1 with \a s2 and returns an integer less than, equal
6879 to, or greater than zero if \a s1 is less than, equal to, or
6880 greater than \a s2.
6881
6882 The comparison is performed in a locale- and also
6883 platform-dependent manner. Use this function to present sorted
6884 lists of strings to the user.
6885
6886 \sa {Comparing Strings}
6887*/
6888
6889
// Fallback values for the three comparison result codes; only defined when
// CSTR_LESS_THAN has not been defined already.
#ifndef CSTR_LESS_THAN
#  define CSTR_LESS_THAN 1
#  define CSTR_EQUAL 2
#  define CSTR_GREATER_THAN 3
#endif
6895
6896/*!
6897 \overload localeAwareCompare()
6898
6899 Compares this string with the \a other string and returns an
6900 integer less than, equal to, or greater than zero if this string
6901 is less than, equal to, or greater than the \a other string.
6902
6903 The comparison is performed in a locale- and also
6904 platform-dependent manner. Use this function to present sorted
6905 lists of strings to the user.
6906
6907 Same as \c {localeAwareCompare(*this, other)}.
6908
6909 \sa {Comparing Strings}
6910*/
6911int QString::localeAwareCompare(const QString &other) const
6912{
6913 return localeAwareCompare_helper(constData(), size(), other.constData(), other.size());
6914}
6915
6916/*!
6917 \internal
6918 \since 4.5
6919*/
6920int QString::localeAwareCompare_helper(const QChar *data1, qsizetype length1,
6921 const QChar *data2, qsizetype length2)
6922{
6923 Q_ASSERT(length1 >= 0);
6924 Q_ASSERT(data1 || length1 == 0);
6925 Q_ASSERT(length2 >= 0);
6926 Q_ASSERT(data2 || length2 == 0);
6927
6928 // do the right thing for null and empty
6929 if (length1 == 0 || length2 == 0)
6930 return QtPrivate::compareStrings(QStringView(data1, length1), QStringView(data2, length2),
6931 Qt::CaseSensitive);
6932
6933#if QT_CONFIG(icu)
6934 return QCollator::defaultCompare(QStringView(data1, length1), QStringView(data2, length2));
6935#else
6936 const QString lhs = QString::fromRawData(data1, length1).normalized(QString::NormalizationForm_C);
6937 const QString rhs = QString::fromRawData(data2, length2).normalized(QString::NormalizationForm_C);
6938# if defined(Q_OS_WIN)
6939 int res = CompareStringEx(LOCALE_NAME_USER_DEFAULT, 0, (LPWSTR)lhs.constData(), lhs.length(), (LPWSTR)rhs.constData(), rhs.length(), NULL, NULL, 0);
6940
6941 switch (res) {
6942 case CSTR_LESS_THAN:
6943 return -1;
6944 case CSTR_GREATER_THAN:
6945 return 1;
6946 default:
6947 return 0;
6948 }
6949# elif defined (Q_OS_DARWIN)
6950 // Use CFStringCompare for comparing strings on Mac. This makes Qt order
6951 // strings the same way as native applications do, and also respects
6952 // the "Order for sorted lists" setting in the International preferences
6953 // panel.
6954 const CFStringRef thisString =
6955 CFStringCreateWithCharactersNoCopy(kCFAllocatorDefault,
6956 reinterpret_cast<const UniChar *>(lhs.constData()), lhs.length(), kCFAllocatorNull);
6957 const CFStringRef otherString =
6958 CFStringCreateWithCharactersNoCopy(kCFAllocatorDefault,
6959 reinterpret_cast<const UniChar *>(rhs.constData()), rhs.length(), kCFAllocatorNull);
6960
6961 const int result = CFStringCompare(thisString, otherString, kCFCompareLocalized);
6962 CFRelease(thisString);
6963 CFRelease(otherString);
6964 return result;
6965# elif defined(Q_OS_UNIX)
6966 // declared in <string.h> (no better than QtPrivate::compareStrings() on Android, sadly)
6967 return strcoll(lhs.toLocal8Bit().constData(), rhs.toLocal8Bit().constData());
6968# else
6969# error "This case shouldn't happen"
6970 return QtPrivate::compareStrings(lhs, rhs, Qt::CaseSensitive);
6971# endif
6972#endif // !QT_CONFIG(icu)
6973}
6974
6975
6976/*!
6977 \fn const QChar *QString::unicode() const
6978
6979 Returns a Unicode representation of the string.
6980 The result remains valid until the string is modified.
6981
6982 \note The returned string may not be '\\0'-terminated.
6983 Use size() to determine the length of the array.
6984
6985 \sa utf16(), fromRawData()
6986*/
6987
6988/*!
6989 \fn const ushort *QString::utf16() const
6990 \obsolete [6.11] Use nullTerminate() and cast data() to \c{const char16_t *}
6991
6992 Returns the QString as a '\\0'-terminated array of unsigned
6993 shorts. The result remains valid until the string is modified.
6994
6995 The returned string is in host byte order.
6996
6997 \sa unicode()
6998*/
6999
7000const ushort *QString::utf16() const
7001{
7002 if (!d->isMutable()) {
7003 // ensure '\0'-termination for ::fromRawData strings
7004 const_cast<QString*>(this)->reallocData(d.size, QArrayData::KeepSize);
7005 }
7006 return reinterpret_cast<const ushort *>(d.data());
7007}
7008
7009/*!
7010 \fn QString &QString::nullTerminate()
7011 \since 6.10
7012
7013 If this string data isn't null-terminated, this method will make a deep
7014 copy of the data and make it null-terminated.
7015
7016 A QString is null-terminated by default, however in some cases (e.g.
7017 when using fromRawData()), the string data doesn't necessarily end
7018 with a \c {\0} character, which could be a problem when calling methods
7019 that expect a null-terminated string.
7020
7021 \sa nullTerminated(), fromRawData(), setRawData()
7022*/
7023QString &QString::nullTerminate()
7024{
7025 // ensure '\0'-termination for ::fromRawData strings
7026 if (!d->isMutable())
7027 *this = QString{constData(), size()};
7028 return *this;
7029}
7030
7031/*!
7032 \fn QString QString::nullTerminated() const &
7033 \fn QString QString::nullTerminated() &&
7034 \since 6.10
7035
7036 Returns a copy of this string that is always null-terminated.
7037
7038 \sa nullTerminate(), fromRawData(), setRawData()
7039*/
7040QString QString::nullTerminated() const &
7041{
7042 // ensure '\0'-termination for ::fromRawData strings
7043 if (!d->isMutable())
7044 return QString{constData(), size()};
7045 return *this;
7046}
7047
7048QString QString::nullTerminated() &&
7049{
7050 nullTerminate();
7051 return std::move(*this);
7052}
7053
7054/*!
7055 Returns a string of size \a width that contains this string
7056 padded by the \a fill character.
7057
7058 If \a truncate is \c false and the size() of the string is more than
7059 \a width, then the returned string is a copy of the string.
7060
7061 \snippet qstring/main.cpp 32
7062
7063 If \a truncate is \c true and the size() of the string is more than
7064 \a width, then any characters in a copy of the string after
7065 position \a width are removed, and the copy is returned.
7066
7067 \snippet qstring/main.cpp 33
7068
7069 \sa rightJustified()
7070*/
7071
7072QString QString::leftJustified(qsizetype width, QChar fill, bool truncate) const
7073{
7074 QString result;
7075 qsizetype len = size();
7076 qsizetype padlen = width - len;
7077 if (padlen > 0) {
7078 result.resize(len+padlen);
7079 if (len)
7080 memcpy(result.d.data(), d.data(), sizeof(QChar)*len);
7081 QChar *uc = (QChar*)result.d.data() + len;
7082 while (padlen--)
7083 * uc++ = fill;
7084 } else {
7085 if (truncate)
7086 result = left(width);
7087 else
7088 result = *this;
7089 }
7090 return result;
7091}
7092
7093/*!
7094 Returns a string of size() \a width that contains the \a fill
7095 character followed by the string. For example:
7096
7097 \snippet qstring/main.cpp 49
7098
7099 If \a truncate is \c false and the size() of the string is more than
7100 \a width, then the returned string is a copy of the string.
7101
7102 If \a truncate is true and the size() of the string is more than
7103 \a width, then the resulting string is truncated at position \a
7104 width.
7105
7106 \snippet qstring/main.cpp 50
7107
7108 \sa leftJustified()
7109*/
7110
7111QString QString::rightJustified(qsizetype width, QChar fill, bool truncate) const
7112{
7113 QString result;
7114 qsizetype len = size();
7115 qsizetype padlen = width - len;
7116 if (padlen > 0) {
7117 result.resize(len+padlen);
7118 QChar *uc = (QChar*)result.d.data();
7119 while (padlen--)
7120 * uc++ = fill;
7121 if (len)
7122 memcpy(static_cast<void *>(uc), static_cast<const void *>(d.data()), sizeof(QChar)*len);
7123 } else {
7124 if (truncate)
7125 result = left(width);
7126 else
7127 result = *this;
7128 }
7129 return result;
7130}
7131
7132/*!
7133 \fn QString QString::toLower() const
7134
7135 Returns a lowercase copy of the string.
7136
7137 \snippet qstring/main.cpp 75
7138
7139 The case conversion will always happen in the 'C' locale. For
7140 locale-dependent case folding use QLocale::toLower()
7141
7142 \sa toUpper(), QLocale::toLower()
7143*/
7144
7145namespace QUnicodeTables {
7146/*
7147 \internal
7148 Converts the \a str string starting from the position pointed to by the \a
7149 it iterator, using the Unicode case traits \c Traits, and returns the
7150 result. The input string must not be empty (the convertCase function below
7151 guarantees that).
7152
7153 The string type \c{T} is also a template and is either \c{const QString} or
7154 \c{QString}. This function can do both copy-conversion and in-place
7155 conversion depending on the state of the \a str parameter:
7156 \list
7157 \li \c{T} is \c{const QString}: copy-convert
7158 \li \c{T} is \c{QString} and its refcount != 1: copy-convert
7159 \li \c{T} is \c{QString} and its refcount == 1: in-place convert
7160 \endlist
7161
7162 In copy-convert mode, the local variable \c{s} is detached from the input
7163 \a str. In the in-place convert mode, \a str is in moved-from state and
7164 \c{s} contains the only copy of the string, without reallocation (thus,
7165 \a it is still valid).
7166
7167 There is one pathological case left: when the in-place conversion needs to
7168 reallocate memory to grow the buffer. In that case, we need to adjust the \a
7169 it pointer.
7170 */
7171template <typename T>
7172Q_NEVER_INLINE
7174{
7175 Q_ASSERT(!str.isEmpty());
7176 QString s = std::move(str); // will copy if T is const QString
7177 QChar *pp = s.begin() + it.index(); // will detach if necessary
7178
7179 do {
7180 const auto folded = fullConvertCase(it.next(), which);
7181 if (Q_UNLIKELY(folded.size() > 1)) {
7182 if (folded.chars[0] == *pp && folded.size() == 2) {
7183 // special case: only second actually changed (e.g. surrogate pairs),
7184 // avoid slow case
7185 ++pp;
7186 *pp++ = folded.chars[1];
7187 } else {
7188 // slow path: the string is growing
7189 qsizetype inpos = it.index() - 1;
7191
7192 s.replace(outpos, 1, reinterpret_cast<const QChar *>(folded.data()), folded.size());
7193 pp = const_cast<QChar *>(s.constBegin()) + outpos + folded.size();
7194
7195 // Adjust the input iterator if we are performing an in-place conversion
7196 if constexpr (!std::is_const<T>::value)
7198 }
7199 } else {
7200 *pp++ = folded.chars[0];
7201 }
7202 } while (it.hasNext());
7203
7204 return s;
7205}
7206
7207template <typename T>
7208static QString convertCase(T &str, QUnicodeTables::Case which)
7209{
7210 const QChar *p = str.constBegin();
7211 const QChar *e = p + str.size();
7212
7213 // this avoids out of bounds check in the loop
7214 while (e != p && e[-1].isHighSurrogate())
7215 --e;
7216
7217 QStringIterator it(p, e);
7218 while (it.hasNext()) {
7219 const char32_t uc = it.next();
7220 if (caseConversion(uc)[which].diff) {
7221 it.recede();
7222 return detachAndConvertCase(str, it, which);
7223 }
7224 }
7225 return std::move(str);
7226}
7227} // namespace QUnicodeTables
7228
7229QString QString::toLower_helper(const QString &str)
7230{
7231 return QUnicodeTables::convertCase(str, QUnicodeTables::LowerCase);
7232}
7233
7234QString QString::toLower_helper(QString &str)
7235{
7236 return QUnicodeTables::convertCase(str, QUnicodeTables::LowerCase);
7237}
7238
7239/*!
7240 \fn QString QString::toCaseFolded() const
7241
7242 Returns the case folded equivalent of the string. For most Unicode
7243 characters this is the same as toLower().
7244*/
7245
7246QString QString::toCaseFolded_helper(const QString &str)
7247{
7248 return QUnicodeTables::convertCase(str, QUnicodeTables::CaseFold);
7249}
7250
7251QString QString::toCaseFolded_helper(QString &str)
7252{
7253 return QUnicodeTables::convertCase(str, QUnicodeTables::CaseFold);
7254}
7255
7256/*!
7257 \fn QString QString::toUpper() const
7258
7259 Returns an uppercase copy of the string.
7260
7261 \snippet qstring/main.cpp 81
7262
7263 The case conversion will always happen in the 'C' locale. For
7264 locale-dependent case folding use QLocale::toUpper().
7265
7266 \note In some cases the uppercase form of a string may be longer than the
7267 original.
7268
7269 \note Since 2024, the German language officially prefers to uppercase ß
7270 (U+00DF LATIN SMALL LETTER SHARP S) as ẞ (U+1E9E LATIN CAPITAL LETTER SHARP S).
7271 Qt's implementation follows Unicode, which still mandates the use of "SS".
7272 If you need to implement the new German rules, you need to manually do
7273 \c{replace(u'ß', u'ẞ')} \e{before} calling this function.
7274
7275 \sa toLower(), QLocale::toLower()
7276*/
7277
7278QString QString::toUpper_helper(const QString &str)
7279{
7280 return QUnicodeTables::convertCase(str, QUnicodeTables::UpperCase);
7281}
7282
7283QString QString::toUpper_helper(QString &str)
7284{
7285 return QUnicodeTables::convertCase(str, QUnicodeTables::UpperCase);
7286}
7287
7288/*!
7289 \since 5.5
7290
7291 Safely builds a formatted string from the format string \a cformat
7292 and an arbitrary list of arguments.
7293
7294 The format string supports the conversion specifiers, length modifiers,
7295 and flags provided by printf() in the standard C++ library. The \a cformat
7296 string and \c{%s} arguments must be UTF-8 encoded.
7297
7298 \note The \c{%lc} escape sequence expects a unicode character of type
7299 \c char16_t (as returned by QChar::unicode()), or \c ushort.
7300 The \c{%ls} escape sequence expects a pointer to a zero-terminated array
7301 of unicode characters of type \c char16_t, or \c ushort (as returned by
7302 QString::utf16()). This is at odds with the printf() in the standard C++
7303 library, which defines \c {%lc} to print a wchar_t and \c{%ls} to print
7304 a \c{wchar_t*}, and might also produce compiler warnings on platforms
7305 where the size of \c {wchar_t} is not 16 bits.
7306
7307 \warning We do not recommend using QString::asprintf() in new Qt
7308 code. Instead, consider using QTextStream or arg(), both of
7309 which support Unicode strings seamlessly and are type-safe.
7310 Here is an example that uses QTextStream:
7311
7312 \snippet qstring/main.cpp 64
7313
7314 For \l {QObject::tr()}{translations}, especially if the string
7315 contains more than one escape sequence, you should consider using
7316 the arg() function instead. This allows the order of the
7317 replacements to be controlled by the translator.
7318
7319 \sa arg()
7320*/
7321
7322QString QString::asprintf(const char *cformat, ...)
7323{
7324 va_list ap;
7325 va_start(ap, cformat);
7326 QString s = vasprintf(cformat, ap);
7327 va_end(ap);
7328 return s;
7329}
7330
7331static void append_utf8(QString &qs, const char *cs, qsizetype len)
7332{
7333 const qsizetype oldSize = qs.size();
7334 qs.resize(oldSize + len);
7335 const QChar *newEnd = QUtf8::convertToUnicode(qs.data() + oldSize, QByteArrayView(cs, len));
7336 qs.resize(newEnd - qs.constData());
7337}
7338
7339static uint parse_flag_characters(const char * &c) noexcept
7340{
7341 uint flags = QLocaleData::ZeroPadExponent;
7342 while (true) {
7343 switch (*c) {
7344 case '#':
7347 break;
7348 case '0': flags |= QLocaleData::ZeroPadded; break;
7349 case '-': flags |= QLocaleData::LeftAdjusted; break;
7350 case ' ': flags |= QLocaleData::BlankBeforePositive; break;
7351 case '+': flags |= QLocaleData::AlwaysShowSign; break;
7352 case '\'': flags |= QLocaleData::GroupDigits; break;
7353 default: return flags;
7354 }
7355 ++c;
7356 }
7357}
7358
7359static int parse_field_width(const char *&c, qsizetype size)
7360{
7361 Q_ASSERT(isAsciiDigit(*c));
7362 const char *const stop = c + size;
7363
7364 // can't be negative - started with a digit
7365 // contains at least one digit
7366 auto [result, used] = qstrntoull(c, size, 10);
7367 c += used;
7368 if (used <= 0)
7369 return false;
7370 // preserve Qt 5.5 behavior of consuming all digits, no matter how many
7371 while (c < stop && isAsciiDigit(*c))
7372 ++c;
7373 return result < qulonglong(std::numeric_limits<int>::max()) ? int(result) : 0;
7374}
7375
7377
// Advances \a c by one and returns true when the character it points at is
// \a ch; otherwise leaves \a c alone and returns false.
static inline bool can_consume(const char * &c, char ch) noexcept
{
    const bool matched = (*c == ch);
    if (matched)
        ++c;
    return matched;
}
7386
7387static LengthMod parse_length_modifier(const char * &c) noexcept
7388{
7389 switch (*c++) {
7390 case 'h': return can_consume(c, 'h') ? lm_hh : lm_h;
7391 case 'l': return can_consume(c, 'l') ? lm_ll : lm_l;
7392 case 'L': return lm_L;
7393 case 'j': return lm_j;
7394 case 'z':
7395 case 'Z': return lm_z;
7396 case 't': return lm_t;
7397 }
7398 --c; // don't consume *c - it wasn't a flag
7399 return lm_none;
7400}
7401
7402/*!
7403 \fn QString QString::vasprintf(const char *cformat, va_list ap)
7404 \since 5.5
7405
7406 Equivalent method to asprintf(), but takes a va_list \a ap
7407 instead of a list of variable arguments. See the asprintf()
7408 documentation for an explanation of \a cformat.
7409
7410 This method does not call the va_end macro, the caller
7411 is responsible to call va_end on \a ap.
7412
7413 \sa asprintf()
7414*/
7415
7416QString QString::vasprintf(const char *cformat, va_list ap)
7417{
7418 if (!cformat || !*cformat) {
7419 // Qt 1.x compat
7420 return fromLatin1("");
7421 }
7422
7423 // Parse cformat
7424
7425 QString result;
7426 const char *c = cformat;
7427 const char *formatEnd = cformat + qstrlen(cformat);
7428 for (;;) {
7429 // Copy non-escape chars to result
7430 const char *cb = c;
7431 while (*c != '\0' && *c != '%')
7432 c++;
7433 append_utf8(result, cb, qsizetype(c - cb));
7434
7435 if (*c == '\0')
7436 break;
7437
7438 // Found '%'
7439 const char *escape_start = c;
7440 ++c;
7441
7442 if (*c == '\0') {
7443 result.append(u'%'); // a % at the end of the string - treat as non-escape text
7444 break;
7445 }
7446 if (*c == '%') {
7447 result.append(u'%'); // %%
7448 ++c;
7449 continue;
7450 }
7451
7452 uint flags = parse_flag_characters(c);
7453
7454 if (*c == '\0') {
7455 result.append(QLatin1StringView(escape_start)); // incomplete escape, treat as non-escape text
7456 break;
7457 }
7458
7459 // Parse field width
7460 int width = -1; // -1 means unspecified
7461 if (isAsciiDigit(*c)) {
7462 width = parse_field_width(c, formatEnd - c);
7463 } else if (*c == '*') { // can't parse this in another function, not portably, at least
7464 width = va_arg(ap, int);
7465 if (width < 0)
7466 width = -1; // treat all negative numbers as unspecified
7467 ++c;
7468 }
7469
7470 if (*c == '\0') {
7471 result.append(QLatin1StringView(escape_start)); // incomplete escape, treat as non-escape text
7472 break;
7473 }
7474
7475 // Parse precision
7476 int precision = -1; // -1 means unspecified
7477 if (*c == '.') {
7478 ++c;
7479 precision = 0;
7480 if (isAsciiDigit(*c)) {
7481 precision = parse_field_width(c, formatEnd - c);
7482 } else if (*c == '*') { // can't parse this in another function, not portably, at least
7483 precision = va_arg(ap, int);
7484 if (precision < 0)
7485 precision = -1; // treat all negative numbers as unspecified
7486 ++c;
7487 }
7488 }
7489
7490 if (*c == '\0') {
7491 result.append(QLatin1StringView(escape_start)); // incomplete escape, treat as non-escape text
7492 break;
7493 }
7494
7495 const LengthMod length_mod = parse_length_modifier(c);
7496
7497 if (*c == '\0') {
7498 result.append(QLatin1StringView(escape_start)); // incomplete escape, treat as non-escape text
7499 break;
7500 }
7501
7502 // Parse the conversion specifier and do the conversion
7503 QString subst;
7504 switch (*c) {
7505 case 'd':
7506 case 'i': {
7507 qint64 i;
7508 switch (length_mod) {
7509 case lm_none: i = va_arg(ap, int); break;
7510 case lm_hh: i = va_arg(ap, int); break;
7511 case lm_h: i = va_arg(ap, int); break;
7512 case lm_l: i = va_arg(ap, long int); break;
7513 case lm_ll: i = va_arg(ap, qint64); break;
7514 case lm_j: i = va_arg(ap, long int); break;
7515
7516 /* ptrdiff_t actually, but it should be the same for us */
7517 case lm_z: i = va_arg(ap, qsizetype); break;
7518 case lm_t: i = va_arg(ap, qsizetype); break;
7519 default: i = 0; break;
7520 }
7521 subst = QLocaleData::c()->longLongToString(i, precision, 10, width, flags);
7522 ++c;
7523 break;
7524 }
7525 case 'o':
7526 case 'u':
7527 case 'x':
7528 case 'X': {
7529 quint64 u;
7530 switch (length_mod) {
7531 case lm_none: u = va_arg(ap, uint); break;
7532 case lm_hh: u = va_arg(ap, uint); break;
7533 case lm_h: u = va_arg(ap, uint); break;
7534 case lm_l: u = va_arg(ap, ulong); break;
7535 case lm_ll: u = va_arg(ap, quint64); break;
7536 case lm_t: u = va_arg(ap, size_t); break;
7537 case lm_z: u = va_arg(ap, size_t); break;
7538 default: u = 0; break;
7539 }
7540
7541 if (isAsciiUpper(*c))
7542 flags |= QLocaleData::CapitalEorX;
7543
7544 int base = 10;
7545 switch (QtMiscUtils::toAsciiLower(*c)) {
7546 case 'o':
7547 base = 8; break;
7548 case 'u':
7549 base = 10; break;
7550 case 'x':
7551 base = 16; break;
7552 default: break;
7553 }
7554 subst = QLocaleData::c()->unsLongLongToString(u, precision, base, width, flags);
7555 ++c;
7556 break;
7557 }
7558 case 'E':
7559 case 'e':
7560 case 'F':
7561 case 'f':
7562 case 'G':
7563 case 'g':
7564 case 'A':
7565 case 'a': {
7566 double d;
7567 if (length_mod == lm_L)
7568 d = va_arg(ap, long double); // not supported - converted to a double
7569 else
7570 d = va_arg(ap, double);
7571
7572 if (isAsciiUpper(*c))
7573 flags |= QLocaleData::CapitalEorX;
7574
7575 QLocaleData::DoubleForm form = QLocaleData::DFDecimal;
7576 switch (QtMiscUtils::toAsciiLower(*c)) {
7577 case 'e': form = QLocaleData::DFExponent; break;
7578 case 'a': // not supported - decimal form used instead
7579 case 'f': form = QLocaleData::DFDecimal; break;
7580 case 'g': form = QLocaleData::DFSignificantDigits; break;
7581 default: break;
7582 }
7583 subst = QLocaleData::c()->doubleToString(d, precision, form, width, flags);
7584 ++c;
7585 break;
7586 }
7587 case 'c': {
7588 if (length_mod == lm_l)
7589 subst = QChar::fromUcs2(va_arg(ap, int));
7590 else
7591 subst = QLatin1Char((uchar) va_arg(ap, int));
7592 ++c;
7593 break;
7594 }
7595 case 's': {
7596 if (length_mod == lm_l) {
7597 const char16_t *buff = va_arg(ap, const char16_t*);
7598 const auto *ch = buff;
7599 while (precision != 0 && *ch != 0) {
7600 ++ch;
7601 --precision;
7602 }
7603 subst.setUtf16(buff, ch - buff);
7604 } else if (precision == -1) {
7605 subst = QString::fromUtf8(va_arg(ap, const char*));
7606 } else {
7607 const char *buff = va_arg(ap, const char*);
7608 subst = QString::fromUtf8(buff, qstrnlen(buff, precision));
7609 }
7610 ++c;
7611 break;
7612 }
7613 case 'p': {
7614 void *arg = va_arg(ap, void*);
7615 const quint64 i = reinterpret_cast<quintptr>(arg);
7616 flags |= QLocaleData::ShowBase;
7617 subst = QLocaleData::c()->unsLongLongToString(i, precision, 16, width, flags);
7618 ++c;
7619 break;
7620 }
7621 case 'n':
7622 switch (length_mod) {
7623 case lm_hh: {
7624 signed char *n = va_arg(ap, signed char*);
7625 *n = result.size();
7626 break;
7627 }
7628 case lm_h: {
7629 short int *n = va_arg(ap, short int*);
7630 *n = result.size();
7631 break;
7632 }
7633 case lm_l: {
7634 long int *n = va_arg(ap, long int*);
7635 *n = result.size();
7636 break;
7637 }
7638 case lm_ll: {
7639 qint64 *n = va_arg(ap, qint64*);
7640 *n = result.size();
7641 break;
7642 }
7643 default: {
7644 int *n = va_arg(ap, int*);
7645 *n = int(result.size());
7646 break;
7647 }
7648 }
7649 ++c;
7650 break;
7651
7652 default: // bad escape, treat as non-escape text
7653 for (const char *cc = escape_start; cc != c; ++cc)
7654 result.append(QLatin1Char(*cc));
7655 continue;
7656 }
7657
7658 if (flags & QLocaleData::LeftAdjusted)
7659 result.append(subst.leftJustified(width));
7660 else
7661 result.append(subst.rightJustified(width));
7662 }
7663
7664 return result;
7665}
7666
7667/*!
7668 \fn QString::toLongLong(bool *ok, int base) const
7669
7670 Returns the string converted to a \c{long long} using base \a
7671 base, which is 10 by default and must be between 2 and 36, or 0.
7672 Returns 0 if the conversion fails.
7673
7674 If \a ok is not \nullptr, failure is reported by setting *\a{ok}
7675 to \c false, and success by setting *\a{ok} to \c true.
7676
7677 If \a base is 0, the C language convention is used: if the string begins
7678 with "0x", base 16 is used; otherwise, if the string begins with "0b", base
7679 2 is used; otherwise, if the string begins with "0", base 8 is used;
7680 otherwise, base 10 is used.
7681
7682 The string conversion will always happen in the 'C' locale. For
7683 locale-dependent conversion use QLocale::toLongLong()
7684
7685 Example:
7686
7687 \snippet qstring/main.cpp 74
7688
7689 This function ignores leading and trailing whitespace.
7690
7691 \note Support for the "0b" prefix was added in Qt 6.4.
7692
7693 \sa number(), toULongLong(), toInt(), QLocale::toLongLong()
7694*/
7695
7696template <typename Int>
7697static Int toIntegral(QStringView string, bool *ok, int base)
7698{
7699#if defined(QT_CHECK_RANGE)
7700 if (base != 0 && (base < 2 || base > 36)) {
7701 qWarning("QString::toIntegral: Invalid base (%d)", base);
7702 base = 10;
7703 }
7704#endif
7705
7706 QVarLengthArray<uchar> latin1(string.size());
7707 qt_to_latin1(latin1.data(), string.utf16(), string.size());
7708 QSimpleParsedNumber<Int> r;
7709 if constexpr (std::is_signed_v<Int>)
7710 r = QLocaleData::bytearrayToLongLong(latin1, base);
7711 else
7712 r = QLocaleData::bytearrayToUnsLongLong(latin1, base);
7713 if (ok)
7714 *ok = r.ok();
7715 return r.result;
7716}
7717
7718qlonglong QString::toIntegral_helper(QStringView string, bool *ok, int base)
7719{
7720 return toIntegral<qlonglong>(string, ok, base);
7721}
7722
7723/*!
7724 \fn QString::toULongLong(bool *ok, int base) const
7725
7726 Returns the string converted to an \c{unsigned long long} using base \a
7727 base, which is 10 by default and must be between 2 and 36, or 0.
7728 Returns 0 if the conversion fails.
7729
7730 If \a ok is not \nullptr, failure is reported by setting *\a{ok}
7731 to \c false, and success by setting *\a{ok} to \c true.
7732
7733 If \a base is 0, the C language convention is used: if the string begins
7734 with "0x", base 16 is used; otherwise, if the string begins with "0b", base
7735 2 is used; otherwise, if the string begins with "0", base 8 is used;
7736 otherwise, base 10 is used.
7737
7738 The string conversion will always happen in the 'C' locale. For
7739 locale-dependent conversion use QLocale::toULongLong()
7740
7741 Example:
7742
7743 \snippet qstring/main.cpp 79
7744
7745 This function ignores leading and trailing whitespace.
7746
7747 \note Support for the "0b" prefix was added in Qt 6.4.
7748
7749 \sa number(), toLongLong(), QLocale::toULongLong()
7750*/
7751
7752qulonglong QString::toIntegral_helper(QStringView string, bool *ok, uint base)
7753{
7754 return toIntegral<qulonglong>(string, ok, base);
7755}
7756
7757/*!
7758 \fn long QString::toLong(bool *ok, int base) const
7759
7760 Returns the string converted to a \c long using base \a
7761 base, which is 10 by default and must be between 2 and 36, or 0.
7762 Returns 0 if the conversion fails.
7763
7764 If \a ok is not \nullptr, failure is reported by setting *\a{ok}
7765 to \c false, and success by setting *\a{ok} to \c true.
7766
7767 If \a base is 0, the C language convention is used: if the string begins
7768 with "0x", base 16 is used; otherwise, if the string begins with "0b", base
7769 2 is used; otherwise, if the string begins with "0", base 8 is used;
7770 otherwise, base 10 is used.
7771
7772 The string conversion will always happen in the 'C' locale. For
7773 locale-dependent conversion use QLocale::toLongLong()
7774
7775 Example:
7776
7777 \snippet qstring/main.cpp 73
7778
7779 This function ignores leading and trailing whitespace.
7780
7781 \note Support for the "0b" prefix was added in Qt 6.4.
7782
7783 \sa number(), toULong(), toInt(), QLocale::toInt()
7784*/
7785
7786/*!
7787 \fn ulong QString::toULong(bool *ok, int base) const
7788
7789 Returns the string converted to an \c{unsigned long} using base \a
7790 base, which is 10 by default and must be between 2 and 36, or 0.
7791 Returns 0 if the conversion fails.
7792
7793 If \a ok is not \nullptr, failure is reported by setting *\a{ok}
7794 to \c false, and success by setting *\a{ok} to \c true.
7795
7796 If \a base is 0, the C language convention is used: if the string begins
7797 with "0x", base 16 is used; otherwise, if the string begins with "0b", base
7798 2 is used; otherwise, if the string begins with "0", base 8 is used;
7799 otherwise, base 10 is used.
7800
7801 The string conversion will always happen in the 'C' locale. For
7802 locale-dependent conversion use QLocale::toULongLong()
7803
7804 Example:
7805
7806 \snippet qstring/main.cpp 78
7807
7808 This function ignores leading and trailing whitespace.
7809
7810 \note Support for the "0b" prefix was added in Qt 6.4.
7811
7812 \sa number(), QLocale::toUInt()
7813*/
7814
7815/*!
7816 \fn int QString::toInt(bool *ok, int base) const
7817 Returns the string converted to an \c int using base \a
7818 base, which is 10 by default and must be between 2 and 36, or 0.
7819 Returns 0 if the conversion fails.
7820
7821 If \a ok is not \nullptr, failure is reported by setting *\a{ok}
7822 to \c false, and success by setting *\a{ok} to \c true.
7823
7824 If \a base is 0, the C language convention is used: if the string begins
7825 with "0x", base 16 is used; otherwise, if the string begins with "0b", base
7826 2 is used; otherwise, if the string begins with "0", base 8 is used;
7827 otherwise, base 10 is used.
7828
7829 The string conversion will always happen in the 'C' locale. For
7830 locale-dependent conversion use QLocale::toInt()
7831
7832 Example:
7833
7834 \snippet qstring/main.cpp 72
7835
7836 This function ignores leading and trailing whitespace.
7837
7838 \note Support for the "0b" prefix was added in Qt 6.4.
7839
7840 \sa number(), toUInt(), toDouble(), QLocale::toInt()
7841*/
7842
7843/*!
7844 \fn uint QString::toUInt(bool *ok, int base) const
7845 Returns the string converted to an \c{unsigned int} using base \a
7846 base, which is 10 by default and must be between 2 and 36, or 0.
7847 Returns 0 if the conversion fails.
7848
7849 If \a ok is not \nullptr, failure is reported by setting *\a{ok}
7850 to \c false, and success by setting *\a{ok} to \c true.
7851
7852 If \a base is 0, the C language convention is used: if the string begins
7853 with "0x", base 16 is used; otherwise, if the string begins with "0b", base
7854 2 is used; otherwise, if the string begins with "0", base 8 is used;
7855 otherwise, base 10 is used.
7856
7857 The string conversion will always happen in the 'C' locale. For
7858 locale-dependent conversion use QLocale::toUInt()
7859
7860 Example:
7861
7862 \snippet qstring/main.cpp 77
7863
7864 This function ignores leading and trailing whitespace.
7865
7866 \note Support for the "0b" prefix was added in Qt 6.4.
7867
7868 \sa number(), toInt(), QLocale::toUInt()
7869*/
7870
7871/*!
7872 \fn short QString::toShort(bool *ok, int base) const
7873
7874 Returns the string converted to a \c short using base \a
7875 base, which is 10 by default and must be between 2 and 36, or 0.
7876 Returns 0 if the conversion fails.
7877
7878 If \a ok is not \nullptr, failure is reported by setting *\a{ok}
7879 to \c false, and success by setting *\a{ok} to \c true.
7880
7881 If \a base is 0, the C language convention is used: if the string begins
7882 with "0x", base 16 is used; otherwise, if the string begins with "0b", base
7883 2 is used; otherwise, if the string begins with "0", base 8 is used;
7884 otherwise, base 10 is used.
7885
7886 The string conversion will always happen in the 'C' locale. For
7887 locale-dependent conversion use QLocale::toShort()
7888
7889 Example:
7890
7891 \snippet qstring/main.cpp 76
7892
7893 This function ignores leading and trailing whitespace.
7894
7895 \note Support for the "0b" prefix was added in Qt 6.4.
7896
7897 \sa number(), toUShort(), toInt(), QLocale::toShort()
7898*/
7899
7900/*!
7901 \fn ushort QString::toUShort(bool *ok, int base) const
7902
7903 Returns the string converted to an \c{unsigned short} using base \a
7904 base, which is 10 by default and must be between 2 and 36, or 0.
7905 Returns 0 if the conversion fails.
7906
7907 If \a ok is not \nullptr, failure is reported by setting *\a{ok}
7908 to \c false, and success by setting *\a{ok} to \c true.
7909
7910 If \a base is 0, the C language convention is used: if the string begins
7911 with "0x", base 16 is used; otherwise, if the string begins with "0b", base
7912 2 is used; otherwise, if the string begins with "0", base 8 is used;
7913 otherwise, base 10 is used.
7914
7915 The string conversion will always happen in the 'C' locale. For
7916 locale-dependent conversion use QLocale::toUShort()
7917
7918 Example:
7919
7920 \snippet qstring/main.cpp 80
7921
7922 This function ignores leading and trailing whitespace.
7923
7924 \note Support for the "0b" prefix was added in Qt 6.4.
7925
7926 \sa number(), toShort(), QLocale::toUShort()
7927*/
7928
7929/*!
7930 Returns the string converted to a \c double value.
7931
7932 Returns an infinity if the conversion overflows or 0.0 if the
7933 conversion fails for other reasons (e.g. underflow).
7934
7935 If \a ok is not \nullptr, failure is reported by setting *\a{ok}
7936 to \c false, and success by setting *\a{ok} to \c true.
7937
7938 \snippet qstring/main.cpp 66
7939
7940 \warning The QString content may only contain valid numerical characters
7941 which includes the plus/minus sign, the character e used in scientific
7942 notation, and the decimal point. Including the unit or additional characters
7943 leads to a conversion error.
7944
7945 \snippet qstring/main.cpp 67
7946
7947 The string conversion will always happen in the 'C' locale. For
7948 locale-dependent conversion use QLocale::toDouble()
7949
7950 \snippet qstring/main.cpp 68
7951
7952 For historical reasons, this function does not handle
7953 thousands group separators. If you need to convert such numbers,
7954 use QLocale::toDouble().
7955
7956 \snippet qstring/main.cpp 69
7957
7958 This function ignores leading and trailing whitespace.
7959
7960 \sa number(), QLocale::setDefault(), QLocale::toDouble(), trimmed()
7961*/
7962
7963double QString::toDouble(bool *ok) const
7964{
7965 return QStringView(*this).toDouble(ok);
7966}
7967
7968double QStringView::toDouble(bool *ok) const
7969{
7970 QStringView string = qt_trimmed(*this);
7971 QVarLengthArray<uchar> latin1(string.size());
7972 qt_to_latin1(latin1.data(), string.utf16(), string.size());
7973 auto r = qt_asciiToDouble(reinterpret_cast<const char *>(latin1.data()), string.size());
7974 if (ok != nullptr)
7975 *ok = r.ok();
7976 return r.result;
7977}
7978
7979/*!
7980 Returns the string converted to a \c float value.
7981
7982 Returns an infinity if the conversion overflows or 0.0 if the
7983 conversion fails for other reasons (e.g. underflow).
7984
7985 If \a ok is not \nullptr, failure is reported by setting *\a{ok}
7986 to \c false, and success by setting *\a{ok} to \c true.
7987
7988 \warning The QString content may only contain valid numerical characters
7989 which includes the plus/minus sign, the character e used in scientific
7990 notation, and the decimal point. Including the unit or additional characters
7991 leads to a conversion error.
7992
7993 The string conversion will always happen in the 'C' locale. For
7994 locale-dependent conversion use QLocale::toFloat()
7995
7996 For historical reasons, this function does not handle
7997 thousands group separators. If you need to convert such numbers,
7998 use QLocale::toFloat().
7999
8000 Example:
8001
8002 \snippet qstring/main.cpp 71
8003
8004 This function ignores leading and trailing whitespace.
8005
8006 \sa number(), toDouble(), toInt(), QLocale::toFloat(), trimmed()
8007*/
8008
8009float QString::toFloat(bool *ok) const
8010{
8011 return QLocaleData::convertDoubleToFloat(toDouble(ok), ok);
8012}
8013
8014float QStringView::toFloat(bool *ok) const
8015{
8016 return QLocaleData::convertDoubleToFloat(toDouble(ok), ok);
8017}
8018
8019/*! \fn QString &QString::setNum(int n, int base)
8020
8021 Sets the string to the printed value of \a n in the specified \a
8022 base, and returns a reference to the string.
8023
8024 The base is 10 by default and must be between 2 and 36.
8025
8026 \snippet qstring/main.cpp 56
8027
8028 The formatting always uses QLocale::C, i.e., English/UnitedStates.
8029 To get a localized string representation of a number, use
8030 QLocale::toString() with the appropriate locale.
8031
8032 \sa number()
8033*/
8034
8035/*! \fn QString &QString::setNum(uint n, int base)
8036
8037 \overload
8038*/
8039
8040/*! \fn QString &QString::setNum(long n, int base)
8041
8042 \overload
8043*/
8044
8045/*! \fn QString &QString::setNum(ulong n, int base)
8046
8047 \overload
8048*/
8049
8050/*!
8051 \overload
8052*/
8053QString &QString::setNum(qlonglong n, int base)
8054{
8055 return *this = number(n, base);
8056}
8057
8058/*!
8059 \overload
8060*/
8061QString &QString::setNum(qulonglong n, int base)
8062{
8063 return *this = number(n, base);
8064}
8065
8066/*! \fn QString &QString::setNum(short n, int base)
8067
8068 \overload
8069*/
8070
8071/*! \fn QString &QString::setNum(ushort n, int base)
8072
8073 \overload
8074*/
8075
8076/*!
8077 \overload
8078
8079 Sets the string to the printed value of \a n, formatted according to the
8080 given \a format and \a precision, and returns a reference to the string.
8081
8082 \sa number(), QLocale::FloatingPointPrecisionOption, {Number formats}
8083*/
8084
8085QString &QString::setNum(double n, char format, int precision)
8086{
8087 return *this = number(n, format, precision);
8088}
8089
8090/*!
8091 \fn QString &QString::setNum(float n, char format, int precision)
8092 \overload
8093
8094 Sets the string to the printed value of \a n, formatted according
8095 to the given \a format and \a precision, and returns a reference
8096 to the string.
8097
8098 The formatting always uses QLocale::C, i.e., English/UnitedStates.
8099 To get a localized string representation of a number, use
8100 QLocale::toString() with the appropriate locale.
8101
8102 \sa number()
8103*/
8104
8105
8106/*!
8107 \fn QString QString::number(long n, int base)
8108
8109 Returns a string equivalent of the number \a n according to the
8110 specified \a base.
8111
8112 The base is 10 by default and must be between 2
8113 and 36. For bases other than 10, \a n is treated as an
8114 unsigned integer.
8115
8116 The formatting always uses QLocale::C, i.e., English/UnitedStates.
8117 To get a localized string representation of a number, use
8118 QLocale::toString() with the appropriate locale.
8119
8120 \snippet qstring/main.cpp 35
8121
8122 \sa setNum()
8123*/
8124
8125QString QString::number(long n, int base)
8126{
8127 return number(qlonglong(n), base);
8128}
8129
8130/*!
8131 \fn QString QString::number(ulong n, int base)
8132
8133 \overload
8134*/
8135QString QString::number(ulong n, int base)
8136{
8137 return number(qulonglong(n), base);
8138}
8139
8140/*!
8141 \overload
8142*/
8143QString QString::number(int n, int base)
8144{
8145 return number(qlonglong(n), base);
8146}
8147
8148/*!
8149 \overload
8150*/
8151QString QString::number(uint n, int base)
8152{
8153 return number(qulonglong(n), base);
8154}
8155
8156/*!
8157 \overload
8158*/
8159QString QString::number(qlonglong n, int base)
8160{
8161#if defined(QT_CHECK_RANGE)
8162 if (base < 2 || base > 36) {
8163 qWarning("QString::setNum: Invalid base (%d)", base);
8164 base = 10;
8165 }
8166#endif
8167 bool negative = n < 0;
8168 /*
8169 Negating std::numeric_limits<qlonglong>::min() hits undefined behavior, so
8170 taking an absolute value has to take a slight detour.
8171 */
8172 return qulltoBasicLatin(negative ? 1u + qulonglong(-(n + 1)) : qulonglong(n), base, negative);
8173}
8174
8175/*!
8176 \overload
8177*/
8178QString QString::number(qulonglong n, int base)
8179{
8180#if defined(QT_CHECK_RANGE)
8181 if (base < 2 || base > 36) {
8182 qWarning("QString::setNum: Invalid base (%d)", base);
8183 base = 10;
8184 }
8185#endif
8186 return qulltoBasicLatin(n, base, false);
8187}
8188
8189
8190/*!
8191 Returns a string representing the floating-point number \a n.
8192
8193 Returns a string that represents \a n, formatted according to the specified
8194 \a format and \a precision.
8195
8196 For formats with an exponent, the exponent will show its sign and have at
8197 least two digits, left-padding the exponent with zero if needed.
8198
8199 \sa setNum(), QLocale::toString(), QLocale::FloatingPointPrecisionOption, {Number formats}
8200*/
8201QString QString::number(double n, char format, int precision)
8202{
8203 QLocaleData::DoubleForm form = QLocaleData::DFDecimal;
8204
8205 switch (QtMiscUtils::toAsciiLower(format)) {
8206 case 'f':
8207 form = QLocaleData::DFDecimal;
8208 break;
8209 case 'e':
8210 form = QLocaleData::DFExponent;
8211 break;
8212 case 'g':
8213 form = QLocaleData::DFSignificantDigits;
8214 break;
8215 default:
8216#if defined(QT_CHECK_RANGE)
8217 qWarning("QString::setNum: Invalid format char '%c'", format);
8218#endif
8219 break;
8220 }
8221
8222 return qdtoBasicLatin(n, form, precision, isAsciiUpper(format));
8223}
8224
8225namespace {
8226template<class ResultList, class StringSource>
8227static ResultList splitString(const StringSource &source, QStringView sep,
8228 Qt::SplitBehavior behavior, Qt::CaseSensitivity cs)
8229{
8230 ResultList list;
8231 typename StringSource::size_type start = 0;
8232 typename StringSource::size_type end;
8233 typename StringSource::size_type extra = 0;
8234 while ((end = QtPrivate::findString(QStringView(source.constData(), source.size()), start + extra, sep, cs)) != -1) {
8235 if (start != end || behavior == Qt::KeepEmptyParts)
8236 list.append(source.sliced(start, end - start));
8237 start = end + sep.size();
8238 extra = (sep.size() == 0 ? 1 : 0);
8239 }
8240 if (start != source.size() || behavior == Qt::KeepEmptyParts)
8241 list.append(source.sliced(start));
8242 return list;
8243}
8244
8245} // namespace
8246
8247/*!
8248 Splits the string into substrings wherever \a sep occurs, and
8249 returns the list of those strings. If \a sep does not match
8250 anywhere in the string, split() returns a single-element list
8251 containing this string.
8252
8253 \a cs specifies whether \a sep should be matched case
8254 sensitively or case insensitively.
8255
8256 If \a behavior is Qt::SkipEmptyParts, empty entries don't
8257 appear in the result. By default, empty entries are kept.
8258
8259 Example:
8260
8261 \snippet qstring/main.cpp 62
8262
8263 If \a sep is empty, split() returns an empty string, followed
8264 by each of the string's characters, followed by another empty string:
8265
8266 \snippet qstring/main.cpp 62-empty
8267
8268 To understand this behavior, recall that the empty string matches
8269 everywhere, so the above is qualitatively the same as:
8270
8271 \snippet qstring/main.cpp 62-slashes
8272
8273 \sa QStringList::join(), section()
8274
8275 \since 5.14
8276*/
8277QStringList QString::split(const QString &sep, Qt::SplitBehavior behavior, Qt::CaseSensitivity cs) const
8278{
8279 return splitString<QStringList>(*this, sep, behavior, cs);
8280}
8281
8282/*!
8283 \overload
8284 \since 5.14
8285*/
8286QStringList QString::split(QChar sep, Qt::SplitBehavior behavior, Qt::CaseSensitivity cs) const
8287{
8288 return splitString<QStringList>(*this, QStringView(&sep, 1), behavior, cs);
8289}
8290
8291/*!
8292 \fn QList<QStringView> QStringView::split(QChar sep, Qt::SplitBehavior behavior, Qt::CaseSensitivity cs) const
8293 \fn QList<QStringView> QStringView::split(QStringView sep, Qt::SplitBehavior behavior, Qt::CaseSensitivity cs) const
8294
8295
8296 Splits the view into substring views wherever \a sep occurs, and
8297 returns the list of those string views.
8298
8299 See QString::split() for how \a sep, \a behavior and \a cs interact to form
8300 the result.
8301
8302 \note All the returned views are valid as long as the data referenced by
8303 this string view is valid. Destroying the data will cause all views to
8304 become dangling.
8305
8306 \since 6.0
8307*/
8308QList<QStringView> QStringView::split(QStringView sep, Qt::SplitBehavior behavior, Qt::CaseSensitivity cs) const
8309{
8310 return splitString<QList<QStringView>>(QStringView(*this), sep, behavior, cs);
8311}
8312
8313QList<QStringView> QStringView::split(QChar sep, Qt::SplitBehavior behavior, Qt::CaseSensitivity cs) const
8314{
8315 return split(QStringView(&sep, 1), behavior, cs);
8316}
8317
8318#if QT_CONFIG(regularexpression)
8319namespace {
8320template<class ResultList, typename String, typename MatchingFunction>
8321static ResultList splitString(const String &source, const QRegularExpression &re,
8322 MatchingFunction matchingFunction,
8323 Qt::SplitBehavior behavior)
8324{
8325 ResultList list;
8326 if (!re.isValid()) {
8327 qtWarnAboutInvalidRegularExpression(re, "QString", "split");
8328 return list;
8329 }
8330
8331 qsizetype start = 0;
8332 qsizetype end = 0;
8333 QRegularExpressionMatchIterator iterator = (re.*matchingFunction)(source, 0, QRegularExpression::NormalMatch, QRegularExpression::NoMatchOption);
8334 while (iterator.hasNext()) {
8335 QRegularExpressionMatch match = iterator.next();
8336 end = match.capturedStart();
8337 if (start != end || behavior == Qt::KeepEmptyParts)
8338 list.append(source.sliced(start, end - start));
8339 start = match.capturedEnd();
8340 }
8341
8342 if (start != source.size() || behavior == Qt::KeepEmptyParts)
8343 list.append(source.sliced(start));
8344
8345 return list;
8346}
8347} // namespace
8348
8349/*!
8350 \overload
8351 \since 5.14
8352
8353 Splits the string into substrings wherever the regular expression
8354 \a re matches, and returns the list of those strings. If \a re
8355 does not match anywhere in the string, split() returns a
8356 single-element list containing this string.
8357
8358 Here is an example where we extract the words in a sentence
8359 using one or more whitespace characters as the separator:
8360
8361 \snippet qstring/main.cpp 90
8362
8363 Here is a similar example, but this time we use any sequence of
8364 non-word characters as the separator:
8365
8366 \snippet qstring/main.cpp 91
8367
8368 Here is a third example where we use a zero-length assertion,
8369 \b{\\b} (word boundary), to split the string into an
8370 alternating sequence of non-word and word tokens:
8371
8372 \snippet qstring/main.cpp 92
8373
8374 \sa QStringList::join(), section()
8375*/
8376QStringList QString::split(const QRegularExpression &re, Qt::SplitBehavior behavior) const
8377{
8378#if QT_VERSION < QT_VERSION_CHECK(7, 0, 0)
8379 const auto matchingFunction = qOverload<const QString &, qsizetype, QRegularExpression::MatchType, QRegularExpression::MatchOptions>(&QRegularExpression::globalMatch);
8380#else
8381 const auto matchingFunction = &QRegularExpression::globalMatch;
8382#endif
8383 return splitString<QStringList>(*this,
8384 re,
8385 matchingFunction,
8386 behavior);
8387}
8388
8389/*!
8390 \overload
8391 \since 6.0
8392
 Splits the view into substring views wherever the regular expression \a re
 matches, and returns the list of those views. If \a re does not match
 anywhere in the view, split() returns a single-element list containing
 this view.
8397
8398 \note The views in the returned list are sub-views of this view; as such,
8399 they reference the same data as it and only remain valid for as long as that
8400 data remains live.
8401*/
8402QList<QStringView> QStringView::split(const QRegularExpression &re, Qt::SplitBehavior behavior) const
8403{
8404 return splitString<QList<QStringView>>(*this, re, &QRegularExpression::globalMatchView, behavior);
8405}
8406
8407#endif // QT_CONFIG(regularexpression)
8408
8409/*!
8410 \enum QString::NormalizationForm
8411
8412 This enum describes the various normalized forms of Unicode text.
8413
8414 \value NormalizationForm_D Canonical Decomposition
8415 \value NormalizationForm_C Canonical Decomposition followed by Canonical Composition
8416 \value NormalizationForm_KD Compatibility Decomposition
8417 \value NormalizationForm_KC Compatibility Decomposition followed by Canonical Composition
8418
8419 \sa normalized(),
8420 {https://www.unicode.org/reports/tr15/}{Unicode Standard Annex #15}
8421*/
8422
8423/*!
8424 \since 4.5
8425
8426 Returns a copy of this string repeated the specified number of \a times.
8427
8428 If \a times is less than 1, an empty string is returned.
8429
8430 Example:
8431
8432 \snippet code/src_corelib_text_qstring.cpp 8
8433*/
8434QString QString::repeated(qsizetype times) const
8435{
8436 if (d.size == 0)
8437 return *this;
8438
8439 if (times <= 1) {
8440 if (times == 1)
8441 return *this;
8442 return QString();
8443 }
8444
8445 const qsizetype resultSize = times * d.size;
8446
8447 QString result;
8448 result.reserve(resultSize);
8449 if (result.capacity() != resultSize)
8450 return QString(); // not enough memory
8451
8452 memcpy(result.d.data(), d.data(), d.size * sizeof(QChar));
8453
8454 qsizetype sizeSoFar = d.size;
8455 char16_t *end = result.d.data() + sizeSoFar;
8456
8457 const qsizetype halfResultSize = resultSize >> 1;
8458 while (sizeSoFar <= halfResultSize) {
8459 memcpy(end, result.d.data(), sizeSoFar * sizeof(QChar));
8460 end += sizeSoFar;
8461 sizeSoFar <<= 1;
8462 }
8463 memcpy(end, result.d.data(), (resultSize - sizeSoFar) * sizeof(QChar));
8464 result.d.data()[resultSize] = '\0';
8465 result.d.size = resultSize;
8466 return result;
8467}
8468
8469void qt_string_normalize(QString *data, QString::NormalizationForm mode, QChar::UnicodeVersion version, qsizetype from)
8470{
8471 {
8472 // check if it's fully ASCII first, because then we have no work
8473 auto start = reinterpret_cast<const char16_t *>(data->constData());
8474 const char16_t *p = start + from;
8475 if (isAscii_helper(p, p + data->size() - from))
8476 return;
8477 if (p > start + from)
8478 from = p - start - 1; // need one before the non-ASCII to perform NFC
8479 }
8480
8481 if (version == QChar::Unicode_Unassigned) {
8482 version = QChar::currentUnicodeVersion();
8483 } else if (int(version) <= NormalizationCorrectionsVersionMax) {
8484 const QString &s = *data;
8485 QChar *d = nullptr;
8487 if (n.version > version) {
8488 qsizetype pos = from;
8489 if (QChar::requiresSurrogates(n.ucs4)) {
8490 char16_t ucs4High = QChar::highSurrogate(n.ucs4);
8491 char16_t ucs4Low = QChar::lowSurrogate(n.ucs4);
8492
8493 // scan for this codepoint
8494 for ( ; pos < s.size() - 1; ++pos) {
8495 if (s.at(pos).unicode() == ucs4High && s.at(pos + 1).unicode() == ucs4Low)
8496 break;
8497 }
8498 if (pos == s.size())
8499 continue; // no correction necessary
8500
8501 // detach if necessary
8502 if (!d)
8503 d = data->data();
8504 if (QChar::requiresSurrogates(n.old_mapping)) {
8505 // no shrinking
8506 char16_t oldHigh = QChar::highSurrogate(n.old_mapping);
8507 char16_t oldLow = QChar::lowSurrogate(n.old_mapping);
8508 while (pos < s.size() - 1) {
8509 if (s.at(pos).unicode() == ucs4High && s.at(pos + 1).unicode() == ucs4Low) {
8510 d[pos] = QChar(oldHigh);
8511 d[++pos] = QChar(oldLow);
8512 }
8513 ++pos;
8514 }
8515 } else {
8516 // shrinking, so a little harder
8517 char16_t old = char16_t(n.old_mapping);
8518 qsizetype outpos = pos;
8519 for ( ; pos < s.size(); ++outpos, ++pos) {
8520 if (pos < s.size() - 1 && s.at(pos).unicode() == ucs4High
8521 && s.at(pos + 1).unicode() == ucs4Low) {
8522 d[outpos] = QChar(old);
8523 ++pos;
8524 }
8525 }
8526 data->truncate(outpos);
8527 d = nullptr;
8528 }
8529 } else {
8530 Q_ASSERT(!QChar::requiresSurrogates(n.old_mapping)); // BMP maps to BMP
8531 while (pos < s.size()) {
8532 if (s.at(pos).unicode() == n.ucs4) {
8533 if (!d)
8534 d = data->data();
8535 d[pos] = QChar(n.old_mapping);
8536 }
8537 ++pos;
8538 }
8539 }
8540 }
8541 }
8542 }
8543
8544 if (normalizationQuickCheckHelper(data, mode, from, &from))
8545 return;
8546
8547 decomposeHelper(data, mode < QString::NormalizationForm_KD, version, from);
8548
8549 canonicalOrderHelper(data, version, from);
8550
8551 if (mode == QString::NormalizationForm_D || mode == QString::NormalizationForm_KD)
8552 return;
8553
8554 composeHelper(data, version, from);
8555}
8556
8557/*!
8558 Returns the string in the given Unicode normalization \a mode,
8559 according to the given \a version of the Unicode standard.
8560*/
8561QString QString::normalized(QString::NormalizationForm mode, QChar::UnicodeVersion version) const
8562{
8563 QString copy = *this;
8564 qt_string_normalize(&copy, mode, version, 0);
8565 return copy;
8566}
8567
8568#if QT_VERSION < QT_VERSION_CHECK(7, 0, 0) && !defined(QT_BOOTSTRAPPED)
8569static void checkArgEscape(QStringView s)
8570{
8571 // If we're in here, it means that qArgDigitValue has accepted the
8572 // digit. We can skip the check in case we already know it will
8573 // succeed.
8574 if (!supportUnicodeDigitValuesInArg())
8575 return;
8576
8577 const auto isNonAsciiDigit = [](QChar c) {
8578 return c.unicode() < u'0' || c.unicode() > u'9';
8579 };
8580
8581 if (std::any_of(s.begin(), s.end(), isNonAsciiDigit)) {
8582 const auto accumulateDigit = [](int partial, QChar digit) {
8583 return partial * 10 + digit.digitValue();
8584 };
8585 const int parsedNumber = std::accumulate(s.begin(), s.end(), 0, accumulateDigit);
8586
8587 qWarning("QString::arg(): the replacement \"%%%ls\" contains non-ASCII digits;\n"
8588 " it is currently being interpreted as the %d-th substitution.\n"
8589 " This is deprecated; support for non-ASCII digits will be dropped\n"
8590 " in a future version of Qt.",
8591 qUtf16Printable(s.toString()),
8592 parsedNumber);
8593 }
8594}
8595#endif
8596
8598{
8599 int min_escape; // lowest escape sequence number
8600 qsizetype occurrences; // number of occurrences of the lowest escape sequence number
8601 qsizetype locale_occurrences; // number of occurrences of the lowest escape sequence number that
8602 // contain 'L'
8603 qsizetype escape_len; // total length of escape sequences which will be replaced
8604};
8605
8606static ArgEscapeData findArgEscapes(QStringView s)
8607{
8608 const QChar *uc_begin = s.begin();
8609 const QChar *uc_end = s.end();
8610
8611 ArgEscapeData d;
8612
8613 d.min_escape = INT_MAX;
8614 d.occurrences = 0;
8615 d.escape_len = 0;
8616 d.locale_occurrences = 0;
8617
8618 const QChar *c = uc_begin;
8619 while (c != uc_end) {
8620 while (c != uc_end && c->unicode() != '%')
8621 ++c;
8622
8623 if (c == uc_end)
8624 break;
8625 const QChar *escape_start = c;
8626 if (++c == uc_end)
8627 break;
8628
8629 bool locale_arg = false;
8630 if (c->unicode() == 'L') {
8631 locale_arg = true;
8632 if (++c == uc_end)
8633 break;
8634 }
8635
8636 int escape = qArgDigitValue(*c);
8637 if (escape == -1)
8638 continue;
8639
8640 // ### Qt 7: do not allow anything but ASCII digits
8641 // in arg()'s replacements.
8642#if QT_VERSION <= QT_VERSION_CHECK(7, 0, 0) && !defined(QT_BOOTSTRAPPED)
8643 const QChar *escapeBegin = c;
8644 const QChar *escapeEnd = escapeBegin + 1;
8645#endif
8646
8647 ++c;
8648
8649 if (c != uc_end) {
8650 const int next_escape = qArgDigitValue(*c);
8651 if (next_escape != -1) {
8652 escape = (10 * escape) + next_escape;
8653 ++c;
8654#if QT_VERSION <= QT_VERSION_CHECK(7, 0, 0) && !defined(QT_BOOTSTRAPPED)
8655 ++escapeEnd;
8656#endif
8657 }
8658 }
8659
8660#if QT_VERSION <= QT_VERSION_CHECK(7, 0, 0) && !defined(QT_BOOTSTRAPPED)
8661 checkArgEscape(QStringView(escapeBegin, escapeEnd));
8662#endif
8663
8664 if (escape > d.min_escape)
8665 continue;
8666
8667 if (escape < d.min_escape) {
8668 d.min_escape = escape;
8669 d.occurrences = 0;
8670 d.escape_len = 0;
8671 d.locale_occurrences = 0;
8672 }
8673
8674 ++d.occurrences;
8675 if (locale_arg)
8676 ++d.locale_occurrences;
8677 d.escape_len += c - escape_start;
8678 }
8679 return d;
8680}
8681
// Builds the result of one arg() substitution: every occurrence of the
// place-marker numbered d.min_escape in \a s is replaced by \a arg, or by
// \a larg when the marker carries the 'L' flag, padded with \a fillChar to
// |field_width| characters. All other text is copied unchanged.
//
// \a d must describe \a s and report at least one occurrence; the loop relies
// on that to read ahead without bounds checks (see the assertion and comment
// inside the loop).
static QString replaceArgEscapes(QStringView s, const ArgEscapeData &d, qsizetype field_width,
                                 QStringView arg, QStringView larg, QChar fillChar)
{
    // Negative field-width for right-padding, positive for left-padding:
    const qsizetype abs_field_width = qAbs(field_width);
    // Exact output length: the input minus the replaced markers, plus one
    // (possibly padded) replacement per occurrence.
    const qsizetype result_len =
            s.size() - d.escape_len
            + (d.occurrences - d.locale_occurrences) * qMax(abs_field_width, arg.size())
            + d.locale_occurrences * qMax(abs_field_width, larg.size());

    QString result(result_len, Qt::Uninitialized);
    QChar *rc = const_cast<QChar *>(result.unicode()); // write cursor into result
    QChar *const result_end = rc + result_len;
    qsizetype repl_cnt = 0; // replacements performed so far

    const QChar *c = s.begin();
    const QChar *const uc_end = s.end();
    while (c != uc_end) {
        Q_ASSERT(d.occurrences > repl_cnt);
        /* We don't have to check increments of c against uc_end because, as
           long as d.occurrences > repl_cnt, we KNOW there are valid escape
           sequences remaining. */

        const QChar *text_start = c;
        while (c->unicode() != '%')
            ++c;

        const QChar *escape_start = c++;
        const bool localize = c->unicode() == 'L';
        if (localize)
            ++c;

        // Parse one or two digits; c ends on the last digit consumed.
        int escape = qArgDigitValue(*c);
        if (escape != -1 && c + 1 != uc_end) {
            const int digit = qArgDigitValue(c[1]);
            if (digit != -1) {
                ++c;
                escape = 10 * escape + digit;
            }
        }

        if (escape != d.min_escape) {
            // Not the marker being replaced: copy everything before c as-is.
            // c itself is not consumed and is rescanned by the next iteration.
            memcpy(rc, text_start, (c - text_start) * sizeof(QChar));
            rc += c - text_start;
        } else {
            // Step past the marker's last digit.
            ++c;

            // Copy the literal text that precedes the marker.
            memcpy(rc, text_start, (escape_start - text_start) * sizeof(QChar));
            rc += escape_start - text_start;

            const QStringView use = localize ? larg : arg;
            const qsizetype pad_chars = abs_field_width - use.size();
            // (If negative, relevant loops are no-ops: no need to check.)

            if (field_width > 0) { // left padded
                rc = std::fill_n(rc, pad_chars, fillChar);
            }

            // memcpy is skipped for an empty replacement.
            if (use.size())
                memcpy(rc, use.data(), use.size() * sizeof(QChar));
            rc += use.size();

            if (field_width < 0) { // right padded
                rc = std::fill_n(rc, pad_chars, fillChar);
            }

            // After the last replacement the rest of the input is copied in
            // one go and the loop ends.
            if (++repl_cnt == d.occurrences) {
                memcpy(rc, c, (uc_end - c) * sizeof(QChar));
                rc += uc_end - c;
                Q_ASSERT(rc == result_end);
                c = uc_end;
            }
        }
    }
    Q_ASSERT(rc == result_end);

    return result;
}
8760
8761/*!
8762 \fn template <typename T, QString::if_string_like<T> = true> QString QString::arg(const T &a, int fieldWidth, QChar fillChar) const
8763
8764 Returns a copy of this string with the lowest-numbered place-marker
8765 replaced by string \a a, i.e., \c %1, \c %2, ..., \c %99.
8766
8767 \a fieldWidth specifies the minimum amount of space that \a a
8768 shall occupy. If \a a requires less space than \a fieldWidth, it
8769 is padded to \a fieldWidth with character \a fillChar. A positive
8770 \a fieldWidth produces right-aligned text. A negative \a fieldWidth
8771 produces left-aligned text.
8772
8773 This example shows how we might create a \c status string for
8774 reporting progress while processing a list of files:
8775
8776 \snippet qstring/main.cpp 11-qstringview
8777
8778 First, \c arg(i) replaces \c %1. Then \c arg(total) replaces \c
8779 %2. Finally, \c arg(fileName) replaces \c %3.
8780
8781 One advantage of using arg() over asprintf() is that the order of the
8782 numbered place markers can change, if the application's strings are
8783 translated into other languages, but each arg() will still replace
8784 the lowest-numbered unreplaced place-marker, no matter where it
8785 appears. Also, if place-marker \c %i appears more than once in the
8786 string, arg() replaces all of them.
8787
8788 If there is no unreplaced place-marker remaining, a warning message
8789 is printed and the result is undefined. Place-marker numbers must be
8790 in the range 1 to 99.
8791
8792 \note In Qt versions prior to 6.9, this function was overloaded on
8793 \c{char}, QChar, QString, QStringView, and QLatin1StringView and in some
8794 cases, \c{wchar_t} and \c{char16_t} arguments would resolve to the integer
8795 overloads. In Qt versions prior to 5.10, this function lacked the
8796 QStringView and QLatin1StringView overloads.
8797*/
8798QString QString::arg_impl(QAnyStringView a, int fieldWidth, QChar fillChar) const
8799{
8800 ArgEscapeData d = findArgEscapes(*this);
8801
8802 if (Q_UNLIKELY(d.occurrences == 0)) {
8803 qWarning("QString::arg: Argument missing: \"%ls\", \"%ls\"", qUtf16Printable(*this),
8804 qUtf16Printable(a.toString()));
8805 return *this;
8806 }
8807 struct {
8808 QVarLengthArray<char16_t> out;
8809 QStringView operator()(QStringView in) noexcept { return in; }
8810 QStringView operator()(QLatin1StringView in)
8811 {
8812 out.resize(in.size());
8813 qt_from_latin1(out.data(), in.data(), size_t(in.size()));
8814 return out;
8815 }
8816 QStringView operator()(QUtf8StringView in)
8817 {
8818 out.resize(in.size());
8819 return QStringView{out.data(), QUtf8::convertToUnicode(out.data(), in)};
8820 }
8821 } convert;
8822
8823 QStringView sv = a.visit(std::ref(convert));
8824 return replaceArgEscapes(*this, d, fieldWidth, sv, sv, fillChar);
8825}
8826
8827/*!
8828 \fn template <typename T, QString::if_integral_non_char<T> = true> QString QString::arg(T a, int fieldWidth, int base, QChar fillChar) const
8829 \overload arg()
8830
8831 The \a a argument is expressed in base \a base, which is 10 by
8832 default and must be between 2 and 36. For bases other than 10, \a a
8833 is treated as an unsigned integer.
8834
8835 \a fieldWidth specifies the minimum amount of space that \a a is
8836 padded to and filled with the character \a fillChar. A positive
8837 value produces right-aligned text; a negative value produces
8838 left-aligned text.
8839
8840 The '%' can be followed by an 'L', in which case the sequence is
8841 replaced with a localized representation of \a a. The conversion
8842 uses the default locale, set by QLocale::setDefault(). If no default
8843 locale was specified, the system locale is used. The 'L' flag is
8844 ignored if \a base is not 10.
8845
8846 \snippet qstring/main.cpp 12
8847 \snippet qstring/main.cpp 14
8848
8849 \note In Qt versions prior to 6.10.1, this function accepted arguments of
8850 types that implicitly convert to integral types. This is no longer supported,
8851 except for (unscoped) enums, because it also accepted types convertible to
8852 floating-point types, losing precision when those were printed as integers. A
8853 backwards-compatible fix is to cast such types to a C++ type whose displayed
8854 form matches your intent (\c int, \c float, ...).
8855
8856 \note In Qt versions prior to 6.9, this function was overloaded on various
8857 integral types and sometimes incorrectly accepted \c char and \c char16_t
8858 arguments.
8859
8860 \sa {Number formats}
8861*/
8862QString QString::arg_impl(qlonglong a, int fieldWidth, int base, QChar fillChar) const
8863{
8864 ArgEscapeData d = findArgEscapes(*this);
8865
8866 if (d.occurrences == 0) {
8867 qWarning("QString::arg: Argument missing: \"%ls\", %llu", qUtf16Printable(*this), a);
8868 return *this;
8869 }
8870
8871 unsigned flags = QLocaleData::NoFlags;
8872 // ZeroPadded sorts out left-padding when the fill is zero, to the right of sign:
8873 if (fillChar == u'0')
8874 flags = QLocaleData::ZeroPadded;
8875
8876 QString arg;
8877 if (d.occurrences > d.locale_occurrences) {
8878 arg = QLocaleData::c()->longLongToString(a, -1, base, fieldWidth, flags);
8879 Q_ASSERT(fillChar != u'0' || fieldWidth <= arg.size());
8880 }
8881
8882 QString localeArg;
8883 if (d.locale_occurrences > 0) {
8884 QLocale locale;
8885 if (!(locale.numberOptions() & QLocale::OmitGroupSeparator))
8886 flags |= QLocaleData::GroupDigits;
8887 localeArg = locale.d->m_data->longLongToString(a, -1, base, fieldWidth, flags);
8888 Q_ASSERT(fillChar != u'0' || fieldWidth <= localeArg.size());
8889 }
8890
8891 return replaceArgEscapes(*this, d, fieldWidth, arg, localeArg, fillChar);
8892}
8893
8894QString QString::arg_impl(qulonglong a, int fieldWidth, int base, QChar fillChar) const
8895{
8896 ArgEscapeData d = findArgEscapes(*this);
8897
8898 if (d.occurrences == 0) {
8899 qWarning("QString::arg: Argument missing: \"%ls\", %lld", qUtf16Printable(*this), a);
8900 return *this;
8901 }
8902
8903 unsigned flags = QLocaleData::NoFlags;
8904 // ZeroPadded sorts out left-padding when the fill is zero, to the right of sign:
8905 if (fillChar == u'0')
8906 flags = QLocaleData::ZeroPadded;
8907
8908 QString arg;
8909 if (d.occurrences > d.locale_occurrences) {
8910 arg = QLocaleData::c()->unsLongLongToString(a, -1, base, fieldWidth, flags);
8911 Q_ASSERT(fillChar != u'0' || fieldWidth <= arg.size());
8912 }
8913
8914 QString localeArg;
8915 if (d.locale_occurrences > 0) {
8916 QLocale locale;
8917 if (!(locale.numberOptions() & QLocale::OmitGroupSeparator))
8918 flags |= QLocaleData::GroupDigits;
8919 localeArg = locale.d->m_data->unsLongLongToString(a, -1, base, fieldWidth, flags);
8920 Q_ASSERT(fillChar != u'0' || fieldWidth <= localeArg.size());
8921 }
8922
8923 return replaceArgEscapes(*this, d, fieldWidth, arg, localeArg, fillChar);
8924}
8925
8926/*!
8927 \fn template <typename T, QString::if_floating_point<T> = true> QString QString::arg(T a, int fieldWidth, char format, int precision, QChar fillChar) const
8928 \overload arg()
8929
8930 Argument \a a is formatted according to the specified \a format and
8931 \a precision. See \l{Floating-point Formats} for details.
8932
8933 \a fieldWidth specifies the minimum amount of space that \a a is
8934 padded to and filled with the character \a fillChar. A positive
8935 value produces right-aligned text; a negative value produces
8936 left-aligned text.
8937
8938 \snippet code/src_corelib_text_qstring.cpp 2
8939
8940 \note In Qt versions prior to 6.9, this function was a regular function
8941 taking \c double. As a consequence of being a template function now, it no
8942 longer accepts arguments that merely implicitly convert to floating-point
8943 types. A backwards-compatible fix is to cast such types to one of the C++
8944 floating-point types.
8945
8946 \sa QLocale::toString(), QLocale::FloatingPointPrecisionOption, {Number formats}
8947*/
8948QString QString::arg_impl(double a, int fieldWidth, char format, int precision, QChar fillChar) const
8949{
8950 ArgEscapeData d = findArgEscapes(*this);
8951
8952 if (d.occurrences == 0) {
8953 qWarning("QString::arg: Argument missing: \"%ls\", %g", qUtf16Printable(*this), a);
8954 return *this;
8955 }
8956
8957 unsigned flags = QLocaleData::NoFlags;
8958 // ZeroPadded sorts out left-padding when the fill is zero, to the right of sign:
8959 if (fillChar == u'0')
8960 flags |= QLocaleData::ZeroPadded;
8961
8962 if (isAsciiUpper(format))
8963 flags |= QLocaleData::CapitalEorX;
8964
8965 QLocaleData::DoubleForm form = QLocaleData::DFDecimal;
8966 switch (QtMiscUtils::toAsciiLower(format)) {
8967 case 'f':
8968 form = QLocaleData::DFDecimal;
8969 break;
8970 case 'e':
8971 form = QLocaleData::DFExponent;
8972 break;
8973 case 'g':
8974 form = QLocaleData::DFSignificantDigits;
8975 break;
8976 default:
8977#if defined(QT_CHECK_RANGE)
8978 qWarning("QString::arg: Invalid format char '%c'", format);
8979#endif
8980 break;
8981 }
8982
8983 QString arg;
8984 if (d.occurrences > d.locale_occurrences) {
8985 arg = QLocaleData::c()->doubleToString(a, precision, form, fieldWidth,
8986 flags | QLocaleData::ZeroPadExponent);
8987 Q_ASSERT(fillChar != u'0' || !qt_is_finite(a)
8988 || fieldWidth <= arg.size());
8989 }
8990
8991 QString localeArg;
8992 if (d.locale_occurrences > 0) {
8993 QLocale locale;
8994
8995 const QLocale::NumberOptions numberOptions = locale.numberOptions();
8996 if (!(numberOptions & QLocale::OmitGroupSeparator))
8997 flags |= QLocaleData::GroupDigits;
8998 if (!(numberOptions & QLocale::OmitLeadingZeroInExponent))
8999 flags |= QLocaleData::ZeroPadExponent;
9000 if (numberOptions & QLocale::IncludeTrailingZeroesAfterDot)
9001 flags |= QLocaleData::AddTrailingZeroes;
9002 localeArg = locale.d->m_data->doubleToString(a, precision, form, fieldWidth, flags);
9003 Q_ASSERT(fillChar != u'0' || !qt_is_finite(a)
9004 || fieldWidth <= localeArg.size());
9005 }
9006
9007 return replaceArgEscapes(*this, d, fieldWidth, arg, localeArg, fillChar);
9008}
9009
9010static inline char16_t to_unicode(const QChar c) { return c.unicode(); }
9011static inline char16_t to_unicode(const char c) { return QLatin1Char{c}.unicode(); }
9012
9013template <typename Char>
9014static int getEscape(const Char *uc, qsizetype *pos, qsizetype len)
9015{
9016 qsizetype i = *pos;
9017 ++i;
9018 if (i < len && uc[i] == u'L')
9019 ++i;
9020 if (i < len) {
9021 int escape = to_unicode(uc[i]) - '0';
9022 if (uint(escape) >= 10U)
9023 return -1;
9024 ++i;
9025 if (i < len) {
9026 // there's a second digit
9027 int digit = to_unicode(uc[i]) - '0';
9028 if (uint(digit) < 10U) {
9029 escape = (escape * 10) + digit;
9030 ++i;
9031 }
9032 }
9033 *pos = i;
9034 return escape;
9035 }
9036 return -1;
9037}
9038
9039/*
9040 Algorithm for multiArg:
9041
9042 1. Parse the string as a sequence of verbatim text and placeholders (%L?\d{1,2}).
9043 The L is parsed and accepted for compatibility with non-multi-arg, but since
9044 multiArg only accepts strings as replacements, the localization request can
9045 be safely ignored.
9046 2. The result of step (1) is a list of (string-ref,int)-tuples. The string-ref
9047 either points at text to be copied verbatim (in which case the int is -1),
9048 or, initially, at the textual representation of the placeholder. In that case,
9049 the int contains the numerical number as parsed from the placeholder.
9050 3. Next, collect all the non-negative ints found, sort them in ascending order and
9051 remove duplicates.
9052 3a. If the result has more entries than multiArg() was given replacement strings,
9053 we have found placeholders we can't satisfy with replacement strings. That is
9054 fine (there could be another .arg() call coming after this one), so just
9055 truncate the result to the number of actual multiArg() replacement strings.
9056 3b. If the result has less entries than multiArg() was given replacement strings,
9057 the string is missing placeholders. This is an error that the user should be
9058 warned about.
9059 4. The result of step (3) is a mapping from the index of any replacement string to
9060 placeholder number. This is the wrong way around, but since placeholder
9061 numbers could get as large as 99, while we typically don't have more than 9
9062 replacement strings, we trade a few hundred bytes of sparsely-used memory for doing a reverse lookup
9063 each time we need to map a placeholder number to a replacement string index
9064 (that's a linear search; but still *much* faster than using an associative container).
9065 5. Next, for each of the tuples found in step (1), do the following:
9066 5a. If the int is negative, do nothing.
9067 5b. Otherwise, if the int is found in the result of step (3) at index I, replace
9068 the string-ref with a string-ref for the (complete) I'th replacement string.
9069 5c. Otherwise, do nothing.
9070 6. Concatenate all string refs into a single result string.
9071*/
9072
9073namespace {
9074struct Part
9075{
9076 Part() = default; // for QVarLengthArray; do not use
9077 constexpr Part(QAnyStringView s, int num = -1)
9078 : string{s}, number{num} {}
9079
9080 void reset(QAnyStringView s) noexcept { *this = {s, number}; }
9081
9082 QAnyStringView string;
9083 int number;
9084};
9085} // unnamed namespace
9086
9088
9089namespace {
9090
9091enum { ExpectedParts = 32 };
9092
9093typedef QVarLengthArray<Part, ExpectedParts> ParseResult;
9094typedef QVarLengthArray<int, ExpectedParts/2> ArgIndexToPlaceholderMap;
9095
9096template <typename StringView>
9097static ParseResult parseMultiArgFormatString_impl(StringView s)
9098{
9099 ParseResult result;
9100
9101 const auto uc = s.data();
9102 const auto len = s.size();
9103 const auto end = len - 1;
9104 qsizetype i = 0;
9105 qsizetype last = 0;
9106
9107 while (i < end) {
9108 if (uc[i] == u'%') {
9109 qsizetype percent = i;
9110 int number = getEscape(uc, &i, len);
9111 if (number != -1) {
9112 if (last != percent)
9113 result.push_back(Part{s.sliced(last, percent - last)}); // literal text (incl. failed placeholders)
9114 result.push_back(Part{s.sliced(percent, i - percent), number}); // parsed placeholder
9115 last = i;
9116 continue;
9117 }
9118 }
9119 ++i;
9120 }
9121
9122 if (last < len)
9123 result.push_back(Part{s.sliced(last, len - last)}); // trailing literal text
9124
9125 return result;
9126}
9127
9128static ParseResult parseMultiArgFormatString(QAnyStringView s)
9129{
9130 return s.visit([] (auto s) { return parseMultiArgFormatString_impl(s); });
9131}
9132
9133static ArgIndexToPlaceholderMap makeArgIndexToPlaceholderMap(const ParseResult &parts)
9134{
9135 ArgIndexToPlaceholderMap result;
9136
9137 for (const Part &part : parts) {
9138 if (part.number >= 0)
9139 result.push_back(part.number);
9140 }
9141
9142 std::sort(result.begin(), result.end());
9143 result.erase(std::unique(result.begin(), result.end()),
9144 result.end());
9145
9146 return result;
9147}
9148
9149static qsizetype resolveStringRefsAndReturnTotalSize(ParseResult &parts, const ArgIndexToPlaceholderMap &argIndexToPlaceholderMap, const QtPrivate::ArgBase *args[])
9150{
9151 using namespace QtPrivate;
9152 qsizetype totalSize = 0;
9153 for (Part &part : parts) {
9154 if (part.number != -1) {
9155 const auto it = std::find(argIndexToPlaceholderMap.begin(), argIndexToPlaceholderMap.end(), part.number);
9156 if (it != argIndexToPlaceholderMap.end()) {
9157 const auto &arg = *args[it - argIndexToPlaceholderMap.begin()];
9158 switch (arg.tag) {
9159 case ArgBase::L1:
9160 part.reset(static_cast<const QLatin1StringArg&>(arg).string);
9161 break;
9162 case ArgBase::Any:
9163 part.reset(static_cast<const QAnyStringArg&>(arg).string);
9164 break;
9165 case ArgBase::U16:
9166 part.reset(static_cast<const QStringViewArg&>(arg).string);
9167 break;
9168 }
9169 }
9170 }
9171 totalSize += part.string.size();
9172 }
9173 return totalSize;
9174}
9175
9176} // unnamed namespace
9177
9178QString QtPrivate::argToQString(QAnyStringView pattern, size_t numArgs, const ArgBase **args)
9179{
9180 // Step 1-2 above
9181 ParseResult parts = parseMultiArgFormatString(pattern);
9182
9183 // 3-4
9184 ArgIndexToPlaceholderMap argIndexToPlaceholderMap = makeArgIndexToPlaceholderMap(parts);
9185
9186 if (static_cast<size_t>(argIndexToPlaceholderMap.size()) > numArgs) // 3a
9187 argIndexToPlaceholderMap.resize(qsizetype(numArgs));
9188 else if (Q_UNLIKELY(static_cast<size_t>(argIndexToPlaceholderMap.size()) < numArgs)) // 3b
9189 qWarning("QString::arg: %d argument(s) missing in %ls",
9190 int(numArgs - argIndexToPlaceholderMap.size()), qUtf16Printable(pattern.toString()));
9191
9192 // 5
9193 const qsizetype totalSize = resolveStringRefsAndReturnTotalSize(parts, argIndexToPlaceholderMap, args);
9194
9195 // 6:
9196 QString result(totalSize, Qt::Uninitialized);
9197 auto out = const_cast<QChar*>(result.constData());
9198
9199 struct Concatenate {
9200 QChar *out;
9201 QChar *operator()(QLatin1String part) noexcept
9202 {
9203 if (part.size()) {
9204 qt_from_latin1(reinterpret_cast<char16_t*>(out),
9205 part.data(), part.size());
9206 }
9207 return out + part.size();
9208 }
9209 QChar *operator()(QUtf8StringView part) noexcept
9210 {
9211 return QUtf8::convertToUnicode(out, part);
9212 }
9213 QChar *operator()(QStringView part) noexcept
9214 {
9215 if (part.size())
9216 memcpy(out, part.data(), part.size() * sizeof(QChar));
9217 return out + part.size();
9218 }
9219 };
9220
9221 for (const Part &part : parts)
9222 out = part.string.visit(Concatenate{out});
9223
9224 // UTF-8 decoding may have caused an overestimate of totalSize - correct it:
9225 result.truncate(out - result.cbegin());
9226
9227 return result;
9228}
9229
9230/*! \fn bool QString::isRightToLeft() const
9231
9232 Returns \c true if the string is read right to left.
9233
9234 \sa QStringView::isRightToLeft()
9235*/
9236bool QString::isRightToLeft() const
9237{
9238 return QtPrivate::isRightToLeft(QStringView(*this));
9239}
9240
9241/*!
9242 \fn bool QString::isValidUtf16() const noexcept
9243 \since 5.15
9244
9245 Returns \c true if the string contains valid UTF-16 encoded data,
9246 or \c false otherwise.
9247
9248 Note that this function does not perform any special validation of the
9249 data; it merely checks if it can be successfully decoded from UTF-16.
9250 The data is assumed to be in host byte order; the presence of a BOM
9251 is meaningless.
9252
9253 \sa QStringView::isValidUtf16()
9254*/
9255
9256/*! \fn QChar *QString::data()
9257
9258 Returns a pointer to the data stored in the QString. The pointer
9259 can be used to access and modify the characters that compose the
9260 string.
9261
9262 Unlike constData() and unicode(), the returned data is always
9263 '\\0'-terminated.
9264
9265 Example:
9266
9267 \snippet qstring/main.cpp 19
9268
9269 Note that the pointer remains valid only as long as the string is
9270 not modified by other means. For read-only access, constData() is
9271 faster because it never causes a \l{deep copy} to occur.
9272
9273 \sa constData(), operator[]()
9274*/
9275
9276/*! \fn const QChar *QString::data() const
9277
9278 \overload
9279
9280 \note The returned string may not be '\\0'-terminated.
9281 Use size() to determine the length of the array.
9282
9283 \sa fromRawData()
9284*/
9285
9286/*! \fn const QChar *QString::constData() const
9287
9288 Returns a pointer to the data stored in the QString. The pointer
9289 can be used to access the characters that compose the string.
9290
9291 Note that the pointer remains valid only as long as the string is
9292 not modified.
9293
9294 \note The returned string may not be '\\0'-terminated.
9295 Use size() to determine the length of the array.
9296
9297 \sa data(), operator[](), fromRawData()
9298*/
9299
9300/*! \fn void QString::push_front(const QString &other)
9301
9302 This function is provided for STL compatibility, prepending the
9303 given \a other string to the beginning of this string. It is
9304 equivalent to \c prepend(other).
9305
9306 \sa prepend()
9307*/
9308
9309/*! \fn void QString::push_front(QChar ch)
9310
9311 \overload
9312
9313 Prepends the given \a ch character to the beginning of this string.
9314*/
9315
9316/*! \fn void QString::push_back(const QString &other)
9317
9318 This function is provided for STL compatibility, appending the
9319 given \a other string onto the end of this string. It is
9320 equivalent to \c append(other).
9321
9322 \sa append()
9323*/
9324
9325/*! \fn void QString::push_back(QChar ch)
9326
9327 \overload
9328
9329 Appends the given \a ch character onto the end of this string.
9330*/
9331
9332/*!
9333 \since 6.1
9334
9335 Removes from the string the characters in the half-open range
9336 [ \a first , \a last ). Returns an iterator to the character
9337 immediately after the last erased character (i.e. the character
9338 referred to by \a last before the erase).
9339*/
9340QString::iterator QString::erase(QString::const_iterator first, QString::const_iterator last)
9341{
9342 const auto start = std::distance(cbegin(), first);
9343 const auto len = std::distance(first, last);
9344 remove(start, len);
9345 return begin() + start;
9346}
9347
9348/*!
9349 \fn QString::iterator QString::erase(QString::const_iterator it)
9350
9351 \overload
9352 \since 6.5
9353
9354 Removes the character denoted by \c it from the string.
9355 Returns an iterator to the character immediately after the
9356 erased character.
9357
9358 \code
9359 QString c = "abcdefg";
9360 auto it = c.erase(c.cbegin()); // c is now "bcdefg"; "it" points to "b"
9361 \endcode
9362*/
9363
9364/*! \fn void QString::shrink_to_fit()
9365 \since 5.10
9366
9367 This function is provided for STL compatibility. It is
9368 equivalent to squeeze().
9369
9370 \sa squeeze()
9371*/
9372
9373/*!
9374 \fn std::string QString::toStdString() const
9375
9376 Returns a std::string object with the data contained in this
9377 QString. The Unicode data is converted into 8-bit characters using
9378 the toUtf8() function.
9379
9380 This method is mostly useful to pass a QString to a function
9381 that accepts a std::string object.
9382
9383 \sa toLatin1(), toUtf8(), toLocal8Bit(), QByteArray::toStdString()
9384*/
9385std::string QString::toStdString() const
9386{
9387 std::string result;
9388 if (isEmpty())
9389 return result;
9390
9391 auto writeToBuffer = [this](char *out, size_t) {
9392 char *last = QUtf8::convertFromUnicode(out, *this);
9393 return last - out;
9394 };
9395 size_t maxSize = size() * 3; // worst case for UTF-8
9396#ifdef __cpp_lib_string_resize_and_overwrite
9397 // C++23
9398 result.resize_and_overwrite(maxSize, writeToBuffer);
9399#else
9400 result.resize(maxSize);
9401 result.resize(writeToBuffer(result.data(), result.size()));
9402#endif
9403 return result;
9404}
9405
9406/*!
9407 \fn QString QString::fromRawData(const char16_t *unicode, qsizetype size)
9408 \since 6.10
9409
9410 Constructs a QString that uses the first \a size Unicode characters
9411 in the array \a unicode. The data in \a unicode is \e not
9412 copied. The caller must be able to guarantee that \a unicode will
9413 not be deleted or modified as long as the QString (or an
9414 unmodified copy of it) exists.
9415
9416 Any attempts to modify the QString or copies of it will cause it
9417 to create a deep copy of the data, ensuring that the raw data
9418 isn't modified.
9419
9420 Here is an example of how we can use a QRegularExpression on raw data in
9421 memory without requiring to copy the data into a QString:
9422
9423 \snippet qstring/main.cpp 22
9424 \snippet qstring/main.cpp 23
9425
9426 \warning A string created with fromRawData() is \e not
9427 '\\0'-terminated, unless the raw data contains a '\\0' character
9428 at position \a size. This means unicode() will \e not return a
9429 '\\0'-terminated string (although utf16() does, at the cost of
9430 copying the raw data).
9431
9432 \sa fromUtf16(), setRawData(), data(), constData(),
9433 nullTerminate(), nullTerminated()
9434*/
9435
9436/*!
9437 \fn QString QString::fromRawData(const QChar *unicode, qsizetype size)
9438 \overload
9439*/
9440
9441/*!
9442 \since 4.7
9443
9444 Resets the QString to use the first \a size Unicode characters
9445 in the array \a unicode. The data in \a unicode is \e not
9446 copied. The caller must be able to guarantee that \a unicode will
9447 not be deleted or modified as long as the QString (or an
9448 unmodified copy of it) exists.
9449
9450 This function can be used instead of fromRawData() to re-use
9451 existing QString objects to save memory re-allocations.
9452
9453 \sa fromRawData(), nullTerminate(), nullTerminated()
9454*/
9455QString &QString::setRawData(const QChar *unicode, qsizetype size)
9456{
9457 if (!unicode || !size) {
9458 clear();
9459 }
9460 *this = fromRawData(unicode, size);
9461 return *this;
9462}
9463
9464/*! \fn QString QString::fromStdU16String(const std::u16string &str)
9465 \since 5.5
9466
9467 \include qstring.cpp {from-std-string} {UTF-16} {fromUtf16()}
9468
9469 \sa fromUtf16(), fromStdWString(), fromStdU32String()
9470*/
9471
9472/*!
9473 \fn std::u16string QString::toStdU16String() const
9474 \since 5.5
9475
9476 Returns a std::u16string object with the data contained in this
9477 QString. The Unicode data is the same as returned by the utf16()
9478 method.
9479
9480 \sa utf16(), toStdWString(), toStdU32String()
9481*/
9482
9483/*! \fn QString QString::fromStdU32String(const std::u32string &str)
9484 \since 5.5
9485
9486 \include qstring.cpp {from-std-string} {UTF-32} {fromUcs4()}
9487
9488 \sa fromUcs4(), fromStdWString(), fromStdU16String()
9489*/
9490
9491/*!
9492 \fn std::u32string QString::toStdU32String() const
9493 \since 5.5
9494
9495 Returns a std::u32string object with the data contained in this
9496 QString. The Unicode data is the same as returned by the toUcs4()
9497 method.
9498
9499 \sa toUcs4(), toStdWString(), toStdU16String()
9500*/
9501
9502#if !defined(QT_NO_DATASTREAM)
9503/*!
9504 \fn QDataStream &operator<<(QDataStream &stream, const QString &string)
9505 \relates QString
9506
9507 Writes the given \a string to the specified \a stream.
9508
9509 \sa {Serializing Qt Data Types}
9510*/
9511
9512QDataStream &operator<<(QDataStream &out, const QString &str)
9513{
9514 if (out.version() == 1) {
9515 out << str.toLatin1();
9516 } else {
9517 if (!str.isNull() || out.version() < 3) {
9518 if ((out.byteOrder() == QDataStream::BigEndian) == (QSysInfo::ByteOrder == QSysInfo::BigEndian)) {
9519 out.writeBytes(reinterpret_cast<const char *>(str.unicode()),
9520 static_cast<qsizetype>(sizeof(QChar) * str.size()));
9521 } else {
9522 QVarLengthArray<char16_t> buffer(str.size());
9523 qbswap<sizeof(char16_t)>(str.constData(), str.size(), buffer.data());
9524 out.writeBytes(reinterpret_cast<const char *>(buffer.data()),
9525 static_cast<qsizetype>(sizeof(char16_t) * buffer.size()));
9526 }
9527 } else {
9528 QDataStream::writeQSizeType(out, -1); // write null marker
9529 }
9530 }
9531 return out;
9532}
9533
9534/*!
9535 \fn QDataStream &operator>>(QDataStream &stream, QString &string)
9536 \relates QString
9537
9538 Reads a string from the specified \a stream into the given \a string.
9539
9540 \sa {Serializing Qt Data Types}
9541*/
9542
9543QDataStream &operator>>(QDataStream &in, QString &str)
9544{
9545 if (in.version() == 1) {
9546 QByteArray l;
9547 in >> l;
9548 str = QString::fromLatin1(l);
9549 } else {
9550 qint64 size = QDataStream::readQSizeType(in);
9551 qsizetype bytes = size;
9552 if (size != bytes || size < -1) {
9553 str.clear();
9554 in.setStatus(QDataStream::SizeLimitExceeded);
9555 return in;
9556 }
9557 if (bytes == -1) { // null string
9558 str = QString();
9559 } else if (bytes > 0) {
9560 if (bytes & 0x1) {
9561 str.clear();
9562 in.setStatus(QDataStream::ReadCorruptData);
9563 return in;
9564 }
9565
9566 const qsizetype Step = 1024 * 1024;
9567 qsizetype len = bytes / 2;
9568 qsizetype allocated = 0;
9569
9570 while (allocated < len) {
9571 int blockSize = qMin(Step, len - allocated);
9572 str.resize(allocated + blockSize);
9573 if (in.readRawData(reinterpret_cast<char *>(str.data()) + allocated * 2,
9574 blockSize * 2) != blockSize * 2) {
9575 str.clear();
9576 in.setStatus(QDataStream::ReadPastEnd);
9577 return in;
9578 }
9579 allocated += blockSize;
9580 }
9581
9582 if ((in.byteOrder() == QDataStream::BigEndian)
9583 != (QSysInfo::ByteOrder == QSysInfo::BigEndian)) {
9584 char16_t *data = reinterpret_cast<char16_t *>(str.data());
9585 qbswap<sizeof(*data)>(data, len, data);
9586 }
9587 } else {
9588 str = QString(QLatin1StringView(""));
9589 }
9590 }
9591 return in;
9592}
9593#endif // QT_NO_DATASTREAM
9594
9595/*!
9596 \typedef QString::Data
9597 \internal
9598*/
9599
9600/*!
9601 \typedef QString::DataPtr
9602 \internal
9603*/
9604
9605/*!
9606 \fn DataPtr & QString::data_ptr()
9607 \internal
9608*/
9609
9610/*!
9611 \since 5.11
9612 \internal
9613 \relates QStringView
9614
9615 Returns \c true if the string is read right to left.
9616
9617 \sa QString::isRightToLeft()
9618*/
9619bool QtPrivate::isRightToLeft(QStringView string) noexcept
9620{
9621 int isolateLevel = 0;
9622
9623 for (QStringIterator i(string); i.hasNext();) {
9624 const char32_t c = i.next();
9625
9626 switch (QChar::direction(c)) {
9627 case QChar::DirRLI:
9628 case QChar::DirLRI:
9629 case QChar::DirFSI:
9630 ++isolateLevel;
9631 break;
9632 case QChar::DirPDI:
9633 if (isolateLevel)
9634 --isolateLevel;
9635 break;
9636 case QChar::DirL:
9637 if (isolateLevel)
9638 break;
9639 return false;
9640 case QChar::DirR:
9641 case QChar::DirAL:
9642 if (isolateLevel)
9643 break;
9644 return true;
9645 case QChar::DirEN:
9646 case QChar::DirES:
9647 case QChar::DirET:
9648 case QChar::DirAN:
9649 case QChar::DirCS:
9650 case QChar::DirB:
9651 case QChar::DirS:
9652 case QChar::DirWS:
9653 case QChar::DirON:
9654 case QChar::DirLRE:
9655 case QChar::DirLRO:
9656 case QChar::DirRLE:
9657 case QChar::DirRLO:
9658 case QChar::DirPDF:
9659 case QChar::DirNSM:
9660 case QChar::DirBN:
9661 break;
9662 }
9663 }
9664 return false;
9665}
9666
9667qsizetype QtPrivate::count(QStringView haystack, QStringView needle, Qt::CaseSensitivity cs) noexcept
9668{
9669 qsizetype num = 0;
9670 qsizetype i = -1;
9671 if (haystack.size() > 500 && needle.size() > 5) {
9672 QStringMatcher matcher(needle, cs);
9673 while ((i = matcher.indexIn(haystack, i + 1)) != -1)
9674 ++num;
9675 } else {
9676 while ((i = QtPrivate::findString(haystack, i + 1, needle, cs)) != -1)
9677 ++num;
9678 }
9679 return num;
9680}
9681
9682qsizetype QtPrivate::count(QStringView haystack, QChar needle, Qt::CaseSensitivity cs) noexcept
9683{
9684 if (cs == Qt::CaseSensitive)
9685 return std::count(haystack.cbegin(), haystack.cend(), needle);
9686
9687 needle = foldCase(needle);
9688 return std::count_if(haystack.cbegin(), haystack.cend(),
9689 [needle](const QChar c) { return foldAndCompare(c, needle); });
9690}
9691
9692qsizetype QtPrivate::count(QLatin1StringView haystack, QLatin1StringView needle, Qt::CaseSensitivity cs)
9693{
9694 qsizetype num = 0;
9695 qsizetype i = -1;
9696
9697 QLatin1StringMatcher matcher(needle, cs);
9698 while ((i = matcher.indexIn(haystack, i + 1)) != -1)
9699 ++num;
9700
9701 return num;
9702}
9703
9704qsizetype QtPrivate::count(QLatin1StringView haystack, QStringView needle, Qt::CaseSensitivity cs)
9705{
9706 if (haystack.size() < needle.size())
9707 return 0;
9708
9709 if (!QtPrivate::isLatin1(needle)) // won't find non-L1 UTF-16 needles in a L1 haystack!
9710 return 0;
9711
9712 qsizetype num = 0;
9713 qsizetype i = -1;
9714
9715 QVarLengthArray<uchar> s(needle.size());
9716 qt_to_latin1_unchecked(s.data(), needle.utf16(), needle.size());
9717
9718 QLatin1StringMatcher matcher(QLatin1StringView(reinterpret_cast<char *>(s.data()), s.size()),
9719 cs);
9720 while ((i = matcher.indexIn(haystack, i + 1)) != -1)
9721 ++num;
9722
9723 return num;
9724}
9725
9726qsizetype QtPrivate::count(QStringView haystack, QLatin1StringView needle, Qt::CaseSensitivity cs)
9727{
9728 if (haystack.size() < needle.size())
9729 return -1;
9730
9731 QVarLengthArray<char16_t> s = qt_from_latin1_to_qvla(needle);
9732 return QtPrivate::count(haystack, QStringView(s.data(), s.size()), cs);
9733}
9734
9735qsizetype QtPrivate::count(QLatin1StringView haystack, QChar needle, Qt::CaseSensitivity cs) noexcept
9736{
9737 // non-L1 needles cannot possibly match in L1-only haystacks
9738 if (needle.unicode() > 0xff)
9739 return 0;
9740
9741 if (cs == Qt::CaseSensitive) {
9742 return std::count(haystack.cbegin(), haystack.cend(), needle.toLatin1());
9743 } else {
9744 return std::count_if(haystack.cbegin(), haystack.cend(),
9745 CaseInsensitiveL1::matcher(needle.toLatin1()));
9746 }
9747}
9748
9749/*!
9750 \fn bool QtPrivate::startsWith(QStringView haystack, QStringView needle, Qt::CaseSensitivity cs)
9751 \since 5.10
9752 \fn bool QtPrivate::startsWith(QStringView haystack, QLatin1StringView needle, Qt::CaseSensitivity cs)
9753 \since 5.10
9754 \fn bool QtPrivate::startsWith(QLatin1StringView haystack, QStringView needle, Qt::CaseSensitivity cs)
9755 \since 5.10
9756 \fn bool QtPrivate::startsWith(QLatin1StringView haystack, QLatin1StringView needle, Qt::CaseSensitivity cs)
9757 \since 5.10
9758 \internal
9759 \relates QStringView
9760
9761 Returns \c true if \a haystack starts with \a needle,
9762 otherwise returns \c false.
9763
9764 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
9765
9766 \sa QtPrivate::endsWith(), QString::endsWith(), QStringView::endsWith(), QLatin1StringView::endsWith()
9767*/
9768
9769bool QtPrivate::startsWith(QStringView haystack, QStringView needle, Qt::CaseSensitivity cs) noexcept
9770{
9771 return qt_starts_with_impl(haystack, needle, cs);
9772}
9773
9774bool QtPrivate::startsWith(QStringView haystack, QLatin1StringView needle, Qt::CaseSensitivity cs) noexcept
9775{
9776 return qt_starts_with_impl(haystack, needle, cs);
9777}
9778
9779bool QtPrivate::startsWith(QLatin1StringView haystack, QStringView needle, Qt::CaseSensitivity cs) noexcept
9780{
9781 return qt_starts_with_impl(haystack, needle, cs);
9782}
9783
9784bool QtPrivate::startsWith(QLatin1StringView haystack, QLatin1StringView needle, Qt::CaseSensitivity cs) noexcept
9785{
9786 return qt_starts_with_impl(haystack, needle, cs);
9787}
9788
9789/*!
9790 \fn bool QtPrivate::endsWith(QStringView haystack, QStringView needle, Qt::CaseSensitivity cs)
9791 \since 5.10
9792 \fn bool QtPrivate::endsWith(QStringView haystack, QLatin1StringView needle, Qt::CaseSensitivity cs)
9793 \since 5.10
9794 \fn bool QtPrivate::endsWith(QLatin1StringView haystack, QStringView needle, Qt::CaseSensitivity cs)
9795 \since 5.10
9796 \fn bool QtPrivate::endsWith(QLatin1StringView haystack, QLatin1StringView needle, Qt::CaseSensitivity cs)
9797 \since 5.10
9798 \internal
9799 \relates QStringView
9800
9801 Returns \c true if \a haystack ends with \a needle,
9802 otherwise returns \c false.
9803
9804 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
9805
9806 \sa QtPrivate::startsWith(), QString::endsWith(), QStringView::endsWith(), QLatin1StringView::endsWith()
9807*/
9808
9809bool QtPrivate::endsWith(QStringView haystack, QStringView needle, Qt::CaseSensitivity cs) noexcept
9810{
9811 return qt_ends_with_impl(haystack, needle, cs);
9812}
9813
9814bool QtPrivate::endsWith(QStringView haystack, QLatin1StringView needle, Qt::CaseSensitivity cs) noexcept
9815{
9816 return qt_ends_with_impl(haystack, needle, cs);
9817}
9818
9819bool QtPrivate::endsWith(QLatin1StringView haystack, QStringView needle, Qt::CaseSensitivity cs) noexcept
9820{
9821 return qt_ends_with_impl(haystack, needle, cs);
9822}
9823
9824bool QtPrivate::endsWith(QLatin1StringView haystack, QLatin1StringView needle, Qt::CaseSensitivity cs) noexcept
9825{
9826 return qt_ends_with_impl(haystack, needle, cs);
9827}
9828
9829qsizetype QtPrivate::findString(QStringView haystack0, qsizetype from, QStringView needle0, Qt::CaseSensitivity cs) noexcept
9830{
9831 const qsizetype l = haystack0.size();
9832 const qsizetype sl = needle0.size();
9833 if (sl == 1)
9834 return findString(haystack0, from, needle0[0], cs);
9835 if (from < 0)
9836 from += l;
9837 if (std::size_t(sl + from) > std::size_t(l))
9838 return -1;
9839 if (!sl)
9840 return from;
9841 if (!l)
9842 return -1;
9843
9844 /*
9845 We use the Boyer-Moore algorithm in cases where the overhead
9846 for the skip table should pay off, otherwise we use a simple
9847 hash function.
9848 */
9849 if (l > 500 && sl > 5)
9850 return qFindStringBoyerMoore(haystack0, from, needle0, cs);
9851
9852 auto sv = [sl](const char16_t *v) { return QStringView(v, sl); };
9853 /*
9854 We use some hashing for efficiency's sake. Instead of
9855 comparing strings, we compare the hash value of str with that
9856 of a part of this QString. Only if that matches, we call
9857 qt_string_compare().
9858 */
9859 const char16_t *needle = needle0.utf16();
9860 const char16_t *haystack = haystack0.utf16() + from;
9861 const char16_t *end = haystack0.utf16() + (l - sl);
9862 const qregisteruint sl_minus_1 = sl - 1;
9863 qregisteruint hashNeedle = 0, hashHaystack = 0;
9864 qsizetype idx;
9865
9866 if (cs == Qt::CaseSensitive) {
9867 for (idx = 0; idx < sl; ++idx) {
9868 hashNeedle = ((hashNeedle<<1) + needle[idx]);
9869 hashHaystack = ((hashHaystack<<1) + haystack[idx]);
9870 }
9871 hashHaystack -= haystack[sl_minus_1];
9872
9873 while (haystack <= end) {
9874 hashHaystack += haystack[sl_minus_1];
9875 if (hashHaystack == hashNeedle
9876 && QtPrivate::compareStrings(needle0, sv(haystack), Qt::CaseSensitive) == 0)
9877 return haystack - haystack0.utf16();
9878
9879 REHASH(*haystack);
9880 ++haystack;
9881 }
9882 } else {
9883 const char16_t *haystack_start = haystack0.utf16();
9884 for (idx = 0; idx < sl; ++idx) {
9885 hashNeedle = (hashNeedle<<1) + foldCase(needle + idx, needle);
9886 hashHaystack = (hashHaystack<<1) + foldCase(haystack + idx, haystack_start);
9887 }
9888 hashHaystack -= foldCase(haystack + sl_minus_1, haystack_start);
9889
9890 while (haystack <= end) {
9891 hashHaystack += foldCase(haystack + sl_minus_1, haystack_start);
9892 if (hashHaystack == hashNeedle
9893 && QtPrivate::compareStrings(needle0, sv(haystack), Qt::CaseInsensitive) == 0)
9894 return haystack - haystack0.utf16();
9895
9896 REHASH(foldCase(haystack, haystack_start));
9897 ++haystack;
9898 }
9899 }
9900 return -1;
9901}
9902
9903qsizetype QtPrivate::findString(QStringView haystack, qsizetype from, QLatin1StringView needle, Qt::CaseSensitivity cs) noexcept
9904{
9905 if (haystack.size() < needle.size())
9906 return -1;
9907
9908 QVarLengthArray<char16_t> s = qt_from_latin1_to_qvla(needle);
9909 return QtPrivate::findString(haystack, from, QStringView(reinterpret_cast<const QChar*>(s.constData()), s.size()), cs);
9910}
9911
9912qsizetype QtPrivate::findString(QLatin1StringView haystack, qsizetype from, QStringView needle, Qt::CaseSensitivity cs) noexcept
9913{
9914 if (haystack.size() < needle.size())
9915 return -1;
9916
9917 if (!QtPrivate::isLatin1(needle)) // won't find non-L1 UTF-16 needles in a L1 haystack!
9918 return -1;
9919
9920 if (needle.size() == 1) {
9921 const char n = needle.front().toLatin1();
9922 return QtPrivate::findString(haystack, from, QLatin1StringView(&n, 1), cs);
9923 }
9924
9925 QVarLengthArray<char> s(needle.size());
9926 qt_to_latin1_unchecked(reinterpret_cast<uchar *>(s.data()), needle.utf16(), needle.size());
9927 return QtPrivate::findString(haystack, from, QLatin1StringView(s.data(), s.size()), cs);
9928}
9929
9930qsizetype QtPrivate::findString(QLatin1StringView haystack, qsizetype from, QLatin1StringView needle, Qt::CaseSensitivity cs) noexcept
9931{
9932 if (from < 0)
9933 from += haystack.size();
9934 if (from < 0)
9935 return -1;
9936 qsizetype adjustedSize = haystack.size() - from;
9937 if (adjustedSize < needle.size())
9938 return -1;
9939 if (needle.size() == 0)
9940 return from;
9941
9942 if (cs == Qt::CaseSensitive) {
9943
9944 if (needle.size() == 1) {
9945 Q_ASSERT(haystack.data() != nullptr); // see size check above
9946 if (auto it = memchr(haystack.data() + from, needle.front().toLatin1(), adjustedSize))
9947 return static_cast<const char *>(it) - haystack.data();
9948 return -1;
9949 }
9950
9951 const QLatin1StringMatcher matcher(needle, Qt::CaseSensitivity::CaseSensitive);
9952 return matcher.indexIn(haystack, from);
9953 }
9954
9955 // If the needle is sufficiently small we simply iteratively search through
9956 // the haystack. When the needle is too long we use a boyer-moore searcher
9957 // from the standard library, if available. If it is not available then the
9958 // QLatin1Strings are converted to QString and compared as such. Though
9959 // initialization is slower the boyer-moore search it employs still makes up
9960 // for it when haystack and needle are sufficiently long.
9961 // The needle size was chosen by testing various lengths using the
9962 // qstringtokenizer benchmark with the
9963 // "tokenize_qlatin1string_qlatin1string" test.
9964#ifdef Q_CC_MSVC
9965 const qsizetype threshold = 1;
9966#else
9967 const qsizetype threshold = 13;
9968#endif
9969 if (needle.size() <= threshold) {
9970 const auto begin = haystack.begin();
9971 const auto end = haystack.end() - needle.size() + 1;
9972 auto ciMatch = CaseInsensitiveL1::matcher(needle[0].toLatin1());
9973 const qsizetype nlen1 = needle.size() - 1;
9974 for (auto it = std::find_if(begin + from, end, ciMatch); it != end;
9975 it = std::find_if(it + 1, end, ciMatch)) {
9976 // In this comparison we skip the first character because we know it's a match
9977 if (!nlen1 || QLatin1StringView(it + 1, nlen1).compare(needle.sliced(1), cs) == 0)
9978 return std::distance(begin, it);
9979 }
9980 return -1;
9981 }
9982
9983 QLatin1StringMatcher matcher(needle, Qt::CaseSensitivity::CaseInsensitive);
9984 return matcher.indexIn(haystack, from);
9985}
9986
9987qsizetype QtPrivate::lastIndexOf(QStringView haystack, qsizetype from, char16_t needle, Qt::CaseSensitivity cs) noexcept
9988{
9989 return qLastIndexOf(haystack, QChar(needle), from, cs);
9990}
9991
9992qsizetype QtPrivate::lastIndexOf(QStringView haystack, qsizetype from, QStringView needle, Qt::CaseSensitivity cs) noexcept
9993{
9994 return qLastIndexOf(haystack, from, needle, cs);
9995}
9996
9997qsizetype QtPrivate::lastIndexOf(QStringView haystack, qsizetype from, QLatin1StringView needle, Qt::CaseSensitivity cs) noexcept
9998{
9999 return qLastIndexOf(haystack, from, needle, cs);
10000}
10001
10002qsizetype QtPrivate::lastIndexOf(QLatin1StringView haystack, qsizetype from, QStringView needle, Qt::CaseSensitivity cs) noexcept
10003{
10004 return qLastIndexOf(haystack, from, needle, cs);
10005}
10006
10007qsizetype QtPrivate::lastIndexOf(QLatin1StringView haystack, qsizetype from, QLatin1StringView needle, Qt::CaseSensitivity cs) noexcept
10008{
10009 return qLastIndexOf(haystack, from, needle, cs);
10010}
10011
10012#if QT_CONFIG(regularexpression)
10013qsizetype QtPrivate::indexOf(QStringView viewHaystack, const QString *stringHaystack, const QRegularExpression &re, qsizetype from, QRegularExpressionMatch *rmatch)
10014{
10015 if (!re.isValid()) {
10016 qtWarnAboutInvalidRegularExpression(re, "QString(View)", "indexOf");
10017 return -1;
10018 }
10019
10020 QRegularExpressionMatch match = stringHaystack
10021 ? re.match(*stringHaystack, from)
10022 : re.matchView(viewHaystack, from);
10023 if (match.hasMatch()) {
10024 const qsizetype ret = match.capturedStart();
10025 if (rmatch)
10026 *rmatch = std::move(match);
10027 return ret;
10028 }
10029
10030 return -1;
10031}
10032
10033qsizetype QtPrivate::indexOf(QStringView haystack, const QRegularExpression &re, qsizetype from, QRegularExpressionMatch *rmatch)
10034{
10035 return indexOf(haystack, nullptr, re, from, rmatch);
10036}
10037
10038qsizetype QtPrivate::lastIndexOf(QStringView viewHaystack, const QString *stringHaystack, const QRegularExpression &re, qsizetype from, QRegularExpressionMatch *rmatch)
10039{
10040 if (!re.isValid()) {
10041 qtWarnAboutInvalidRegularExpression(re, "QString(View)", "lastIndexOf");
10042 return -1;
10043 }
10044
10045 qsizetype endpos = (from < 0) ? (viewHaystack.size() + from + 1) : (from + 1);
10046 QRegularExpressionMatchIterator iterator = stringHaystack
10047 ? re.globalMatch(*stringHaystack)
10048 : re.globalMatchView(viewHaystack);
10049 qsizetype lastIndex = -1;
10050 while (iterator.hasNext()) {
10051 QRegularExpressionMatch match = iterator.next();
10052 qsizetype start = match.capturedStart();
10053 if (start < endpos) {
10054 lastIndex = start;
10055 if (rmatch)
10056 *rmatch = std::move(match);
10057 } else {
10058 break;
10059 }
10060 }
10061
10062 return lastIndex;
10063}
10064
10065qsizetype QtPrivate::lastIndexOf(QStringView haystack, const QRegularExpression &re, qsizetype from, QRegularExpressionMatch *rmatch)
10066{
10067 return lastIndexOf(haystack, nullptr, re, from, rmatch);
10068}
10069
10070bool QtPrivate::contains(QStringView viewHaystack, const QString *stringHaystack, const QRegularExpression &re, QRegularExpressionMatch *rmatch)
10071{
10072 if (!re.isValid()) {
10073 qtWarnAboutInvalidRegularExpression(re, "QString(View)", "contains");
10074 return false;
10075 }
10076 QRegularExpressionMatch m = stringHaystack
10077 ? re.match(*stringHaystack)
10078 : re.matchView(viewHaystack);
10079 bool hasMatch = m.hasMatch();
10080 if (hasMatch && rmatch)
10081 *rmatch = std::move(m);
10082 return hasMatch;
10083}
10084
10085bool QtPrivate::contains(QStringView haystack, const QRegularExpression &re, QRegularExpressionMatch *rmatch)
10086{
10087 return contains(haystack, nullptr, re, rmatch);
10088}
10089
10090qsizetype QtPrivate::count(QStringView haystack, const QRegularExpression &re)
10091{
10092 if (!re.isValid()) {
10093 qtWarnAboutInvalidRegularExpression(re, "QString(View)", "count");
10094 return 0;
10095 }
10096 qsizetype count = 0;
10097 qsizetype index = -1;
10098 qsizetype len = haystack.size();
10099 while (index <= len - 1) {
10100 QRegularExpressionMatch match = re.matchView(haystack, index + 1);
10101 if (!match.hasMatch())
10102 break;
10103 count++;
10104
10105 // Search again, from the next character after the beginning of this
10106 // capture. If the capture starts with a surrogate pair, both together
10107 // count as "one character".
10108 index = match.capturedStart();
10109 if (index < len && haystack[index].isHighSurrogate())
10110 ++index;
10111 }
10112 return count;
10113}
10114
10115#endif // QT_CONFIG(regularexpression)
10116
10117/*!
10118 \since 5.0
10119
10120 Converts a plain text string to an HTML string with
10121 HTML metacharacters \c{<}, \c{>}, \c{&}, and \c{"} replaced by HTML
10122 entities.
10123
10124 Example:
10125
10126 \snippet code/src_corelib_text_qstring.cpp 7
10127*/
10128QString QString::toHtmlEscaped() const
10129{
10130 const auto pos = std::u16string_view(*this).find_first_of(u"<>&\"");
10131 if (pos == std::u16string_view::npos)
10132 return *this;
10133 QString rich;
10134 const qsizetype len = size();
10135 rich.reserve(qsizetype(len * 1.1));
10136 rich += qToStringViewIgnoringNull(*this).first(pos);
10137 for (auto ch : qToStringViewIgnoringNull(*this).sliced(pos)) {
10138 if (ch == u'<')
10139 rich += "&lt;"_L1;
10140 else if (ch == u'>')
10141 rich += "&gt;"_L1;
10142 else if (ch == u'&')
10143 rich += "&amp;"_L1;
10144 else if (ch == u'"')
10145 rich += "&quot;"_L1;
10146 else
10147 rich += ch;
10148 }
10149 rich.squeeze();
10150 return rich;
10151}
10152
10153/*!
10154 \macro QStringLiteral(str)
10155 \relates QString
10156
10157 The macro generates the data for a QString out of the string literal \a str
10158 at compile time. Creating a QString from it is free in this case, and the
10159 generated string data is stored in the read-only segment of the compiled
10160 object file.
10161
10162 If you have code that looks like this:
10163
10164 \snippet code/src_corelib_text_qstring.cpp 9
10165
10166 then a temporary QString will be created to be passed as the \c{hasAttribute}
10167 function parameter. This can be quite expensive, as it involves a memory
10168 allocation and the copy/conversion of the data into QString's internal
10169 encoding.
10170
10171 This cost can be avoided by using QStringLiteral instead:
10172
10173 \snippet code/src_corelib_text_qstring.cpp 10
10174
10175 In this case, QString's internal data will be generated at compile time; no
10176 conversion or allocation will occur at runtime.
10177
10178 Using QStringLiteral instead of a plain, double-quoted C++ string literal can
10179 significantly speed up creation of QString instances from data known at
10180 compile time.
10181
10182 \note QLatin1StringView can still be more efficient than QStringLiteral
10183 when the string is passed to a function that has an overload taking
10184 QLatin1StringView and this overload avoids conversion to QString. For
10185 instance, QString::operator==() can compare to a QLatin1StringView
10186 directly:
10187
10188 \snippet code/src_corelib_text_qstring.cpp 11
10189
10190 \note Some compilers have bugs encoding strings containing characters outside
10191 the US-ASCII character set. Make sure you prefix your string with \c{u} in
10192 those cases. It is optional otherwise.
10193
10194 \note QStringLiteral is interchangeable with \l operator""_s. The latter saves
10195 typing when many string literals are present in the code.
10196
10197 \sa QByteArrayLiteral
10198*/
10199
10200#if QT_DEPRECATED_SINCE(6, 8)
10201/*!
10202 \fn QtLiterals::operator""_qs(const char16_t *str, size_t size)
10203
10204 \relates QString
10205 \since 6.2
10206 \deprecated [6.8] Use \c _s from Qt::StringLiterals namespace instead.
10207
10208 Literal operator that creates a QString out of the first \a size characters in
10209 the char16_t string literal \a str.
10210
10211 The QString is created at compile time, and the generated string data is stored
10212 in the read-only segment of the compiled object file. Duplicate literals may
10213 share the same read-only memory. This functionality is interchangeable with
10214 QStringLiteral, but saves typing when many string literals are present in the
10215 code.
10216
10217 The following code creates a QString:
10218 \code
10219 auto str = u"hello"_qs;
10220 \endcode
10221
10222 \sa QStringLiteral, QtLiterals::operator""_qba(const char *str, size_t size)
10223*/
10224#endif // QT_DEPRECATED_SINCE(6, 8)
10225
10226/*!
10227 \fn Qt::Literals::StringLiterals::operator""_s(const char16_t *str, size_t size)
10228
10229 \relates QString
10230 \since 6.4
10231
10232 Literal operator that creates a QString out of the first \a size characters in
10233 the char16_t string literal \a str.
10234
10235 The QString is created at compile time, and the generated string data is stored
10236 in the read-only segment of the compiled object file. Duplicate literals may
10237 share the same read-only memory. This functionality is interchangeable with
10238 QStringLiteral, but saves typing when many string literals are present in the
10239 code.
10240
10241 The following code creates a QString:
10242 \code
10243 using namespace Qt::StringLiterals;
10244
10245 auto str = u"hello"_s;
10246 \endcode
10247
10248 \sa Qt::Literals::StringLiterals
10249*/
10250
10251/*!
10252 \internal
10253 */
10254void QAbstractConcatenable::appendLatin1To(QLatin1StringView in, QChar *out) noexcept
10255{
10256 qt_from_latin1(reinterpret_cast<char16_t *>(out), in.data(), size_t(in.size()));
10257}
10258
10259/*!
10260 \fn template <typename T> qsizetype erase(QString &s, const T &t)
10261 \relates QString
10262 \since 6.1
10263
10264 Removes all elements that compare equal to \a t from the
10265 string \a s. Returns the number of elements removed, if any.
10266
10267 \sa erase_if
10268*/
10269
10270/*!
10271 \fn template <typename Predicate> qsizetype erase_if(QString &s, Predicate pred)
10272 \relates QString
10273 \since 6.1
10274
10275 Removes all elements for which the predicate \a pred returns true
10276 from the string \a s. Returns the number of elements removed, if
10277 any.
10278
10279 \sa erase
10280*/
10281
10282/*!
10283 \macro const char *qPrintable(const QString &str)
10284 \relates QString
10285
10286 Returns \a str as a \c{const char *}. This is equivalent to
10287 \a{str}.toLocal8Bit().\l{QByteArray::}{constData()}.
10288
10289 The char pointer will be invalid after the statement in which
10290 qPrintable() is used. This is because the array returned by
10291 QString::toLocal8Bit() will fall out of scope.
10292
10293 \note qDebug(), qInfo(), qWarning(), qCritical(), qFatal() expect
10294 %s arguments to be UTF-8 encoded, while qPrintable() converts to
10295 local 8-bit encoding. Therefore qUtf8Printable() should be used
10296 for logging strings instead of qPrintable().
10297
10298 \sa qUtf8Printable()
10299*/
10300
10301/*!
10302 \macro const char *qUtf8Printable(const QString &str)
10303 \relates QString
10304 \since 5.4
10305
10306 Returns \a str as a \c{const char *}. This is equivalent to
10307 \a{str}.toUtf8().\l{QByteArray::}{constData()}.
10308
10309 The char pointer will be invalid after the statement in which
10310 qUtf8Printable() is used. This is because the array returned by
10311 QString::toUtf8() will fall out of scope.
10312
10313 Example:
10314
10315 \snippet code/src_corelib_text_qstring.cpp qUtf8Printable
10316
10317 \sa qPrintable(), qDebug(), qInfo(), qWarning(), qCritical(), qFatal()
10318*/
10319
10320/*!
10321 \macro const wchar_t *qUtf16Printable(const QString &str)
10322 \relates QString
10323 \since 5.7
10324
10325 Returns \a str as a \c{const ushort *}, but cast to a \c{const wchar_t *}
10326 to avoid warnings. This is equivalent to \a{str}.utf16() plus some casting.
10327
10328 The only useful thing you can do with the return value of this macro is to
10329 pass it to QString::asprintf() for use in a \c{%ls} conversion. In particular,
10330 the return value is \e{not} a valid \c{const wchar_t*}!
10331
10332 In general, the pointer will be invalid after the statement in which
10333 qUtf16Printable() is used. This is because the pointer may have been
10334 obtained from a temporary expression, which will fall out of scope.
10335
10336 Example:
10337
10338 \snippet code/src_corelib_text_qstring.cpp qUtf16Printable
10339
10340 \sa qPrintable(), qDebug(), qInfo(), qWarning(), qCritical(), qFatal()
10341*/
10342
10343QT_END_NAMESPACE
10344
10345#undef REHASH
QString convertToQString(QAnyStringView string)
Definition qstring.cpp:5578
Definition qlist.h:81
char32_t next(char32_t invalidAs=QChar::ReplacementCharacter)
bool hasNext() const
\inmodule QtCore
QList< uint > convertToUcs4(QStringView string)
Definition qstring.cpp:5834
QByteArray convertToUtf8(QStringView string)
Definition qstring.cpp:5779
QByteArray convertToLocal8Bit(QStringView string)
Definition qstring.cpp:5736
QByteArray convertToLatin1(QStringView string)
Definition qstring.cpp:5595
Combined button and popup list for selecting options.
static QString convertCase(T &str, QUnicodeTables::Case which)
Definition qstring.cpp:7208
static constexpr NormalizationCorrection uc_normalization_corrections[]
Q_CORE_EXPORT Q_DECL_PURE_FUNCTION bool startsWith(QStringView haystack, QStringView needle, Qt::CaseSensitivity cs=Qt::CaseSensitive) noexcept
Definition qstring.cpp:9769
Q_CORE_EXPORT Q_DECL_PURE_FUNCTION bool endsWith(QStringView haystack, QStringView needle, Qt::CaseSensitivity cs=Qt::CaseSensitive) noexcept
Definition qstring.cpp:9809
Q_CORE_EXPORT Q_DECL_PURE_FUNCTION bool isLower(QStringView s) noexcept
Definition qstring.cpp:5515
const QString & asString(const QString &s)
Definition qstring.h:1678
Q_CORE_EXPORT Q_DECL_PURE_FUNCTION bool isValidUtf16(QStringView s) noexcept
Definition qstring.cpp:906
Q_CORE_EXPORT Q_DECL_PURE_FUNCTION bool equalStrings(QStringView lhs, QStringView rhs) noexcept
Definition qstring.cpp:1374
qsizetype findString(QStringView str, qsizetype from, QChar needle, Qt::CaseSensitivity cs=Qt::CaseSensitive) noexcept
Q_CORE_EXPORT Q_DECL_PURE_FUNCTION bool isRightToLeft(QStringView string) noexcept
Q_CORE_EXPORT Q_DECL_PURE_FUNCTION int compareStrings(QStringView lhs, QStringView rhs, Qt::CaseSensitivity cs=Qt::CaseSensitive) noexcept
Q_CORE_EXPORT Q_DECL_PURE_FUNCTION bool isAscii(QLatin1StringView s) noexcept
Definition qstring.cpp:851
constexpr bool isLatin1(QLatin1StringView s) noexcept
Definition qstring.h:77
Q_CORE_EXPORT Q_DECL_PURE_FUNCTION const char16_t * qustrcasechr(QStringView str, char16_t ch) noexcept
Definition qstring.cpp:776
Q_CORE_EXPORT Q_DECL_PURE_FUNCTION bool isUpper(QStringView s) noexcept
Definition qstring.cpp:5520
Q_CORE_EXPORT Q_DECL_PURE_FUNCTION const char16_t * qustrchr(QStringView str, char16_t ch) noexcept
Definition qstring.cpp:688
void qt_to_latin1_unchecked(uchar *dst, const char16_t *uc, qsizetype len)
Definition qstring.cpp:1189
static char16_t foldCase(char16_t ch) noexcept
Definition qchar.cpp:1696
#define __has_feature(x)
uint QT_FASTCALL fetch1Pixel< QPixelLayout::BPP1LSB >(const uchar *src, int index)
bool comparesEqual(const QFileInfo &lhs, const QFileInfo &rhs)
static bool isAscii_helper(const char16_t *&ptr, const char16_t *end)
Definition qstring.cpp:859
static Int toIntegral(QStringView string, bool *ok, int base)
Definition qstring.cpp:7697
void qt_to_latin1(uchar *dst, const char16_t *src, qsizetype length)
Definition qstring.cpp:1184
Qt::strong_ordering compareThreeWay(const QByteArray &lhs, const QChar &rhs) noexcept
Definition qstring.cpp:6752
static void append_utf8(QString &qs, const char *cs, qsizetype len)
Definition qstring.cpp:7331
#define ATTRIBUTE_NO_SANITIZE
Definition qstring.cpp:367
bool qt_is_ascii(const char *&ptr, const char *end) noexcept
Definition qstring.cpp:787
static bool checkCase(QStringView s, QUnicodeTables::Case c) noexcept
Definition qstring.cpp:5504
static void replace_helper(QString &str, QSpan< qsizetype > indices, qsizetype blen, QStringView after)
Definition qstring.cpp:3692
Q_CORE_EXPORT void qt_from_latin1(char16_t *dst, const char *str, size_t size) noexcept
Definition qstring.cpp:921
static int ucstrcmp(const char16_t *a, size_t alen, const Char2 *b, size_t blen)
Definition qstring.cpp:1347
bool comparesEqual(const QByteArray &lhs, char16_t rhs) noexcept
Definition qstring.cpp:6758
Q_DECLARE_TYPEINFO(Part, Q_PRIMITIVE_TYPE)
static void removeStringImpl(QString &s, const T &needle, Qt::CaseSensitivity cs)
Definition qstring.cpp:3501
static bool needsReallocate(const QString &str, qsizetype newSize)
Definition qstring.cpp:2637
static int qArgDigitValue(QChar ch) noexcept
Definition qstring.cpp:1614
bool comparesEqual(const QByteArray &lhs, const QChar &rhs) noexcept
Definition qstring.cpp:6747
#define REHASH(a)
Definition qstring.cpp:66
bool comparesEqual(const QByteArrayView &lhs, char16_t rhs) noexcept
Definition qstring.cpp:6736
static int ucstrncmp(const char16_t *a, const char16_t *b, size_t l)
Definition qstring.cpp:1265
static Q_NEVER_INLINE int ucstricmp(qsizetype alen, const char16_t *a, qsizetype blen, const char *b)
Definition qstring.cpp:1220
static QByteArray qt_convert_to_latin1(QStringView string)
Definition qstring.cpp:5601
static bool ucstreq(const char16_t *a, size_t alen, const Char2 *b)
Definition qstring.cpp:1340
static QList< uint > qt_convert_to_ucs4(QStringView string)
Definition qstring.cpp:5806
qsizetype qFindStringBoyerMoore(QStringView haystack, qsizetype from, QStringView needle, Qt::CaseSensitivity cs)
static QByteArray qt_convert_to_local_8bit(QStringView string)
Definition qstring.cpp:5713
static LengthMod parse_length_modifier(const char *&c) noexcept
Definition qstring.cpp:7387
static ArgEscapeData findArgEscapes(QStringView s)
Definition qstring.cpp:8606
static QByteArray qt_convert_to_utf8(QStringView str)
Definition qstring.cpp:5759
static void qt_to_latin1_internal(uchar *dst, const char16_t *src, qsizetype length)
Definition qstring.cpp:1005
QtPrivate::QCaseInsensitiveLatin1Hash CaseInsensitiveL1
Definition qstring.cpp:1354
LengthMod
Definition qstring.cpp:7376
@ lm_z
Definition qstring.cpp:7376
@ lm_none
Definition qstring.cpp:7376
@ lm_t
Definition qstring.cpp:7376
@ lm_l
Definition qstring.cpp:7376
@ lm_ll
Definition qstring.cpp:7376
@ lm_hh
Definition qstring.cpp:7376
@ lm_L
Definition qstring.cpp:7376
@ lm_h
Definition qstring.cpp:7376
@ lm_j
Definition qstring.cpp:7376
static void insert_helper(QString &str, qsizetype i, const T &toInsert)
Definition qstring.cpp:2976
static int latin1nicmp(const char *lhsChar, qsizetype lSize, const char *rhsChar, qsizetype rSize)
Definition qstring.cpp:1356
Qt::strong_ordering compareThreeWay(const QByteArrayView &lhs, const QChar &rhs) noexcept
Definition qstring.cpp:6730
static char16_t to_unicode(const char c)
Definition qstring.cpp:9011
Qt::strong_ordering compareThreeWay(const QByteArray &lhs, char16_t rhs) noexcept
Definition qstring.cpp:6763
static QString replaceArgEscapes(QStringView s, const ArgEscapeData &d, qsizetype field_width, QStringView arg, QStringView larg, QChar fillChar)
Definition qstring.cpp:8682
static QVarLengthArray< char16_t > qt_from_latin1_to_qvla(QLatin1StringView str)
Definition qstring.cpp:996
static Q_NEVER_INLINE int ucstricmp8(const char *utf8, const char *utf8end, const QChar *utf16, const QChar *utf16end)
Definition qstring.cpp:1238
void qt_string_normalize(QString *data, QString::NormalizationForm mode, QChar::UnicodeVersion version, qsizetype from)
Definition qstring.cpp:8469
static uint parse_flag_characters(const char *&c) noexcept
Definition qstring.cpp:7339
static Q_NEVER_INLINE int ucstricmp(qsizetype alen, const char16_t *a, qsizetype blen, const char16_t *b)
Definition qstring.cpp:1195
static char16_t to_unicode(const QChar c)
Definition qstring.cpp:9010
QDataStream & operator>>(QDataStream &in, QString &str)
Definition qstring.cpp:9543
static int getEscape(const Char *uc, qsizetype *pos, qsizetype len)
Definition qstring.cpp:9014
static int ucstrncmp(const char16_t *a, const char *b, size_t l)
Definition qstring.cpp:1318
static bool can_consume(const char *&c, char ch) noexcept
Definition qstring.cpp:7378
static int parse_field_width(const char *&c, qsizetype size)
Definition qstring.cpp:7359
Qt::strong_ordering compareThreeWay(const QByteArrayView &lhs, char16_t rhs) noexcept
Definition qstring.cpp:6741
#define qUtf16Printable(string)
Definition qstring.h:1695
qsizetype occurrences
Definition qstring.cpp:8600
qsizetype escape_len
Definition qstring.cpp:8603
qsizetype locale_occurrences
Definition qstring.cpp:8601
\inmodule QtCore \reentrant
Definition qchar.h:18
constexpr char16_t unicode() const noexcept
Converts a Latin-1 character to an 16-bit-encoded Unicode representation of the character.
Definition qchar.h:22
constexpr QLatin1Char(char c) noexcept
Constructs a Latin-1 character for c.
Definition qchar.h:20
@ BlankBeforePositive
Definition qlocale_p.h:270
@ AddTrailingZeroes
Definition qlocale_p.h:267
static int difference(char lhs, char rhs)