Qt
Internal/Contributor docs for the Qt SDK. Note: These are NOT official API docs; those are found at https://doc.qt.io/
Loading...
Searching...
No Matches
qstring.cpp
Go to the documentation of this file.
1// Copyright (C) 2021 The Qt Company Ltd.
2// Copyright (C) 2022 Intel Corporation.
3// Copyright (C) 2019 Mail.ru Group.
4// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
5
6#include "qstringlist.h"
7#if QT_CONFIG(regularexpression)
8#include "qregularexpression.h"
9#endif
11#include <private/qstringconverter_p.h>
12#include <private/qtools_p.h>
14#include "private/qsimd_p.h"
15#include <qnumeric.h>
16#include <qdatastream.h>
17#include <qlist.h>
18#include "qlocale.h"
19#include "qlocale_p.h"
20#include "qspan.h"
21#include "qstringbuilder.h"
22#include "qstringmatcher.h"
24#include "qdebug.h"
25#include "qendian.h"
26#include "qcollator.h"
27#include "qttypetraits.h"
28
29#ifdef Q_OS_DARWIN
30#include <private/qcore_mac_p.h>
31#endif
32
33#include <private/qfunctions_p.h>
34
35#include <limits.h>
36#include <string.h>
37#include <stdlib.h>
38#include <stdio.h>
39#include <stdarg.h>
40#include <wchar.h>
41
42#include "qchar.cpp"
47#include "qthreadstorage.h"
48
49#include <algorithm>
50#include <functional>
51
52#ifdef Q_OS_WIN
53# include <qt_windows.h>
54# if !defined(QT_BOOTSTRAPPED) && (defined(QT_NO_CAST_FROM_ASCII) || defined(QT_NO_CAST_TO_ASCII))
55// MSVC requires this, but let's apply it to MinGW compilers too, just in case
56# error "This file cannot be compiled with QT_NO_CAST_{TO,FROM}_ASCII, "
57 "otherwise some QString functions will not get exported."
58# endif
59#endif
60
61#ifdef truncate
62# undef truncate
63#endif
64
// Rolling-hash step for the backward searches in this file.
// Expects two variables to be in scope where it is expanded:
//   sl_minus_1   - the shift count applied to the code unit being removed
//   hashHaystack - the running hash that is updated in place
// It subtracts `a << sl_minus_1` from the hash and then doubles the hash.
// The subtraction is skipped when the shift count reaches the bit width of
// sl_minus_1's type, because such a shift would be undefined behavior.
// The trailing backslashes are required: without them only the first line
// would belong to the macro.
#define REHASH(a) \
    if (sl_minus_1 < sizeof(sl_minus_1) * CHAR_BIT) \
        hashHaystack -= decltype(hashHaystack)(a) << sl_minus_1; \
    hashHaystack <<= 1
69
71
72using namespace Qt::StringLiterals;
73using namespace QtMiscUtils;
74
75const char16_t QString::_empty = 0;
76
77// in qstringmatcher.cpp
78qsizetype qFindStringBoyerMoore(QStringView haystack, qsizetype from, QStringView needle, Qt::CaseSensitivity cs);
79
80namespace {
// Selects what a comparison helper has to compute. ucstrncmp_sse2() in this
// file checks it at the point where a mismatch is found: in
// CompareStringsForEquality mode it returns 1 right away instead of computing
// the difference between the mismatching code units.
81enum StringComparisonMode {
82 CompareStringsForEquality,
83 CompareStringsForOrdering
84};
85
86template <typename Pointer>
87char32_t foldCaseHelper(Pointer ch, Pointer start) = delete;
88
89template <>
90char32_t foldCaseHelper<const QChar*>(const QChar* ch, const QChar* start)
91{
92 return foldCase(reinterpret_cast<const char16_t*>(ch),
93 reinterpret_cast<const char16_t*>(start));
94}
95
96template <>
97char32_t foldCaseHelper<const char*>(const char* ch, const char*)
98{
99 return foldCase(char16_t(uchar(*ch)));
100}
101
102template <typename T>
103char16_t valueTypeToUtf16(T t) = delete;
104
105template <>
106char16_t valueTypeToUtf16<QChar>(QChar t)
107{
108 return t.unicode();
109}
110
111template <>
112char16_t valueTypeToUtf16<char>(char t)
113{
114 return char16_t{uchar(t)};
115}
116
// Returns true if the case-folded form of \a a equals \a b. Note that \a b is
// compared as given: it is not folded here.
template <typename T>
static inline bool foldAndCompare(const T a, const T b)
{
    const auto folded = foldCase(a);
    return folded == b;
}
122
123/*!
124 \internal
125
126 Returns the index position of the first occurrence of the
127 character \a ch in the string given by \a str and \a len,
128 searching forward from index
129 position \a from. Returns -1 if \a ch could not be found.
130*/
131template <typename Haystack>
132static inline qsizetype qLastIndexOf(Haystack haystack, QChar needle,
133 qsizetype from, Qt::CaseSensitivity cs) noexcept
134{
135 if (haystack.size() == 0)
136 return -1;
137 if (from < 0)
138 from += haystack.size();
139 else if (std::size_t(from) > std::size_t(haystack.size()))
140 from = haystack.size() - 1;
141 if (from >= 0) {
142 char16_t c = needle.unicode();
143 const auto b = haystack.data();
144 auto n = b + from;
145 if (cs == Qt::CaseSensitive) {
146 for (; n >= b; --n)
147 if (valueTypeToUtf16(*n) == c)
148 return n - b;
149 } else {
150 c = foldCase(c);
151 for (; n >= b; --n)
152 if (foldCase(valueTypeToUtf16(*n)) == c)
153 return n - b;
154 }
155 }
156 return -1;
157}
158template <> qsizetype
159qLastIndexOf(QString, QChar, qsizetype, Qt::CaseSensitivity) noexcept = delete; // unwanted, would detach
160
// Returns the index of the last occurrence of \a needle0 in \a haystack0 that
// starts at or before index \a from (a negative \a from is counted from the
// end), or -1 if there is none. The search moves backwards over needle-sized
// windows, keeps a rolling hash of the current window, and confirms every hash
// hit with QtPrivate::compareStrings().
161template<typename Haystack, typename Needle>
162static qsizetype qLastIndexOf(Haystack haystack0, qsizetype from,
163 Needle needle0, Qt::CaseSensitivity cs) noexcept
164{
165 const qsizetype sl = needle0.size();
 // single-character needles are handled by the QChar overload
166 if (sl == 1)
167 return qLastIndexOf(haystack0, needle0.front(), from, cs);
168
169 const qsizetype l = haystack0.size();
170 if (from < 0)
171 from += l;
 // an empty needle is reported as found at the very end of the haystack
172 if (from == l && sl == 0)
173 return from;
 // delta is the highest index at which the needle still fits
174 const qsizetype delta = l - sl;
175 if (std::size_t(from) > std::size_t(l) || delta < 0)
176 return -1;
177 if (from > delta)
178 from = delta;
179
 // builds a needle-sized view that starts at v, used to verify hash hits
180 auto sv = [sl](const typename Haystack::value_type *v) { return Haystack(v, sl); };
181
182 auto haystack = haystack0.data();
183 const auto needle = needle0.data();
 // `end` is where the backward walk stops: the start of the haystack
184 const auto *end = haystack;
185 haystack += from;
186 const qregisteruint sl_minus_1 = sl ? sl - 1 : 0;
 // n and h point at the last code unit of the needle and of the first window
187 const auto *n = needle + sl_minus_1;
188 const auto *h = haystack + sl_minus_1;
189 qregisteruint hashNeedle = 0, hashHaystack = 0;
190
191 if (cs == Qt::CaseSensitive) {
 // build both hashes from the last code unit to the first, doubling
 // the accumulated value at every step
192 for (qsizetype idx = 0; idx < sl; ++idx) {
193 hashNeedle = (hashNeedle << 1) + valueTypeToUtf16(*(n - idx));
194 hashHaystack = (hashHaystack << 1) + valueTypeToUtf16(*(h - idx));
195 }
 // the loop re-adds the window's first code unit on every iteration,
 // so take it out once before entering
196 hashHaystack -= valueTypeToUtf16(*haystack);
197
198 while (haystack >= end) {
199 hashHaystack += valueTypeToUtf16(*haystack);
200 if (hashHaystack == hashNeedle
201 && QtPrivate::compareStrings(needle0, sv(haystack), Qt::CaseSensitive) == 0)
202 return haystack - end;
 // move the window one position towards the start; REHASH() removes
 // the code unit that just left the window and doubles the hash
203 --haystack;
204 REHASH(valueTypeToUtf16(haystack[sl]));
205 }
206 } else {
 // same algorithm as above, hashing case-folded code units instead
207 for (qsizetype idx = 0; idx < sl; ++idx) {
208 hashNeedle = (hashNeedle << 1) + foldCaseHelper(n - idx, needle);
209 hashHaystack = (hashHaystack << 1) + foldCaseHelper(h - idx, end);
210 }
211 hashHaystack -= foldCaseHelper(haystack, end);
212
213 while (haystack >= end) {
214 hashHaystack += foldCaseHelper(haystack, end);
215 if (hashHaystack == hashNeedle
216 && QtPrivate::compareStrings(sv(haystack), needle0, Qt::CaseInsensitive) == 0)
217 return haystack - end;
218 --haystack;
219 REHASH(foldCaseHelper(haystack + sl, end));
220 }
221 }
222 return -1;
223}
224
225template <typename Haystack, typename Needle>
226bool qt_starts_with_impl(Haystack haystack, Needle needle, Qt::CaseSensitivity cs) noexcept
227{
228 if (haystack.isNull())
229 return needle.isNull();
230 const auto haystackLen = haystack.size();
231 const auto needleLen = needle.size();
232 if (haystackLen == 0)
233 return needleLen == 0;
234 if (needleLen > haystackLen)
235 return false;
236
237 return QtPrivate::compareStrings(haystack.first(needleLen), needle, cs) == 0;
238}
239
240template <typename Haystack, typename Needle>
241bool qt_ends_with_impl(Haystack haystack, Needle needle, Qt::CaseSensitivity cs) noexcept
242{
243 if (haystack.isNull())
244 return needle.isNull();
245 const auto haystackLen = haystack.size();
246 const auto needleLen = needle.size();
247 if (haystackLen == 0)
248 return needleLen == 0;
249 if (haystackLen < needleLen)
250 return false;
251
252 return QtPrivate::compareStrings(haystack.last(needleLen), needle, cs) == 0;
253}
254
// Appends the text in \a view to \a self, converting it to UTF-16 on the way.
// \a T must be QUtf8StringView or QLatin1StringView; any other type trips the
// static_assert below.
255template <typename T>
256static void append_helper(QString &self, T view)
257{
258 const auto strData = view.data();
259 const qsizetype strSize = view.size();
260 auto &d = self.data_ptr();
261 if (strData && strSize > 0) {
262 // the number of UTF-8 code units is always at a minimum equal to the number
263 // of equivalent UTF-16 code units
 // (so reserving strSize additional units is enough for either encoding)
264 d.detachAndGrow(QArrayData::GrowsAtEnd, strSize, nullptr, nullptr);
265 Q_CHECK_PTR(d.data());
266 Q_ASSERT(strSize <= d.freeSpaceAtEnd());
267
 // write the converted text directly behind the current contents
268 auto dst = std::next(d.data(), d.size);
269 if constexpr (std::is_same_v<T, QUtf8StringView>) {
 // the UTF-8 converter reports where its output ended
270 dst = QUtf8::convertToUnicode(dst, view);
271 } else if constexpr (std::is_same_v<T, QLatin1StringView>) {
 // here the end is computed locally: one output unit per input byte
272 QLatin1::convertToUnicode(dst, view);
273 dst += strSize;
274 } else {
275 static_assert(QtPrivate::type_dependent_false<T>(),
276 "Can only operate on UTF-8 and Latin-1");
277 }
 // the distance from the start to dst becomes the new size
278 self.resize(std::distance(d.begin(), dst));
279 } else if (d.isNull() && !view.isNull()) { // special case
 // nothing to copy, but a null string receiving a non-null view is
 // replaced by an empty string literal
 // NOTE(review): presumably so that the result stops being null - confirm
280 self = QLatin1StringView("");
281 }
282}
283
// Compile-time unrolled loop that runs at most MaxCount iterations. Each
// instantiation handles one iteration and hands the remainder to
// UnrollTailLoop<MaxCount - 1>; the <0> specialization ends the recursion.
template <uint MaxCount> struct UnrollTailLoop
{
    template <typename RetType, typename Functor1, typename Functor2, typename Number>
    static inline RetType exec(Number count, RetType returnIfExited, Functor1 loopCheck, Functor2 returnIfFailed, Number i = 0)
    {
        /* equivalent to:
         *   for ( ; count--; ++i) {
         *       if (loopCheck(i))
         *           return returnIfFailed(i);
         *   }
         *   return returnIfExited;
         * limited to MaxCount iterations
         */

        if (count) {
            if (loopCheck(i))
                return returnIfFailed(i);

            // one iteration consumed: continue with the next smaller unroller
            return UnrollTailLoop<MaxCount - 1>::exec(count - 1, returnIfExited, loopCheck, returnIfFailed, i + 1);
        }
        return returnIfExited;
    }

    template <typename Functor, typename Number>
    static inline void exec(Number count, Functor code)
    {
        /* equivalent to:
         *   for (Number i = 0; i < count; ++i)
         *       code(i);
         * limited to MaxCount iterations
         */

        // wrap the body in a check that never asks to stop
        const auto runAndContinue = [=](Number i) -> bool { code(i); return false; };
        // never invoked, since the check above always returns false
        const auto neverCalled = [](Number) { return 0; };
        exec(count, 0, runAndContinue, neverCalled);
    }
};
template <> template <typename RetType, typename Functor1, typename Functor2, typename Number>
inline RetType UnrollTailLoop<0>::exec(Number, RetType returnIfExited, Functor1, Functor2, Number)
{
    // no iterations left in the unroll budget
    return returnIfExited;
}
322} // unnamed namespace
323
324/*
325 * Note on the use of SIMD in qstring.cpp:
326 *
327 * Several operations with strings are improved with the use of SIMD code,
328 * since they are repetitive. For MIPS, we have hand-written assembly code
329 * outside of qstring.cpp targeting MIPS DSP and MIPS DSPr2. For ARM and for
330 * x86, we can only use intrinsics and therefore everything is contained in
331 * qstring.cpp. We need to use intrinsics only for those platforms due to the
332 * different compilers and toolchains used, which have different syntax for
333 * assembly sources.
334 *
335 * ** SSE notes: **
336 *
337 * Whenever multiple alternatives are equivalent or near so, we prefer the one
338 * using instructions from SSE2, since SSE2 is guaranteed to be enabled for all
339 * 64-bit builds and we enable it for 32-bit builds by default. Use of higher
340 * SSE versions should be done when there is a clear performance benefit and
341 * requires fallback code to SSE2, if it exists.
342 *
343 * Performance measurement in the past shows that most strings are short in
344 * size and, therefore, do not benefit from alignment prologues. That is,
345 * trying to find a 16-byte-aligned boundary to operate on is often more
346 * expensive than executing the unaligned operation directly. In addition, note
347 * that the QString private data is designed so that the data is stored on
348 * 16-byte boundaries if the system malloc() returns 16-byte aligned pointers
349 * on its own (64-bit glibc on Linux does; 32-bit glibc on Linux returns them
350 * 50% of the time), so skipping the alignment prologue is actually optimizing
351 * for the common case.
352 */
353
354#if defined(__mips_dsp)
355// From qstring_mips_dsp_asm.S
356extern "C" void qt_fromlatin1_mips_asm_unroll4 (char16_t*, const char*, uint);
357extern "C" void qt_fromlatin1_mips_asm_unroll8 (char16_t*, const char*, uint);
358extern "C" void qt_toLatin1_mips_dsp_asm(uchar *dst, const char16_t *src, int length);
359#endif
360
// ATTRIBUTE_NO_SANITIZE: when both __SSE2__ and Q_CC_GNU are defined, this
// expands to an attribute that switches off address- and thread-sanitizer
// instrumentation for the function it annotates. Otherwise it expands to
// nothing.
361#if defined(__SSE2__) && defined(Q_CC_GNU)
362// We may overrun the buffer, but that's a false positive:
363// this won't crash nor produce incorrect results
364# define ATTRIBUTE_NO_SANITIZE __attribute__((__no_sanitize_address__, __no_sanitize_thread__))
365#else
366# define ATTRIBUTE_NO_SANITIZE
367#endif
368
369#ifdef __SSE2__
// Compile-time switches derived from qCompilerCpuFeatures. The SIMD helpers
// in this file test them with `if constexpr` to pick a code path.
// UseAvx2 is only true when SSE4.1 is enabled and every bit of
// CpuFeatureArchHaswell is set.
370static constexpr bool UseSse4_1 = bool(qCompilerCpuFeatures & CpuFeatureSSE4_1);
371static constexpr bool UseAvx2 = UseSse4_1 &&
372 (qCompilerCpuFeatures & CpuFeatureArchHaswell) == CpuFeatureArchHaswell;
373
374[[maybe_unused]]
375static Q_ALWAYS_INLINE __m128i mm_load8_zero_extend(const void *ptr)
376{
377 const __m128i *dataptr = static_cast<const __m128i *>(ptr);
378 if constexpr (UseSse4_1) {
379 // use a MOVQ followed by PMOVZXBW
380 // if AVX2 is present, these should combine into a single VPMOVZXBW instruction
381 __m128i data = _mm_loadl_epi64(dataptr);
382 return _mm_cvtepu8_epi16(data);
383 }
384
385 // use MOVQ followed by PUNPCKLBW
386 __m128i data = _mm_loadl_epi64(dataptr);
387 return _mm_unpacklo_epi8(data, _mm_setzero_si128());
388}
389
// Returns the number of char16_t units that precede the first null unit in
// \a str. The string is scanned 16 bytes at a time using aligned loads only.
390[[maybe_unused]] ATTRIBUTE_NO_SANITIZE
391static qsizetype qustrlen_sse2(const char16_t *str) noexcept
392{
393 // find the 16-byte alignment immediately prior or equal to str
394 quintptr misalignment = quintptr(str) & 0xf;
395 Q_ASSERT((misalignment & 1) == 0);
396 const char16_t *ptr = str - (misalignment / 2);
397
398 // load 16 bytes and see if we have a null
399 // (aligned loads can never segfault)
400 const __m128i zeroes = _mm_setzero_si128();
401 __m128i data = _mm_load_si128(reinterpret_cast<const __m128i *>(ptr));
402 __m128i comparison = _mm_cmpeq_epi16(data, zeroes);
403 uint mask = _mm_movemask_epi8(comparison);
404
405 // ignore the result prior to the beginning of str
406 mask >>= misalignment;
407
408 // Have we found something in the first block? Need to handle it now
409 // because of the right shift above.
410 if (mask)
411 return qCountTrailingZeroBits(mask) / sizeof(char16_t);
412
413 constexpr qsizetype Step = sizeof(__m128i) / sizeof(char16_t);
 // number of units of str covered by the first block, so that str + size
 // is the start of the next 16-byte block
414 qsizetype size = Step - misalignment / sizeof(char16_t);
415
 // pre-compensate for the increment at the top of the loop
416 size -= Step;
417 do {
418 size += Step;
419 data = _mm_load_si128(reinterpret_cast<const __m128i *>(str + size));
420
421 comparison = _mm_cmpeq_epi16(data, zeroes);
422 mask = _mm_movemask_epi8(comparison);
423 } while (mask == 0);
424
425 // found a null
 // (the mask has two bits per 16-bit unit, hence the division)
426 return size + qCountTrailingZeroBits(mask) / sizeof(char16_t);
427}
428
429// Scans from \a ptr to \a end looking for data that has any bit of \a maskval
430// set. Returns true if none was found. Returns false and updates \a ptr to
431// point to the first 16-bit word that has any bit set (note: if the input is
432// 8-bit, \a ptr may be updated to one byte short).
// Only whole 8-byte groups are examined: when true is returned, \a ptr is left
// at the first byte that was not examined, so the caller has to check what
// remains up to \a end.
433static bool simdTestMask(const char *&ptr, const char *end, quint32 maskval)
434{
 // `result` carries one bit per byte, set where the 16-bit word had no mask
 // bit set. The count of trailing set bits is therefore the byte offset of
 // the first word that has one.
435 auto updatePtr = [&](uint result) {
436 // found a character matching the mask
437 uint idx = qCountTrailingZeroBits(~result);
438 ptr += idx;
439 return false;
440 };
441
442 if constexpr (UseSse4_1) {
443# ifndef Q_OS_QNX // compiler fails in the code below
444 __m128i mask;
 // recomputes the per-word result for a 16-byte block that is already
 // known to contain a hit, then advances ptr to it
445 auto updatePtrSimd = [&](__m128i data) -> bool {
446 __m128i masked = _mm_and_si128(mask, data);
447 __m128i comparison = _mm_cmpeq_epi16(masked, _mm_setzero_si128());
448 uint result = _mm_movemask_epi8(comparison);
449 return updatePtr(result);
450 };
451
452 if constexpr (UseAvx2) {
453 // AVX2 implementation: test 32 bytes at a time
454 const __m256i mask256 = _mm256_broadcastd_epi32(_mm_cvtsi32_si128(maskval));
455 while (ptr + 32 <= end) {
456 __m256i data = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(ptr));
457 if (!_mm256_testz_si256(mask256, data)) {
458 // found a character matching the mask
459 __m256i masked256 = _mm256_and_si256(mask256, data);
460 __m256i comparison256 = _mm256_cmpeq_epi16(masked256, _mm256_setzero_si256());
461 return updatePtr(_mm256_movemask_epi8(comparison256));
462 }
463 ptr += 32;
464 }
465
 // reuse the low half of the broadcast mask for the 16- and 8-byte tails
466 mask = _mm256_castsi256_si128(mask256);
467 } else {
468 // SSE 4.1 implementation: test 32 bytes at a time (two 16-byte
469 // comparisons, unrolled)
470 mask = _mm_set1_epi32(maskval);
471 while (ptr + 32 <= end) {
472 __m128i data1 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(ptr));
473 __m128i data2 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(ptr + 16));
474 if (!_mm_testz_si128(mask, data1))
475 return updatePtrSimd(data1);
476
477 ptr += 16;
478 if (!_mm_testz_si128(mask, data2))
479 return updatePtrSimd(data2);
480 ptr += 16;
481 }
482 }
483
484 // AVX2 and SSE4.1: final 16-byte comparison
485 if (ptr + 16 <= end) {
486 __m128i data1 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(ptr));
487 if (!_mm_testz_si128(mask, data1))
488 return updatePtrSimd(data1);
489 ptr += 16;
490 }
491
492 // and final 8-byte comparison
493 if (ptr + 8 <= end) {
494 __m128i data1 = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(ptr));
495 if (!_mm_testz_si128(mask, data1))
496 return updatePtrSimd(data1);
497 ptr += 8;
498 }
499
500 return true;
501# endif // QNX
502 }
503
 // Reached when SSE4.1 is not enabled, and also on QNX, where the block
 // above is compiled out.
504 // SSE2 implementation: test 16 bytes at a time.
505 const __m128i mask = _mm_set1_epi32(maskval);
506 while (ptr + 16 <= end) {
507 __m128i data = _mm_loadu_si128(reinterpret_cast<const __m128i *>(ptr));
508 __m128i masked = _mm_and_si128(mask, data);
509 __m128i comparison = _mm_cmpeq_epi16(masked, _mm_setzero_si128());
510 quint16 result = _mm_movemask_epi8(comparison);
 // 0xffff means that all 16 bytes were free of mask bits
511 if (result != 0xffff)
512 return updatePtr(result);
513 ptr += 16;
514 }
515
516 // and one 8-byte comparison
517 if (ptr + 8 <= end) {
518 __m128i data = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(ptr));
519 __m128i masked = _mm_and_si128(mask, data);
520 __m128i comparison = _mm_cmpeq_epi16(masked, _mm_setzero_si128());
 // only the low 8 result bits correspond to the bytes that were loaded
521 quint8 result = _mm_movemask_epi8(comparison);
522 if (result != 0xff)
523 return updatePtr(result);
524 ptr += 8;
525 }
526
527 return true;
528}
529
// Compares the first \a l code units of \a a with those of \a b, where \a b
// holds either 16-bit or 8-bit unsigned units (8-bit units are zero-extended).
// Returns 0 if all of them are equal. On a mismatch the result is non-zero:
// the SIMD paths return the difference between the first mismatching units
// (a minus b) in CompareStringsForOrdering mode and 1 in
// CompareStringsForEquality mode; the scalar path for fewer than four units
// returns the difference in both modes.
530template <StringComparisonMode Mode, typename Char> [[maybe_unused]]
531static int ucstrncmp_sse2(const char16_t *a, const Char *b, size_t l)
532{
533 static_assert(std::is_unsigned_v<Char>);
534
535 // Using the PMOVMSKB instruction, we get two bits for each UTF-16 character
536 // we compare. This lambda helps extract the code unit.
537 static const auto codeUnitAt = [](const auto *n, qptrdiff idx) -> int {
538 constexpr int Stride = 2;
539 // this is the same as:
540 // return n[idx / Stride];
541 // but using pointer arithmetic to avoid the compiler dividing by two
542 // and multiplying by two in the case of char16_t (we know idx is even,
543 // but the compiler does not). This is not UB.
544
545 auto ptr = reinterpret_cast<const uchar *>(n);
546 ptr += idx / (Stride / sizeof(*n));
547 return *reinterpret_cast<decltype(n)>(ptr);
548 };
 // turns a non-zero mismatch mask for the block at `offset` into the
 // function's return value
549 auto difference = [a, b](uint mask, qptrdiff offset) {
550 if (Mode == CompareStringsForEquality)
551 return 1;
552 uint idx = qCountTrailingZeroBits(mask);
553 return codeUnitAt(a + offset, idx) - codeUnitAt(b + offset, idx);
554 };
555
 // load 8 (respectively 4) code units as 16-bit lanes, zero-extending
 // 8-bit input
556 static const auto load8Chars = [](const auto *ptr) {
557 if (sizeof(*ptr) == 2)
558 return _mm_loadu_si128(reinterpret_cast<const __m128i *>(ptr));
559 __m128i chunk = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(ptr));
560 return _mm_unpacklo_epi8(chunk, _mm_setzero_si128());
561 };
562 static const auto load4Chars = [](const auto *ptr) {
563 if (sizeof(*ptr) == 2)
564 return _mm_loadl_epi64(reinterpret_cast<const __m128i *>(ptr));
565 __m128i chunk = _mm_cvtsi32_si128(qFromUnaligned<quint32>(ptr));
566 return _mm_unpacklo_epi8(chunk, _mm_setzero_si128());
567 };
568
569 // we're going to read a[0..15] and b[0..15] (32 bytes)
 // returns a 32-bit mask with two bits set for every pair of equal code units
570 auto processChunk16Chars = [a, b](qptrdiff offset) -> uint {
571 if constexpr (UseAvx2) {
572 __m256i a_data = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(a + offset));
573 __m256i b_data;
574 if (sizeof(Char) == 1) {
575 // expand to UTF-16 via zero-extension
576 __m128i chunk = _mm_loadu_si128(reinterpret_cast<const __m128i *>(b + offset));
577 b_data = _mm256_cvtepu8_epi16(chunk);
578 } else {
579 b_data = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(b + offset));
580 }
581 __m256i result = _mm256_cmpeq_epi16(a_data, b_data);
582 return _mm256_movemask_epi8(result);
583 }
584
585 __m128i a_data1 = load8Chars(a + offset);
586 __m128i a_data2 = load8Chars(a + offset + 8);
587 __m128i b_data1, b_data2;
588 if (sizeof(Char) == 1) {
589 // expand to UTF-16 via unpacking
590 __m128i b_data = _mm_loadu_si128(reinterpret_cast<const __m128i *>(b + offset));
591 b_data1 = _mm_unpacklo_epi8(b_data, _mm_setzero_si128());
592 b_data2 = _mm_unpackhi_epi8(b_data, _mm_setzero_si128());
593 } else {
594 b_data1 = load8Chars(b + offset);
595 b_data2 = load8Chars(b + offset + 8);
596 }
597 __m128i result1 = _mm_cmpeq_epi16(a_data1, b_data1);
598 __m128i result2 = _mm_cmpeq_epi16(a_data2, b_data2);
599 return _mm_movemask_epi8(result1) | _mm_movemask_epi8(result2) << 16;
600 };
601
 // 16 or more code units: full 16-unit blocks, then one overlapping block
602 if (l >= sizeof(__m256i) / sizeof(char16_t)) {
603 qptrdiff offset = 0;
604 for ( ; l >= offset + sizeof(__m256i) / sizeof(char16_t); offset += sizeof(__m256i) / sizeof(char16_t)) {
 // invert so that set bits mark mismatching code units
605 uint mask = ~processChunk16Chars(offset);
606 if (mask)
607 return difference(mask, offset);
608 }
609
610 // maybe overlap the last 32 bytes
611 if (size_t(offset) < l) {
612 offset = l - sizeof(__m256i) / sizeof(char16_t);
613 uint mask = ~processChunk16Chars(offset);
614 return mask ? difference(mask, offset) : 0;
615 }
616 } else if (l >= 4) {
 // 4 to 15 code units: compare the first and the last `width` units;
 // the two ranges may overlap
617 __m128i a_data1, b_data1;
618 __m128i a_data2, b_data2;
619 int width;
620 if (l >= 8) {
621 width = 8;
622 a_data1 = load8Chars(a);
623 b_data1 = load8Chars(b);
624 a_data2 = load8Chars(a + l - width);
625 b_data2 = load8Chars(b + l - width);
626 } else {
627 // we're going to read a[0..3] and b[0..3] (8 bytes)
628 width = 4;
629 a_data1 = load4Chars(a);
630 b_data1 = load4Chars(b);
631 a_data2 = load4Chars(a + l - width);
632 b_data2 = load4Chars(b + l - width);
633 }
634
635 __m128i result = _mm_cmpeq_epi16(a_data1, b_data1);
636 ushort mask = ~_mm_movemask_epi8(result);
637 if (mask)
638 return difference(mask, 0);
639
640 result = _mm_cmpeq_epi16(a_data2, b_data2);
641 mask = ~_mm_movemask_epi8(result);
642 if (mask)
643 return difference(mask, l - width);
644 } else {
 // fewer than 4 code units: unrolled scalar comparison
645 // reset l
646 l &= 3;
647
 // the same lambda serves as the check (non-zero difference) and as
 // the result to return
648 const auto lambda = [=](size_t i) -> int {
649 return a[i] - b[i];
650 };
651 return UnrollTailLoop<3>::exec(l, 0, lambda, lambda);
652 }
653 return 0;
654}
655#endif
656
657Q_NEVER_INLINE
658qsizetype QtPrivate::qustrlen(const char16_t *str) noexcept
659{
660#if defined(__SSE2__) && !(defined(__SANITIZE_ADDRESS__) || __has_feature(address_sanitizer)) && !(defined(__SANITIZE_THREAD__) || __has_feature(thread_sanitizer))
661 return qustrlen_sse2(str);
662#endif
663
664 if (sizeof(wchar_t) == sizeof(char16_t))
665 return wcslen(reinterpret_cast<const wchar_t *>(str));
666
667 qsizetype result = 0;
668 while (*str++)
669 ++result;
670 return result;
671}
672
673qsizetype QtPrivate::qustrnlen(const char16_t *str, qsizetype maxlen) noexcept
674{
675 return qustrchr({ str, maxlen }, u'\0') - str;
676}
677
678/*!
679 * \internal
680 *
681 * Searches for character \a c in the string \a str and returns a pointer to
682 * it. Unlike strchr() and wcschr() (but like glibc's strchrnul()), if the
683 * character is not found, this function returns a pointer to the end of the
684 * string -- that is, \c{str.end()}.
685 */
687const char16_t *QtPrivate::qustrchr(QStringView str, char16_t c) noexcept
688{
689 const char16_t *n = str.utf16();
690 const char16_t *e = n + str.size();
691
692#ifdef __SSE2__
 // whether the 16-byte loop below may run more than once
693 bool loops = true;
694 // Using the PMOVMSKB instruction, we get two bits for each character
695 // we compare.
696 __m128i mch;
697 if constexpr (UseAvx2) {
698 // we're going to read n[0..15] (32 bytes)
699 __m256i mch256 = _mm256_set1_epi32(c | (c << 16));
700 for (const char16_t *next = n + 16; next <= e; n = next, next += 16) {
701 __m256i data = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(n));
702 __m256i result = _mm256_cmpeq_epi16(data, mch256);
703 uint mask = uint(_mm256_movemask_epi8(result));
704 if (mask) {
705 uint idx = qCountTrailingZeroBits(mask);
706 return n + idx / 2;
707 }
708 }
 // fewer than 16 characters are left, so one 16-byte step is enough
709 loops = false;
710 mch = _mm256_castsi256_si128(mch256);
711 } else {
712 mch = _mm_set1_epi32(c | (c << 16));
713 }
714
 // Compares `data` with the search character. validityMask selects the
 // result bits that belong to bytes that were really loaded. On a match,
 // n is advanced to the matching character.
715 auto hasMatch = [mch, &n](__m128i data, ushort validityMask) {
716 __m128i result = _mm_cmpeq_epi16(data, mch);
717 uint mask = uint(_mm_movemask_epi8(result));
718 if ((mask & validityMask) == 0)
719 return false;
720 uint idx = qCountTrailingZeroBits(mask);
721 n += idx / 2;
722 return true;
723 };
724
725 // we're going to read n[0..7] (16 bytes)
726 for (const char16_t *next = n + 8; next <= e; n = next, next += 8) {
727 __m128i data = _mm_loadu_si128(reinterpret_cast<const __m128i *>(n));
728 if (hasMatch(data, 0xffff))
729 return n;
730
731 if (!loops) {
 // break skips the loop's own `n = next`, so advance by hand
732 n += 8;
733 break;
734 }
735 }
736
737# if !defined(__OPTIMIZE_SIZE__)
738 // we're going to read n[0..3] (8 bytes)
739 if (e - n > 3) {
740 __m128i data = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(n));
741 if (hasMatch(data, 0xff))
742 return n;
743
744 n += 4;
745 }
746
 // at most 3 characters remain: unrolled scalar search that yields e when
 // nothing matches
747 return UnrollTailLoop<3>::exec(e - n, e,
748 [=](qsizetype i) { return n[i] == c; },
749 [=](qsizetype i) { return n + i; });
750# endif
751#elif defined(__ARM_NEON__)
 // each lane gets its own bit, so the horizontal sum of the masked
 // comparison result has one bit set per matching lane
752 const uint16x8_t vmask = qvsetq_n_u16(1, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6, 1 << 7);
753 const uint16x8_t ch_vec = vdupq_n_u16(c);
754 for (const char16_t *next = n + 8; next <= e; n = next, next += 8) {
755 uint16x8_t data = vld1q_u16(reinterpret_cast<const uint16_t *>(n));
756 uint mask = vaddvq_u16(vandq_u16(vceqq_u16(data, ch_vec), vmask));
757 if (ushort(mask)) {
758 // found a match
759 return n + qCountTrailingZeroBits(mask);
760 }
761 }
762#endif // aarch64
763
 // scalar search over whatever the code above left unexamined
764 return std::find(n, e, c);
765}
766
767/*!
768 * \internal
769 *
770 * Searches case-insensitively for character \a c in the string \a str and
771 * returns a pointer to it. Iif the character is not found, this function
772 * returns a pointer to the end of the string -- that is, \c{str.end()}.
773 */
775const char16_t *QtPrivate::qustrcasechr(QStringView str, char16_t c) noexcept
776{
777 const QChar *n = str.begin();
778 const QChar *e = str.end();
779 c = foldCase(c);
780 auto it = std::find_if(n, e, [c](auto ch) { return foldAndCompare(ch, QChar(c)); });
781 return reinterpret_cast<const char16_t *>(it);
782}
783
// Returns true if every byte in the range from \a ptr to \a end has its high
// bit clear. \a ptr is advanced while scanning; when false is returned it
// points at, or just before, the offending byte (see the note below).
784// Note: ptr on output may be off by one and point to a preceding US-ASCII
785// character. Usually harmless.
786bool qt_is_ascii(const char *&ptr, const char *end) noexcept
787{
788#if defined(__SSE2__)
789 // Testing for the high bit can be done efficiently with just PMOVMSKB
 // whether the 16-byte loop below may run more than once
790 bool loops = true;
791 if constexpr (UseAvx2) {
792 while (ptr + 32 <= end) {
793 __m256i data = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(ptr));
794 quint32 mask = _mm256_movemask_epi8(data);
795 if (mask) {
 // the lowest set bit is the offset of the first non-ASCII byte
796 uint idx = qCountTrailingZeroBits(mask);
797 ptr += idx;
798 return false;
799 }
800 ptr += 32;
801 }
 // fewer than 32 bytes are left, so one 16-byte step is enough
802 loops = false;
803 }
804
805 while (ptr + 16 <= end) {
806 __m128i data = _mm_loadu_si128(reinterpret_cast<const __m128i *>(ptr));
807 quint32 mask = _mm_movemask_epi8(data);
808 if (mask) {
809 uint idx = qCountTrailingZeroBits(mask);
810 ptr += idx;
811 return false;
812 }
813 ptr += 16;
814
815 if (!loops)
816 break;
817 }
818 if (ptr + 8 <= end) {
819 __m128i data = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(ptr));
 // only the low 8 mask bits correspond to the bytes that were loaded
820 quint8 mask = _mm_movemask_epi8(data);
821 if (mask) {
822 uint idx = qCountTrailingZeroBits(mask);
823 ptr += idx;
824 return false;
825 }
826 ptr += 8;
827 }
828#endif
829
 // portable path: test the high bit of four bytes at a time
830 while (ptr + 4 <= end) {
831 quint32 data = qFromUnaligned<quint32>(ptr);
832 if (data &= 0x80808080U) {
 // the byte at the lowest address is the most significant one on
 // big-endian systems and the least significant one otherwise
833 uint idx = QSysInfo::ByteOrder == QSysInfo::BigEndian
834 ? qCountLeadingZeroBits(data)
835 : qCountTrailingZeroBits(data);
836 ptr += idx / 8;
837 return false;
838 }
839 ptr += 4;
840 }
841
 // remaining bytes, one at a time
842 while (ptr != end) {
843 if (quint8(*ptr) & 0x80)
844 return false;
845 ++ptr;
846 }
847 return true;
848}
849
850bool QtPrivate::isAscii(QLatin1StringView s) noexcept
851{
852 const char *ptr = s.begin();
853 const char *end = s.end();
854
855 return qt_is_ascii(ptr, end);
856}
857
858static bool isAscii_helper(const char16_t *&ptr, const char16_t *end)
859{
860#ifdef __SSE2__
861 const char *ptr8 = reinterpret_cast<const char *>(ptr);
862 const char *end8 = reinterpret_cast<const char *>(end);
863 bool ok = simdTestMask(ptr8, end8, 0xff80ff80);
864 ptr = reinterpret_cast<const char16_t *>(ptr8);
865 if (!ok)
866 return false;
867#endif
868
869 while (ptr != end) {
870 if (*ptr & 0xff80)
871 return false;
872 ++ptr;
873 }
874 return true;
875}
876
877bool QtPrivate::isAscii(QStringView s) noexcept
878{
879 const char16_t *ptr = s.utf16();
880 const char16_t *end = ptr + s.size();
881
882 return isAscii_helper(ptr, end);
883}
884
885bool QtPrivate::isLatin1(QStringView s) noexcept
886{
887 const char16_t *ptr = s.utf16();
888 const char16_t *end = ptr + s.size();
889
890#ifdef __SSE2__
891 const char *ptr8 = reinterpret_cast<const char *>(ptr);
892 const char *end8 = reinterpret_cast<const char *>(end);
893 if (!simdTestMask(ptr8, end8, 0xff00ff00))
894 return false;
895 ptr = reinterpret_cast<const char16_t *>(ptr8);
896#endif
897
898 while (ptr != end) {
899 if (*ptr++ > 0xff)
900 return false;
901 }
902 return true;
903}
904
905bool QtPrivate::isValidUtf16(QStringView s) noexcept
906{
907 constexpr char32_t InvalidCodePoint = UINT_MAX;
908
909 QStringIterator i(s);
910 while (i.hasNext()) {
911 const char32_t c = i.next(InvalidCodePoint);
912 if (c == InvalidCodePoint)
913 return false;
914 }
915
916 return true;
917}
918
919// conversion between Latin 1 and UTF-16
// Converts \a size Latin-1 bytes starting at \a str into UTF-16 code units
// written to \a dst, by zero-extending every byte.
920Q_CORE_EXPORT void qt_from_latin1(char16_t *dst, const char *str, size_t size) noexcept
921{
922 /* SIMD:
923 * Unpacking with SSE has been shown to improve performance on recent CPUs
924 * The same method gives no improvement with NEON. On Aarch64, clang will do the vectorization
925 * itself in exactly the same way as one would do it with intrinsics.
926 */
927#if defined(__SSE2__)
928 // we're going to read str[offset..offset+15] (16 bytes)
929 const __m128i nullMask = _mm_setzero_si128();
930 auto processOneChunk = [=](qptrdiff offset) {
931 const __m128i chunk = _mm_loadu_si128((const __m128i*)(str + offset)); // load
932 if constexpr (UseAvx2) {
933 // zero extend to an YMM register
934 const __m256i extended = _mm256_cvtepu8_epi16(chunk);
935
936 // store
937 _mm256_storeu_si256((__m256i*)(dst + offset), extended);
938 } else {
939 // unpack the first 8 bytes, padding with zeros
940 const __m128i firstHalf = _mm_unpacklo_epi8(chunk, nullMask);
941 _mm_storeu_si128((__m128i*)(dst + offset), firstHalf); // store
942
943 // unpack the last 8 bytes, padding with zeros
944 const __m128i secondHalf = _mm_unpackhi_epi8 (chunk, nullMask);
945 _mm_storeu_si128((__m128i*)(dst + offset + 8), secondHalf); // store
946 }
947 };
948
949 const char *e = str + size;
950 if (size >= sizeof(__m128i)) {
951 qptrdiff offset = 0;
952 for ( ; str + offset + sizeof(__m128i) <= e; offset += sizeof(__m128i))
953 processOneChunk(offset);
 // leftover bytes: convert the final 16 bytes once more as one chunk
 // that overlaps what has already been written
954 if (str + offset < e)
955 processOneChunk(size - sizeof(__m128i));
956 return;
957 }
958
959# if !defined(__OPTIMIZE_SIZE__)
960 if (size >= 4) {
961 // two overlapped loads & stores, of either 64-bit or of 32-bit
962 if (size >= 8) {
963 const __m128i unpacked1 = mm_load8_zero_extend(str);
964 const __m128i unpacked2 = mm_load8_zero_extend(str + size - 8);
965 _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), unpacked1);
966 _mm_storeu_si128(reinterpret_cast<__m128i *>(dst + size - 8), unpacked2);
967 } else {
968 const __m128i chunk1 = _mm_cvtsi32_si128(qFromUnaligned<quint32>(str));
969 const __m128i chunk2 = _mm_cvtsi32_si128(qFromUnaligned<quint32>(str + size - 4));
970 const __m128i unpacked1 = _mm_unpacklo_epi8(chunk1, nullMask);
971 const __m128i unpacked2 = _mm_unpacklo_epi8(chunk2, nullMask);
972 _mm_storel_epi64(reinterpret_cast<__m128i *>(dst), unpacked1);
973 _mm_storel_epi64(reinterpret_cast<__m128i *>(dst + size - 4), unpacked2);
974 }
975 return;
976 } else {
 // at most 3 bytes: unrolled scalar copy
977 size = size % 4;
978 return UnrollTailLoop<3>::exec(qsizetype(size), [=](qsizetype i) { dst[i] = uchar(str[i]); });
979 }
980# endif
981#endif
 // Reached without SSE2, and with SSE2 when __OPTIMIZE_SIZE__ is defined
 // and the input is shorter than 16 bytes.
982#if defined(__mips_dsp)
983 static_assert(sizeof(qsizetype) == sizeof(int),
984 "oops, the assembler implementation needs to be called in a loop");
985 if (size > 20)
986 qt_fromlatin1_mips_asm_unroll8(dst, str, size);
987 else
988 qt_fromlatin1_mips_asm_unroll4(dst, str, size);
989#else
 // going through uchar keeps bytes above 0x7f from being sign-extended
990 while (size--)
991 *dst++ = (uchar)*str++;
992#endif
993}
994
995static QVarLengthArray<char16_t> qt_from_latin1_to_qvla(QLatin1StringView str)
996{
997 const qsizetype len = str.size();
998 QVarLengthArray<char16_t> arr(len);
999 qt_from_latin1(arr.data(), str.data(), len);
1000 return arr;
1001}
1002
// Converts \a length UTF-16 code units starting at \a src into \a length
// bytes starting at \a dst, one output byte per input code unit.
//
// When \c Checked is true, every code unit above 0xff is replaced by '?'.
// When it is false, no such replacement is performed.
//
// The function picks one implementation at compile time: SSE2 (optionally
// using SSE4.1 / AVX2 instructions), ARM NEON, a MIPS DSP assembly routine,
// or a plain scalar loop.
template <bool Checked>
static void qt_to_latin1_internal(uchar *dst, const char16_t *src, qsizetype length)
{
#if defined(__SSE2__)
    // The 256-bit constants only exist for AVX2 builds; in other builds the
    // lambdas return a dummy 0 so that the names are still declared.
    auto questionMark256 = []() {
        if constexpr (UseAvx2)
            return _mm256_broadcastw_epi16(_mm_cvtsi32_si128('?'));
        else
            return 0;
    }();
    auto outOfRange256 = []() {
        if constexpr (UseAvx2)
            return _mm256_broadcastw_epi16(_mm_cvtsi32_si128(0x100));
        else
            return 0;
    }();
    // 128-bit versions of the same two constants: eight 16-bit lanes of '?'
    // and eight 16-bit lanes of 0x100 (the first value that is out of range).
    __m128i questionMark, outOfRange;
    if constexpr (UseAvx2) {
        questionMark = _mm256_castsi256_si128(questionMark256);
        outOfRange = _mm256_castsi256_si128(outOfRange256);
    } else {
        questionMark = _mm_set1_epi16('?');
        outOfRange = _mm_set1_epi16(0x100);
    }

    // Takes eight 16-bit code units and, in Checked mode, replaces each one
    // that is above 0xff with '?'. In unchecked mode the chunk is returned
    // untouched.
    auto mergeQuestionMarks = [=](__m128i chunk) {
        if (!Checked)
            return chunk;

        // SSE has no compare instruction for unsigned comparison.
        if constexpr (UseSse4_1) {
            // We use an unsigned uc = qMin(uc, 0x100) and then compare for equality.
            chunk = _mm_min_epu16(chunk, outOfRange);
            const __m128i offLimitMask = _mm_cmpeq_epi16(chunk, outOfRange);
            chunk = _mm_blendv_epi8(chunk, questionMark, offLimitMask);
            return chunk;
        }
        // Plain SSE2 fallback: the values must be shifted by 0x8000 so that
        // the signed comparison below orders them like unsigned values.
        const __m128i signedBitOffset = _mm_set1_epi16(short(0x8000));
        const __m128i thresholdMask = _mm_set1_epi16(short(0xff + 0x8000));

        const __m128i signedChunk = _mm_add_epi16(chunk, signedBitOffset);
        const __m128i offLimitMask = _mm_cmpgt_epi16(signedChunk, thresholdMask);

        // offLimitQuestionMark contains '?' in each 16-bit lane that was off-limit;
        // the lanes that were in range contain zeros
        const __m128i offLimitQuestionMark = _mm_and_si128(offLimitMask, questionMark);

        // correctBytes contains the lanes that were in range;
        // the lanes that were off-limit contain zeros
        const __m128i correctBytes = _mm_andnot_si128(offLimitMask, chunk);

        // merge offLimitQuestionMark and correctBytes to have the result
        chunk = _mm_or_si128(correctBytes, offLimitQuestionMark);

        Q_UNUSED(outOfRange);
        return chunk;
    };

    // Reads the 16 code units src[offset..offset+15] (32 bytes of input) and
    // returns them converted and packed into one 16-byte vector.
    auto loadChunkAt = [=](qptrdiff offset) {
        __m128i chunk1, chunk2;
        if constexpr (UseAvx2) {
            __m256i chunk = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(src + offset));
            if (Checked) {
                // See mergeQuestionMarks lambda above for details
                chunk = _mm256_min_epu16(chunk, outOfRange256);
                const __m256i offLimitMask = _mm256_cmpeq_epi16(chunk, outOfRange256);
                chunk = _mm256_blendv_epi8(chunk, questionMark256, offLimitMask);
            }

            // split the 256-bit vector into its upper and lower halves
            chunk2 = _mm256_extracti128_si256(chunk, 1);
            chunk1 = _mm256_castsi256_si128(chunk);
        } else {
            chunk1 = _mm_loadu_si128((const __m128i*)(src + offset)); // load
            chunk1 = mergeQuestionMarks(chunk1);

            chunk2 = _mm_loadu_si128((const __m128i*)(src + offset + 8)); // load
            chunk2 = mergeQuestionMarks(chunk2);
        }

        // pack the two vectors into 16 x 8-bit elements
        return _mm_packus_epi16(chunk1, chunk2);
    };

    // Inputs of at least 16 code units: convert 16 at a time.
    if (size_t(length) >= sizeof(__m128i)) {
        // because of possible overlapping, we won't process the last chunk in the loop
        qptrdiff offset = 0;
        for ( ; offset + 2 * sizeof(__m128i) < size_t(length); offset += sizeof(__m128i))
            _mm_storeu_si128(reinterpret_cast<__m128i *>(dst + offset), loadChunkAt(offset));

        // overlapped conversion of the last full chunk and the tail:
        // the second chunk is anchored at the end of the input, so the two may
        // cover some of the same elements. Both loads happen before either store.
        __m128i last1 = loadChunkAt(offset);
        __m128i last2 = loadChunkAt(length - sizeof(__m128i));
        _mm_storeu_si128(reinterpret_cast<__m128i *>(dst + offset), last1);
        _mm_storeu_si128(reinterpret_cast<__m128i *>(dst + length - sizeof(__m128i)), last2);
        return;
    }

# if !defined(__OPTIMIZE_SIZE__)
    // Inputs of 4 to 15 code units: two (possibly overlapping) partial-vector
    // conversions, one anchored at the start and one at the end.
    if (length >= 4) {
        // this code is fine even for in-place conversion because we load both
        // before any store
        if (length >= 8) {
            __m128i chunk1 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(src));
            __m128i chunk2 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(src + length - 8));
            chunk1 = mergeQuestionMarks(chunk1);
            chunk2 = mergeQuestionMarks(chunk2);

            // pack, where the upper half is ignored
            const __m128i result1 = _mm_packus_epi16(chunk1, chunk1);
            const __m128i result2 = _mm_packus_epi16(chunk2, chunk2);
            _mm_storel_epi64(reinterpret_cast<__m128i *>(dst), result1);
            _mm_storel_epi64(reinterpret_cast<__m128i *>(dst + length - 8), result2);
        } else {
            __m128i chunk1 = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(src));
            __m128i chunk2 = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(src + length - 4));
            chunk1 = mergeQuestionMarks(chunk1);
            chunk2 = mergeQuestionMarks(chunk2);

            // pack, we'll zero the upper three quarters
            const __m128i result1 = _mm_packus_epi16(chunk1, chunk1);
            const __m128i result2 = _mm_packus_epi16(chunk2, chunk2);
            qToUnaligned(_mm_cvtsi128_si32(result1), dst);
            qToUnaligned(_mm_cvtsi128_si32(result2), dst + length - 4);
        }
        return;
    }

    // Fewer than 4 code units left: convert them one by one.
    length = length % 4;
    return UnrollTailLoop<3>::exec(length, [=](qsizetype i) {
        if (Checked)
            dst[i] = (src[i]>0xff) ? '?' : (uchar) src[i];
        else
            dst[i] = src[i];
    });
# else
    // Size-optimized build: inputs shorter than 16 code units fall through to
    // the generic loop at the end of the function.
    length = length % 16;
# endif // optimize size
#elif defined(__ARM_NEON__)
    // Refer to the documentation of the SSE2 implementation.
    // This uses exactly the same method as for SSE except:
    // 1) neon has unsigned comparison
    // 2) packing is done to 64 bits (8 x 8bits component).
    if (length >= 16) {
        const qsizetype chunkCount = length >> 3; // divided by 8
        const uint16x8_t questionMark = vdupq_n_u16('?'); // set
        const uint16x8_t thresholdMask = vdupq_n_u16(0xff); // set
        for (qsizetype i = 0; i < chunkCount; ++i) {
            uint16x8_t chunk = vld1q_u16((uint16_t *)src); // load
            src += 8;

            if (Checked) {
                const uint16x8_t offLimitMask = vcgtq_u16(chunk, thresholdMask); // chunk > thresholdMask
                const uint16x8_t offLimitQuestionMark = vandq_u16(offLimitMask, questionMark); // offLimitMask & questionMark
                const uint16x8_t correctBytes = vbicq_u16(chunk, offLimitMask); // !offLimitMask & chunk
                chunk = vorrq_u16(correctBytes, offLimitQuestionMark); // correctBytes | offLimitQuestionMark
            }
            const uint8x8_t result = vmovn_u16(chunk); // narrowing move->packing
            vst1_u8(dst, result); // store
            dst += 8;
        }
        // the remaining (fewer than 8) code units are handled by the loop below
        length = length % 8;
    }
#endif
#if defined(__mips_dsp)
    static_assert(sizeof(qsizetype) == sizeof(int),
                  "oops, the assembler implementation needs to be called in a loop");
    qt_toLatin1_mips_dsp_asm(dst, src, length);
#else
    // Generic scalar conversion of whatever is left.
    while (length--) {
        if (Checked)
            *dst++ = (*src>0xff) ? '?' : (uchar) *src;
        else
            *dst++ = *src;
        ++src;
    }
#endif
}
1182
1183void qt_to_latin1(uchar *dst, const char16_t *src, qsizetype length)
1184{
1185 qt_to_latin1_internal<true>(dst, src, length);
1186}
1187
1188void qt_to_latin1_unchecked(uchar *dst, const char16_t *src, qsizetype length)
1189{
1190 qt_to_latin1_internal<false>(dst, src, length);
1191}
1192
1193// Unicode case-insensitive comparison (argument order matches QStringView)
1194Q_NEVER_INLINE static int ucstricmp(qsizetype alen, const char16_t *a, qsizetype blen, const char16_t *b)
1195{
1196 if (a == b)
1197 return qt_lencmp(alen, blen);
1198
1199 char32_t alast = 0;
1200 char32_t blast = 0;
1201 qsizetype l = qMin(alen, blen);
1202 qsizetype i;
1203 for (i = 0; i < l; ++i) {
1204// qDebug() << Qt::hex << alast << blast;
1205// qDebug() << Qt::hex << "*a=" << *a << "alast=" << alast << "folded=" << foldCase (*a, alast);
1206// qDebug() << Qt::hex << "*b=" << *b << "blast=" << blast << "folded=" << foldCase (*b, blast);
1207 int diff = foldCase(a[i], alast) - foldCase(b[i], blast);
1208 if ((diff))
1209 return diff;
1210 }
1211 if (i == alen) {
1212 if (i == blen)
1213 return 0;
1214 return -1;
1215 }
1216 return 1;
1217}
1218
1219// Case-insensitive comparison between a QStringView and a QLatin1StringView
1220// (argument order matches those types)
1221Q_NEVER_INLINE static int ucstricmp(qsizetype alen, const char16_t *a, qsizetype blen, const char *b)
1222{
1223 qsizetype l = qMin(alen, blen);
1224 qsizetype i;
1225 for (i = 0; i < l; ++i) {
1226 int diff = foldCase(a[i]) - foldCase(char16_t{uchar(b[i])});
1227 if ((diff))
1228 return diff;
1229 }
1230 if (i == alen) {
1231 if (i == blen)
1232 return 0;
1233 return -1;
1234 }
1235 return 1;
1236}
1237
1238// Case-insensitive comparison between a Unicode string and a UTF-8 string
1239Q_NEVER_INLINE static int ucstricmp8(const char *utf8, const char *utf8end, const QChar *utf16, const QChar *utf16end)
1240{
1241 auto src1 = reinterpret_cast<const uchar *>(utf8);
1242 auto end1 = reinterpret_cast<const uchar *>(utf8end);
1243 QStringIterator src2(utf16, utf16end);
1244
1245 while (src1 < end1 && src2.hasNext()) {
1246 char32_t decoded[1];
1247 char32_t *output = decoded;
1248 char32_t &uc1 = decoded[0];
1249 uchar b = *src1++;
1250 const qsizetype res = QUtf8Functions::fromUtf8<QUtf8BaseTraits>(b, output, src1, end1);
1251 if (res < 0) {
1252 // decoding error
1253 uc1 = QChar::ReplacementCharacter;
1254 } else {
1255 uc1 = QChar::toCaseFolded(uc1);
1256 }
1257
1258 char32_t uc2 = QChar::toCaseFolded(src2.next());
1259 int diff = uc1 - uc2; // can't underflow
1260 if (diff)
1261 return diff;
1262 }
1263
1264 // the shorter string sorts first
1265 return (end1 > src1) - int(src2.hasNext());
1266}
1267
#if defined(__mips_dsp)
// Assembly routine from qstring_mips_dsp_asm.S. Only declared for MIPS DSP
// builds; it is called from ucstrncmp() with the two UTF-16 buffers and the
// number of code units to compare.
extern "C" int qt_ucstrncmp_mips_dsp_asm(const char16_t *a,
                                         const char16_t *b,
                                         unsigned len);
#endif
1274
1275// Unicode case-sensitive compare two same-sized strings
1276template <StringComparisonMode Mode>
1277static int ucstrncmp(const char16_t *a, const char16_t *b, size_t l)
1278{
1279 // This function isn't memcmp() because that can return the wrong sorting
1280 // result in little-endian architectures: 0x00ff must sort before 0x0100,
1281 // but the bytes in memory are FF 00 and 00 01.
1282
1283#ifndef __OPTIMIZE_SIZE__
1284# if defined(__mips_dsp)
1285 static_assert(sizeof(uint) == sizeof(size_t));
1286 if (l >= 8) {
1287 return qt_ucstrncmp_mips_dsp_asm(a, b, l);
1288 }
1289# elif defined(__SSE2__)
1290 return ucstrncmp_sse2<Mode>(a, b, l);
1291# elif defined(__ARM_NEON__)
1292 if (l >= 8) {
1293 const char16_t *end = a + l;
1294 const uint16x8_t mask = qvsetq_n_u16( 1, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6, 1 << 7 );
1295 while (end - a > 7) {
1296 uint16x8_t da = vld1q_u16(reinterpret_cast<const uint16_t *>(a));
1297 uint16x8_t db = vld1q_u16(reinterpret_cast<const uint16_t *>(b));
1298
1299 uint8_t r = ~(uint8_t)vaddvq_u16(vandq_u16(vceqq_u16(da, db), mask));
1300 if (r) {
1301 // found a different QChar
1302 if (Mode == CompareStringsForEquality)
1303 return 1;
1304 uint idx = qCountTrailingZeroBits(r);
1305 return a[idx] - b[idx];
1306 }
1307 a += 8;
1308 b += 8;
1309 }
1310 l &= 7;
1311 }
1312 const auto lambda = [=](size_t i) -> int {
1313 return a[i] - b[i];
1314 };
1315 return UnrollTailLoop<7>::exec(l, 0, lambda, lambda);
1316# endif // MIPS DSP or __SSE2__ or __ARM_NEON__
1317#endif // __OPTIMIZE_SIZE__
1318
1319 if (Mode == CompareStringsForEquality || QSysInfo::ByteOrder == QSysInfo::BigEndian)
1320 return memcmp(a, b, l * sizeof(char16_t));
1321
1322 for (size_t i = 0; i < l; ++i) {
1323 if (int diff = a[i] - b[i])
1324 return diff;
1325 }
1326 return 0;
1327}
1328
1329template <StringComparisonMode Mode>
1330static int ucstrncmp(const char16_t *a, const char *b, size_t l)
1331{
1332 const uchar *c = reinterpret_cast<const uchar *>(b);
1333 const char16_t *uc = a;
1334 const char16_t *e = uc + l;
1335
1336#if defined(__SSE2__) && !defined(__OPTIMIZE_SIZE__)
1337 return ucstrncmp_sse2<Mode>(uc, c, l);
1338#endif
1339
1340 while (uc < e) {
1341 int diff = *uc - *c;
1342 if (diff)
1343 return diff;
1344 uc++, c++;
1345 }
1346
1347 return 0;
1348}
1349
1350// Unicode case-sensitive equality
1351template <typename Char2>
1352static bool ucstreq(const char16_t *a, size_t alen, const Char2 *b)
1353{
1354 return ucstrncmp<CompareStringsForEquality>(a, b, alen) == 0;
1355}
1356
1357// Unicode case-sensitive comparison
1358template <typename Char2>
1359static int ucstrcmp(const char16_t *a, size_t alen, const Char2 *b, size_t blen)
1360{
1361 const size_t l = qMin(alen, blen);
1362 int cmp = ucstrncmp<CompareStringsForOrdering>(a, b, l);
1363 return cmp ? cmp : qt_lencmp(alen, blen);
1364}
1365
1366using CaseInsensitiveL1 = QtPrivate::QCaseInsensitiveLatin1Hash;
1367
1368static int latin1nicmp(const char *lhsChar, qsizetype lSize, const char *rhsChar, qsizetype rSize)
1369{
1370 // We're called with QLatin1StringView's .data() and .size():
1371 Q_ASSERT(lSize >= 0 && rSize >= 0);
1372 if (!lSize)
1373 return rSize ? -1 : 0;
1374 if (!rSize)
1375 return 1;
1376 const qsizetype size = std::min(lSize, rSize);
1377
1378 Q_ASSERT(lhsChar && rhsChar); // since both lSize and rSize are positive
1379 for (qsizetype i = 0; i < size; i++) {
1380 if (int res = CaseInsensitiveL1::difference(lhsChar[i], rhsChar[i]))
1381 return res;
1382 }
1383 return qt_lencmp(lSize, rSize);
1384}
1385
1386bool QtPrivate::equalStrings(QStringView lhs, QStringView rhs) noexcept
1387{
1388 Q_ASSERT(lhs.size() == rhs.size());
1389 return ucstreq(lhs.utf16(), lhs.size(), rhs.utf16());
1390}
1391
1392bool QtPrivate::equalStrings(QStringView lhs, QLatin1StringView rhs) noexcept
1393{
1394 Q_ASSERT(lhs.size() == rhs.size());
1395 return ucstreq(lhs.utf16(), lhs.size(), rhs.latin1());
1396}
1397
1398bool QtPrivate::equalStrings(QLatin1StringView lhs, QStringView rhs) noexcept
1399{
1400 return QtPrivate::equalStrings(rhs, lhs);
1401}
1402
1403bool QtPrivate::equalStrings(QLatin1StringView lhs, QLatin1StringView rhs) noexcept
1404{
1405 Q_ASSERT(lhs.size() == rhs.size());
1406 return (!lhs.size() || memcmp(lhs.data(), rhs.data(), lhs.size()) == 0);
1407}
1408
1409bool QtPrivate::equalStrings(QBasicUtf8StringView<false> lhs, QStringView rhs) noexcept
1410{
1411 return QUtf8::compareUtf8(lhs, rhs) == 0;
1412}
1413
1414bool QtPrivate::equalStrings(QStringView lhs, QBasicUtf8StringView<false> rhs) noexcept
1415{
1416 return QtPrivate::equalStrings(rhs, lhs);
1417}
1418
1419bool QtPrivate::equalStrings(QLatin1StringView lhs, QBasicUtf8StringView<false> rhs) noexcept
1420{
1421 return QUtf8::compareUtf8(QByteArrayView(rhs), lhs) == 0;
1422}
1423
1424bool QtPrivate::equalStrings(QBasicUtf8StringView<false> lhs, QLatin1StringView rhs) noexcept
1425{
1426 return QtPrivate::equalStrings(rhs, lhs);
1427}
1428
1429bool QtPrivate::equalStrings(QBasicUtf8StringView<false> lhs, QBasicUtf8StringView<false> rhs) noexcept
1430{
1431#if QT_VERSION >= QT_VERSION_CHECK(7, 0, 0) || defined(QT_BOOTSTRAPPED) || defined(QT_STATIC)
1432 Q_ASSERT(lhs.size() == rhs.size());
1433#else
1434 // operator== didn't enforce size prior to Qt 6.2
1435 if (lhs.size() != rhs.size())
1436 return false;
1437#endif
1438 return (!lhs.size() || memcmp(lhs.data(), rhs.data(), lhs.size()) == 0);
1439}
1440
1441bool QAnyStringView::equal(QAnyStringView lhs, QAnyStringView rhs) noexcept
1442{
1443 if (lhs.size() != rhs.size() && lhs.isUtf8() == rhs.isUtf8())
1444 return false;
1445 return lhs.visit([rhs](auto lhs) {
1446 return rhs.visit([lhs](auto rhs) {
1447 return QtPrivate::equalStrings(lhs, rhs);
1448 });
1449 });
1450}
1451
1452/*!
1453 \relates QStringView
1454 \internal
1455 \since 5.10
1456
1457 Returns an integer that compares to 0 as \a lhs compares to \a rhs.
1458
1459 \include qstring.qdocinc {search-comparison-case-sensitivity} {comparison}
1460
1461 Case-sensitive comparison is based exclusively on the numeric Unicode values
1462 of the characters and is very fast, but is not what a human would expect.
1463 Consider sorting user-visible strings with QString::localeAwareCompare().
1464
1465 \sa {Comparing Strings}
1466*/
1467int QtPrivate::compareStrings(QStringView lhs, QStringView rhs, Qt::CaseSensitivity cs) noexcept
1468{
1469 if (cs == Qt::CaseSensitive)
1470 return ucstrcmp(lhs.utf16(), lhs.size(), rhs.utf16(), rhs.size());
1471 return ucstricmp(lhs.size(), lhs.utf16(), rhs.size(), rhs.utf16());
1472}
1473
1474/*!
1475 \relates QStringView
1476 \internal
1477 \since 5.10
1478 \overload
1479
1480 Returns an integer that compares to 0 as \a lhs compares to \a rhs.
1481
1482 \include qstring.qdocinc {search-comparison-case-sensitivity} {comparison}
1483
1484 Case-sensitive comparison is based exclusively on the numeric Unicode values
1485 of the characters and is very fast, but is not what a human would expect.
1486 Consider sorting user-visible strings with QString::localeAwareCompare().
1487
1488 \sa {Comparing Strings}
1489*/
1490int QtPrivate::compareStrings(QStringView lhs, QLatin1StringView rhs, Qt::CaseSensitivity cs) noexcept
1491{
1492 if (cs == Qt::CaseSensitive)
1493 return ucstrcmp(lhs.utf16(), lhs.size(), rhs.latin1(), rhs.size());
1494 return ucstricmp(lhs.size(), lhs.utf16(), rhs.size(), rhs.latin1());
1495}
1496
1497/*!
1498 \relates QStringView
1499 \internal
1500 \since 6.0
1501 \overload
1502*/
1503int QtPrivate::compareStrings(QStringView lhs, QBasicUtf8StringView<false> rhs, Qt::CaseSensitivity cs) noexcept
1504{
1505 return -compareStrings(rhs, lhs, cs);
1506}
1507
1508/*!
1509 \relates QStringView
1510 \internal
1511 \since 5.10
1512 \overload
1513*/
1514int QtPrivate::compareStrings(QLatin1StringView lhs, QStringView rhs, Qt::CaseSensitivity cs) noexcept
1515{
1516 return -compareStrings(rhs, lhs, cs);
1517}
1518
1519/*!
1520 \relates QStringView
1521 \internal
1522 \since 5.10
1523 \overload
1524
1525 Returns an integer that compares to 0 as \a lhs compares to \a rhs.
1526
1527 \include qstring.qdocinc {search-comparison-case-sensitivity} {comparison}
1528
1529 Case-sensitive comparison is based exclusively on the numeric Latin-1 values
1530 of the characters and is very fast, but is not what a human would expect.
1531 Consider sorting user-visible strings with QString::localeAwareCompare().
1532
1533 \sa {Comparing Strings}
1534*/
1535int QtPrivate::compareStrings(QLatin1StringView lhs, QLatin1StringView rhs, Qt::CaseSensitivity cs) noexcept
1536{
1537 if (lhs.isEmpty())
1538 return qt_lencmp(qsizetype(0), rhs.size());
1539 if (rhs.isEmpty())
1540 return qt_lencmp(lhs.size(), qsizetype(0));
1541 if (cs == Qt::CaseInsensitive)
1542 return latin1nicmp(lhs.data(), lhs.size(), rhs.data(), rhs.size());
1543 const auto l = std::min(lhs.size(), rhs.size());
1544 int r = memcmp(lhs.data(), rhs.data(), l);
1545 return r ? r : qt_lencmp(lhs.size(), rhs.size());
1546}
1547
1548/*!
1549 \relates QStringView
1550 \internal
1551 \since 6.0
1552 \overload
1553*/
1554int QtPrivate::compareStrings(QLatin1StringView lhs, QBasicUtf8StringView<false> rhs, Qt::CaseSensitivity cs) noexcept
1555{
1556 return -QUtf8::compareUtf8(QByteArrayView(rhs), lhs, cs);
1557}
1558
1559/*!
1560 \relates QStringView
1561 \internal
1562 \since 6.0
1563 \overload
1564*/
1565int QtPrivate::compareStrings(QBasicUtf8StringView<false> lhs, QStringView rhs, Qt::CaseSensitivity cs) noexcept
1566{
1567 if (cs == Qt::CaseSensitive)
1568 return QUtf8::compareUtf8(lhs, rhs);
1569 return ucstricmp8(lhs.begin(), lhs.end(), rhs.begin(), rhs.end());
1570}
1571
1572/*!
1573 \relates QStringView
1574 \internal
1575 \since 6.0
1576 \overload
1577*/
1578int QtPrivate::compareStrings(QBasicUtf8StringView<false> lhs, QLatin1StringView rhs, Qt::CaseSensitivity cs) noexcept
1579{
1580 return -compareStrings(rhs, lhs, cs);
1581}
1582
1583/*!
1584 \relates QStringView
1585 \internal
1586 \since 6.0
1587 \overload
1588*/
1589int QtPrivate::compareStrings(QBasicUtf8StringView<false> lhs, QBasicUtf8StringView<false> rhs, Qt::CaseSensitivity cs) noexcept
1590{
1591 return QUtf8::compareUtf8(QByteArrayView(lhs), QByteArrayView(rhs), cs);
1592}
1593
1594int QAnyStringView::compare(QAnyStringView lhs, QAnyStringView rhs, Qt::CaseSensitivity cs) noexcept
1595{
1596 return lhs.visit([rhs, cs](auto lhs) {
1597 return rhs.visit([lhs, cs](auto rhs) {
1598 return QtPrivate::compareStrings(lhs, rhs, cs);
1599 });
1600 });
1601}
1602
1603// ### Qt 7: do not allow anything but ASCII digits
1604// in arg()'s replacements.
1605#if QT_VERSION < QT_VERSION_CHECK(7, 0, 0) && !defined(QT_BOOTSTRAPPED)
1606static bool supportUnicodeDigitValuesInArg()
1607{
1608 static const bool result = []() {
1609 static const char supportUnicodeDigitValuesEnvVar[]
1610 = "QT_USE_UNICODE_DIGIT_VALUES_IN_STRING_ARG";
1611
1612 if (qEnvironmentVariableIsSet(supportUnicodeDigitValuesEnvVar))
1613 return qEnvironmentVariableIntValue(supportUnicodeDigitValuesEnvVar) != 0;
1614
1615#if QT_VERSION < QT_VERSION_CHECK(6, 6, 0) // keep it in sync with the test
1616 return true;
1617#else
1618 return false;
1619#endif
1620 }();
1621
1622 return result;
1623}
1624#endif
1625
1626static int qArgDigitValue(QChar ch) noexcept
1627{
1628#if QT_VERSION < QT_VERSION_CHECK(7, 0, 0) && !defined(QT_BOOTSTRAPPED)
1629 if (supportUnicodeDigitValuesInArg())
1630 return ch.digitValue();
1631#endif
1632 if (ch >= u'0' && ch <= u'9')
1633 return int(ch.unicode() - u'0');
1634 return -1;
1635}
1636
1637#if QT_CONFIG(regularexpression)
1638Q_DECL_COLD_FUNCTION
1639void qtWarnAboutInvalidRegularExpression(const QString &pattern, const char *where);
1640#endif
1641
1642/*!
1643 \macro QT_RESTRICTED_CAST_FROM_ASCII
1644 \relates QString
1645
1646 Disables most automatic conversions from source literals and 8-bit data
1647 to unicode QStrings, but allows the use of
1648 the \c{QChar(char)} and \c{QString(const char (&ch)[N])} constructors,
1649 and the \c{QString::operator=(const char (&ch)[N])} assignment operator.
1650 This gives most of the type-safety benefits of \l QT_NO_CAST_FROM_ASCII
1651 but does not require user code to wrap character and string literals
1652 with QLatin1Char, QLatin1StringView or similar.
1653
1654 Using this macro together with source strings outside the 7-bit range,
1655 non-literals, or literals with embedded NUL characters is undefined.
1656
1657 \sa QT_NO_CAST_FROM_ASCII, QT_NO_CAST_TO_ASCII
1658*/
1659
1660/*!
1661 \macro QT_NO_CAST_FROM_ASCII
1662 \relates QString
1663 \relates QChar
1664
1665 Disables automatic conversions from 8-bit strings (\c{char *}) to Unicode
1666 QStrings, as well as from 8-bit \c{char} types (\c{char} and
1667 \c{unsigned char}) to QChar.
1668
1669 \sa QT_NO_CAST_TO_ASCII, QT_RESTRICTED_CAST_FROM_ASCII,
1670 QT_NO_CAST_FROM_BYTEARRAY
1671*/
1672
1673/*!
1674 \macro QT_NO_CAST_TO_ASCII
1675 \relates QString
1676
1677 Disables automatic conversion from QString to 8-bit strings (\c{char *}).
1678
1679 \sa QT_NO_CAST_FROM_ASCII, QT_RESTRICTED_CAST_FROM_ASCII,
1680 QT_NO_CAST_FROM_BYTEARRAY
1681*/
1682
1683/*!
1684 \macro QT_ASCII_CAST_WARNINGS
1685 \internal
1686 \relates QString
1687
1688 This macro can be defined to force a warning whenever a function is
1689 called that automatically converts between unicode and 8-bit encodings.
1690
1691 Note: This only works for compilers that support warnings for
1692 deprecated API.
1693
1694 \sa QT_NO_CAST_TO_ASCII, QT_NO_CAST_FROM_ASCII, QT_RESTRICTED_CAST_FROM_ASCII
1695*/
1696
1697/*!
1698 \class QString
1699 \inmodule QtCore
1700 \reentrant
1701
1702 \brief The QString class provides a Unicode character string.
1703
1704 \ingroup tools
1705 \ingroup shared
1706 \ingroup string-processing
1707
1708 \compares strong
1709 \compareswith strong QChar QLatin1StringView {const char16_t *} \
1710 QStringView QUtf8StringView
1711 \endcompareswith
1712 \compareswith strong QByteArray QByteArrayView {const char *}
1713 When comparing with byte arrays, their content is interpreted as UTF-8.
1714 \endcompareswith
1715
1716 QString stores a string of 16-bit \l{QChar}s, where each QChar
1717 corresponds to one UTF-16 code unit. (Unicode characters
1718 with code values above 65535 are stored using surrogate pairs,
1719 that is, two consecutive \l{QChar}s.)
1720
1721 \l{Unicode} is an international standard that supports most of the
1722 writing systems in use today. It is a superset of US-ASCII (ANSI
1723 X3.4-1986) and Latin-1 (ISO 8859-1), and all the US-ASCII/Latin-1
1724 characters are available at the same code positions.
1725
1726 Behind the scenes, QString uses \l{implicit sharing}
1727 (copy-on-write) to reduce memory usage and to avoid the needless
1728 copying of data. This also helps reduce the inherent overhead of
1729 storing 16-bit characters instead of 8-bit characters.
1730
1731 In addition to QString, Qt also provides the QByteArray class to
1732 store raw bytes and traditional 8-bit '\\0'-terminated strings.
1733 For most purposes, QString is the class you want to use. It is
1734 used throughout the Qt API, and the Unicode support ensures that
1735 your applications are easy to translate if you want to expand
1736 your application's market at some point. Two prominent cases
1737 where QByteArray is appropriate are when you need to store raw
1738 binary data, and when memory conservation is critical (like in
1739 embedded systems).
1740
1741 \section1 Initializing a string
1742
1743 One way to initialize a QString is to pass a \c{const char
1744 *} to its constructor. For example, the following code creates a
1745 QString of size 5 containing the data "Hello":
1746
1747 \snippet qstring/main.cpp 0
1748
1749 QString converts the \c{const char *} data into Unicode using the
1750 fromUtf8() function.
1751
1752 In all of the QString functions that take \c{const char *}
1753 parameters, the \c{const char *} is interpreted as a classic
1754 C-style \c{'\\0'}-terminated string. Except where the function's
1755 name overtly indicates some other encoding, such \c{const char *}
1756 parameters are assumed to be encoded in UTF-8.
1757
1758 You can also provide string data as an array of \l{QChar}s:
1759
1760 \snippet qstring/main.cpp 1
1761
1762 QString makes a deep copy of the QChar data, so you can modify it
1763 later without experiencing side effects. You can avoid taking a
1764 deep copy of the character data by using QStringView or
1765 QString::fromRawData() instead.
1766
1767 Another approach is to set the size of the string using resize()
1768 and to initialize the data character per character. QString uses
1769 0-based indexes, just like C++ arrays. To access the character at
1770 a particular index position, you can use \l operator[](). On
1771 non-\c{const} strings, \l operator[]() returns a reference to a
1772 character that can be used on the left side of an assignment. For
1773 example:
1774
1775 \snippet qstring/main.cpp 2
1776
1777 For read-only access, an alternative syntax is to use the at()
1778 function:
1779
1780 \snippet qstring/main.cpp 3
1781
1782 The at() function can be faster than \l operator[]() because it
1783 never causes a \l{deep copy} to occur. Alternatively, use the
1784 first(), last(), or sliced() functions to extract several characters
1785 at a time.
1786
1787 A QString can embed '\\0' characters (QChar::Null). The size()
1788 function always returns the size of the whole string, including
1789 embedded '\\0' characters.
1790
1791 After a call to the resize() function, newly allocated characters
1792 have undefined values. To set all the characters in the string to
1793 a particular value, use the fill() function.
1794
1795 QString provides dozens of overloads designed to simplify string
1796 usage. For example, if you want to compare a QString with a string
1797 literal, you can write code like this and it will work as expected:
1798
1799 \snippet qstring/main.cpp 4
1800
1801 You can also pass string literals to functions that take QStrings
1802 as arguments, invoking the QString(const char *)
1803 constructor. Similarly, you can pass a QString to a function that
1804 takes a \c{const char *} argument using the \l qPrintable() macro,
1805 which returns the given QString as a \c{const char *}. This is
1806 equivalent to calling <QString>.toLocal8Bit().constData().
1807
1808 \section1 Manipulating string data
1809
1810 QString provides the following basic functions for modifying the
1811 character data: append(), prepend(), insert(), replace(), and
1812 remove(). For example:
1813
1814 \snippet qstring/main.cpp 5
1815
1816 In the above example, the replace() function's first two arguments are the
1817 position from which to start replacing and the number of characters that
1818 should be replaced.
1819
1820 When data-modifying functions increase the size of the string,
1821 QString may reallocate the memory in which it holds its data. When
1822 this happens, QString expands by more than it immediately needs so as
1823 to have space for further expansion without reallocation until the size
1824 of the string has significantly increased.
1825
1826 The insert(), remove(), and, when replacing a sub-string with one of
1827 different size, replace() functions can be slow (\l{linear time}) for
1828 large strings because they require moving many characters in the string
1829 by at least one position in memory.
1830
1831 If you are building a QString gradually and know in advance
1832 approximately how many characters the QString will contain, you
1833 can call reserve(), asking QString to preallocate a certain amount
1834 of memory. You can also call capacity() to find out how much
1835 memory the QString actually has allocated.
1836
1837 QString provides \l{STL-style iterators} (QString::const_iterator and
1838 QString::iterator). In practice, iterators are handy when working with
1839 generic algorithms provided by the C++ standard library.
1840
1841 \note Iterators over a QString, and references to individual characters
1842 within one, cannot be relied on to remain valid when any non-\c{const}
1843 method of the QString is called. Accessing such an iterator or reference
1844 after the call to a non-\c{const} method leads to undefined behavior. When
1845 stability for iterator-like functionality is required, you should use
1846 indexes instead of iterators, as they are not tied to QString's internal
1847 state and thus do not get invalidated.
1848
1849 \note Due to \l{implicit sharing}, the first non-\c{const} operator or
1850 function used on a given QString may cause it to internally perform a deep
1851 copy of its data. This invalidates all iterators over the string and
1852 references to individual characters within it. Do not call non-const
1853 functions while keeping iterators. Accessing an iterator or reference
1854 after it has been invalidated leads to undefined behavior. See the
1855 \l{Implicit sharing iterator problem} section for more information.
1856
1857 A frequent requirement is to remove or simplify the spacing between
1858 visible characters in a string. The characters that make up that spacing
1859 are those for which \l {QChar::}{isSpace()} returns \c true, such as
1860 the simple space \c{' '}, the horizontal tab \c{'\\t'} and the newline \c{'\\n'}.
1861 To obtain a copy of a string leaving out any spacing from its start and end,
1862 use \l trimmed(). To also replace each sequence of spacing characters within
1863 the string with a simple space, \c{' '}, use \l simplified().
1864
1865 If you want to find all occurrences of a particular character or
1866 substring in a QString, use the indexOf() or lastIndexOf()
1867 functions. The former searches forward, the latter searches backward.
1868 Either can be told an index position from which to start their search.
1869 Each returns the index position of the character or substring if they
1870 find it; otherwise, they return -1. For example, here is a typical loop
1871 that finds all occurrences of a particular substring:
1872
1873 \snippet qstring/main.cpp 6
1874
1875 QString provides many functions for converting numbers into
1876 strings and strings into numbers. See the arg() functions, the
1877 setNum() functions, the number() static functions, and the
1878 toInt(), toDouble(), and similar functions.
1879
1880 To get an uppercase or lowercase version of a string, use toUpper() or
1881 toLower().
1882
1883 Lists of strings are handled by the QStringList class. You can
1884 split a string into a list of strings using the split() function,
1885 and join a list of strings into a single string with an optional
1886 separator using QStringList::join(). You can obtain a filtered list
1887 from a string list by selecting the entries in it that contain a
1888 particular substring or match a particular QRegularExpression.
1889 See QStringList::filter() for details.
1890
1891 \section1 Querying string data
1892
1893 To see if a QString starts or ends with a particular substring, use
1894 startsWith() or endsWith(). To check whether a QString contains a
1895 specific character or substring, use the contains() function. To
1896 find out how many times a particular character or substring occurs
1897 in a string, use count().
1898
1899 To obtain a pointer to the actual character data, call data() or
1900 constData(). These functions return a pointer to the beginning of
1901 the QChar data. The pointer is guaranteed to remain valid until a
1902 non-\c{const} function is called on the QString.
1903
1904 \section2 Comparing strings
1905
1906 QStrings can be compared using overloaded operators such as \l
1907 operator<(), \l operator<=(), \l operator==(), \l operator>=(),
1908 and so on. The comparison is based exclusively on the lexicographical
1909 order of the two strings, seen as sequences of UTF-16 code units.
1910 It is very fast but is not what a human would expect; the
1911 QString::localeAwareCompare() function is usually a better choice for
1912 sorting user-interface strings, when such a comparison is available.
1913
1914 When Qt is linked with the ICU library (which it usually is), its
1915 locale-aware sorting is used. Otherwise, platform-specific solutions
1916 are used:
1917 \list
1918 \li On Windows, localeAwareCompare() uses the current user locale,
1919 as set in the \uicontrol{regional} and \uicontrol{language}
1920 options portion of \uicontrol{Control Panel}.
1921 \li On \macos and iOS, \l localeAwareCompare() compares according
1922 to the \uicontrol{Order for sorted lists} setting in the
1923 \uicontrol{International preferences} panel.
1924 \li On other Unix-like systems, the comparison falls back to the
1925 system library's \c strcoll().
1926 \endlist
1927
1928 \section1 Converting between encoded string data and QString
1929
1930 QString provides the following functions that return a
1931 \c{const char *} version of the string as QByteArray: toUtf8(),
1932 toLatin1(), and toLocal8Bit().
1933
1934 \list
1935 \li toLatin1() returns a Latin-1 (ISO 8859-1) encoded 8-bit string.
1936 \li toUtf8() returns a UTF-8 encoded 8-bit string. UTF-8 is a
1937 superset of US-ASCII (ANSI X3.4-1986) that supports the entire
1938 Unicode character set through multibyte sequences.
1939 \li toLocal8Bit() returns an 8-bit string using the system's local
1940 encoding. This is the same as toUtf8() on Unix systems.
1941 \endlist
1942
1943 To convert from one of these encodings, QString provides
1944 fromLatin1(), fromUtf8(), and fromLocal8Bit(). Other
1945 encodings are supported through the QStringEncoder and QStringDecoder
1946 classes.
1947
1948 As mentioned above, QString provides a lot of functions and
1949 operators that make it easy to interoperate with \c{const char *}
1950 strings. But this functionality is a double-edged sword: It makes
1951 QString more convenient to use if all strings are US-ASCII or
1952 Latin-1, but there is always the risk that an implicit conversion
1953 from or to \c{const char *} is done using the wrong 8-bit
1954 encoding. To minimize these risks, you can turn off these implicit
1955 conversions by defining some of the following preprocessor symbols:
1956
1957 \list
1958 \li \l QT_NO_CAST_FROM_ASCII disables automatic conversions from
1959 C string literals and pointers to Unicode.
1960 \li \l QT_RESTRICTED_CAST_FROM_ASCII allows automatic conversions
1961 from C characters and character arrays but disables automatic
1962 conversions from character pointers to Unicode.
1963 \li \l QT_NO_CAST_TO_ASCII disables automatic conversion from QString
1964 to C strings.
1965 \endlist
1966
1967 You then need to explicitly call fromUtf8(), fromLatin1(),
1968 or fromLocal8Bit() to construct a QString from an
1969 8-bit string, or use the lightweight QLatin1StringView class. For
1970 example:
1971
1972 \snippet code/src_corelib_text_qstring.cpp 1
1973
1974 Similarly, you must call toLatin1(), toUtf8(), or
1975 toLocal8Bit() explicitly to convert the QString to an 8-bit
1976 string.
1977
1978 \table 100 %
1979 \header
1980 \li Note for C Programmers
1981
1982 \row
1983 \li
1984 Due to C++'s type system and the fact that QString is
1985 \l{implicitly shared}, QStrings may be treated like \c{int}s or
1986 other basic types. For example:
1987
1988 \snippet qstring/main.cpp 7
1989
1990 The \c result variable is a normal variable allocated on the
1991 stack. When \c return is called, and because we're returning by
1992 value, the copy constructor is called and a copy of the string is
1993 returned. No actual copying takes place thanks to the implicit
1994 sharing.
1995
1996 \endtable
1997
1998 \section1 Distinction between null and empty strings
1999
2000 For historical reasons, QString distinguishes between null
2001 and empty strings. A \e null string is a string that is
2002 initialized using QString's default constructor or by passing
2003 \nullptr to the constructor. An \e empty string is any
2004 string with size 0. A null string is always empty, but an empty
2005 string isn't necessarily null:
2006
2007 \snippet qstring/main.cpp 8
2008
2009 All functions except isNull() treat null strings the same as empty
2010 strings. For example, toUtf8().constData() returns a valid pointer
2011 (not \nullptr) to a '\\0' character for a null string. We
2012 recommend that you always use the isEmpty() function and avoid isNull().
2013
2014 \section1 Number formats
2015
2016 When a QString::arg() \c{'%'} format specifier includes the \c{'L'} locale
2017 qualifier, and the base is ten (its default), the default locale is
2018 used. This can be set using \l{QLocale::setDefault()}. For more refined
2019 control of localized string representations of numbers, see
2020 QLocale::toString(). All other number formatting done by QString follows the
2021 C locale's representation of numbers.
2022
2023 When QString::arg() applies left-padding to numbers, the fill character
2024 \c{'0'} is treated specially. If the number is negative, its minus sign
2025 appears before the zero-padding. If the field is localized, the
2026 locale-appropriate zero character is used in place of \c{'0'}. For
2027 floating-point numbers, this special treatment only applies if the number is
2028 finite.
2029
2030 \section2 Floating-point formats
2031
2032 In member functions (for example, arg() and number()) that format floating-point
2033 numbers (\c float or \c double) as strings, the representation used can be
2034 controlled by a choice of \e format and \e precision, whose meanings are as
2035 for \l {QLocale::toString(double, char, int)}.
2036
2037 If the selected \e format includes an exponent, localized forms follow the
2038 locale's convention on digits in the exponent. For non-localized formatting,
2039 the exponent shows its sign and includes at least two digits, left-padding
2040 with zero if needed.
2041
2042 \section1 More efficient string construction
2043
2044 Many strings are known at compile time. The QString constructor from
2045 C++ string literals will copy the contents of the string,
2046 treating the contents as UTF-8. This requires memory allocation and
2047 re-encoding string data, operations that will happen at runtime.
2048 If the string data is known at compile time, you can use the QStringLiteral
2049 macro or similarly \c{operator""_s} to create QString's payload at compile
2050 time instead.
2051
2052 Using the QString \c{'+'} operator, it is easy to construct a
2053 complex string from multiple substrings. You will often write code
2054 like this:
2055
2056 \snippet qstring/stringbuilder.cpp 0
2057
2058 There is nothing wrong with either of these string constructions,
2059 but there are a few hidden inefficiencies:
2060
2061 First, repeated use of the \c{'+'} operator may lead to
2062 multiple memory allocations. When concatenating \e{n} substrings,
2063 where \e{n > 2}, there can be as many as \e{n - 1} calls to the
2064 memory allocator.
2065
2066 These allocations can be optimized by an internal class
2067 \c{QStringBuilder}. This class is marked
2068 internal and does not appear in the documentation, because you
2069 aren't meant to instantiate it in your code. Its use will be
2070 automatic, as described below.
2071
2072 \c{QStringBuilder} uses expression templates and reimplements the
2073 \c{'%'} operator so that when you use \c{'%'} for string
2074 concatenation instead of \c{'+'}, multiple substring
2075 concatenations will be postponed until the final result is about
2076 to be assigned to a QString. At this point, the amount of memory
2077 required for the final result is known. The memory allocator is
2078 then called \e{once} to get the required space, and the substrings
2079 are copied into it one by one.
2080
2081 Additional efficiency is gained by inlining and reducing reference
2082 counting (the QString created from a \c{QStringBuilder}
2083 has a ref count of 1, whereas QString::append() needs an extra
2084 test).
2085
2086 There are two ways you can access this improved method of string
2087 construction. The straightforward way is to include
2088 \c{QStringBuilder} wherever you want to use it and use the
2089 \c{'%'} operator instead of \c{'+'} when concatenating strings:
2090
2091 \snippet qstring/stringbuilder.cpp 5
2092
2093 A more global approach, which is more convenient but not entirely
2094 source-compatible, is to define \c QT_USE_QSTRINGBUILDER (by adding
2095 it to the compiler flags) at build time. This will make concatenating
2096 strings with \c{'+'} work the same way as \c{QStringBuilder}'s \c{'%'}.
2097
2098 \note Using automatic type deduction (for example, by using the \c
2099 auto keyword) with the result of string concatenation when QStringBuilder
2100 is enabled will show that the concatenation is indeed an object of a
2101 QStringBuilder specialization:
2102
2103 \snippet qstring/stringbuilder.cpp 6
2104
2105 This does not cause any harm, as QStringBuilder will implicitly convert to
2106 QString when required. If this is undesirable, then one should specify
2107 the necessary types instead of having the compiler deduce them:
2108
2109 \snippet qstring/stringbuilder.cpp 7
2110
2111 \section1 Maximum size and out-of-memory conditions
2112
2113 The maximum size of QString depends on the architecture. Most 64-bit
2114 systems can allocate more than 2 GB of memory, with a typical limit
2115 of 2^63 bytes. The actual value also depends on the overhead required for
2116 managing the data block. As a result, you can expect a maximum size
2117 of 2 GB minus overhead on 32-bit platforms and 2^63 bytes minus overhead
2118 on 64-bit platforms. The number of elements that can be stored in a
2119 QString is this maximum size divided by the size of QChar.
2120
2121 When memory allocation fails, QString throws a \c std::bad_alloc
2122 exception if the application was compiled with exception support.
2123 Out-of-memory conditions in Qt containers are the only cases where Qt
2124 will throw exceptions. If exceptions are disabled, then running out of
2125 memory is undefined behavior.
2126
2127 \note Target operating systems may impose limits on how much memory an
2128 application can allocate, in total, or on the size of individual allocations.
2129 This may further restrict the size of string a QString can hold.
2130 Mitigating or controlling the behavior these limits cause is beyond the
2131 scope of the Qt API.
2132
2133 \sa fromRawData(), QChar, QStringView, QLatin1StringView, QByteArray
2134*/
2135
2136/*! \typedef QString::ConstIterator
2137
2138 Qt-style synonym for QString::const_iterator.
2139*/
2140
2141/*! \typedef QString::Iterator
2142
2143 Qt-style synonym for QString::iterator.
2144*/
2145
2146/*! \typedef QString::const_iterator
2147
2148 \sa QString::iterator
2149*/
2150
2151/*! \typedef QString::iterator
2152
2153 \sa QString::const_iterator
2154*/
2155
2156/*! \typedef QString::const_reverse_iterator
2157 \since 5.6
2158
2159 \sa QString::reverse_iterator, QString::const_iterator
2160*/
2161
2162/*! \typedef QString::reverse_iterator
2163 \since 5.6
2164
2165 \sa QString::const_reverse_iterator, QString::iterator
2166*/
2167
2168/*!
2169 \typedef QString::size_type
2170*/
2171
2172/*!
2173 \typedef QString::difference_type
2174*/
2175
2176/*!
2177 \typedef QString::const_reference
2178*/
2179/*!
2180 \typedef QString::reference
2181*/
2182
2183/*!
2184 \typedef QString::const_pointer
2185
2186 The QString::const_pointer typedef provides an STL-style
2187 const pointer to a QString element (QChar).
2188*/
2189/*!
2190 \typedef QString::pointer
2191
2192 The QString::pointer typedef provides an STL-style
2193 pointer to a QString element (QChar).
2194*/
2195
2196/*!
2197 \typedef QString::value_type
2198*/
2199
2200/*! \fn QString::iterator QString::begin()
2201
2202 Returns an \l{STL-style iterators}{STL-style iterator} pointing to the
2203 first character in the string.
2204
2205//! [iterator-invalidation-func-desc]
2206 \warning The returned iterator is invalidated on detachment or when the
2207 QString is modified.
2208//! [iterator-invalidation-func-desc]
2209
2210 \sa constBegin(), end()
2211*/
2212
2213/*! \fn QString::const_iterator QString::begin() const
2214
2215 \overload begin()
2216*/
2217
2218/*! \fn QString::const_iterator QString::cbegin() const
2219 \since 5.0
2220
2221 Returns a const \l{STL-style iterators}{STL-style iterator} pointing to the
2222 first character in the string.
2223
2224 \include qstring.cpp iterator-invalidation-func-desc
2225
2226 \sa begin(), cend()
2227*/
2228
2229/*! \fn QString::const_iterator QString::constBegin() const
2230
2231 Returns a const \l{STL-style iterators}{STL-style iterator} pointing to the
2232 first character in the string.
2233
2234 \include qstring.cpp iterator-invalidation-func-desc
2235
2236 \sa begin(), constEnd()
2237*/
2238
2239/*! \fn QString::iterator QString::end()
2240
2241 Returns an \l{STL-style iterators}{STL-style iterator} pointing just after
2242 the last character in the string.
2243
2244 \include qstring.cpp iterator-invalidation-func-desc
2245
2246 \sa begin(), constEnd()
2247*/
2248
2249/*! \fn QString::const_iterator QString::end() const
2250
2251 \overload end()
2252*/
2253
2254/*! \fn QString::const_iterator QString::cend() const
2255 \since 5.0
2256
2257 Returns a const \l{STL-style iterators}{STL-style iterator} pointing just
2258 after the last character in the string.
2259
2260 \include qstring.cpp iterator-invalidation-func-desc
2261
2262 \sa cbegin(), end()
2263*/
2264
2265/*! \fn QString::const_iterator QString::constEnd() const
2266
2267 Returns a const \l{STL-style iterators}{STL-style iterator} pointing just
2268 after the last character in the string.
2269
2270 \include qstring.cpp iterator-invalidation-func-desc
2271
2272 \sa constBegin(), end()
2273*/
2274
2275/*! \fn QString::reverse_iterator QString::rbegin()
2276 \since 5.6
2277
2278 Returns a \l{STL-style iterators}{STL-style} reverse iterator pointing to
2279 the first character in the string, in reverse order.
2280
2281 \include qstring.cpp iterator-invalidation-func-desc
2282
2283 \sa begin(), crbegin(), rend()
2284*/
2285
2286/*! \fn QString::const_reverse_iterator QString::rbegin() const
2287 \since 5.6
2288 \overload
2289*/
2290
2291/*! \fn QString::const_reverse_iterator QString::crbegin() const
2292 \since 5.6
2293
2294 Returns a const \l{STL-style iterators}{STL-style} reverse iterator
2295 pointing to the first character in the string, in reverse order.
2296
2297 \include qstring.cpp iterator-invalidation-func-desc
2298
2299 \sa begin(), rbegin(), rend()
2300*/
2301
2302/*! \fn QString::reverse_iterator QString::rend()
2303 \since 5.6
2304
2305 Returns a \l{STL-style iterators}{STL-style} reverse iterator pointing just
2306 after the last character in the string, in reverse order.
2307
2308 \include qstring.cpp iterator-invalidation-func-desc
2309
2310 \sa end(), crend(), rbegin()
2311*/
2312
2313/*! \fn QString::const_reverse_iterator QString::rend() const
2314 \since 5.6
2315 \overload
2316*/
2317
2318/*! \fn QString::const_reverse_iterator QString::crend() const
2319 \since 5.6
2320
2321 Returns a const \l{STL-style iterators}{STL-style} reverse iterator
2322 pointing just after the last character in the string, in reverse order.
2323
2324 \include qstring.cpp iterator-invalidation-func-desc
2325
2326 \sa end(), rend(), rbegin()
2327*/
2328
2329/*!
2330 \fn QString::QString()
2331
2332 Constructs a null string. Null strings are also considered empty.
2333
2334 \sa isEmpty(), isNull(), {Distinction Between Null and Empty Strings}
2335*/
2336
2337/*!
2338 \fn QString::QString(QString &&other)
2339
2340 Move-constructs a QString instance, making it point at the same
2341 object that \a other was pointing to.
2342
2343 \since 5.2
2344*/
2345
2346/*! \fn QString::QString(const char *str)
2347
2348 Constructs a string initialized with the 8-bit string \a str. The
2349 given const char pointer is converted to Unicode using the
2350 fromUtf8() function.
2351
2352 You can disable this constructor by defining
2353 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
2354 can be useful if you want to ensure that all user-visible strings
2355 go through QObject::tr(), for example.
2356
2357 \note Defining \l QT_RESTRICTED_CAST_FROM_ASCII also disables
2358 this constructor, but enables a \c{QString(const char (&ch)[N])}
2359 constructor instead. Using non-literal input, or input with
2360 embedded NUL characters, or non-7-bit characters is undefined
2361 in this case.
2362
2363 \sa fromLatin1(), fromLocal8Bit(), fromUtf8()
2364*/
2365
2366/*! \fn QString::QString(const char8_t *str)
2367
2368 Constructs a string initialized with the UTF-8 string \a str. The
2369 given const char8_t pointer is converted to Unicode using the
2370 fromUtf8() function.
2371
2372 \since 6.1
2373 \sa fromLatin1(), fromLocal8Bit(), fromUtf8()
2374*/
2375
2376/*
2377//! [from-std-string]
2378Returns a copy of the \a str string. The given string is assumed to be
2379encoded in \1, and is converted to QString using the \2 function.
2380//! [from-std-string]
2381*/
2382
2383/*! \fn QString QString::fromStdString(const std::string &str)
2384
2385 \include qstring.cpp {from-std-string} {UTF-8} {fromUtf8()}
2386
2387 \sa fromLatin1(), fromLocal8Bit(), fromUtf8(), QByteArray::fromStdString()
2388*/
2389
2390/*! \fn QString QString::fromStdWString(const std::wstring &str)
2391
2392 Returns a copy of the \a str string. The given string is assumed
2393 to be encoded in UTF-16 if the size of wchar_t is 2 bytes (for example,
2394 on Windows) and in UCS-4 if the size of wchar_t is 4 bytes (most Unix
2395 systems).
2396
2397 \sa fromUtf16(), fromLatin1(), fromLocal8Bit(), fromUtf8(), fromUcs4(),
2398 fromStdU16String(), fromStdU32String()
2399*/
2400
2401/*! \fn QString QString::fromWCharArray(const wchar_t *string, qsizetype size)
2402 \since 4.2
2403
2404 Reads the first \a size code units of the \c wchar_t array to whose start
2405 \a string points, converting them to Unicode and returning the result as
2406 a QString. The encoding used by \c wchar_t is assumed to be UTF-32 if the
2407 type's size is four bytes or UTF-16 if its size is two bytes.
2408
2409 If \a size is -1 (default), the \a string must be '\\0'-terminated.
2410
2411 \sa fromUtf16(), fromLatin1(), fromLocal8Bit(), fromUtf8(), fromUcs4(),
2412 fromStdWString()
2413*/
2414
2415/*! \fn std::wstring QString::toStdWString() const
2416
2417 Returns a std::wstring object with the data contained in this
2418 QString. The std::wstring is encoded in UTF-16 on platforms where
2419 wchar_t is 2 bytes wide (for example, Windows) and in UTF-32 on platforms
2420 where wchar_t is 4 bytes wide (most Unix systems).
2421
2422 This method is mostly useful to pass a QString to a function
2423 that accepts a std::wstring object.
2424
2425 \sa utf16(), toLatin1(), toUtf8(), toLocal8Bit(), toStdU16String(),
2426 toStdU32String()
2427*/
2428
2429qsizetype QString::toUcs4_helper(const char16_t *uc, qsizetype length, char32_t *out)
2430{
2431 qsizetype count = 0;
2432
2433 QStringIterator i(QStringView(uc, length));
2434 while (i.hasNext())
2435 out[count++] = i.next();
2436
2437 return count;
2438}
2439
2440/*! \fn qsizetype QString::toWCharArray(wchar_t *array) const
2441 \since 4.2
2442
2443 Fills the \a array with the data contained in this QString object.
2444 The array is encoded in UTF-16 on platforms where
2445 wchar_t is 2 bytes wide (for example, Windows) and in UTF-32 on platforms
2446 where wchar_t is 4 bytes wide (most Unix systems).
2447
2448 \a array has to be allocated by the caller and contain enough space to
2449 hold the complete string (allocating the array with the same length as the
2450 string is always sufficient).
2451
2452 This function returns the actual length of the string in \a array.
2453
2454 \note This function does not append a null character to the array.
2455
2456 \sa utf16(), toUcs4(), toLatin1(), toUtf8(), toLocal8Bit(), toStdWString(),
2457 QStringView::toWCharArray()
2458*/
2459
2460/*! \fn QString::QString(const QString &other)
2461
2462 Constructs a copy of \a other.
2463
2464 This operation takes \l{constant time}, because QString is
2465 \l{implicitly shared}. This makes returning a QString from a
2466 function very fast. If a shared instance is modified, it will be
2467 copied (copy-on-write), and that takes \l{linear time}.
2468
2469 \sa operator=()
2470*/
2471
2472/*!
2473 Constructs a string initialized with the first \a size characters
2474 of the QChar array \a unicode.
2475
2476 If \a unicode is \nullptr, a null string is constructed.
2477
2478 If \a size is negative, \a unicode is assumed to point to a '\\0'-terminated
2479 array and its length is determined dynamically. The terminating
2480 null character is not considered part of the string.
2481
2482 QString makes a deep copy of the string data. The unicode data is copied as
2483 is and the Byte Order Mark is preserved if present.
2484
2485 \sa fromRawData()
2486*/
2487QString::QString(const QChar *unicode, qsizetype size)
2488{
2489 if (!unicode) {
2490 d.clear();
2491 } else {
2492 if (size < 0)
2493 size = QtPrivate::qustrlen(reinterpret_cast<const char16_t *>(unicode));
2494 if (!size) {
2495 d = DataPointer::fromRawData(&_empty, 0);
2496 } else {
2497 d = DataPointer(size, size);
2498 Q_CHECK_PTR(d.data());
2499 memcpy(d.data(), unicode, size * sizeof(QChar));
2500 d.data()[size] = '\0';
2501 }
2502 }
2503}
2504
2505/*!
2506 Constructs a string of the given \a size with every character set
2507 to \a ch.
2508
2509 \sa fill()
2510*/
2511QString::QString(qsizetype size, QChar ch)
2512{
2513 if (size <= 0) {
2514 d = DataPointer::fromRawData(&_empty, 0);
2515 } else {
2516 d = DataPointer(size, size);
2517 Q_CHECK_PTR(d.data());
2518 d.data()[size] = '\0';
2519 char16_t *b = d.data();
2520 char16_t *e = d.data() + size;
2521 const char16_t value = ch.unicode();
2522 std::fill(b, e, value);
2523 }
2524}
2525
2526/*! \fn QString::QString(qsizetype size, Qt::Initialization)
2527 \internal
2528
2529 Constructs a string of the given \a size without initializing the
2530 characters. This is only used in \c QStringBuilder::toString().
2531*/
2532QString::QString(qsizetype size, Qt::Initialization)
2533{
2534 if (size <= 0) {
2535 d = DataPointer::fromRawData(&_empty, 0);
2536 } else {
2537 d = DataPointer(size, size);
2538 Q_CHECK_PTR(d.data());
2539 d.data()[size] = '\0';
2540 }
2541}
2542
2543/*! \fn QString::QString(QLatin1StringView str)
2544
2545 Constructs a copy of the Latin-1 string viewed by \a str.
2546
2547 \sa fromLatin1()
2548*/
2549
2550/*!
2551 Constructs a string of size 1 containing the character \a ch.
2552*/
2553QString::QString(QChar ch)
2554{
2555 d = DataPointer(1, 1);
2556 Q_CHECK_PTR(d.data());
2557 d.data()[0] = ch.unicode();
2558 d.data()[1] = '\0';
2559}
2560
2561/*! \fn QString::QString(const QByteArray &ba)
2562
2563 Constructs a string initialized with the byte array \a ba. The
2564 given byte array is converted to Unicode using fromUtf8().
2565
2566 You can disable this constructor by defining
2567 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
2568 can be useful if you want to ensure that all user-visible strings
2569 go through QObject::tr(), for example.
2570
2571 \note Any null ('\\0') bytes in the byte array will be included in this
2572 string, converted to Unicode null characters (U+0000). This behavior is
2573 different from Qt 5.x.
2574
2575 \sa fromLatin1(), fromLocal8Bit(), fromUtf8()
2576*/
2577
2578/*! \fn QString::QString(const Null &)
2579 \internal
2580*/
2581
2582/*! \fn QString::QString(QStringPrivate)
2583 \internal
2584*/
2585
2586/*! \fn QString &QString::operator=(const QString::Null &)
2587 \internal
2588*/
2589
2590/*!
2591 \fn QString::~QString()
2592
2593 Destroys the string.
2594*/
2595
2596
2597/*! \fn void QString::swap(QString &other)
2598 \since 4.8
2599 \memberswap{string}
2600*/
2601
2602/*! \fn void QString::detach()
2603
2604 \internal
2605*/
2606
2607/*! \fn bool QString::isDetached() const
2608
2609 \internal
2610*/
2611
2612/*! \fn bool QString::isSharedWith(const QString &other) const
2613
2614 \internal
2615*/
2616
2617/*! \fn QString::operator std::u16string_view() const
2618 \since 6.7
2619
2620 Converts this QString object to a \c{std::u16string_view} object.
2621*/
2622
2623static bool needsReallocate(const QString &str, qsizetype newSize)
2624{
2625 const auto capacityAtEnd = str.capacity() - str.data_ptr().freeSpaceAtBegin();
2626 return newSize > capacityAtEnd;
2627}
2628
2629/*!
2630 Sets the size of the string to \a size characters.
2631
2632 If \a size is greater than the current size, the string is
2633 extended to make it \a size characters long with the extra
2634 characters added to the end. The new characters are uninitialized.
2635
2636 If \a size is less than the current size, characters beyond position
2637 \a size are excluded from the string.
2638
2639 \note While resize() will grow the capacity if needed, it never shrinks
2640 capacity. To shed excess capacity, use squeeze().
2641
2642 Example:
2643
2644 \snippet qstring/main.cpp 45
2645
2646 If you want to append a certain number of identical characters to
2647 the string, use the \l {QString::}{resize(qsizetype, QChar)} overload.
2648
2649 If you want to expand the string so that it reaches a certain
2650 width and fill the new positions with a particular character, use
2651 the leftJustified() function:
2652
2653 If \a size is negative, it is equivalent to passing zero.
2654
2655 \snippet qstring/main.cpp 47
2656
2657 \sa truncate(), reserve(), squeeze()
2658*/
2659
2660void QString::resize(qsizetype size)
2661{
2662 if (size < 0)
2663 size = 0;
2664
2665 if (d->needsDetach() || needsReallocate(*this, size))
2666 reallocData(size, QArrayData::Grow);
2667 d.size = size;
2668 if (d->allocatedCapacity())
2669 d.data()[size] = u'\0';
2670}
2671
2672/*!
2673 \overload
2674 \since 5.7
2675
2676 Unlike \l {QString::}{resize(qsizetype)}, this overload
2677 initializes the new characters to \a fillChar:
2678
2679 \snippet qstring/main.cpp 46
2680*/
2681
2682void QString::resize(qsizetype newSize, QChar fillChar)
2683{
2684 const qsizetype oldSize = size();
2685 resize(newSize);
2686 const qsizetype difference = size() - oldSize;
2687 if (difference > 0)
2688 std::fill_n(d.data() + oldSize, difference, fillChar.unicode());
2689}
2690
2691
2692/*!
2693 \since 6.8
2694
2695 Sets the size of the string to \a size characters. If the size of
2696 the string grows, the new characters are uninitialized.
2697
2698 The behavior is identical to \c{resize(size)}.
2699
2700 \sa resize()
2701*/
2702
2703void QString::resizeForOverwrite(qsizetype size)
2704{
2705 resize(size);
2706}
2707
2708
2709/*! \fn qsizetype QString::capacity() const
2710
2711 Returns the maximum number of characters that can be stored in
2712 the string without forcing a reallocation.
2713
2714 The sole purpose of this function is to provide a means of fine
2715 tuning QString's memory usage. In general, you will rarely ever
2716 need to call this function. If you want to know how many
2717 characters are in the string, call size().
2718
2719 \note A statically allocated string will report a capacity of 0,
2720 even if it's not empty.
2721
2722 \note The free space position in the allocated memory block is undefined. In
2723 other words, one should not assume that the free memory is always located
2724 after the initialized elements.
2725
2726 \sa reserve(), squeeze()
2727*/
2728
2729/*!
2730 \fn void QString::reserve(qsizetype size)
2731
2732 Ensures the string has space for at least \a size characters.
2733
2734 If you know in advance how large a string will be, you can call this
2735 function to save repeated reallocation while building it.
2736 This can improve performance when building a string incrementally.
2737 A long sequence of operations that add to a string may trigger several
2738 reallocations, the last of which may leave you with significantly more
2739 space than you need. This is less efficient than doing a single
2740 allocation of the right size at the start.
2741
2742 If in doubt about how much space shall be needed, it is usually better to
2743 use an upper bound as \a size, or a high estimate of the most likely size,
2744 if a strict upper bound would be much bigger than this. If \a size is an
2745 underestimate, the string will grow as needed once the reserved size is
2746 exceeded, which may lead to a larger allocation than your best
2747 overestimate would have and will slow the operation that triggers it.
2748
2749 \warning reserve() reserves memory but does not change the size of the
2750 string. Accessing data beyond the end of the string is undefined behavior.
2751 If you need to access memory beyond the current end of the string,
2752 use resize().
2753
2754 This function is useful for code that needs to build up a long
2755 string and wants to avoid repeated reallocation. In this example,
2756 we want to add to the string until some condition is \c true, and
2757 we're fairly sure that size is large enough to make a call to
2758 reserve() worthwhile:
2759
2760 \snippet qstring/main.cpp 44
2761
2762 \sa squeeze(), capacity(), resize()
2763*/
2764
2765/*!
2766 \fn void QString::squeeze()
2767
2768 Releases any memory not required to store the character data.
2769
2770 The sole purpose of this function is to provide a means of fine
2771 tuning QString's memory usage. In general, you will rarely ever
2772 need to call this function.
2773
2774 \sa reserve(), capacity()
2775*/
2776
2777void QString::reallocData(qsizetype alloc, QArrayData::AllocationOption option)
2778{
2779 if (!alloc) {
2780 d = DataPointer::fromRawData(&_empty, 0);
2781 return;
2782 }
2783
2784 // don't use reallocate path when reducing capacity and there's free space
2785 // at the beginning: might shift data pointer outside of allocated space
2786 const bool cannotUseReallocate = d.freeSpaceAtBegin() > 0;
2787
2788 if (d->needsDetach() || cannotUseReallocate) {
2789 DataPointer dd(alloc, qMin(alloc, d.size), option);
2790 Q_CHECK_PTR(dd.data());
2791 if (dd.size > 0)
2792 ::memcpy(dd.data(), d.data(), dd.size * sizeof(QChar));
2793 dd.data()[dd.size] = 0;
2794 d = dd;
2795 } else {
2796 d->reallocate(alloc, option);
2797 }
2798}
2799
2800void QString::reallocGrowData(qsizetype n)
2801{
2802 if (!n) // expected to always allocate
2803 n = 1;
2804
2805 if (d->needsDetach()) {
2806 DataPointer dd(DataPointer::allocateGrow(d, n, QArrayData::GrowsAtEnd));
2807 Q_CHECK_PTR(dd.data());
2808 dd->copyAppend(d.data(), d.data() + d.size);
2809 dd.data()[dd.size] = 0;
2810 d = dd;
2811 } else {
2812 d->reallocate(d.constAllocatedCapacity() + n, QArrayData::Grow);
2813 }
2814}
2815
2816/*! \fn void QString::clear()
2817
2818 Clears the contents of the string and makes it null.
2819
2820 \sa resize(), isNull()
2821*/
2822
2823/*! \fn QString &QString::operator=(const QString &other)
2824
2825 Assigns \a other to this string and returns a reference to this
2826 string.
2827*/
2828
2829QString &QString::operator=(const QString &other) noexcept
2830{
2831 d = other.d;
2832 return *this;
2833}
2834
2835/*!
2836 \fn QString &QString::operator=(QString &&other)
2837
2838 Move-assigns \a other to this QString instance.
2839
2840 \since 5.2
2841*/
2842
2843/*! \fn QString &QString::operator=(QLatin1StringView str)
2844
2845 \overload operator=()
2846
2847 Assigns the Latin-1 string viewed by \a str to this string.
2848*/
2849QString &QString::operator=(QLatin1StringView other)
2850{
2851 const qsizetype capacityAtEnd = capacity() - d.freeSpaceAtBegin();
2852 if (isDetached() && other.size() <= capacityAtEnd) { // assumes d->alloc == 0 -> !isDetached() (sharedNull)
2853 d.size = other.size();
2854 d.data()[other.size()] = 0;
2855 qt_from_latin1(d.data(), other.latin1(), other.size());
2856 } else {
2857 *this = fromLatin1(other.latin1(), other.size());
2858 }
2859 return *this;
2860}
2861
2862/*! \fn QString &QString::operator=(const QByteArray &ba)
2863
2864 \overload operator=()
2865
2866 Assigns \a ba to this string. The byte array is converted to Unicode
2867 using the fromUtf8() function.
2868
2869 You can disable this operator by defining
2870 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
2871 can be useful if you want to ensure that all user-visible strings
2872 go through QObject::tr(), for example.
2873*/
2874
2875/*! \fn QString &QString::operator=(const char *str)
2876
2877 \overload operator=()
2878
2879 Assigns \a str to this string. The const char pointer is converted
2880 to Unicode using the fromUtf8() function.
2881
2882 You can disable this operator by defining \l QT_NO_CAST_FROM_ASCII
2883 or \l QT_RESTRICTED_CAST_FROM_ASCII when you compile your applications.
2884 This can be useful if you want to ensure that all user-visible strings
2885 go through QObject::tr(), for example.
2886*/
2887
2888/*!
2889 \overload operator=()
2890
2891 Sets the string to contain the single character \a ch.
2892*/
2893QString &QString::operator=(QChar ch)
2894{
2895 return assign(1, ch);
2896}
2897
2898/*!
2899 \fn QString& QString::insert(qsizetype position, const QString &str)
2900
2901 Inserts the string \a str at the given index \a position and
2902 returns a reference to this string.
2903
2904 Example:
2905
2906 \snippet qstring/main.cpp 26
2907
2908//! [string-grow-at-insertion]
2909 This string grows to accommodate the insertion. If \a position is beyond
2910 the end of the string, space characters are appended to the string to reach
2911 this \a position, followed by \a str.
2912//! [string-grow-at-insertion]
2913
2914 \sa append(), prepend(), replace(), remove()
2915*/
2916
2917/*!
2918 \fn QString& QString::insert(qsizetype position, QStringView str)
2919 \since 6.0
2920 \overload insert()
2921
2922 Inserts the string view \a str at the given index \a position and
2923 returns a reference to this string.
2924
2925 \include qstring.cpp string-grow-at-insertion
2926*/
2927
2928
2929/*!
2930 \fn QString& QString::insert(qsizetype position, const char *str)
2931 \since 5.5
2932 \overload insert()
2933
2934 Inserts the C string \a str at the given index \a position and
2935 returns a reference to this string.
2936
2937 \include qstring.cpp string-grow-at-insertion
2938
2939 This function is not available when \l QT_NO_CAST_FROM_ASCII is
2940 defined.
2941*/
2942
2943/*!
2944 \fn QString& QString::insert(qsizetype position, const QByteArray &str)
2945 \since 5.5
2946 \overload insert()
2947
2948 Interprets the contents of \a str as UTF-8, inserts the Unicode string
2949 it encodes at the given index \a position and returns a reference to
2950 this string.
2951
2952 \include qstring.cpp string-grow-at-insertion
2953
2954 This function is not available when \l QT_NO_CAST_FROM_ASCII is
2955 defined.
2956*/
2957
2958/*! \internal
2959 T is a view or a container on/of QChar, char16_t, or char
2960*/
2961template <typename T>
2962static void insert_helper(QString &str, qsizetype i, const T &toInsert)
2963{
2964 auto &str_d = str.data_ptr();
2965 qsizetype difference = 0;
2966 if (Q_UNLIKELY(i > str_d.size))
2967 difference = i - str_d.size;
2968 const qsizetype oldSize = str_d.size;
2969 const qsizetype insert_size = toInsert.size();
2970 const qsizetype newSize = str_d.size + difference + insert_size;
2971 const auto side = i == 0 ? QArrayData::GrowsAtBeginning : QArrayData::GrowsAtEnd;
2972
2973 if (str_d.needsDetach() || needsReallocate(str, newSize)) {
2974 const auto cbegin = str.cbegin();
2975 const auto cend = str.cend();
2976 const auto insert_start = difference == 0 ? std::next(cbegin, i) : cend;
2977 QString other;
2978 // Using detachAndGrow() so that prepend optimization works and QStringBuilder
2979 // unittests pass
2980 other.data_ptr().detachAndGrow(side, newSize, nullptr, nullptr);
2981 other.append(QStringView(cbegin, insert_start));
2982 other.resize(i, u' ');
2983 other.append(toInsert);
2984 other.append(QStringView(insert_start, cend));
2985 str.swap(other);
2986 return;
2987 }
2988
2989 str_d.detachAndGrow(side, difference + insert_size, nullptr, nullptr);
2990 Q_CHECK_PTR(str_d.data());
2991 str.resize(newSize);
2992
2993 auto begin = str_d.begin();
2994 auto old_end = std::next(begin, oldSize);
2995 std::fill_n(old_end, difference, u' ');
2996 auto insert_start = std::next(begin, i);
2997 if (difference == 0)
2998 std::move_backward(insert_start, old_end, str_d.end());
2999
3000 using Char = std::remove_cv_t<typename T::value_type>;
3001 if constexpr(std::is_same_v<Char, QChar>)
3002 std::copy_n(reinterpret_cast<const char16_t *>(toInsert.data()), insert_size, insert_start);
3003 else if constexpr (std::is_same_v<Char, char16_t>)
3004 std::copy_n(toInsert.data(), insert_size, insert_start);
3005 else if constexpr (std::is_same_v<Char, char>)
3006 qt_from_latin1(insert_start, toInsert.data(), insert_size);
3007}
3008
3009/*!
3010 \fn QString &QString::insert(qsizetype position, QLatin1StringView str)
3011 \overload insert()
3012
3013 Inserts the Latin-1 string viewed by \a str at the given index \a position.
3014
3015 \include qstring.cpp string-grow-at-insertion
3016*/
3017QString &QString::insert(qsizetype i, QLatin1StringView str)
3018{
3019 const char *s = str.latin1();
3020 if (i < 0 || !s || !(*s))
3021 return *this;
3022
3023 insert_helper(*this, i, str);
3024 return *this;
3025}
3026
3027/*!
3028 \fn QString &QString::insert(qsizetype position, QUtf8StringView str)
3029 \overload insert()
3030 \since 6.5
3031
3032 Inserts the UTF-8 string view \a str at the given index \a position.
3033
3034 \note Inserting variable-width UTF-8-encoded string data is conceptually slower
3035 than inserting fixed-width string data such as UTF-16 (QStringView) or Latin-1
3036 (QLatin1StringView) and should thus be used sparingly.
3037
3038 \include qstring.cpp string-grow-at-insertion
3039*/
3040QString &QString::insert(qsizetype i, QUtf8StringView s)
3041{
3042 auto insert_size = s.size();
3043 if (i < 0 || insert_size <= 0)
3044 return *this;
3045
3046 qsizetype difference = 0;
3047 if (Q_UNLIKELY(i > d.size))
3048 difference = i - d.size;
3049
3050 const qsizetype newSize = d.size + difference + insert_size;
3051
3052 if (d.needsDetach() || needsReallocate(*this, newSize)) {
3053 const auto cbegin = this->cbegin();
3054 const auto insert_start = difference == 0 ? std::next(cbegin, i) : cend();
3055 QString other;
3056 other.reserve(newSize);
3057 other.append(QStringView(cbegin, insert_start));
3058 if (difference > 0)
3059 other.resize(i, u' ');
3060 other.append(s);
3061 other.append(QStringView(insert_start, cend()));
3062 swap(other);
3063 return *this;
3064 }
3065
3066 if (i >= d.size) {
3067 d.detachAndGrow(QArrayData::GrowsAtEnd, difference + insert_size, nullptr, nullptr);
3068 Q_CHECK_PTR(d.data());
3069
3070 if (difference > 0)
3071 resize(i, u' ');
3072 append(s);
3073 } else {
3074 // Optimal insertion of Utf8 data is at the end, anywhere else could
3075 // potentially lead to moving characters twice if Utf8 data size
3076 // (variable-width) is less than the equivalent Utf16 data size
3077 QVarLengthArray<char16_t> buffer(insert_size); // ### optimize (QTBUG-108546)
3078 char16_t *b = QUtf8::convertToUnicode(buffer.data(), s);
3079 insert_helper(*this, i, QStringView(buffer.data(), b));
3080 }
3081
3082 return *this;
3083}
3084
3085/*!
3086 \fn QString& QString::insert(qsizetype position, const QChar *unicode, qsizetype size)
3087 \overload insert()
3088
3089 Inserts the first \a size characters of the QChar array \a unicode
3090 at the given index \a position in the string.
3091
3092 This string grows to accommodate the insertion. If \a position is beyond
3093 the end of the string, space characters are appended to the string to reach
3094 this \a position, followed by \a size characters of the QChar array
3095 \a unicode.
3096*/
3097QString& QString::insert(qsizetype i, const QChar *unicode, qsizetype size)
3098{
3099 if (i < 0 || size <= 0)
3100 return *this;
3101
3102 // In case when data points into "this"
3103 if (!d->needsDetach() && QtPrivate::q_points_into_range(unicode, *this)) {
3104 QVarLengthArray copy(unicode, unicode + size);
3105 insert(i, copy.data(), size);
3106 } else {
3107 insert_helper(*this, i, QStringView(unicode, size));
3108 }
3109
3110 return *this;
3111}
3112
3113/*!
3114 \fn QString& QString::insert(qsizetype position, QChar ch)
3115 \overload insert()
3116
3117 Inserts \a ch at the given index \a position in the string.
3118
3119 This string grows to accommodate the insertion. If \a position is beyond
3120 the end of the string, space characters are appended to the string to reach
3121 this \a position, followed by \a ch.
3122*/
3123
3124QString& QString::insert(qsizetype i, QChar ch)
3125{
3126 if (i < 0)
3127 i += d.size;
3128 return insert(i, &ch, 1);
3129}
3130
3131/*!
3132 Appends the string \a str onto the end of this string.
3133
3134 Example:
3135
3136 \snippet qstring/main.cpp 9
3137
3138 This is the same as using the insert() function:
3139
3140 \snippet qstring/main.cpp 10
3141
3142 The append() function is typically very fast (\l{constant time}),
3143 because QString preallocates extra space at the end of the string
3144 data so it can grow without reallocating the entire string each
3145 time.
3146
3147 \sa operator+=(), prepend(), insert()
3148*/
3149QString &QString::append(const QString &str)
3150{
3151 if (!str.isNull()) {
3152 if (isNull()) {
3153 if (Q_UNLIKELY(!str.d.isMutable()))
3154 assign(str); // fromRawData, so we do a deep copy
3155 else
3156 operator=(str);
3157 } else if (str.size()) {
3158 append(str.constData(), str.size());
3159 }
3160 }
3161 return *this;
3162}
3163
3164/*!
3165 \fn QString &QString::append(QStringView v)
3166 \overload append()
3167 \since 6.0
3168
3169 Appends the given string view \a v to this string and returns the result.
3170*/
3171
3172/*!
3173 \overload append()
3174 \since 5.0
3175
3176 Appends \a len characters from the QChar array \a str to this string.
3177*/
3178QString &QString::append(const QChar *str, qsizetype len)
3179{
3180 if (str && len > 0) {
3181 static_assert(sizeof(QChar) == sizeof(char16_t), "Unexpected difference in sizes");
3182 // the following should be safe as QChar uses char16_t as underlying data
3183 const char16_t *char16String = reinterpret_cast<const char16_t *>(str);
3184 d->growAppend(char16String, char16String + len);
3185 d.data()[d.size] = u'\0';
3186 }
3187 return *this;
3188}
3189
3190/*!
3191 \overload append()
3192
3193 Appends the Latin-1 string viewed by \a str to this string.
3194*/
3195QString &QString::append(QLatin1StringView str)
3196{
3197 append_helper(*this, str);
3198 return *this;
3199}
3200
3201/*!
3202 \overload append()
3203 \since 6.5
3204
3205 Appends the UTF-8 string view \a str to this string.
3206*/
3207QString &QString::append(QUtf8StringView str)
3208{
3209 append_helper(*this, str);
3210 return *this;
3211}
3212
3213/*! \fn QString &QString::append(const QByteArray &ba)
3214
3215 \overload append()
3216
3217 Appends the byte array \a ba to this string. The given byte array
3218 is converted to Unicode using the fromUtf8() function.
3219
3220 You can disable this function by defining \l QT_NO_CAST_FROM_ASCII
3221 when you compile your applications. This can be useful if you want
3222 to ensure that all user-visible strings go through QObject::tr(),
3223 for example.
3224*/
3225
3226/*! \fn QString &QString::append(const char *str)
3227
3228 \overload append()
3229
3230 Appends the string \a str to this string. The given const char
3231 pointer is converted to Unicode using the fromUtf8() function.
3232
3233 You can disable this function by defining \l QT_NO_CAST_FROM_ASCII
3234 when you compile your applications. This can be useful if you want
3235 to ensure that all user-visible strings go through QObject::tr(),
3236 for example.
3237*/
3238
3239/*!
3240 \overload append()
3241
3242 Appends the character \a ch to this string.
3243*/
3244QString &QString::append(QChar ch)
3245{
3246 d.detachAndGrow(QArrayData::GrowsAtEnd, 1, nullptr, nullptr);
3247 d->copyAppend(1, ch.unicode());
3248 d.data()[d.size] = '\0';
3249 return *this;
3250}
3251
3252/*! \fn QString &QString::prepend(const QString &str)
3253
3254 Prepends the string \a str to the beginning of this string and
3255 returns a reference to this string.
3256
3257 This operation is typically very fast (\l{constant time}), because
3258 QString preallocates extra space at the beginning of the string data,
3259 so it can grow without reallocating the entire string each time.
3260
3261 Example:
3262
3263 \snippet qstring/main.cpp 36
3264
3265 \sa append(), insert()
3266*/
3267
3268/*! \fn QString &QString::prepend(QLatin1StringView str)
3269
3270 \overload prepend()
3271
3272 Prepends the Latin-1 string viewed by \a str to this string.
3273*/
3274
3275/*! \fn QString &QString::prepend(QUtf8StringView str)
3276 \since 6.5
3277 \overload prepend()
3278
3279 Prepends the UTF-8 string view \a str to this string.
3280*/
3281
3282/*! \fn QString &QString::prepend(const QChar *str, qsizetype len)
3283 \since 5.5
3284 \overload prepend()
3285
3286 Prepends \a len characters from the QChar array \a str to this string and
3287 returns a reference to this string.
3288*/
3289
3290/*! \fn QString &QString::prepend(QStringView str)
3291 \since 6.0
3292 \overload prepend()
3293
3294 Prepends the string view \a str to the beginning of this string and
3295 returns a reference to this string.
3296*/
3297
3298/*! \fn QString &QString::prepend(const QByteArray &ba)
3299
3300 \overload prepend()
3301
3302 Prepends the byte array \a ba to this string. The byte array is
3303 converted to Unicode using the fromUtf8() function.
3304
3305 You can disable this function by defining
3306 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
3307 can be useful if you want to ensure that all user-visible strings
3308 go through QObject::tr(), for example.
3309*/
3310
3311/*! \fn QString &QString::prepend(const char *str)
3312
3313 \overload prepend()
3314
3315 Prepends the string \a str to this string. The const char pointer
3316 is converted to Unicode using the fromUtf8() function.
3317
3318 You can disable this function by defining
3319 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
3320 can be useful if you want to ensure that all user-visible strings
3321 go through QObject::tr(), for example.
3322*/
3323
3324/*! \fn QString &QString::prepend(QChar ch)
3325
3326 \overload prepend()
3327
3328 Prepends the character \a ch to this string.
3329*/
3330
3331/*!
3332 \fn QString &QString::assign(QAnyStringView v)
3333 \since 6.6
3334
3335 Replaces the contents of this string with a copy of \a v and returns a
3336 reference to this string.
3337
3338 The size of this string will be equal to the size of \a v, converted to
3339 UTF-16 as if by \c{v.toString()}. Unlike QAnyStringView::toString(), however,
3340 this function only allocates memory if the estimated size exceeds the capacity
3341 of this string or this string is shared.
3342
3343 \sa QAnyStringView::toString()
3344*/
3345
3346/*!
3347 \fn QString &QString::assign(qsizetype n, QChar c)
3348 \since 6.6
3349
3350 Replaces the contents of this string with \a n copies of \a c and
3351 returns a reference to this string.
3352
3353 The size of this string will be equal to \a n, which has to be non-negative.
3354
3355 This function will only allocate memory if \a n exceeds the capacity of this
3356 string or this string is shared.
3357
3358 \sa fill()
3359*/
3360
3361/*!
3362 \fn template <typename InputIterator, QString::if_compatible_iterator<InputIterator>> QString &QString::assign(InputIterator first, InputIterator last)
3363 \since 6.6
3364
3365 Replaces the contents of this string with a copy of the elements in the
3366 iterator range [\a first, \a last) and returns a reference to this string.
3367
3368 The size of this string will be equal to the decoded length of the elements
3369 in the range [\a first, \a last), which need not be the same as the length of
3370 the range itself, because this function transparently recodes the input
3371 character set to UTF-16.
3372
3373 This function will only allocate memory if the number of elements in the
3374 range, or, for non-UTF-16-encoded input, the maximum possible size of the
3375 resulting string, exceeds the capacity of this string, or if this string is
3376 shared.
3377
3378 \note The behavior is undefined if either argument is an iterator into *this or
3379 [\a first, \a last) is not a valid range.
3380
3381 \constraints
3382 \c InputIterator meets the requirements of a
3383 \l {https://en.cppreference.com/w/cpp/named_req/InputIterator} {LegacyInputIterator}
3384 and the \c{value_type} of \c InputIterator is one of the following character types:
3385 \list
3386 \li QChar
3387 \li QLatin1Char
3388 \li \c {char}
3389 \li \c {unsigned char}
3390 \li \c {signed char}
3391 \li \c {char8_t}
3392 \li \c char16_t
3393 \li (on platforms, such as Windows, where it is a 16-bit type) \c wchar_t
3394 \li \c char32_t
3395 \endlist
3396*/
3397
3398QString &QString::assign(QAnyStringView s)
3399{
3400 if (s.size() <= capacity() && isDetached()) {
3401 const auto offset = d.freeSpaceAtBegin();
3402 if (offset)
3403 d.setBegin(d.begin() - offset);
3404 resize(0);
3405 s.visit([this](auto input) {
3406 this->append(input);
3407 });
3408 } else {
3409 *this = s.toString();
3410 }
3411 return *this;
3412}
3413
3414#ifndef QT_BOOTSTRAPPED
3415QString &QString::assign_helper(const char32_t *data, qsizetype len)
3416{
3417 // worst case: each char32_t requires a surrogate pair, so
3418 const auto requiredCapacity = len * 2;
3419 if (requiredCapacity <= capacity() && isDetached()) {
3420 const auto offset = d.freeSpaceAtBegin();
3421 if (offset)
3422 d.setBegin(d.begin() - offset);
3423 auto begin = reinterpret_cast<QChar *>(d.begin());
3424 auto ba = QByteArrayView(reinterpret_cast<const std::byte*>(data), len * sizeof(char32_t));
3425 QStringConverter::State state;
3426 const auto end = QUtf32::convertToUnicode(begin, ba, &state, DetectEndianness);
3427 d.size = end - begin;
3428 d.data()[d.size] = u'\0';
3429 } else {
3430 *this = QString::fromUcs4(data, len);
3431 }
3432 return *this;
3433}
3434#endif
3435
3436/*!
3437 \fn QString &QString::remove(qsizetype position, qsizetype n)
3438
3439 Removes \a n characters from the string, starting at the given \a
3440 position index, and returns a reference to the string.
3441
3442 If the specified \a position index is within the string, but \a
3443 position + \a n is beyond the end of the string, the string is
3444 truncated at the specified \a position.
3445
3446 If \a n is <= 0 nothing is changed.
3447
3448 \snippet qstring/main.cpp 37
3449
3450//! [shrinking-erase]
3451 Element removal will preserve the string's capacity and not reduce the
3452 amount of allocated memory. To shed extra capacity and free as much memory
3453 as possible, call squeeze() after the last change to the string's size.
3454//! [shrinking-erase]
3455
3456 \sa insert(), replace()
3457*/
3458QString &QString::remove(qsizetype pos, qsizetype len)
3459{
3460 if (pos < 0) // count from end of string
3461 pos += size();
3462
3463 if (size_t(pos) >= size_t(size()) || len <= 0)
3464 return *this;
3465
3466 len = std::min(len, size() - pos);
3467
3468 if (!d->isShared()) {
3469 d->erase(d.begin() + pos, len);
3470 d.data()[d.size] = u'\0';
3471 } else {
3472 // TODO: either reserve "size()", which is bigger than needed, or
3473 // modify the shrinking-erase docs of this method (since the size
3474 // of "copy" won't have any extra capacity any more)
3475 const qsizetype sz = size() - len;
3476 QString copy{sz, Qt::Uninitialized};
3477 auto begin = d.begin();
3478 auto toRemove_start = d.begin() + pos;
3479 copy.d->copyRanges({{begin, toRemove_start},
3480 {toRemove_start + len, d.end()}});
3481 swap(copy);
3482 }
3483 return *this;
3484}
3485
3486template<typename T>
3487static void removeStringImpl(QString &s, const T &needle, Qt::CaseSensitivity cs)
3488{
3489 const auto needleSize = needle.size();
3490 if (!needleSize)
3491 return;
3492
3493 // avoid detach if nothing to do:
3494 qsizetype i = s.indexOf(needle, 0, cs);
3495 if (i < 0)
3496 return;
3497
3498 QString::DataPointer &dptr = s.data_ptr();
3499 auto begin = dptr.begin();
3500 auto end = dptr.end();
3501
3502 auto copyFunc = [&](auto &dst) {
3503 auto src = begin + i + needleSize;
3504 while (src < end) {
3505 i = s.indexOf(needle, std::distance(begin, src), cs);
3506 auto hit = i == -1 ? end : begin + i;
3507 dst = std::copy(src, hit, dst);
3508 src = hit + needleSize;
3509 }
3510 return dst;
3511 };
3512
3513 if (!dptr->needsDetach()) {
3514 auto dst = begin + i;
3515 dst = copyFunc(dst);
3516 s.truncate(std::distance(begin, dst));
3517 } else {
3518 QString copy{s.size(), Qt::Uninitialized};
3519 auto copy_begin = copy.begin();
3520 auto dst = std::copy(begin, begin + i, copy_begin); // Chunk before the first hit
3521 dst = copyFunc(dst);
3522 copy.resize(std::distance(copy_begin, dst));
3523 s.swap(copy);
3524 }
3525}
3526
3527/*!
3528 Removes every occurrence of the given \a str string in this
3529 string, and returns a reference to this string.
3530
3531 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
3532
3533 This is the same as \c replace(str, "", cs).
3534
3535 \include qstring.cpp shrinking-erase
3536
3537 \sa replace()
3538*/
3539QString &QString::remove(const QString &str, Qt::CaseSensitivity cs)
3540{
3541 const auto s = str.d.data();
3542 if (QtPrivate::q_points_into_range(s, d))
3543 removeStringImpl(*this, QStringView{QVarLengthArray(s, s + str.size())}, cs);
3544 else
3545 removeStringImpl(*this, qToStringViewIgnoringNull(str), cs);
3546 return *this;
3547}
3548
3549/*!
3550 \since 5.11
3551 \overload
3552
3553 Removes every occurrence of the given Latin-1 string viewed by \a str
3554 from this string, and returns a reference to this string.
3555
3556 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
3557
3558 This is the same as \c replace(str, "", cs).
3559
3560 \include qstring.cpp shrinking-erase
3561
3562 \sa replace()
3563*/
3564QString &QString::remove(QLatin1StringView str, Qt::CaseSensitivity cs)
3565{
3566 removeStringImpl(*this, str, cs);
3567 return *this;
3568}
3569
3570/*!
3571 \fn QString &QString::removeAt(qsizetype pos)
3572
3573 \since 6.5
3574
3575 Removes the character at index \a pos. If \a pos is out of bounds
3576 (i.e. \a pos >= size()), this function does nothing.
3577
3578 \sa remove()
3579*/
3580
3581/*!
3582 \fn QString &QString::removeFirst()
3583
3584 \since 6.5
3585
3586 Removes the first character in this string. If the string is empty,
3587 this function does nothing.
3588
3589 \sa remove()
3590*/
3591
3592/*!
3593 \fn QString &QString::removeLast()
3594
3595 \since 6.5
3596
3597 Removes the last character in this string. If the string is empty,
3598 this function does nothing.
3599
3600 \sa remove()
3601*/
3602
3603/*!
3604 Removes every occurrence of the character \a ch in this string, and
3605 returns a reference to this string.
3606
3607 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
3608
3609 Example:
3610
3611 \snippet qstring/main.cpp 38
3612
3613 This is the same as \c replace(ch, "", cs).
3614
3615 \include qstring.cpp shrinking-erase
3616
3617 \sa replace()
3618*/
3619QString &QString::remove(QChar ch, Qt::CaseSensitivity cs)
3620{
3621 const qsizetype idx = indexOf(ch, 0, cs);
3622 if (idx == -1)
3623 return *this;
3624
3625 const bool isCase = cs == Qt::CaseSensitive;
3626 ch = isCase ? ch : ch.toCaseFolded();
3627 auto match = [ch, isCase](QChar x) {
3628 return ch == (isCase ? x : x.toCaseFolded());
3629 };
3630
3631
3632 auto begin = d.begin();
3633 auto first_match = begin + idx;
3634 auto end = d.end();
3635 if (!d->isShared()) {
3636 auto it = std::remove_if(first_match, end, match);
3637 d->erase(it, std::distance(it, end));
3638 d.data()[d.size] = u'\0';
3639 } else {
3640 // Instead of detaching, create a new string and copy all characters except for
3641 // the ones we're removing
3642 // TODO: size() is more than the needed since "copy" would be shorter
3643 QString copy{size(), Qt::Uninitialized};
3644 auto dst = copy.d.begin();
3645 auto it = std::copy(begin, first_match, dst); // Chunk before idx
3646 it = std::remove_copy_if(first_match + 1, end, it, match);
3647 copy.d.size = std::distance(dst, it);
3648 copy.d.data()[copy.d.size] = u'\0';
3649 *this = std::move(copy);
3650 }
3651 return *this;
3652}
3653
3654/*!
3655 \fn QString &QString::remove(const QRegularExpression &re)
3656 \since 5.0
3657
3658 Removes every occurrence of the regular expression \a re in the
3659 string, and returns a reference to the string. For example:
3660
3661 \snippet qstring/main.cpp 96
3662
3663 \include qstring.cpp shrinking-erase
3664
3665 \sa indexOf(), lastIndexOf(), replace()
3666*/
3667
3668/*!
3669 \fn template <typename Predicate> QString &QString::removeIf(Predicate pred)
3670 \since 6.1
3671
3672 Removes all elements for which the predicate \a pred returns true
3673 from the string. Returns a reference to the string.
3674
3675 \sa remove()
3676*/
3677
3678
3679/*! \internal
3680 Instead of detaching, or reallocating if "before" is shorter than "after"
3681 and there isn't enough capacity, create a new string, copy characters to it
3682 as needed, then swap it with "str".
3683*/
3684static void replace_with_copy(QString &str, QSpan<size_t> indices, qsizetype blen,
3685 QStringView after)
3686{
3687 const qsizetype alen = after.size();
3688 const char16_t *after_b = after.utf16();
3689
3690 const QString::DataPointer &str_d = str.data_ptr();
3691 auto src_start = str_d.begin();
3692 const qsizetype newSize = str_d.size + indices.size() * (alen - blen);
3693 QString copy{ newSize, Qt::Uninitialized };
3694 QString::DataPointer &copy_d = copy.data_ptr();
3695 auto dst = copy_d.begin();
3696 for (size_t index : indices) {
3697 auto hit = str_d.begin() + index;
3698 dst = std::copy(src_start, hit, dst);
3699 dst = std::copy_n(after_b, alen, dst);
3700 src_start = hit + blen;
3701 }
3702 dst = std::copy(src_start, str_d.end(), dst);
3703 str.swap(copy);
3704}
3705
3706// No detaching or reallocation is needed
3707static void replace_in_place(QString &str, QSpan<size_t> indices,
3708 qsizetype blen, QStringView after)
3709{
3710 const qsizetype alen = after.size();
3711 const char16_t *after_b = after.utf16();
3712 const char16_t *after_e = after.utf16() + after.size();
3713
3714 if (blen == alen) { // Replace in place
3715 for (size_t index : indices)
3716 std::copy_n(after_b, alen, str.data_ptr().begin() + index);
3717 } else if (blen > alen) { // Replace from front
3718 char16_t *begin = str.data_ptr().begin();
3719 char16_t *hit = begin + indices.front();
3720 char16_t *to = hit;
3721 to = std::copy_n(after_b, alen, to);
3722 char16_t *movestart = hit + blen;
3723 for (size_t index : indices.sliced(1)) {
3724 hit = begin + index;
3725 to = std::move(movestart, hit, to);
3726 to = std::copy_n(after_b, alen, to);
3727 movestart = hit + blen;
3728 }
3729 to = std::move(movestart, str.data_ptr().end(), to);
3730 str.resize(std::distance(begin, to));
3731 } else { // blen < alen, Replace from back
3732 const qsizetype oldSize = str.data_ptr().size;
3733 const qsizetype adjust = indices.size() * (alen - blen);
3734 const qsizetype newSize = oldSize + adjust;
3735
3736 str.resize(newSize);
3737 char16_t *begin = str.data_ptr().begin();
3738 char16_t *moveend = begin + oldSize;
3739 char16_t *to = str.data_ptr().end();
3740
3741 for (auto it = indices.rbegin(), end = indices.rend(); it != end; ++it) {
3742 char16_t *hit = begin + *it;
3743 char16_t *movestart = hit + blen;
3744 to = std::move_backward(movestart, moveend, to);
3745 to = std::copy_backward(after_b, after_e, to);
3746 moveend = hit;
3747 }
3748 }
3749}
3750
3751static void replace_helper(QString &str, QSpan<size_t> indices, qsizetype blen, QStringView after)
3752{
3753 const qsizetype oldSize = str.data_ptr().size;
3754 const qsizetype adjust = indices.size() * (after.size() - blen);
3755 const qsizetype newSize = oldSize + adjust;
3756 if (str.data_ptr().needsDetach() || needsReallocate(str, newSize)) {
3757 replace_with_copy(str, indices, blen, after);
3758 return;
3759 }
3760
3761 if (QtPrivate::q_points_into_range(after.begin(), str))
3762 // Copy after if it lies inside our own d.b area (which we could
3763 // possibly invalidate via a realloc or modify by replacement)
3764 replace_in_place(str, indices, blen, QVarLengthArray(after.begin(), after.end()));
3765 else
3766 replace_in_place(str, indices, blen, after);
3767}
3768
3769/*!
3770 \fn QString &QString::replace(qsizetype position, qsizetype n, const QString &after)
3771
3772 Replaces \a n characters beginning at index \a position with
3773 the string \a after and returns a reference to this string.
3774
3775 \note If the specified \a position index is within the string,
3776 but \a position + \a n goes outside the string's range,
3777 then \a n will be adjusted to stop at the end of the string.
3778
3779 Example:
3780
3781 \snippet qstring/main.cpp 40
3782
3783 \sa insert(), remove()
3784*/
3785QString &QString::replace(qsizetype pos, qsizetype len, const QString &after)
3786{
3787 return replace(pos, len, after.constData(), after.size());
3788}
3789
3790/*!
3791 \fn QString &QString::replace(qsizetype position, qsizetype n, const QChar *after, qsizetype alen)
3792 \overload replace()
3793 Replaces \a n characters beginning at index \a position with the
3794 first \a alen characters of the QChar array \a after and returns a
3795 reference to this string.
3796*/
3797QString &QString::replace(qsizetype pos, qsizetype len, const QChar *after, qsizetype alen)
3798{
3799 if (size_t(pos) > size_t(this->size()))
3800 return *this;
3801 if (len > this->size() - pos)
3802 len = this->size() - pos;
3803
3804 size_t index = pos;
3805 replace_helper(*this, QSpan(&index, 1), len, QStringView{after, alen});
3806 return *this;
3807}
3808
3809/*!
3810 \fn QString &QString::replace(qsizetype position, qsizetype n, QChar after)
3811 \overload replace()
3812
3813 Replaces \a n characters beginning at index \a position with the
3814 character \a after and returns a reference to this string.
3815*/
3816QString &QString::replace(qsizetype pos, qsizetype len, QChar after)
3817{
3818 return replace(pos, len, &after, 1);
3819}
3820
3821/*!
3822 \overload replace()
3823 Replaces every occurrence of the string \a before with the string \a
3824 after and returns a reference to this string.
3825
3826 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
3827
3828 Example:
3829
3830 \snippet qstring/main.cpp 41
3831
3832 \note The replacement text is not rescanned after it is inserted.
3833
3834 Example:
3835
3836 \snippet qstring/main.cpp 86
3837
3838//! [empty-before-arg-in-replace]
3839 \note If you use an empty \a before argument, the \a after argument will be
3840 inserted \e {before and after} each character of the string.
3841//! [empty-before-arg-in-replace]
3842
3843*/
3844QString &QString::replace(const QString &before, const QString &after, Qt::CaseSensitivity cs)
3845{
3846 return replace(before.constData(), before.size(), after.constData(), after.size(), cs);
3847}
3848
3849/*!
3850 \since 4.5
3851 \overload replace()
3852
3853 Replaces each occurrence in this string of the first \a blen
3854 characters of \a before with the first \a alen characters of \a
3855 after and returns a reference to this string.
3856
3857 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
3858
3859 \note If \a before points to an \e empty string (that is, \a blen == 0),
3860 the string pointed to by \a after will be inserted \e {before and after}
3861 each character in this string.
3862*/
3863QString &QString::replace(const QChar *before, qsizetype blen,
3864 const QChar *after, qsizetype alen,
3865 Qt::CaseSensitivity cs)
3866{
3867 if (d.size == 0) {
3868 if (blen)
3869 return *this;
3870 } else {
3871 if (cs == Qt::CaseSensitive && before == after && blen == alen)
3872 return *this;
3873 }
3874 if (alen == 0 && blen == 0)
3875 return *this;
3876 if (alen == 1 && blen == 1)
3877 return replace(*before, *after, cs);
3878
3879 QStringMatcher matcher(before, blen, cs);
3880
3881 qsizetype index = 0;
3882
3883 QVarLengthArray<size_t> indices;
3884 while ((index = matcher.indexIn(*this, index)) != -1) {
3885 indices.push_back(index);
3886 if (blen) // Step over before:
3887 index += blen;
3888 else // Only count one instance of empty between any two characters:
3889 index++;
3890 }
3891 if (indices.isEmpty())
3892 return *this;
3893
3894 replace_helper(*this, indices, blen, QStringView{after, alen});
3895 return *this;
3896}
3897
3898/*!
3899 \overload replace()
3900 Replaces every occurrence of the character \a ch in the string with
3901 \a after and returns a reference to this string.
3902
3903 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
3904*/
3905QString& QString::replace(QChar ch, const QString &after, Qt::CaseSensitivity cs)
3906{
3907 if (after.size() == 0)
3908 return remove(ch, cs);
3909
3910 if (after.size() == 1)
3911 return replace(ch, after.front(), cs);
3912
3913 if (size() == 0)
3914 return *this;
3915
3916 const char16_t cc = (cs == Qt::CaseSensitive ? ch.unicode() : ch.toCaseFolded().unicode());
3917
3918 QVarLengthArray<size_t> indices;
3919 if (cs == Qt::CaseSensitive) {
3920 const char16_t *begin = d.begin();
3921 const char16_t *end = d.end();
3922 QStringView view(begin, end);
3923 const char16_t *hit = nullptr;
3924 while ((hit = QtPrivate::qustrchr(view, cc)) != end) {
3925 indices.push_back(std::distance(begin, hit));
3926 view = QStringView(std::next(hit), end);
3927 }
3928 } else {
3929 for (qsizetype i = 0; i < d.size; ++i)
3930 if (QChar::toCaseFolded(d.data()[i]) == cc)
3931 indices.push_back(i);
3932 }
3933 if (indices.isEmpty())
3934 return *this;
3935
3936 replace_helper(*this, indices, 1, after);
3937 return *this;
3938}
3939
3940/*!
3941 \overload replace()
3942 Replaces every occurrence of the character \a before with the
3943 character \a after and returns a reference to this string.
3944
3945 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
3946*/
3947QString& QString::replace(QChar before, QChar after, Qt::CaseSensitivity cs)
3948{
3949 const qsizetype idx = indexOf(before, 0, cs);
3950 if (idx == -1)
3951 return *this;
3952
3953 const char16_t achar = after.unicode();
3954 char16_t bchar = before.unicode();
3955
3956 auto matchesCIS = [](char16_t beforeChar) {
3957 return [beforeChar](char16_t ch) { return foldAndCompare(ch, beforeChar); };
3958 };
3959
3960 auto hit = d.begin() + idx;
3961 if (!d.needsDetach()) {
3962 *hit++ = achar;
3963 if (cs == Qt::CaseSensitive) {
3964 std::replace(hit, d.end(), bchar, achar);
3965 } else {
3966 bchar = foldCase(bchar);
3967 std::replace_if(hit, d.end(), matchesCIS(bchar), achar);
3968 }
3969 } else {
3970 QString other{ d.size, Qt::Uninitialized };
3971 auto dest = std::copy(d.begin(), hit, other.d.begin());
3972 *dest++ = achar;
3973 ++hit;
3974 if (cs == Qt::CaseSensitive) {
3975 std::replace_copy(hit, d.end(), dest, bchar, achar);
3976 } else {
3977 bchar = foldCase(bchar);
3978 std::replace_copy_if(hit, d.end(), dest, matchesCIS(bchar), achar);
3979 }
3980
3981 swap(other);
3982 }
3983 return *this;
3984}
3985
3986/*!
3987 \since 4.5
3988 \overload replace()
3989
3990 Replaces every occurrence in this string of the Latin-1 string viewed
3991 by \a before with the Latin-1 string viewed by \a after, and returns a
3992 reference to this string.
3993
3994 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
3995
3996 \note The text is not rescanned after a replacement.
3997
3998 \include qstring.cpp empty-before-arg-in-replace
3999*/
4000QString &QString::replace(QLatin1StringView before, QLatin1StringView after, Qt::CaseSensitivity cs)
4001{
4002 const qsizetype alen = after.size();
4003 const qsizetype blen = before.size();
4004 if (blen == 1 && alen == 1)
4005 return replace(before.front(), after.front(), cs);
4006
4007 QVarLengthArray<char16_t> a = qt_from_latin1_to_qvla(after);
4008 QVarLengthArray<char16_t> b = qt_from_latin1_to_qvla(before);
4009 return replace((const QChar *)b.data(), blen, (const QChar *)a.data(), alen, cs);
4010}
4011
4012/*!
4013 \since 4.5
4014 \overload replace()
4015
4016 Replaces every occurrence in this string of the Latin-1 string viewed
4017 by \a before with the string \a after, and returns a reference to this
4018 string.
4019
4020 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4021
4022 \note The text is not rescanned after a replacement.
4023
4024 \include qstring.cpp empty-before-arg-in-replace
4025*/
4026QString &QString::replace(QLatin1StringView before, const QString &after, Qt::CaseSensitivity cs)
4027{
4028 const qsizetype blen = before.size();
4029 if (blen == 1 && after.size() == 1)
4030 return replace(before.front(), after.front(), cs);
4031
4032 QVarLengthArray<char16_t> b = qt_from_latin1_to_qvla(before);
4033 return replace((const QChar *)b.data(), blen, after.constData(), after.d.size, cs);
4034}
4035
4036/*!
4037 \since 4.5
4038 \overload replace()
4039
4040 Replaces every occurrence of the string \a before with the string \a
4041 after and returns a reference to this string.
4042
4043 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4044
4045 \note The text is not rescanned after a replacement.
4046
4047 \include qstring.cpp empty-before-arg-in-replace
4048*/
4049QString &QString::replace(const QString &before, QLatin1StringView after, Qt::CaseSensitivity cs)
4050{
4051 const qsizetype alen = after.size();
4052 if (before.size() == 1 && alen == 1)
4053 return replace(before.front(), after.front(), cs);
4054
4055 QVarLengthArray<char16_t> a = qt_from_latin1_to_qvla(after);
4056 return replace(before.constData(), before.d.size, (const QChar *)a.data(), alen, cs);
4057}
4058
4059/*!
4060 \since 4.5
4061 \overload replace()
4062
4063 Replaces every occurrence of the character \a c with the string \a
4064 after and returns a reference to this string.
4065
4066 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4067
4068 \note The text is not rescanned after a replacement.
4069*/
4070QString &QString::replace(QChar c, QLatin1StringView after, Qt::CaseSensitivity cs)
4071{
4072 const qsizetype alen = after.size();
4073 if (alen == 1)
4074 return replace(c, after.front(), cs);
4075
4076 QVarLengthArray<char16_t> a = qt_from_latin1_to_qvla(after);
4077 return replace(&c, 1, (const QChar *)a.data(), alen, cs);
4078}
4079
4080/*!
4081 \fn bool QString::operator==(const QString &lhs, const QString &rhs)
4082 \overload operator==()
4083
4084 Returns \c true if string \a lhs is equal to string \a rhs; otherwise
4085 returns \c false.
4086
4087 \include qstring.cpp compare-isNull-vs-isEmpty
4088
4089 \sa {Comparing Strings}
4090*/
4091
4092/*!
4093 \fn bool QString::operator==(const QString &lhs, const QLatin1StringView &rhs)
4094
4095 \overload operator==()
4096
4097 Returns \c true if \a lhs is equal to \a rhs; otherwise
4098 returns \c false.
4099*/
4100
4101/*!
4102 \fn bool QString::operator==(const QLatin1StringView &lhs, const QString &rhs)
4103
4104 \overload operator==()
4105
4106 Returns \c true if \a lhs is equal to \a rhs; otherwise
4107 returns \c false.
4108*/
4109
4110/*! \fn bool QString::operator==(const QString &lhs, const QByteArray &rhs)
4111
4112 \overload operator==()
4113
4114 The \a rhs byte array is converted to a QUtf8StringView.
4115
4116 You can disable this operator by defining
4117 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
4118 can be useful if you want to ensure that all user-visible strings
4119 go through QObject::tr(), for example.
4120
4121 Returns \c true if string \a lhs is lexically equal to \a rhs.
4122 Otherwise returns \c false.
4123*/
4124
4125/*! \fn bool QString::operator==(const QString &lhs, const char * const &rhs)
4126
4127 \overload operator==()
4128
4129 The \a rhs const char pointer is converted to a QUtf8StringView.
4130
4131 You can disable this operator by defining
4132 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
4133 can be useful if you want to ensure that all user-visible strings
4134 go through QObject::tr(), for example.
4135*/
4136
4137/*!
4138 \fn bool QString::operator<(const QString &lhs, const QString &rhs)
4139
4140 \overload operator<()
4141
4142 Returns \c true if string \a lhs is lexically less than string
4143 \a rhs; otherwise returns \c false.
4144
4145 \sa {Comparing Strings}
4146*/
4147
4148/*!
4149 \fn bool QString::operator<(const QString &lhs, const QLatin1StringView &rhs)
4150
4151 \overload operator<()
4152
4153 Returns \c true if \a lhs is lexically less than \a rhs;
4154 otherwise returns \c false.
4155*/
4156
4157/*!
4158 \fn bool QString::operator<(const QLatin1StringView &lhs, const QString &rhs)
4159
4160 \overload operator<()
4161
4162 Returns \c true if \a lhs is lexically less than \a rhs;
4163 otherwise returns \c false.
4164*/
4165
4166/*! \fn bool QString::operator<(const QString &lhs, const QByteArray &rhs)
4167
4168 \overload operator<()
4169
4170 The \a rhs byte array is converted to a QUtf8StringView.
4171 If any NUL characters ('\\0') are embedded in the byte array, they will be
4172 included in the transformation.
4173
4174 You can disable this operator by defining
4175 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
4176 can be useful if you want to ensure that all user-visible strings
4177 go through QObject::tr(), for example.
4178*/
4179
4180/*! \fn bool QString::operator<(const QString &lhs, const char * const &rhs)
4181
4182 Returns \c true if string \a lhs is lexically less than string \a rhs.
4183 Otherwise returns \c false.
4184
4185 \overload operator<()
4186
4187 The \a rhs const char pointer is converted to a QUtf8StringView.
4188
4189 You can disable this operator by defining
4190 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
4191 can be useful if you want to ensure that all user-visible strings
4192 go through QObject::tr(), for example.
4193*/
4194
4195/*! \fn bool QString::operator<=(const QString &lhs, const QString &rhs)
4196
4197 Returns \c true if string \a lhs is lexically less than or equal to
4198 string \a rhs; otherwise returns \c false.
4199
4200 \sa {Comparing Strings}
4201*/
4202
4203/*!
4204 \fn bool QString::operator<=(const QString &lhs, const QLatin1StringView &rhs)
4205
4206 \overload operator<=()
4207
4208 Returns \c true if \a lhs is lexically less than or equal to \a rhs;
4209 otherwise returns \c false.
4210*/
4211
4212/*!
4213 \fn bool QString::operator<=(const QLatin1StringView &lhs, const QString &rhs)
4214
4215 \overload operator<=()
4216
4217 Returns \c true if \a lhs is lexically less than or equal to \a rhs;
4218 otherwise returns \c false.
4219*/
4220
4221/*! \fn bool QString::operator<=(const QString &lhs, const QByteArray &rhs)
4222
4223 \overload operator<=()
4224
4225 The \a rhs byte array is converted to a QUtf8StringView.
4226 If any NUL characters ('\\0') are embedded in the byte array, they will be
4227 included in the transformation.
4228
4229 You can disable this operator by defining
4230 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
4231 can be useful if you want to ensure that all user-visible strings
4232 go through QObject::tr(), for example.
4233*/
4234
4235/*! \fn bool QString::operator<=(const QString &lhs, const char * const &rhs)
4236
4237 \overload operator<=()
4238
4239 The \a rhs const char pointer is converted to a QUtf8StringView.
4240
4241 You can disable this operator by defining
4242 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
4243 can be useful if you want to ensure that all user-visible strings
4244 go through QObject::tr(), for example.
4245*/
4246
4247/*! \fn bool QString::operator>(const QString &lhs, const QString &rhs)
4248
4249 Returns \c true if string \a lhs is lexically greater than string \a rhs;
4250 otherwise returns \c false.
4251
4252 \sa {Comparing Strings}
4253*/
4254
4255/*!
4256 \fn bool QString::operator>(const QString &lhs, const QLatin1StringView &rhs)
4257
4258 \overload operator>()
4259
4260 Returns \c true if \a lhs is lexically greater than \a rhs;
4261 otherwise returns \c false.
4262*/
4263
4264/*!
4265 \fn bool QString::operator>(const QLatin1StringView &lhs, const QString &rhs)
4266
4267 \overload operator>()
4268
4269 Returns \c true if \a lhs is lexically greater than \a rhs;
4270 otherwise returns \c false.
4271*/
4272
4273/*! \fn bool QString::operator>(const QString &lhs, const QByteArray &rhs)
4274
4275 \overload operator>()
4276
4277 The \a rhs byte array is converted to a QUtf8StringView.
4278 If any NUL characters ('\\0') are embedded in the byte array, they will be
4279 included in the transformation.
4280
4281 You can disable this operator by defining
4282 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
4283 can be useful if you want to ensure that all user-visible strings
4284 go through QObject::tr(), for example.
4285*/
4286
4287/*! \fn bool QString::operator>(const QString &lhs, const char * const &rhs)
4288
4289 \overload operator>()
4290
4291 The \a rhs const char pointer is converted to a QUtf8StringView.
4292
4293 You can disable this operator by defining \l QT_NO_CAST_FROM_ASCII
4294 when you compile your applications. This can be useful if you want
4295 to ensure that all user-visible strings go through QObject::tr(),
4296 for example.
4297*/
4298
4299/*! \fn bool QString::operator>=(const QString &lhs, const QString &rhs)
4300
4301 Returns \c true if string \a lhs is lexically greater than or equal to
4302 string \a rhs; otherwise returns \c false.
4303
4304 \sa {Comparing Strings}
4305*/
4306
4307/*!
4308 \fn bool QString::operator>=(const QString &lhs, const QLatin1StringView &rhs)
4309
4310 \overload operator>=()
4311
4312 Returns \c true if \a lhs is lexically greater than or equal to \a rhs;
4313 otherwise returns \c false.
4314*/
4315
4316/*!
4317 \fn bool QString::operator>=(const QLatin1StringView &lhs, const QString &rhs)
4318
4319 \overload operator>=()
4320
4321 Returns \c true if \a lhs is lexically greater than or equal to \a rhs;
4322 otherwise returns \c false.
4323*/
4324
4325/*! \fn bool QString::operator>=(const QString &lhs, const QByteArray &rhs)
4326
4327 \overload operator>=()
4328
4329 The \a rhs byte array is converted to a QUtf8StringView.
4330 If any NUL characters ('\\0') are embedded in the byte array, they will be
4331 included in the transformation.
4332
4333 You can disable this operator by defining \l QT_NO_CAST_FROM_ASCII
4334 when you compile your applications. This can be useful if you want
4335 to ensure that all user-visible strings go through QObject::tr(),
4336 for example.
4337*/
4338
4339/*! \fn bool QString::operator>=(const QString &lhs, const char * const &rhs)
4340
4341 \overload operator>=()
4342
4343 The \a rhs const char pointer is converted to a QUtf8StringView.
4344
4345 You can disable this operator by defining \l QT_NO_CAST_FROM_ASCII
4346 when you compile your applications. This can be useful if you want
4347 to ensure that all user-visible strings go through QObject::tr(),
4348 for example.
4349*/
4350
4351/*! \fn bool QString::operator!=(const QString &lhs, const QString &rhs)
4352
4353 Returns \c true if string \a lhs is not equal to string \a rhs;
4354 otherwise returns \c false.
4355
4356 \sa {Comparing Strings}
4357*/
4358
4359/*! \fn bool QString::operator!=(const QString &lhs, const QLatin1StringView &rhs)
4360
4361 Returns \c true if string \a lhs is not equal to string \a rhs.
4362 Otherwise returns \c false.
4363
4364 \overload operator!=()
4365*/
4366
4367/*! \fn bool QString::operator!=(const QString &lhs, const QByteArray &rhs)
4368
4369 \overload operator!=()
4370
4371 The \a rhs byte array is converted to a QUtf8StringView.
4372 If any NUL characters ('\\0') are embedded in the byte array, they will be
4373 included in the transformation.
4374
4375 You can disable this operator by defining \l QT_NO_CAST_FROM_ASCII
4376 when you compile your applications. This can be useful if you want
4377 to ensure that all user-visible strings go through QObject::tr(),
4378 for example.
4379*/
4380
4381/*! \fn bool QString::operator!=(const QString &lhs, const char * const &rhs)
4382
4383 \overload operator!=()
4384
4385 The \a rhs const char pointer is converted to a QUtf8StringView.
4386
4387 You can disable this operator by defining
4388 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
4389 can be useful if you want to ensure that all user-visible strings
4390 go through QObject::tr(), for example.
4391*/
4392
4393/*! \fn bool QString::operator==(const QByteArray &lhs, const QString &rhs)
4394
4395 Returns \c true if byte array \a lhs is equal to the UTF-8 encoding of
4396 \a rhs; otherwise returns \c false.
4397
4398 The comparison is case sensitive.
4399
4400 You can disable this operator by defining \c
4401 QT_NO_CAST_FROM_ASCII when you compile your applications. You
4402 then need to call QString::fromUtf8(), QString::fromLatin1(),
4403 or QString::fromLocal8Bit() explicitly if you want to convert the byte
4404 array to a QString before doing the comparison.
4405*/
4406
4407/*! \fn bool QString::operator!=(const QByteArray &lhs, const QString &rhs)
4408
4409 Returns \c true if byte array \a lhs is not equal to the UTF-8 encoding of
4410 \a rhs; otherwise returns \c false.
4411
4412 The comparison is case sensitive.
4413
4414 You can disable this operator by defining \c
4415 QT_NO_CAST_FROM_ASCII when you compile your applications. You
4416 then need to call QString::fromUtf8(), QString::fromLatin1(),
4417 or QString::fromLocal8Bit() explicitly if you want to convert the byte
4418 array to a QString before doing the comparison.
4419*/
4420
4421/*! \fn bool QString::operator<(const QByteArray &lhs, const QString &rhs)
4422
4423 Returns \c true if byte array \a lhs is lexically less than the UTF-8 encoding
4424 of \a rhs; otherwise returns \c false.
4425
4426 The comparison is case sensitive.
4427
4428 You can disable this operator by defining \c
4429 QT_NO_CAST_FROM_ASCII when you compile your applications. You
4430 then need to call QString::fromUtf8(), QString::fromLatin1(),
4431 or QString::fromLocal8Bit() explicitly if you want to convert the byte
4432 array to a QString before doing the comparison.
4433*/
4434
4435/*! \fn bool QString::operator>(const QByteArray &lhs, const QString &rhs)
4436
4437 Returns \c true if byte array \a lhs is lexically greater than the UTF-8
4438 encoding of \a rhs; otherwise returns \c false.
4439
4440 The comparison is case sensitive.
4441
4442 You can disable this operator by defining \c
4443 QT_NO_CAST_FROM_ASCII when you compile your applications. You
4444 then need to call QString::fromUtf8(), QString::fromLatin1(),
4445 or QString::fromLocal8Bit() explicitly if you want to convert the byte
4446 array to a QString before doing the comparison.
4447*/
4448
4449/*! \fn bool QString::operator<=(const QByteArray &lhs, const QString &rhs)
4450
4451 Returns \c true if byte array \a lhs is lexically less than or equal to the
4452 UTF-8 encoding of \a rhs; otherwise returns \c false.
4453
4454 The comparison is case sensitive.
4455
4456 You can disable this operator by defining \c
4457 QT_NO_CAST_FROM_ASCII when you compile your applications. You
4458 then need to call QString::fromUtf8(), QString::fromLatin1(),
4459 or QString::fromLocal8Bit() explicitly if you want to convert the byte
4460 array to a QString before doing the comparison.
4461*/
4462
4463/*! \fn bool QString::operator>=(const QByteArray &lhs, const QString &rhs)
4464
4465 Returns \c true if byte array \a lhs is lexically greater than or equal to the UTF-8
4466 encoding of \a rhs; otherwise returns \c false.
4467
4468 The comparison is case sensitive.
4469
4470 You can disable this operator by defining \c
4471 QT_NO_CAST_FROM_ASCII when you compile your applications. You
4472 then need to call QString::fromUtf8(), QString::fromLatin1(),
4473 or QString::fromLocal8Bit() explicitly if you want to convert the byte
4474 array to a QString before doing the comparison.
4475*/
4476
4477/*!
4478 \include qstring.qdocinc {qstring-first-index-of} {string} {str}
4479
4480 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4481
4482 Example:
4483
4484 \snippet qstring/main.cpp 24
4485
4486 \include qstring.qdocinc negative-index-start-search-from-end
4487
4488 \sa lastIndexOf(), contains(), count()
4489*/
4490qsizetype QString::indexOf(const QString &str, qsizetype from, Qt::CaseSensitivity cs) const
4491{
4492 return QtPrivate::findString(QStringView(unicode(), size()), from, QStringView(str.unicode(), str.size()), cs);
4493}
4494
4495/*!
4496 \fn qsizetype QString::indexOf(QStringView str, qsizetype from, Qt::CaseSensitivity cs) const
4497 \since 5.14
4498 \overload indexOf()
4499
4500 \include qstring.qdocinc {qstring-first-index-of} {string view} {str}
4501
4502 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4503
4504 \include qstring.qdocinc negative-index-start-search-from-end
4505
4506 \sa QStringView::indexOf(), lastIndexOf(), contains(), count()
4507*/
4508
4509/*!
4510 \since 4.5
4511
4512 \include {qstring.qdocinc} {qstring-first-index-of} {Latin-1 string viewed by} {str}
4513
4514 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4515
4516 Example:
4517
4518 \snippet qstring/main.cpp 24
4519
4520 \include qstring.qdocinc negative-index-start-search-from-end
4521
4522 \sa lastIndexOf(), contains(), count()
4523*/
4524
4525qsizetype QString::indexOf(QLatin1StringView str, qsizetype from, Qt::CaseSensitivity cs) const
4526{
4527 return QtPrivate::findString(QStringView(unicode(), size()), from, str, cs);
4528}
4529
4530/*!
4531 \fn qsizetype QString::indexOf(QChar ch, qsizetype from, Qt::CaseSensitivity cs) const
4532 \overload indexOf()
4533
4534 \include qstring.qdocinc {qstring-first-index-of} {character} {ch}
4535*/
4536
4537/*!
4538 \include qstring.qdocinc {qstring-last-index-of} {string} {str}
4539
4540 \include qstring.qdocinc negative-index-start-search-from-end
4541
4542 Returns -1 if \a str is not found.
4543
4544 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4545
4546 Example:
4547
4548 \snippet qstring/main.cpp 29
4549
4550 \note When searching for a 0-length \a str, the match at the end of
4551 the data is excluded from the search by a negative \a from, even
4552 though \c{-1} is normally thought of as searching from the end of the
4553 string: the match at the end is \e after the last character, so it is
4554 excluded. To include such a final empty match, either give a positive
4555 value for \a from or omit the \a from parameter entirely.
4556
4557 \sa indexOf(), contains(), count()
4558*/
4559qsizetype QString::lastIndexOf(const QString &str, qsizetype from, Qt::CaseSensitivity cs) const
4560{
4561 return QtPrivate::lastIndexOf(QStringView(*this), from, str, cs);
4562}
4563
4564/*!
4565 \fn qsizetype QString::lastIndexOf(const QString &str, Qt::CaseSensitivity cs = Qt::CaseSensitive) const
4566 \since 6.2
4567 \overload lastIndexOf()
4568
4569 Returns the index position of the last occurrence of the string \a
4570 str in this string. Returns -1 if \a str is not found.
4571
4572 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4573
4574 Example:
4575
4576 \snippet qstring/main.cpp 29
4577
4578 \sa indexOf(), contains(), count()
4579*/
4580
4581
4582/*!
4583 \since 4.5
4584 \overload lastIndexOf()
4585
4586 \include qstring.qdocinc {qstring-last-index-of} {Latin-1 string viewed by} {str}
4587
4588 \include qstring.qdocinc negative-index-start-search-from-end
4589
4590 Returns -1 if \a str is not found.
4591
4592 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4593
4594 Example:
4595
4596 \snippet qstring/main.cpp 29
4597
4598 \note When searching for a 0-length \a str, the match at the end of
4599 the data is excluded from the search by a negative \a from, even
4600 though \c{-1} is normally thought of as searching from the end of the
4601 string: the match at the end is \e after the last character, so it is
4602 excluded. To include such a final empty match, either give a positive
4603 value for \a from or omit the \a from parameter entirely.
4604
4605 \sa indexOf(), contains(), count()
4606*/
4607qsizetype QString::lastIndexOf(QLatin1StringView str, qsizetype from, Qt::CaseSensitivity cs) const
4608{
4609 return QtPrivate::lastIndexOf(*this, from, str, cs);
4610}
4611
4612/*!
4613 \fn qsizetype QString::lastIndexOf(QLatin1StringView str, Qt::CaseSensitivity cs = Qt::CaseSensitive) const
4614 \since 6.2
4615 \overload lastIndexOf()
4616
4617 Returns the index position of the last occurrence of the string \a
4618 str in this string. Returns -1 if \a str is not found.
4619
4620 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4621
4622 Example:
4623
4624 \snippet qstring/main.cpp 29
4625
4626 \sa indexOf(), contains(), count()
4627*/
4628
4629/*!
4630 \fn qsizetype QString::lastIndexOf(QChar ch, qsizetype from, Qt::CaseSensitivity cs) const
4631 \overload lastIndexOf()
4632
4633 \include qstring.qdocinc {qstring-last-index-of} {character} {ch}
4634*/
4635
4636/*!
4637 \fn QString::lastIndexOf(QChar ch, Qt::CaseSensitivity) const
4638 \since 6.3
4639 \overload lastIndexOf()
4640*/
4641
4642/*!
4643 \fn qsizetype QString::lastIndexOf(QStringView str, qsizetype from, Qt::CaseSensitivity cs) const
4644 \since 5.14
4645 \overload lastIndexOf()
4646
4647 \include qstring.qdocinc {qstring-last-index-of} {string view} {str}
4648
4649 \include qstring.qdocinc negative-index-start-search-from-end
4650
4651 Returns -1 if \a str is not found.
4652
4653 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4654
4655 \note When searching for a 0-length \a str, the match at the end of
4656 the data is excluded from the search by a negative \a from, even
4657 though \c{-1} is normally thought of as searching from the end of the
4658 string: the match at the end is \e after the last character, so it is
4659 excluded. To include such a final empty match, either give a positive
4660 value for \a from or omit the \a from parameter entirely.
4661
4662 \sa indexOf(), contains(), count()
4663*/
4664
4665/*!
4666 \fn qsizetype QString::lastIndexOf(QStringView str, Qt::CaseSensitivity cs = Qt::CaseSensitive) const
4667 \since 6.2
4668 \overload lastIndexOf()
4669
4670 Returns the index position of the last occurrence of the string view \a
4671 str in this string. Returns -1 if \a str is not found.
4672
4673 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4674
4675 \sa indexOf(), contains(), count()
4676*/
4677
4678#if QT_CONFIG(regularexpression)
4679struct QStringCapture
4680{
4681 qsizetype pos;
4682 qsizetype len;
4683 int no;
4684};
4685Q_DECLARE_TYPEINFO(QStringCapture, Q_PRIMITIVE_TYPE);
4686
4687/*!
4688 \overload replace()
4689 \since 5.0
4690
4691 Replaces every occurrence of the regular expression \a re in the
4692 string with \a after. Returns a reference to the string. For
4693 example:
4694
4695 \snippet qstring/main.cpp 87
4696
4697 For regular expressions containing capturing groups,
4698 occurrences of \b{\\1}, \b{\\2}, ..., in \a after are replaced
4699 with the string captured by the corresponding capturing group.
4700
4701 \snippet qstring/main.cpp 88
4702
4703 \sa indexOf(), lastIndexOf(), remove(), QRegularExpression, QRegularExpressionMatch
4704*/
4705QString &QString::replace(const QRegularExpression &re, const QString &after)
4706{
4707 if (!re.isValid()) {
4708 qtWarnAboutInvalidRegularExpression(re.pattern(), "QString::replace");
4709 return *this;
4710 }
4711
4712 const QString copy(*this);
4713 QRegularExpressionMatchIterator iterator = re.globalMatch(copy);
4714 if (!iterator.hasNext()) // no matches at all
4715 return *this;
4716
4717 reallocData(d.size, QArrayData::KeepSize);
4718
4719 qsizetype numCaptures = re.captureCount();
4720
4721 // 1. build the backreferences list, holding where the backreferences
4722 // are in the replacement string
4723 QVarLengthArray<QStringCapture> backReferences;
4724 const qsizetype al = after.size();
4725 const QChar *ac = after.unicode();
4726
4727 for (qsizetype i = 0; i < al - 1; i++) {
4728 if (ac[i] == u'\\') {
4729 int no = ac[i + 1].digitValue();
4730 if (no > 0 && no <= numCaptures) {
4731 QStringCapture backReference;
4732 backReference.pos = i;
4733 backReference.len = 2;
4734
4735 if (i < al - 2) {
4736 int secondDigit = ac[i + 2].digitValue();
4737 if (secondDigit != -1 && ((no * 10) + secondDigit) <= numCaptures) {
4738 no = (no * 10) + secondDigit;
4739 ++backReference.len;
4740 }
4741 }
4742
4743 backReference.no = no;
4744 backReferences.append(backReference);
4745 }
4746 }
4747 }
4748
4749 // 2. iterate on the matches. For every match, copy in chunks
4750 // - the part before the match
4751 // - the after string, with the proper replacements for the backreferences
4752
4753 qsizetype newLength = 0; // length of the new string, with all the replacements
4754 qsizetype lastEnd = 0;
4755 QVarLengthArray<QStringView> chunks;
4756 const QStringView copyView{ copy }, afterView{ after };
4757 while (iterator.hasNext()) {
4758 QRegularExpressionMatch match = iterator.next();
4759 qsizetype len;
4760 // add the part before the match
4761 len = match.capturedStart() - lastEnd;
4762 if (len > 0) {
4763 chunks << copyView.mid(lastEnd, len);
4764 newLength += len;
4765 }
4766
4767 lastEnd = 0;
4768 // add the after string, with replacements for the backreferences
4769 for (const QStringCapture &backReference : std::as_const(backReferences)) {
4770 // part of "after" before the backreference
4771 len = backReference.pos - lastEnd;
4772 if (len > 0) {
4773 chunks << afterView.mid(lastEnd, len);
4774 newLength += len;
4775 }
4776
4777 // backreference itself
4778 len = match.capturedLength(backReference.no);
4779 if (len > 0) {
4780 chunks << copyView.mid(match.capturedStart(backReference.no), len);
4781 newLength += len;
4782 }
4783
4784 lastEnd = backReference.pos + backReference.len;
4785 }
4786
4787 // add the last part of the after string
4788 len = afterView.size() - lastEnd;
4789 if (len > 0) {
4790 chunks << afterView.mid(lastEnd, len);
4791 newLength += len;
4792 }
4793
4794 lastEnd = match.capturedEnd();
4795 }
4796
4797 // 3. trailing string after the last match
4798 if (copyView.size() > lastEnd) {
4799 chunks << copyView.mid(lastEnd);
4800 newLength += copyView.size() - lastEnd;
4801 }
4802
4803 // 4. assemble the chunks together
4804 resize(newLength);
4805 qsizetype i = 0;
4806 QChar *uc = data();
4807 for (const QStringView &chunk : std::as_const(chunks)) {
4808 qsizetype len = chunk.size();
4809 memcpy(uc + i, chunk.constData(), len * sizeof(QChar));
4810 i += len;
4811 }
4812
4813 return *this;
4814}
4815#endif // QT_CONFIG(regularexpression)
4816
4817/*!
4818 Returns the number of (potentially overlapping) occurrences of
4819 the string \a str in this string.
4820
4821 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4822
4823 \sa contains(), indexOf()
4824*/
4825
4826qsizetype QString::count(const QString &str, Qt::CaseSensitivity cs) const
4827{
4828 return QtPrivate::count(QStringView(unicode(), size()), QStringView(str.unicode(), str.size()), cs);
4829}
4830
4831/*!
4832 \overload count()
4833
4834 Returns the number of occurrences of character \a ch in the string.
4835
4836 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4837
4838 \sa contains(), indexOf()
4839*/
4840
4841qsizetype QString::count(QChar ch, Qt::CaseSensitivity cs) const
4842{
4843 return QtPrivate::count(QStringView(unicode(), size()), ch, cs);
4844}
4845
4846/*!
4847 \since 6.0
4848 \overload count()
4849 Returns the number of (potentially overlapping) occurrences of the
4850 string view \a str in this string.
4851
4852 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4853
4854 \sa contains(), indexOf()
4855*/
4856qsizetype QString::count(QStringView str, Qt::CaseSensitivity cs) const
4857{
4858 return QtPrivate::count(*this, str, cs);
4859}
4860
4861/*! \fn bool QString::contains(const QString &str, Qt::CaseSensitivity cs = Qt::CaseSensitive) const
4862
4863 Returns \c true if this string contains an occurrence of the string
4864 \a str; otherwise returns \c false.
4865
4866 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4867
4868 Example:
4869 \snippet qstring/main.cpp 17
4870
4871 \sa indexOf(), count()
4872*/
4873
4874/*! \fn bool QString::contains(QLatin1StringView str, Qt::CaseSensitivity cs = Qt::CaseSensitive) const
4875 \since 5.3
4876
4877 \overload contains()
4878
4879 Returns \c true if this string contains an occurrence of the Latin-1 string
4880 \a str; otherwise returns \c false.
4881*/
4882
4883/*! \fn bool QString::contains(QChar ch, Qt::CaseSensitivity cs = Qt::CaseSensitive) const
4884
4885 \overload contains()
4886
4887 Returns \c true if this string contains an occurrence of the
4888 character \a ch; otherwise returns \c false.
4889*/
4890
4891/*! \fn bool QString::contains(QStringView str, Qt::CaseSensitivity cs = Qt::CaseSensitive) const
4892 \since 5.14
4893 \overload contains()
4894
4895 Returns \c true if this string contains an occurrence of the string view
4896 \a str; otherwise returns \c false.
4897
4898 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
4899
4900 \sa indexOf(), count()
4901*/
4902
4903#if QT_CONFIG(regularexpression)
4904/*!
4905 \since 5.5
4906
4907 Returns the index position of the first match of the regular
4908 expression \a re in the string, searching forward from index
4909 position \a from. Returns -1 if \a re didn't match anywhere.
4910
4911 If the match is successful and \a rmatch is not \nullptr, it also
4912 writes the results of the match into the QRegularExpressionMatch object
4913 pointed to by \a rmatch.
4914
4915 Example:
4916
4917 \snippet qstring/main.cpp 93
4918*/
4919qsizetype QString::indexOf(const QRegularExpression &re, qsizetype from, QRegularExpressionMatch *rmatch) const
4920{
4921 return QtPrivate::indexOf(QStringView(*this), this, re, from, rmatch);
4922}
4923
4924/*!
4925 \since 5.5
4926
4927 Returns the index position of the last match of the regular
4928 expression \a re in the string, which starts before the index
4929 position \a from.
4930
4931 \include qstring.qdocinc negative-index-start-search-from-end
4932
4933 Returns -1 if \a re didn't match anywhere.
4934
4935 If the match is successful and \a rmatch is not \nullptr, it also
4936 writes the results of the match into the QRegularExpressionMatch object
4937 pointed to by \a rmatch.
4938
4939 Example:
4940
4941 \snippet qstring/main.cpp 94
4942
4943 \note Due to how the regular expression matching algorithm works,
4944 this function will actually match repeatedly from the beginning of
4945 the string until the position \a from is reached.
4946
4947 \note When searching for a regular expression \a re that may match
4948 0 characters, the match at the end of the data is excluded from the
4949 search by a negative \a from, even though \c{-1} is normally
4950 thought of as searching from the end of the string: the match at
4951 the end is \e after the last character, so it is excluded. To
4952 include such a final empty match, either give a positive value for
4953 \a from or omit the \a from parameter entirely.
4954*/
4955qsizetype QString::lastIndexOf(const QRegularExpression &re, qsizetype from, QRegularExpressionMatch *rmatch) const
4956{
4957 return QtPrivate::lastIndexOf(QStringView(*this), this, re, from, rmatch);
4958}
4959
4960/*!
4961 \fn qsizetype QString::lastIndexOf(const QRegularExpression &re, QRegularExpressionMatch *rmatch = nullptr) const
4962 \since 6.2
4963 \overload lastIndexOf()
4964
4965 Returns the index position of the last match of the regular
4966 expression \a re in the string. Returns -1 if \a re didn't match anywhere.
4967
4968 If the match is successful and \a rmatch is not \nullptr, it also
4969 writes the results of the match into the QRegularExpressionMatch object
4970 pointed to by \a rmatch.
4971
4972 Example:
4973
4974 \snippet qstring/main.cpp 94
4975
4976 \note Due to how the regular expression matching algorithm works,
4977 this function will actually match repeatedly from the beginning of
4978 the string until the end of the string is reached.
4979*/
4980
4981/*!
4982 \since 5.1
4983
4984 Returns \c true if the regular expression \a re matches somewhere in this
4985 string; otherwise returns \c false.
4986
4987 If the match is successful and \a rmatch is not \nullptr, it also
4988 writes the results of the match into the QRegularExpressionMatch object
4989 pointed to by \a rmatch.
4990
4991 \sa QRegularExpression::match()
4992*/
4993
4994bool QString::contains(const QRegularExpression &re, QRegularExpressionMatch *rmatch) const
4995{
4996 return QtPrivate::contains(QStringView(*this), this, re, rmatch);
4997}
4998
4999/*!
5000 \overload count()
5001 \since 5.0
5002
5003 Returns the number of times the regular expression \a re matches
5004 in the string.
5005
5006 For historical reasons, this function counts overlapping matches,
5007 so in the example below, there are four instances of "ana" or
5008 "ama":
5009
5010 \snippet qstring/main.cpp 95
5011
5012 This behavior is different from simply iterating over the matches
5013 in the string using QRegularExpressionMatchIterator.
5014
5015 \sa QRegularExpression::globalMatch()
5016*/
5017qsizetype QString::count(const QRegularExpression &re) const
5018{
5019 return QtPrivate::count(QStringView(*this), re);
5020}
5021#endif // QT_CONFIG(regularexpression)
5022
5023#if QT_DEPRECATED_SINCE(6, 4)
5024/*! \fn qsizetype QString::count() const
5025 \deprecated [6.4] Use size() or length() instead.
5026 \overload count()
5027
5028 Same as size().
5029*/
5030#endif
5031
5032/*!
5033 \enum QString::SectionFlag
5034
5035 This enum specifies flags that can be used to affect various
5036 aspects of the section() function's behavior with respect to
5037 separators and empty fields.
5038
5039 \value SectionDefault Empty fields are counted, leading and
5040 trailing separators are not included, and the separator is
5041 compared case sensitively.
5042
5043 \value SectionSkipEmpty Treat empty fields as if they don't exist,
5044 i.e. they are not considered as far as \e start and \e end are
5045 concerned.
5046
5047 \value SectionIncludeLeadingSep Include the leading separator (if
5048 any) in the result string.
5049
5050 \value SectionIncludeTrailingSep Include the trailing separator
5051 (if any) in the result string.
5052
5053 \value SectionCaseInsensitiveSeps Compare the separator
5054 case-insensitively.
5055
5056 \sa section()
5057*/
5058
5059/*!
5060 \fn QString QString::section(QChar sep, qsizetype start, qsizetype end = -1, SectionFlags flags) const
5061
5062 This function returns a section of the string.
5063
5064 This string is treated as a sequence of fields separated by the
5065 character, \a sep. The returned string consists of the fields from
5066 position \a start to position \a end inclusive. If \a end is not
5067 specified, all fields from position \a start to the end of the
5068 string are included. Fields are numbered 0, 1, 2, etc., counting
5069 from the left, and -1, -2, etc., counting from right to left.
5070
5071 The \a flags argument can be used to affect some aspects of the
5072 function's behavior, e.g. whether to be case sensitive, whether
5073 to skip empty fields and how to deal with leading and trailing
5074 separators; see \l{SectionFlags}.
5075
5076 \snippet qstring/main.cpp 52
5077
5078 If \a start or \a end is negative, we count fields from the right
5079 of the string, the right-most field being -1, the one before the
5080 right-most field being -2, and so on.
5081
5082 \snippet qstring/main.cpp 53
5083
5084 \sa split()
5085*/
5086
5087/*!
5088 \overload section()
5089
5090 \snippet qstring/main.cpp 51
5091 \snippet qstring/main.cpp 54
5092
5093 \sa split()
5094*/
5095
5096QString QString::section(const QString &sep, qsizetype start, qsizetype end, SectionFlags flags) const
5097{
5098 const QList<QStringView> sections = QStringView{ *this }.split(
5099 sep, Qt::KeepEmptyParts, (flags & SectionCaseInsensitiveSeps) ? Qt::CaseInsensitive : Qt::CaseSensitive);
5100 const qsizetype sectionsSize = sections.size();
5101 if (!(flags & SectionSkipEmpty)) {
5102 if (start < 0)
5103 start += sectionsSize;
5104 if (end < 0)
5105 end += sectionsSize;
5106 } else {
5107 qsizetype skip = 0;
5108 for (qsizetype k = 0; k < sectionsSize; ++k) {
5109 if (sections.at(k).isEmpty())
5110 skip++;
5111 }
5112 if (start < 0)
5113 start += sectionsSize - skip;
5114 if (end < 0)
5115 end += sectionsSize - skip;
5116 }
5117 if (start >= sectionsSize || end < 0 || start > end)
5118 return QString();
5119
5120 QString ret;
5121 qsizetype first_i = start, last_i = end;
5122 for (qsizetype x = 0, i = 0; x <= end && i < sectionsSize; ++i) {
5123 const QStringView &section = sections.at(i);
5124 const bool empty = section.isEmpty();
5125 if (x >= start) {
5126 if (x == start)
5127 first_i = i;
5128 if (x == end)
5129 last_i = i;
5130 if (x > start && i > 0)
5131 ret += sep;
5132 ret += section;
5133 }
5134 if (!empty || !(flags & SectionSkipEmpty))
5135 x++;
5136 }
5137 if ((flags & SectionIncludeLeadingSep) && first_i > 0)
5138 ret.prepend(sep);
5139 if ((flags & SectionIncludeTrailingSep) && last_i < sectionsSize - 1)
5140 ret += sep;
5141 return ret;
5142}
5143
5144#if QT_CONFIG(regularexpression)
5145class qt_section_chunk {
5146public:
5147 qt_section_chunk() {}
5148 qt_section_chunk(qsizetype l, QStringView s) : length(l), string(std::move(s)) {}
5149 qsizetype length;
5150 QStringView string;
5151};
5152Q_DECLARE_TYPEINFO(qt_section_chunk, Q_RELOCATABLE_TYPE);
5153
5154static QString extractSections(QSpan<qt_section_chunk> sections, qsizetype start, qsizetype end,
5155 QString::SectionFlags flags)
5156{
5157 const qsizetype sectionsSize = sections.size();
5158
5159 if (!(flags & QString::SectionSkipEmpty)) {
5160 if (start < 0)
5161 start += sectionsSize;
5162 if (end < 0)
5163 end += sectionsSize;
5164 } else {
5165 qsizetype skip = 0;
5166 for (qsizetype k = 0; k < sectionsSize; ++k) {
5167 const qt_section_chunk &section = sections[k];
5168 if (section.length == section.string.size())
5169 skip++;
5170 }
5171 if (start < 0)
5172 start += sectionsSize - skip;
5173 if (end < 0)
5174 end += sectionsSize - skip;
5175 }
5176 if (start >= sectionsSize || end < 0 || start > end)
5177 return QString();
5178
5179 QString ret;
5180 qsizetype x = 0;
5181 qsizetype first_i = start, last_i = end;
5182 for (qsizetype i = 0; x <= end && i < sectionsSize; ++i) {
5183 const qt_section_chunk &section = sections[i];
5184 const bool empty = (section.length == section.string.size());
5185 if (x >= start) {
5186 if (x == start)
5187 first_i = i;
5188 if (x == end)
5189 last_i = i;
5190 if (x != start)
5191 ret += section.string;
5192 else
5193 ret += section.string.mid(section.length);
5194 }
5195 if (!empty || !(flags & QString::SectionSkipEmpty))
5196 x++;
5197 }
5198
5199 if ((flags & QString::SectionIncludeLeadingSep) && first_i >= 0) {
5200 const qt_section_chunk &section = sections[first_i];
5201 ret.prepend(section.string.left(section.length));
5202 }
5203
5204 if ((flags & QString::SectionIncludeTrailingSep)
5205 && last_i < sectionsSize - 1) {
5206 const qt_section_chunk &section = sections[last_i + 1];
5207 ret += section.string.left(section.length);
5208 }
5209
5210 return ret;
5211}
5212
5213/*!
5214 \overload section()
5215 \since 5.0
5216
5217 This string is treated as a sequence of fields separated by the
5218 regular expression, \a re.
5219
5220 \snippet qstring/main.cpp 89
5221
5222 \warning Using this QRegularExpression version is much more expensive than
5223 the overloaded string and character versions.
5224
5225 \sa split(), simplified()
5226*/
5227QString QString::section(const QRegularExpression &re, qsizetype start, qsizetype end, SectionFlags flags) const
5228{
5229 if (!re.isValid()) {
5230 qtWarnAboutInvalidRegularExpression(re.pattern(), "QString::section");
5231 return QString();
5232 }
5233
5234 const QChar *uc = unicode();
5235 if (!uc)
5236 return QString();
5237
5238 QRegularExpression sep(re);
5239 if (flags & SectionCaseInsensitiveSeps)
5240 sep.setPatternOptions(sep.patternOptions() | QRegularExpression::CaseInsensitiveOption);
5241
5242 QVarLengthArray<qt_section_chunk> sections;
5243 qsizetype n = size(), m = 0, last_m = 0, last_len = 0;
5244 QRegularExpressionMatchIterator iterator = sep.globalMatch(*this);
5245 while (iterator.hasNext()) {
5246 QRegularExpressionMatch match = iterator.next();
5247 m = match.capturedStart();
5248 sections.append(qt_section_chunk(last_len, QStringView{ *this }.sliced(last_m, m - last_m)));
5249 last_m = m;
5250 last_len = match.capturedLength();
5251 }
5252 sections.append(qt_section_chunk(last_len, QStringView{ *this }.sliced(last_m, n - last_m)));
5253
5254 return extractSections(sections, start, end, flags);
5255}
5256#endif // QT_CONFIG(regularexpression)
5257
5258/*!
5259 \fn QString QString::left(qsizetype n) const &
5260 \fn QString QString::left(qsizetype n) &&
5261
5262 Returns a substring that contains the \a n leftmost characters of
5263 this string (that is, from the beginning of this string up to, but not
5264 including, the element at index position \a n).
5265
5266 If you know that \a n cannot be out of bounds, use first() instead in new
5267 code, because it is faster.
5268
5269 The entire string is returned if \a n is greater than or equal
5270 to size(), or less than zero.
5271
5272 \sa first(), last(), startsWith(), chopped(), chop(), truncate()
5273*/
5274
5275/*!
5276 \fn QString QString::right(qsizetype n) const &
5277 \fn QString QString::right(qsizetype n) &&
5278
5279 Returns a substring that contains the \a n rightmost characters
5280 of the string.
5281
5282 If you know that \a n cannot be out of bounds, use last() instead in new
5283 code, because it is faster.
5284
5285 The entire string is returned if \a n is greater than or equal
5286 to size(), or less than zero.
5287
5288 \sa endsWith(), last(), first(), sliced(), chopped(), chop(), truncate(), slice()
5289*/
5290
5291/*!
5292 \fn QString QString::mid(qsizetype position, qsizetype n) const &
5293 \fn QString QString::mid(qsizetype position, qsizetype n) &&
5294
5295 Returns a string that contains \a n characters of this string, starting
5296 at the specified \a position index up to, but not including, the element
5297 at index position \c {\a position + n}.
5298
5299 If you know that \a position and \a n cannot be out of bounds, use sliced()
5300 instead in new code, because it is faster.
5301
5302 Returns a null string if the \a position index exceeds the
5303 length of the string. If there are fewer than \a n characters
5304 available in the string starting at the given \a position, or if
5305 \a n is -1 (default), the function returns all characters that
5306 are available from the specified \a position.
5307
5308 \sa first(), last(), sliced(), chopped(), chop(), truncate(), slice()
5309*/
5310QString QString::mid(qsizetype position, qsizetype n) const &
5311{
5312 qsizetype p = position;
5313 qsizetype l = n;
5314 using namespace QtPrivate;
5315 switch (QContainerImplHelper::mid(size(), &p, &l)) {
5316 case QContainerImplHelper::Null:
5317 return QString();
5318 case QContainerImplHelper::Empty:
5319 return QString(DataPointer::fromRawData(&_empty, 0));
5320 case QContainerImplHelper::Full:
5321 return *this;
5322 case QContainerImplHelper::Subset:
5323 return sliced(p, l);
5324 }
5325 Q_UNREACHABLE_RETURN(QString());
5326}
5327
5328QString QString::mid(qsizetype position, qsizetype n) &&
5329{
5330 qsizetype p = position;
5331 qsizetype l = n;
5332 using namespace QtPrivate;
5333 switch (QContainerImplHelper::mid(size(), &p, &l)) {
5334 case QContainerImplHelper::Null:
5335 return QString();
5336 case QContainerImplHelper::Empty:
5337 resize(0); // keep capacity if we've reserve()d
5338 [[fallthrough]];
5339 case QContainerImplHelper::Full:
5340 return std::move(*this);
5341 case QContainerImplHelper::Subset:
5342 return std::move(*this).sliced(p, l);
5343 }
5344 Q_UNREACHABLE_RETURN(QString());
5345}
5346
5347/*!
5348 \fn QString QString::first(qsizetype n) const &
5349 \fn QString QString::first(qsizetype n) &&
5350 \since 6.0
5351
5352 Returns a string that contains the first \a n characters of this string,
5353 (that is, from the beginning of this string up to, but not including,
5354 the element at index position \a n).
5355
5356 \note The behavior is undefined when \a n < 0 or \a n > size().
5357
5358 \snippet qstring/main.cpp 31
5359
5360 \sa last(), sliced(), startsWith(), chopped(), chop(), truncate(), slice()
5361*/
5362
5363/*!
5364 \fn QString QString::last(qsizetype n) const &
5365 \fn QString QString::last(qsizetype n) &&
5366 \since 6.0
5367
5368 Returns the string that contains the last \a n characters of this string.
5369
5370 \note The behavior is undefined when \a n < 0 or \a n > size().
5371
5372 \snippet qstring/main.cpp 48
5373
5374 \sa first(), sliced(), endsWith(), chopped(), chop(), truncate(), slice()
5375*/
5376
5377/*!
5378 \fn QString QString::sliced(qsizetype pos, qsizetype n) const &
5379 \fn QString QString::sliced(qsizetype pos, qsizetype n) &&
5380 \since 6.0
5381
5382 Returns a string that contains \a n characters of this string, starting
5383 at position \a pos up to, but not including, the element at index position
5384 \c {\a pos + n}.
5385
5386 \note The behavior is undefined when \a pos < 0, \a n < 0,
5387 or \a pos + \a n > size().
5388
5389 \snippet qstring/main.cpp 34
5390
5391 \sa first(), last(), chopped(), chop(), truncate(), slice()
5392*/
5393QString QString::sliced_helper(QString &str, qsizetype pos, qsizetype n)
5394{
5395 if (n == 0)
5396 return QString(DataPointer::fromRawData(&_empty, 0));
5397 DataPointer d = std::move(str.d).sliced(pos, n);
5398 d.data()[n] = 0;
5399 return QString(std::move(d));
5400}
5401
5402/*!
5403 \fn QString QString::sliced(qsizetype pos) const &
5404 \fn QString QString::sliced(qsizetype pos) &&
5405 \since 6.0
5406 \overload
5407
5408 Returns a string that contains the portion of this string starting at
5409 position \a pos and extending to its end.
5410
5411 \note The behavior is undefined when \a pos < 0 or \a pos > size().
5412
5413 \sa first(), last(), chopped(), chop(), truncate(), slice()
5414*/
5415
5416/*!
5417 \fn QString &QString::slice(qsizetype pos, qsizetype n)
5418 \since 6.8
5419
5420 Modifies this string to start at position \a pos, up to, but not including,
5421 the character (code point) at index position \c {\a pos + n}; and returns
5422 a reference to this string.
5423
5424 \note The behavior is undefined if \a pos < 0, \a n < 0,
5425 or \a pos + \a n > size().
5426
5427 \snippet qstring/main.cpp slice97
5428
5429 \sa sliced(), first(), last(), chopped(), chop(), truncate()
5430*/
5431
5432/*!
5433 \fn QString &QString::slice(qsizetype pos)
5434 \since 6.8
5435 \overload
5436
5437 Modifies this string to start at position \a pos and extend to its end,
5438 and returns a reference to this string.
5439
5440 \note The behavior is undefined if \a pos < 0 or \a pos > size().
5441
5442 \sa sliced(), first(), last(), chopped(), chop(), truncate()
5443*/
5444
5445/*!
5446 \fn QString QString::chopped(qsizetype len) const &
5447 \fn QString QString::chopped(qsizetype len) &&
5448 \since 5.10
5449
5450 Returns a string that contains the size() - \a len leftmost characters
5451 of this string.
5452
5453 \note The behavior is undefined if \a len is negative or greater than size().
5454
5455 \sa endsWith(), first(), last(), sliced(), chop(), truncate(), slice()
5456*/
5457
5458/*!
5459 Returns \c true if the string starts with \a s; otherwise returns
5460 \c false.
5461
5462 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
5463
5464 \snippet qstring/main.cpp 65
5465
5466 \sa endsWith()
5467*/
5468bool QString::startsWith(const QString& s, Qt::CaseSensitivity cs) const
5469{
5470 return qt_starts_with_impl(QStringView(*this), QStringView(s), cs);
5471}
5472
5473/*!
5474 \overload startsWith()
5475 */
5476bool QString::startsWith(QLatin1StringView s, Qt::CaseSensitivity cs) const
5477{
5478 return qt_starts_with_impl(QStringView(*this), s, cs);
5479}
5480
5481/*!
5482 \overload startsWith()
5483
5484 Returns \c true if the string starts with \a c; otherwise returns
5485 \c false.
5486*/
5487bool QString::startsWith(QChar c, Qt::CaseSensitivity cs) const
5488{
5489 if (!size())
5490 return false;
5491 if (cs == Qt::CaseSensitive)
5492 return at(0) == c;
5493 return foldCase(at(0)) == foldCase(c);
5494}
5495
5496/*!
5497 \fn bool QString::startsWith(QStringView str, Qt::CaseSensitivity cs) const
5498 \since 5.10
5499 \overload
5500
5501 Returns \c true if the string starts with the string view \a str;
5502 otherwise returns \c false.
5503
5504 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
5505
5506 \sa endsWith()
5507*/
5508
5509/*!
5510 Returns \c true if the string ends with \a s; otherwise returns
5511 \c false.
5512
5513 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
5514
5515 \snippet qstring/main.cpp 20
5516
5517 \sa startsWith()
5518*/
5519bool QString::endsWith(const QString &s, Qt::CaseSensitivity cs) const
5520{
5521 return qt_ends_with_impl(QStringView(*this), QStringView(s), cs);
5522}
5523
5524/*!
5525 \fn bool QString::endsWith(QStringView str, Qt::CaseSensitivity cs) const
5526 \since 5.10
5527 \overload endsWith()
5528 Returns \c true if the string ends with the string view \a str;
5529 otherwise returns \c false.
5530
5531 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
5532
5533 \sa startsWith()
5534*/
5535
5536/*!
5537 \overload endsWith()
5538*/
5539bool QString::endsWith(QLatin1StringView s, Qt::CaseSensitivity cs) const
5540{
5541 return qt_ends_with_impl(QStringView(*this), s, cs);
5542}
5543
5544/*!
5545 Returns \c true if the string ends with \a c; otherwise returns
5546 \c false.
5547
5548 \overload endsWith()
5549 */
5550bool QString::endsWith(QChar c, Qt::CaseSensitivity cs) const
5551{
5552 if (!size())
5553 return false;
5554 if (cs == Qt::CaseSensitive)
5555 return at(size() - 1) == c;
5556 return foldCase(at(size() - 1)) == foldCase(c);
5557}
5558
5559static bool checkCase(QStringView s, QUnicodeTables::Case c) noexcept
5560{
5561 QStringIterator it(s);
5562 while (it.hasNext()) {
5563 const char32_t uc = it.next();
5564 if (qGetProp(uc)->cases[c].diff)
5565 return false;
5566 }
5567 return true;
5568}
5569
5570bool QtPrivate::isLower(QStringView s) noexcept
5571{
5572 return checkCase(s, QUnicodeTables::LowerCase);
5573}
5574
5575bool QtPrivate::isUpper(QStringView s) noexcept
5576{
5577 return checkCase(s, QUnicodeTables::UpperCase);
5578}
5579
5580/*!
5581 Returns \c true if the string is uppercase, that is, it's identical
5582 to its toUpper() folding.
5583
5584 Note that this does \e not mean that the string does not contain
5585 lowercase letters (some lowercase letters do not have an uppercase
5586 folding; they are left unchanged by toUpper()).
5587 For more information, refer to the Unicode standard, section 3.13.
5588
5589 \since 5.12
5590
5591 \sa QChar::toUpper(), isLower()
5592*/
5593bool QString::isUpper() const
5594{
5595 return QtPrivate::isUpper(qToStringViewIgnoringNull(*this));
5596}
5597
5598/*!
5599 Returns \c true if the string is lowercase, that is, it's identical
5600 to its toLower() folding.
5601
5602 Note that this does \e not mean that the string does not contain
5603 uppercase letters (some uppercase letters do not have a lowercase
5604 folding; they are left unchanged by toLower()).
5605 For more information, refer to the Unicode standard, section 3.13.
5606
5607 \since 5.12
5608
5609 \sa QChar::toLower(), isUpper()
5610 */
5611bool QString::isLower() const
5612{
5613 return QtPrivate::isLower(qToStringViewIgnoringNull(*this));
5614}
5615
5616static QByteArray qt_convert_to_latin1(QStringView string);
5617
5618QByteArray QString::toLatin1_helper(const QString &string)
5619{
5620 return qt_convert_to_latin1(string);
5621}
5622
5623/*!
5624 \since 6.0
5625 \internal
5626 \relates QAnyStringView
5627
5628 Returns a UTF-16 representation of \a string as a QString.
5629
5630 \sa QString::toLatin1(), QStringView::toLatin1(), QtPrivate::convertToUtf8(),
5631 QtPrivate::convertToLocal8Bit(), QtPrivate::convertToUcs4()
5632*/
5633QString QtPrivate::convertToQString(QAnyStringView string)
5634{
5635 return string.visit([] (auto string) { return string.toString(); });
5636}
5637
5638/*!
5639 \since 5.10
5640 \internal
5641 \relates QStringView
5642
5643 Returns a Latin-1 representation of \a string as a QByteArray.
5644
5645 The behavior is undefined if \a string contains non-Latin1 characters.
5646
5647 \sa QString::toLatin1(), QStringView::toLatin1(), QtPrivate::convertToUtf8(),
5648 QtPrivate::convertToLocal8Bit(), QtPrivate::convertToUcs4()
5649*/
5651{
5652 return qt_convert_to_latin1(string);
5653}
5654
5655Q_NEVER_INLINE
5656static QByteArray qt_convert_to_latin1(QStringView string)
5657{
5658 if (Q_UNLIKELY(string.isNull()))
5659 return QByteArray();
5660
5661 QByteArray ba(string.size(), Qt::Uninitialized);
5662
5663 // since we own the only copy, we're going to const_cast the constData;
5664 // that avoids an unnecessary call to detach() and expansion code that will never get used
5665 qt_to_latin1(reinterpret_cast<uchar *>(const_cast<char *>(ba.constData())),
5666 string.utf16(), string.size());
5667 return ba;
5668}
5669
5670QByteArray QString::toLatin1_helper_inplace(QString &s)
5671{
5672 if (!s.isDetached())
5673 return qt_convert_to_latin1(s);
5674
5675 // We can return our own buffer to the caller.
5676 // Conversion to Latin-1 always shrinks the buffer by half.
5677 // This relies on the fact that we use QArrayData for everything behind the scenes
5678
5679 // First, do the in-place conversion. Since isDetached() == true, the data
5680 // was allocated by QArrayData, so the null terminator must be there.
5681 qsizetype length = s.size();
5682 char16_t *sdata = s.d->data();
5683 Q_ASSERT(sdata[length] == u'\0');
5684 qt_to_latin1(reinterpret_cast<uchar *>(sdata), sdata, length + 1);
5685
5686 // Move the internals over to the byte array.
5687 // Kids, avert your eyes. Don't try this at home.
5688 auto ba_d = std::move(s.d).reinterpreted<char>();
5689
5690 // Some sanity checks
5691 Q_ASSERT(ba_d.d->allocatedCapacity() >= ba_d.size);
5692 Q_ASSERT(s.isNull());
5693 Q_ASSERT(s.isEmpty());
5694 Q_ASSERT(s.constData() == QString().constData());
5695
5696 return QByteArray(std::move(ba_d));
5697}
5698
5699/*!
5700 \since 6.9
5701 \internal
5702 \relates QLatin1StringView
5703
5704 Returns a UTF-8 representation of \a string as a QByteArray.
5705*/
5706QByteArray QtPrivate::convertToUtf8(QLatin1StringView string)
5707{
5708 if (Q_UNLIKELY(string.isNull()))
5709 return QByteArray();
5710
5711 // create a QByteArray with the worst case scenario size
5712 QByteArray ba(string.size() * 2, Qt::Uninitialized);
5713 const qsizetype sz = QUtf8::convertFromLatin1(ba.data(), string) - ba.data();
5714 ba.truncate(sz);
5715
5716 return ba;
5717}
5718
5719// QLatin1 methods that use helpers from qstring.cpp
5720char16_t *QLatin1::convertToUnicode(char16_t *out, QLatin1StringView in) noexcept
5721{
5722 const qsizetype len = in.size();
5723 qt_from_latin1(out, in.data(), len);
5724 return std::next(out, len);
5725}
5726
5727char *QLatin1::convertFromUnicode(char *out, QStringView in) noexcept
5728{
5729 const qsizetype len = in.size();
5730 qt_to_latin1(reinterpret_cast<uchar *>(out), in.utf16(), len);
5731 return out + len;
5732}
5733
5734/*!
5735 \fn QByteArray QString::toLatin1() const
5736
5737 Returns a Latin-1 representation of the string as a QByteArray.
5738
5739 The returned byte array is undefined if the string contains non-Latin1
5740 characters. Those characters may be suppressed or replaced with a
5741 question mark.
5742
5743 \sa fromLatin1(), toUtf8(), toLocal8Bit(), QStringEncoder
5744*/
5745
5746static QByteArray qt_convert_to_local_8bit(QStringView string);
5747
5748/*!
5749 \fn QByteArray QString::toLocal8Bit() const
5750
5751 Returns the local 8-bit representation of the string as a
5752 QByteArray.
5753
5754 \include qstring.qdocinc {qstring-local-8-bit-equivalent} {toUtf8}
5755
5756 If this string contains any characters that cannot be encoded in the
5757 local 8-bit encoding, the returned byte array is undefined. Those
5758 characters may be suppressed or replaced by another.
5759
5760 \sa fromLocal8Bit(), toLatin1(), toUtf8(), QStringEncoder
5761*/
5762
5763QByteArray QString::toLocal8Bit_helper(const QChar *data, qsizetype size)
5764{
5765 return qt_convert_to_local_8bit(QStringView(data, size));
5766}
5767
5768static QByteArray qt_convert_to_local_8bit(QStringView string)
5769{
5770 if (string.isNull())
5771 return QByteArray();
5772 QStringEncoder fromUtf16(QStringEncoder::System, QStringEncoder::Flag::Stateless);
5773 return fromUtf16(string);
5774}
5775
5776/*!
5777 \since 5.10
5778 \internal
5779 \relates QStringView
5780
5781 Returns a local 8-bit representation of \a string as a QByteArray.
5782
5783 On Unix systems this is equivalent to toUtf8(); on Windows the system's
5784 current code page is used.
5785
5786 The behavior is undefined if \a string contains characters not
5787 supported by the locale's 8-bit encoding.
5788
5789 \sa QString::toLocal8Bit(), QStringView::toLocal8Bit()
5790*/
5792{
5793 return qt_convert_to_local_8bit(string);
5794}
5795
5796static QByteArray qt_convert_to_utf8(QStringView str);
5797
5798/*!
5799 \fn QByteArray QString::toUtf8() const
5800
5801 Returns a UTF-8 representation of the string as a QByteArray.
5802
5803 UTF-8 is a Unicode codec and can represent all characters in a Unicode
5804 string like QString.
5805
5806 \sa fromUtf8(), toLatin1(), toLocal8Bit(), QStringEncoder
5807*/
5808
5809QByteArray QString::toUtf8_helper(const QString &str)
5810{
5811 return qt_convert_to_utf8(str);
5812}
5813
5814static QByteArray qt_convert_to_utf8(QStringView str)
5815{
5816 if (str.isNull())
5817 return QByteArray();
5818
5819 return QUtf8::convertFromUnicode(str);
5820}
5821
5822/*!
5823 \since 5.10
5824 \internal
5825 \relates QStringView
5826
5827 Returns a UTF-8 representation of \a string as a QByteArray.
5828
5829 UTF-8 is a Unicode codec and can represent all characters in a Unicode
5830 string like QStringView.
5831
5832 \sa QString::toUtf8(), QStringView::toUtf8()
5833*/
5835{
5836 return qt_convert_to_utf8(string);
5837}
5838
5839static QList<uint> qt_convert_to_ucs4(QStringView string);
5840
5841/*!
5842 \since 4.2
5843
5844 Returns a UCS-4/UTF-32 representation of the string as a QList<uint>.
5845
5846 UTF-32 is a Unicode codec and therefore it is lossless. All characters from
5847 this string will be encoded in UTF-32. Any invalid sequence of code units in
5848 this string is replaced by the Unicode replacement character
5849 (QChar::ReplacementCharacter, which corresponds to \c{U+FFFD}).
5850
5851 The returned list is not 0-terminated.
5852
5853 \sa fromUtf8(), toUtf8(), toLatin1(), toLocal8Bit(), QStringEncoder,
5854 fromUcs4(), toWCharArray()
5855*/
5856QList<uint> QString::toUcs4() const
5857{
5858 return qt_convert_to_ucs4(*this);
5859}
5860
5861static QList<uint> qt_convert_to_ucs4(QStringView string)
5862{
5863 QList<uint> v(string.size());
5864 uint *a = const_cast<uint*>(v.constData());
5865 QStringIterator it(string);
5866 while (it.hasNext())
5867 *a++ = it.next();
5868 v.resize(a - v.constData());
5869 return v;
5870}
5871
5872/*!
5873 \since 5.10
5874 \internal
5875 \relates QStringView
5876
5877 Returns a UCS-4/UTF-32 representation of \a string as a QList<uint>.
5878
5879 UTF-32 is a Unicode codec and therefore it is lossless. All characters from
5880 this string will be encoded in UTF-32. Any invalid sequence of code units in
5881 this string is replaced by the Unicode replacement character
5882 (QChar::ReplacementCharacter, which corresponds to \c{U+FFFD}).
5883
5884 The returned list is not 0-terminated.
5885
5886 \sa QString::toUcs4(), QStringView::toUcs4(), QtPrivate::convertToLatin1(),
5887 QtPrivate::convertToLocal8Bit(), QtPrivate::convertToUtf8()
5888*/
5889QList<uint> QtPrivate::convertToUcs4(QStringView string)
5890{
5891 return qt_convert_to_ucs4(string);
5892}
5893
5894/*!
5895 \fn QString QString::fromLatin1(QByteArrayView str)
5896 \overload
5897 \since 6.0
5898
5899 Returns a QString initialized with the Latin-1 string \a str.
5900
5901 \note: any null ('\\0') bytes in the byte array will be included in this
5902 string, converted to Unicode null characters (U+0000).
5903*/
5904QString QString::fromLatin1(QByteArrayView ba)
5905{
5906 DataPointer d;
5907 if (!ba.data()) {
5908 // nothing to do
5909 } else if (ba.size() == 0) {
5910 d = DataPointer::fromRawData(&_empty, 0);
5911 } else {
5912 d = DataPointer(ba.size(), ba.size());
5913 Q_CHECK_PTR(d.data());
5914 d.data()[ba.size()] = '\0';
5915 char16_t *dst = d.data();
5916
5917 qt_from_latin1(dst, ba.data(), size_t(ba.size()));
5918 }
5919 return QString(std::move(d));
5920}
5921
5922/*!
5923 \fn QString QString::fromLatin1(const char *str, qsizetype size)
5924 Returns a QString initialized with the first \a size characters
5925 of the Latin-1 string \a str.
5926
5927 If \a size is \c{-1}, \c{strlen(str)} is used instead.
5928
5929 \sa toLatin1(), fromUtf8(), fromLocal8Bit()
5930*/
5931
5932/*!
5933 \fn QString QString::fromLatin1(const QByteArray &str)
5934 \overload
5935 \since 5.0
5936
5937 Returns a QString initialized with the Latin-1 string \a str.
5938
5939 \note: any null ('\\0') bytes in the byte array will be included in this
5940 string, converted to Unicode null characters (U+0000). This behavior is
5941 different from Qt 5.x.
5942*/
5943
5944/*!
5945 \fn QString QString::fromLocal8Bit(const char *str, qsizetype size)
5946 Returns a QString initialized with the first \a size characters
5947 of the 8-bit string \a str.
5948
5949 If \a size is \c{-1}, \c{strlen(str)} is used instead.
5950
5951 \include qstring.qdocinc {qstring-local-8-bit-equivalent} {fromUtf8}
5952
5953 \sa toLocal8Bit(), fromLatin1(), fromUtf8()
5954*/
5955
5956/*!
5957 \fn QString QString::fromLocal8Bit(const QByteArray &str)
5958 \overload
5959 \since 5.0
5960
5961 Returns a QString initialized with the 8-bit string \a str.
5962
5963 \include qstring.qdocinc {qstring-local-8-bit-equivalent} {fromUtf8}
5964
5965 \note: any null ('\\0') bytes in the byte array will be included in this
5966 string, converted to Unicode null characters (U+0000). This behavior is
5967 different from Qt 5.x.
5968*/
5969
5970/*!
5971 \fn QString QString::fromLocal8Bit(QByteArrayView str)
5972 \overload
5973 \since 6.0
5974
5975 Returns a QString initialized with the 8-bit string \a str.
5976
5977 \include qstring.qdocinc {qstring-local-8-bit-equivalent} {fromUtf8}
5978
5979 \note: any null ('\\0') bytes in the byte array will be included in this
5980 string, converted to Unicode null characters (U+0000).
5981*/
5982QString QString::fromLocal8Bit(QByteArrayView ba)
5983{
5984 if (ba.isNull())
5985 return QString();
5986 if (ba.isEmpty())
5987 return QString(DataPointer::fromRawData(&_empty, 0));
5988 QStringDecoder toUtf16(QStringDecoder::System, QStringDecoder::Flag::Stateless);
5989 return toUtf16(ba);
5990}
5991
5992/*! \fn QString QString::fromUtf8(const char *str, qsizetype size)
5993 Returns a QString initialized with the first \a size bytes
5994 of the UTF-8 string \a str.
5995
5996 If \a size is \c{-1}, \c{strlen(str)} is used instead.
5997
5998 UTF-8 is a Unicode codec and can represent all characters in a Unicode
5999 string like QString. However, invalid sequences are possible with UTF-8
6000 and, if any such are found, they will be replaced with one or more
6001 "replacement characters", or suppressed. These include non-Unicode
6002 sequences, non-characters, overlong sequences or surrogate codepoints
6003 encoded into UTF-8.
6004
6005 This function can be used to process incoming data incrementally as long as
6006 all UTF-8 characters are terminated within the incoming data. Any
6007 unterminated characters at the end of the string will be replaced or
6008 suppressed. In order to do stateful decoding, please use \l QStringDecoder.
6009
6010 \sa toUtf8(), fromLatin1(), fromLocal8Bit()
6011*/
6012
6013/*!
6014 \fn QString QString::fromUtf8(const char8_t *str)
6015 \overload
6016 \since 6.1
6017
6018 This overload is only available when compiling in C++20 mode.
6019*/
6020
6021/*!
6022 \fn QString QString::fromUtf8(const char8_t *str, qsizetype size)
6023 \overload
6024 \since 6.0
6025
6026 This overload is only available when compiling in C++20 mode.
6027*/
6028
6029/*!
6030 \fn QString QString::fromUtf8(const QByteArray &str)
6031 \overload
6032 \since 5.0
6033
6034 Returns a QString initialized with the UTF-8 string \a str.
6035
6036 \note: any null ('\\0') bytes in the byte array will be included in this
6037 string, converted to Unicode null characters (U+0000). This behavior is
6038 different from Qt 5.x.
6039*/
6040
6041/*!
6042 \fn QString QString::fromUtf8(QByteArrayView str)
6043 \overload
6044 \since 6.0
6045
6046 Returns a QString initialized with the UTF-8 string \a str.
6047
6048 \note: any null ('\\0') bytes in the byte array will be included in this
6049 string, converted to Unicode null characters (U+0000).
6050*/
6051QString QString::fromUtf8(QByteArrayView ba)
6052{
6053 if (ba.isNull())
6054 return QString();
6055 if (ba.isEmpty())
6056 return QString(DataPointer::fromRawData(&_empty, 0));
6057 return QUtf8::convertToUnicode(ba);
6058}
6059
6060#ifndef QT_BOOTSTRAPPED
6061/*!
6062 \since 5.3
6063 Returns a QString initialized with the first \a size characters
6064 of the Unicode string \a unicode (ISO-10646-UTF-16 encoded).
6065
6066 If \a size is -1 (default), \a unicode must be '\\0'-terminated.
6067
6068 This function checks for a Byte Order Mark (BOM). If it is missing,
6069 host byte order is assumed.
6070
6071 This function is slow compared to the other Unicode conversions.
6072 Use QString(const QChar *, qsizetype) or QString(const QChar *) if possible.
6073
6074 QString makes a deep copy of the Unicode data.
6075
6076 \sa utf16(), setUtf16(), fromStdU16String()
6077*/
6078QString QString::fromUtf16(const char16_t *unicode, qsizetype size)
6079{
6080 if (!unicode)
6081 return QString();
6082 if (size < 0)
6083 size = QtPrivate::qustrlen(unicode);
6084 QStringDecoder toUtf16(QStringDecoder::Utf16, QStringDecoder::Flag::Stateless);
6085 return toUtf16(QByteArrayView(reinterpret_cast<const char *>(unicode), size * 2));
6086}
6087
6088/*!
6089 \fn QString QString::fromUtf16(const ushort *str, qsizetype size)
6090 \deprecated [6.0] Use the \c char16_t overload instead.
6091*/
6092
6093/*!
6094 \fn QString QString::fromUcs4(const uint *str, qsizetype size)
6095 \since 4.2
6096 \deprecated [6.0] Use the \c char32_t overload instead.
6097*/
6098
6099/*!
6100 \since 5.3
6101
6102 Returns a QString initialized with the first \a size characters
6103 of the Unicode string \a unicode (encoded as UTF-32).
6104
6105 If \a size is -1 (default), \a unicode must be '\\0'-terminated.
6106
6107 \sa toUcs4(), fromUtf16(), utf16(), setUtf16(), fromWCharArray(),
6108 fromStdU32String()
6109*/
6110QString QString::fromUcs4(const char32_t *unicode, qsizetype size)
6111{
6112 if (!unicode)
6113 return QString();
6114 if (size < 0) {
6115 if constexpr (sizeof(char32_t) == sizeof(wchar_t))
6116 size = wcslen(reinterpret_cast<const wchar_t *>(unicode));
6117 else
6118 size = std::char_traits<char32_t>::length(unicode);
6119 }
6120 QStringDecoder toUtf16(QStringDecoder::Utf32, QStringDecoder::Flag::Stateless);
6121 return toUtf16(QByteArrayView(reinterpret_cast<const char *>(unicode), size * 4));
6122}
6123#endif // !QT_BOOTSTRAPPED
6124
6125/*!
6126 Resizes the string to \a size characters and copies \a unicode
6127 into the string.
6128
6129 If \a unicode is \nullptr, nothing is copied, but the string is still
6130 resized to \a size.
6131
6132 \sa unicode(), setUtf16()
6133*/
6134QString& QString::setUnicode(const QChar *unicode, qsizetype size)
6135{
6136 resize(size);
6137 if (unicode && size)
6138 memcpy(d.data(), unicode, size * sizeof(QChar));
6139 return *this;
6140}
6141
6142/*!
6143 \fn QString::setUnicode(const char16_t *unicode, qsizetype size)
6144 \overload
6145 \since 6.9
6146
6147 \sa unicode(), setUtf16()
6148*/
6149
6150/*!
6151 \fn QString::setUtf16(const char16_t *unicode, qsizetype size)
6152 \since 6.9
6153
6154 Resizes the string to \a size characters and copies \a unicode
6155 into the string.
6156
6157 If \a unicode is \nullptr, nothing is copied, but the string is still
6158 resized to \a size.
6159
6160 Note that unlike fromUtf16(), this function does not consider BOMs and
6161 possibly differing byte ordering.
6162
6163 \sa utf16(), setUnicode()
6164*/
6165
6166/*!
6167 \fn QString &QString::setUtf16(const ushort *unicode, qsizetype size)
6168 \obsolete Use the \c char16_t overload instead.
6169*/
6170
6171/*!
6172 \fn QString QString::simplified() const
6173
6174 Returns a string that has whitespace removed from the start
6175 and the end, and that has each sequence of internal whitespace
6176 replaced with a single space.
6177
6178 Whitespace means any character for which QChar::isSpace() returns
6179 \c true. This includes the ASCII characters '\\t', '\\n', '\\v',
6180 '\\f', '\\r', and ' '.
6181
6182 Example:
6183
6184 \snippet qstring/main.cpp 57
6185
6186 \sa trimmed()
6187*/
6188QString QString::simplified_helper(const QString &str)
6189{
6190 return QStringAlgorithms<const QString>::simplified_helper(str);
6191}
6192
6193QString QString::simplified_helper(QString &str)
6194{
6195 return QStringAlgorithms<QString>::simplified_helper(str);
6196}
6197
6198namespace {
6199 template <typename StringView>
6200 StringView qt_trimmed(StringView s) noexcept
6201 {
6202 const auto [begin, end] = QStringAlgorithms<const StringView>::trimmed_helper_positions(s);
6203 return StringView{begin, end};
6204 }
6205}
6206
6207/*!
6208 \fn QStringView QtPrivate::trimmed(QStringView s)
6209 \fn QLatin1StringView QtPrivate::trimmed(QLatin1StringView s)
6210 \internal
6211 \relates QStringView
6212 \since 5.10
6213
6214 Returns \a s with whitespace removed from the start and the end.
6215
6216 Whitespace means any character for which QChar::isSpace() returns
6217 \c true. This includes the ASCII characters '\\t', '\\n', '\\v',
6218 '\\f', '\\r', and ' '.
6219
6220 \sa QString::trimmed(), QStringView::trimmed(), QLatin1StringView::trimmed()
6221*/
6222QStringView QtPrivate::trimmed(QStringView s) noexcept
6223{
6224 return qt_trimmed(s);
6225}
6226
6227QLatin1StringView QtPrivate::trimmed(QLatin1StringView s) noexcept
6228{
6229 return qt_trimmed(s);
6230}
6231
6232/*!
6233 \fn QString QString::trimmed() const
6234
6235 Returns a string that has whitespace removed from the start and
6236 the end.
6237
6238 Whitespace means any character for which QChar::isSpace() returns
6239 \c true. This includes the ASCII characters '\\t', '\\n', '\\v',
6240 '\\f', '\\r', and ' '.
6241
6242 Example:
6243
6244 \snippet qstring/main.cpp 82
6245
6246 Unlike simplified(), trimmed() leaves internal whitespace alone.
6247
6248 \sa simplified()
6249*/
6250QString QString::trimmed_helper(const QString &str)
6251{
6252 return QStringAlgorithms<const QString>::trimmed_helper(str);
6253}
6254
6255QString QString::trimmed_helper(QString &str)
6256{
6257 return QStringAlgorithms<QString>::trimmed_helper(str);
6258}
6259
6260/*! \fn const QChar QString::at(qsizetype position) const
6261
6262 Returns the character at the given index \a position in the
6263 string.
6264
6265 The \a position must be a valid index position in the string
6266 (i.e., 0 <= \a position < size()).
6267
6268 \sa operator[]()
6269*/
6270
6271/*!
6272 \fn QChar &QString::operator[](qsizetype position)
6273
6274 Returns the character at the specified \a position in the string as a
6275 modifiable reference.
6276
6277 Example:
6278
6279 \snippet qstring/main.cpp 85
6280
6281 \sa at()
6282*/
6283
6284/*!
6285 \fn const QChar QString::operator[](qsizetype position) const
6286
6287 \overload operator[]()
6288*/
6289
6290/*!
6291 \fn QChar QString::front() const
6292 \since 5.10
6293
6294 Returns the first character in the string.
6295 Same as \c{at(0)}.
6296
6297 This function is provided for STL compatibility.
6298
6299 \warning Calling this function on an empty string constitutes
6300 undefined behavior.
6301
6302 \sa back(), at(), operator[]()
6303*/
6304
6305/*!
6306 \fn QChar QString::back() const
6307 \since 5.10
6308
6309 Returns the last character in the string.
6310 Same as \c{at(size() - 1)}.
6311
6312 This function is provided for STL compatibility.
6313
6314 \warning Calling this function on an empty string constitutes
6315 undefined behavior.
6316
6317 \sa front(), at(), operator[]()
6318*/
6319
6320/*!
6321 \fn QChar &QString::front()
6322 \since 5.10
6323
6324 Returns a reference to the first character in the string.
6325 Same as \c{operator[](0)}.
6326
6327 This function is provided for STL compatibility.
6328
6329 \warning Calling this function on an empty string constitutes
6330 undefined behavior.
6331
6332 \sa back(), at(), operator[]()
6333*/
6334
6335/*!
6336 \fn QChar &QString::back()
6337 \since 5.10
6338
6339 Returns a reference to the last character in the string.
6340 Same as \c{operator[](size() - 1)}.
6341
6342 This function is provided for STL compatibility.
6343
6344 \warning Calling this function on an empty string constitutes
6345 undefined behavior.
6346
6347 \sa front(), at(), operator[]()
6348*/
6349
6350/*!
6351 \fn void QString::truncate(qsizetype position)
6352
6353 Truncates the string starting from, and including, the element at index
6354 \a position.
6355
6356 If the specified \a position index is beyond the end of the
6357 string, nothing happens.
6358
6359 Example:
6360
6361 \snippet qstring/main.cpp 83
6362
6363 If \a position is negative, it is equivalent to passing zero.
6364
6365 \sa chop(), resize(), first(), QStringView::truncate()
6366*/
6367
6368void QString::truncate(qsizetype pos)
6369{
6370 if (pos < size())
6371 resize(pos);
6372}
6373
6374
6375/*!
6376 Removes \a n characters from the end of the string.
6377
6378 If \a n is greater than or equal to size(), the result is an
6379 empty string; if \a n is negative, it is equivalent to passing zero.
6380
6381 Example:
6382 \snippet qstring/main.cpp 15
6383
6384 If you want to remove characters from the \e beginning of the
6385 string, use remove() instead.
6386
6387 \sa truncate(), resize(), remove(), QStringView::chop()
6388*/
6389void QString::chop(qsizetype n)
6390{
6391 if (n > 0)
6392 resize(d.size - n);
6393}
6394
6395/*!
6396 Sets every character in the string to character \a ch. If \a size
6397 is different from -1 (default), the string is resized to \a
6398 size beforehand.
6399
6400 Example:
6401
6402 \snippet qstring/main.cpp 21
6403
6404 \sa resize()
6405*/
6406
6407QString& QString::fill(QChar ch, qsizetype size)
6408{
6409 resize(size < 0 ? d.size : size);
6410 if (d.size)
6411 std::fill(d.data(), d.data() + d.size, ch.unicode());
6412 return *this;
6413}
6414
6415/*!
6416 \fn qsizetype QString::length() const
6417
6418 Returns the number of characters in this string. Equivalent to
6419 size().
6420
6421 \sa resize()
6422*/
6423
6424/*!
6425 \fn qsizetype QString::size() const
6426
6427 Returns the number of characters in this string.
6428
6429 The last character in the string is at position size() - 1.
6430
6431 Example:
6432 \snippet qstring/main.cpp 58
6433
6434 \sa isEmpty(), resize()
6435*/
6436
6437/*!
6438 \fn qsizetype QString::max_size() const
6439 \fn qsizetype QString::maxSize()
6440 \since 6.8
6441
6442 Returns the maximum number of elements that the string can
6443 theoretically hold. In practice, the number can be much smaller,
6444 limited by the amount of memory available to the system.
6445*/
6446
6447/*! \fn bool QString::isNull() const
6448
6449 Returns \c true if this string is null; otherwise returns \c false.
6450
6451 Example:
6452
6453 \snippet qstring/main.cpp 28
6454
6455 Qt makes a distinction between null strings and empty strings for
6456 historical reasons. For most applications, what matters is
6457 whether or not a string contains any data, and this can be
6458 determined using the isEmpty() function.
6459
6460 \sa isEmpty()
6461*/
6462
6463/*! \fn bool QString::isEmpty() const
6464
6465 Returns \c true if the string has no characters; otherwise returns
6466 \c false.
6467
6468 Example:
6469
6470 \snippet qstring/main.cpp 27
6471
6472 \sa size()
6473*/
6474
6475/*! \fn QString &QString::operator+=(const QString &other)
6476
6477 Appends the string \a other onto the end of this string and
6478 returns a reference to this string.
6479
6480 Example:
6481
6482 \snippet qstring/main.cpp 84
6483
6484 This operation is typically very fast (\l{constant time}),
6485 because QString preallocates extra space at the end of the string
6486 data so it can grow without reallocating the entire string each
6487 time.
6488
6489 \sa append(), prepend()
6490*/
6491
6492/*! \fn QString &QString::operator+=(QLatin1StringView str)
6493
6494 \overload operator+=()
6495
6496 Appends the Latin-1 string viewed by \a str to this string.
6497*/
6498
6499/*! \fn QString &QString::operator+=(QUtf8StringView str)
6500 \since 6.5
6501 \overload operator+=()
6502
6503 Appends the UTF-8 string view \a str to this string.
6504*/
6505
6506/*! \fn QString &QString::operator+=(const QByteArray &ba)
6507
6508 \overload operator+=()
6509
6510 Appends the byte array \a ba to this string. The byte array is converted
6511 to Unicode using the fromUtf8() function. If any NUL characters ('\\0')
6512 are embedded in the \a ba byte array, they will be included in the
6513 transformation.
6514
6515 You can disable this function by defining
6516 \l QT_NO_CAST_FROM_ASCII when you compile your applications. This
6517 can be useful if you want to ensure that all user-visible strings
6518 go through QObject::tr(), for example.
6519*/
6520
6521/*! \fn QString &QString::operator+=(const char *str)
6522
6523 \overload operator+=()
6524
6525 Appends the string \a str to this string. The const char pointer
6526 is converted to Unicode using the fromUtf8() function.
6527
6528 You can disable this function by defining \l QT_NO_CAST_FROM_ASCII
6529 when you compile your applications. This can be useful if you want
6530 to ensure that all user-visible strings go through QObject::tr(),
6531 for example.
6532*/
6533
6534/*! \fn QString &QString::operator+=(QStringView str)
6535 \since 6.0
6536 \overload operator+=()
6537
6538 Appends the string view \a str to this string.
6539*/
6540
6541/*! \fn QString &QString::operator+=(QChar ch)
6542
6543 \overload operator+=()
6544
6545 Appends the character \a ch to the string.
6546*/
6547
6548/*!
6549 \fn bool QString::operator==(const char * const &lhs, const QString &rhs)
6550
6551 \overload operator==()
6552
6553 Returns \c true if \a lhs is equal to \a rhs; otherwise returns \c false.
6554 Note that no string is equal to \a lhs being 0.
6555
6556 Equivalent to \c {lhs != 0 && compare(lhs, rhs) == 0}.
6557*/
6558
6559/*!
6560 \fn bool QString::operator!=(const char * const &lhs, const QString &rhs)
6561
6562 Returns \c true if \a lhs is not equal to \a rhs; otherwise returns
6563 \c false.
6564
6565 For \a lhs != 0, this is equivalent to \c {compare(} \a lhs, \a rhs
6566 \c {) != 0}. Note that no string is equal to \a lhs being 0.
6567*/
6568
6569/*!
6570 \fn bool QString::operator<(const char * const &lhs, const QString &rhs)
6571
6572 Returns \c true if \a lhs is lexically less than \a rhs; otherwise
6573 returns \c false. For \a lhs != 0, this is equivalent to \c
6574 {compare(lhs, rhs) < 0}.
6575
6576 \sa {Comparing Strings}
6577*/
6578
6579/*!
6580 \fn bool QString::operator<=(const char * const &lhs, const QString &rhs)
6581
6582 Returns \c true if \a lhs is lexically less than or equal to \a rhs;
6583 otherwise returns \c false. For \a lhs != 0, this is equivalent to \c
6584 {compare(lhs, rhs) <= 0}.
6585
6586 \sa {Comparing Strings}
6587*/
6588
6589/*!
6590 \fn bool QString::operator>(const char * const &lhs, const QString &rhs)
6591
6592 Returns \c true if \a lhs is lexically greater than \a rhs; otherwise
6593 returns \c false. Equivalent to \c {compare(lhs, rhs) > 0}.
6594
6595 \sa {Comparing Strings}
6596*/
6597
6598/*!
6599 \fn bool QString::operator>=(const char * const &lhs, const QString &rhs)
6600
6601 Returns \c true if \a lhs is lexically greater than or equal to \a rhs;
6602 otherwise returns \c false. For \a lhs != 0, this is equivalent to \c
6603 {compare(lhs, rhs) >= 0}.
6604
6605 \sa {Comparing Strings}
6606*/
6607
6608/*!
6609 \fn QString operator+(const QString &s1, const QString &s2)
6610 \fn QString operator+(QString &&s1, const QString &s2)
6611 \relates QString
6612
6613 Returns a string which is the result of concatenating \a s1 and \a
6614 s2.
6615*/
6616
6617/*!
6618 \fn QString operator+(const QString &s1, const char *s2)
6619 \relates QString
6620
6621 Returns a string which is the result of concatenating \a s1 and \a
6622 s2 (\a s2 is converted to Unicode using the QString::fromUtf8()
6623 function).
6624
6625 \sa QString::fromUtf8()
6626*/
6627
6628/*!
6629 \fn QString operator+(const char *s1, const QString &s2)
6630 \relates QString
6631
6632 Returns a string which is the result of concatenating \a s1 and \a
6633 s2 (\a s1 is converted to Unicode using the QString::fromUtf8()
6634 function).
6635
6636 \sa QString::fromUtf8()
6637*/
6638
6639/*!
6640 \fn QString operator+(QStringView lhs, const QString &rhs)
6641 \fn QString operator+(const QString &lhs, QStringView rhs)
6642
6643 \relates QString
6644 \since 6.9
6645
6646 Returns a string that is the result of concatenating \a lhs and \a rhs.
6647*/
6648
6649/*!
6650 \fn int QString::compare(const QString &s1, const QString &s2, Qt::CaseSensitivity cs)
6651 \since 4.2
6652
6653 Compares the string \a s1 with the string \a s2 and returns a negative integer
6654 if \a s1 is less than \a s2, a positive integer if it is greater than \a s2,
6655 and zero if they are equal.
6656
6657 \include qstring.qdocinc {search-comparison-case-sensitivity} {comparison}
6658
6659 Case sensitive comparison is based exclusively on the numeric
6660 Unicode values of the characters and is very fast, but is not what
6661 a human would expect. Consider sorting user-visible strings with
6662 localeAwareCompare().
6663
6664 \snippet qstring/main.cpp 16
6665
6666//! [compare-isNull-vs-isEmpty]
6667 \note This function treats null strings the same as empty strings,
6668 for more details see \l {Distinction Between Null and Empty Strings}.
6669//! [compare-isNull-vs-isEmpty]
6670
6671 \sa operator==(), operator<(), operator>(), {Comparing Strings}
6672*/
6673
6674/*!
6675 \fn int QString::compare(const QString &s1, QLatin1StringView s2, Qt::CaseSensitivity cs)
6676 \since 4.2
6677 \overload compare()
6678
6679 Performs a comparison of \a s1 and \a s2, using the case
6680 sensitivity setting \a cs.
6681*/
6682
6683/*!
6684 \fn int QString::compare(QLatin1StringView s1, const QString &s2, Qt::CaseSensitivity cs = Qt::CaseSensitive)
6685
6686 \since 4.2
6687 \overload compare()
6688
6689 Performs a comparison of \a s1 and \a s2, using the case
6690 sensitivity setting \a cs.
6691*/
6692
6693/*!
6694 \fn int QString::compare(QStringView s, Qt::CaseSensitivity cs = Qt::CaseSensitive) const
6695
6696 \since 5.12
6697 \overload compare()
6698
6699 Performs a comparison of this with \a s, using the case
6700 sensitivity setting \a cs.
6701*/
6702
6703/*!
6704 \fn int QString::compare(QChar ch, Qt::CaseSensitivity cs = Qt::CaseSensitive) const
6705
6706 \since 5.14
6707 \overload compare()
6708
6709 Performs a comparison of this with \a ch, using the case
6710 sensitivity setting \a cs.
6711*/
6712
6713/*!
6714 \overload compare()
6715 \since 4.2
6716
6717 Lexically compares this string with the string \a other and returns
6718 a negative integer if this string is less than \a other, a positive
6719 integer if it is greater than \a other, and zero if they are equal.
6720
6721 Same as compare(*this, \a other, \a cs).
6722*/
6723int QString::compare(const QString &other, Qt::CaseSensitivity cs) const noexcept
6724{
6725 return QtPrivate::compareStrings(*this, other, cs);
6726}
6727
6728/*!
6729 \internal
6730 \since 4.5
6731*/
6732int QString::compare_helper(const QChar *data1, qsizetype length1, const QChar *data2, qsizetype length2,
6733 Qt::CaseSensitivity cs) noexcept
6734{
6735 Q_ASSERT(length1 >= 0);
6736 Q_ASSERT(length2 >= 0);
6737 Q_ASSERT(data1 || length1 == 0);
6738 Q_ASSERT(data2 || length2 == 0);
6739 return QtPrivate::compareStrings(QStringView(data1, length1), QStringView(data2, length2), cs);
6740}
6741
6742/*!
6743 \overload compare()
6744 \since 4.2
6745
6746 Same as compare(*this, \a other, \a cs).
6747*/
6748int QString::compare(QLatin1StringView other, Qt::CaseSensitivity cs) const noexcept
6749{
6750 return QtPrivate::compareStrings(*this, other, cs);
6751}
6752
6753/*!
6754 \internal
6755 \since 5.0
6756*/
6757int QString::compare_helper(const QChar *data1, qsizetype length1, const char *data2, qsizetype length2,
6758 Qt::CaseSensitivity cs) noexcept
6759{
6760 Q_ASSERT(length1 >= 0);
6761 Q_ASSERT(data1 || length1 == 0);
6762 if (!data2)
6763 return qt_lencmp(length1, 0);
6764 if (Q_UNLIKELY(length2 < 0))
6765 length2 = qsizetype(strlen(data2));
6766 return QtPrivate::compareStrings(QStringView(data1, length1),
6767 QUtf8StringView(data2, length2), cs);
6768}
6769
6770/*!
6771 \fn int QString::compare(const QString &s1, QStringView s2, Qt::CaseSensitivity cs = Qt::CaseSensitive)
6772 \overload compare()
6773*/
6774
6775/*!
6776 \fn int QString::compare(QStringView s1, const QString &s2, Qt::CaseSensitivity cs = Qt::CaseSensitive)
6777 \overload compare()
6778*/
6779
6780bool comparesEqual(const QByteArrayView &lhs, const QChar &rhs) noexcept
6781{
6782 return QtPrivate::equalStrings(QUtf8StringView(lhs), QStringView(&rhs, 1));
6783}
6784
6785Qt::strong_ordering compareThreeWay(const QByteArrayView &lhs, const QChar &rhs) noexcept
6786{
6787 const int res = QtPrivate::compareStrings(QUtf8StringView(lhs), QStringView(&rhs, 1));
6788 return Qt::compareThreeWay(res, 0);
6789}
6790
6791bool comparesEqual(const QByteArrayView &lhs, char16_t rhs) noexcept
6792{
6793 return QtPrivate::equalStrings(QUtf8StringView(lhs), QStringView(&rhs, 1));
6794}
6795
6796Qt::strong_ordering compareThreeWay(const QByteArrayView &lhs, char16_t rhs) noexcept
6797{
6798 const int res = QtPrivate::compareStrings(QUtf8StringView(lhs), QStringView(&rhs, 1));
6799 return Qt::compareThreeWay(res, 0);
6800}
6801
6802bool comparesEqual(const QByteArray &lhs, const QChar &rhs) noexcept
6803{
6804 return QtPrivate::equalStrings(QUtf8StringView(lhs), QStringView(&rhs, 1));
6805}
6806
6807Qt::strong_ordering compareThreeWay(const QByteArray &lhs, const QChar &rhs) noexcept
6808{
6809 const int res = QtPrivate::compareStrings(QUtf8StringView(lhs), QStringView(&rhs, 1));
6810 return Qt::compareThreeWay(res, 0);
6811}
6812
6813bool comparesEqual(const QByteArray &lhs, char16_t rhs) noexcept
6814{
6815 return QtPrivate::equalStrings(QUtf8StringView(lhs), QStringView(&rhs, 1));
6816}
6817
6818Qt::strong_ordering compareThreeWay(const QByteArray &lhs, char16_t rhs) noexcept
6819{
6820 const int res = QtPrivate::compareStrings(QUtf8StringView(lhs), QStringView(&rhs, 1));
6821 return Qt::compareThreeWay(res, 0);
6822}
6823
6824/*!
6825 \internal
6826 \since 6.8
6827*/
6828bool QT_FASTCALL QChar::equal_helper(QChar lhs, const char *rhs) noexcept
6829{
6830 return QtPrivate::equalStrings(QStringView(&lhs, 1), QUtf8StringView(rhs));
6831}
6832
6833int QT_FASTCALL QChar::compare_helper(QChar lhs, const char *rhs) noexcept
6834{
6835 return QtPrivate::compareStrings(QStringView(&lhs, 1), QUtf8StringView(rhs));
6836}
6837
6838/*!
6839 \internal
6840 \since 6.8
6841*/
6842bool QStringView::equal_helper(QStringView sv, const char *data, qsizetype len)
6843{
6844 Q_ASSERT(len >= 0);
6845 Q_ASSERT(data || len == 0);
6846 return QtPrivate::equalStrings(sv, QUtf8StringView(data, len));
6847}
6848
6849/*!
6850 \internal
6851 \since 6.8
6852*/
6853int QStringView::compare_helper(QStringView sv, const char *data, qsizetype len)
6854{
6855 Q_ASSERT(len >= 0);
6856 Q_ASSERT(data || len == 0);
6857 return QtPrivate::compareStrings(sv, QUtf8StringView(data, len));
6858}
6859
6860/*!
6861 \internal
6862 \since 6.8
6863*/
6864bool QLatin1StringView::equal_helper(QLatin1StringView s1, const char *s2, qsizetype len) noexcept
6865{
6866 // because qlatin1stringview.h can't include qutf8stringview.h
6867 Q_ASSERT(len >= 0);
6868 Q_ASSERT(s2 || len == 0);
6869 return QtPrivate::equalStrings(s1, QUtf8StringView(s2, len));
6870}
6871
6872/*!
6873 \internal
6874 \since 6.6
6875*/
6876int QLatin1StringView::compare_helper(const QLatin1StringView &s1, const char *s2, qsizetype len) noexcept
6877{
6878 // because qlatin1stringview.h can't include qutf8stringview.h
6879 Q_ASSERT(len >= 0);
6880 Q_ASSERT(s2 || len == 0);
6881 return QtPrivate::compareStrings(s1, QUtf8StringView(s2, len));
6882}
6883
6884/*!
6885 \internal
6886 \since 4.5
6887*/
6888int QLatin1StringView::compare_helper(const QChar *data1, qsizetype length1, QLatin1StringView s2,
6889 Qt::CaseSensitivity cs) noexcept
6890{
6891 Q_ASSERT(length1 >= 0);
6892 Q_ASSERT(data1 || length1 == 0);
6893 return QtPrivate::compareStrings(QStringView(data1, length1), s2, cs);
6894}
6895
6896/*!
6897 \fn int QString::localeAwareCompare(const QString & s1, const QString & s2)
6898
6899 Compares \a s1 with \a s2 and returns an integer less than, equal
6900 to, or greater than zero if \a s1 is less than, equal to, or
6901 greater than \a s2.
6902
6903 The comparison is performed in a locale- and also
6904 platform-dependent manner. Use this function to present sorted
6905 lists of strings to the user.
6906
6907 \sa compare(), QLocale, {Comparing Strings}
6908*/
6909
6910/*!
6911 \fn int QString::localeAwareCompare(QStringView other) const
6912 \since 6.0
6913 \overload localeAwareCompare()
6914
6915 Compares this string with the \a other string and returns an
6916 integer less than, equal to, or greater than zero if this string
6917 is less than, equal to, or greater than the \a other string.
6918
6919 The comparison is performed in a locale- and also
6920 platform-dependent manner. Use this function to present sorted
6921 lists of strings to the user.
6922
6923 Same as \c {localeAwareCompare(*this, other)}.
6924
6925 \sa {Comparing Strings}
6926*/
6927
6928/*!
6929 \fn int QString::localeAwareCompare(QStringView s1, QStringView s2)
6930 \since 6.0
6931 \overload localeAwareCompare()
6932
6933 Compares \a s1 with \a s2 and returns an integer less than, equal
6934 to, or greater than zero if \a s1 is less than, equal to, or
6935 greater than \a s2.
6936
6937 The comparison is performed in a locale- and also
6938 platform-dependent manner. Use this function to present sorted
6939 lists of strings to the user.
6940
6941 \sa {Comparing Strings}
6942*/
6943
6944
6945#if !defined(CSTR_LESS_THAN)
6946#define CSTR_LESS_THAN 1
6947#define CSTR_EQUAL 2
6948#define CSTR_GREATER_THAN 3
6949#endif
6950
6951/*!
6952 \overload localeAwareCompare()
6953
6954 Compares this string with the \a other string and returns an
6955 integer less than, equal to, or greater than zero if this string
6956 is less than, equal to, or greater than the \a other string.
6957
6958 The comparison is performed in a locale- and also
6959 platform-dependent manner. Use this function to present sorted
6960 lists of strings to the user.
6961
6962 Same as \c {localeAwareCompare(*this, other)}.
6963
6964 \sa {Comparing Strings}
6965*/
6966int QString::localeAwareCompare(const QString &other) const
6967{
6968 return localeAwareCompare_helper(constData(), size(), other.constData(), other.size());
6969}
6970
6971/*!
6972 \internal
6973 \since 4.5
6974*/
6975int QString::localeAwareCompare_helper(const QChar *data1, qsizetype length1,
6976 const QChar *data2, qsizetype length2)
6977{
6978 Q_ASSERT(length1 >= 0);
6979 Q_ASSERT(data1 || length1 == 0);
6980 Q_ASSERT(length2 >= 0);
6981 Q_ASSERT(data2 || length2 == 0);
6982
6983 // do the right thing for null and empty
6984 if (length1 == 0 || length2 == 0)
6985 return QtPrivate::compareStrings(QStringView(data1, length1), QStringView(data2, length2),
6986 Qt::CaseSensitive);
6987
6988#if QT_CONFIG(icu)
6989 return QCollator::defaultCompare(QStringView(data1, length1), QStringView(data2, length2));
6990#else
6991 const QString lhs = QString::fromRawData(data1, length1).normalized(QString::NormalizationForm_C);
6992 const QString rhs = QString::fromRawData(data2, length2).normalized(QString::NormalizationForm_C);
6993# if defined(Q_OS_WIN)
6994 int res = CompareStringEx(LOCALE_NAME_USER_DEFAULT, 0, (LPWSTR)lhs.constData(), lhs.length(), (LPWSTR)rhs.constData(), rhs.length(), NULL, NULL, 0);
6995
6996 switch (res) {
6997 case CSTR_LESS_THAN:
6998 return -1;
6999 case CSTR_GREATER_THAN:
7000 return 1;
7001 default:
7002 return 0;
7003 }
7004# elif defined (Q_OS_DARWIN)
7005 // Use CFStringCompare for comparing strings on Mac. This makes Qt order
7006 // strings the same way as native applications do, and also respects
7007 // the "Order for sorted lists" setting in the International preferences
7008 // panel.
7009 const CFStringRef thisString =
7010 CFStringCreateWithCharactersNoCopy(kCFAllocatorDefault,
7011 reinterpret_cast<const UniChar *>(lhs.constData()), lhs.length(), kCFAllocatorNull);
7012 const CFStringRef otherString =
7013 CFStringCreateWithCharactersNoCopy(kCFAllocatorDefault,
7014 reinterpret_cast<const UniChar *>(rhs.constData()), rhs.length(), kCFAllocatorNull);
7015
7016 const int result = CFStringCompare(thisString, otherString, kCFCompareLocalized);
7017 CFRelease(thisString);
7018 CFRelease(otherString);
7019 return result;
7020# elif defined(Q_OS_UNIX)
7021 // declared in <string.h> (no better than QtPrivate::compareStrings() on Android, sadly)
7022 return strcoll(lhs.toLocal8Bit().constData(), rhs.toLocal8Bit().constData());
7023# else
7024# error "This case shouldn't happen"
7025 return QtPrivate::compareStrings(lhs, rhs, Qt::CaseSensitive);
7026# endif
7027#endif // !QT_CONFIG(icu)
7028}
7029
7030
7031/*!
7032 \fn const QChar *QString::unicode() const
7033
7034 Returns a Unicode representation of the string.
7035 The result remains valid until the string is modified.
7036
7037 \note The returned string may not be '\\0'-terminated.
7038 Use size() to determine the length of the array.
7039
7040 \sa utf16(), fromRawData()
7041*/
7042
7043/*!
7044 \fn const ushort *QString::utf16() const
7045
7046 Returns the QString as a '\\0'-terminated array of unsigned
7047 shorts. The result remains valid until the string is modified.
7048
7049 The returned string is in host byte order.
7050
7051 \sa unicode()
7052*/
7053
7054const ushort *QString::utf16() const
7055{
7056 if (!d->isMutable()) {
7057 // ensure '\0'-termination for ::fromRawData strings
7058 const_cast<QString*>(this)->reallocData(d.size, QArrayData::KeepSize);
7059 }
7060 return reinterpret_cast<const ushort *>(d.data());
7061}
7062
7063/*!
7064 \fn QString &QString::nullTerminate()
7065 \since 6.10
7066
7067 If this string data isn't null-terminated, this method will make a deep
7068 copy of the data and make it null-terminated.
7069
7070 A QString is null-terminated by default, however in some cases (e.g.
7071 when using fromRawData()), the string data doesn't necessarily end
7072 with a \c {\0} character, which could be a problem when calling methods
7073 that expect a null-terminated string.
7074
7075 \sa nullTerminated(), fromRawData(), setRawData()
7076*/
7077
7078/*!
7079 \fn QString QString::nullTerminated() const &
7080 \since 6.10
7081
7082 Returns a copy of this string that is always null-terminated.
7083 See nullTerminate().
7084
7085 \sa nullTerminate(), fromRawData(), setRawData()
7086*/
7087
7088/*!
7089 Returns a string of size \a width that contains this string
7090 padded by the \a fill character.
7091
7092 If \a truncate is \c false and the size() of the string is more than
7093 \a width, then the returned string is a copy of the string.
7094
7095 \snippet qstring/main.cpp 32
7096
7097 If \a truncate is \c true and the size() of the string is more than
7098 \a width, then any characters in a copy of the string after
7099 position \a width are removed, and the copy is returned.
7100
7101 \snippet qstring/main.cpp 33
7102
7103 \sa rightJustified()
7104*/
7105
7106QString QString::leftJustified(qsizetype width, QChar fill, bool truncate) const
7107{
7108 QString result;
7109 qsizetype len = size();
7110 qsizetype padlen = width - len;
7111 if (padlen > 0) {
7112 result.resize(len+padlen);
7113 if (len)
7114 memcpy(result.d.data(), d.data(), sizeof(QChar)*len);
7115 QChar *uc = (QChar*)result.d.data() + len;
7116 while (padlen--)
7117 * uc++ = fill;
7118 } else {
7119 if (truncate)
7120 result = left(width);
7121 else
7122 result = *this;
7123 }
7124 return result;
7125}
7126
7127/*!
7128 Returns a string of size \a width that contains the \a fill
7129 character followed by the string. For example:
7130
7131 \snippet qstring/main.cpp 49
7132
7133 If \a truncate is \c false and the size() of the string is more than
7134 \a width, then the returned string is a copy of the string.
7135
7136 If \a truncate is \c true and the size() of the string is more than
7137 \a width, then the resulting string is truncated at position \a
7138 width.
7139
7140 \snippet qstring/main.cpp 50
7141
7142 \sa leftJustified()
7143*/
7144
7145QString QString::rightJustified(qsizetype width, QChar fill, bool truncate) const
7146{
7147 QString result;
7148 qsizetype len = size();
7149 qsizetype padlen = width - len;
7150 if (padlen > 0) {
7151 result.resize(len+padlen);
7152 QChar *uc = (QChar*)result.d.data();
7153 while (padlen--)
7154 * uc++ = fill;
7155 if (len)
7156 memcpy(static_cast<void *>(uc), static_cast<const void *>(d.data()), sizeof(QChar)*len);
7157 } else {
7158 if (truncate)
7159 result = left(width);
7160 else
7161 result = *this;
7162 }
7163 return result;
7164}
7165
7166/*!
7167 \fn QString QString::toLower() const
7168
7169 Returns a lowercase copy of the string.
7170
7171 \snippet qstring/main.cpp 75
7172
7173 The case conversion will always happen in the 'C' locale. For
7174 locale-dependent case folding use QLocale::toLower()
7175
7176 \sa toUpper(), QLocale::toLower()
7177*/
7178
7179namespace QUnicodeTables {
7180/*
7181 \internal
7182 Converts the \a str string starting from the position pointed to by the \a
7183 it iterator, using the Unicode case traits \c Traits, and returns the
7184 result. The input string must not be empty (the convertCase function below
7185 guarantees that).
7186
7187 The string type \c{T} is also a template and is either \c{const QString} or
7188 \c{QString}. This function can do both copy-conversion and in-place
7189 conversion depending on the state of the \a str parameter:
7190 \list
7191 \li \c{T} is \c{const QString}: copy-convert
7192 \li \c{T} is \c{QString} and its refcount != 1: copy-convert
7193 \li \c{T} is \c{QString} and its refcount == 1: in-place convert
7194 \endlist
7195
7196 In copy-convert mode, the local variable \c{s} is detached from the input
7197 \a str. In the in-place convert mode, \a str is in moved-from state and
7198 \c{s} contains the only copy of the string, without reallocation (thus,
7199 \a it is still valid).
7200
7201 There is one pathological case left: when the in-place conversion needs to
7202 reallocate memory to grow the buffer. In that case, we need to adjust the \a
7203 it pointer.
7204 */
7205template <typename T>
7206Q_NEVER_INLINE
7208{
7209 Q_ASSERT(!str.isEmpty());
7210 QString s = std::move(str); // will copy if T is const QString
7211 QChar *pp = s.begin() + it.index(); // will detach if necessary
7212
7213 do {
7214 const auto folded = fullConvertCase(it.next(), which);
7215 if (Q_UNLIKELY(folded.size() > 1)) {
7216 if (folded.chars[0] == *pp && folded.size() == 2) {
7217 // special case: only second actually changed (e.g. surrogate pairs),
7218 // avoid slow case
7219 ++pp;
7220 *pp++ = folded.chars[1];
7221 } else {
7222 // slow path: the string is growing
7223 qsizetype inpos = it.index() - 1;
7225
7226 s.replace(outpos, 1, reinterpret_cast<const QChar *>(folded.data()), folded.size());
7227 pp = const_cast<QChar *>(s.constBegin()) + outpos + folded.size();
7228
7229 // Adjust the input iterator if we are performing an in-place conversion
7230 if constexpr (!std::is_const<T>::value)
7232 }
7233 } else {
7234 *pp++ = folded.chars[0];
7235 }
7236 } while (it.hasNext());
7237
7238 return s;
7239}
7240
7241template <typename T>
7242static QString convertCase(T &str, QUnicodeTables::Case which)
7243{
7244 const QChar *p = str.constBegin();
7245 const QChar *e = p + str.size();
7246
7247 // this avoids out of bounds check in the loop
7248 while (e != p && e[-1].isHighSurrogate())
7249 --e;
7250
7251 QStringIterator it(p, e);
7252 while (it.hasNext()) {
7253 const char32_t uc = it.next();
7254 if (qGetProp(uc)->cases[which].diff) {
7255 it.recede();
7256 return detachAndConvertCase(str, it, which);
7257 }
7258 }
7259 return std::move(str);
7260}
7261} // namespace QUnicodeTables
7262
7263QString QString::toLower_helper(const QString &str)
7264{
7265 return QUnicodeTables::convertCase(str, QUnicodeTables::LowerCase);
7266}
7267
7268QString QString::toLower_helper(QString &str)
7269{
7270 return QUnicodeTables::convertCase(str, QUnicodeTables::LowerCase);
7271}
7272
7273/*!
7274 \fn QString QString::toCaseFolded() const
7275
7276 Returns the case folded equivalent of the string. For most Unicode
7277 characters this is the same as toLower().
7278*/
7279
7280QString QString::toCaseFolded_helper(const QString &str)
7281{
7282 return QUnicodeTables::convertCase(str, QUnicodeTables::CaseFold);
7283}
7284
7285QString QString::toCaseFolded_helper(QString &str)
7286{
7287 return QUnicodeTables::convertCase(str, QUnicodeTables::CaseFold);
7288}
7289
7290/*!
7291 \fn QString QString::toUpper() const
7292
7293 Returns an uppercase copy of the string.
7294
7295 \snippet qstring/main.cpp 81
7296
7297 The case conversion will always happen in the 'C' locale. For
7298 locale-dependent case folding use QLocale::toUpper().
7299
7300 \note In some cases the uppercase form of a string may be longer than the
7301 original.
7302
7303 \sa toLower(), QLocale::toUpper()
7304*/
7305
7306QString QString::toUpper_helper(const QString &str)
7307{
7308 return QUnicodeTables::convertCase(str, QUnicodeTables::UpperCase);
7309}
7310
7311QString QString::toUpper_helper(QString &str)
7312{
7313 return QUnicodeTables::convertCase(str, QUnicodeTables::UpperCase);
7314}
7315
7316/*!
7317 \since 5.5
7318
7319 Safely builds a formatted string from the format string \a cformat
7320 and an arbitrary list of arguments.
7321
7322 The format string supports the conversion specifiers, length modifiers,
7323 and flags provided by printf() in the standard C++ library. The \a cformat
7324 string and \c{%s} arguments must be UTF-8 encoded.
7325
7326 \note The \c{%lc} escape sequence expects a unicode character of type
7327 \c char16_t, or \c ushort (as returned by QChar::unicode()).
7328 The \c{%ls} escape sequence expects a pointer to a zero-terminated array
7329 of unicode characters of type \c char16_t, or ushort (as returned by
7330 QString::utf16()). This is at odds with the printf() in the standard C++
7331 library, which defines \c {%lc} to print a wchar_t and \c{%ls} to print
7332 a \c{wchar_t*}, and might also produce compiler warnings on platforms
7333 where the size of \c {wchar_t} is not 16 bits.
7334
7335 \warning We do not recommend using QString::asprintf() in new Qt
7336 code. Instead, consider using QTextStream or arg(), both of
7337 which support Unicode strings seamlessly and are type-safe.
7338 Here is an example that uses QTextStream:
7339
7340 \snippet qstring/main.cpp 64
7341
7342 For \l {QObject::tr()}{translations}, especially if the string
7343 contains more than one escape sequence, you should consider using
7344 the arg() function instead. This allows the order of the
7345 replacements to be controlled by the translator.
7346
7347 \sa arg()
7348*/
7349
7350QString QString::asprintf(const char *cformat, ...)
7351{
7352 va_list ap;
7353 va_start(ap, cformat);
7354 const QString s = vasprintf(cformat, ap);
7355 va_end(ap);
7356 return s;
7357}
7358
7359static void append_utf8(QString &qs, const char *cs, qsizetype len)
7360{
7361 const qsizetype oldSize = qs.size();
7362 qs.resize(oldSize + len);
7363 const QChar *newEnd = QUtf8::convertToUnicode(qs.data() + oldSize, QByteArrayView(cs, len));
7364 qs.resize(newEnd - qs.constData());
7365}
7366
7367static uint parse_flag_characters(const char * &c) noexcept
7368{
7369 uint flags = QLocaleData::ZeroPadExponent;
7370 while (true) {
7371 switch (*c) {
7372 case '#':
7375 break;
7376 case '0': flags |= QLocaleData::ZeroPadded; break;
7377 case '-': flags |= QLocaleData::LeftAdjusted; break;
7378 case ' ': flags |= QLocaleData::BlankBeforePositive; break;
7379 case '+': flags |= QLocaleData::AlwaysShowSign; break;
7380 case '\'': flags |= QLocaleData::GroupDigits; break;
7381 default: return flags;
7382 }
7383 ++c;
7384 }
7385}
7386
7387static int parse_field_width(const char *&c, qsizetype size)
7388{
7389 Q_ASSERT(isAsciiDigit(*c));
7390 const char *const stop = c + size;
7391
7392 // can't be negative - started with a digit
7393 // contains at least one digit
7394 auto [result, used] = qstrntoull(c, size, 10);
7395 c += used;
7396 if (used <= 0)
7397 return false;
7398 // preserve Qt 5.5 behavior of consuming all digits, no matter how many
7399 while (c < stop && isAsciiDigit(*c))
7400 ++c;
7401 return result < qulonglong(std::numeric_limits<int>::max()) ? int(result) : 0;
7402}
7403
// printf length modifiers recognised by parse_length_modifier() and vasprintf().
enum LengthMod { lm_none, lm_hh, lm_h, lm_l, lm_ll, lm_L, lm_j, lm_z, lm_t };

static inline bool can_consume(const char * &c, char ch) noexcept
{
    // Advance past *c only when it equals ch; report whether that happened.
    const bool matched = (*c == ch);
    if (matched)
        ++c;
    return matched;
}
7414
7415static LengthMod parse_length_modifier(const char * &c) noexcept
7416{
7417 switch (*c++) {
7418 case 'h': return can_consume(c, 'h') ? lm_hh : lm_h;
7419 case 'l': return can_consume(c, 'l') ? lm_ll : lm_l;
7420 case 'L': return lm_L;
7421 case 'j': return lm_j;
7422 case 'z':
7423 case 'Z': return lm_z;
7424 case 't': return lm_t;
7425 }
7426 --c; // don't consume *c - it wasn't a flag
7427 return lm_none;
7428}
7429
7430/*!
7431 \fn QString QString::vasprintf(const char *cformat, va_list ap)
7432 \since 5.5
7433
7434 Equivalent method to asprintf(), but takes a va_list \a ap
7435 instead of a list of variable arguments. See the asprintf()
7436 documentation for an explanation of \a cformat.
7437
7438 This method does not call the va_end macro; the caller
7439 is responsible for calling va_end on \a ap.
7440
7441 \sa asprintf()
7442*/
7443
// Builds a QString from the printf-style format cformat, reading the arguments
// from ap. Literal text is decoded with append_utf8(); each '%' escape is parsed
// as flags, field width, precision, length modifier and conversion specifier.
// An escape cut short by the end of the format is appended as raw text; an
// unknown conversion character has the text of the escape up to that character
// appended, and parsing resumes at that character. va_end is not called here.
7444QString QString::vasprintf(const char *cformat, va_list ap)
7445{
7446 if (!cformat || !*cformat) {
7447 // Qt 1.x compat
7448 return fromLatin1("");
7449 }
7450
7451 // Parse cformat
7452
7453 QString result;
7454 const char *c = cformat;
7455 const char *formatEnd = cformat + qstrlen(cformat);
7456 for (;;) {
7457 // Copy non-escape chars to result
7458 const char *cb = c;
7459 while (*c != '\0' && *c != '%')
7460 c++;
7461 append_utf8(result, cb, qsizetype(c - cb));
7462
7463 if (*c == '\0')
7464 break;
7465
7466 // Found '%'
// escape_start is kept so the raw escape text can be emitted if parsing fails
7467 const char *escape_start = c;
7468 ++c;
7469
7470 if (*c == '\0') {
7471 result.append(u'%'); // a % at the end of the string - treat as non-escape text
7472 break;
7473 }
7474 if (*c == '%') {
7475 result.append(u'%'); // %%
7476 ++c;
7477 continue;
7478 }
7479
7480 uint flags = parse_flag_characters(c);
7481
7482 if (*c == '\0') {
7483 result.append(QLatin1StringView(escape_start)); // incomplete escape, treat as non-escape text
7484 break;
7485 }
7486
7487 // Parse field width
7488 int width = -1; // -1 means unspecified
7489 if (isAsciiDigit(*c)) {
7490 width = parse_field_width(c, formatEnd - c);
7491 } else if (*c == '*') { // can't parse this in another function, not portably, at least
// '*' takes the width from the next int argument
7492 width = va_arg(ap, int);
7493 if (width < 0)
7494 width = -1; // treat all negative numbers as unspecified
7495 ++c;
7496 }
7497
7498 if (*c == '\0') {
7499 result.append(QLatin1StringView(escape_start)); // incomplete escape, treat as non-escape text
7500 break;
7501 }
7502
7503 // Parse precision
7504 int precision = -1; // -1 means unspecified
7505 if (*c == '.') {
7506 ++c;
// a bare '.' with no digits or '*' after it means precision 0
7507 precision = 0;
7508 if (isAsciiDigit(*c)) {
7509 precision = parse_field_width(c, formatEnd - c);
7510 } else if (*c == '*') { // can't parse this in another function, not portably, at least
7511 precision = va_arg(ap, int);
7512 if (precision < 0)
7513 precision = -1; // treat all negative numbers as unspecified
7514 ++c;
7515 }
7516 }
7517
7518 if (*c == '\0') {
7519 result.append(QLatin1StringView(escape_start)); // incomplete escape, treat as non-escape text
7520 break;
7521 }
7522
7523 const LengthMod length_mod = parse_length_modifier(c);
7524
7525 if (*c == '\0') {
7526 result.append(QLatin1StringView(escape_start)); // incomplete escape, treat as non-escape text
7527 break;
7528 }
7529
7530 // Parse the conversion specifier and do the conversion
7531 QString subst;
7532 switch (*c) {
7533 case 'd':
7534 case 'i': {
// Signed integers: the length modifier selects the type read from ap.
// hh and h are read as int and are not narrowed afterwards.
// NOTE(review): lm_j is read as long int here - confirm this matches intmax_t
// on every supported platform. Any other modifier (e.g. lm_L) yields 0
// without reading an argument.
7535 qint64 i;
7536 switch (length_mod) {
7537 case lm_none: i = va_arg(ap, int); break;
7538 case lm_hh: i = va_arg(ap, int); break;
7539 case lm_h: i = va_arg(ap, int); break;
7540 case lm_l: i = va_arg(ap, long int); break;
7541 case lm_ll: i = va_arg(ap, qint64); break;
7542 case lm_j: i = va_arg(ap, long int); break;
7543
7544 /* ptrdiff_t actually, but it should be the same for us */
7545 case lm_z: i = va_arg(ap, qsizetype); break;
7546 case lm_t: i = va_arg(ap, qsizetype); break;
7547 default: i = 0; break;
7548 }
7549 subst = QLocaleData::c()->longLongToString(i, precision, 10, width, flags);
7550 ++c;
7551 break;
7552 }
7553 case 'o':
7554 case 'u':
7555 case 'x':
7556 case 'X': {
// Unsigned integers in base 8, 10 or 16.
// NOTE(review): unlike the signed switch above there is no lm_j case, so
// "%ju"/"%jx" take the default branch: u is 0 and no argument is read from ap,
// which leaves later conversions reading the wrong arguments.
7557 quint64 u;
7558 switch (length_mod) {
7559 case lm_none: u = va_arg(ap, uint); break;
7560 case lm_hh: u = va_arg(ap, uint); break;
7561 case lm_h: u = va_arg(ap, uint); break;
7562 case lm_l: u = va_arg(ap, ulong); break;
7563 case lm_ll: u = va_arg(ap, quint64); break;
7564 case lm_t: u = va_arg(ap, size_t); break;
7565 case lm_z: u = va_arg(ap, size_t); break;
7566 default: u = 0; break;
7567 }
7568
// only 'X' is upper case among the specifiers handled in this branch
7569 if (isAsciiUpper(*c))
7570 flags |= QLocaleData::CapitalEorX;
7571
7572 int base = 10;
7573 switch (QtMiscUtils::toAsciiLower(*c)) {
7574 case 'o':
7575 base = 8; break;
7576 case 'u':
7577 base = 10; break;
7578 case 'x':
7579 base = 16; break;
7580 default: break;
7581 }
7582 subst = QLocaleData::c()->unsLongLongToString(u, precision, base, width, flags);
7583 ++c;
7584 break;
7585 }
7586 case 'E':
7587 case 'e':
7588 case 'F':
7589 case 'f':
7590 case 'G':
7591 case 'g':
7592 case 'A':
7593 case 'a': {
// Floating point: the value is always held in a double, even for "%Lf".
7594 double d;
7595 if (length_mod == lm_L)
7596 d = va_arg(ap, long double); // not supported - converted to a double
7597 else
7598 d = va_arg(ap, double);
7599
7600 if (isAsciiUpper(*c))
7601 flags |= QLocaleData::CapitalEorX;
7602
7603 QLocaleData::DoubleForm form = QLocaleData::DFDecimal;
7604 switch (QtMiscUtils::toAsciiLower(*c)) {
7605 case 'e': form = QLocaleData::DFExponent; break;
7606 case 'a': // not supported - decimal form used instead
7607 case 'f': form = QLocaleData::DFDecimal; break;
7608 case 'g': form = QLocaleData::DFSignificantDigits; break;
7609 default: break;
7610 }
7611 subst = QLocaleData::c()->doubleToString(d, precision, form, width, flags);
7612 ++c;
7613 break;
7614 }
7615 case 'c': {
// "%lc" goes through QChar::fromUcs2; plain "%c" is cast to uchar and treated as Latin-1
7616 if (length_mod == lm_l)
7617 subst = QChar::fromUcs2(va_arg(ap, int));
7618 else
7619 subst = QLatin1Char((uchar) va_arg(ap, int));
7620 ++c;
7621 break;
7622 }
7623 case 's': {
7624 if (length_mod == lm_l) {
// "%ls": zero-terminated char16_t array; a non-negative precision caps the
// number of code units taken (precision -1 never reaches 0, so only the
// terminator stops the scan)
7625 const char16_t *buff = va_arg(ap, const char16_t*);
7626 const auto *ch = buff;
7627 while (precision != 0 && *ch != 0) {
7628 ++ch;
7629 --precision;
7630 }
7631 subst.setUtf16(buff, ch - buff);
7632 } else if (precision == -1) {
// "%s" without precision: the whole zero-terminated UTF-8 string
7633 subst = QString::fromUtf8(va_arg(ap, const char*));
7634 } else {
// "%.Ns": precision is passed to qstrnlen() as the maximum length in bytes
7635 const char *buff = va_arg(ap, const char*);
7636 subst = QString::fromUtf8(buff, qstrnlen(buff, precision));
7637 }
7638 ++c;
7639 break;
7640 }
7641 case 'p': {
// pointers are printed in base 16 with ShowBase added to the flags
7642 void *arg = va_arg(ap, void*);
7643 const quint64 i = reinterpret_cast<quintptr>(arg);
7644 flags |= QLocaleData::ShowBase;
7645 subst = QLocaleData::c()->unsLongLongToString(i, precision, 16, width, flags);
7646 ++c;
7647 break;
7648 }
7649 case 'n':
// "%n": stores result.size() at this point through the pointer argument;
// the pointee type follows the length modifier.
// NOTE(review): subst stays empty but is still justified to `width` below,
// so "%5n" appends five spaces - confirm that is intended.
7650 switch (length_mod) {
7651 case lm_hh: {
7652 signed char *n = va_arg(ap, signed char*);
7653 *n = result.size();
7654 break;
7655 }
7656 case lm_h: {
7657 short int *n = va_arg(ap, short int*);
7658 *n = result.size();
7659 break;
7660 }
7661 case lm_l: {
7662 long int *n = va_arg(ap, long int*);
7663 *n = result.size();
7664 break;
7665 }
7666 case lm_ll: {
7667 qint64 *n = va_arg(ap, qint64*);
7668 *n = result.size();
7669 break;
7670 }
7671 default: {
7672 int *n = va_arg(ap, int*);
7673 *n = int(result.size());
7674 break;
7675 }
7676 }
7677 ++c;
7678 break;
7679
7680 default: // bad escape, treat as non-escape text
// c is not advanced here: the text from '%' up to (not including) *c is
// appended, and the next iteration copies *c as ordinary text
7681 for (const char *cc = escape_start; cc != c; ++cc)
7682 result.append(QLatin1Char(*cc));
7683 continue;
7684 }
7685
// Pad the substitution to the field width (a width of -1 adds no padding)
7686 if (flags & QLocaleData::LeftAdjusted)
7687 result.append(subst.leftJustified(width));
7688 else
7689 result.append(subst.rightJustified(width));
7690 }
7691
7692 return result;
7693}
7694
7695/*!
7696 \fn QString::toLongLong(bool *ok, int base) const
7697
7698 Returns the string converted to a \c{long long} using base \a
7699 base, which is 10 by default and must be between 2 and 36, or 0.
7700 Returns 0 if the conversion fails.
7701
7702 If \a ok is not \nullptr, failure is reported by setting *\a{ok}
7703 to \c false, and success by setting *\a{ok} to \c true.
7704
7705 If \a base is 0, the C language convention is used: if the string begins
7706 with "0x", base 16 is used; otherwise, if the string begins with "0b", base
7707 2 is used; otherwise, if the string begins with "0", base 8 is used;
7708 otherwise, base 10 is used.
7709
7710 The string conversion will always happen in the 'C' locale. For
7711 locale-dependent conversion use QLocale::toLongLong()
7712
7713 Example:
7714
7715 \snippet qstring/main.cpp 74
7716
7717 This function ignores leading and trailing whitespace.
7718
7719 \note Support for the "0b" prefix was added in Qt 6.4.
7720
7721 \sa number(), toULongLong(), toInt(), QLocale::toLongLong()
7722*/
7723
7724template <typename Int>
7725static Int toIntegral(QStringView string, bool *ok, int base)
7726{
7727#if defined(QT_CHECK_RANGE)
7728 if (base != 0 && (base < 2 || base > 36)) {
7729 qWarning("QString::toIntegral: Invalid base (%d)", base);
7730 base = 10;
7731 }
7732#endif
7733
7734 QVarLengthArray<uchar> latin1(string.size());
7735 qt_to_latin1(latin1.data(), string.utf16(), string.size());
7736 QSimpleParsedNumber<Int> r;
7737 if constexpr (std::is_signed_v<Int>)
7738 r = QLocaleData::bytearrayToLongLong(latin1, base);
7739 else
7740 r = QLocaleData::bytearrayToUnsLongLong(latin1, base);
7741 if (ok)
7742 *ok = r.ok();
7743 return r.result;
7744}
7745
7746qlonglong QString::toIntegral_helper(QStringView string, bool *ok, int base)
7747{
7748 return toIntegral<qlonglong>(string, ok, base);
7749}
7750
7751/*!
7752 \fn QString::toULongLong(bool *ok, int base) const
7753
7754 Returns the string converted to an \c{unsigned long long} using base \a
7755 base, which is 10 by default and must be between 2 and 36, or 0.
7756 Returns 0 if the conversion fails.
7757
7758 If \a ok is not \nullptr, failure is reported by setting *\a{ok}
7759 to \c false, and success by setting *\a{ok} to \c true.
7760
7761 If \a base is 0, the C language convention is used: if the string begins
7762 with "0x", base 16 is used; otherwise, if the string begins with "0b", base
7763 2 is used; otherwise, if the string begins with "0", base 8 is used;
7764 otherwise, base 10 is used.
7765
7766 The string conversion will always happen in the 'C' locale. For
7767 locale-dependent conversion use QLocale::toULongLong()
7768
7769 Example:
7770
7771 \snippet qstring/main.cpp 79
7772
7773 This function ignores leading and trailing whitespace.
7774
7775 \note Support for the "0b" prefix was added in Qt 6.4.
7776
7777 \sa number(), toLongLong(), QLocale::toULongLong()
7778*/
7779
7780qulonglong QString::toIntegral_helper(QStringView string, bool *ok, uint base)
7781{
7782 return toIntegral<qulonglong>(string, ok, base);
7783}
7784
7785/*!
7786 \fn long QString::toLong(bool *ok, int base) const
7787
7788 Returns the string converted to a \c long using base \a
7789 base, which is 10 by default and must be between 2 and 36, or 0.
7790 Returns 0 if the conversion fails.
7791
7792 If \a ok is not \nullptr, failure is reported by setting *\a{ok}
7793 to \c false, and success by setting *\a{ok} to \c true.
7794
7795 If \a base is 0, the C language convention is used: if the string begins
7796 with "0x", base 16 is used; otherwise, if the string begins with "0b", base
7797 2 is used; otherwise, if the string begins with "0", base 8 is used;
7798 otherwise, base 10 is used.
7799
7800 The string conversion will always happen in the 'C' locale. For
7801 locale-dependent conversion use QLocale::toLongLong()
7802
7803 Example:
7804
7805 \snippet qstring/main.cpp 73
7806
7807 This function ignores leading and trailing whitespace.
7808
7809 \note Support for the "0b" prefix was added in Qt 6.4.
7810
7811 \sa number(), toULong(), toInt(), QLocale::toInt()
7812*/
7813
7814/*!
7815 \fn ulong QString::toULong(bool *ok, int base) const
7816
7817 Returns the string converted to an \c{unsigned long} using base \a
7818 base, which is 10 by default and must be between 2 and 36, or 0.
7819 Returns 0 if the conversion fails.
7820
7821 If \a ok is not \nullptr, failure is reported by setting *\a{ok}
7822 to \c false, and success by setting *\a{ok} to \c true.
7823
7824 If \a base is 0, the C language convention is used: if the string begins
7825 with "0x", base 16 is used; otherwise, if the string begins with "0b", base
7826 2 is used; otherwise, if the string begins with "0", base 8 is used;
7827 otherwise, base 10 is used.
7828
7829 The string conversion will always happen in the 'C' locale. For
7830 locale-dependent conversion use QLocale::toULongLong()
7831
7832 Example:
7833
7834 \snippet qstring/main.cpp 78
7835
7836 This function ignores leading and trailing whitespace.
7837
7838 \note Support for the "0b" prefix was added in Qt 6.4.
7839
7840 \sa number(), QLocale::toUInt()
7841*/
7842
7843/*!
7844 \fn int QString::toInt(bool *ok, int base) const
7845 Returns the string converted to an \c int using base \a
7846 base, which is 10 by default and must be between 2 and 36, or 0.
7847 Returns 0 if the conversion fails.
7848
7849 If \a ok is not \nullptr, failure is reported by setting *\a{ok}
7850 to \c false, and success by setting *\a{ok} to \c true.
7851
7852 If \a base is 0, the C language convention is used: if the string begins
7853 with "0x", base 16 is used; otherwise, if the string begins with "0b", base
7854 2 is used; otherwise, if the string begins with "0", base 8 is used;
7855 otherwise, base 10 is used.
7856
7857 The string conversion will always happen in the 'C' locale. For
7858 locale-dependent conversion use QLocale::toInt()
7859
7860 Example:
7861
7862 \snippet qstring/main.cpp 72
7863
7864 This function ignores leading and trailing whitespace.
7865
7866 \note Support for the "0b" prefix was added in Qt 6.4.
7867
7868 \sa number(), toUInt(), toDouble(), QLocale::toInt()
7869*/
7870
7871/*!
7872 \fn uint QString::toUInt(bool *ok, int base) const
7873 Returns the string converted to an \c{unsigned int} using base \a
7874 base, which is 10 by default and must be between 2 and 36, or 0.
7875 Returns 0 if the conversion fails.
7876
7877 If \a ok is not \nullptr, failure is reported by setting *\a{ok}
7878 to \c false, and success by setting *\a{ok} to \c true.
7879
7880 If \a base is 0, the C language convention is used: if the string begins
7881 with "0x", base 16 is used; otherwise, if the string begins with "0b", base
7882 2 is used; otherwise, if the string begins with "0", base 8 is used;
7883 otherwise, base 10 is used.
7884
7885 The string conversion will always happen in the 'C' locale. For
7886 locale-dependent conversion use QLocale::toUInt()
7887
7888 Example:
7889
7890 \snippet qstring/main.cpp 77
7891
7892 This function ignores leading and trailing whitespace.
7893
7894 \note Support for the "0b" prefix was added in Qt 6.4.
7895
7896 \sa number(), toInt(), QLocale::toUInt()
7897*/
7898
7899/*!
7900 \fn short QString::toShort(bool *ok, int base) const
7901
7902 Returns the string converted to a \c short using base \a
7903 base, which is 10 by default and must be between 2 and 36, or 0.
7904 Returns 0 if the conversion fails.
7905
7906 If \a ok is not \nullptr, failure is reported by setting *\a{ok}
7907 to \c false, and success by setting *\a{ok} to \c true.
7908
7909 If \a base is 0, the C language convention is used: if the string begins
7910 with "0x", base 16 is used; otherwise, if the string begins with "0b", base
7911 2 is used; otherwise, if the string begins with "0", base 8 is used;
7912 otherwise, base 10 is used.
7913
7914 The string conversion will always happen in the 'C' locale. For
7915 locale-dependent conversion use QLocale::toShort()
7916
7917 Example:
7918
7919 \snippet qstring/main.cpp 76
7920
7921 This function ignores leading and trailing whitespace.
7922
7923 \note Support for the "0b" prefix was added in Qt 6.4.
7924
7925 \sa number(), toUShort(), toInt(), QLocale::toShort()
7926*/
7927
7928/*!
7929 \fn ushort QString::toUShort(bool *ok, int base) const
7930
7931 Returns the string converted to an \c{unsigned short} using base \a
7932 base, which is 10 by default and must be between 2 and 36, or 0.
7933 Returns 0 if the conversion fails.
7934
7935 If \a ok is not \nullptr, failure is reported by setting *\a{ok}
7936 to \c false, and success by setting *\a{ok} to \c true.
7937
7938 If \a base is 0, the C language convention is used: if the string begins
7939 with "0x", base 16 is used; otherwise, if the string begins with "0b", base
7940 2 is used; otherwise, if the string begins with "0", base 8 is used;
7941 otherwise, base 10 is used.
7942
7943 The string conversion will always happen in the 'C' locale. For
7944 locale-dependent conversion use QLocale::toUShort()
7945
7946 Example:
7947
7948 \snippet qstring/main.cpp 80
7949
7950 This function ignores leading and trailing whitespace.
7951
7952 \note Support for the "0b" prefix was added in Qt 6.4.
7953
7954 \sa number(), toShort(), QLocale::toUShort()
7955*/
7956
7957/*!
7958 Returns the string converted to a \c double value.
7959
7960 Returns an infinity if the conversion overflows or 0.0 if the
7961 conversion fails for other reasons (e.g. underflow).
7962
7963 If \a ok is not \nullptr, failure is reported by setting *\a{ok}
7964 to \c false, and success by setting *\a{ok} to \c true.
7965
7966 \snippet qstring/main.cpp 66
7967
7968 \warning The QString content may only contain valid numerical characters,
7969 which include the plus/minus sign, the character e used in scientific
7970 notation, and the decimal point. Including the unit or additional characters
7971 leads to a conversion error.
7972
7973 \snippet qstring/main.cpp 67
7974
7975 The string conversion will always happen in the 'C' locale. For
7976 locale-dependent conversion use QLocale::toDouble()
7977
7978 \snippet qstring/main.cpp 68
7979
7980 For historical reasons, this function does not handle
7981 thousands group separators. If you need to convert such numbers,
7982 use QLocale::toDouble().
7983
7984 \snippet qstring/main.cpp 69
7985
7986 This function ignores leading and trailing whitespace.
7987
7988 \sa number(), QLocale::setDefault(), QLocale::toDouble(), trimmed()
7989*/
7990
7991double QString::toDouble(bool *ok) const
7992{
7993 return QStringView(*this).toDouble(ok);
7994}
7995
7996double QStringView::toDouble(bool *ok) const
7997{
7998 QStringView string = qt_trimmed(*this);
7999 QVarLengthArray<uchar> latin1(string.size());
8000 qt_to_latin1(latin1.data(), string.utf16(), string.size());
8001 auto r = qt_asciiToDouble(reinterpret_cast<const char *>(latin1.data()), string.size());
8002 if (ok != nullptr)
8003 *ok = r.ok();
8004 return r.result;
8005}
8006
8007/*!
8008 Returns the string converted to a \c float value.
8009
8010 Returns an infinity if the conversion overflows or 0.0 if the
8011 conversion fails for other reasons (e.g. underflow).
8012
8013 If \a ok is not \nullptr, failure is reported by setting *\a{ok}
8014 to \c false, and success by setting *\a{ok} to \c true.
8015
8016 \warning The QString content may only contain valid numerical characters,
8017 which include the plus/minus sign, the character e used in scientific
8018 notation, and the decimal point. Including the unit or additional characters
8019 leads to a conversion error.
8020
8021 The string conversion will always happen in the 'C' locale. For
8022 locale-dependent conversion use QLocale::toFloat()
8023
8024 For historical reasons, this function does not handle
8025 thousands group separators. If you need to convert such numbers,
8026 use QLocale::toFloat().
8027
8028 Example:
8029
8030 \snippet qstring/main.cpp 71
8031
8032 This function ignores leading and trailing whitespace.
8033
8034 \sa number(), toDouble(), toInt(), QLocale::toFloat(), trimmed()
8035*/
8036
8037float QString::toFloat(bool *ok) const
8038{
8039 return QLocaleData::convertDoubleToFloat(toDouble(ok), ok);
8040}
8041
8042float QStringView::toFloat(bool *ok) const
8043{
8044 return QLocaleData::convertDoubleToFloat(toDouble(ok), ok);
8045}
8046
8047/*! \fn QString &QString::setNum(int n, int base)
8048
8049 Sets the string to the printed value of \a n in the specified \a
8050 base, and returns a reference to the string.
8051
8052 The base is 10 by default and must be between 2 and 36.
8053
8054 \snippet qstring/main.cpp 56
8055
8056 The formatting always uses QLocale::C, i.e., English/UnitedStates.
8057 To get a localized string representation of a number, use
8058 QLocale::toString() with the appropriate locale.
8059
8060 \sa number()
8061*/
8062
8063/*! \fn QString &QString::setNum(uint n, int base)
8064
8065 \overload
8066*/
8067
8068/*! \fn QString &QString::setNum(long n, int base)
8069
8070 \overload
8071*/
8072
8073/*! \fn QString &QString::setNum(ulong n, int base)
8074
8075 \overload
8076*/
8077
8078/*!
8079 \overload
8080*/
8081QString &QString::setNum(qlonglong n, int base)
8082{
8083 return *this = number(n, base);
8084}
8085
8086/*!
8087 \overload
8088*/
8089QString &QString::setNum(qulonglong n, int base)
8090{
8091 return *this = number(n, base);
8092}
8093
8094/*! \fn QString &QString::setNum(short n, int base)
8095
8096 \overload
8097*/
8098
8099/*! \fn QString &QString::setNum(ushort n, int base)
8100
8101 \overload
8102*/
8103
8104/*!
8105 \overload
8106
8107 Sets the string to the printed value of \a n, formatted according to the
8108 given \a format and \a precision, and returns a reference to the string.
8109
8110 \sa number(), QLocale::FloatingPointPrecisionOption, {Number Formats}
8111*/
8112
8113QString &QString::setNum(double n, char format, int precision)
8114{
8115 return *this = number(n, format, precision);
8116}
8117
8118/*!
8119 \fn QString &QString::setNum(float n, char format, int precision)
8120 \overload
8121
8122 Sets the string to the printed value of \a n, formatted according
8123 to the given \a format and \a precision, and returns a reference
8124 to the string.
8125
8126 The formatting always uses QLocale::C, i.e., English/UnitedStates.
8127 To get a localized string representation of a number, use
8128 QLocale::toString() with the appropriate locale.
8129
8130 \sa number()
8131*/
8132
8133
8134/*!
8135 \fn QString QString::number(long n, int base)
8136
8137 Returns a string equivalent of the number \a n according to the
8138 specified \a base.
8139
8140 The base is 10 by default and must be between 2
8141 and 36. For bases other than 10, \a n is treated as an
8142 unsigned integer.
8143
8144 The formatting always uses QLocale::C, i.e., English/UnitedStates.
8145 To get a localized string representation of a number, use
8146 QLocale::toString() with the appropriate locale.
8147
8148 \snippet qstring/main.cpp 35
8149
8150 \sa setNum()
8151*/
8152
8153QString QString::number(long n, int base)
8154{
8155 return number(qlonglong(n), base);
8156}
8157
8158/*!
8159 \fn QString QString::number(ulong n, int base)
8160
8161 \overload
8162*/
8163QString QString::number(ulong n, int base)
8164{
8165 return number(qulonglong(n), base);
8166}
8167
8168/*!
8169 \overload
8170*/
8171QString QString::number(int n, int base)
8172{
8173 return number(qlonglong(n), base);
8174}
8175
8176/*!
8177 \overload
8178*/
8179QString QString::number(uint n, int base)
8180{
8181 return number(qulonglong(n), base);
8182}
8183
8184/*!
8185 \overload
8186*/
8187QString QString::number(qlonglong n, int base)
8188{
8189#if defined(QT_CHECK_RANGE)
8190 if (base < 2 || base > 36) {
8191 qWarning("QString::setNum: Invalid base (%d)", base);
8192 base = 10;
8193 }
8194#endif
8195 bool negative = n < 0;
8196 /*
8197 Negating std::numeric_limits<qlonglong>::min() hits undefined behavior, so
8198 taking an absolute value has to take a slight detour.
8199 */
8200 return qulltoBasicLatin(negative ? 1u + qulonglong(-(n + 1)) : qulonglong(n), base, negative);
8201}
8202
8203/*!
8204 \overload
8205*/
8206QString QString::number(qulonglong n, int base)
8207{
8208#if defined(QT_CHECK_RANGE)
8209 if (base < 2 || base > 36) {
8210 qWarning("QString::setNum: Invalid base (%d)", base);
8211 base = 10;
8212 }
8213#endif
8214 return qulltoBasicLatin(n, base, false);
8215}
8216
8217
8218/*!
8219 Returns a string representing the floating-point number \a n.
8220
8221 Returns a string that represents \a n, formatted according to the specified
8222 \a format and \a precision.
8223
8224 For formats with an exponent, the exponent will show its sign and have at
8225 least two digits, left-padding the exponent with zero if needed.
8226
8227 \sa setNum(), QLocale::toString(), QLocale::FloatingPointPrecisionOption, {Number Formats}
8228*/
8229QString QString::number(double n, char format, int precision)
8230{
8231 QLocaleData::DoubleForm form = QLocaleData::DFDecimal;
8232
8233 switch (QtMiscUtils::toAsciiLower(format)) {
8234 case 'f':
8235 form = QLocaleData::DFDecimal;
8236 break;
8237 case 'e':
8238 form = QLocaleData::DFExponent;
8239 break;
8240 case 'g':
8241 form = QLocaleData::DFSignificantDigits;
8242 break;
8243 default:
8244#if defined(QT_CHECK_RANGE)
8245 qWarning("QString::setNum: Invalid format char '%c'", format);
8246#endif
8247 break;
8248 }
8249
8250 return qdtoBasicLatin(n, form, precision, isAsciiUpper(format));
8251}
8252
8253namespace {
8254template<class ResultList, class StringSource>
8255static ResultList splitString(const StringSource &source, QStringView sep,
8256 Qt::SplitBehavior behavior, Qt::CaseSensitivity cs)
8257{
8258 ResultList list;
8259 typename StringSource::size_type start = 0;
8260 typename StringSource::size_type end;
8261 typename StringSource::size_type extra = 0;
8262 while ((end = QtPrivate::findString(QStringView(source.constData(), source.size()), start + extra, sep, cs)) != -1) {
8263 if (start != end || behavior == Qt::KeepEmptyParts)
8264 list.append(source.sliced(start, end - start));
8265 start = end + sep.size();
8266 extra = (sep.size() == 0 ? 1 : 0);
8267 }
8268 if (start != source.size() || behavior == Qt::KeepEmptyParts)
8269 list.append(source.sliced(start));
8270 return list;
8271}
8272
8273} // namespace
8274
8275/*!
8276 Splits the string into substrings wherever \a sep occurs, and
8277 returns the list of those strings. If \a sep does not match
8278 anywhere in the string, split() returns a single-element list
8279 containing this string.
8280
8281 \a cs specifies whether \a sep should be matched case
8282 sensitively or case insensitively.
8283
8284 If \a behavior is Qt::SkipEmptyParts, empty entries don't
8285 appear in the result. By default, empty entries are kept.
8286
8287 Example:
8288
8289 \snippet qstring/main.cpp 62
8290
8291 If \a sep is empty, split() returns an empty string, followed
8292 by each of the string's characters, followed by another empty string:
8293
8294 \snippet qstring/main.cpp 62-empty
8295
8296 To understand this behavior, recall that the empty string matches
8297 everywhere, so the above is qualitatively the same as:
8298
8299 \snippet qstring/main.cpp 62-slashes
8300
8301 \sa QStringList::join(), section()
8302
8303 \since 5.14
8304*/
8305QStringList QString::split(const QString &sep, Qt::SplitBehavior behavior, Qt::CaseSensitivity cs) const
8306{
8307 return splitString<QStringList>(*this, sep, behavior, cs);
8308}
8309
8310/*!
8311 \overload
8312 \since 5.14
8313*/
8314QStringList QString::split(QChar sep, Qt::SplitBehavior behavior, Qt::CaseSensitivity cs) const
8315{
8316 return splitString<QStringList>(*this, QStringView(&sep, 1), behavior, cs);
8317}
8318
8319/*!
8320 \fn QList<QStringView> QStringView::split(QChar sep, Qt::SplitBehavior behavior, Qt::CaseSensitivity cs) const
8321 \fn QList<QStringView> QStringView::split(QStringView sep, Qt::SplitBehavior behavior, Qt::CaseSensitivity cs) const
8322
8323
8324 Splits the view into substring views wherever \a sep occurs, and
8325 returns the list of those string views.
8326
8327 See QString::split() for how \a sep, \a behavior and \a cs interact to form
8328 the result.
8329
8330 \note All the returned views are valid as long as the data referenced by
8331 this string view is valid. Destroying the data will cause all views to
8332 become dangling.
8333
8334 \since 6.0
8335*/
8336QList<QStringView> QStringView::split(QStringView sep, Qt::SplitBehavior behavior, Qt::CaseSensitivity cs) const
8337{
8338 return splitString<QList<QStringView>>(QStringView(*this), sep, behavior, cs);
8339}
8340
8341QList<QStringView> QStringView::split(QChar sep, Qt::SplitBehavior behavior, Qt::CaseSensitivity cs) const
8342{
8343 return split(QStringView(&sep, 1), behavior, cs);
8344}
8345
8346#if QT_CONFIG(regularexpression)
8347namespace {
8348template<class ResultList, typename String, typename MatchingFunction>
8349static ResultList splitString(const String &source, const QRegularExpression &re,
8350 MatchingFunction matchingFunction,
8351 Qt::SplitBehavior behavior)
8352{
8353 ResultList list;
8354 if (!re.isValid()) {
8355 qtWarnAboutInvalidRegularExpression(re.pattern(), "QString::split");
8356 return list;
8357 }
8358
8359 qsizetype start = 0;
8360 qsizetype end = 0;
8361 QRegularExpressionMatchIterator iterator = (re.*matchingFunction)(source, 0, QRegularExpression::NormalMatch, QRegularExpression::NoMatchOption);
8362 while (iterator.hasNext()) {
8363 QRegularExpressionMatch match = iterator.next();
8364 end = match.capturedStart();
8365 if (start != end || behavior == Qt::KeepEmptyParts)
8366 list.append(source.sliced(start, end - start));
8367 start = match.capturedEnd();
8368 }
8369
8370 if (start != source.size() || behavior == Qt::KeepEmptyParts)
8371 list.append(source.sliced(start));
8372
8373 return list;
8374}
8375} // namespace
8376
8377/*!
8378 \overload
8379 \since 5.14
8380
8381 Splits the string into substrings wherever the regular expression
8382 \a re matches, and returns the list of those strings. If \a re
8383 does not match anywhere in the string, split() returns a
8384 single-element list containing this string.
8385
8386 Here is an example where we extract the words in a sentence
8387 using one or more whitespace characters as the separator:
8388
8389 \snippet qstring/main.cpp 90
8390
8391 Here is a similar example, but this time we use any sequence of
8392 non-word characters as the separator:
8393
8394 \snippet qstring/main.cpp 91
8395
8396 Here is a third example where we use a zero-length assertion,
8397 \b{\\b} (word boundary), to split the string into an
8398 alternating sequence of non-word and word tokens:
8399
8400 \snippet qstring/main.cpp 92
8401
8402 \sa QStringList::join(), section()
8403*/
8404QStringList QString::split(const QRegularExpression &re, Qt::SplitBehavior behavior) const
8405{
8406#if QT_VERSION < QT_VERSION_CHECK(7, 0, 0)
8407 const auto matchingFunction = qOverload<const QString &, qsizetype, QRegularExpression::MatchType, QRegularExpression::MatchOptions>(&QRegularExpression::globalMatch);
8408#else
8409 const auto matchingFunction = &QRegularExpression::globalMatch;
8410#endif
8411 return splitString<QStringList>(*this,
8412 re,
8413 matchingFunction,
8414 behavior);
8415}
8416
8417/*!
8418 \overload
8419 \since 6.0
8420
8421 Splits the view into substring views wherever the regular expression \a re
8422 matches, and returns the list of those views. If \a re does not match
8423 anywhere in the view, split() returns a single-element list containing
8424 this view.
8425
8426 \note The views in the returned list are sub-views of this view; as such,
8427 they reference the same data as it and only remain valid for as long as that
8428 data remains live.
8429*/
8430QList<QStringView> QStringView::split(const QRegularExpression &re, Qt::SplitBehavior behavior) const
8431{
8432 return splitString<QList<QStringView>>(*this, re, &QRegularExpression::globalMatchView, behavior);
8433}
8434
8435#endif // QT_CONFIG(regularexpression)
8436
8437/*!
8438 \enum QString::NormalizationForm
8439
8440 This enum describes the various normalized forms of Unicode text.
8441
8442 \value NormalizationForm_D Canonical Decomposition
8443 \value NormalizationForm_C Canonical Decomposition followed by Canonical Composition
8444 \value NormalizationForm_KD Compatibility Decomposition
8445 \value NormalizationForm_KC Compatibility Decomposition followed by Canonical Composition
8446
8447 \sa normalized(),
8448 {https://www.unicode.org/reports/tr15/}{Unicode Standard Annex #15}
8449*/
8450
8451/*!
8452 \since 4.5
8453
8454 Returns a copy of this string repeated the specified number of \a times.
8455
8456 If \a times is less than 1, an empty string is returned.
8457
8458 Example:
8459
8460 \snippet code/src_corelib_text_qstring.cpp 8
8461*/
8462QString QString::repeated(qsizetype times) const
8463{
8464 if (d.size == 0)
8465 return *this;
8466
8467 if (times <= 1) {
8468 if (times == 1)
8469 return *this;
8470 return QString();
8471 }
8472
8473 const qsizetype resultSize = times * d.size;
8474
8475 QString result;
8476 result.reserve(resultSize);
8477 if (result.capacity() != resultSize)
8478 return QString(); // not enough memory
8479
8480 memcpy(result.d.data(), d.data(), d.size * sizeof(QChar));
8481
8482 qsizetype sizeSoFar = d.size;
8483 char16_t *end = result.d.data() + sizeSoFar;
8484
8485 const qsizetype halfResultSize = resultSize >> 1;
8486 while (sizeSoFar <= halfResultSize) {
8487 memcpy(end, result.d.data(), sizeSoFar * sizeof(QChar));
8488 end += sizeSoFar;
8489 sizeSoFar <<= 1;
8490 }
8491 memcpy(end, result.d.data(), (resultSize - sizeSoFar) * sizeof(QChar));
8492 result.d.data()[resultSize] = '\0';
8493 result.d.size = resultSize;
8494 return result;
8495}
8496
8497void qt_string_normalize(QString *data, QString::NormalizationForm mode, QChar::UnicodeVersion version, qsizetype from)
8498{
8499 {
8500 // check if it's fully ASCII first, because then we have no work
8501 auto start = reinterpret_cast<const char16_t *>(data->constData());
8502 const char16_t *p = start + from;
8503 if (isAscii_helper(p, p + data->size() - from))
8504 return;
8505 if (p > start + from)
8506 from = p - start - 1; // need one before the non-ASCII to perform NFC
8507 }
8508
8509 if (version == QChar::Unicode_Unassigned) {
8510 version = QChar::currentUnicodeVersion();
8511 } else if (int(version) <= NormalizationCorrectionsVersionMax) {
8512 const QString &s = *data;
8513 QChar *d = nullptr;
8515 if (n.version > version) {
8516 qsizetype pos = from;
8517 if (QChar::requiresSurrogates(n.ucs4)) {
8518 char16_t ucs4High = QChar::highSurrogate(n.ucs4);
8519 char16_t ucs4Low = QChar::lowSurrogate(n.ucs4);
8520 char16_t oldHigh = QChar::highSurrogate(n.old_mapping);
8521 char16_t oldLow = QChar::lowSurrogate(n.old_mapping);
8522 while (pos < s.size() - 1) {
8523 if (s.at(pos).unicode() == ucs4High && s.at(pos + 1).unicode() == ucs4Low) {
8524 if (!d)
8525 d = data->data();
8526 d[pos] = QChar(oldHigh);
8527 d[++pos] = QChar(oldLow);
8528 }
8529 ++pos;
8530 }
8531 } else {
8532 while (pos < s.size()) {
8533 if (s.at(pos).unicode() == n.ucs4) {
8534 if (!d)
8535 d = data->data();
8536 d[pos] = QChar(n.old_mapping);
8537 }
8538 ++pos;
8539 }
8540 }
8541 }
8542 }
8543 }
8544
8545 if (normalizationQuickCheckHelper(data, mode, from, &from))
8546 return;
8547
8548 decomposeHelper(data, mode < QString::NormalizationForm_KD, version, from);
8549
8550 canonicalOrderHelper(data, version, from);
8551
8552 if (mode == QString::NormalizationForm_D || mode == QString::NormalizationForm_KD)
8553 return;
8554
8555 composeHelper(data, version, from);
8556}
8557
8558/*!
8559 Returns the string in the given Unicode normalization \a mode,
8560 according to the given \a version of the Unicode standard.
8561*/
8562QString QString::normalized(QString::NormalizationForm mode, QChar::UnicodeVersion version) const
8563{
8564 QString copy = *this;
8565 qt_string_normalize(&copy, mode, version, 0);
8566 return copy;
8567}
8568
8569#if QT_VERSION < QT_VERSION_CHECK(7, 0, 0) && !defined(QT_BOOTSTRAPPED)
8570static void checkArgEscape(QStringView s)
8571{
8572 // If we're in here, it means that qArgDigitValue has accepted the
8573 // digit. We can skip the check in case we already know it will
8574 // succeed.
8575 if (!supportUnicodeDigitValuesInArg())
8576 return;
8577
8578 const auto isNonAsciiDigit = [](QChar c) {
8579 return c.unicode() < u'0' || c.unicode() > u'9';
8580 };
8581
8582 if (std::any_of(s.begin(), s.end(), isNonAsciiDigit)) {
8583 const auto accumulateDigit = [](int partial, QChar digit) {
8584 return partial * 10 + digit.digitValue();
8585 };
8586 const int parsedNumber = std::accumulate(s.begin(), s.end(), 0, accumulateDigit);
8587
8588 qWarning("QString::arg(): the replacement \"%%%ls\" contains non-ASCII digits;\n"
8589 " it is currently being interpreted as the %d-th substitution.\n"
8590 " This is deprecated; support for non-ASCII digits will be dropped\n"
8591 " in a future version of Qt.",
8592 qUtf16Printable(s.toString()),
8593 parsedNumber);
8594 }
8595}
8596#endif
8597
8599{
8600 int min_escape; // lowest escape sequence number
8601 qsizetype occurrences; // number of occurrences of the lowest escape sequence number
8602 qsizetype locale_occurrences; // number of occurrences of the lowest escape sequence number that
8603 // contain 'L'
8604 qsizetype escape_len; // total length of escape sequences which will be replaced
8605};
8606
8607static ArgEscapeData findArgEscapes(QStringView s)
8608{
8609 const QChar *uc_begin = s.begin();
8610 const QChar *uc_end = s.end();
8611
8612 ArgEscapeData d;
8613
8614 d.min_escape = INT_MAX;
8615 d.occurrences = 0;
8616 d.escape_len = 0;
8617 d.locale_occurrences = 0;
8618
8619 const QChar *c = uc_begin;
8620 while (c != uc_end) {
8621 while (c != uc_end && c->unicode() != '%')
8622 ++c;
8623
8624 if (c == uc_end)
8625 break;
8626 const QChar *escape_start = c;
8627 if (++c == uc_end)
8628 break;
8629
8630 bool locale_arg = false;
8631 if (c->unicode() == 'L') {
8632 locale_arg = true;
8633 if (++c == uc_end)
8634 break;
8635 }
8636
8637 int escape = qArgDigitValue(*c);
8638 if (escape == -1)
8639 continue;
8640
8641 // ### Qt 7: do not allow anything but ASCII digits
8642 // in arg()'s replacements.
8643#if QT_VERSION <= QT_VERSION_CHECK(7, 0, 0) && !defined(QT_BOOTSTRAPPED)
8644 const QChar *escapeBegin = c;
8645 const QChar *escapeEnd = escapeBegin + 1;
8646#endif
8647
8648 ++c;
8649
8650 if (c != uc_end) {
8651 const int next_escape = qArgDigitValue(*c);
8652 if (next_escape != -1) {
8653 escape = (10 * escape) + next_escape;
8654 ++c;
8655#if QT_VERSION <= QT_VERSION_CHECK(7, 0, 0) && !defined(QT_BOOTSTRAPPED)
8656 ++escapeEnd;
8657#endif
8658 }
8659 }
8660
8661#if QT_VERSION <= QT_VERSION_CHECK(7, 0, 0) && !defined(QT_BOOTSTRAPPED)
8662 checkArgEscape(QStringView(escapeBegin, escapeEnd));
8663#endif
8664
8665 if (escape > d.min_escape)
8666 continue;
8667
8668 if (escape < d.min_escape) {
8669 d.min_escape = escape;
8670 d.occurrences = 0;
8671 d.escape_len = 0;
8672 d.locale_occurrences = 0;
8673 }
8674
8675 ++d.occurrences;
8676 if (locale_arg)
8677 ++d.locale_occurrences;
8678 d.escape_len += c - escape_start;
8679 }
8680 return d;
8681}
8682
8683static QString replaceArgEscapes(QStringView s, const ArgEscapeData &d, qsizetype field_width,
8684 QStringView arg, QStringView larg, QChar fillChar)
8685{
8686 // Negative field-width for right-padding, positive for left-padding:
8687 const qsizetype abs_field_width = qAbs(field_width);
8688 const qsizetype result_len =
8689 s.size() - d.escape_len
8690 + (d.occurrences - d.locale_occurrences) * qMax(abs_field_width, arg.size())
8691 + d.locale_occurrences * qMax(abs_field_width, larg.size());
8692
8693 QString result(result_len, Qt::Uninitialized);
8694 QChar *rc = const_cast<QChar *>(result.unicode());
8695 QChar *const result_end = rc + result_len;
8696 qsizetype repl_cnt = 0;
8697
8698 const QChar *c = s.begin();
8699 const QChar *const uc_end = s.end();
8700 while (c != uc_end) {
8701 Q_ASSERT(d.occurrences > repl_cnt);
8702 /* We don't have to check increments of c against uc_end because, as
8703 long as d.occurrences > repl_cnt, we KNOW there are valid escape
8704 sequences remaining. */
8705
8706 const QChar *text_start = c;
8707 while (c->unicode() != '%')
8708 ++c;
8709
8710 const QChar *escape_start = c++;
8711 const bool localize = c->unicode() == 'L';
8712 if (localize)
8713 ++c;
8714
8715 int escape = qArgDigitValue(*c);
8716 if (escape != -1 && c + 1 != uc_end) {
8717 const int digit = qArgDigitValue(c[1]);
8718 if (digit != -1) {
8719 ++c;
8720 escape = 10 * escape + digit;
8721 }
8722 }
8723
8724 if (escape != d.min_escape) {
8725 memcpy(rc, text_start, (c - text_start) * sizeof(QChar));
8726 rc += c - text_start;
8727 } else {
8728 ++c;
8729
8730 memcpy(rc, text_start, (escape_start - text_start) * sizeof(QChar));
8731 rc += escape_start - text_start;
8732
8733 const QStringView use = localize ? larg : arg;
8734 const qsizetype pad_chars = abs_field_width - use.size();
8735 // (If negative, relevant loops are no-ops: no need to check.)
8736
8737 if (field_width > 0) { // left padded
8738 rc = std::fill_n(rc, pad_chars, fillChar);
8739 }
8740
8741 if (use.size())
8742 memcpy(rc, use.data(), use.size() * sizeof(QChar));
8743 rc += use.size();
8744
8745 if (field_width < 0) { // right padded
8746 rc = std::fill_n(rc, pad_chars, fillChar);
8747 }
8748
8749 if (++repl_cnt == d.occurrences) {
8750 memcpy(rc, c, (uc_end - c) * sizeof(QChar));
8751 rc += uc_end - c;
8752 Q_ASSERT(rc == result_end);
8753 c = uc_end;
8754 }
8755 }
8756 }
8757 Q_ASSERT(rc == result_end);
8758
8759 return result;
8760}
8761
8762/*!
8763 \fn template <typename T, QString::if_string_like<T> = true> QString QString::arg(const T &a, int fieldWidth, QChar fillChar) const
8764
8765 Returns a copy of this string with the lowest-numbered place-marker
8766 replaced by string \a a, i.e., \c %1, \c %2, ..., \c %99.
8767
8768 \a fieldWidth specifies the minimum amount of space that \a a
8769 shall occupy. If \a a requires less space than \a fieldWidth, it
8770 is padded to \a fieldWidth with character \a fillChar. A positive
8771 \a fieldWidth produces right-aligned text. A negative \a fieldWidth
8772 produces left-aligned text.
8773
8774 This example shows how we might create a \c status string for
8775 reporting progress while processing a list of files:
8776
8777 \snippet qstring/main.cpp 11-qstringview
8778
8779 First, \c arg(i) replaces \c %1. Then \c arg(total) replaces \c
8780 %2. Finally, \c arg(fileName) replaces \c %3.
8781
8782 One advantage of using arg() over asprintf() is that the order of the
8783 numbered place markers can change, if the application's strings are
8784 translated into other languages, but each arg() will still replace
8785 the lowest-numbered unreplaced place-marker, no matter where it
8786 appears. Also, if place-marker \c %i appears more than once in the
8787 string, arg() replaces all of them.
8788
8789 If there is no unreplaced place-marker remaining, a warning message
8790 is printed and the result is undefined. Place-marker numbers must be
8791 in the range 1 to 99.
8792
8793 \note In Qt versions prior to 6.9, this function was overloaded on
8794 \c{char}, QChar, QString, QStringView, and QLatin1StringView and in some
8795 cases, \c{wchar_t} and \c{char16_t} arguments would resolve to the integer
8796 overloads. In Qt versions prior to 5.10, this function lacked the
8797 QStringView and QLatin1StringView overloads.
8798*/
8799QString QString::arg_impl(QAnyStringView a, int fieldWidth, QChar fillChar) const
8800{
8801 ArgEscapeData d = findArgEscapes(*this);
8802
8803 if (Q_UNLIKELY(d.occurrences == 0)) {
8804 qWarning("QString::arg: Argument missing: \"%ls\", \"%ls\"", qUtf16Printable(*this),
8805 qUtf16Printable(a.toString()));
8806 return *this;
8807 }
8808 struct {
8809 QVarLengthArray<char16_t> out;
8810 QStringView operator()(QStringView in) noexcept { return in; }
8811 QStringView operator()(QLatin1StringView in)
8812 {
8813 out.resize(in.size());
8814 qt_from_latin1(out.data(), in.data(), size_t(in.size()));
8815 return out;
8816 }
8817 QStringView operator()(QUtf8StringView in)
8818 {
8819 out.resize(in.size());
8820 return QStringView{out.data(), QUtf8::convertToUnicode(out.data(), in)};
8821 }
8822 } convert;
8823
8824 QStringView sv = a.visit(std::ref(convert));
8825 return replaceArgEscapes(*this, d, fieldWidth, sv, sv, fillChar);
8826}
8827
8828/*!
8829 \fn template <typename T, QString::if_integral_non_char<T> = true> QString QString::arg(T a, int fieldWidth, int base, QChar fillChar) const
8830 \overload arg()
8831
8832 The \a a argument is expressed in base \a base, which is 10 by
8833 default and must be between 2 and 36. For bases other than 10, \a a
8834 is treated as an unsigned integer.
8835
8836 \a fieldWidth specifies the minimum amount of space that \a a is
8837 padded to and filled with the character \a fillChar. A positive
8838 value produces right-aligned text; a negative value produces
8839 left-aligned text.
8840
8841 The '%' can be followed by an 'L', in which case the sequence is
8842 replaced with a localized representation of \a a. The conversion
8843 uses the default locale, set by QLocale::setDefault(). If no default
8844 locale was specified, the system locale is used. The 'L' flag is
8845 ignored if \a base is not 10.
8846
8847 \snippet qstring/main.cpp 12
8848 \snippet qstring/main.cpp 14
8849
8850 \note In Qt versions prior to 6.9, this function was overloaded on various
8851 integral types and sometimes incorrectly accepted \c char and \c char16_t
8852 arguments.
8853
8854 \sa {Number Formats}
8855*/
8856QString QString::arg_impl(qlonglong a, int fieldWidth, int base, QChar fillChar) const
8857{
8858 ArgEscapeData d = findArgEscapes(*this);
8859
8860 if (d.occurrences == 0) {
8861 qWarning("QString::arg: Argument missing: \"%ls\", %llu", qUtf16Printable(*this), a);
8862 return *this;
8863 }
8864
8865 unsigned flags = QLocaleData::NoFlags;
8866 // ZeroPadded sorts out left-padding when the fill is zero, to the right of sign:
8867 if (fillChar == u'0')
8868 flags = QLocaleData::ZeroPadded;
8869
8870 QString arg;
8871 if (d.occurrences > d.locale_occurrences) {
8872 arg = QLocaleData::c()->longLongToString(a, -1, base, fieldWidth, flags);
8873 Q_ASSERT(fillChar != u'0' || fieldWidth <= arg.size());
8874 }
8875
8876 QString localeArg;
8877 if (d.locale_occurrences > 0) {
8878 QLocale locale;
8879 if (!(locale.numberOptions() & QLocale::OmitGroupSeparator))
8880 flags |= QLocaleData::GroupDigits;
8881 localeArg = locale.d->m_data->longLongToString(a, -1, base, fieldWidth, flags);
8882 Q_ASSERT(fillChar != u'0' || fieldWidth <= localeArg.size());
8883 }
8884
8885 return replaceArgEscapes(*this, d, fieldWidth, arg, localeArg, fillChar);
8886}
8887
8888QString QString::arg_impl(qulonglong a, int fieldWidth, int base, QChar fillChar) const
8889{
8890 ArgEscapeData d = findArgEscapes(*this);
8891
8892 if (d.occurrences == 0) {
8893 qWarning("QString::arg: Argument missing: \"%ls\", %lld", qUtf16Printable(*this), a);
8894 return *this;
8895 }
8896
8897 unsigned flags = QLocaleData::NoFlags;
8898 // ZeroPadded sorts out left-padding when the fill is zero, to the right of sign:
8899 if (fillChar == u'0')
8900 flags = QLocaleData::ZeroPadded;
8901
8902 QString arg;
8903 if (d.occurrences > d.locale_occurrences) {
8904 arg = QLocaleData::c()->unsLongLongToString(a, -1, base, fieldWidth, flags);
8905 Q_ASSERT(fillChar != u'0' || fieldWidth <= arg.size());
8906 }
8907
8908 QString localeArg;
8909 if (d.locale_occurrences > 0) {
8910 QLocale locale;
8911 if (!(locale.numberOptions() & QLocale::OmitGroupSeparator))
8912 flags |= QLocaleData::GroupDigits;
8913 localeArg = locale.d->m_data->unsLongLongToString(a, -1, base, fieldWidth, flags);
8914 Q_ASSERT(fillChar != u'0' || fieldWidth <= localeArg.size());
8915 }
8916
8917 return replaceArgEscapes(*this, d, fieldWidth, arg, localeArg, fillChar);
8918}
8919
8920/*!
8921 \fn template <typename T, QString::if_floating_point<T> = true> QString QString::arg(T a, int fieldWidth, char format, int precision, QChar fillChar) const
8922 \overload arg()
8923
8924 Argument \a a is formatted according to the specified \a format and
8925 \a precision. See \l{Floating-point Formats} for details.
8926
8927 \a fieldWidth specifies the minimum amount of space that \a a is
8928 padded to and filled with the character \a fillChar. A positive
8929 value produces right-aligned text; a negative value produces
8930 left-aligned text.
8931
8932 \snippet code/src_corelib_text_qstring.cpp 2
8933
8934 \note In Qt versions prior to 6.9, this function was a regular function
8935 taking \c double.
8936
8937 \sa QLocale::toString(), QLocale::FloatingPointPrecisionOption, {Number Formats}
8938*/
8939QString QString::arg_impl(double a, int fieldWidth, char format, int precision, QChar fillChar) const
8940{
8941 ArgEscapeData d = findArgEscapes(*this);
8942
8943 if (d.occurrences == 0) {
8944 qWarning("QString::arg: Argument missing: \"%ls\", %g", qUtf16Printable(*this), a);
8945 return *this;
8946 }
8947
8948 unsigned flags = QLocaleData::NoFlags;
8949 // ZeroPadded sorts out left-padding when the fill is zero, to the right of sign:
8950 if (fillChar == u'0')
8951 flags |= QLocaleData::ZeroPadded;
8952
8953 if (isAsciiUpper(format))
8954 flags |= QLocaleData::CapitalEorX;
8955
8956 QLocaleData::DoubleForm form = QLocaleData::DFDecimal;
8957 switch (QtMiscUtils::toAsciiLower(format)) {
8958 case 'f':
8959 form = QLocaleData::DFDecimal;
8960 break;
8961 case 'e':
8962 form = QLocaleData::DFExponent;
8963 break;
8964 case 'g':
8965 form = QLocaleData::DFSignificantDigits;
8966 break;
8967 default:
8968#if defined(QT_CHECK_RANGE)
8969 qWarning("QString::arg: Invalid format char '%c'", format);
8970#endif
8971 break;
8972 }
8973
8974 QString arg;
8975 if (d.occurrences > d.locale_occurrences) {
8976 arg = QLocaleData::c()->doubleToString(a, precision, form, fieldWidth,
8977 flags | QLocaleData::ZeroPadExponent);
8978 Q_ASSERT(fillChar != u'0' || !qt_is_finite(a)
8979 || fieldWidth <= arg.size());
8980 }
8981
8982 QString localeArg;
8983 if (d.locale_occurrences > 0) {
8984 QLocale locale;
8985
8986 const QLocale::NumberOptions numberOptions = locale.numberOptions();
8987 if (!(numberOptions & QLocale::OmitGroupSeparator))
8988 flags |= QLocaleData::GroupDigits;
8989 if (!(numberOptions & QLocale::OmitLeadingZeroInExponent))
8990 flags |= QLocaleData::ZeroPadExponent;
8991 if (numberOptions & QLocale::IncludeTrailingZeroesAfterDot)
8992 flags |= QLocaleData::AddTrailingZeroes;
8993 localeArg = locale.d->m_data->doubleToString(a, precision, form, fieldWidth, flags);
8994 Q_ASSERT(fillChar != u'0' || !qt_is_finite(a)
8995 || fieldWidth <= localeArg.size());
8996 }
8997
8998 return replaceArgEscapes(*this, d, fieldWidth, arg, localeArg, fillChar);
8999}
9000
9001static inline char16_t to_unicode(const QChar c) { return c.unicode(); }
9002static inline char16_t to_unicode(const char c) { return QLatin1Char{c}.unicode(); }
9003
9004template <typename Char>
9005static int getEscape(const Char *uc, qsizetype *pos, qsizetype len)
9006{
9007 qsizetype i = *pos;
9008 ++i;
9009 if (i < len && uc[i] == u'L')
9010 ++i;
9011 if (i < len) {
9012 int escape = to_unicode(uc[i]) - '0';
9013 if (uint(escape) >= 10U)
9014 return -1;
9015 ++i;
9016 if (i < len) {
9017 // there's a second digit
9018 int digit = to_unicode(uc[i]) - '0';
9019 if (uint(digit) < 10U) {
9020 escape = (escape * 10) + digit;
9021 ++i;
9022 }
9023 }
9024 *pos = i;
9025 return escape;
9026 }
9027 return -1;
9028}
9029
9030/*
9031 Algorithm for multiArg:
9032
9033 1. Parse the string as a sequence of verbatim text and placeholders (%L?\d{1,2}).
9034 The L is parsed and accepted for compatibility with non-multi-arg, but since
9035 multiArg only accepts strings as replacements, the localization request can
9036 be safely ignored.
9037 2. The result of step (1) is a list of (string-ref,int)-tuples. The string-ref
9038 either points at text to be copied verbatim (in which case the int is -1),
9039 or, initially, at the textual representation of the placeholder. In that case,
9040 the int contains the numerical number as parsed from the placeholder.
9041 3. Next, collect all the non-negative ints found, sort them in ascending order and
9042 remove duplicates.
9043 3a. If the result has more entries than multiArg() was given replacement strings,
9044 we have found placeholders we can't satisfy with replacement strings. That is
9045 fine (there could be another .arg() call coming after this one), so just
9046 truncate the result to the number of actual multiArg() replacement strings.
9047 3b. If the result has fewer entries than multiArg() was given replacement strings,
9048 the string is missing placeholders. This is an error that the user should be
9049 warned about.
9050 4. The result of step (3) is a mapping from the index of any replacement string to
9051 placeholder number. This is the wrong way around, but since placeholder
9052 numbers could get as large as 99, while we typically don't have more than 9
9053 replacement strings, we trade a sparsely-used direct lookup table for doing a reverse lookup
9054 each time we need to map a placeholder number to a replacement string index
9055 (that's a linear search; but still *much* faster than using an associative container).
9056 5. Next, for each of the tuples found in step (1), do the following:
9057 5a. If the int is negative, do nothing.
9058 5b. Otherwise, if the int is found in the result of step (3) at index I, replace
9059 the string-ref with a string-ref for the (complete) I'th replacement string.
9060 5c. Otherwise, do nothing.
9061 6. Concatenate all string refs into a single result string.
9062*/
9063
9064namespace {
9065struct Part
9066{
9067 Part() = default; // for QVarLengthArray; do not use
9068 constexpr Part(QAnyStringView s, int num = -1)
9069 : string{s}, number{num} {}
9070
9071 void reset(QAnyStringView s) noexcept { *this = {s, number}; }
9072
9073 QAnyStringView string;
9074 int number;
9075};
9076} // unnamed namespace
9077
9079
9080namespace {
9081
9082enum { ExpectedParts = 32 };
9083
9084typedef QVarLengthArray<Part, ExpectedParts> ParseResult;
9085typedef QVarLengthArray<int, ExpectedParts/2> ArgIndexToPlaceholderMap;
9086
9087template <typename StringView>
9088static ParseResult parseMultiArgFormatString_impl(StringView s)
9089{
9090 ParseResult result;
9091
9092 const auto uc = s.data();
9093 const auto len = s.size();
9094 const auto end = len - 1;
9095 qsizetype i = 0;
9096 qsizetype last = 0;
9097
9098 while (i < end) {
9099 if (uc[i] == u'%') {
9100 qsizetype percent = i;
9101 int number = getEscape(uc, &i, len);
9102 if (number != -1) {
9103 if (last != percent)
9104 result.push_back(Part{s.sliced(last, percent - last)}); // literal text (incl. failed placeholders)
9105 result.push_back(Part{s.sliced(percent, i - percent), number}); // parsed placeholder
9106 last = i;
9107 continue;
9108 }
9109 }
9110 ++i;
9111 }
9112
9113 if (last < len)
9114 result.push_back(Part{s.sliced(last, len - last)}); // trailing literal text
9115
9116 return result;
9117}
9118
9119static ParseResult parseMultiArgFormatString(QAnyStringView s)
9120{
9121 return s.visit([] (auto s) { return parseMultiArgFormatString_impl(s); });
9122}
9123
9124static ArgIndexToPlaceholderMap makeArgIndexToPlaceholderMap(const ParseResult &parts)
9125{
9126 ArgIndexToPlaceholderMap result;
9127
9128 for (const Part &part : parts) {
9129 if (part.number >= 0)
9130 result.push_back(part.number);
9131 }
9132
9133 std::sort(result.begin(), result.end());
9134 result.erase(std::unique(result.begin(), result.end()),
9135 result.end());
9136
9137 return result;
9138}
9139
9140static qsizetype resolveStringRefsAndReturnTotalSize(ParseResult &parts, const ArgIndexToPlaceholderMap &argIndexToPlaceholderMap, const QtPrivate::ArgBase *args[])
9141{
9142 using namespace QtPrivate;
9143 qsizetype totalSize = 0;
9144 for (Part &part : parts) {
9145 if (part.number != -1) {
9146 const auto it = std::find(argIndexToPlaceholderMap.begin(), argIndexToPlaceholderMap.end(), part.number);
9147 if (it != argIndexToPlaceholderMap.end()) {
9148 const auto &arg = *args[it - argIndexToPlaceholderMap.begin()];
9149 switch (arg.tag) {
9150 case ArgBase::L1:
9151 part.reset(static_cast<const QLatin1StringArg&>(arg).string);
9152 break;
9153 case ArgBase::Any:
9154 part.reset(static_cast<const QAnyStringArg&>(arg).string);
9155 break;
9156 case ArgBase::U16:
9157 part.reset(static_cast<const QStringViewArg&>(arg).string);
9158 break;
9159 }
9160 }
9161 }
9162 totalSize += part.string.size();
9163 }
9164 return totalSize;
9165}
9166
9167} // unnamed namespace
9168
9169QString QtPrivate::argToQString(QAnyStringView pattern, size_t numArgs, const ArgBase **args)
9170{
9171 // Step 1-2 above
9172 ParseResult parts = parseMultiArgFormatString(pattern);
9173
9174 // 3-4
9175 ArgIndexToPlaceholderMap argIndexToPlaceholderMap = makeArgIndexToPlaceholderMap(parts);
9176
9177 if (static_cast<size_t>(argIndexToPlaceholderMap.size()) > numArgs) // 3a
9178 argIndexToPlaceholderMap.resize(qsizetype(numArgs));
9179 else if (Q_UNLIKELY(static_cast<size_t>(argIndexToPlaceholderMap.size()) < numArgs)) // 3b
9180 qWarning("QString::arg: %d argument(s) missing in %ls",
9181 int(numArgs - argIndexToPlaceholderMap.size()), qUtf16Printable(pattern.toString()));
9182
9183 // 5
9184 const qsizetype totalSize = resolveStringRefsAndReturnTotalSize(parts, argIndexToPlaceholderMap, args);
9185
9186 // 6:
9187 QString result(totalSize, Qt::Uninitialized);
9188 auto out = const_cast<QChar*>(result.constData());
9189
9190 struct Concatenate {
9191 QChar *out;
9192 QChar *operator()(QLatin1String part) noexcept
9193 {
9194 if (part.size()) {
9195 qt_from_latin1(reinterpret_cast<char16_t*>(out),
9196 part.data(), part.size());
9197 }
9198 return out + part.size();
9199 }
9200 QChar *operator()(QUtf8StringView part) noexcept
9201 {
9202 return QUtf8::convertToUnicode(out, part);
9203 }
9204 QChar *operator()(QStringView part) noexcept
9205 {
9206 if (part.size())
9207 memcpy(out, part.data(), part.size() * sizeof(QChar));
9208 return out + part.size();
9209 }
9210 };
9211
9212 for (const Part &part : parts)
9213 out = part.string.visit(Concatenate{out});
9214
9215 // UTF-8 decoding may have caused an overestimate of totalSize - correct it:
9216 result.truncate(out - result.cbegin());
9217
9218 return result;
9219}
9220
9221/*! \fn bool QString::isRightToLeft() const
9222
9223 Returns \c true if the string is read right to left.
9224
9225 \sa QStringView::isRightToLeft()
9226*/
9227bool QString::isRightToLeft() const
9228{
9229 return QtPrivate::isRightToLeft(QStringView(*this));
9230}
9231
9232/*!
9233 \fn bool QString::isValidUtf16() const noexcept
9234 \since 5.15
9235
9236 Returns \c true if the string contains valid UTF-16 encoded data,
9237 or \c false otherwise.
9238
9239 Note that this function does not perform any special validation of the
9240 data; it merely checks if it can be successfully decoded from UTF-16.
9241 The data is assumed to be in host byte order; the presence of a BOM
9242 is meaningless.
9243
9244 \sa QStringView::isValidUtf16()
9245*/
9246
9247/*! \fn QChar *QString::data()
9248
9249 Returns a pointer to the data stored in the QString. The pointer
9250 can be used to access and modify the characters that compose the
9251 string.
9252
9253 Unlike constData() and unicode(), the returned data is always
9254 '\\0'-terminated.
9255
9256 Example:
9257
9258 \snippet qstring/main.cpp 19
9259
9260 Note that the pointer remains valid only as long as the string is
9261 not modified by other means. For read-only access, constData() is
9262 faster because it never causes a \l{deep copy} to occur.
9263
9264 \sa constData(), operator[]()
9265*/
9266
9267/*! \fn const QChar *QString::data() const
9268
9269 \overload
9270
9271 \note The returned string may not be '\\0'-terminated.
9272 Use size() to determine the length of the array.
9273
9274 \sa fromRawData()
9275*/
9276
9277/*! \fn const QChar *QString::constData() const
9278
9279 Returns a pointer to the data stored in the QString. The pointer
9280 can be used to access the characters that compose the string.
9281
9282 Note that the pointer remains valid only as long as the string is
9283 not modified.
9284
9285 \note The returned string may not be '\\0'-terminated.
9286 Use size() to determine the length of the array.
9287
9288 \sa data(), operator[](), fromRawData()
9289*/
9290
9291/*! \fn void QString::push_front(const QString &other)
9292
9293 This function is provided for STL compatibility, prepending the
9294 given \a other string to the beginning of this string. It is
9295 equivalent to \c prepend(other).
9296
9297 \sa prepend()
9298*/
9299
9300/*! \fn void QString::push_front(QChar ch)
9301
9302 \overload
9303
9304 Prepends the given \a ch character to the beginning of this string.
9305*/
9306
9307/*! \fn void QString::push_back(const QString &other)
9308
9309 This function is provided for STL compatibility, appending the
9310 given \a other string onto the end of this string. It is
9311 equivalent to \c append(other).
9312
9313 \sa append()
9314*/
9315
9316/*! \fn void QString::push_back(QChar ch)
9317
9318 \overload
9319
9320 Appends the given \a ch character onto the end of this string.
9321*/
9322
9323/*!
9324 \since 6.1
9325
9326 Removes from the string the characters in the half-open range
9327 [ \a first , \a last ). Returns an iterator to the character
9328 immediately after the last erased character (i.e. the character
9329 referred to by \a last before the erase).
9330*/
9331QString::iterator QString::erase(QString::const_iterator first, QString::const_iterator last)
9332{
9333 const auto start = std::distance(cbegin(), first);
9334 const auto len = std::distance(first, last);
9335 remove(start, len);
9336 return begin() + start;
9337}
9338
9339/*!
9340 \fn QString::iterator QString::erase(QString::const_iterator it)
9341
9342 \overload
9343 \since 6.5
9344
9345 Removes the character denoted by \c it from the string.
9346 Returns an iterator to the character immediately after the
9347 erased character.
9348
9349 \code
9350 QString c = "abcdefg";
9351 auto it = c.erase(c.cbegin()); // c is now "bcdefg"; "it" points to "b"
9352 \endcode
9353*/
9354
9355/*! \fn void QString::shrink_to_fit()
9356 \since 5.10
9357
9358 This function is provided for STL compatibility. It is
9359 equivalent to squeeze().
9360
9361 \sa squeeze()
9362*/
9363
9364/*!
9365 \fn std::string QString::toStdString() const
9366
9367 Returns a std::string object with the data contained in this
9368 QString. The Unicode data is converted into 8-bit characters using
9369 the toUtf8() function.
9370
9371 This method is mostly useful to pass a QString to a function
9372 that accepts a std::string object.
9373
9374 \sa toLatin1(), toUtf8(), toLocal8Bit(), QByteArray::toStdString()
9375*/
9376std::string QString::toStdString() const
9377{
9378 std::string result;
9379 if (isEmpty())
9380 return result;
9381
9382 auto writeToBuffer = [this](char *out, size_t) {
9383 char *last = QUtf8::convertFromUnicode(out, *this);
9384 return last - out;
9385 };
9386 size_t maxSize = size() * 3; // worst case for UTF-8
9387#ifdef __cpp_lib_string_resize_and_overwrite
9388 // C++23
9389 result.resize_and_overwrite(maxSize, writeToBuffer);
9390#else
9391 result.resize(maxSize);
9392 result.resize(writeToBuffer(result.data(), result.size()));
9393#endif
9394 return result;
9395}
9396
9397/*!
9398 \fn QString QString::fromRawData(const char16_t *unicode, qsizetype size)
9399 \since 6.10
9400
9401 Constructs a QString that uses the first \a size Unicode characters
9402 in the array \a unicode. The data in \a unicode is \e not
9403 copied. The caller must be able to guarantee that \a unicode will
9404 not be deleted or modified as long as the QString (or an
9405 unmodified copy of it) exists.
9406
9407 Any attempts to modify the QString or copies of it will cause it
9408 to create a deep copy of the data, ensuring that the raw data
9409 isn't modified.
9410
9411 Here is an example of how we can use a QRegularExpression on raw data in
9412 memory without requiring to copy the data into a QString:
9413
9414 \snippet qstring/main.cpp 22
9415 \snippet qstring/main.cpp 23
9416
9417 \warning A string created with fromRawData() is \e not
9418 '\\0'-terminated, unless the raw data contains a '\\0' character
9419 at position \a size. This means unicode() will \e not return a
9420 '\\0'-terminated string (although utf16() does, at the cost of
9421 copying the raw data).
9422
9423 \sa fromUtf16(), setRawData(), data(), constData(),
9424 nullTerminate(), nullTerminated()
9425*/
9426
9427/*!
9428 \fn QString QString::fromRawData(const QChar *unicode, qsizetype size)
9429 \overload
9430*/
9431
9432/*!
9433 \since 4.7
9434
9435 Resets the QString to use the first \a size Unicode characters
9436 in the array \a unicode. The data in \a unicode is \e not
9437 copied. The caller must be able to guarantee that \a unicode will
9438 not be deleted or modified as long as the QString (or an
9439 unmodified copy of it) exists.
9440
9441 This function can be used instead of fromRawData() to re-use
9442 existing QString objects to save memory re-allocations.
9443
9444 \sa fromRawData(), nullTerminate(), nullTerminated()
9445*/
9446QString &QString::setRawData(const QChar *unicode, qsizetype size)
9447{
9448 if (!unicode || !size) {
9449 clear();
9450 }
9451 *this = fromRawData(unicode, size);
9452 return *this;
9453}
9454
9455/*! \fn QString QString::fromStdU16String(const std::u16string &str)
9456 \since 5.5
9457
9458 \include qstring.cpp {from-std-string} {UTF-16} {fromUtf16()}
9459
9460 \sa fromUtf16(), fromStdWString(), fromStdU32String()
9461*/
9462
9463/*!
9464 \fn std::u16string QString::toStdU16String() const
9465 \since 5.5
9466
9467 Returns a std::u16string object with the data contained in this
9468 QString. The Unicode data is the same as returned by the utf16()
9469 method.
9470
9471 \sa utf16(), toStdWString(), toStdU32String()
9472*/
9473
9474/*! \fn QString QString::fromStdU32String(const std::u32string &str)
9475 \since 5.5
9476
9477 \include qstring.cpp {from-std-string} {UTF-32} {fromUcs4()}
9478
9479 \sa fromUcs4(), fromStdWString(), fromStdU16String()
9480*/
9481
9482/*!
9483 \fn std::u32string QString::toStdU32String() const
9484 \since 5.5
9485
9486 Returns a std::u32string object with the data contained in this
9487 QString. The Unicode data is the same as returned by the toUcs4()
9488 method.
9489
9490 \sa toUcs4(), toStdWString(), toStdU16String()
9491*/
9492
9493#if !defined(QT_NO_DATASTREAM)
9494/*!
9495 \fn QDataStream &operator<<(QDataStream &stream, const QString &string)
9496 \relates QString
9497
9498 Writes the given \a string to the specified \a stream.
9499
9500 \sa {Serializing Qt Data Types}
9501*/
9502
9503QDataStream &operator<<(QDataStream &out, const QString &str)
9504{
9505 if (out.version() == 1) {
9506 out << str.toLatin1();
9507 } else {
9508 if (!str.isNull() || out.version() < 3) {
9509 if ((out.byteOrder() == QDataStream::BigEndian) == (QSysInfo::ByteOrder == QSysInfo::BigEndian)) {
9510 out.writeBytes(reinterpret_cast<const char *>(str.unicode()),
9511 static_cast<qsizetype>(sizeof(QChar) * str.size()));
9512 } else {
9513 QVarLengthArray<char16_t> buffer(str.size());
9514 qbswap<sizeof(char16_t)>(str.constData(), str.size(), buffer.data());
9515 out.writeBytes(reinterpret_cast<const char *>(buffer.data()),
9516 static_cast<qsizetype>(sizeof(char16_t) * buffer.size()));
9517 }
9518 } else {
9519 QDataStream::writeQSizeType(out, -1); // write null marker
9520 }
9521 }
9522 return out;
9523}
9524
9525/*!
9526 \fn QDataStream &operator>>(QDataStream &stream, QString &string)
9527 \relates QString
9528
9529 Reads a string from the specified \a stream into the given \a string.
9530
9531 \sa {Serializing Qt Data Types}
9532*/
9533
9534QDataStream &operator>>(QDataStream &in, QString &str)
9535{
9536 if (in.version() == 1) {
9537 QByteArray l;
9538 in >> l;
9539 str = QString::fromLatin1(l);
9540 } else {
9541 qint64 size = QDataStream::readQSizeType(in);
9542 qsizetype bytes = size;
9543 if (size != bytes || size < -1) {
9544 str.clear();
9545 in.setStatus(QDataStream::SizeLimitExceeded);
9546 return in;
9547 }
9548 if (bytes == -1) { // null string
9549 str = QString();
9550 } else if (bytes > 0) {
9551 if (bytes & 0x1) {
9552 str.clear();
9553 in.setStatus(QDataStream::ReadCorruptData);
9554 return in;
9555 }
9556
9557 const qsizetype Step = 1024 * 1024;
9558 qsizetype len = bytes / 2;
9559 qsizetype allocated = 0;
9560
9561 while (allocated < len) {
9562 int blockSize = qMin(Step, len - allocated);
9563 str.resize(allocated + blockSize);
9564 if (in.readRawData(reinterpret_cast<char *>(str.data()) + allocated * 2,
9565 blockSize * 2) != blockSize * 2) {
9566 str.clear();
9567 in.setStatus(QDataStream::ReadPastEnd);
9568 return in;
9569 }
9570 allocated += blockSize;
9571 }
9572
9573 if ((in.byteOrder() == QDataStream::BigEndian)
9574 != (QSysInfo::ByteOrder == QSysInfo::BigEndian)) {
9575 char16_t *data = reinterpret_cast<char16_t *>(str.data());
9576 qbswap<sizeof(*data)>(data, len, data);
9577 }
9578 } else {
9579 str = QString(QLatin1StringView(""));
9580 }
9581 }
9582 return in;
9583}
9584#endif // QT_NO_DATASTREAM
9585
9586/*!
9587 \typedef QString::Data
9588 \internal
9589*/
9590
9591/*!
9592 \typedef QString::DataPtr
9593 \internal
9594*/
9595
9596/*!
9597 \fn DataPtr & QString::data_ptr()
9598 \internal
9599*/
9600
9601/*!
9602 \since 5.11
9603 \internal
9604 \relates QStringView
9605
9606 Returns \c true if the string is read right to left.
9607
9608 \sa QString::isRightToLeft()
9609*/
9610bool QtPrivate::isRightToLeft(QStringView string) noexcept
9611{
9612 int isolateLevel = 0;
9613
9614 for (QStringIterator i(string); i.hasNext();) {
9615 const char32_t c = i.next();
9616
9617 switch (QChar::direction(c)) {
9618 case QChar::DirRLI:
9619 case QChar::DirLRI:
9620 case QChar::DirFSI:
9621 ++isolateLevel;
9622 break;
9623 case QChar::DirPDI:
9624 if (isolateLevel)
9625 --isolateLevel;
9626 break;
9627 case QChar::DirL:
9628 if (isolateLevel)
9629 break;
9630 return false;
9631 case QChar::DirR:
9632 case QChar::DirAL:
9633 if (isolateLevel)
9634 break;
9635 return true;
9636 case QChar::DirEN:
9637 case QChar::DirES:
9638 case QChar::DirET:
9639 case QChar::DirAN:
9640 case QChar::DirCS:
9641 case QChar::DirB:
9642 case QChar::DirS:
9643 case QChar::DirWS:
9644 case QChar::DirON:
9645 case QChar::DirLRE:
9646 case QChar::DirLRO:
9647 case QChar::DirRLE:
9648 case QChar::DirRLO:
9649 case QChar::DirPDF:
9650 case QChar::DirNSM:
9651 case QChar::DirBN:
9652 break;
9653 }
9654 }
9655 return false;
9656}
9657
9658qsizetype QtPrivate::count(QStringView haystack, QStringView needle, Qt::CaseSensitivity cs) noexcept
9659{
9660 qsizetype num = 0;
9661 qsizetype i = -1;
9662 if (haystack.size() > 500 && needle.size() > 5) {
9663 QStringMatcher matcher(needle, cs);
9664 while ((i = matcher.indexIn(haystack, i + 1)) != -1)
9665 ++num;
9666 } else {
9667 while ((i = QtPrivate::findString(haystack, i + 1, needle, cs)) != -1)
9668 ++num;
9669 }
9670 return num;
9671}
9672
9673qsizetype QtPrivate::count(QStringView haystack, QChar needle, Qt::CaseSensitivity cs) noexcept
9674{
9675 if (cs == Qt::CaseSensitive)
9676 return std::count(haystack.cbegin(), haystack.cend(), needle);
9677
9678 needle = foldCase(needle);
9679 return std::count_if(haystack.cbegin(), haystack.cend(),
9680 [needle](const QChar c) { return foldAndCompare(c, needle); });
9681}
9682
9683qsizetype QtPrivate::count(QLatin1StringView haystack, QLatin1StringView needle, Qt::CaseSensitivity cs)
9684{
9685 qsizetype num = 0;
9686 qsizetype i = -1;
9687
9688 QLatin1StringMatcher matcher(needle, cs);
9689 while ((i = matcher.indexIn(haystack, i + 1)) != -1)
9690 ++num;
9691
9692 return num;
9693}
9694
9695qsizetype QtPrivate::count(QLatin1StringView haystack, QStringView needle, Qt::CaseSensitivity cs)
9696{
9697 if (haystack.size() < needle.size())
9698 return 0;
9699
9700 if (!QtPrivate::isLatin1(needle)) // won't find non-L1 UTF-16 needles in a L1 haystack!
9701 return 0;
9702
9703 qsizetype num = 0;
9704 qsizetype i = -1;
9705
9706 QVarLengthArray<uchar> s(needle.size());
9707 qt_to_latin1_unchecked(s.data(), needle.utf16(), needle.size());
9708
9709 QLatin1StringMatcher matcher(QLatin1StringView(reinterpret_cast<char *>(s.data()), s.size()),
9710 cs);
9711 while ((i = matcher.indexIn(haystack, i + 1)) != -1)
9712 ++num;
9713
9714 return num;
9715}
9716
9717qsizetype QtPrivate::count(QStringView haystack, QLatin1StringView needle, Qt::CaseSensitivity cs)
9718{
9719 if (haystack.size() < needle.size())
9720 return -1;
9721
9722 QVarLengthArray<char16_t> s = qt_from_latin1_to_qvla(needle);
9723 return QtPrivate::count(haystack, QStringView(s.data(), s.size()), cs);
9724}
9725
9726qsizetype QtPrivate::count(QLatin1StringView haystack, QChar needle, Qt::CaseSensitivity cs) noexcept
9727{
9728 // non-L1 needles cannot possibly match in L1-only haystacks
9729 if (needle.unicode() > 0xff)
9730 return 0;
9731
9732 if (cs == Qt::CaseSensitive) {
9733 return std::count(haystack.cbegin(), haystack.cend(), needle.toLatin1());
9734 } else {
9735 return std::count_if(haystack.cbegin(), haystack.cend(),
9736 CaseInsensitiveL1::matcher(needle.toLatin1()));
9737 }
9738}
9739
9740/*!
9741 \fn bool QtPrivate::startsWith(QStringView haystack, QStringView needle, Qt::CaseSensitivity cs)
9742 \since 5.10
9743 \fn bool QtPrivate::startsWith(QStringView haystack, QLatin1StringView needle, Qt::CaseSensitivity cs)
9744 \since 5.10
9745 \fn bool QtPrivate::startsWith(QLatin1StringView haystack, QStringView needle, Qt::CaseSensitivity cs)
9746 \since 5.10
9747 \fn bool QtPrivate::startsWith(QLatin1StringView haystack, QLatin1StringView needle, Qt::CaseSensitivity cs)
9748 \since 5.10
9749 \internal
9750 \relates QStringView
9751
9752 Returns \c true if \a haystack starts with \a needle,
9753 otherwise returns \c false.
9754
9755 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
9756
9757 \sa QtPrivate::endsWith(), QString::endsWith(), QStringView::endsWith(), QLatin1StringView::endsWith()
9758*/
9759
9760bool QtPrivate::startsWith(QStringView haystack, QStringView needle, Qt::CaseSensitivity cs) noexcept
9761{
9762 return qt_starts_with_impl(haystack, needle, cs);
9763}
9764
9765bool QtPrivate::startsWith(QStringView haystack, QLatin1StringView needle, Qt::CaseSensitivity cs) noexcept
9766{
9767 return qt_starts_with_impl(haystack, needle, cs);
9768}
9769
9770bool QtPrivate::startsWith(QLatin1StringView haystack, QStringView needle, Qt::CaseSensitivity cs) noexcept
9771{
9772 return qt_starts_with_impl(haystack, needle, cs);
9773}
9774
9775bool QtPrivate::startsWith(QLatin1StringView haystack, QLatin1StringView needle, Qt::CaseSensitivity cs) noexcept
9776{
9777 return qt_starts_with_impl(haystack, needle, cs);
9778}
9779
9780/*!
9781 \fn bool QtPrivate::endsWith(QStringView haystack, QStringView needle, Qt::CaseSensitivity cs)
9782 \since 5.10
9783 \fn bool QtPrivate::endsWith(QStringView haystack, QLatin1StringView needle, Qt::CaseSensitivity cs)
9784 \since 5.10
9785 \fn bool QtPrivate::endsWith(QLatin1StringView haystack, QStringView needle, Qt::CaseSensitivity cs)
9786 \since 5.10
9787 \fn bool QtPrivate::endsWith(QLatin1StringView haystack, QLatin1StringView needle, Qt::CaseSensitivity cs)
9788 \since 5.10
9789 \internal
9790 \relates QStringView
9791
9792 Returns \c true if \a haystack ends with \a needle,
9793 otherwise returns \c false.
9794
9795 \include qstring.qdocinc {search-comparison-case-sensitivity} {search}
9796
9797 \sa QtPrivate::startsWith(), QString::endsWith(), QStringView::endsWith(), QLatin1StringView::endsWith()
9798*/
9799
9800bool QtPrivate::endsWith(QStringView haystack, QStringView needle, Qt::CaseSensitivity cs) noexcept
9801{
9802 return qt_ends_with_impl(haystack, needle, cs);
9803}
9804
9805bool QtPrivate::endsWith(QStringView haystack, QLatin1StringView needle, Qt::CaseSensitivity cs) noexcept
9806{
9807 return qt_ends_with_impl(haystack, needle, cs);
9808}
9809
9810bool QtPrivate::endsWith(QLatin1StringView haystack, QStringView needle, Qt::CaseSensitivity cs) noexcept
9811{
9812 return qt_ends_with_impl(haystack, needle, cs);
9813}
9814
9815bool QtPrivate::endsWith(QLatin1StringView haystack, QLatin1StringView needle, Qt::CaseSensitivity cs) noexcept
9816{
9817 return qt_ends_with_impl(haystack, needle, cs);
9818}
9819
9820qsizetype QtPrivate::findString(QStringView haystack0, qsizetype from, QStringView needle0, Qt::CaseSensitivity cs) noexcept
9821{
9822 const qsizetype l = haystack0.size();
9823 const qsizetype sl = needle0.size();
9824 if (sl == 1)
9825 return findString(haystack0, from, needle0[0], cs);
9826 if (from < 0)
9827 from += l;
9828 if (std::size_t(sl + from) > std::size_t(l))
9829 return -1;
9830 if (!sl)
9831 return from;
9832 if (!l)
9833 return -1;
9834
9835 /*
9836 We use the Boyer-Moore algorithm in cases where the overhead
9837 for the skip table should pay off, otherwise we use a simple
9838 hash function.
9839 */
9840 if (l > 500 && sl > 5)
9841 return qFindStringBoyerMoore(haystack0, from, needle0, cs);
9842
9843 auto sv = [sl](const char16_t *v) { return QStringView(v, sl); };
9844 /*
9845 We use some hashing for efficiency's sake. Instead of
9846 comparing strings, we compare the hash value of str with that
9847 of a part of this QString. Only if that matches, we call
9848 qt_string_compare().
9849 */
9850 const char16_t *needle = needle0.utf16();
9851 const char16_t *haystack = haystack0.utf16() + from;
9852 const char16_t *end = haystack0.utf16() + (l - sl);
9853 const qregisteruint sl_minus_1 = sl - 1;
9854 qregisteruint hashNeedle = 0, hashHaystack = 0;
9855 qsizetype idx;
9856
9857 if (cs == Qt::CaseSensitive) {
9858 for (idx = 0; idx < sl; ++idx) {
9859 hashNeedle = ((hashNeedle<<1) + needle[idx]);
9860 hashHaystack = ((hashHaystack<<1) + haystack[idx]);
9861 }
9862 hashHaystack -= haystack[sl_minus_1];
9863
9864 while (haystack <= end) {
9865 hashHaystack += haystack[sl_minus_1];
9866 if (hashHaystack == hashNeedle
9867 && QtPrivate::compareStrings(needle0, sv(haystack), Qt::CaseSensitive) == 0)
9868 return haystack - haystack0.utf16();
9869
9870 REHASH(*haystack);
9871 ++haystack;
9872 }
9873 } else {
9874 const char16_t *haystack_start = haystack0.utf16();
9875 for (idx = 0; idx < sl; ++idx) {
9876 hashNeedle = (hashNeedle<<1) + foldCase(needle + idx, needle);
9877 hashHaystack = (hashHaystack<<1) + foldCase(haystack + idx, haystack_start);
9878 }
9879 hashHaystack -= foldCase(haystack + sl_minus_1, haystack_start);
9880
9881 while (haystack <= end) {
9882 hashHaystack += foldCase(haystack + sl_minus_1, haystack_start);
9883 if (hashHaystack == hashNeedle
9884 && QtPrivate::compareStrings(needle0, sv(haystack), Qt::CaseInsensitive) == 0)
9885 return haystack - haystack0.utf16();
9886
9887 REHASH(foldCase(haystack, haystack_start));
9888 ++haystack;
9889 }
9890 }
9891 return -1;
9892}
9893
9894qsizetype QtPrivate::findString(QStringView haystack, qsizetype from, QLatin1StringView needle, Qt::CaseSensitivity cs) noexcept
9895{
9896 if (haystack.size() < needle.size())
9897 return -1;
9898
9899 QVarLengthArray<char16_t> s = qt_from_latin1_to_qvla(needle);
9900 return QtPrivate::findString(haystack, from, QStringView(reinterpret_cast<const QChar*>(s.constData()), s.size()), cs);
9901}
9902
9903qsizetype QtPrivate::findString(QLatin1StringView haystack, qsizetype from, QStringView needle, Qt::CaseSensitivity cs) noexcept
9904{
9905 if (haystack.size() < needle.size())
9906 return -1;
9907
9908 if (!QtPrivate::isLatin1(needle)) // won't find non-L1 UTF-16 needles in a L1 haystack!
9909 return -1;
9910
9911 if (needle.size() == 1) {
9912 const char n = needle.front().toLatin1();
9913 return QtPrivate::findString(haystack, from, QLatin1StringView(&n, 1), cs);
9914 }
9915
9916 QVarLengthArray<char> s(needle.size());
9917 qt_to_latin1_unchecked(reinterpret_cast<uchar *>(s.data()), needle.utf16(), needle.size());
9918 return QtPrivate::findString(haystack, from, QLatin1StringView(s.data(), s.size()), cs);
9919}
9920
9921qsizetype QtPrivate::findString(QLatin1StringView haystack, qsizetype from, QLatin1StringView needle, Qt::CaseSensitivity cs) noexcept
9922{
9923 if (from < 0)
9924 from += haystack.size();
9925 if (from < 0)
9926 return -1;
9927 qsizetype adjustedSize = haystack.size() - from;
9928 if (adjustedSize < needle.size())
9929 return -1;
9930 if (needle.size() == 0)
9931 return from;
9932
9933 if (cs == Qt::CaseSensitive) {
9934
9935 if (needle.size() == 1) {
9936 Q_ASSERT(haystack.data() != nullptr); // see size check above
9937 if (auto it = memchr(haystack.data() + from, needle.front().toLatin1(), adjustedSize))
9938 return static_cast<const char *>(it) - haystack.data();
9939 return -1;
9940 }
9941
9942 const QLatin1StringMatcher matcher(needle, Qt::CaseSensitivity::CaseSensitive);
9943 return matcher.indexIn(haystack, from);
9944 }
9945
9946 // If the needle is sufficiently small we simply iteratively search through
9947 // the haystack. When the needle is too long we use a boyer-moore searcher
9948 // from the standard library, if available. If it is not available then the
9949 // QLatin1Strings are converted to QString and compared as such. Though
9950 // initialization is slower the boyer-moore search it employs still makes up
9951 // for it when haystack and needle are sufficiently long.
9952 // The needle size was chosen by testing various lengths using the
9953 // qstringtokenizer benchmark with the
9954 // "tokenize_qlatin1string_qlatin1string" test.
9955#ifdef Q_CC_MSVC
9956 const qsizetype threshold = 1;
9957#else
9958 const qsizetype threshold = 13;
9959#endif
9960 if (needle.size() <= threshold) {
9961 const auto begin = haystack.begin();
9962 const auto end = haystack.end() - needle.size() + 1;
9963 auto ciMatch = CaseInsensitiveL1::matcher(needle[0].toLatin1());
9964 const qsizetype nlen1 = needle.size() - 1;
9965 for (auto it = std::find_if(begin + from, end, ciMatch); it != end;
9966 it = std::find_if(it + 1, end, ciMatch)) {
9967 // In this comparison we skip the first character because we know it's a match
9968 if (!nlen1 || QLatin1StringView(it + 1, nlen1).compare(needle.sliced(1), cs) == 0)
9969 return std::distance(begin, it);
9970 }
9971 return -1;
9972 }
9973
9974 QLatin1StringMatcher matcher(needle, Qt::CaseSensitivity::CaseInsensitive);
9975 return matcher.indexIn(haystack, from);
9976}
9977
9978qsizetype QtPrivate::lastIndexOf(QStringView haystack, qsizetype from, char16_t needle, Qt::CaseSensitivity cs) noexcept
9979{
9980 return qLastIndexOf(haystack, QChar(needle), from, cs);
9981}
9982
9983qsizetype QtPrivate::lastIndexOf(QStringView haystack, qsizetype from, QStringView needle, Qt::CaseSensitivity cs) noexcept
9984{
9985 return qLastIndexOf(haystack, from, needle, cs);
9986}
9987
9988qsizetype QtPrivate::lastIndexOf(QStringView haystack, qsizetype from, QLatin1StringView needle, Qt::CaseSensitivity cs) noexcept
9989{
9990 return qLastIndexOf(haystack, from, needle, cs);
9991}
9992
9993qsizetype QtPrivate::lastIndexOf(QLatin1StringView haystack, qsizetype from, QStringView needle, Qt::CaseSensitivity cs) noexcept
9994{
9995 return qLastIndexOf(haystack, from, needle, cs);
9996}
9997
9998qsizetype QtPrivate::lastIndexOf(QLatin1StringView haystack, qsizetype from, QLatin1StringView needle, Qt::CaseSensitivity cs) noexcept
9999{
10000 return qLastIndexOf(haystack, from, needle, cs);
10001}
10002
10003#if QT_CONFIG(regularexpression)
10004qsizetype QtPrivate::indexOf(QStringView viewHaystack, const QString *stringHaystack, const QRegularExpression &re, qsizetype from, QRegularExpressionMatch *rmatch)
10005{
10006 if (!re.isValid()) {
10007 qtWarnAboutInvalidRegularExpression(re.pattern(), "QString(View)::indexOf");
10008 return -1;
10009 }
10010
10011 QRegularExpressionMatch match = stringHaystack
10012 ? re.match(*stringHaystack, from)
10013 : re.matchView(viewHaystack, from);
10014 if (match.hasMatch()) {
10015 const qsizetype ret = match.capturedStart();
10016 if (rmatch)
10017 *rmatch = std::move(match);
10018 return ret;
10019 }
10020
10021 return -1;
10022}
10023
10024qsizetype QtPrivate::indexOf(QStringView haystack, const QRegularExpression &re, qsizetype from, QRegularExpressionMatch *rmatch)
10025{
10026 return indexOf(haystack, nullptr, re, from, rmatch);
10027}
10028
10029qsizetype QtPrivate::lastIndexOf(QStringView viewHaystack, const QString *stringHaystack, const QRegularExpression &re, qsizetype from, QRegularExpressionMatch *rmatch)
10030{
10031 if (!re.isValid()) {
10032 qtWarnAboutInvalidRegularExpression(re.pattern(), "QString(View)::lastIndexOf");
10033 return -1;
10034 }
10035
10036 qsizetype endpos = (from < 0) ? (viewHaystack.size() + from + 1) : (from + 1);
10037 QRegularExpressionMatchIterator iterator = stringHaystack
10038 ? re.globalMatch(*stringHaystack)
10039 : re.globalMatchView(viewHaystack);
10040 qsizetype lastIndex = -1;
10041 while (iterator.hasNext()) {
10042 QRegularExpressionMatch match = iterator.next();
10043 qsizetype start = match.capturedStart();
10044 if (start < endpos) {
10045 lastIndex = start;
10046 if (rmatch)
10047 *rmatch = std::move(match);
10048 } else {
10049 break;
10050 }
10051 }
10052
10053 return lastIndex;
10054}
10055
10056qsizetype QtPrivate::lastIndexOf(QStringView haystack, const QRegularExpression &re, qsizetype from, QRegularExpressionMatch *rmatch)
10057{
10058 return lastIndexOf(haystack, nullptr, re, from, rmatch);
10059}
10060
10061bool QtPrivate::contains(QStringView viewHaystack, const QString *stringHaystack, const QRegularExpression &re, QRegularExpressionMatch *rmatch)
10062{
10063 if (!re.isValid()) {
10064 qtWarnAboutInvalidRegularExpression(re.pattern(), "QString(View)::contains");
10065 return false;
10066 }
10067 QRegularExpressionMatch m = stringHaystack
10068 ? re.match(*stringHaystack)
10069 : re.matchView(viewHaystack);
10070 bool hasMatch = m.hasMatch();
10071 if (hasMatch && rmatch)
10072 *rmatch = std::move(m);
10073 return hasMatch;
10074}
10075
10076bool QtPrivate::contains(QStringView haystack, const QRegularExpression &re, QRegularExpressionMatch *rmatch)
10077{
10078 return contains(haystack, nullptr, re, rmatch);
10079}
10080
10081qsizetype QtPrivate::count(QStringView haystack, const QRegularExpression &re)
10082{
10083 if (!re.isValid()) {
10084 qtWarnAboutInvalidRegularExpression(re.pattern(), "QString(View)::count");
10085 return 0;
10086 }
10087 qsizetype count = 0;
10088 qsizetype index = -1;
10089 qsizetype len = haystack.size();
10090 while (index <= len - 1) {
10091 QRegularExpressionMatch match = re.matchView(haystack, index + 1);
10092 if (!match.hasMatch())
10093 break;
10094 count++;
10095
10096 // Search again, from the next character after the beginning of this
10097 // capture. If the capture starts with a surrogate pair, both together
10098 // count as "one character".
10099 index = match.capturedStart();
10100 if (index < len && haystack[index].isHighSurrogate())
10101 ++index;
10102 }
10103 return count;
10104}
10105
10106#endif // QT_CONFIG(regularexpression)
10107
10108/*!
10109 \since 5.0
10110
10111 Converts a plain text string to an HTML string with
10112 HTML metacharacters \c{<}, \c{>}, \c{&}, and \c{"} replaced by HTML
10113 entities.
10114
10115 Example:
10116
10117 \snippet code/src_corelib_text_qstring.cpp 7
10118*/
10119QString QString::toHtmlEscaped() const
10120{
10121 const auto pos = std::u16string_view(*this).find_first_of(u"<>&\"");
10122 if (pos == std::u16string_view::npos)
10123 return *this;
10124 QString rich;
10125 const qsizetype len = size();
10126 rich.reserve(qsizetype(len * 1.1));
10127 rich += qToStringViewIgnoringNull(*this).first(pos);
10128 for (auto ch : qToStringViewIgnoringNull(*this).sliced(pos)) {
10129 if (ch == u'<')
10130 rich += "&lt;"_L1;
10131 else if (ch == u'>')
10132 rich += "&gt;"_L1;
10133 else if (ch == u'&')
10134 rich += "&amp;"_L1;
10135 else if (ch == u'"')
10136 rich += "&quot;"_L1;
10137 else
10138 rich += ch;
10139 }
10140 rich.squeeze();
10141 return rich;
10142}
10143
10144/*!
10145 \macro QStringLiteral(str)
10146 \relates QString
10147
10148 The macro generates the data for a QString out of the string literal \a str
10149 at compile time. Creating a QString from it is free in this case, and the
10150 generated string data is stored in the read-only segment of the compiled
10151 object file.
10152
10153 If you have code that looks like this:
10154
10155 \snippet code/src_corelib_text_qstring.cpp 9
10156
10157 then a temporary QString will be created to be passed as the \c{hasAttribute}
10158 function parameter. This can be quite expensive, as it involves a memory
10159 allocation and the copy/conversion of the data into QString's internal
10160 encoding.
10161
10162 This cost can be avoided by using QStringLiteral instead:
10163
10164 \snippet code/src_corelib_text_qstring.cpp 10
10165
10166 In this case, QString's internal data will be generated at compile time; no
10167 conversion or allocation will occur at runtime.
10168
10169 Using QStringLiteral instead of a double quoted plain C++ string literal can
10170 significantly speed up creation of QString instances from data known at
10171 compile time.
10172
10173 \note QLatin1StringView can still be more efficient than QStringLiteral
10174 when the string is passed to a function that has an overload taking
10175 QLatin1StringView and this overload avoids conversion to QString. For
10176 instance, QString::operator==() can compare to a QLatin1StringView
10177 directly:
10178
10179 \snippet code/src_corelib_text_qstring.cpp 11
10180
10181 \note Some compilers have bugs encoding strings containing characters outside
10182 the US-ASCII character set. Make sure you prefix your string with \c{u} in
10183 those cases. It is optional otherwise.
10184
10185 \sa QByteArrayLiteral
10186*/
10187
10188#if QT_DEPRECATED_SINCE(6, 8)
10189/*!
10190 \fn QtLiterals::operator""_qs(const char16_t *str, size_t size)
10191
10192 \relates QString
10193 \since 6.2
10194 \deprecated [6.8] Use \c _s from Qt::StringLiterals namespace instead.
10195
10196 Literal operator that creates a QString out of the first \a size characters in
10197 the char16_t string literal \a str.
10198
10199 The QString is created at compile time, and the generated string data is stored
10200 in the read-only segment of the compiled object file. Duplicate literals may
10201 share the same read-only memory. This functionality is interchangeable with
10202 QStringLiteral, but saves typing when many string literals are present in the
10203 code.
10204
10205 The following code creates a QString:
10206 \code
10207 auto str = u"hello"_qs;
10208 \endcode
10209
10210 \sa QStringLiteral, QtLiterals::operator""_qba(const char *str, size_t size)
10211*/
10212#endif // QT_DEPRECATED_SINCE(6, 8)
10213
10214/*!
10215 \fn Qt::Literals::StringLiterals::operator""_s(const char16_t *str, size_t size)
10216
10217 \relates QString
10218 \since 6.4
10219
10220 Literal operator that creates a QString out of the first \a size characters in
10221 the char16_t string literal \a str.
10222
10223 The QString is created at compile time, and the generated string data is stored
10224 in the read-only segment of the compiled object file. Duplicate literals may
10225 share the same read-only memory. This functionality is interchangeable with
10226 QStringLiteral, but saves typing when many string literals are present in the
10227 code.
10228
10229 The following code creates a QString:
10230 \code
10231 using namespace Qt::Literals::StringLiterals;
10232
10233 auto str = u"hello"_s;
10234 \endcode
10235
10236 \sa Qt::Literals::StringLiterals
10237*/
10238
10239/*!
10240 \internal
10241 */
10242void QAbstractConcatenable::appendLatin1To(QLatin1StringView in, QChar *out) noexcept
10243{
10244 qt_from_latin1(reinterpret_cast<char16_t *>(out), in.data(), size_t(in.size()));
10245}
10246
10247/*!
10248 \fn template <typename T> qsizetype erase(QString &s, const T &t)
10249 \relates QString
10250 \since 6.1
10251
10252 Removes all elements that compare equal to \a t from the
10253 string \a s. Returns the number of elements removed, if any.
10254
10255 \sa erase_if
10256*/
10257
10258/*!
10259 \fn template <typename Predicate> qsizetype erase_if(QString &s, Predicate pred)
10260 \relates QString
10261 \since 6.1
10262
10263 Removes all elements for which the predicate \a pred returns true
10264 from the string \a s. Returns the number of elements removed, if
10265 any.
10266
10267 \sa erase
10268*/
10269
10270/*!
10271 \macro const char *qPrintable(const QString &str)
10272 \relates QString
10273
10274 Returns \a str as a \c{const char *}. This is equivalent to
10275 \a{str}.toLocal8Bit().constData().
10276
10277 The char pointer will be invalid after the statement in which
10278 qPrintable() is used. This is because the array returned by
10279 QString::toLocal8Bit() will fall out of scope.
10280
10281 \note qDebug(), qInfo(), qWarning(), qCritical(), qFatal() expect
10282 %s arguments to be UTF-8 encoded, while qPrintable() converts to
10283 local 8-bit encoding. Therefore qUtf8Printable() should be used
10284 for logging strings instead of qPrintable().
10285
10286 \sa qUtf8Printable()
10287*/
10288
10289/*!
10290 \macro const char *qUtf8Printable(const QString &str)
10291 \relates QString
10292 \since 5.4
10293
10294 Returns \a str as a \c{const char *}. This is equivalent to
10295 \a{str}.toUtf8().constData().
10296
10297 The char pointer will be invalid after the statement in which
10298 qUtf8Printable() is used. This is because the array returned by
10299 QString::toUtf8() will fall out of scope.
10300
10301 Example:
10302
10303 \snippet code/src_corelib_text_qstring.cpp qUtf8Printable
10304
10305 \sa qPrintable(), qDebug(), qInfo(), qWarning(), qCritical(), qFatal()
10306*/
10307
10308/*!
10309 \macro const wchar_t *qUtf16Printable(const QString &str)
10310 \relates QString
10311 \since 5.7
10312
10313 Returns \a str as a \c{const ushort *}, but cast to a \c{const wchar_t *}
10314 to avoid warnings. This is equivalent to \a{str}.utf16() plus some casting.
10315
10316 The only useful thing you can do with the return value of this macro is to
10317 pass it to QString::asprintf() for use in a \c{%ls} conversion. In particular,
10318 the return value is \e{not} a valid \c{const wchar_t*}!
10319
10320 In general, the pointer will be invalid after the statement in which
10321 qUtf16Printable() is used. This is because the pointer may have been
10322 obtained from a temporary expression, which will fall out of scope.
10323
10324 Example:
10325
10326 \snippet code/src_corelib_text_qstring.cpp qUtf16Printable
10327
10328 \sa qPrintable(), qDebug(), qInfo(), qWarning(), qCritical(), qFatal()
10329*/
10330
10331QT_END_NAMESPACE
10332
10333#undef REHASH
QString convertToQString(QAnyStringView string)
Definition qstring.cpp:5633
Definition qlist.h:80
char32_t next(char32_t invalidAs=QChar::ReplacementCharacter)
bool hasNext() const
\inmodule QtCore
QList< uint > convertToUcs4(QStringView string)
Definition qstring.cpp:5889
QByteArray convertToUtf8(QStringView string)
Definition qstring.cpp:5834
QByteArray convertToLocal8Bit(QStringView string)
Definition qstring.cpp:5791
QByteArray convertToLatin1(QStringView string)
Definition qstring.cpp:5650
Combined button and popup list for selecting options.
static QString convertCase(T &str, QUnicodeTables::Case which)
Definition qstring.cpp:7242
static constexpr NormalizationCorrection uc_normalization_corrections[]
Q_CORE_EXPORT Q_DECL_PURE_FUNCTION bool startsWith(QStringView haystack, QStringView needle, Qt::CaseSensitivity cs=Qt::CaseSensitive) noexcept
Definition qstring.cpp:9760
Q_CORE_EXPORT Q_DECL_PURE_FUNCTION bool endsWith(QStringView haystack, QStringView needle, Qt::CaseSensitivity cs=Qt::CaseSensitive) noexcept
Definition qstring.cpp:9800
Q_CORE_EXPORT Q_DECL_PURE_FUNCTION bool isLower(QStringView s) noexcept
Definition qstring.cpp:5570
const QString & asString(const QString &s)
Definition qstring.h:1661
Q_CORE_EXPORT Q_DECL_PURE_FUNCTION bool isValidUtf16(QStringView s) noexcept
Definition qstring.cpp:905
Q_CORE_EXPORT Q_DECL_PURE_FUNCTION bool equalStrings(QStringView lhs, QStringView rhs) noexcept
Definition qstring.cpp:1386
qsizetype findString(QStringView str, qsizetype from, QChar needle, Qt::CaseSensitivity cs=Qt::CaseSensitive) noexcept
Q_CORE_EXPORT Q_DECL_PURE_FUNCTION bool isRightToLeft(QStringView string) noexcept
Q_CORE_EXPORT Q_DECL_PURE_FUNCTION int compareStrings(QStringView lhs, QStringView rhs, Qt::CaseSensitivity cs=Qt::CaseSensitive) noexcept
Q_CORE_EXPORT Q_DECL_PURE_FUNCTION bool isAscii(QLatin1StringView s) noexcept
Definition qstring.cpp:850
constexpr bool isLatin1(QLatin1StringView s) noexcept
Definition qstring.h:78
Q_CORE_EXPORT Q_DECL_PURE_FUNCTION const char16_t * qustrcasechr(QStringView str, char16_t ch) noexcept
Definition qstring.cpp:775
Q_CORE_EXPORT Q_DECL_PURE_FUNCTION bool isUpper(QStringView s) noexcept
Definition qstring.cpp:5575
Q_CORE_EXPORT Q_DECL_PURE_FUNCTION const char16_t * qustrchr(QStringView str, char16_t ch) noexcept
Definition qstring.cpp:687
void qt_to_latin1_unchecked(uchar *dst, const char16_t *uc, qsizetype len)
Definition qstring.cpp:1188
static char16_t foldCase(char16_t ch) noexcept
Definition qchar.cpp:1696
#define __has_feature(x)
uint QT_FASTCALL fetch1Pixel< QPixelLayout::BPP1LSB >(const uchar *src, int index)
bool comparesEqual(const QFileInfo &lhs, const QFileInfo &rhs)
static bool isAscii_helper(const char16_t *&ptr, const char16_t *end)
Definition qstring.cpp:858
static Int toIntegral(QStringView string, bool *ok, int base)
Definition qstring.cpp:7725
void qt_to_latin1(uchar *dst, const char16_t *src, qsizetype length)
Definition qstring.cpp:1183
Qt::strong_ordering compareThreeWay(const QByteArray &lhs, const QChar &rhs) noexcept
Definition qstring.cpp:6807
static void append_utf8(QString &qs, const char *cs, qsizetype len)
Definition qstring.cpp:7359
#define ATTRIBUTE_NO_SANITIZE
Definition qstring.cpp:366
bool qt_is_ascii(const char *&ptr, const char *end) noexcept
Definition qstring.cpp:786
static void replace_in_place(QString &str, QSpan< size_t > indices, qsizetype blen, QStringView after)
Definition qstring.cpp:3707
static bool checkCase(QStringView s, QUnicodeTables::Case c) noexcept
Definition qstring.cpp:5559
static void replace_helper(QString &str, QSpan< size_t > indices, qsizetype blen, QStringView after)
Definition qstring.cpp:3751
Q_CORE_EXPORT void qt_from_latin1(char16_t *dst, const char *str, size_t size) noexcept
Definition qstring.cpp:920
static int ucstrcmp(const char16_t *a, size_t alen, const Char2 *b, size_t blen)
Definition qstring.cpp:1359
bool comparesEqual(const QByteArray &lhs, char16_t rhs) noexcept
Definition qstring.cpp:6813
Q_DECLARE_TYPEINFO(Part, Q_PRIMITIVE_TYPE)
static void removeStringImpl(QString &s, const T &needle, Qt::CaseSensitivity cs)
Definition qstring.cpp:3487
static bool needsReallocate(const QString &str, qsizetype newSize)
Definition qstring.cpp:2623
static int qArgDigitValue(QChar ch) noexcept
Definition qstring.cpp:1626
bool comparesEqual(const QByteArray &lhs, const QChar &rhs) noexcept
Definition qstring.cpp:6802
#define REHASH(a)
Definition qstring.cpp:65
static void replace_with_copy(QString &str, QSpan< size_t > indices, qsizetype blen, QStringView after)
Definition qstring.cpp:3684
bool comparesEqual(const QByteArrayView &lhs, char16_t rhs) noexcept
Definition qstring.cpp:6791
static int ucstrncmp(const char16_t *a, const char16_t *b, size_t l)
Definition qstring.cpp:1277
static Q_NEVER_INLINE int ucstricmp(qsizetype alen, const char16_t *a, qsizetype blen, const char *b)
Definition qstring.cpp:1221
static QByteArray qt_convert_to_latin1(QStringView string)
Definition qstring.cpp:5656
static bool ucstreq(const char16_t *a, size_t alen, const Char2 *b)
Definition qstring.cpp:1352
static QList< uint > qt_convert_to_ucs4(QStringView string)
Definition qstring.cpp:5861
qsizetype qFindStringBoyerMoore(QStringView haystack, qsizetype from, QStringView needle, Qt::CaseSensitivity cs)
static QByteArray qt_convert_to_local_8bit(QStringView string)
Definition qstring.cpp:5768
static LengthMod parse_length_modifier(const char *&c) noexcept
Definition qstring.cpp:7415
static ArgEscapeData findArgEscapes(QStringView s)
Definition qstring.cpp:8607
static QByteArray qt_convert_to_utf8(QStringView str)
Definition qstring.cpp:5814
static void qt_to_latin1_internal(uchar *dst, const char16_t *src, qsizetype length)
Definition qstring.cpp:1004
LengthMod
Definition qstring.cpp:7404
@ lm_z
Definition qstring.cpp:7404
@ lm_none
Definition qstring.cpp:7404
@ lm_t
Definition qstring.cpp:7404
@ lm_l
Definition qstring.cpp:7404
@ lm_ll
Definition qstring.cpp:7404
@ lm_hh
Definition qstring.cpp:7404
@ lm_L
Definition qstring.cpp:7404
@ lm_h
Definition qstring.cpp:7404
@ lm_j
Definition qstring.cpp:7404
static void insert_helper(QString &str, qsizetype i, const T &toInsert)
Definition qstring.cpp:2962
static int latin1nicmp(const char *lhsChar, qsizetype lSize, const char *rhsChar, qsizetype rSize)
Definition qstring.cpp:1368
Qt::strong_ordering compareThreeWay(const QByteArrayView &lhs, const QChar &rhs) noexcept
Definition qstring.cpp:6785
static char16_t to_unicode(const char c)
Definition qstring.cpp:9002
Qt::strong_ordering compareThreeWay(const QByteArray &lhs, char16_t rhs) noexcept
Definition qstring.cpp:6818
static QString replaceArgEscapes(QStringView s, const ArgEscapeData &d, qsizetype field_width, QStringView arg, QStringView larg, QChar fillChar)
Definition qstring.cpp:8683
static QVarLengthArray< char16_t > qt_from_latin1_to_qvla(QLatin1StringView str)
Definition qstring.cpp:995
static Q_NEVER_INLINE int ucstricmp8(const char *utf8, const char *utf8end, const QChar *utf16, const QChar *utf16end)
Definition qstring.cpp:1239
void qt_string_normalize(QString *data, QString::NormalizationForm mode, QChar::UnicodeVersion version, qsizetype from)
Definition qstring.cpp:8497
static uint parse_flag_characters(const char *&c) noexcept
Definition qstring.cpp:7367
static Q_NEVER_INLINE int ucstricmp(qsizetype alen, const char16_t *a, qsizetype blen, const char16_t *b)
Definition qstring.cpp:1194
static char16_t to_unicode(const QChar c)
Definition qstring.cpp:9001
QDataStream & operator>>(QDataStream &in, QString &str)
Definition qstring.cpp:9534
static int getEscape(const Char *uc, qsizetype *pos, qsizetype len)
Definition qstring.cpp:9005
static int ucstrncmp(const char16_t *a, const char *b, size_t l)
Definition qstring.cpp:1330
static bool can_consume(const char *&c, char ch) noexcept
Definition qstring.cpp:7406
static int parse_field_width(const char *&c, qsizetype size)
Definition qstring.cpp:7387
Qt::strong_ordering compareThreeWay(const QByteArrayView &lhs, char16_t rhs) noexcept
Definition qstring.cpp:6796
#define qUtf16Printable(string)
Definition qstring.h:1678
qsizetype occurrences
Definition qstring.cpp:8601
qsizetype escape_len
Definition qstring.cpp:8604
qsizetype locale_occurrences
Definition qstring.cpp:8602
\inmodule QtCore \reentrant
Definition qchar.h:17
constexpr char16_t unicode() const noexcept
Converts a Latin-1 character to a 16-bit-encoded Unicode representation of the character.
Definition qchar.h:21
constexpr QLatin1Char(char c) noexcept
Constructs a Latin-1 character for c.
Definition qchar.h:19
@ BlankBeforePositive
Definition qlocale_p.h:264
@ AddTrailingZeroes
Definition qlocale_p.h:261
static int difference(char lhs, char rhs)