7#include "core/fxcrt/widestring.h"
16#include "core/fxcrt/check.h"
17#include "core/fxcrt/check_op.h"
18#include "core/fxcrt/compiler_specific.h"
19#include "core/fxcrt/fx_codepage.h"
20#include "core/fxcrt/fx_extension.h"
21#include "core/fxcrt/fx_memcpy_wrappers.h"
22#include "core/fxcrt/fx_safe_types.h"
23#include "core/fxcrt/fx_system.h"
24#include "core/fxcrt/numerics/safe_math.h"
25#include "core/fxcrt/span_util.h"
26#include "core/fxcrt/string_pool_template.h"
27#include "core/fxcrt/utf16.h"
30template class fxcrt::StringViewTemplate<
wchar_t>;
31template class fxcrt::StringPoolTemplate<
WideString>;
34#define FORCE_ANSI 0x10000
35#define FORCE_UNICODE 0x20000
36#define FORCE_INT64 0x40000
40#if defined(WCHAR_T_IS_32_BIT)
41size_t FuseSurrogates(pdfium::span<
wchar_t> s) {
43 for (size_t i = 0; i < s.size(); ++i) {
45 if (pdfium::IsHighSurrogate(s[i]) && i + 1 < s.size() &&
46 pdfium::IsLowSurrogate(s[i + 1])) {
47 s[dest_pos++] = pdfium::SurrogatePair(s[i], s[i + 1]).ToCodePoint();
57constexpr wchar_t kWideTrimChars[] = L"\x09\x0a\x0b\x0c\x0d\x20";
59std::optional<size_t> GuessSizeForVSWPrintf(
const wchar_t* pFormat,
63 for (
const wchar_t* pStr = pFormat; *pStr != 0; pStr++) {
64 if (*pStr !=
'%' || *(pStr = pStr + 1) ==
'%') {
69 for (; *pStr != 0; pStr++) {
72 }
else if (*pStr ==
'*') {
73 iWidth = va_arg(argList,
int);
74 }
else if (*pStr !=
'-' && *pStr !=
'+' && *pStr !=
'0' &&
84 if (iWidth < 0 || iWidth > 128 * 1024) {
87 uint32_t nWidth =
static_cast<uint32_t>(iWidth);
92 iPrecision = va_arg(argList,
int);
101 if (iPrecision < 0 || iPrecision > 128 * 1024) {
104 uint32_t nPrecision =
static_cast<uint32_t>(iPrecision);
106 if (*pStr == L'I' && *(pStr + 1) == L'6' && *(pStr + 2) == L'4') {
127 switch (*pStr | nModifier) {
131 va_arg(argList,
int);
136 va_arg(argList,
int);
141 va_arg(argList,
int);
144 const wchar_t* pstrNextArg = va_arg(argList,
const wchar_t*);
146 nItemLen = wcslen(pstrNextArg);
155 const char* pstrNextArg = va_arg(argList,
const char*);
157 nItemLen = strlen(pstrNextArg);
167 const char* pstrNextArg = va_arg(argList,
const char*);
169 nItemLen = strlen(pstrNextArg);
179 const wchar_t* pstrNextArg = va_arg(argList,
wchar_t*);
181 nItemLen = wcslen(pstrNextArg);
191 if (nPrecision != 0 && nItemLen > nPrecision) {
192 nItemLen = nPrecision;
194 if (nItemLen < nWidth) {
206 va_arg(argList, int64_t);
208 va_arg(argList,
int);
211 if (nItemLen < nWidth + nPrecision) {
212 nItemLen = nWidth + nPrecision;
221 va_arg(argList,
double);
223 if (nItemLen < nWidth + nPrecision) {
224 nItemLen = nWidth + nPrecision;
228 if (nWidth + nPrecision > 100) {
229 nItemLen = nPrecision + nWidth + 128;
233 f = va_arg(argList,
double);
236 nItemLen = strlen(pszTemp);
240 va_arg(argList,
void*);
242 if (nItemLen < nWidth + nPrecision) {
243 nItemLen = nWidth + nPrecision;
247 va_arg(argList,
int*);
260 const wchar_t* pFormat,
268 pdfium::span<
wchar_t> buffer = str.GetBuffer(size);
277 FXSYS_memset(buffer.data(), 0, (size + 1) *
sizeof(
wchar_t)));
278 int ret = vswprintf(buffer.data(), size + 1, pFormat, argList);
279 bool bSufficientBuffer = ret >= 0 || buffer[size - 1] == 0;
280 if (!bSufficientBuffer)
283 str.ReleaseBuffer(str.GetStringLength());
292void AppendCodePointToWideString(
char32_t code_point,
WideString& buffer) {
298#if defined(WCHAR_T_IS_16_BIT)
299 if (code_point < pdfium::kMinimumSupplementaryCodePoint) {
300 buffer +=
static_cast<
wchar_t>(code_point);
303 pdfium::SurrogatePair surrogate_pair(code_point);
304 buffer += surrogate_pair.high();
305 buffer += surrogate_pair.low();
308 buffer
+= static_cast<
wchar_t>(code_point);
316 char32_t code_point = 0;
317 for (
char byte : bsStr) {
318 uint8_t code_unit =
static_cast<uint8_t>(byte);
319 if (code_unit < 0x80) {
321 AppendCodePointToWideString(code_unit, buffer);
322 }
else if (code_unit < 0xc0) {
325 code_point = (code_point << 6) | (code_unit & 0x3f);
326 if (remaining == 0) {
327 AppendCodePointToWideString(code_point, buffer);
330 }
else if (code_unit < 0xe0) {
332 code_point = code_unit & 0x1f;
333 }
else if (code_unit < 0xf0) {
335 code_point = code_unit & 0x0f;
336 }
else if (code_unit < 0xf8) {
338 code_point = code_unit & 0x07;
351static_assert(
sizeof(
WideString) <=
sizeof(
wchar_t*),
352 "Strings must not require more space than pointers");
357 swprintf(wbuf,
std::size(wbuf), L"%d", i);
364 va_copy(argListCopy, argList);
365 auto guess = GuessSizeForVSWPrintf(format, argListCopy);
368 if (!guess.has_value()) {
371 int maxLen =
pdfium::checked_cast<
int>(guess.value());
373 while (maxLen < 32 * 1024) {
374 va_copy(argListCopy, argList);
376 TryVSWPrintf(
static_cast<size_t>(maxLen), format, argListCopy);
389 va_start(argList, pFormat);
398 m_pData = StringData::Create(
UNSAFE_BUFFERS(pdfium::make_span(pStr, nLen)));
403 m_pData = StringData::Create(1);
404 m_pData->m_String[0] = ch;
411 if (!stringSrc.IsEmpty()) {
412 m_pData = StringData::Create(stringSrc.span());
417 FX_SAFE_SIZE_T nSafeLen = str1.GetLength();
418 nSafeLen += str2.GetLength();
420 size_t nNewLen = nSafeLen.ValueOrDie();
424 m_pData = StringData::Create(nNewLen);
425 m_pData->CopyContents(str1.span());
426 m_pData->CopyContentsAt(str1.GetLength(), str2.span());
430 FX_SAFE_SIZE_T nSafeLen = 0;
431 for (
const auto& item : list)
432 nSafeLen += item.GetLength();
434 size_t nNewLen = nSafeLen.ValueOrDie();
438 m_pData = StringData::Create(nNewLen);
441 for (
const auto& item : list) {
442 m_pData->CopyContentsAt(nOffset, item.span());
443 nOffset += item.GetLength();
451 AssignCopy(str, wcslen(str));
460 AssignCopy(str.unterminated_c_str(), str.GetLength());
466 if (m_pData != that.m_pData)
467 m_pData = that.m_pData;
473 if (m_pData != that.m_pData)
474 m_pData = std::move(that.m_pData);
481 Concat(str, wcslen(str));
493 Concat(str.m_pData->m_String, str.m_pData->m_nDataLength);
500 Concat(str.unterminated_c_str(), str.GetLength());
507 return !ptr || !ptr[0];
510 return m_pData->m_nDataLength == 0;
514 return wcslen(ptr) == m_pData->m_nDataLength &&
516 m_pData->m_nDataLength)) == 0;
521 return str.IsEmpty();
525 return m_pData->m_nDataLength == str.GetLength() &&
527 m_pData->m_String, str.unterminated_c_str(), str.GetLength())) ==
532 if (m_pData == other.m_pData)
536 return other.IsEmpty();
541 return other.m_pData->m_nDataLength == m_pData->m_nDataLength &&
542 wmemcmp(other.m_pData->m_String, m_pData->m_String,
543 m_pData->m_nDataLength) == 0;
551 if (!m_pData && !str.unterminated_c_str())
553 if (c_str() == str.unterminated_c_str())
556 size_t len = GetLength();
557 size_t other_len = str.GetLength();
560 int result =
UNSAFE_BUFFERS(FXSYS_wmemcmp(c_str(), str.unterminated_c_str(),
561 std::min(len, other_len)));
562 return result < 0 || (result == 0 && len < other_len);
570 return m_pData ? m_pData->m_nRefs : 0;
575 result.Reserve(GetLength());
576 for (
wchar_t wc : *
this)
577 result.InsertAtBack(
static_cast<
char>(wc & 0x7f));
583 result.Reserve(GetLength());
584 for (
wchar_t wc : *
this)
585 result.InsertAtBack(
static_cast<
char>(wc & 0xff));
591 FX_WideCharToMultiByte(FX_CodePage::kDefANSI, AsStringView(), {});
598 pdfium::span<
char> dest_buf = bstr.GetBuffer(dest_len);
601 bstr.ReleaseBuffer(dest_len);
606 return FX_UTF8Encode(AsStringView());
610 std::u16string utf16 = FX_UTF16Encode(AsStringView());
612 size_t output_length = 0;
616 pdfium::span<uint8_t> buffer =
617 pdfium::as_writable_bytes(result.GetBuffer(utf16.size() * 2 + 2));
618 for (
char16_t c : utf16) {
619 buffer[output_length++] = c & 0xff;
620 buffer[output_length++] = c >> 8;
622 buffer[output_length++] = 0;
623 buffer[output_length++] = 0;
625 result.ReleaseBuffer(output_length);
631 size_t output_length = 0;
635 pdfium::span<uint8_t> buffer =
636 pdfium::as_writable_bytes(result.GetBuffer(GetLength() * 2 + 2));
637 for (
wchar_t wc : AsStringView()) {
638#if defined(WCHAR_T_IS_32_BIT)
639 if (pdfium::IsSupplementary(wc)) {
643 buffer[output_length++] = wc & 0xff;
644 buffer[output_length++] = wc >> 8;
646 buffer[output_length++] = 0;
647 buffer[output_length++] = 0;
649 result.ReleaseBuffer(output_length);
655 ret.Replace(L"&", L"&amp;");
656 ret.Replace(L"<", L"&lt;");
657 ret.Replace(L">", L"&gt;");
658 ret.Replace(L"\'", L"&apos;");
659 ret.Replace(L"\"", L"&quot;");
665 return Substr(offset, GetLength() - offset);
672 if (first == 0 && count == GetLength()) {
675 return WideString(AsStringView().Substr(first, count));
679 return Substr(0, count);
684 return Substr(GetLength() - count, count);
691 ReallocBeforeWrite(m_pData->m_nDataLength);
692 FXSYS_wcslwr(m_pData->m_String);
699 ReallocBeforeWrite(m_pData->m_nDataLength);
700 FXSYS_wcsupr(m_pData->m_String);
706 result.Reserve(bstr.GetLength());
708 result.InsertAtBack(
static_cast<
wchar_t>(c & 0x7f));
715 result.Reserve(bstr.GetLength());
717 result.InsertAtBack(
static_cast<
wchar_t>(c & 0xff));
723 size_t dest_len = FX_MultiByteToWideChar(FX_CodePage::kDefANSI, bstr, {});
730 pdfium::span<
wchar_t> dest_buf = wstr.GetBuffer(dest_len);
733 wstr.ReleaseBuffer(dest_len);
739 return UTF8Decode(str);
752 pdfium::span<
wchar_t> buf = result.GetBuffer(data.size() / 2);
753 for (size_t i = 0; i + 1 < data.size(); i += 2) {
754 buf[length++] = data[i] | data[i + 1] << 8;
757#if defined(WCHAR_T_IS_32_BIT)
758 length = FuseSurrogates(buf.first(length));
761 result.ReleaseBuffer(length);
774 pdfium::span<
wchar_t> buf = result.GetBuffer(data.size() / 2);
775 for (size_t i = 0; i + 1 < data.size(); i += 2) {
776 buf[length++] = data[i] << 8 | data[i + 1];
779#if defined(WCHAR_T_IS_32_BIT)
780 length = FuseSurrogates(buf.first(length));
783 result.ReleaseBuffer(length);
789 return str ? wcscmp(m_pData->m_String, str) : 1;
790 return (!str || str[0] == 0) ? 0 : -1;
795 return str.m_pData ? -1 : 0;
799 size_t this_len = m_pData->m_nDataLength;
800 size_t that_len = str.m_pData->m_nDataLength;
801 size_t min_len =
std::min(this_len, that_len);
805 FXSYS_wmemcmp(m_pData->m_String, str.m_pData->m_String, min_len));
808 if (this_len == that_len)
810 return this_len < that_len ? -1 : 1;
815 return str ? FXSYS_wcsicmp(m_pData->m_String, str) : 1;
816 return (!str || str[0] == 0) ? 0 : -1;
825 TrimFront(kWideTrimChars);
829 TrimBack(kWideTrimChars);
832 return m_pData ? FXSYS_wtoi(m_pData->m_String) : 0;
836 return os.write(str.c_str(), str.GetLength());
845 return os.write(str.unterminated_c_str(), str.GetLength());
849 os << FX_UTF8Encode(str);
856 uint32_t dwHashCode = 0;
857 for (WideStringView::UnsignedType c : str)
858 dwHashCode = 1313 * dwHashCode + c;
863 uint32_t dwHashCode = 0;
864 for (
wchar_t c : str)
865 dwHashCode = 1313 * dwHashCode + FXSYS_towlower(c);
WideString(const std::initializer_list< WideStringView > &list)
WideString & operator+=(const WideString &str)
void TrimWhitespaceFront()
bool operator==(const WideString &other) const
ByteString ToUTF8() const
static WideString Format(const wchar_t *pFormat,...)
UNSAFE_BUFFER_USAGE WideString(const wchar_t *pStr, size_t len)
WideString & operator=(WideString &&that) noexcept
WideString(WideStringView str1, WideStringView str2)
WideString First(size_t count) const
static WideString FromUTF8(ByteStringView str)
WideString & operator+=(const wchar_t *str)
bool operator==(const wchar_t *ptr) const
WideString & operator+=(wchar_t ch)
bool operator<(WideStringView str) const
static WideString FromDefANSI(ByteStringView str)
int CompareNoCase(const wchar_t *str) const
WideString(const wchar_t *ptr)
static WideString FromLatin1(ByteStringView str)
void TrimWhitespaceBack()
bool operator==(WideStringView str) const
ByteString ToLatin1() const
intptr_t ReferenceCountForTesting() const
bool operator<(const WideString &other) const
int Compare(const wchar_t *str) const
WideString & operator=(const WideString &that)
WideString EncodeEntities() const
ByteString ToASCII() const
static WideString FromASCII(ByteStringView str)
WideString & operator+=(WideStringView str)
ByteString ToUCS2LE() const
WideString Last(size_t count) const
int Compare(const WideString &str) const
WideString & operator=(const wchar_t *str)
WideString Substr(size_t offset) const
WideString(WideStringView str)
static WideString FormatV(const wchar_t *lpszFormat, va_list argList)
ByteString ToUTF16LE() const
static WideString FormatInteger(int i)
ByteString ToDefANSI() const
bool operator<(const wchar_t *ptr) const
WideString Substr(size_t first, size_t count) const
WideString & operator=(WideStringView str)
#define UNSAFE_BUFFERS(...)
bool FXSYS_IsDecimalDigit(wchar_t c)
int32_t FXSYS_wtoi(const wchar_t *str)
StringViewTemplate< wchar_t > WideStringView
StringViewTemplate< char > ByteStringView
constexpr char32_t kMaximumSupplementaryCodePoint
fxcrt::ByteStringView ByteStringView
fxcrt::WideStringView WideStringView
fxcrt::WideString WideString
uint32_t FX_HashCode_GetLoweredW(WideStringView str)
uint32_t FX_HashCode_GetW(WideStringView str)