7#include "core/fpdfapi/font/cpdf_tounicodemap.h"
13#include "core/fpdfapi/font/cpdf_cid2unicodemap.h"
14#include "core/fpdfapi/font/cpdf_fontglobals.h"
15#include "core/fpdfapi/parser/cpdf_simple_parser.h"
16#include "core/fpdfapi/parser/cpdf_stream.h"
17#include "core/fpdfapi/parser/fpdf_parser_utility.h"
18#include "core/fxcrt/fx_extension.h"
19#include "core/fxcrt/fx_safe_types.h"
20#include "third_party/base/containers/contains.h"
24WideString StringDataAdd(WideString str) {
27 for (size_t i = str.GetLength(); i > 0; --i) {
28 wchar_t ch = str[i - 1] + value;
29 if (ch < str[i - 1]) {
32 ret.InsertAtFront(ch);
37 ret.InsertAtFront(value);
44 Load(
std::move(pStream));
50 auto it = m_Multimap.find(charcode);
51 if (it == m_Multimap.end()) {
55 m_pBaseMap->UnicodeFromCID(
static_cast<uint16_t>(charcode)));
58 uint32_t value = *it->second.begin();
59 wchar_t unicode =
static_cast<
wchar_t>(value & 0xffff);
60 if (unicode != 0xffff)
61 return WideString
(unicode
);
63 size_t index = value >> 16;
64 return index < m_MultiCharVec.size() ? m_MultiCharVec[index] : WideString();
68 for (
const auto& pair : m_Multimap) {
69 if (pdfium::Contains(pair.second,
static_cast<uint32_t>(unicode)))
76 uint32_t charcode)
const {
77 auto it = m_Multimap.find(charcode);
78 return it != m_Multimap.end() ? it->second.size() : 0u;
84 std::set<
char> seen_whitespace_chars;
85 for (
char c : input) {
86 if (PDFCharIsWhitespace(c)) {
87 seen_whitespace_chars.insert(c);
90 ByteString str_without_whitespace_chars;
92 if (seen_whitespace_chars.empty()) {
95 str_without_whitespace_chars.Reserve(input.GetLength());
96 for (
char c : input) {
97 if (!pdfium::Contains(seen_whitespace_chars, c)) {
98 str_without_whitespace_chars += c;
101 str = str_without_whitespace_chars.AsStringView();
104 size_t len = str.GetLength();
105 if (len <= 2 || str[0] !=
'<' || str[len - 1] !=
'>')
106 return absl::nullopt;
108 FX_SAFE_UINT32 code = 0;
109 for (
char c : str.Substr(1, len - 2)) {
110 if (!FXSYS_IsHexDigit(c))
111 return absl::nullopt;
113 code = code * 16 + FXSYS_HexCharToInt(c);
115 return absl::nullopt;
117 return absl::optional<uint32_t>(code.ValueOrDie());
122 size_t len = str.GetLength();
123 if (len <= 2 || str[0] !=
'<' || str[len - 1] !=
'>')
129 for (
char c : str.Substr(1, len - 2)) {
130 if (!FXSYS_IsHexDigit(c))
133 ch = ch * 16 + FXSYS_HexCharToInt(c);
146 auto pAcc = pdfium::MakeRetain<CPDF_StreamAcc>(
std::move(pStream));
147 pAcc->LoadAllDataFiltered();
150 ByteStringView word = parser.GetWord();
154 if (word ==
"beginbfchar")
155 HandleBeginBFChar(&parser);
156 else if (word ==
"beginbfrange")
157 HandleBeginBFRange(&parser);
158 else if (word ==
"/Adobe-Korea1-UCS2")
160 else if (word ==
"/Adobe-Japan1-UCS2")
162 else if (word ==
"/Adobe-CNS1-UCS2")
164 else if (word ==
"/Adobe-GB1-UCS2")
168 m_pBaseMap = CPDF_FontGlobals::GetInstance()->GetCID2UnicodeMap(cid_set);
174 ByteStringView word = pParser->GetWord();
175 if (word.IsEmpty() || word ==
"endbfchar")
178 absl::optional<uint32_t> code = StringToCode(word);
179 if (!code.has_value())
182 SetCode(code.value(), StringToWideString(pParser->GetWord()));
188 ByteStringView lowcode_str = pParser->GetWord();
189 if (lowcode_str.IsEmpty() || lowcode_str ==
"endbfrange")
192 absl::optional<uint32_t> lowcode_opt = StringToCode(lowcode_str);
193 if (!lowcode_opt.has_value())
196 ByteStringView highcode_str = pParser->GetWord();
197 absl::optional<uint32_t> highcode_opt = StringToCode(highcode_str);
198 if (!highcode_opt.has_value())
201 uint32_t lowcode = lowcode_opt.value();
202 uint32_t highcode = (lowcode & 0xffffff00) | (highcode_opt.value() & 0xff);
204 ByteStringView start = pParser->GetWord();
206 for (uint32_t code = lowcode; code <= highcode; ++code) {
207 SetCode(code, StringToWideString(pParser
->GetWord()));
208 if (code ==
std::numeric_limits<uint32_t>::max()) {
216 WideString destcode = StringToWideString(start);
217 if (destcode.GetLength() == 1) {
218 absl::optional<uint32_t> value_or_error = StringToCode(start);
219 if (!value_or_error.has_value())
222 uint32_t value = value_or_error.value();
223 for (uint32_t code = lowcode; code <= highcode; ++code) {
224 InsertIntoMultimap(code, value++);
225 if (code ==
std::numeric_limits<uint32_t>::max()) {
230 for (uint32_t code = lowcode; code <= highcode; ++code) {
232 code == lowcode ? destcode : StringDataAdd(destcode);
233 InsertIntoMultimap(code, GetMultiCharIndexIndicator());
234 m_MultiCharVec.push_back(retcode);
235 destcode =
std::move(retcode);
236 if (code ==
std::numeric_limits<uint32_t>::max()) {
245 FX_SAFE_UINT32 uni = m_MultiCharVec.size();
246 uni = uni * 0x10000 + 0xffff;
247 return uni.ValueOrDefault(0);
251 size_t len = destcode.GetLength();
256 InsertIntoMultimap(srccode, destcode
[0
]);
258 InsertIntoMultimap(srccode, GetMultiCharIndexIndicator());
259 m_MultiCharVec.push_back(destcode);
264 auto it = m_Multimap.find(code);
265 if (it == m_Multimap.end()) {
266 m_Multimap.emplace(code, std::set<uint32_t>{destcode});
270 it->second.emplace(destcode);
size_t GetUnicodeCountByCharcodeForTesting(uint32_t charcode) const
uint32_t ReverseLookup(wchar_t unicode) const
CPDF_ToUnicodeMap(RetainPtr< const CPDF_Stream > pStream)
WideString Lookup(uint32_t charcode) const
CharType operator[](const size_t index) const