7#include "core/fpdfapi/font/cpdf_cmap.h"
13#include "core/fpdfapi/cmaps/fpdf_cmaps.h"
14#include "core/fpdfapi/font/cpdf_cmapparser.h"
15#include "core/fpdfapi/font/cpdf_fontglobals.h"
16#include "core/fpdfapi/parser/cpdf_simple_parser.h"
17#include "core/fxcrt/check.h"
18#include "core/fxcrt/fx_memcpy_wrappers.h"
19#include "core/fxcrt/notreached.h"
28struct PredefinedCMap {
33 ByteRange m_LeadingSegs[2];
36constexpr PredefinedCMap kPredefinedCMaps[] = {
95 {{0x81, 0x9f}, {0xe0, 0xfc}}},
100 {{0x81, 0x9f}, {0xe0, 0xfc}}},
105 {{0x81, 0x9f}, {0xe0, 0xfc}}},
110 {{0x81, 0x9f}, {0xe0, 0xfc}}},
115 {{0x81, 0x9f}, {0xe0, 0xfc}}},
120 {{0x8e, 0x8e}, {0xa1, 0xfe}}},
127 {{0x81, 0x9f}, {0xe0, 0xfc}}},
160 if (cmapid.GetLength() > 2)
161 cmapid = cmapid.First(cmapid.GetLength() - 2);
162 for (
const auto& map : kPredefinedCMaps) {
163 if (cmapid == map.m_pName)
169std::vector<
bool> LoadLeadingSegments(
const PredefinedCMap& map) {
170 std::vector<
bool> segments(256);
171 const auto seg_span =
pdfium::make_span(map.m_LeadingSegs);
172 for (
const ByteRange& seg : seg_span) {
173 if (seg.m_First == 0 && seg.m_Last == 0) {
176 for (
int b = seg.m_First; b <= seg.m_Last; ++b) {
183int CheckFourByteCodeRange(pdfium::span<uint8_t> codes,
184 pdfium::span<
const CPDF_CMap::CodeRange> ranges) {
185 for (size_t i = ranges.size(); i > 0; i--) {
186 const auto& range = ranges[i - 1];
187 if (range.m_CharSize < codes.size()) {
191 while (iChar < codes.size()) {
192 if (codes[iChar] < range.m_Lower[iChar] ||
193 codes[iChar] > range.m_Upper[iChar]) {
198 if (iChar == range.m_CharSize) {
202 return (codes.size() == range.m_CharSize) ? 2 : 1;
208size_t GetFourByteCharSizeImpl(
210 pdfium::span<
const CPDF_CMap::CodeRange> ranges) {
214 std::array<uint8_t, 4> codes = {{
217 static_cast<uint8_t>(charcode >> 8 & 0xFF),
218 static_cast<uint8_t>(charcode),
220 for (size_t offset = 0; offset < 4; offset++) {
221 size_t size = 4 - offset;
222 for (size_t j = 0; j < ranges.size(); j++) {
223 size_t iSeg = (ranges.size() - 1) - j;
224 if (ranges[iSeg].m_CharSize < size)
227 while (iChar < size) {
228 if (codes[offset + iChar] < ranges[iSeg].m_Lower[iChar] ||
229 codes[offset + iChar] > ranges[iSeg].m_Upper[iChar]) {
234 if (iChar == ranges[iSeg].m_CharSize)
241const fxcmap::
CMap* FindEmbeddedCMap(pdfium::span<
const fxcmap::CMap> pCMaps,
243 for (size_t i = 0; i < pCMaps.size(); i++) {
244 if (bsName == pCMaps[i].m_Name)
253 : m_bVertical(bsPredefinedName.Back() ==
'V') {
254 if (bsPredefinedName ==
"Identity-H" || bsPredefinedName ==
"Identity-V") {
260 const PredefinedCMap* map = GetPredefinedCMap(bsPredefinedName);
264 m_Charset = map->m_Charset;
265 m_Coding = map->m_Coding;
266 m_CodingScheme = map->m_CodingScheme;
267 if (m_CodingScheme == MixedTwoBytes)
268 m_MixedTwoByteLeadingBytes = LoadLeadingSegments(*map);
269 m_pEmbedMap = FindEmbeddedCMap(
270 CPDF_FontGlobals::GetInstance()->GetEmbeddedCharset(m_Charset),
278CPDF_CMap::CPDF_CMap(pdfium::span<
const uint8_t> spEmbeddedData)
279 : m_DirectCharcodeToCIDTable(
280 FixedSizeDataVector<uint16_t>::Zeroed(kDirectMapTableSize)) {
285 if (word.IsEmpty()) {
296 return static_cast<uint16_t>(charcode);
299 return fxcmap::CIDFromCharCode(m_pEmbedMap, charcode);
301 if (m_DirectCharcodeToCIDTable.empty())
302 return static_cast<uint16_t>(charcode);
304 auto table_span = m_DirectCharcodeToCIDTable.span();
305 if (charcode < table_span.size())
306 return table_span[charcode];
308 auto it = std::lower_bound(m_AdditionalCharcodeToCIDMappings.begin(),
309 m_AdditionalCharcodeToCIDMappings.end(), charcode,
310 [](
const CPDF_CMap::CIDRange& arg, uint32_t val) {
311 return arg.m_EndCode < val;
313 if (it == m_AdditionalCharcodeToCIDMappings.end() ||
314 it->m_StartCode > charcode) {
317 return it->m_StartCID + charcode - it->m_StartCode;
321 size_t& offset = *pOffset;
322 auto pBytes = pString.unsigned_span();
323 switch (m_CodingScheme) {
325 return offset < pBytes.size() ? pBytes[offset++] : 0;
328 uint8_t byte1 = offset < pBytes.size() ? pBytes[offset++] : 0;
329 uint8_t byte2 = offset < pBytes.size() ? pBytes[offset++] : 0;
330 return 256 * byte1 + byte2;
333 uint8_t byte1 = offset < pBytes.size() ? pBytes[offset++] : 0;
334 if (!m_MixedTwoByteLeadingBytes[byte1])
336 uint8_t byte2 = offset < pBytes.size() ? pBytes[offset++] : 0;
337 return 256 * byte1 + byte2;
340 std::array<uint8_t, 4> codes;
342 codes[0] = offset < pBytes.size() ? pBytes[offset++] : 0;
345 CheckFourByteCodeRange(pdfium::make_span(codes).first(char_size),
346 m_MixedFourByteLeadingRanges);
350 uint32_t charcode = 0;
351 for (
int i = 0; i < char_size; i++)
352 charcode = (charcode << 8) + codes[i];
355 if (char_size == 4 || offset == pBytes.size())
357 codes[char_size++] = pBytes[offset++];
365 switch (m_CodingScheme) {
371 if (charcode < 0x100)
375 if (charcode < 0x100)
377 if (charcode < 0x10000)
379 if (charcode < 0x1000000)
387 switch (m_CodingScheme) {
389 return pString.GetLength();
391 return (pString.GetLength() + 1) / 2;
394 for (size_t i = 0; i < pString.GetLength(); i++) {
396 if (m_MixedTwoByteLeadingBytes[pString[i]])
404 while (offset < pString.GetLength()) {
405 GetNextChar(pString, &offset);
415 switch (m_CodingScheme) {
417 *str
+= static_cast<
char>(charcode);
420 *str
+= static_cast<
char>(charcode / 256);
421 *str
+= static_cast<
char>(charcode % 256);
424 if (charcode < 0x100 && !m_MixedTwoByteLeadingBytes[charcode]) {
425 *str
+= static_cast<
char>(charcode);
428 *str
+= static_cast<
char>(charcode >> 8);
429 *str
+= static_cast<
char>(charcode);
432 if (charcode < 0x100) {
433 int iSize =
static_cast<
int>(
434 GetFourByteCharSizeImpl(charcode, m_MixedFourByteLeadingRanges));
435 int pad = iSize != 0 ? iSize - 1 : 0;
436 for (
int i = 0; i < pad; ++i) {
437 *str
+= static_cast<
char>(0);
439 *str
+= static_cast<
char>(charcode);
442 if (charcode < 0x10000) {
443 *str
+= static_cast<
char>(charcode >> 8);
444 *str
+= static_cast<
char>(charcode);
447 if (charcode < 0x1000000) {
448 *str
+= static_cast<
char>(charcode >> 16);
449 *str
+= static_cast<
char>(charcode >> 8);
450 *str
+= static_cast<
char>(charcode);
453 *str
+= static_cast<
char>(charcode >> 24);
454 *str
+= static_cast<
char>(charcode >> 16);
455 *str
+= static_cast<
char>(charcode >> 8);
456 *str
+= static_cast<
char>(charcode);
463 DCHECK(m_AdditionalCharcodeToCIDMappings.empty());
468 mappings.begin(), mappings.end(),
472 m_AdditionalCharcodeToCIDMappings = std::move(mappings);
476 m_MixedFourByteLeadingRanges = std::move(ranges);
481 uint16_t start_cid) {
482 pdfium::span<uint16_t> span = m_DirectCharcodeToCIDTable.span();
483 for (uint32_t code = start_code; code <= end_code; ++code) {
484 span[code] =
static_cast<uint16_t>(start_cid + code - start_code);
fxcrt::ByteString ByteString
CPDF_CMapParser(CPDF_CMap *pCMap)
void ParseWord(ByteStringView word)
size_t CountChar(ByteStringView pString) const
uint32_t GetNextChar(ByteStringView pString, size_t *pOffset) const
uint16_t CIDFromCharCode(uint32_t charcode) const
void SetAdditionalMappings(std::vector< CIDRange > mappings)
int GetCharSize(uint32_t charcode) const
void AppendChar(ByteString *str, uint32_t charcode) const
void SetMixedFourByteLeadingRanges(std::vector< CodeRange > ranges)
void SetDirectCharcodeToCIDTableRange(uint32_t start_code, uint32_t end_code, uint16_t start_cid)
ByteString & operator+=(char ch)
#define NOTREACHED_NORETURN()
fxcrt::ByteStringView ByteStringView