7#include "core/fpdfapi/font/cpdf_cmap.h"
12#include "core/fpdfapi/cmaps/fpdf_cmaps.h"
13#include "core/fpdfapi/font/cpdf_cmapparser.h"
14#include "core/fpdfapi/font/cpdf_fontglobals.h"
15#include "core/fpdfapi/parser/cpdf_simple_parser.h"
16#include "third_party/base/check.h"
25struct PredefinedCMap {
30 uint8_t m_LeadingSegCount;
31 ByteRange m_LeadingSegs[2];
34constexpr PredefinedCMap kPredefinedCMaps[] = {
109 {{0x81, 0x9f}, {0xe0, 0xfc}}},
115 {{0x81, 0x9f}, {0xe0, 0xfc}}},
121 {{0x81, 0x9f}, {0xe0, 0xfc}}},
127 {{0x81, 0x9f}, {0xe0, 0xfc}}},
133 {{0x81, 0x9f}, {0xe0, 0xfc}}},
139 {{0x8e, 0x8e}, {0xa1, 0xfe}}},
157 {{0x81, 0x9f}, {0xe0, 0xfc}}},
209const PredefinedCMap* GetPredefinedCMap(ByteStringView cmapid) {
210 if (cmapid.GetLength() > 2)
211 cmapid = cmapid.First(cmapid.GetLength() - 2);
212 for (
const auto& map : kPredefinedCMaps) {
213 if (cmapid == map.m_pName)
219std::vector<
bool> LoadLeadingSegments(
const PredefinedCMap& map) {
220 std::vector<
bool> segments(256);
221 for (uint32_t i = 0; i < map.m_LeadingSegCount; ++i) {
222 const ByteRange& seg = map.m_LeadingSegs[i];
223 for (
int b = seg.m_First; b <= seg.m_Last; ++b)
229int CheckFourByteCodeRange(uint8_t* codes,
231 const std::vector<CPDF_CMap::CodeRange>& ranges) {
232 for (size_t i = ranges.size(); i > 0; i--) {
234 if (ranges[seg].m_CharSize < size)
237 while (iChar < size) {
238 if (codes[iChar] < ranges[seg].m_Lower[iChar] ||
239 codes[iChar] > ranges[seg].m_Upper[iChar]) {
244 if (iChar == ranges[seg].m_CharSize)
247 return (size == ranges[seg].m_CharSize) ? 2 : 1;
252size_t GetFourByteCharSizeImpl(
254 const std::vector<CPDF_CMap::CodeRange>& ranges) {
259 codes[0] = codes[1] = 0x00;
260 codes[2] =
static_cast<uint8_t>(charcode >> 8 & 0xFF);
261 codes[3] =
static_cast<uint8_t>(charcode);
262 for (size_t offset = 0; offset < 4; offset++) {
263 size_t size = 4 - offset;
264 for (size_t j = 0; j < ranges.size(); j++) {
265 size_t iSeg = (ranges.size() - 1) - j;
266 if (ranges[iSeg].m_CharSize < size)
269 while (iChar < size) {
270 if (codes[offset + iChar] < ranges[iSeg].m_Lower[iChar] ||
271 codes[offset + iChar] > ranges[iSeg].m_Upper[iChar]) {
276 if (iChar == ranges[iSeg].m_CharSize)
284 ByteStringView bsName) {
285 for (size_t i = 0; i < pCMaps.size(); i++) {
286 if (bsName == pCMaps[i].m_Name)
294CPDF_CMap::CPDF_CMap(ByteStringView bsPredefinedName)
295 : m_bVertical(bsPredefinedName.Back() ==
'V') {
296 if (bsPredefinedName ==
"Identity-H" || bsPredefinedName ==
"Identity-V") {
302 const PredefinedCMap* map = GetPredefinedCMap(bsPredefinedName);
306 m_Charset = map->m_Charset;
307 m_Coding = map->m_Coding;
308 m_CodingScheme = map->m_CodingScheme;
309 if (m_CodingScheme == MixedTwoBytes)
310 m_MixedTwoByteLeadingBytes = LoadLeadingSegments(*map);
311 m_pEmbedMap = FindEmbeddedCMap(
312 CPDF_FontGlobals::GetInstance()->GetEmbeddedCharset(m_Charset),
320CPDF_CMap::CPDF_CMap(pdfium::span<
const uint8_t> spEmbeddedData)
321 : m_DirectCharcodeToCIDTable(
322 FixedSizeDataVector<uint16_t>::Zeroed(kDirectMapTableSize)) {
326 ByteStringView word = syntax.GetWord();
327 if (word.IsEmpty()) {
338 return static_cast<uint16_t>(charcode);
341 return fxcmap::CIDFromCharCode(m_pEmbedMap, charcode);
343 if (m_DirectCharcodeToCIDTable.empty())
344 return static_cast<uint16_t>(charcode);
346 auto table_span = m_DirectCharcodeToCIDTable.span();
347 if (charcode < table_span.size())
348 return table_span[charcode];
350 auto it = std::lower_bound(m_AdditionalCharcodeToCIDMappings.begin(),
351 m_AdditionalCharcodeToCIDMappings.end(), charcode,
352 [](
const CPDF_CMap::CIDRange& arg, uint32_t val) {
353 return arg.m_EndCode < val;
355 if (it == m_AdditionalCharcodeToCIDMappings.end() ||
356 it->m_StartCode > charcode) {
359 return it->m_StartCID + charcode - it->m_StartCode;
362uint32_t CPDF_CMap::
GetNextChar(ByteStringView pString, size_t* pOffset)
const {
363 size_t& offset = *pOffset;
364 auto pBytes = pString.raw_span();
365 switch (m_CodingScheme) {
367 return offset < pBytes.size() ? pBytes[offset++] : 0;
370 uint8_t byte1 = offset < pBytes.size() ? pBytes[offset++] : 0;
371 uint8_t byte2 = offset < pBytes.size() ? pBytes[offset++] : 0;
372 return 256 * byte1 + byte2;
375 uint8_t byte1 = offset < pBytes.size() ? pBytes[offset++] : 0;
376 if (!m_MixedTwoByteLeadingBytes[byte1])
378 uint8_t byte2 = offset < pBytes.size() ? pBytes[offset++] : 0;
379 return 256 * byte1 + byte2;
384 codes[0] = offset < pBytes.size() ? pBytes[offset++] : 0;
386 int ret = CheckFourByteCodeRange(codes, char_size,
387 m_MixedFourByteLeadingRanges);
391 uint32_t charcode = 0;
392 for (
int i = 0; i < char_size; i++)
393 charcode = (charcode << 8) + codes[i];
396 if (char_size == 4 || offset == pBytes.size())
398 codes[char_size++] = pBytes[offset++];
406 switch (m_CodingScheme) {
412 if (charcode < 0x100)
416 if (charcode < 0x100)
418 if (charcode < 0x10000)
420 if (charcode < 0x1000000)
428 switch (m_CodingScheme) {
430 return pString.GetLength();
432 return (pString.GetLength() + 1) / 2;
435 for (size_t i = 0; i < pString.GetLength(); i++) {
437 if (m_MixedTwoByteLeadingBytes[pString[i]])
445 while (offset < pString.GetLength()) {
446 GetNextChar(pString, &offset);
452 return pString.GetLength();
455int CPDF_CMap::
AppendChar(
char* str, uint32_t charcode)
const {
456 switch (m_CodingScheme) {
458 str[0] =
static_cast<
char>(charcode);
461 str[0] =
static_cast<
char>(charcode / 256);
462 str[1] =
static_cast<
char>(charcode % 256);
465 if (charcode < 0x100 && !m_MixedTwoByteLeadingBytes[charcode]) {
466 str[0] =
static_cast<
char>(charcode);
469 str[0] =
static_cast<
char>(charcode >> 8);
470 str[1] =
static_cast<
char>(charcode);
473 if (charcode < 0x100) {
474 int iSize =
static_cast<
int>(
475 GetFourByteCharSizeImpl(charcode, m_MixedFourByteLeadingRanges));
478 str[iSize - 1] =
static_cast<
char>(charcode);
480 memset(str, 0, iSize - 1);
483 if (charcode < 0x10000) {
484 str[0] =
static_cast<
char>(charcode >> 8);
485 str[1] =
static_cast<
char>(charcode);
488 if (charcode < 0x1000000) {
489 str[0] =
static_cast<
char>(charcode >> 16);
490 str[1] =
static_cast<
char>(charcode >> 8);
491 str[2] =
static_cast<
char>(charcode);
494 str[0] =
static_cast<
char>(charcode >> 24);
495 str[1] =
static_cast<
char>(charcode >> 16);
496 str[2] =
static_cast<
char>(charcode >> 8);
497 str[3] =
static_cast<
char>(charcode);
504 DCHECK(m_AdditionalCharcodeToCIDMappings.empty());
509 mappings.begin(), mappings.end(),
513 m_AdditionalCharcodeToCIDMappings = std::move(mappings);
517 m_MixedFourByteLeadingRanges = std::move(ranges);
522 uint16_t start_cid) {
523 pdfium::span<uint16_t> span = m_DirectCharcodeToCIDTable.span();
524 for (uint32_t code = start_code; code <= end_code; ++code) {
525 span[code] =
static_cast<uint16_t>(start_cid + code - start_code);
CPDF_CMapParser(CPDF_CMap *pCMap)
void ParseWord(ByteStringView word)
size_t CountChar(ByteStringView pString) const
uint32_t GetNextChar(ByteStringView pString, size_t *pOffset) const
int AppendChar(char *str, uint32_t charcode) const
uint16_t CIDFromCharCode(uint32_t charcode) const
void SetAdditionalMappings(std::vector< CIDRange > mappings)
int GetCharSize(uint32_t charcode) const
void SetMixedFourByteLeadingRanges(std::vector< CodeRange > ranges)
void SetDirectCharcodeToCIDTableRange(uint32_t start_code, uint32_t end_code, uint16_t start_cid)