Qt
Internal/Contributor docs for the Qt SDK. Note: These are NOT official API docs; those are found at https://doc.qt.io/
Loading...
Searching...
No Matches
cpdf_cmapparser.cpp
Go to the documentation of this file.
1// Copyright 2014 The PDFium Authors
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7#include "core/fpdfapi/font/cpdf_cmapparser.h"
8
9#include <ctype.h>
10
11#include <iterator>
12
13#include "core/fpdfapi/cmaps/fpdf_cmaps.h"
14#include "core/fpdfapi/parser/cpdf_array.h"
15#include "core/fpdfapi/parser/cpdf_dictionary.h"
16#include "core/fpdfapi/parser/cpdf_simple_parser.h"
17#include "core/fxcrt/fx_extension.h"
18#include "core/fxcrt/fx_safe_types.h"
19#include "third_party/base/check.h"
20
21namespace {
22
23ByteStringView CMap_GetString(ByteStringView word) {
24 if (word.GetLength() <= 2)
25 return ByteStringView();
26 return word.Last(word.GetLength() - 2);
27}
28
29} // namespace
30
32
34 m_pCMap->SetAdditionalMappings(std::move(m_AdditionalCharcodeToCIDMappings));
35 m_pCMap->SetMixedFourByteLeadingRanges(std::move(m_Ranges));
36}
37
38void CPDF_CMapParser::ParseWord(ByteStringView word) {
39 DCHECK(!word.IsEmpty());
40
41 if (word == "begincidchar") {
42 m_Status = kProcessingCidChar;
43 m_CodeSeq = 0;
44 } else if (word == "begincidrange") {
45 m_Status = kProcessingCidRange;
46 m_CodeSeq = 0;
47 } else if (word == "endcidrange" || word == "endcidchar") {
48 m_Status = kStart;
49 } else if (word == "/WMode") {
50 m_Status = kProcessingWMode;
51 } else if (word == "/Registry") {
52 m_Status = kProcessingRegistry;
53 } else if (word == "/Ordering") {
54 m_Status = kProcessingOrdering;
55 } else if (word == "/Supplement") {
56 m_Status = kProcessingSupplement;
57 } else if (word == "begincodespacerange") {
58 m_Status = kProcessingCodeSpaceRange;
59 m_CodeSeq = 0;
60 } else if (word == "usecmap") {
61 } else if (m_Status == kProcessingCidChar) {
62 HandleCid(word);
63 } else if (m_Status == kProcessingCidRange) {
64 HandleCid(word);
65 } else if (m_Status == kProcessingRegistry) {
66 m_Status = kStart;
67 } else if (m_Status == kProcessingOrdering) {
68 m_pCMap->SetCharset(CharsetFromOrdering(CMap_GetString(word)));
69 m_Status = kStart;
70 } else if (m_Status == kProcessingSupplement) {
71 m_Status = kStart;
72 } else if (m_Status == kProcessingWMode) {
73 m_pCMap->SetVertical(GetCode(word) != 0);
74 m_Status = kStart;
75 } else if (m_Status == kProcessingCodeSpaceRange) {
76 HandleCodeSpaceRange(word);
77 }
78 m_LastWord = word;
79}
80
81void CPDF_CMapParser::HandleCid(ByteStringView word) {
82 DCHECK(m_Status == kProcessingCidChar || m_Status == kProcessingCidRange);
83 bool bChar = m_Status == kProcessingCidChar;
84
85 m_CodePoints[m_CodeSeq] = GetCode(word);
86 m_CodeSeq++;
87 int nRequiredCodePoints = bChar ? 2 : 3;
88 if (m_CodeSeq < nRequiredCodePoints)
89 return;
90
91 uint32_t StartCode = m_CodePoints[0];
92 uint32_t EndCode;
93 uint16_t StartCID;
94 if (bChar) {
95 EndCode = StartCode;
96 StartCID = static_cast<uint16_t>(m_CodePoints[1]);
97 } else {
98 EndCode = m_CodePoints[1];
99 StartCID = static_cast<uint16_t>(m_CodePoints[2]);
100 }
101 if (EndCode < CPDF_CMap::kDirectMapTableSize) {
102 m_pCMap->SetDirectCharcodeToCIDTableRange(StartCode, EndCode, StartCID);
103 } else {
104 m_AdditionalCharcodeToCIDMappings.push_back({StartCode, EndCode, StartCID});
105 }
106 m_CodeSeq = 0;
107}
108
109void CPDF_CMapParser::HandleCodeSpaceRange(ByteStringView word) {
110 if (word != "endcodespacerange") {
111 if (word.IsEmpty() || word[0] != '<')
112 return;
113
114 if (m_CodeSeq % 2) {
115 absl::optional<CPDF_CMap::CodeRange> range =
116 GetCodeRange(m_LastWord.AsStringView(), word);
117 if (range.has_value())
118 m_PendingRanges.push_back(range.value());
119 }
120 m_CodeSeq++;
121 return;
122 }
123
124 size_t nSegs = m_Ranges.size() + m_PendingRanges.size();
125 if (nSegs == 1) {
126 const auto& first_range =
127 !m_Ranges.empty() ? m_Ranges[0] : m_PendingRanges[0];
128 m_pCMap->SetCodingScheme(first_range.m_CharSize == 2 ? CPDF_CMap::TwoBytes
129 : CPDF_CMap::OneByte);
130 } else if (nSegs > 1) {
131 m_pCMap->SetCodingScheme(CPDF_CMap::MixedFourBytes);
132 m_Ranges.reserve(nSegs);
133 std::move(m_PendingRanges.begin(), m_PendingRanges.end(),
134 std::back_inserter(m_Ranges));
135 m_PendingRanges.clear();
136 }
137 m_Status = kStart;
138}
139
140// static
141uint32_t CPDF_CMapParser::GetCode(ByteStringView word) {
142 if (word.IsEmpty())
143 return 0;
144
145 FX_SAFE_UINT32 num = 0;
146 if (word[0] == '<') {
147 for (size_t i = 1; i < word.GetLength() && isxdigit(word[i]); ++i) {
148 num = num * 16 + FXSYS_HexCharToInt(word[i]);
149 if (!num.IsValid())
150 return 0;
151 }
152 return num.ValueOrDie();
153 }
154
155 for (size_t i = 0; i < word.GetLength() && isdigit(word[i]); ++i) {
156 num = num * 10 + FXSYS_DecimalCharToInt(static_cast<wchar_t>(word[i]));
157 if (!num.IsValid())
158 return 0;
159 }
160 return num.ValueOrDie();
161}
162
163// static
164absl::optional<CPDF_CMap::CodeRange> CPDF_CMapParser::GetCodeRange(
165 ByteStringView first,
166 ByteStringView second) {
167 if (first.IsEmpty() || first[0] != '<')
168 return absl::nullopt;
169
170 size_t i;
171 for (i = 1; i < first.GetLength(); ++i) {
172 if (first[i] == '>')
173 break;
174 }
175 size_t char_size = (i - 1) / 2;
176 if (char_size > 4)
177 return absl::nullopt;
178
179 CPDF_CMap::CodeRange range;
180 range.m_CharSize = char_size;
181 for (i = 0; i < range.m_CharSize; ++i) {
182 uint8_t digit1 = first[i * 2 + 1];
183 uint8_t digit2 = first[i * 2 + 2];
184 range.m_Lower[i] =
186 }
187
188 size_t size = second.GetLength();
189 for (i = 0; i < range.m_CharSize; ++i) {
190 size_t i1 = i * 2 + 1;
191 size_t i2 = i1 + 1;
192 uint8_t digit1 = i1 < size ? second[i1] : '0';
193 uint8_t digit2 = i2 < size ? second[i2] : '0';
194 range.m_Upper[i] =
196 }
197 return range;
198}
199
200// static
201CIDSet CPDF_CMapParser::CharsetFromOrdering(ByteStringView ordering) {
202 static const char* const kCharsetNames[CIDSET_NUM_SETS] = {
203 nullptr, "GB1", "CNS1", "Japan1", "Korea1", "UCS"};
204 static_assert(std::size(kCharsetNames) == CIDSET_NUM_SETS,
205 "Too many CID sets");
206
207 for (size_t charset = 1; charset < std::size(kCharsetNames); ++charset) {
208 if (ordering == kCharsetNames[charset])
209 return static_cast<CIDSet>(charset);
210 }
211 return CIDSET_UNKNOWN;
212}
static CIDSet CharsetFromOrdering(ByteStringView ordering)
CPDF_CMapParser(CPDF_CMap *pCMap)
void ParseWord(ByteStringView word)
CIDSet
@ CIDSET_UNKNOWN
@ CIDSET_NUM_SETS
int FXSYS_DecimalCharToInt(wchar_t c)
int FXSYS_HexCharToInt(char c)
uint8_t m_Lower[4]
Definition cpdf_cmap.h:48
uint8_t m_Upper[4]
Definition cpdf_cmap.h:49