Qt
Internal/Contributor docs for the Qt SDK. Note: These are NOT official API docs; those are found at https://doc.qt.io/
Loading...
Searching...
No Matches
cpdf_cidfont.cpp
Go to the documentation of this file.
1// Copyright 2016 The PDFium Authors
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7#include "core/fpdfapi/font/cpdf_cidfont.h"
8
9#include <algorithm>
10#include <array>
11#include <limits>
12#include <utility>
13#include <vector>
14
15#include "build/build_config.h"
16#include "core/fpdfapi/cmaps/fpdf_cmaps.h"
17#include "core/fpdfapi/font/cfx_cttgsubtable.h"
18#include "core/fpdfapi/font/cpdf_cid2unicodemap.h"
19#include "core/fpdfapi/font/cpdf_cmap.h"
20#include "core/fpdfapi/font/cpdf_cmapparser.h"
21#include "core/fpdfapi/font/cpdf_fontencoding.h"
22#include "core/fpdfapi/font/cpdf_fontglobals.h"
23#include "core/fpdfapi/parser/cpdf_array.h"
24#include "core/fpdfapi/parser/cpdf_dictionary.h"
25#include "core/fpdfapi/parser/cpdf_stream.h"
26#include "core/fpdfapi/parser/cpdf_stream_acc.h"
27#include "core/fxcrt/check.h"
28#include "core/fxcrt/check_op.h"
29#include "core/fxcrt/fixed_size_data_vector.h"
30#include "core/fxcrt/fx_codepage.h"
31#include "core/fxcrt/fx_memory.h"
32#include "core/fxcrt/fx_safe_types.h"
33#include "core/fxcrt/fx_unicode.h"
34#include "core/fxcrt/span.h"
35#include "core/fxcrt/span_util.h"
36#include "core/fxcrt/stl_util.h"
37#include "core/fxge/fx_font.h"
38
39namespace {
40
// One metric record: `val` applies to every CID in the inclusive range
// [low, high] (see IsMetricForCID()). The three-int layout matches the
// records LoadMetricsArray() appends when called with nElements == 1, which
// is how GetCharWidthF() reinterprets m_WidthList.
41struct LowHighVal {
42 int low;
43 int high;
44 int val;
45};
46
// LowHighVal extended with two more ints. GetVertWidth() returns `val` and
// GetVertOrigin() returns (`x`, `y`) for the matching CID range.
47struct LowHighValXY : LowHighVal {
48 int x;
49 int y;
50};
51
52bool IsMetricForCID(const LowHighVal& val, uint16_t cid) {
53 return val.low <= cid && cid <= val.high;
54}
55
56constexpr std::array<FX_CodePage, CIDSET_NUM_SETS> kCharsetCodePages = {
63};
64
// Per-CID transform records consulted by GetCIDTransform(). The first value of
// each entry is the CID, and entries must stay sorted by ascending CID because
// the lookup uses std::lower_bound. The remaining values are transform
// components; GetCharBBox() decodes the `e` and `f` members with
// CIDTransformToFloat().
65constexpr CIDTransform kJapan1VerticalCIDs[] = {
66 {97, 129, 0, 0, 127, 55, 0}, {7887, 127, 0, 0, 127, 76, 89},
67 {7888, 127, 0, 0, 127, 79, 94}, {7889, 0, 129, 127, 0, 17, 127},
68 {7890, 0, 129, 127, 0, 17, 127}, {7891, 0, 129, 127, 0, 17, 127},
69 {7892, 0, 129, 127, 0, 17, 127}, {7893, 0, 129, 127, 0, 17, 127},
70 {7894, 0, 129, 127, 0, 17, 127}, {7895, 0, 129, 127, 0, 17, 127},
71 {7896, 0, 129, 127, 0, 17, 127}, {7897, 0, 129, 127, 0, 17, 127},
72 {7898, 0, 129, 127, 0, 17, 127}, {7899, 0, 129, 127, 0, 17, 104},
73 {7900, 0, 129, 127, 0, 17, 127}, {7901, 0, 129, 127, 0, 17, 104},
74 {7902, 0, 129, 127, 0, 17, 127}, {7903, 0, 129, 127, 0, 17, 127},
75 {7904, 0, 129, 127, 0, 17, 127}, {7905, 0, 129, 127, 0, 17, 114},
76 {7906, 0, 129, 127, 0, 17, 127}, {7907, 0, 129, 127, 0, 17, 127},
77 {7908, 0, 129, 127, 0, 17, 127}, {7909, 0, 129, 127, 0, 17, 127},
78 {7910, 0, 129, 127, 0, 17, 127}, {7911, 0, 129, 127, 0, 17, 127},
79 {7912, 0, 129, 127, 0, 17, 127}, {7913, 0, 129, 127, 0, 17, 127},
80 {7914, 0, 129, 127, 0, 17, 127}, {7915, 0, 129, 127, 0, 17, 114},
81 {7916, 0, 129, 127, 0, 17, 127}, {7917, 0, 129, 127, 0, 17, 127},
82 {7918, 127, 0, 0, 127, 18, 25}, {7919, 127, 0, 0, 127, 18, 25},
83 {7920, 127, 0, 0, 127, 18, 25}, {7921, 127, 0, 0, 127, 18, 25},
84 {7922, 127, 0, 0, 127, 18, 25}, {7923, 127, 0, 0, 127, 18, 25},
85 {7924, 127, 0, 0, 127, 18, 25}, {7925, 127, 0, 0, 127, 18, 25},
86 {7926, 127, 0, 0, 127, 18, 25}, {7927, 127, 0, 0, 127, 18, 25},
87 {7928, 127, 0, 0, 127, 18, 25}, {7929, 127, 0, 0, 127, 18, 25},
88 {7930, 127, 0, 0, 127, 18, 25}, {7931, 127, 0, 0, 127, 18, 25},
89 {7932, 127, 0, 0, 127, 18, 25}, {7933, 127, 0, 0, 127, 18, 25},
90 {7934, 127, 0, 0, 127, 18, 25}, {7935, 127, 0, 0, 127, 18, 25},
91 {7936, 127, 0, 0, 127, 18, 25}, {7937, 127, 0, 0, 127, 18, 25},
92 {7938, 127, 0, 0, 127, 18, 25}, {7939, 127, 0, 0, 127, 18, 25},
93 {8720, 0, 129, 127, 0, 19, 102}, {8721, 0, 129, 127, 0, 13, 127},
94 {8722, 0, 129, 127, 0, 19, 108}, {8723, 0, 129, 127, 0, 19, 102},
95 {8724, 0, 129, 127, 0, 19, 102}, {8725, 0, 129, 127, 0, 19, 102},
96 {8726, 0, 129, 127, 0, 19, 102}, {8727, 0, 129, 127, 0, 19, 102},
97 {8728, 0, 129, 127, 0, 19, 114}, {8729, 0, 129, 127, 0, 19, 114},
98 {8730, 0, 129, 127, 0, 38, 108}, {8731, 0, 129, 127, 0, 13, 108},
99 {8732, 0, 129, 127, 0, 19, 108}, {8733, 0, 129, 127, 0, 19, 108},
100 {8734, 0, 129, 127, 0, 19, 108}, {8735, 0, 129, 127, 0, 19, 108},
101 {8736, 0, 129, 127, 0, 19, 102}, {8737, 0, 129, 127, 0, 19, 102},
102 {8738, 0, 129, 127, 0, 19, 102}, {8739, 0, 129, 127, 0, 19, 102},
103 {8740, 0, 129, 127, 0, 19, 102}, {8741, 0, 129, 127, 0, 19, 102},
104 {8742, 0, 129, 127, 0, 19, 102}, {8743, 0, 129, 127, 0, 19, 102},
105 {8744, 0, 129, 127, 0, 19, 102}, {8745, 0, 129, 127, 0, 19, 102},
106 {8746, 0, 129, 127, 0, 19, 114}, {8747, 0, 129, 127, 0, 19, 114},
107 {8748, 0, 129, 127, 0, 19, 102}, {8749, 0, 129, 127, 0, 19, 102},
108 {8750, 0, 129, 127, 0, 19, 102}, {8751, 0, 129, 127, 0, 19, 102},
109 {8752, 0, 129, 127, 0, 19, 102}, {8753, 0, 129, 127, 0, 19, 102},
110 {8754, 0, 129, 127, 0, 19, 102}, {8755, 0, 129, 127, 0, 19, 102},
111 {8756, 0, 129, 127, 0, 19, 102}, {8757, 0, 129, 127, 0, 19, 102},
112 {8758, 0, 129, 127, 0, 19, 102}, {8759, 0, 129, 127, 0, 19, 102},
113 {8760, 0, 129, 127, 0, 19, 102}, {8761, 0, 129, 127, 0, 19, 102},
114 {8762, 0, 129, 127, 0, 19, 102}, {8763, 0, 129, 127, 0, 19, 102},
115 {8764, 0, 129, 127, 0, 19, 102}, {8765, 0, 129, 127, 0, 19, 102},
116 {8766, 0, 129, 127, 0, 19, 102}, {8767, 0, 129, 127, 0, 19, 102},
117 {8768, 0, 129, 127, 0, 19, 102}, {8769, 0, 129, 127, 0, 19, 102},
118 {8770, 0, 129, 127, 0, 19, 102}, {8771, 0, 129, 127, 0, 19, 102},
119 {8772, 0, 129, 127, 0, 19, 102}, {8773, 0, 129, 127, 0, 19, 102},
120 {8774, 0, 129, 127, 0, 19, 102}, {8775, 0, 129, 127, 0, 19, 102},
121 {8776, 0, 129, 127, 0, 19, 102}, {8777, 0, 129, 127, 0, 19, 102},
122 {8778, 0, 129, 127, 0, 19, 102}, {8779, 0, 129, 127, 0, 19, 114},
123 {8780, 0, 129, 127, 0, 19, 108}, {8781, 0, 129, 127, 0, 19, 114},
124 {8782, 0, 129, 127, 0, 13, 114}, {8783, 0, 129, 127, 0, 19, 108},
125 {8784, 0, 129, 127, 0, 13, 114}, {8785, 0, 129, 127, 0, 19, 108},
126 {8786, 0, 129, 127, 0, 19, 108}, {8787, 0, 129, 127, 0, 19, 108},
127 {8788, 0, 129, 127, 0, 19, 108}, {8789, 0, 129, 127, 0, 19, 108},
128 {8790, 0, 129, 127, 0, 19, 108}, {8791, 0, 129, 127, 0, 19, 108},
129 {8792, 0, 129, 127, 0, 19, 108}, {8793, 0, 129, 127, 0, 19, 108},
130 {8794, 0, 129, 127, 0, 19, 108}, {8795, 0, 129, 127, 0, 19, 108},
131 {8796, 0, 129, 127, 0, 19, 108}, {8797, 0, 129, 127, 0, 19, 108},
132 {8798, 0, 129, 127, 0, 19, 108}, {8799, 0, 129, 127, 0, 19, 108},
133 {8800, 0, 129, 127, 0, 19, 108}, {8801, 0, 129, 127, 0, 19, 108},
134 {8802, 0, 129, 127, 0, 19, 108}, {8803, 0, 129, 127, 0, 19, 108},
135 {8804, 0, 129, 127, 0, 19, 108}, {8805, 0, 129, 127, 0, 19, 108},
136 {8806, 0, 129, 127, 0, 19, 108}, {8807, 0, 129, 127, 0, 19, 108},
137 {8808, 0, 129, 127, 0, 19, 108}, {8809, 0, 129, 127, 0, 19, 108},
138 {8810, 0, 129, 127, 0, 19, 108}, {8811, 0, 129, 127, 0, 19, 114},
139 {8812, 0, 129, 127, 0, 19, 102}, {8813, 0, 129, 127, 0, 19, 114},
140 {8814, 0, 129, 127, 0, 76, 102}, {8815, 0, 129, 127, 0, 13, 121},
141 {8816, 0, 129, 127, 0, 19, 114}, {8817, 0, 129, 127, 0, 19, 127},
142 {8818, 0, 129, 127, 0, 19, 114}, {8819, 0, 129, 127, 0, 218, 108},
143};
144
145#if !BUILDFLAG(IS_WIN)
146
147bool IsValidEmbeddedCharcodeFromUnicodeCharset(CIDSet charset) {
148 switch (charset) {
149 case CIDSET_GB1:
150 case CIDSET_CNS1:
151 case CIDSET_JAPAN1:
152 case CIDSET_KOREA1:
153 return true;
154
155 default:
156 return false;
157 }
158}
159
160wchar_t EmbeddedUnicodeFromCharcode(const fxcmap::CMap* pEmbedMap,
161 CIDSet charset,
162 uint32_t charcode) {
163 if (!IsValidEmbeddedCharcodeFromUnicodeCharset(charset))
164 return 0;
165
166 uint16_t cid = fxcmap::CIDFromCharCode(pEmbedMap, charcode);
167 if (!cid)
168 return 0;
169
170 pdfium::span<const uint16_t> map =
171 CPDF_FontGlobals::GetInstance()->GetEmbeddedToUnicode(charset);
172 return cid < map.size() ? map[cid] : 0;
173}
174
175uint32_t EmbeddedCharcodeFromUnicode(const fxcmap::CMap* pEmbedMap,
176 CIDSet charset,
177 wchar_t unicode) {
178 if (!IsValidEmbeddedCharcodeFromUnicodeCharset(charset))
179 return 0;
180
181 pdfium::span<const uint16_t> map =
182 CPDF_FontGlobals::GetInstance()->GetEmbeddedToUnicode(charset);
183 for (uint32_t i = 0; i < map.size(); ++i) {
184 if (map[i] == unicode) {
185 uint32_t charCode = fxcmap::CharCodeFromCID(pEmbedMap, i);
186 if (charCode)
187 return charCode;
188 }
189 }
190 return 0;
191}
192
193#endif // !BUILDFLAG(IS_WIN)
194
195void UseCIDCharmap(const RetainPtr<CFX_Face>& face, CIDCoding coding) {
196 fxge::FontEncoding encoding;
197 switch (coding) {
198 case CIDCoding::kGB:
200 break;
201 case CIDCoding::kBIG5:
202 encoding = fxge::FontEncoding::kBig5;
203 break;
204 case CIDCoding::kJIS:
205 encoding = fxge::FontEncoding::kSjis;
206 break;
209 break;
210 default:
212 }
213 bool result = face->SelectCharMap(encoding);
214 if (!result) {
215 result = face->SelectCharMap(fxge::FontEncoding::kUnicode);
216 }
217 if (!result && face->GetCharMapCount()) {
218 face->SetCharMapByIndex(0);
219 }
220}
221
// Flattens the metrics array `pArray` into `result` as fixed-size records of
// (first_code, last_code, then `nElements` values). Two input forms are
// handled:
//   c [v ...]           - consecutive codes starting at c; each code consumes
//                         `nElements` values from the nested array and is
//                         stored with first_code == last_code.
//   c_first c_last v... - one record of `nElements` values for the range.
// Entries for which GetDirectObjectAt() returns null are skipped. Parsing
// stops at a nested array that does not directly follow a first code.
222void LoadMetricsArray(RetainPtr<const CPDF_Array> pArray,
223 std::vector<int>* result,
224 int nElements) {
// Parser state: 0 = expecting a first code, 1 = first code read (expecting a
// last code or a nested array), 2 = reading the values of a code range.
225 int width_status = 0;
// Number of values consumed so far for the current code range.
226 int iCurElement = 0;
227 int first_code = 0;
228 int last_code = 0;
229 for (size_t i = 0; i < pArray->size(); i++) {
230 RetainPtr<const CPDF_Object> pObj = pArray->GetDirectObjectAt(i);
231 if (!pObj)
232 continue;
233
234 const CPDF_Array* pObjArray = pObj->AsArray();
235 if (pObjArray) {
// A nested array is only accepted immediately after a first code.
236 if (width_status != 1)
237 return;
// first_code is incremented while walking the nested array; ignore the array
// when first_code plus its element count would exceed the int maximum.
238 if (first_code > std::numeric_limits<int>::max() -
239 fxcrt::CollectionSize<int>(*pObjArray)) {
240 width_status = 0;
241 continue;
242 }
243
// Emit one single-code record per group of `nElements` values.
244 for (size_t j = 0; j < pObjArray->size(); j += nElements) {
245 result->push_back(first_code);
246 result->push_back(first_code);
247 for (int k = 0; k < nElements; k++)
248 result->push_back(pObjArray->GetIntegerAt(j + k));
249 first_code++;
250 }
251 width_status = 0;
252 } else {
253 if (width_status == 0) {
254 first_code = pObj->GetInteger();
255 width_status = 1;
256 } else if (width_status == 1) {
257 last_code = pObj->GetInteger();
258 width_status = 2;
259 iCurElement = 0;
260 } else {
// The range header is written once, just before the first value.
261 if (!iCurElement) {
262 result->push_back(first_code);
263 result->push_back(last_code);
264 }
265 result->push_back(pObj->GetInteger());
266 iCurElement++;
// All values for this range are read; expect a new first code next.
267 if (iCurElement == nElements)
268 width_status = 0;
269 }
270 }
271 }
272}
273
274} // namespace
275
276CPDF_CIDFont::CPDF_CIDFont(CPDF_Document* pDocument,
277 RetainPtr<CPDF_Dictionary> pFontDict)
278 : CPDF_Font(pDocument, std::move(pFontDict)) {
279 for (size_t i = 0; i < std::size(m_CharBBox); ++i)
280 m_CharBBox[i] = FX_RECT(-1, -1, -1, -1);
281}
282
// Defaulted: no manual cleanup is performed here.
283CPDF_CIDFont::~CPDF_CIDFont() = default;
284
// Always reports true for this font class.
285bool CPDF_CIDFont::IsCIDFont() const {
286 return true;
287}
288
// Const overload: returns this object as a CPDF_CIDFont.
289const CPDF_CIDFont* CPDF_CIDFont::AsCIDFont() const {
290 return this;
291}
292
// Mutable overload: returns this object as a CPDF_CIDFont.
293CPDF_CIDFont* CPDF_CIDFont::AsCIDFont() {
294 return this;
295}
296
297uint16_t CPDF_CIDFont::CIDFromCharCode(uint32_t charcode) const {
298 return m_pCMap ? m_pCMap->CIDFromCharCode(charcode)
299 : static_cast<uint16_t>(charcode);
300}
301
302bool CPDF_CIDFont::IsVertWriting() const {
303 return m_pCMap && m_pCMap->IsVertWriting();
304}
305
306WideString CPDF_CIDFont::UnicodeFromCharCode(uint32_t charcode) const {
308 if (!str.IsEmpty())
309 return str;
310 wchar_t ret = GetUnicodeFromCharCode(charcode);
311 return ret ? WideString(ret) : WideString();
312}
313
314wchar_t CPDF_CIDFont::GetUnicodeFromCharCode(uint32_t charcode) const {
315 switch (m_pCMap->GetCoding()) {
316 case CIDCoding::kUCS2:
317 case CIDCoding::kUTF16:
318 return static_cast<wchar_t>(charcode);
319 case CIDCoding::kCID:
320 if (!m_pCID2UnicodeMap || !m_pCID2UnicodeMap->IsLoaded())
321 return 0;
322 return m_pCID2UnicodeMap->UnicodeFromCID(static_cast<uint16_t>(charcode));
323 default:
324 break;
325 }
326 if (m_pCID2UnicodeMap && m_pCID2UnicodeMap->IsLoaded() && m_pCMap->IsLoaded())
327 return m_pCID2UnicodeMap->UnicodeFromCID(CIDFromCharCode(charcode));
328
329#if BUILDFLAG(IS_WIN)
330 uint8_t sequence[2] = {};
331 const int charsize = charcode < 256 ? 1 : 2;
332 if (charsize == 1) {
333 sequence[0] = charcode;
334 } else {
335 sequence[0] = charcode / 256;
336 sequence[1] = charcode % 256;
337 }
338 wchar_t unicode;
339 size_t ret = FX_MultiByteToWideChar(
340 kCharsetCodePages[static_cast<size_t>(m_pCMap->GetCoding())],
341 ByteStringView(pdfium::make_span(sequence).first(charsize)),
342 pdfium::span_from_ref(unicode));
343 return ret == 1 ? unicode : 0;
344#else
345 if (!m_pCMap->GetEmbedMap())
346 return 0;
347 return EmbeddedUnicodeFromCharcode(m_pCMap->GetEmbedMap(),
348 m_pCMap->GetCharset(), charcode);
349#endif
350}
351
352uint32_t CPDF_CIDFont::CharCodeFromUnicode(wchar_t unicode) const {
353 uint32_t charcode = CPDF_Font::CharCodeFromUnicode(unicode);
354 if (charcode)
355 return charcode;
356
357 switch (m_pCMap->GetCoding()) {
358 case CIDCoding::kUNKNOWN:
359 return 0;
360 case CIDCoding::kUCS2:
361 case CIDCoding::kUTF16:
362 return unicode;
363 case CIDCoding::kCID: {
364 if (!m_pCID2UnicodeMap || !m_pCID2UnicodeMap->IsLoaded())
365 return 0;
366 uint32_t cid = 0;
367 while (cid < 65536) {
368 wchar_t this_unicode =
369 m_pCID2UnicodeMap->UnicodeFromCID(static_cast<uint16_t>(cid));
370 if (this_unicode == unicode)
371 return cid;
372 cid++;
373 }
374 break;
375 }
376 default:
377 break;
378 }
379
380 if (unicode < 0x80)
381 return static_cast<uint32_t>(unicode);
382 if (m_pCMap->GetCoding() == CIDCoding::kCID)
383 return 0;
384#if BUILDFLAG(IS_WIN)
385 uint8_t buffer[32];
386 size_t ret = FX_WideCharToMultiByte(
387 kCharsetCodePages[static_cast<size_t>(m_pCMap->GetCoding())],
388 WideStringView(unicode),
389 pdfium::as_writable_chars(pdfium::make_span(buffer).first(4u)));
390 if (ret == 1)
391 return buffer[0];
392 if (ret == 2)
393 return buffer[0] * 256 + buffer[1];
394#else
395 if (m_pCMap->GetEmbedMap()) {
396 return EmbeddedCharcodeFromUnicode(m_pCMap->GetEmbedMap(),
397 m_pCMap->GetCharset(), unicode);
398 }
399#endif
400 return 0;
401}
402
403bool CPDF_CIDFont::Load() {
404 if (m_pFontDict->GetByteStringFor("Subtype") == "TrueType") {
405 LoadGB2312();
406 return true;
407 }
408
409 RetainPtr<const CPDF_Array> pFonts =
410 m_pFontDict->GetArrayFor("DescendantFonts");
411 if (!pFonts || pFonts->size() != 1)
412 return false;
413
414 RetainPtr<const CPDF_Dictionary> pCIDFontDict = pFonts->GetDictAt(0);
415 if (!pCIDFontDict)
416 return false;
417
418 m_BaseFontName = pCIDFontDict->GetByteStringFor("BaseFont");
419 if ((m_BaseFontName == "CourierStd" || m_BaseFontName == "CourierStd-Bold" ||
420 m_BaseFontName == "CourierStd-BoldOblique" ||
421 m_BaseFontName == "CourierStd-Oblique") &&
422 !IsEmbedded()) {
423 m_bAdobeCourierStd = true;
424 }
425
426 RetainPtr<const CPDF_Object> pEncoding =
427 m_pFontDict->GetDirectObjectFor("Encoding");
428 if (!pEncoding)
429 return false;
430
431 ByteString subtype = pCIDFontDict->GetByteStringFor("Subtype");
432 m_FontType =
433 subtype == "CIDFontType0" ? CIDFontType::kType1 : CIDFontType::kTrueType;
434
435 if (!pEncoding->IsName() && !pEncoding->IsStream())
436 return false;
437
438 auto* pFontGlobals = CPDF_FontGlobals::GetInstance();
439 const CPDF_Stream* pEncodingStream = pEncoding->AsStream();
440 if (pEncodingStream) {
441 auto pAcc =
442 pdfium::MakeRetain<CPDF_StreamAcc>(pdfium::WrapRetain(pEncodingStream));
443 pAcc->LoadAllDataFiltered();
444 pdfium::span<const uint8_t> span = pAcc->GetSpan();
445 m_pCMap = pdfium::MakeRetain<CPDF_CMap>(span);
446 } else {
447 DCHECK(pEncoding->IsName());
448 ByteString cmap = pEncoding->GetString();
449 m_pCMap = pFontGlobals->GetPredefinedCMap(cmap);
450 }
451
452 RetainPtr<const CPDF_Dictionary> pFontDesc =
453 pCIDFontDict->GetDictFor("FontDescriptor");
454 if (pFontDesc)
455 LoadFontDescriptor(pFontDesc.Get());
456
457 m_Charset = m_pCMap->GetCharset();
458 if (m_Charset == CIDSET_UNKNOWN) {
459 RetainPtr<const CPDF_Dictionary> pCIDInfo =
460 pCIDFontDict->GetDictFor("CIDSystemInfo");
461 if (pCIDInfo) {
463 pCIDInfo->GetByteStringFor("Ordering").AsStringView());
464 }
465 }
466 if (m_Charset != CIDSET_UNKNOWN) {
467 m_pCID2UnicodeMap = pFontGlobals->GetCID2UnicodeMap(m_Charset);
468 }
469 RetainPtr<CFX_Face> face = m_Font.GetFace();
470 if (face) {
471 if (m_FontType == CIDFontType::kType1) {
472 face->SelectCharMap(fxge::FontEncoding::kUnicode);
473 } else {
474 UseCIDCharmap(face, m_pCMap->GetCoding());
475 }
476 }
477 m_DefaultWidth = pCIDFontDict->GetIntegerFor("DW", 1000);
478 RetainPtr<const CPDF_Array> pWidthArray = pCIDFontDict->GetArrayFor("W");
479 if (pWidthArray)
480 LoadMetricsArray(std::move(pWidthArray), &m_WidthList, 1);
481
482 if (!IsEmbedded())
483 LoadSubstFont();
484
485 RetainPtr<const CPDF_Object> pmap =
486 pCIDFontDict->GetDirectObjectFor("CIDToGIDMap");
487 if (pmap) {
488 RetainPtr<const CPDF_Stream> pMapStream(pmap->AsStream());
489 if (pMapStream) {
490 m_pStreamAcc = pdfium::MakeRetain<CPDF_StreamAcc>(std::move(pMapStream));
491 m_pStreamAcc->LoadAllDataFiltered();
492 } else if (m_pFontFile && pmap->IsName() &&
493 pmap->GetString() == "Identity") {
494 m_bCIDIsGID = true;
495 }
496 }
497
499 if (IsVertWriting()) {
500 RetainPtr<const CPDF_Array> pWidth2Array = pCIDFontDict->GetArrayFor("W2");
501 if (pWidth2Array)
502 LoadMetricsArray(std::move(pWidth2Array), &m_VertMetrics, 3);
503
504 RetainPtr<const CPDF_Array> pDefaultArray =
505 pCIDFontDict->GetArrayFor("DW2");
506 if (pDefaultArray) {
507 m_DefaultVY = pDefaultArray->GetIntegerAt(0);
508 m_DefaultW1 = pDefaultArray->GetIntegerAt(1);
509 }
510 }
511
512 // TODO(thestig): Better identify font types and identify more font types.
513 if (m_FontType == CIDFontType::kTrueType && IsEmbedded())
514 m_Font.SetFontType(CFX_Font::FontType::kCIDTrueType);
515
516 return true;
517}
518
519FX_RECT CPDF_CIDFont::GetCharBBox(uint32_t charcode) {
520 if (charcode < 256 && m_CharBBox[charcode].right != -1)
521 return m_CharBBox[charcode];
522
523 FX_RECT rect;
524 bool bVert = false;
525 int glyph_index = GlyphFromCharCode(charcode, &bVert);
526 RetainPtr<CFX_Face> face = m_Font.GetFace();
527 if (face) {
528 rect = face->GetCharBBox(charcode, glyph_index);
529 }
530 if (!m_pFontFile && m_Charset == CIDSET_JAPAN1) {
531 uint16_t cid = CIDFromCharCode(charcode);
532 const CIDTransform* pTransform = GetCIDTransform(cid);
533 if (pTransform && !bVert) {
538 CIDTransformToFloat(pTransform->e) * 1000,
539 CIDTransformToFloat(pTransform->f) * 1000);
541 }
542 }
543 if (charcode < 256)
544 m_CharBBox[charcode] = rect;
545
546 return rect;
547}
548
549int CPDF_CIDFont::GetCharWidthF(uint32_t charcode) {
550 if (charcode < 0x80 && m_bAnsiWidthsFixed) {
551 return (charcode >= 32 && charcode < 127) ? 500 : 0;
552 }
553 uint16_t cid = CIDFromCharCode(charcode);
554 auto lhv_span =
555 fxcrt::reinterpret_span<const LowHighVal>(pdfium::make_span(m_WidthList));
556 for (const auto& lhv : lhv_span) {
557 if (IsMetricForCID(lhv, cid)) {
558 return lhv.val;
559 }
560 }
561 return m_DefaultWidth;
562}
563
564int16_t CPDF_CIDFont::GetVertWidth(uint16_t cid) const {
565 auto lhvxy_span = fxcrt::reinterpret_span<const LowHighValXY>(
566 pdfium::make_span(m_VertMetrics));
567 for (const auto& lhvxy : lhvxy_span) {
568 if (IsMetricForCID(lhvxy, cid)) {
569 return lhvxy.val;
570 }
571 }
572 return m_DefaultW1;
573}
574
575CFX_Point16 CPDF_CIDFont::GetVertOrigin(uint16_t cid) const {
576 auto lhvxy_span = fxcrt::reinterpret_span<const LowHighValXY>(
577 pdfium::make_span(m_VertMetrics));
578 for (const auto& lhvxy : lhvxy_span) {
579 if (IsMetricForCID(lhvxy, cid)) {
580 return {static_cast<int16_t>(lhvxy.x), static_cast<int16_t>(lhvxy.y)};
581 }
582 }
583 int width = m_DefaultWidth;
584 auto lhv_span =
585 fxcrt::reinterpret_span<const LowHighVal>(pdfium::make_span(m_WidthList));
586 for (const auto& lhv : lhv_span) {
587 if (IsMetricForCID(lhv, cid)) {
588 width = lhv.val;
589 break;
590 }
591 }
592 return {static_cast<int16_t>(width / 2), m_DefaultVY};
593}
594
595int CPDF_CIDFont::GetGlyphIndex(uint32_t unicode, bool* pVertGlyph) {
596 if (pVertGlyph)
597 *pVertGlyph = false;
598
599 int index = m_Font.GetFace()->GetCharIndex(unicode);
601 return index;
602
603 if (!index || !IsVertWriting())
604 return index;
605
606 if (m_pTTGSUBTable)
607 return GetVerticalGlyph(index, pVertGlyph);
608
609 static constexpr uint32_t kGsubTag =
610 CFX_FontMapper::MakeTag('G', 'S', 'U', 'B');
611 RetainPtr<CFX_Face> face = m_Font.GetFace();
612 size_t length = face->GetSfntTable(kGsubTag, {});
613 if (!length) {
614 return index;
615 }
616
617 auto sub_data = FixedSizeDataVector<uint8_t>::Uninit(length);
618 if (!face->GetSfntTable(kGsubTag, sub_data.span())) {
619 return index;
620 }
621
622 // CFX_CTTGSUBTable parses the data and stores all the values in its structs.
623 // It does not store pointers into `sub_data`.
624 m_pTTGSUBTable = std::make_unique<CFX_CTTGSUBTable>(sub_data.span());
625 return GetVerticalGlyph(index, pVertGlyph);
626}
627
628int CPDF_CIDFont::GetVerticalGlyph(int index, bool* pVertGlyph) {
629 uint32_t vindex = m_pTTGSUBTable->GetVerticalGlyph(index);
630 if (!vindex)
631 return index;
632
633 index = vindex;
634 if (pVertGlyph)
635 *pVertGlyph = true;
636 return index;
637}
638
639int CPDF_CIDFont::GlyphFromCharCode(uint32_t charcode, bool* pVertGlyph) {
640 if (pVertGlyph)
641 *pVertGlyph = false;
642
643 if (!m_pFontFile && (!m_pStreamAcc || m_pCID2UnicodeMap)) {
644 uint16_t cid = CIDFromCharCode(charcode);
645 wchar_t unicode = 0;
646 if (m_bCIDIsGID) {
647#if BUILDFLAG(IS_APPLE)
648 if (FontStyleIsSymbolic(m_Flags))
649 return cid;
650
651 WideString uni_str = UnicodeFromCharCode(charcode);
652 if (uni_str.IsEmpty())
653 return cid;
654
655 unicode = uni_str[0];
656#else
657 return cid;
658#endif
659 } else {
660 if (cid && m_pCID2UnicodeMap && m_pCID2UnicodeMap->IsLoaded())
661 unicode = m_pCID2UnicodeMap->UnicodeFromCID(cid);
662 if (unicode == 0)
663 unicode = GetUnicodeFromCharCode(charcode);
664 if (unicode == 0) {
665 WideString unicode_str = UnicodeFromCharCode(charcode);
666 if (!unicode_str.IsEmpty())
667 unicode = unicode_str[0];
668 }
669 }
670 if (unicode == 0) {
671 if (!m_bAdobeCourierStd)
672 return charcode ? static_cast<int>(charcode) : -1;
673
674 charcode += 31;
675 RetainPtr<CFX_Face> face = m_Font.GetFace();
676 bool bMSUnicode = UseTTCharmapMSUnicode(face);
677 bool bMacRoman = !bMSUnicode && UseTTCharmapMacRoman(face);
679 if (bMSUnicode)
680 base_encoding = FontEncoding::kWinAnsi;
681 else if (bMacRoman)
682 base_encoding = FontEncoding::kMacRoman;
683 const char* name =
684 GetAdobeCharName(base_encoding, std::vector<ByteString>(), charcode);
685 if (!name)
686 return charcode ? static_cast<int>(charcode) : -1;
687
688 int index = 0;
689 uint16_t name_unicode = UnicodeFromAdobeName(name);
690 if (!name_unicode)
691 return charcode ? static_cast<int>(charcode) : -1;
692
693 if (base_encoding == FontEncoding::kStandard) {
694 return face->GetCharIndex(name_unicode);
695 }
696
697 if (base_encoding == FontEncoding::kWinAnsi) {
698 index = face->GetCharIndex(name_unicode);
699 } else {
700 DCHECK_EQ(base_encoding, FontEncoding::kMacRoman);
701 uint32_t maccode = CharCodeFromUnicodeForEncoding(
703 index =
704 maccode ? face->GetCharIndex(maccode)
705 : face->GetNameIndex((FT_String*)name);
706 }
707 if (index == 0 || index == 0xffff)
708 return charcode ? static_cast<int>(charcode) : -1;
709 return index;
710 }
711 if (m_Charset == CIDSET_JAPAN1) {
712 if (unicode == '\\') {
713 unicode = '/';
714#if !BUILDFLAG(IS_APPLE)
715 } else if (unicode == 0xa5) {
716 unicode = 0x5c;
717#endif
718 }
719 }
720
721 RetainPtr<CFX_Face> face = m_Font.GetFace();
722 if (!face) {
723 return unicode;
724 }
725
726 size_t num_charmaps = face->GetCharMapCount();
727 if (!face->SelectCharMap(fxge::FontEncoding::kUnicode)) {
728 size_t i;
729 for (i = 0; i < num_charmaps; i++) {
730 uint32_t ret = CharCodeFromUnicodeForEncoding(
731 face->GetCharMapEncodingByIndex(i), static_cast<wchar_t>(charcode));
732 if (ret == 0)
733 continue;
734 face->SetCharMapByIndex(i);
735 unicode = static_cast<wchar_t>(ret);
736 break;
737 }
738 if (i == num_charmaps && i) {
739 face->SetCharMapByIndex(0);
740 unicode = static_cast<wchar_t>(charcode);
741 }
742 }
743 if (num_charmaps) {
744 int index = GetGlyphIndex(unicode, pVertGlyph);
745 return index != 0 ? index : -1;
746 }
747 return unicode;
748 }
749
750 RetainPtr<CFX_Face> face = m_Font.GetFace();
751 if (!face) {
752 return -1;
753 }
754
755 uint16_t cid = CIDFromCharCode(charcode);
756 if (!m_pStreamAcc) {
757 if (m_FontType == CIDFontType::kType1) {
758 return cid;
759 }
760 if (m_pFontFile && m_pCMap->IsDirectCharcodeToCIDTableIsEmpty()) {
761 return cid;
762 }
763 if (m_pCMap->GetCoding() == CIDCoding::kUNKNOWN) {
764 return cid;
765 }
766
767 std::optional<fxge::FontEncoding> charmap =
768 face->GetCurrentCharMapEncoding();
769 if (!charmap.has_value()) {
770 return cid;
771 }
772
773 if (charmap.value() == fxge::FontEncoding::kUnicode) {
774 WideString unicode_str = UnicodeFromCharCode(charcode);
775 if (unicode_str.IsEmpty())
776 return -1;
777
778 charcode = unicode_str[0];
779 }
780 return GetGlyphIndex(charcode, pVertGlyph);
781 }
782 uint32_t byte_pos = cid * 2;
783 if (byte_pos + 2 > m_pStreamAcc->GetSize())
784 return -1;
785
786 pdfium::span<const uint8_t> span = m_pStreamAcc->GetSpan().subspan(byte_pos);
787 return span[0] * 256 + span[1];
788}
789
// Delegates to the CMap's GetNextChar() with the same arguments.
// m_pCMap is dereferenced without a null check.
790uint32_t CPDF_CIDFont::GetNextChar(ByteStringView pString,
791 size_t* pOffset) const {
792 return m_pCMap->GetNextChar(pString, pOffset);
793}
794
// Delegates to the CMap's GetCharSize() for `charcode`.
// m_pCMap is dereferenced without a null check.
795int CPDF_CIDFont::GetCharSize(uint32_t charcode) const {
796 return m_pCMap->GetCharSize(charcode);
797}
798
// Delegates to the CMap's CountChar() for `pString`.
// m_pCMap is dereferenced without a null check.
799size_t CPDF_CIDFont::CountChar(ByteStringView pString) const {
800 return m_pCMap->CountChar(pString);
801}
802
// Delegates to the CMap's AppendChar(), passing `str` and `charcode` through.
// m_pCMap is dereferenced without a null check.
803void CPDF_CIDFont::AppendChar(ByteString* str, uint32_t charcode) const {
804 m_pCMap->AppendChar(str, charcode);
805}
806
807bool CPDF_CIDFont::IsUnicodeCompatible() const {
808 if (m_pCID2UnicodeMap && m_pCID2UnicodeMap->IsLoaded() && m_pCMap->IsLoaded())
809 return true;
810 return m_pCMap->GetCoding() != CIDCoding::kUNKNOWN;
811}
812
813void CPDF_CIDFont::LoadSubstFont() {
814 FX_SAFE_INT32 safeStemV(m_StemV);
815 safeStemV *= 5;
816 m_Font.LoadSubst(m_BaseFontName, m_FontType == CIDFontType::kTrueType,
817 m_Flags, safeStemV.ValueOrDefault(FXFONT_FW_NORMAL),
818 m_ItalicAngle, kCharsetCodePages[m_Charset],
819 IsVertWriting());
820}
821
822// static
823float CPDF_CIDFont::CIDTransformToFloat(uint8_t ch) {
824 return (ch < 128 ? ch : ch - 255) * (1.0f / 127);
825}
826
827void CPDF_CIDFont::LoadGB2312() {
828 m_BaseFontName = m_pFontDict->GetByteStringFor("BaseFont");
829 m_Charset = CIDSET_GB1;
830
831 auto* pFontGlobals = CPDF_FontGlobals::GetInstance();
832 m_pCMap = pFontGlobals->GetPredefinedCMap("GBK-EUC-H");
833 m_pCID2UnicodeMap = pFontGlobals->GetCID2UnicodeMap(m_Charset);
834 RetainPtr<const CPDF_Dictionary> pFontDesc =
835 m_pFontDict->GetDictFor("FontDescriptor");
836 if (pFontDesc)
837 LoadFontDescriptor(pFontDesc.Get());
838
839 if (!IsEmbedded())
840 LoadSubstFont();
842 m_bAnsiWidthsFixed = true;
843}
844
845const CIDTransform* CPDF_CIDFont::GetCIDTransform(uint16_t cid) const {
846 if (m_Charset != CIDSET_JAPAN1 || m_pFontFile)
847 return nullptr;
848
849 const auto* pBegin = std::begin(kJapan1VerticalCIDs);
850 const auto* pEnd = std::end(kJapan1VerticalCIDs);
851 const auto* pTransform = std::lower_bound(
852 pBegin, pEnd, cid,
853 [](const CIDTransform& entry, uint16_t cid) { return entry.cid < cid; });
854
855 return pTransform < pEnd && cid == pTransform->cid ? pTransform : nullptr;
856}
fxcrt::ByteString ByteString
Definition bytestring.h:180
#define DCHECK
Definition check.h:33
#define DCHECK_EQ(x, y)
Definition check_op.h:17
CFX_FloatRect(const FX_RECT &rect)
FX_RECT GetOuterRect() const
static constexpr uint32_t MakeTag(char c1, char c2, char c3, char c4)
CFX_FloatRect TransformRect(const CFX_FloatRect &rect) const
constexpr CFX_Matrix(float a1, float b1, float c1, float d1, float e1, float f1)
std::vector< RetainPtr< CPDF_Object > >::const_iterator const_iterator
Definition cpdf_array.h:29
~CPDF_CIDFont() override
size_t CountChar(ByteStringView pString) const override
uint32_t GetNextChar(ByteStringView pString, size_t *pOffset) const override
bool IsCIDFont() const override
bool IsUnicodeCompatible() const override
CFX_Point16 GetVertOrigin(uint16_t cid) const
int GetCharWidthF(uint32_t charcode) override
const CPDF_CIDFont * AsCIDFont() const override
uint16_t CIDFromCharCode(uint32_t charcode) const
bool Load() override
const CIDTransform * GetCIDTransform(uint16_t cid) const
int GlyphFromCharCode(uint32_t charcode, bool *pVertGlyph) override
int16_t GetVertWidth(uint16_t cid) const
FX_RECT GetCharBBox(uint32_t charcode) override
static float CIDTransformToFloat(uint8_t ch)
void AppendChar(ByteString *str, uint32_t charcode) const override
bool IsVertWriting() const override
uint32_t CharCodeFromUnicode(wchar_t Unicode) const override
int GetCharSize(uint32_t charcode) const
WideString UnicodeFromCharCode(uint32_t charcode) const override
CPDF_CIDFont * AsCIDFont() override
static CIDSet CharsetFromOrdering(ByteStringView ordering)
std::map< ByteString, RetainPtr< CPDF_Object >, std::less<> > DictMap
static CPDF_FontGlobals * GetInstance()
void LoadFontDescriptor(const CPDF_Dictionary *pFontDesc)
virtual uint32_t CharCodeFromUnicode(wchar_t Unicode) const
static bool UseTTCharmapMacRoman(const RetainPtr< CFX_Face > &face)
Definition cpdf_font.h:147
static bool UseTTCharmapMSUnicode(const RetainPtr< CFX_Face > &face)
Definition cpdf_font.h:141
virtual WideString UnicodeFromCharCode(uint32_t charcode) const
void CheckFontMetrics()
bool IsEmbedded() const
Definition cpdf_font.h:100
bool operator==(const char *ptr) const
WideString(wchar_t ch)
WideString()=default
@ CIDSET_JAPAN1
@ CIDSET_UNKNOWN
@ CIDSET_GB1
@ CIDSET_NUM_SETS
CIDCoding
Definition cpdf_cmap.h:25
uint32_t CharCodeFromUnicodeForEncoding(fxge::FontEncoding encoding, wchar_t unicode)
FontEncoding
FX_CodePage
Definition fx_codepage.h:19
@ kChineseTraditional
Definition fx_codepage.h:42
CFX_PTemplate< int16_t > CFX_Point16
wchar_t UnicodeFromAdobeName(const char *name)
Definition fx_font.cpp:140
#define FXFONT_FW_NORMAL
Definition fx_font.h:24
pdfium::CheckedNumeric< int32_t > FX_SAFE_INT32
constexpr wchar_t kBoxDrawingsLightVerical
Definition fx_unicode.h:102
fxcrt::ByteStringView ByteStringView
uint16_t cid
FX_RECT & operator=(const FX_RECT &that)=default
fxcrt::WideString WideString
Definition widestring.h:207