Qt
Internal/Contributor docs for the Qt SDK. Note: These are NOT official API docs; those are found at https://doc.qt.io/
Loading...
Searching...
No Matches
cpdf_cidfont.cpp
Go to the documentation of this file.
1// Copyright 2016 The PDFium Authors
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7#include "core/fpdfapi/font/cpdf_cidfont.h"
8
9#include <algorithm>
10#include <limits>
11#include <utility>
12#include <vector>
13
14#include "build/build_config.h"
15#include "core/fpdfapi/cmaps/fpdf_cmaps.h"
16#include "core/fpdfapi/font/cfx_cttgsubtable.h"
17#include "core/fpdfapi/font/cpdf_cid2unicodemap.h"
18#include "core/fpdfapi/font/cpdf_cmap.h"
19#include "core/fpdfapi/font/cpdf_cmapparser.h"
20#include "core/fpdfapi/font/cpdf_fontencoding.h"
21#include "core/fpdfapi/font/cpdf_fontglobals.h"
22#include "core/fpdfapi/parser/cpdf_array.h"
23#include "core/fpdfapi/parser/cpdf_dictionary.h"
24#include "core/fpdfapi/parser/cpdf_stream.h"
25#include "core/fpdfapi/parser/cpdf_stream_acc.h"
26#include "core/fxcrt/fixed_size_data_vector.h"
27#include "core/fxcrt/fx_codepage.h"
28#include "core/fxcrt/fx_memory.h"
29#include "core/fxcrt/fx_safe_types.h"
30#include "core/fxcrt/fx_unicode.h"
31#include "core/fxcrt/stl_util.h"
32#include "core/fxge/fx_font.h"
33#include "third_party/base/check.h"
34#include "third_party/base/check_op.h"
35#include "third_party/base/containers/span.h"
36
37namespace {
38
39constexpr FX_CodePage kCharsetCodePages[CIDSET_NUM_SETS] = {
46
// One row of the per-CID glyph fix-up table: `cid` is the lookup key and
// `a`..`f` are six byte-encoded matrix coefficients (decoded with
// CPDF_CIDFont::CIDTransformToFloat()).
//
// CPDF_CIDFont::GetCIDTransform() returns `&a` and its caller indexes the
// result as a 6-byte array, so the coefficient members must stay adjacent and
// in this exact order.
struct CIDTransform {
  uint16_t cid;
  uint8_t a;
  uint8_t b;
  uint8_t c;
  uint8_t d;
  uint8_t e;
  uint8_t f;
};
static_assert(sizeof(CIDTransform) == 8,
              "CIDTransform coefficients must be tightly packed after cid");
56
57constexpr CIDTransform kJapan1VerticalCIDs[] = {
58 {97, 129, 0, 0, 127, 55, 0}, {7887, 127, 0, 0, 127, 76, 89},
59 {7888, 127, 0, 0, 127, 79, 94}, {7889, 0, 129, 127, 0, 17, 127},
60 {7890, 0, 129, 127, 0, 17, 127}, {7891, 0, 129, 127, 0, 17, 127},
61 {7892, 0, 129, 127, 0, 17, 127}, {7893, 0, 129, 127, 0, 17, 127},
62 {7894, 0, 129, 127, 0, 17, 127}, {7895, 0, 129, 127, 0, 17, 127},
63 {7896, 0, 129, 127, 0, 17, 127}, {7897, 0, 129, 127, 0, 17, 127},
64 {7898, 0, 129, 127, 0, 17, 127}, {7899, 0, 129, 127, 0, 17, 104},
65 {7900, 0, 129, 127, 0, 17, 127}, {7901, 0, 129, 127, 0, 17, 104},
66 {7902, 0, 129, 127, 0, 17, 127}, {7903, 0, 129, 127, 0, 17, 127},
67 {7904, 0, 129, 127, 0, 17, 127}, {7905, 0, 129, 127, 0, 17, 114},
68 {7906, 0, 129, 127, 0, 17, 127}, {7907, 0, 129, 127, 0, 17, 127},
69 {7908, 0, 129, 127, 0, 17, 127}, {7909, 0, 129, 127, 0, 17, 127},
70 {7910, 0, 129, 127, 0, 17, 127}, {7911, 0, 129, 127, 0, 17, 127},
71 {7912, 0, 129, 127, 0, 17, 127}, {7913, 0, 129, 127, 0, 17, 127},
72 {7914, 0, 129, 127, 0, 17, 127}, {7915, 0, 129, 127, 0, 17, 114},
73 {7916, 0, 129, 127, 0, 17, 127}, {7917, 0, 129, 127, 0, 17, 127},
74 {7918, 127, 0, 0, 127, 18, 25}, {7919, 127, 0, 0, 127, 18, 25},
75 {7920, 127, 0, 0, 127, 18, 25}, {7921, 127, 0, 0, 127, 18, 25},
76 {7922, 127, 0, 0, 127, 18, 25}, {7923, 127, 0, 0, 127, 18, 25},
77 {7924, 127, 0, 0, 127, 18, 25}, {7925, 127, 0, 0, 127, 18, 25},
78 {7926, 127, 0, 0, 127, 18, 25}, {7927, 127, 0, 0, 127, 18, 25},
79 {7928, 127, 0, 0, 127, 18, 25}, {7929, 127, 0, 0, 127, 18, 25},
80 {7930, 127, 0, 0, 127, 18, 25}, {7931, 127, 0, 0, 127, 18, 25},
81 {7932, 127, 0, 0, 127, 18, 25}, {7933, 127, 0, 0, 127, 18, 25},
82 {7934, 127, 0, 0, 127, 18, 25}, {7935, 127, 0, 0, 127, 18, 25},
83 {7936, 127, 0, 0, 127, 18, 25}, {7937, 127, 0, 0, 127, 18, 25},
84 {7938, 127, 0, 0, 127, 18, 25}, {7939, 127, 0, 0, 127, 18, 25},
85 {8720, 0, 129, 127, 0, 19, 102}, {8721, 0, 129, 127, 0, 13, 127},
86 {8722, 0, 129, 127, 0, 19, 108}, {8723, 0, 129, 127, 0, 19, 102},
87 {8724, 0, 129, 127, 0, 19, 102}, {8725, 0, 129, 127, 0, 19, 102},
88 {8726, 0, 129, 127, 0, 19, 102}, {8727, 0, 129, 127, 0, 19, 102},
89 {8728, 0, 129, 127, 0, 19, 114}, {8729, 0, 129, 127, 0, 19, 114},
90 {8730, 0, 129, 127, 0, 38, 108}, {8731, 0, 129, 127, 0, 13, 108},
91 {8732, 0, 129, 127, 0, 19, 108}, {8733, 0, 129, 127, 0, 19, 108},
92 {8734, 0, 129, 127, 0, 19, 108}, {8735, 0, 129, 127, 0, 19, 108},
93 {8736, 0, 129, 127, 0, 19, 102}, {8737, 0, 129, 127, 0, 19, 102},
94 {8738, 0, 129, 127, 0, 19, 102}, {8739, 0, 129, 127, 0, 19, 102},
95 {8740, 0, 129, 127, 0, 19, 102}, {8741, 0, 129, 127, 0, 19, 102},
96 {8742, 0, 129, 127, 0, 19, 102}, {8743, 0, 129, 127, 0, 19, 102},
97 {8744, 0, 129, 127, 0, 19, 102}, {8745, 0, 129, 127, 0, 19, 102},
98 {8746, 0, 129, 127, 0, 19, 114}, {8747, 0, 129, 127, 0, 19, 114},
99 {8748, 0, 129, 127, 0, 19, 102}, {8749, 0, 129, 127, 0, 19, 102},
100 {8750, 0, 129, 127, 0, 19, 102}, {8751, 0, 129, 127, 0, 19, 102},
101 {8752, 0, 129, 127, 0, 19, 102}, {8753, 0, 129, 127, 0, 19, 102},
102 {8754, 0, 129, 127, 0, 19, 102}, {8755, 0, 129, 127, 0, 19, 102},
103 {8756, 0, 129, 127, 0, 19, 102}, {8757, 0, 129, 127, 0, 19, 102},
104 {8758, 0, 129, 127, 0, 19, 102}, {8759, 0, 129, 127, 0, 19, 102},
105 {8760, 0, 129, 127, 0, 19, 102}, {8761, 0, 129, 127, 0, 19, 102},
106 {8762, 0, 129, 127, 0, 19, 102}, {8763, 0, 129, 127, 0, 19, 102},
107 {8764, 0, 129, 127, 0, 19, 102}, {8765, 0, 129, 127, 0, 19, 102},
108 {8766, 0, 129, 127, 0, 19, 102}, {8767, 0, 129, 127, 0, 19, 102},
109 {8768, 0, 129, 127, 0, 19, 102}, {8769, 0, 129, 127, 0, 19, 102},
110 {8770, 0, 129, 127, 0, 19, 102}, {8771, 0, 129, 127, 0, 19, 102},
111 {8772, 0, 129, 127, 0, 19, 102}, {8773, 0, 129, 127, 0, 19, 102},
112 {8774, 0, 129, 127, 0, 19, 102}, {8775, 0, 129, 127, 0, 19, 102},
113 {8776, 0, 129, 127, 0, 19, 102}, {8777, 0, 129, 127, 0, 19, 102},
114 {8778, 0, 129, 127, 0, 19, 102}, {8779, 0, 129, 127, 0, 19, 114},
115 {8780, 0, 129, 127, 0, 19, 108}, {8781, 0, 129, 127, 0, 19, 114},
116 {8782, 0, 129, 127, 0, 13, 114}, {8783, 0, 129, 127, 0, 19, 108},
117 {8784, 0, 129, 127, 0, 13, 114}, {8785, 0, 129, 127, 0, 19, 108},
118 {8786, 0, 129, 127, 0, 19, 108}, {8787, 0, 129, 127, 0, 19, 108},
119 {8788, 0, 129, 127, 0, 19, 108}, {8789, 0, 129, 127, 0, 19, 108},
120 {8790, 0, 129, 127, 0, 19, 108}, {8791, 0, 129, 127, 0, 19, 108},
121 {8792, 0, 129, 127, 0, 19, 108}, {8793, 0, 129, 127, 0, 19, 108},
122 {8794, 0, 129, 127, 0, 19, 108}, {8795, 0, 129, 127, 0, 19, 108},
123 {8796, 0, 129, 127, 0, 19, 108}, {8797, 0, 129, 127, 0, 19, 108},
124 {8798, 0, 129, 127, 0, 19, 108}, {8799, 0, 129, 127, 0, 19, 108},
125 {8800, 0, 129, 127, 0, 19, 108}, {8801, 0, 129, 127, 0, 19, 108},
126 {8802, 0, 129, 127, 0, 19, 108}, {8803, 0, 129, 127, 0, 19, 108},
127 {8804, 0, 129, 127, 0, 19, 108}, {8805, 0, 129, 127, 0, 19, 108},
128 {8806, 0, 129, 127, 0, 19, 108}, {8807, 0, 129, 127, 0, 19, 108},
129 {8808, 0, 129, 127, 0, 19, 108}, {8809, 0, 129, 127, 0, 19, 108},
130 {8810, 0, 129, 127, 0, 19, 108}, {8811, 0, 129, 127, 0, 19, 114},
131 {8812, 0, 129, 127, 0, 19, 102}, {8813, 0, 129, 127, 0, 19, 114},
132 {8814, 0, 129, 127, 0, 76, 102}, {8815, 0, 129, 127, 0, 13, 121},
133 {8816, 0, 129, 127, 0, 19, 114}, {8817, 0, 129, 127, 0, 19, 127},
134 {8818, 0, 129, 127, 0, 19, 114}, {8819, 0, 129, 127, 0, 218, 108},
135};
136
// Upper bound for a rect top that is about to be grown by 1/64th of itself:
// for any value <= kMaxRectTop, `value + value / 64` still fits in an int.
constexpr int kMaxRectTop = 2114445437;
139
140int FTPosToCBoxInt(FT_Pos pos) {
141 // Boundary values to avoid integer overflow when multiplied by 1000.
142 constexpr FT_Pos kMinCBox = -2147483;
143 constexpr FT_Pos kMaxCBox = 2147483;
144 return static_cast<int>(std::clamp(pos, kMinCBox, kMaxCBox));
145}
146
147#if !BUILDFLAG(IS_WIN)
148
149bool IsValidEmbeddedCharcodeFromUnicodeCharset(CIDSet charset) {
150 switch (charset) {
151 case CIDSET_GB1:
152 case CIDSET_CNS1:
153 case CIDSET_JAPAN1:
154 case CIDSET_KOREA1:
155 return true;
156
157 default:
158 return false;
159 }
160}
161
162wchar_t EmbeddedUnicodeFromCharcode(const fxcmap::CMap* pEmbedMap,
163 CIDSet charset,
164 uint32_t charcode) {
165 if (!IsValidEmbeddedCharcodeFromUnicodeCharset(charset))
166 return 0;
167
168 uint16_t cid = fxcmap::CIDFromCharCode(pEmbedMap, charcode);
169 if (!cid)
170 return 0;
171
172 pdfium::span<const uint16_t> map =
173 CPDF_FontGlobals::GetInstance()->GetEmbeddedToUnicode(charset);
174 return cid < map.size() ? map[cid] : 0;
175}
176
177uint32_t EmbeddedCharcodeFromUnicode(const fxcmap::CMap* pEmbedMap,
178 CIDSet charset,
179 wchar_t unicode) {
180 if (!IsValidEmbeddedCharcodeFromUnicodeCharset(charset))
181 return 0;
182
183 pdfium::span<const uint16_t> map =
184 CPDF_FontGlobals::GetInstance()->GetEmbeddedToUnicode(charset);
185 for (uint32_t i = 0; i < map.size(); ++i) {
186 if (map[i] == unicode) {
187 uint32_t charCode = fxcmap::CharCodeFromCID(pEmbedMap, i);
188 if (charCode)
189 return charCode;
190 }
191 }
192 return 0;
193}
194
195#endif // !BUILDFLAG(IS_WIN)
196
// Selects a charmap on `face` based on the CMap coding `coding`.
// If the coding-specific charmap cannot be selected, the Unicode charmap is
// tried next, and finally the face's first charmap when it has any.
// NOTE(review): this listing skips original lines 201, 209-210 and 213, so the
// kGB case and the default case show no visible assignment to `encoding`, and
// a further case label presumably sat before the second-to-last `break` —
// confirm against the upstream file before editing this function.
197void UseCIDCharmap(const RetainPtr<CFX_Face>& face, CIDCoding coding) {
198 fxge::FontEncoding encoding;
199 switch (coding) {
200 case CIDCoding::kGB:
202 break;
203 case CIDCoding::kBIG5:
204 encoding = fxge::FontEncoding::kBig5;
205 break;
206 case CIDCoding::kJIS:
207 encoding = fxge::FontEncoding::kSjis;
208 break;
211 break;
212 default:
214 }
// First choice: the charmap matching the CMap coding.
215 bool result = face->SelectCharMap(encoding);
216 if (!result) {
// Second choice: the Unicode charmap.
217 result = face->SelectCharMap(fxge::FontEncoding::kUnicode);
218 }
// Last resort: whatever charmap the face lists first.
219 if (!result && face->GetCharMapCount()) {
220 face->SetCharMapByIndex(0);
221 }
222}
223
// Returns true when `cid` lies inside the inclusive CID range stored in the
// first two ints of a metrics entry (`pEntry[0]` = first, `pEntry[1]` = last).
bool IsMetricForCID(const int* pEntry, uint16_t cid) {
  const int first_cid = pEntry[0];
  const int last_cid = pEntry[1];
  return first_cid <= cid && cid <= last_cid;
}
227
228void LoadMetricsArray(RetainPtr<const CPDF_Array> pArray,
229 std::vector<int>* result,
230 int nElements) {
231 int width_status = 0;
232 int iCurElement = 0;
233 int first_code = 0;
234 int last_code = 0;
235 for (size_t i = 0; i < pArray->size(); i++) {
236 RetainPtr<const CPDF_Object> pObj = pArray->GetDirectObjectAt(i);
237 if (!pObj)
238 continue;
239
240 const CPDF_Array* pObjArray = pObj->AsArray();
241 if (pObjArray) {
242 if (width_status != 1)
243 return;
244 if (first_code > std::numeric_limits<int>::max() -
245 fxcrt::CollectionSize<int>(*pObjArray)) {
246 width_status = 0;
247 continue;
248 }
249
250 for (size_t j = 0; j < pObjArray->size(); j += nElements) {
251 result->push_back(first_code);
252 result->push_back(first_code);
253 for (int k = 0; k < nElements; k++)
254 result->push_back(pObjArray->GetIntegerAt(j + k));
255 first_code++;
256 }
257 width_status = 0;
258 } else {
259 if (width_status == 0) {
260 first_code = pObj->GetInteger();
261 width_status = 1;
262 } else if (width_status == 1) {
263 last_code = pObj->GetInteger();
264 width_status = 2;
265 iCurElement = 0;
266 } else {
267 if (!iCurElement) {
268 result->push_back(first_code);
269 result->push_back(last_code);
270 }
271 result->push_back(pObj->GetInteger());
272 iCurElement++;
273 if (iCurElement == nElements)
274 width_status = 0;
275 }
276 }
277 }
278}
279
280} // namespace
281
282CPDF_CIDFont::CPDF_CIDFont(CPDF_Document* pDocument,
283 RetainPtr<CPDF_Dictionary> pFontDict)
284 : CPDF_Font(pDocument, std::move(pFontDict)) {
285 for (size_t i = 0; i < std::size(m_CharBBox); ++i)
286 m_CharBBox[i] = FX_RECT(-1, -1, -1, -1);
287}
288
289CPDF_CIDFont::~CPDF_CIDFont() = default;
290
291bool CPDF_CIDFont::IsCIDFont() const {
292 return true;
293}
294
295const CPDF_CIDFont* CPDF_CIDFont::AsCIDFont() const {
296 return this;
297}
298
299CPDF_CIDFont* CPDF_CIDFont::AsCIDFont() {
300 return this;
301}
302
303uint16_t CPDF_CIDFont::CIDFromCharCode(uint32_t charcode) const {
304 return m_pCMap ? m_pCMap->CIDFromCharCode(charcode)
305 : static_cast<uint16_t>(charcode);
306}
307
308bool CPDF_CIDFont::IsVertWriting() const {
309 return m_pCMap && m_pCMap->IsVertWriting();
310}
311
312WideString CPDF_CIDFont::UnicodeFromCharCode(uint32_t charcode) const {
313 WideString str = CPDF_Font::UnicodeFromCharCode(charcode);
314 if (!str.IsEmpty())
315 return str;
316 wchar_t ret = GetUnicodeFromCharCode(charcode);
317 return ret ? WideString(ret) : WideString();
318}
319
320wchar_t CPDF_CIDFont::GetUnicodeFromCharCode(uint32_t charcode) const {
321 switch (m_pCMap->GetCoding()) {
322 case CIDCoding::kUCS2:
323 case CIDCoding::kUTF16:
324 return static_cast<wchar_t>(charcode);
325 case CIDCoding::kCID:
326 if (!m_pCID2UnicodeMap || !m_pCID2UnicodeMap->IsLoaded())
327 return 0;
328 return m_pCID2UnicodeMap->UnicodeFromCID(static_cast<uint16_t>(charcode));
329 default:
330 break;
331 }
332 if (m_pCID2UnicodeMap && m_pCID2UnicodeMap->IsLoaded() && m_pCMap->IsLoaded())
333 return m_pCID2UnicodeMap->UnicodeFromCID(CIDFromCharCode(charcode));
334
335#if BUILDFLAG(IS_WIN)
336 wchar_t unicode;
337 int charsize = 1;
338 if (charcode > 255) {
339 charcode = (charcode % 256) * 256 + (charcode / 256);
340 charsize = 2;
341 }
342 size_t ret = FX_MultiByteToWideChar(
343 kCharsetCodePages[static_cast<size_t>(m_pCMap->GetCoding())],
344 ByteStringView(reinterpret_cast<const char*>(&charcode), charsize),
345 pdfium::span_from_ref(unicode));
346 return ret == 1 ? unicode : 0;
347#else
348 if (!m_pCMap->GetEmbedMap())
349 return 0;
350 return EmbeddedUnicodeFromCharcode(m_pCMap->GetEmbedMap(),
351 m_pCMap->GetCharset(), charcode);
352#endif
353}
354
355uint32_t CPDF_CIDFont::CharCodeFromUnicode(wchar_t unicode) const {
356 uint32_t charcode = CPDF_Font::CharCodeFromUnicode(unicode);
357 if (charcode)
358 return charcode;
359
360 switch (m_pCMap->GetCoding()) {
361 case CIDCoding::kUNKNOWN:
362 return 0;
363 case CIDCoding::kUCS2:
364 case CIDCoding::kUTF16:
365 return unicode;
366 case CIDCoding::kCID: {
367 if (!m_pCID2UnicodeMap || !m_pCID2UnicodeMap->IsLoaded())
368 return 0;
369 uint32_t cid = 0;
370 while (cid < 65536) {
371 wchar_t this_unicode =
372 m_pCID2UnicodeMap->UnicodeFromCID(static_cast<uint16_t>(cid));
373 if (this_unicode == unicode)
374 return cid;
375 cid++;
376 }
377 break;
378 }
379 default:
380 break;
381 }
382
383 if (unicode < 0x80)
384 return static_cast<uint32_t>(unicode);
385 if (m_pCMap->GetCoding() == CIDCoding::kCID)
386 return 0;
387#if BUILDFLAG(IS_WIN)
388 uint8_t buffer[32];
389 size_t ret = FX_WideCharToMultiByte(
390 kCharsetCodePages[static_cast<size_t>(m_pCMap->GetCoding())],
391 WideStringView(&unicode, 1),
392 pdfium::make_span(reinterpret_cast<char*>(buffer), 4u));
393 if (ret == 1)
394 return buffer[0];
395 if (ret == 2)
396 return buffer[0] * 256 + buffer[1];
397#else
398 if (m_pCMap->GetEmbedMap()) {
399 return EmbeddedCharcodeFromUnicode(m_pCMap->GetEmbedMap(),
400 m_pCMap->GetCharset(), unicode);
401 }
402#endif
403 return 0;
404}
405
// Populates this font from its dictionary. Returns false when a required
// entry (DescendantFonts with exactly one dictionary, or an Encoding that is
// a name or a stream) is missing or malformed; true otherwise.
// NOTE(review): this listing skips original lines 465 and 501, so the
// statement inside the CIDSystemInfo branch and one line before the
// vertical-writing block are incomplete here — confirm against the upstream
// file before editing.
406bool CPDF_CIDFont::Load() {
// A font dictionary whose own Subtype is TrueType takes the GB2312 path.
407 if (m_pFontDict->GetByteStringFor("Subtype") == "TrueType") {
408 LoadGB2312();
409 return true;
410 }
411
412 RetainPtr<const CPDF_Array> pFonts =
413 m_pFontDict->GetArrayFor("DescendantFonts");
414 if (!pFonts || pFonts->size() != 1)
415 return false;
416
417 RetainPtr<const CPDF_Dictionary> pCIDFontDict = pFonts->GetDictAt(0);
418 if (!pCIDFontDict)
419 return false;
420
// Remember non-embedded Adobe CourierStd variants; GlyphFromCharCode() has a
// dedicated branch keyed on m_bAdobeCourierStd.
421 m_BaseFontName = pCIDFontDict->GetByteStringFor("BaseFont");
422 if ((m_BaseFontName == "CourierStd" || m_BaseFontName == "CourierStd-Bold" ||
423 m_BaseFontName == "CourierStd-BoldOblique" ||
424 m_BaseFontName == "CourierStd-Oblique") &&
425 !IsEmbedded()) {
426 m_bAdobeCourierStd = true;
427 }
428
429 RetainPtr<const CPDF_Object> pEncoding =
430 m_pFontDict->GetDirectObjectFor("Encoding");
431 if (!pEncoding)
432 return false;
433
// Any descendant Subtype other than CIDFontType0 is treated as TrueType.
434 ByteString subtype = pCIDFontDict->GetByteStringFor("Subtype");
435 m_FontType =
436 subtype == "CIDFontType0" ? CIDFontType::kType1 : CIDFontType::kTrueType;
437
438 if (!pEncoding->IsName() && !pEncoding->IsStream())
439 return false;
440
// Encoding stream: build a CMap from its filtered data.
// Encoding name: fetch the predefined CMap of that name.
441 auto* pFontGlobals = CPDF_FontGlobals::GetInstance();
442 const CPDF_Stream* pEncodingStream = pEncoding->AsStream();
443 if (pEncodingStream) {
444 auto pAcc =
445 pdfium::MakeRetain<CPDF_StreamAcc>(pdfium::WrapRetain(pEncodingStream));
446 pAcc->LoadAllDataFiltered();
447 pdfium::span<const uint8_t> span = pAcc->GetSpan();
448 m_pCMap = pdfium::MakeRetain<CPDF_CMap>(span);
449 } else {
450 DCHECK(pEncoding->IsName());
451 ByteString cmap = pEncoding->GetString();
452 m_pCMap = pFontGlobals->GetPredefinedCMap(cmap);
453 }
454
455 RetainPtr<const CPDF_Dictionary> pFontDesc =
456 pCIDFontDict->GetDictFor("FontDescriptor");
457 if (pFontDesc)
458 LoadFontDescriptor(pFontDesc.Get());
459
// When the CMap does not identify a charset, consult CIDSystemInfo/Ordering.
460 m_Charset = m_pCMap->GetCharset();
461 if (m_Charset == CIDSET_UNKNOWN) {
462 RetainPtr<const CPDF_Dictionary> pCIDInfo =
463 pCIDFontDict->GetDictFor("CIDSystemInfo");
464 if (pCIDInfo) {
// NOTE(review): original line 465 is missing from this listing; presumably it
// assigns m_Charset from the Ordering string below — verify upstream.
466 pCIDInfo->GetByteStringFor("Ordering").AsStringView());
467 }
468 }
469 if (m_Charset != CIDSET_UNKNOWN) {
470 m_pCID2UnicodeMap = pFontGlobals->GetCID2UnicodeMap(m_Charset);
471 }
// Pick a charmap on the face: Unicode for Type1, coding-dependent otherwise.
472 RetainPtr<CFX_Face> face = m_Font.GetFace();
473 if (face) {
474 if (m_FontType == CIDFontType::kType1) {
475 face->SelectCharMap(fxge::FontEncoding::kUnicode);
476 } else {
477 UseCIDCharmap(face, m_pCMap->GetCoding());
478 }
479 }
// Horizontal widths: DW defaults to 1000; W entries carry one value each.
480 m_DefaultWidth = pCIDFontDict->GetIntegerFor("DW", 1000);
481 RetainPtr<const CPDF_Array> pWidthArray = pCIDFontDict->GetArrayFor("W");
482 if (pWidthArray)
483 LoadMetricsArray(std::move(pWidthArray), &m_WidthList, 1);
484
485 if (!IsEmbedded())
486 LoadSubstFont();
487
// CIDToGIDMap: a stream is kept for per-CID lookup; the name "Identity"
// (only when a font file is present) marks CIDs as glyph ids.
488 RetainPtr<const CPDF_Object> pmap =
489 pCIDFontDict->GetDirectObjectFor("CIDToGIDMap");
490 if (pmap) {
491 RetainPtr<const CPDF_Stream> pMapStream(pmap->AsStream());
492 if (pMapStream) {
493 m_pStreamAcc = pdfium::MakeRetain<CPDF_StreamAcc>(std::move(pMapStream));
494 m_pStreamAcc->LoadAllDataFiltered();
495 } else if (m_pFontFile && pmap->IsName() &&
496 pmap->GetString() == "Identity") {
497 m_bCIDIsGID = true;
498 }
499 }
500
// NOTE(review): original line 501 is missing from this listing.
// Vertical metrics: W2 entries carry three values each; DW2 supplies the
// defaults read into m_DefaultVY and m_DefaultW1.
502 if (IsVertWriting()) {
503 RetainPtr<const CPDF_Array> pWidth2Array = pCIDFontDict->GetArrayFor("W2");
504 if (pWidth2Array)
505 LoadMetricsArray(std::move(pWidth2Array), &m_VertMetrics, 3);
506
507 RetainPtr<const CPDF_Array> pDefaultArray =
508 pCIDFontDict->GetArrayFor("DW2");
509 if (pDefaultArray) {
510 m_DefaultVY = pDefaultArray->GetIntegerAt(0);
511 m_DefaultW1 = pDefaultArray->GetIntegerAt(1);
512 }
513 }
514
515 // TODO(thestig): Better identify font types and identify more font types.
516 if (m_FontType == CIDFontType::kTrueType && IsEmbedded())
517 m_Font.SetFontType(CFX_Font::FontType::kCIDTrueType);
518
519 return true;
520}
521
// Returns the bounding box for `charcode`. Results for charcodes below 256
// are cached in m_CharBBox; a cached entry is recognised by `right != -1`.
// NOTE(review): this listing skips original line 581, so the statement that
// uses `matrix` in the Japan1 branch is not visible here — confirm against
// the upstream file before editing.
522FX_RECT CPDF_CIDFont::GetCharBBox(uint32_t charcode) {
523 if (charcode < 256 && m_CharBBox[charcode].right != -1)
524 return m_CharBBox[charcode];
525
526 FX_RECT rect;
527 bool bVert = false;
528 int glyph_index = GlyphFromCharCode(charcode, &bVert);
529 RetainPtr<CFX_Face> face = m_Font.GetFace();
530 if (face) {
531 FXFT_FaceRec* face_rec = face->GetRec();
// Faces reporting IsTricky(): measure the loaded glyph's control box in
// pixels and scale it by 1000 / ppem when the ppem values are non-zero.
532 if (face->IsTricky()) {
533 int err = FT_Load_Glyph(face_rec, glyph_index,
534 FT_LOAD_IGNORE_GLOBAL_ADVANCE_WIDTH);
535 if (!err) {
536 FT_Glyph glyph;
537 err = FT_Get_Glyph(face_rec->glyph, &glyph);
538 if (!err) {
539 FT_BBox cbox;
540 FT_Glyph_Get_CBox(glyph, FT_GLYPH_BBOX_PIXELS, &cbox);
// Clamp each edge so the multiplication by 1000 below cannot overflow.
541 const int xMin = FTPosToCBoxInt(cbox.xMin);
542 const int xMax = FTPosToCBoxInt(cbox.xMax);
543 const int yMin = FTPosToCBoxInt(cbox.yMin);
544 const int yMax = FTPosToCBoxInt(cbox.yMax);
545 const int pixel_size_x = face_rec->size->metrics.x_ppem;
546 const int pixel_size_y = face_rec->size->metrics.y_ppem;
547 if (pixel_size_x == 0 || pixel_size_y == 0) {
548 rect = FX_RECT(xMin, yMax, xMax, yMin);
549 } else {
550 rect =
551 FX_RECT(xMin * 1000 / pixel_size_x, yMax * 1000 / pixel_size_y,
552 xMax * 1000 / pixel_size_x, yMin * 1000 / pixel_size_y);
553 }
// Keep the box within the face's ascender/descender.
554 rect.top = std::min(rect.top, static_cast<int>(face->GetAscender()));
555 rect.bottom =
556 std::max(rect.bottom, static_cast<int>(face->GetDescender()));
557 FT_Done_Glyph(glyph);
558 }
559 }
560 } else {
// Other faces: load unscaled, take the face-derived box, then grow the top
// by 1/64th, saturating at INT_MAX instead of overflowing.
561 int err = FT_Load_Glyph(face_rec, glyph_index, FT_LOAD_NO_SCALE);
562 if (err == 0) {
563 rect = GetCharBBoxForFace(face);
564 if (rect.top <= kMaxRectTop)
565 rect.top += rect.top / 64;
566 else
567 rect.top = std::numeric_limits<int>::max();
568 }
569 }
570 }
// Japan1 fonts without a font file: build the per-CID transform matrix when
// the glyph is not a vertical substitute.
571 if (!m_pFontFile && m_Charset == CIDSET_JAPAN1) {
572 uint16_t cid = CIDFromCharCode(charcode);
573 const uint8_t* pTransform = GetCIDTransform(cid);
574 if (pTransform && !bVert) {
575 CFX_Matrix matrix(CIDTransformToFloat(pTransform[0]),
576 CIDTransformToFloat(pTransform[1]),
577 CIDTransformToFloat(pTransform[2]),
578 CIDTransformToFloat(pTransform[3]),
579 CIDTransformToFloat(pTransform[4]) * 1000,
580 CIDTransformToFloat(pTransform[5]) * 1000);
// NOTE(review): original line 581 is missing here; presumably it applies
// `matrix` to `rect` — verify upstream.
582 }
583 }
584 if (charcode < 256)
585 m_CharBBox[charcode] = rect;
586
587 return rect;
588}
589
590int CPDF_CIDFont::GetCharWidthF(uint32_t charcode) {
591 if (charcode < 0x80 && m_bAnsiWidthsFixed)
592 return (charcode >= 32 && charcode < 127) ? 500 : 0;
593
594 uint16_t cid = CIDFromCharCode(charcode);
595 size_t size = m_WidthList.size();
596 const int* pList = m_WidthList.data();
597 for (size_t i = 0; i < size; i += 3) {
598 const int* pEntry = pList + i;
599 if (IsMetricForCID(pEntry, cid))
600 return pEntry[2];
601 }
602 return m_DefaultWidth;
603}
604
605int16_t CPDF_CIDFont::GetVertWidth(uint16_t cid) const {
606 size_t vertsize = m_VertMetrics.size() / 5;
607 if (vertsize) {
608 const int* pTable = m_VertMetrics.data();
609 for (size_t i = 0; i < vertsize; i++) {
610 const int* pEntry = pTable + (i * 5);
611 if (IsMetricForCID(pEntry, cid))
612 return static_cast<int16_t>(pEntry[2]);
613 }
614 }
615 return m_DefaultW1;
616}
617
618CFX_Point16 CPDF_CIDFont::GetVertOrigin(uint16_t cid) const {
619 size_t vertsize = m_VertMetrics.size() / 5;
620 if (vertsize) {
621 const int* pTable = m_VertMetrics.data();
622 for (size_t i = 0; i < vertsize; i++) {
623 const int* pEntry = pTable + (i * 5);
624 if (IsMetricForCID(pEntry, cid)) {
625 return {static_cast<int16_t>(pEntry[3]),
626 static_cast<int16_t>(pEntry[4])};
627 }
628 }
629 }
630 int width = m_DefaultWidth;
631 size_t size = m_WidthList.size();
632 const int* pList = m_WidthList.data();
633 for (size_t i = 0; i < size; i += 3) {
634 const int* pEntry = pList + i;
635 if (IsMetricForCID(pEntry, cid)) {
636 width = pEntry[2];
637 break;
638 }
639 }
640 return {static_cast<int16_t>(width / 2), m_DefaultVY};
641}
642
643int CPDF_CIDFont::GetGlyphIndex(uint32_t unicode, bool* pVertGlyph) {
644 if (pVertGlyph)
645 *pVertGlyph = false;
646
647 int index = m_Font.GetFace()->GetCharIndex(unicode);
648 if (unicode == pdfium::unicode::kBoxDrawingsLightVerical)
649 return index;
650
651 if (!index || !IsVertWriting())
652 return index;
653
654 if (m_pTTGSUBTable)
655 return GetVerticalGlyph(index, pVertGlyph);
656
657 static constexpr uint32_t kGsubTag =
658 CFX_FontMapper::MakeTag('G', 'S', 'U', 'B');
659 RetainPtr<CFX_Face> face = m_Font.GetFace();
660 size_t length = face->GetSfntTable(kGsubTag, {});
661 if (!length) {
662 return index;
663 }
664
665 auto sub_data = FixedSizeDataVector<uint8_t>::Uninit(length);
666 if (!face->GetSfntTable(kGsubTag, sub_data.span())) {
667 return index;
668 }
669
670 // CFX_CTTGSUBTable parses the data and stores all the values in its structs.
671 // It does not store pointers into `sub_data`.
672 m_pTTGSUBTable = std::make_unique<CFX_CTTGSUBTable>(sub_data.span());
673 return GetVerticalGlyph(index, pVertGlyph);
674}
675
676int CPDF_CIDFont::GetVerticalGlyph(int index, bool* pVertGlyph) {
677 uint32_t vindex = m_pTTGSUBTable->GetVerticalGlyph(index);
678 if (!vindex)
679 return index;
680
681 index = vindex;
682 if (pVertGlyph)
683 *pVertGlyph = true;
684 return index;
685}
686
// Resolves `charcode` to a glyph index (or, on some paths, to a raw
// CID / Unicode value) and returns -1 on several failure paths.
// `pVertGlyph`, when non-null, is reset to false on entry and may be set by
// GetGlyphIndex().
// NOTE(review): this listing skips original lines 726 and 750, so the
// declaration of `base_encoding` and the argument list of one
// CharCodeFromUnicodeForEncoding() call are not visible here — confirm
// against the upstream file before editing.
687int CPDF_CIDFont::GlyphFromCharCode(uint32_t charcode, bool* pVertGlyph) {
688 if (pVertGlyph)
689 *pVertGlyph = false;
690
// Path 1: no font file, and either no CIDToGIDMap stream or a CID-to-Unicode
// map is available. The glyph is found by way of a Unicode value.
691 if (!m_pFontFile && (!m_pStreamAcc || m_pCID2UnicodeMap)) {
692 uint16_t cid = CIDFromCharCode(charcode);
693 wchar_t unicode = 0;
694 if (m_bCIDIsGID) {
695#if BUILDFLAG(IS_APPLE)
696 if (FontStyleIsSymbolic(m_Flags))
697 return cid;
698
699 WideString uni_str = UnicodeFromCharCode(charcode);
700 if (uni_str.IsEmpty())
701 return cid;
702
703 unicode = uni_str[0];
704#else
705 return cid;
706#endif
707 } else {
// Try, in order: CID-to-Unicode map, coding-based lookup, full lookup.
708 if (cid && m_pCID2UnicodeMap && m_pCID2UnicodeMap->IsLoaded())
709 unicode = m_pCID2UnicodeMap->UnicodeFromCID(cid);
710 if (unicode == 0)
711 unicode = GetUnicodeFromCharCode(charcode);
712 if (unicode == 0) {
713 WideString unicode_str = UnicodeFromCharCode(charcode);
714 if (!unicode_str.IsEmpty())
715 unicode = unicode_str[0];
716 }
717 }
// No Unicode value: return the charcode itself (-1 for charcode 0), unless
// this is a non-embedded Adobe CourierStd font, which goes through a
// glyph-name lookup on the charcode shifted by 31.
718 if (unicode == 0) {
719 if (!m_bAdobeCourierStd)
720 return charcode ? static_cast<int>(charcode) : -1;
721
722 charcode += 31;
723 RetainPtr<CFX_Face> face = m_Font.GetFace();
724 bool bMSUnicode = UseTTCharmapMSUnicode(face);
725 bool bMacRoman = !bMSUnicode && UseTTCharmapMacRoman(face);
// NOTE(review): original line 726 is missing here; presumably it declares
// `base_encoding` with a default value — verify upstream.
727 if (bMSUnicode)
728 base_encoding = FontEncoding::kWinAnsi;
729 else if (bMacRoman)
730 base_encoding = FontEncoding::kMacRoman;
731 const char* name =
732 GetAdobeCharName(base_encoding, std::vector<ByteString>(), charcode);
733 if (!name)
734 return charcode ? static_cast<int>(charcode) : -1;
735
736 int index = 0;
737 uint16_t name_unicode = UnicodeFromAdobeName(name);
738 if (!name_unicode)
739 return charcode ? static_cast<int>(charcode) : -1;
740
741 if (base_encoding == FontEncoding::kStandard) {
742 return face->GetCharIndex(name_unicode);
743 }
744
745 if (base_encoding == FontEncoding::kWinAnsi) {
746 index = face->GetCharIndex(name_unicode);
747 } else {
748 DCHECK_EQ(base_encoding, FontEncoding::kMacRoman);
749 uint32_t maccode = CharCodeFromUnicodeForEncoding(
// NOTE(review): original line 750 (the call's arguments) is missing here.
751 index =
752 maccode ? face->GetCharIndex(maccode)
753 : face->GetNameIndex((FT_String*)name);
754 }
755 if (index == 0 || index == 0xffff)
756 return charcode ? static_cast<int>(charcode) : -1;
757 return index;
758 }
// Japan1 remaps: backslash becomes slash; outside Apple builds, 0xa5 becomes
// 0x5c.
759 if (m_Charset == CIDSET_JAPAN1) {
760 if (unicode == '\\') {
761 unicode = '/';
762#if !BUILDFLAG(IS_APPLE)
763 } else if (unicode == 0xa5) {
764 unicode = 0x5c;
765#endif
766 }
767 }
768
769 RetainPtr<CFX_Face> face = m_Font.GetFace();
770 if (!face) {
771 return unicode;
772 }
773
// Without a Unicode charmap, use the first charmap whose encoding yields a
// non-zero code for the charcode; if none does, fall back to charmap 0 and
// the raw charcode.
774 size_t num_charmaps = face->GetCharMapCount();
775 if (!face->SelectCharMap(fxge::FontEncoding::kUnicode)) {
776 size_t i;
777 for (i = 0; i < num_charmaps; i++) {
778 uint32_t ret = CharCodeFromUnicodeForEncoding(
779 face->GetCharMapEncodingByIndex(i), static_cast<wchar_t>(charcode));
780 if (ret == 0)
781 continue;
782 face->SetCharMapByIndex(i);
783 unicode = static_cast<wchar_t>(ret);
784 break;
785 }
786 if (i == num_charmaps && i) {
787 face->SetCharMapByIndex(0);
788 unicode = static_cast<wchar_t>(charcode);
789 }
790 }
791 if (num_charmaps) {
792 int index = GetGlyphIndex(unicode, pVertGlyph);
793 return index != 0 ? index : -1;
794 }
795 return unicode;
796 }
797
// Path 2: a font file is present, or a CIDToGIDMap stream without a
// CID-to-Unicode map.
798 RetainPtr<CFX_Face> face = m_Font.GetFace();
799 if (!face) {
800 return -1;
801 }
802
803 uint16_t cid = CIDFromCharCode(charcode);
804 if (!m_pStreamAcc) {
// No CIDToGIDMap stream: several conditions return the CID as the glyph id.
805 if (m_FontType == CIDFontType::kType1) {
806 return cid;
807 }
808 if (m_pFontFile && m_pCMap->IsDirectCharcodeToCIDTableIsEmpty()) {
809 return cid;
810 }
811 if (m_pCMap->GetCoding() == CIDCoding::kUNKNOWN) {
812 return cid;
813 }
814
815 absl::optional<fxge::FontEncoding> charmap =
816 face->GetCurrentCharMapEncoding();
817 if (!charmap.has_value()) {
818 return cid;
819 }
820
821 if (charmap.value() == fxge::FontEncoding::kUnicode) {
822 WideString unicode_str = UnicodeFromCharCode(charcode);
823 if (unicode_str.IsEmpty())
824 return -1;
825
826 charcode = unicode_str[0];
827 }
828 return GetGlyphIndex(charcode, pVertGlyph);
829 }
// CIDToGIDMap stream: two bytes per CID, high byte first.
830 uint32_t byte_pos = cid * 2;
831 if (byte_pos + 2 > m_pStreamAcc->GetSize())
832 return -1;
833
834 pdfium::span<const uint8_t> span = m_pStreamAcc->GetSpan().subspan(byte_pos);
835 return span[0] * 256 + span[1];
836}
837
838uint32_t CPDF_CIDFont::GetNextChar(ByteStringView pString,
839 size_t* pOffset) const {
840 return m_pCMap->GetNextChar(pString, pOffset);
841}
842
843int CPDF_CIDFont::GetCharSize(uint32_t charcode) const {
844 return m_pCMap->GetCharSize(charcode);
845}
846
847size_t CPDF_CIDFont::CountChar(ByteStringView pString) const {
848 return m_pCMap->CountChar(pString);
849}
850
851int CPDF_CIDFont::AppendChar(char* str, uint32_t charcode) const {
852 return m_pCMap->AppendChar(str, charcode);
853}
854
855bool CPDF_CIDFont::IsUnicodeCompatible() const {
856 if (m_pCID2UnicodeMap && m_pCID2UnicodeMap->IsLoaded() && m_pCMap->IsLoaded())
857 return true;
858 return m_pCMap->GetCoding() != CIDCoding::kUNKNOWN;
859}
860
861void CPDF_CIDFont::LoadSubstFont() {
862 FX_SAFE_INT32 safeStemV(m_StemV);
863 safeStemV *= 5;
864 m_Font.LoadSubst(m_BaseFontName, m_FontType == CIDFontType::kTrueType,
865 m_Flags, safeStemV.ValueOrDefault(FXFONT_FW_NORMAL),
866 m_ItalicAngle, kCharsetCodePages[m_Charset],
867 IsVertWriting());
868}
869
870// static
871float CPDF_CIDFont::CIDTransformToFloat(uint8_t ch) {
872 return (ch < 128 ? ch : ch - 255) * (1.0f / 127);
873}
874
// Sets this font up as a GB1 font: fixed GB1 charset, the predefined
// "GBK-EUC-H" CMap, the matching CID-to-Unicode map, the font descriptor (if
// present) and, for non-embedded fonts, a substitute font. Also turns on the
// fixed ANSI widths consulted by GetCharWidthF().
// NOTE(review): this listing skips original line 889, so one statement before
// the final assignment is not visible here — confirm against the upstream
// file before editing.
875void CPDF_CIDFont::LoadGB2312() {
876 m_BaseFontName = m_pFontDict->GetByteStringFor("BaseFont");
877 m_Charset = CIDSET_GB1;
878
879 auto* pFontGlobals = CPDF_FontGlobals::GetInstance();
880 m_pCMap = pFontGlobals->GetPredefinedCMap("GBK-EUC-H");
881 m_pCID2UnicodeMap = pFontGlobals->GetCID2UnicodeMap(m_Charset);
882 RetainPtr<const CPDF_Dictionary> pFontDesc =
883 m_pFontDict->GetDictFor("FontDescriptor");
884 if (pFontDesc)
885 LoadFontDescriptor(pFontDesc.Get());
886
887 if (!IsEmbedded())
888 LoadSubstFont();
890 m_bAnsiWidthsFixed = true;
891}
892
893const uint8_t* CPDF_CIDFont::GetCIDTransform(uint16_t cid) const {
894 if (m_Charset != CIDSET_JAPAN1 || m_pFontFile)
895 return nullptr;
896
897 const auto* pBegin = std::begin(kJapan1VerticalCIDs);
898 const auto* pEnd = std::end(kJapan1VerticalCIDs);
899 const auto* pTransform = std::lower_bound(
900 pBegin, pEnd, cid,
901 [](const CIDTransform& entry, uint16_t cid) { return entry.cid < cid; });
902
903 return (pTransform < pEnd && cid == pTransform->cid) ? &pTransform->a
904 : nullptr;
905}
CFX_FloatRect(const FX_RECT &rect)
FX_RECT GetOuterRect() const
static constexpr uint32_t MakeTag(char c1, char c2, char c3, char c4)
CFX_FloatRect TransformRect(const CFX_FloatRect &rect) const
CFX_Matrix(float a1, float b1, float c1, float d1, float e1, float f1)
~CPDF_CIDFont() override
size_t CountChar(ByteStringView pString) const override
uint32_t GetNextChar(ByteStringView pString, size_t *pOffset) const override
bool IsCIDFont() const override
const uint8_t * GetCIDTransform(uint16_t cid) const
bool IsUnicodeCompatible() const override
int AppendChar(char *str, uint32_t charcode) const override
CFX_Point16 GetVertOrigin(uint16_t cid) const
int GetCharWidthF(uint32_t charcode) override
const CPDF_CIDFont * AsCIDFont() const override
uint16_t CIDFromCharCode(uint32_t charcode) const
bool Load() override
int GlyphFromCharCode(uint32_t charcode, bool *pVertGlyph) override
int16_t GetVertWidth(uint16_t cid) const
FX_RECT GetCharBBox(uint32_t charcode) override
static float CIDTransformToFloat(uint8_t ch)
bool IsVertWriting() const override
uint32_t CharCodeFromUnicode(wchar_t Unicode) const override
int GetCharSize(uint32_t charcode) const
WideString UnicodeFromCharCode(uint32_t charcode) const override
CPDF_CIDFont * AsCIDFont() override
static CIDSet CharsetFromOrdering(ByteStringView ordering)
static CPDF_FontGlobals * GetInstance()
void LoadFontDescriptor(const CPDF_Dictionary *pFontDesc)
virtual uint32_t CharCodeFromUnicode(wchar_t Unicode) const
static bool UseTTCharmapMacRoman(const RetainPtr< CFX_Face > &face)
Definition cpdf_font.h:152
static bool UseTTCharmapMSUnicode(const RetainPtr< CFX_Face > &face)
Definition cpdf_font.h:146
virtual WideString UnicodeFromCharCode(uint32_t charcode) const
static FX_RECT GetCharBBoxForFace(const RetainPtr< CFX_Face > &face)
void CheckFontMetrics()
bool IsEmbedded() const
Definition cpdf_font.h:101
bool operator==(const char *ptr) const
WideString(wchar_t ch)
CharType operator[](const size_t index) const
Definition widestring.h:146
bool IsEmpty() const
Definition widestring.h:118
@ CIDSET_JAPAN1
@ CIDSET_UNKNOWN
@ CIDSET_GB1
@ CIDSET_NUM_SETS
CIDCoding
Definition cpdf_cmap.h:24
uint32_t CharCodeFromUnicodeForEncoding(fxge::FontEncoding encoding, wchar_t unicode)
FontEncoding
FX_CodePage
Definition fx_codepage.h:18
wchar_t UnicodeFromAdobeName(const char *name)
Definition fx_font.cpp:138
#define FXFONT_FW_NORMAL
Definition fx_font.h:22
constexpr wchar_t kBoxDrawingsLightVerical
Definition fx_unicode.h:102
FX_RECT & operator=(const FX_RECT &that)=default
int32_t bottom
int32_t right
int32_t top
constexpr FX_RECT(int l, int t, int r, int b)