Qt
Internal/Contributor docs for the Qt SDK. Note: These are NOT official API docs; those are found at https://doc.qt.io/
Loading...
Searching...
No Matches
fpdf_parser_utility.cpp
Go to the documentation of this file.
1// Copyright 2014 The PDFium Authors
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7#include "core/fpdfapi/parser/fpdf_parser_utility.h"
8
9#include <ostream>
10#include <utility>
11
12#include "core/fpdfapi/parser/cpdf_array.h"
13#include "core/fpdfapi/parser/cpdf_boolean.h"
14#include "core/fpdfapi/parser/cpdf_dictionary.h"
15#include "core/fpdfapi/parser/cpdf_number.h"
16#include "core/fpdfapi/parser/cpdf_reference.h"
17#include "core/fpdfapi/parser/cpdf_stream.h"
18#include "core/fpdfapi/parser/cpdf_stream_acc.h"
19#include "core/fpdfapi/parser/cpdf_string.h"
20#include "core/fpdfapi/parser/fpdf_parser_decode.h"
21#include "core/fxcrt/fx_extension.h"
22#include "core/fxcrt/fx_stream.h"
23#include "core/fxcrt/span_util.h"
24#include "third_party/base/check.h"
25
// Character-class table indexed by 8-bit character code. Every entry is one
// of the following tags:
//   'W' - whitespace: NUL, TAB, LF, FF, CR, SPACE, 0x80, 0xff
//   'N' - numeric:    0123456789+-.
//   'D' - delimiter:  %()/<>[]{}
//   'R' - regular:    every other code
// The table is laid out 16 entries per row; the comment above each row gives
// the code range (and, for the ASCII half, the characters) that it covers.
// clang-format off
const char kPDFCharTypes[256] = {
    // 0x00-0x0f: NUL SOH STX ETX EOT ENQ ACK BEL BS HT LF VT FF CR SO SI
    'W', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'W', 'W', 'R', 'W', 'W', 'R', 'R',
    // 0x10-0x1f: DLE DC1 DC2 DC3 DC4 NAK SYN ETB CAN EM SUB ESC FS GS RS US
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0x20-0x2f: SP ! " # $ % & ' ( ) * + , - . /
    'W', 'R', 'R', 'R', 'R', 'D', 'R', 'R', 'D', 'D', 'R', 'N', 'R', 'N', 'N', 'D',
    // 0x30-0x3f: 0 1 2 3 4 5 6 7 8 9 : ; < = > ?
    'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'R', 'R', 'D', 'R', 'D', 'R',
    // 0x40-0x4f: @ A B C D E F G H I J K L M N O
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0x50-0x5f: P Q R S T U V W X Y Z [ \ ] ^ _
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'D', 'R', 'D', 'R', 'R',
    // 0x60-0x6f: ` a b c d e f g h i j k l m n o
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0x70-0x7f: p q r s t u v w x y z { | } ~ DEL
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'D', 'R', 'D', 'R', 'R',
    // 0x80-0x8f: 0x80 is classified as whitespace.
    'W', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0x90-0x9f
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0xa0-0xaf
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0xb0-0xbf
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0xc0-0xcf
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0xd0-0xdf
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0xe0-0xef
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0xf0-0xff: 0xff is classified as whitespace.
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'W'};
// clang-format on
77
79 const RetainPtr<IFX_SeekableReadStream>& pFile) {
80 static constexpr size_t kBufSize = 4;
81 uint8_t buf[kBufSize];
82 for (FX_FILESIZE offset = 0; offset <= 1024; ++offset) {
83 if (!pFile->ReadBlockAtOffset(buf, offset))
84 return absl::nullopt;
85
86 if (memcmp(buf, "%PDF", 4) == 0)
87 return offset;
88 }
89 return absl::nullopt;
90}
91
92ByteString PDF_NameDecode(ByteStringView orig) {
93 size_t src_size = orig.GetLength();
94 size_t out_index = 0;
95 ByteString result;
96 {
97 // Span's lifetime must end before ReleaseBuffer() below.
98 pdfium::span<char> pDest = result.GetBuffer(src_size);
99 for (size_t i = 0; i < src_size; i++) {
100 if (orig[i] == '#' && i + 2 < src_size) {
101 pDest[out_index++] = FXSYS_HexCharToInt(orig[i + 1]) * 16 +
102 FXSYS_HexCharToInt(orig[i + 2]);
103 i += 2;
104 } else {
105 pDest[out_index++] = orig[i];
106 }
107 }
108 }
109 result.ReleaseBuffer(out_index);
110 return result;
111}
112
113ByteString PDF_NameEncode(const ByteString& orig) {
114 const uint8_t* src_buf = reinterpret_cast<const uint8_t*>(orig.c_str());
115 int src_len = orig.GetLength();
116 int dest_len = 0;
117 int i;
118 for (i = 0; i < src_len; i++) {
119 uint8_t ch = src_buf[i];
120 if (ch >= 0x80 || PDFCharIsWhitespace(ch) || ch == '#' ||
122 dest_len += 3;
123 } else {
124 dest_len++;
125 }
126 }
127 if (dest_len == src_len)
128 return orig;
129
130 ByteString res;
131 {
132 // Span's lifetime must end before ReleaseBuffer() below.
133 pdfium::span<char> dest_buf = res.GetBuffer(dest_len);
134 dest_len = 0;
135 for (i = 0; i < src_len; i++) {
136 uint8_t ch = src_buf[i];
137 if (ch >= 0x80 || PDFCharIsWhitespace(ch) || ch == '#' ||
139 dest_buf[dest_len++] = '#';
140 FXSYS_IntToTwoHexChars(ch, &dest_buf[dest_len]);
141 dest_len += 2;
142 continue;
143 }
144 dest_buf[dest_len++] = ch;
145 }
146 }
147 res.ReleaseBuffer(dest_len);
148 return res;
149}
150
151std::vector<float> ReadArrayElementsToVector(const CPDF_Array* pArray,
152 size_t nCount) {
153 DCHECK(pArray);
154 DCHECK(pArray->size() >= nCount);
155 std::vector<float> ret(nCount);
156 for (size_t i = 0; i < nCount; ++i)
157 ret[i] = pArray->GetFloatAt(i);
158 return ret;
159}
160
161bool ValidateDictType(const CPDF_Dictionary* dict, ByteStringView type) {
162 DCHECK(!type.IsEmpty());
163 return dict && dict->GetNameFor("Type") == type;
164}
165
166bool ValidateDictAllResourcesOfType(const CPDF_Dictionary* dict,
167 ByteStringView type) {
168 if (!dict)
169 return false;
170
171 CPDF_DictionaryLocker locker(dict);
172 for (const auto& it : locker) {
173 RetainPtr<const CPDF_Dictionary> entry =
174 ToDictionary(it.second->GetDirect());
175 if (!ValidateDictType(entry.Get(), type))
176 return false;
177 }
178 return true;
179}
180
181bool ValidateFontResourceDict(const CPDF_Dictionary* dict) {
182 return ValidateDictAllResourcesOfType(dict, "Font");
183}
184
185bool ValidateDictOptionalType(const CPDF_Dictionary* dict,
186 ByteStringView type) {
187 DCHECK(!type.IsEmpty());
188 return dict && (!dict->KeyExist("Type") || dict->GetNameFor("Type") == type);
189}
190
// Writes |pObj| to |buf| in PDF object syntax and returns |buf|.
//
// A null |pObj| is written as " null". Otherwise the output is selected by
// pObj->GetType(). Array elements and dictionary values for which IsInline()
// is false are written as "<object number> 0 R" instead of being expanded in
// place.
//
// NOTE(review): this listing skips several source lines inside the switch
// (its own numbering jumps 196->198, 199->202, 203->206, 211->213, 229->232
// and 245->247), so some `case` labels and statements are not visible here.
// Confirm against the original file before relying on this text.
191std::ostream& operator<<(std::ostream& buf, const CPDF_Object* pObj) {
192 if (!pObj) {
193 buf << " null";
194 return buf;
195 }
196 switch (pObj->GetType()) {
// NOTE(review): the case label for this branch is not visible; it writes the
// same " null" text as the null-pointer path above.
198 buf << " null";
199 break;
// NOTE(review): the case label(s) for this branch are not visible. It writes
// a space followed by the object's GetString() value.
202 buf << " " << pObj->GetString();
203 break;
// NOTE(review): the case label and the statement that precede this break are
// not visible in the listing.
206 break;
// Names: "/" followed by the PDF_NameEncode()d string.
207 case CPDF_Object::kName: {
208 ByteString str = pObj->GetString();
209 buf << "/" << PDF_NameEncode(str);
210 break;
211 }
// NOTE(review): the case label is not visible; the AsReference() call
// suggests this is the reference branch - confirm. Writes
// " <referenced object number> 0 R ".
213 buf << " " << pObj->AsReference()->GetRefObjNum() << " 0 R ";
214 break;
215 }
// Arrays: "[" followed by each element, then "]".
216 case CPDF_Object::kArray: {
217 const CPDF_Array* p = pObj->AsArray();
218 buf << "[";
219 for (size_t i = 0; i < p->size(); i++) {
220 RetainPtr<const CPDF_Object> pElement = p->GetObjectAt(i);
221 if (!pElement->IsInline()) {
// Non-inline element: write a reference to its object number.
222 buf << " " << pElement->GetObjNum() << " 0 R";
223 } else {
// Inline element: stream the element itself.
224 buf << pElement.Get();
225 }
226 }
227 buf << "]";
228 break;
229 }
// NOTE(review): the case label and the declaration of |locker| are not
// visible; presumably this is the dictionary branch - confirm. Writes "<<",
// then each key as "/" plus its PDF_NameEncode()d form followed by its value,
// then ">>".
232 buf << "<<";
233 for (const auto& it : locker) {
234 const ByteString& key = it.first;
235 const RetainPtr<CPDF_Object>& pValue = it.second;
236 buf << "/" << PDF_NameEncode(key);
237 if (!pValue->IsInline()) {
// Non-inline value: write a reference (here with a trailing space).
238 buf << " " << pValue->GetObjNum() << " 0 R ";
239 } else {
240 buf << pValue;
241 }
242 }
243 buf << ">>";
244 break;
245 }
// NOTE(review): the case label is not visible; the AsStream() call suggests
// this is the stream branch - confirm. Writes the stream's dictionary,
// "stream\r\n", the bytes exposed by GetSpan() after LoadAllDataRaw(), and
// finally "\r\nendstream".
247 RetainPtr<const CPDF_Stream> p(pObj->AsStream());
248 buf << p->GetDict().Get() << "stream\r\n";
249 auto pAcc = pdfium::MakeRetain<CPDF_StreamAcc>(std::move(p));
250 pAcc->LoadAllDataRaw();
251 auto span = fxcrt::reinterpret_span<const char>(pAcc->GetSpan());
// write() is used with an explicit size so the bytes are emitted verbatim.
252 buf.write(span.data(), span.size());
253 buf << "\r\nendstream";
254 break;
255 }
256 }
257 return buf;
258}
CPDF_DictionaryLocker(const CPDF_Dictionary *pDictionary)
bool KeyExist(const ByteString &key) const
ByteString GetNameFor(const ByteString &key) const
virtual Type GetType() const =0
virtual ByteString GetString() const
const CPDF_Array * AsArray() const
const CPDF_Dictionary * AsDictionary() const
const CPDF_String * AsString() const
const CPDF_Reference * AsReference() const
uint32_t GetRefObjNum() const
ByteString EncodeString() const
const char * c_str() const
Definition bytestring.h:76
bool PDFCharIsWhitespace(uint8_t c)
bool ValidateDictOptionalType(const CPDF_Dictionary *dict, ByteStringView type)
ByteString PDF_NameDecode(ByteStringView orig)
const char kPDFCharTypes[256]
ByteString PDF_NameEncode(const ByteString &orig)
bool ValidateFontResourceDict(const CPDF_Dictionary *dict)
absl::optional< FX_FILESIZE > GetHeaderOffset(const RetainPtr< IFX_SeekableReadStream > &pFile)
bool ValidateDictType(const CPDF_Dictionary *dict, ByteStringView type)
std::vector< float > ReadArrayElementsToVector(const CPDF_Array *pArray, size_t nCount)
bool ValidateDictAllResourcesOfType(const CPDF_Dictionary *dict, ByteStringView type)
bool PDFCharIsDelimiter(uint8_t c)
#define FX_FILESIZE
Definition fx_types.h:19