Qt
Internal/Contributor docs for the Qt SDK. Note: These are NOT official API docs; those are found at https://doc.qt.io/
Loading...
Searching...
No Matches
cpdf_hint_tables.cpp
Go to the documentation of this file.
// Copyright 2016 The PDFium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7#include "core/fpdfapi/parser/cpdf_hint_tables.h"
8
9#include <limits>
10
11#include "core/fpdfapi/parser/cpdf_array.h"
12#include "core/fpdfapi/parser/cpdf_data_avail.h"
13#include "core/fpdfapi/parser/cpdf_dictionary.h"
14#include "core/fpdfapi/parser/cpdf_document.h"
15#include "core/fpdfapi/parser/cpdf_linearized_header.h"
16#include "core/fpdfapi/parser/cpdf_parser.h"
17#include "core/fpdfapi/parser/cpdf_read_validator.h"
18#include "core/fpdfapi/parser/cpdf_stream.h"
19#include "core/fpdfapi/parser/cpdf_stream_acc.h"
20#include "core/fpdfapi/parser/cpdf_syntax_parser.h"
21#include "core/fxcrt/cfx_bitstream.h"
22#include "core/fxcrt/fx_safe_types.h"
23#include "third_party/base/check.h"
24#include "third_party/base/containers/span.h"
25
26namespace {
27
28bool CanReadFromBitStream(const CFX_BitStream* hStream,
29 const FX_SAFE_UINT32& bits) {
30 return bits.IsValid() && hStream->BitsRemaining() >= bits.ValueOrDie();
31}
32
// Sanity check for bit-count values read from the page table header. The note
// in the PDF 1.7 reference for Table F.3 says the valid range is only 0
// through 32, but a width of 0 is not useful either, so only values in
// [1, 32] are accepted.
bool IsValidPageOffsetHintTableBitCount(uint32_t bits) {
  return bits > 0 && bits <= 32;
}
39
40} // namespace
41
43CPDF_HintTables::PageInfo::~PageInfo() = default;
44
45// static
46std::unique_ptr<CPDF_HintTables> CPDF_HintTables::Parse(
47 CPDF_SyntaxParser* parser,
48 const CPDF_LinearizedHeader* pLinearized) {
49 DCHECK(parser);
50 if (!pLinearized || pLinearized->GetPageCount() <= 1 ||
51 !pLinearized->HasHintTable()) {
52 return nullptr;
53 }
54
55 const FX_FILESIZE szHintStart = pLinearized->GetHintStart();
56 const uint32_t szHintLength = pLinearized->GetHintLength();
57
58 if (!parser->GetValidator()->CheckDataRangeAndRequestIfUnavailable(
59 szHintStart, szHintLength)) {
60 return nullptr;
61 }
62
63 parser->SetPos(szHintStart);
64 RetainPtr<CPDF_Stream> hints_stream = ToStream(
65 parser->GetIndirectObject(nullptr, CPDF_SyntaxParser::ParseType::kLoose));
66
67 if (!hints_stream)
68 return nullptr;
69
70 auto pHintTables = std::make_unique<CPDF_HintTables>(
71 parser->GetValidator().Get(), pLinearized);
72 if (!pHintTables->LoadHintStream(hints_stream.Get()))
73 return nullptr;
74
75 return pHintTables;
76}
77
79 const CPDF_LinearizedHeader* pLinearized)
81 DCHECK(m_pLinearized);
82}
83
85
87 const uint32_t nPages = m_pLinearized->GetPageCount();
88 if (nPages < 1 || nPages >= CPDF_Document::kPageMaxNum)
89 return false;
90
91 const uint32_t nFirstPageNum = m_pLinearized->GetFirstPageNo();
92 if (nFirstPageNum >= nPages)
93 return false;
94
95 if (!hStream || hStream->IsEOF())
96 return false;
97
98 const uint32_t kHeaderSize = 288;
99 if (hStream->BitsRemaining() < kHeaderSize)
100 return false;
101
102 // Item 1: The least number of objects in a page.
103 const uint32_t dwObjLeastNum = hStream->GetBits(32);
104 if (!dwObjLeastNum || dwObjLeastNum >= CPDF_Parser::kMaxObjectNumber)
105 return false;
106
107 // Item 2: The location of the first page's page object.
108 const FX_FILESIZE szFirstObjLoc =
109 HintsOffsetToFileOffset(hStream->GetBits(32));
110 if (!szFirstObjLoc)
111 return false;
112
113 m_szFirstPageObjOffset = szFirstObjLoc;
114
115 // Item 3: The number of bits needed to represent the difference
116 // between the greatest and least number of objects in a page.
117 const uint32_t dwDeltaObjectsBits = hStream->GetBits(16);
118 if (!IsValidPageOffsetHintTableBitCount(dwDeltaObjectsBits))
119 return false;
120
121 // Item 4: The least length of a page in bytes.
122 const uint32_t dwPageLeastLen = hStream->GetBits(32);
123 if (!dwPageLeastLen)
124 return false;
125
126 // Item 5: The number of bits needed to represent the difference
127 // between the greatest and least length of a page, in bytes.
128 const uint32_t dwDeltaPageLenBits = hStream->GetBits(16);
129 if (!IsValidPageOffsetHintTableBitCount(dwDeltaPageLenBits))
130 return false;
131
132 // Skip Item 6, 7, 8, 9 total 96 bits.
133 hStream->SkipBits(96);
134
135 // Item 10: The number of bits needed to represent the greatest
136 // number of shared object references.
137 const uint32_t dwSharedObjBits = hStream->GetBits(16);
138 if (!IsValidPageOffsetHintTableBitCount(dwSharedObjBits))
139 return false;
140
141 // Item 11: The number of bits needed to represent the numerically
142 // greatest shared object identifier used by the pages.
143 const uint32_t dwSharedIdBits = hStream->GetBits(16);
144 if (!IsValidPageOffsetHintTableBitCount(dwSharedIdBits))
145 return false;
146
147 // Item 12: The number of bits needed to represent the numerator of
148 // the fractional position for each shared object reference. For each
149 // shared object referenced from a page, there is an indication of
150 // where in the page's content stream the object is first referenced.
151 const uint32_t dwSharedNumeratorBits = hStream->GetBits(16);
152 if (dwSharedNumeratorBits > 32)
153 return false;
154
155 // Item 13: Skip Item 13 which has 16 bits.
156 hStream->SkipBits(16);
157
158 FX_SAFE_UINT32 required_bits = dwDeltaObjectsBits;
159 required_bits *= nPages;
160 if (!CanReadFromBitStream(hStream, required_bits))
161 return false;
162
163 m_PageInfos = std::vector<PageInfo>(nPages);
164 m_PageInfos[nFirstPageNum].set_start_obj_num(
165 m_pLinearized->GetFirstPageObjNum());
166 // The object number of remaining pages starts from 1.
167 FX_SAFE_UINT32 dwStartObjNum = 1;
168 for (uint32_t i = 0; i < nPages; ++i) {
169 FX_SAFE_UINT32 safeDeltaObj = hStream->GetBits(dwDeltaObjectsBits);
170 safeDeltaObj += dwObjLeastNum;
171 if (!safeDeltaObj.IsValid())
172 return false;
173 m_PageInfos[i].set_objects_count(safeDeltaObj.ValueOrDie());
174 if (i == nFirstPageNum)
175 continue;
176 m_PageInfos[i].set_start_obj_num(dwStartObjNum.ValueOrDie());
177 dwStartObjNum += m_PageInfos[i].objects_count();
178 if (!dwStartObjNum.IsValid() ||
179 dwStartObjNum.ValueOrDie() >= CPDF_Parser::kMaxObjectNumber) {
180 return false;
181 }
182 }
183 hStream->ByteAlign();
184
185 required_bits = dwDeltaPageLenBits;
186 required_bits *= nPages;
187 if (!CanReadFromBitStream(hStream, required_bits))
188 return false;
189
190 for (uint32_t i = 0; i < nPages; ++i) {
191 FX_SAFE_UINT32 safePageLen = hStream->GetBits(dwDeltaPageLenBits);
192 safePageLen += dwPageLeastLen;
193 if (!safePageLen.IsValid())
194 return false;
195 m_PageInfos[i].set_page_length(safePageLen.ValueOrDie());
196 }
197
198 DCHECK(m_szFirstPageObjOffset);
199 m_PageInfos[nFirstPageNum].set_page_offset(m_szFirstPageObjOffset);
200 FX_FILESIZE prev_page_end = m_pLinearized->GetFirstPageEndOffset();
201 for (uint32_t i = 0; i < nPages; ++i) {
202 if (i == nFirstPageNum)
203 continue;
204 m_PageInfos[i].set_page_offset(prev_page_end);
205 prev_page_end += m_PageInfos[i].page_length();
206 }
207 hStream->ByteAlign();
208
209 // Number of shared objects.
210 required_bits = dwSharedObjBits;
211 required_bits *= nPages;
212 if (!CanReadFromBitStream(hStream, required_bits))
213 return false;
214
215 std::vector<uint32_t> dwNSharedObjsArray(nPages);
216 for (uint32_t i = 0; i < nPages; i++)
217 dwNSharedObjsArray[i] = hStream->GetBits(dwSharedObjBits);
218 hStream->ByteAlign();
219
220 // Array of identifiers, size = nshared_objects.
221 for (uint32_t i = 0; i < nPages; i++) {
222 required_bits = dwSharedIdBits;
223 required_bits *= dwNSharedObjsArray[i];
224 if (!CanReadFromBitStream(hStream, required_bits))
225 return false;
226
227 for (uint32_t j = 0; j < dwNSharedObjsArray[i]; j++)
228 m_PageInfos[i].AddIdentifier(hStream->GetBits(dwSharedIdBits));
229 }
230 hStream->ByteAlign();
231
232 if (dwSharedNumeratorBits) {
233 for (uint32_t i = 0; i < nPages; i++) {
234 FX_SAFE_UINT32 safeSize = dwNSharedObjsArray[i];
235 safeSize *= dwSharedNumeratorBits;
236 if (!CanReadFromBitStream(hStream, safeSize))
237 return false;
238
239 hStream->SkipBits(safeSize.ValueOrDie());
240 }
241 hStream->ByteAlign();
242 }
243
244 FX_SAFE_UINT32 safeTotalPageLen = nPages;
245 safeTotalPageLen *= dwDeltaPageLenBits;
246 if (!CanReadFromBitStream(hStream, safeTotalPageLen))
247 return false;
248
249 hStream->SkipBits(safeTotalPageLen.ValueOrDie());
250 hStream->ByteAlign();
251 return true;
252}
253
255 uint32_t offset) {
256 if (!hStream || hStream->IsEOF())
257 return false;
258
259 FX_SAFE_UINT32 bit_offset = offset;
260 bit_offset *= 8;
261 if (!bit_offset.IsValid() || hStream->GetPos() > bit_offset.ValueOrDie())
262 return false;
263 hStream->SkipBits((bit_offset - hStream->GetPos()).ValueOrDie());
264
265 const uint32_t kHeaderSize = 192;
266 if (hStream->BitsRemaining() < kHeaderSize)
267 return false;
268
269 // Item 1: The object number of the first object in the shared objects
270 // section.
271 uint32_t dwFirstSharedObjNum = hStream->GetBits(32);
272 if (!dwFirstSharedObjNum)
273 return false;
274
275 // Item 2: The location of the first object in the shared objects section.
276 const FX_FILESIZE szFirstSharedObjLoc =
277 HintsOffsetToFileOffset(hStream->GetBits(32));
278 if (!szFirstSharedObjLoc)
279 return false;
280
281 // Item 3: The number of shared object entries for the first page.
282 m_nFirstPageSharedObjs = hStream->GetBits(32);
283
284 // Item 4: The number of shared object entries for the shared objects
285 // section, including the number of shared object entries for the first page.
286 uint32_t dwSharedObjTotal = hStream->GetBits(32);
287
288 // Item 5: The number of bits needed to represent the greatest number of
289 // objects in a shared object group.
290 uint32_t dwSharedObjNumBits = hStream->GetBits(16);
291 if (dwSharedObjNumBits > 32)
292 return false;
293
294 // Item 6: The least length of a shared object group in bytes.
295 uint32_t dwGroupLeastLen = hStream->GetBits(32);
296
297 // Item 7: The number of bits needed to represent the difference between the
298 // greatest and least length of a shared object group, in bytes.
299 uint32_t dwDeltaGroupLen = hStream->GetBits(16);
300
301 // Trying to decode more than 32 bits isn't going to work when we write into
302 // a uint32_t. Decoding 0 bits also makes no sense.
303 if (!IsValidPageOffsetHintTableBitCount(dwDeltaGroupLen))
304 return false;
305
306 if (dwFirstSharedObjNum >= CPDF_Parser::kMaxObjectNumber ||
307 m_nFirstPageSharedObjs >= CPDF_Parser::kMaxObjectNumber ||
308 dwSharedObjTotal >= CPDF_Parser::kMaxObjectNumber) {
309 return false;
310 }
311
312 FX_SAFE_UINT32 required_bits = dwSharedObjTotal;
313 required_bits *= dwDeltaGroupLen;
314 if (!CanReadFromBitStream(hStream, required_bits))
315 return false;
316
317 if (dwSharedObjTotal > 0) {
318 uint32_t dwLastSharedObj = dwSharedObjTotal - 1;
319 if (dwLastSharedObj > m_nFirstPageSharedObjs) {
320 FX_SAFE_UINT32 safeObjNum = dwFirstSharedObjNum;
321 safeObjNum += dwLastSharedObj - m_nFirstPageSharedObjs;
322 if (!safeObjNum.IsValid())
323 return false;
324 }
325 }
326
327 m_SharedObjGroupInfos.resize(dwSharedObjTotal);
328 // Table F.6 - Shared object hint table, shared object group entries:
329 // Item 1: A number that, when added to the least shared object
330 // group length.
331 FX_SAFE_FILESIZE prev_shared_group_end_offset = m_szFirstPageObjOffset;
332 for (uint32_t i = 0; i < dwSharedObjTotal; ++i) {
333 if (i == m_nFirstPageSharedObjs)
334 prev_shared_group_end_offset = szFirstSharedObjLoc;
335
336 FX_SAFE_UINT32 safeObjLen = hStream->GetBits(dwDeltaGroupLen);
337 safeObjLen += dwGroupLeastLen;
338 if (!safeObjLen.IsValid())
339 return false;
340
341 m_SharedObjGroupInfos[i].m_dwLength = safeObjLen.ValueOrDie();
342 m_SharedObjGroupInfos[i].m_szOffset =
343 prev_shared_group_end_offset.ValueOrDie();
344 prev_shared_group_end_offset += m_SharedObjGroupInfos[i].m_dwLength;
345 if (!prev_shared_group_end_offset.IsValid())
346 return false;
347 }
348
349 hStream->ByteAlign();
350 {
351 // Item 2: A flag indicating whether the shared object signature (item 3) is
352 // present.
353 uint32_t signature_count = 0;
354 for (uint32_t i = 0; i < dwSharedObjTotal; ++i) {
355 signature_count += hStream->GetBits(1);
356 }
357 hStream->ByteAlign();
358 // Item 3: (Only if item 2 is 1) The shared object signature, a 16-byte MD5
359 // hash that uniquely identifies the resource that the group of objects
360 // represents.
361 if (signature_count) {
362 required_bits = signature_count;
363 required_bits *= 128;
364 if (!CanReadFromBitStream(hStream, required_bits))
365 return false;
366
367 hStream->SkipBits(required_bits.ValueOrDie());
368 hStream->ByteAlign();
369 }
370 }
371 // Item 4: A number equal to 1 less than the number of objects in the group.
372 FX_SAFE_UINT32 cur_obj_num = m_pLinearized->GetFirstPageObjNum();
373 for (uint32_t i = 0; i < dwSharedObjTotal; ++i) {
374 if (i == m_nFirstPageSharedObjs)
375 cur_obj_num = dwFirstSharedObjNum;
376
377 FX_SAFE_UINT32 obj_count =
378 dwSharedObjNumBits ? hStream->GetBits(dwSharedObjNumBits) : 0;
379 obj_count += 1;
380 if (!obj_count.IsValid())
381 return false;
382
383 uint32_t obj_num = cur_obj_num.ValueOrDie();
384 cur_obj_num += obj_count.ValueOrDie();
385 if (!cur_obj_num.IsValid())
386 return false;
387
388 m_SharedObjGroupInfos[i].m_dwStartObjNum = obj_num;
389 m_SharedObjGroupInfos[i].m_dwObjectsCount = obj_count.ValueOrDie();
390 }
391
392 hStream->ByteAlign();
393 return true;
394}
395
396bool CPDF_HintTables::GetPagePos(uint32_t index,
397 FX_FILESIZE* szPageStartPos,
398 FX_FILESIZE* szPageLength,
399 uint32_t* dwObjNum) const {
400 if (index >= m_pLinearized->GetPageCount())
401 return false;
402
403 *szPageStartPos = m_PageInfos[index].page_offset();
404 *szPageLength = m_PageInfos[index].page_length();
405 *dwObjNum = m_PageInfos[index].start_obj_num();
406 return true;
407}
408
409CPDF_DataAvail::DocAvailStatus CPDF_HintTables::CheckPage(uint32_t index) {
410 if (index == m_pLinearized->GetFirstPageNo())
411 return CPDF_DataAvail::kDataAvailable;
412
413 if (index >= m_pLinearized->GetPageCount())
414 return CPDF_DataAvail::kDataError;
415
416 const uint32_t dwLength = m_PageInfos[index].page_length();
417 if (!dwLength)
418 return CPDF_DataAvail::kDataError;
419
420 if (!m_pValidator->CheckDataRangeAndRequestIfUnavailable(
421 m_PageInfos[index].page_offset(), dwLength)) {
422 return CPDF_DataAvail::kDataNotAvailable;
423 }
424
425 // Download data of shared objects in the page.
426 for (const uint32_t dwIndex : m_PageInfos[index].Identifiers()) {
427 if (dwIndex >= m_SharedObjGroupInfos.size())
428 continue;
429 const SharedObjGroupInfo& shared_group_info =
430 m_SharedObjGroupInfos[dwIndex];
431
432 if (!shared_group_info.m_szOffset || !shared_group_info.m_dwLength)
433 return CPDF_DataAvail::kDataError;
434
435 if (!m_pValidator->CheckDataRangeAndRequestIfUnavailable(
436 shared_group_info.m_szOffset, shared_group_info.m_dwLength)) {
437 return CPDF_DataAvail::kDataNotAvailable;
438 }
439 }
440 return CPDF_DataAvail::kDataAvailable;
441}
442
443bool CPDF_HintTables::LoadHintStream(CPDF_Stream* pHintStream) {
444 if (!pHintStream || !m_pLinearized->HasHintTable())
445 return false;
446
447 RetainPtr<const CPDF_Dictionary> pDict = pHintStream->GetDict();
448 if (!pDict)
449 return false;
450
451 RetainPtr<const CPDF_Object> pOffset = pDict->GetObjectFor("S");
452 if (!pOffset || !pOffset->IsNumber())
453 return false;
454
455 int shared_hint_table_offset = pOffset->GetInteger();
456 if (shared_hint_table_offset <= 0)
457 return false;
458
459 auto pAcc =
460 pdfium::MakeRetain<CPDF_StreamAcc>(pdfium::WrapRetain(pHintStream));
461 pAcc->LoadAllDataFiltered();
462
463 uint32_t size = pAcc->GetSize();
464 // The header section of page offset hint table is 36 bytes.
465 // The header section of shared object hint table is 24 bytes.
466 // Hint table has at least 60 bytes.
467 const uint32_t kMinStreamLength = 60;
468 if (size < kMinStreamLength)
469 return false;
470
471 FX_SAFE_UINT32 safe_shared_hint_table_offset = shared_hint_table_offset;
472 if (!safe_shared_hint_table_offset.IsValid() ||
473 size < safe_shared_hint_table_offset.ValueOrDie()) {
474 return false;
475 }
476
477 CFX_BitStream bs(pAcc->GetSpan().subspan(0, size));
478 return ReadPageHintTable(&bs) &&
479 ReadSharedObjHintTable(&bs, shared_hint_table_offset);
480}
481
482FX_FILESIZE CPDF_HintTables::HintsOffsetToFileOffset(
483 uint32_t hints_offset) const {
484 FX_SAFE_FILESIZE file_offset = hints_offset;
485 if (!file_offset.IsValid())
486 return 0;
487
488 // The resulting positions shall be interpreted as if the primary hint stream
489 // itself were not present. That is, a position greater than the hint stream
490 // offset shall have the hint stream length added to it to determine the
491 // actual offset relative to the beginning of the file.
492 // See ISO 32000-1:2008 spec, annex F.4 (Hint tables).
493 // Note: The PDF spec does not mention this, but positions equal to the hint
494 // stream offset also need to have the hint stream length added to it. e.g.
495 // There exists linearized PDFs generated by Adobe software that have this
496 // property.
497 if (file_offset.ValueOrDie() >= m_pLinearized->GetHintStart())
498 file_offset += m_pLinearized->GetHintLength();
499
500 return file_offset.ValueOrDefault(0);
501}
uint32_t GetBits(uint32_t nBits)
bool IsEOF() const
static constexpr int kPageMaxNum
CPDF_DataAvail::DocAvailStatus CheckPage(uint32_t index)
virtual ~CPDF_HintTables()
bool GetPagePos(uint32_t index, FX_FILESIZE *szPageStartPos, FX_FILESIZE *szPageLength, uint32_t *dwObjNum) const
bool LoadHintStream(CPDF_Stream *pHintStream)
bool ReadSharedObjHintTable(CFX_BitStream *hStream, uint32_t offset)
CPDF_HintTables(CPDF_ReadValidator *pValidator, const CPDF_LinearizedHeader *pLinearized)
bool ReadPageHintTable(CFX_BitStream *hStream)
FX_FILESIZE GetHintStart() const
static constexpr uint32_t kMaxObjectNumber
Definition cpdf_parser.h:57
RetainPtr< CPDF_ReadValidator > GetValidator() const
void SetPos(FX_FILESIZE pos)
#define FX_FILESIZE
Definition fx_types.h:19