Qt
Internal/Contributor docs for the Qt SDK. Note: These are NOT official API docs; those are found at https://doc.qt.io/
Loading...
Searching...
No Matches
cpdf_parser.cpp
Go to the documentation of this file.
1// Copyright 2016 The PDFium Authors
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7#include "core/fpdfapi/parser/cpdf_parser.h"
8
9#include <ctype.h>
10#include <stdint.h>
11
12#include <algorithm>
13#include <optional>
14#include <utility>
15#include <vector>
16
17#include "core/fpdfapi/parser/cpdf_array.h"
18#include "core/fpdfapi/parser/cpdf_crypto_handler.h"
19#include "core/fpdfapi/parser/cpdf_dictionary.h"
20#include "core/fpdfapi/parser/cpdf_document.h"
21#include "core/fpdfapi/parser/cpdf_linearized_header.h"
22#include "core/fpdfapi/parser/cpdf_number.h"
23#include "core/fpdfapi/parser/cpdf_object_stream.h"
24#include "core/fpdfapi/parser/cpdf_read_validator.h"
25#include "core/fpdfapi/parser/cpdf_reference.h"
26#include "core/fpdfapi/parser/cpdf_security_handler.h"
27#include "core/fpdfapi/parser/cpdf_stream.h"
28#include "core/fpdfapi/parser/cpdf_stream_acc.h"
29#include "core/fpdfapi/parser/cpdf_syntax_parser.h"
30#include "core/fpdfapi/parser/fpdf_parser_utility.h"
31#include "core/fxcrt/autorestorer.h"
32#include "core/fxcrt/check.h"
33#include "core/fxcrt/check_op.h"
34#include "core/fxcrt/containers/contains.h"
35#include "core/fxcrt/data_vector.h"
36#include "core/fxcrt/fx_extension.h"
37#include "core/fxcrt/fx_safe_types.h"
38#include "core/fxcrt/notreached.h"
39#include "core/fxcrt/scoped_set_insertion.h"
40#include "core/fxcrt/span.h"
41
44
45namespace {
46
47// A limit on the size of the xref table. Theoretical limits are higher, but
48// this may be large enough in practice. The max size should always be 1 more
49// than the max object number.
50constexpr int32_t kMaxXRefSize = CPDF_Parser::kMaxObjectNumber + 1;
51
52// "%PDF-1.7\n"
53constexpr FX_FILESIZE kPDFHeaderSize = 9;
54
55// The required number of fields in a /W array in a cross-reference stream
56// dictionary.
57constexpr size_t kMinFieldCount = 3;
58
59// Trailers are inline.
60constexpr uint32_t kNoTrailerObjectNumber = 0;
61
// One subsection of a cross-reference stream: a run of `obj_count`
// consecutive object numbers that begins at `start_obj_num`.
struct CrossRefStreamIndexEntry {
  uint32_t start_obj_num;  // First object number covered by the run.
  uint32_t obj_count;      // Number of entries in the run.
};
66
67std::optional<ObjectType> GetObjectTypeFromCrossRefStreamType(
68 uint32_t cross_ref_stream_type) {
69 switch (cross_ref_stream_type) {
70 case 0:
71 return ObjectType::kFree;
72 case 1:
74 case 2:
76 default:
77 return std::nullopt;
78 }
79}
80
81// Use the Get*XRefStreamEntry() functions below, instead of calling this
82// directly.
83uint32_t GetVarInt(pdfium::span<const uint8_t> input) {
84 uint32_t result = 0;
85 for (uint8_t c : input)
86 result = result * 256 + c;
87 return result;
88}
89
90// The following 3 functions retrieve variable length entries from
91// cross-reference streams, as described in ISO 32000-1:2008 table 18. There are
92// only 3 fields for any given entry.
93uint32_t GetFirstXRefStreamEntry(pdfium::span<const uint8_t> entry_span,
94 pdfium::span<const uint32_t> field_widths) {
95 return GetVarInt(entry_span.first(field_widths[0]));
96}
97
98uint32_t GetSecondXRefStreamEntry(pdfium::span<const uint8_t> entry_span,
99 pdfium::span<const uint32_t> field_widths) {
100 return GetVarInt(entry_span.subspan(field_widths[0], field_widths[1]));
101}
102
103uint32_t GetThirdXRefStreamEntry(pdfium::span<const uint8_t> entry_span,
104 pdfium::span<const uint32_t> field_widths) {
105 return GetVarInt(
106 entry_span.subspan(field_widths[0] + field_widths[1], field_widths[2]));
107}
108
109std::vector<CrossRefStreamIndexEntry> GetCrossRefStreamIndices(
110 const CPDF_Array* array,
111 uint32_t size) {
112 std::vector<CrossRefStreamIndexEntry> indices;
113 if (array) {
114 for (size_t i = 0; i < array->size() / 2; i++) {
115 RetainPtr<const CPDF_Number> pStartNumObj = array->GetNumberAt(i * 2);
116 if (!pStartNumObj)
117 continue;
118
119 RetainPtr<const CPDF_Number> pCountObj = array->GetNumberAt(i * 2 + 1);
120 if (!pCountObj)
121 continue;
122
123 int nStartNum = pStartNumObj->GetInteger();
124 int nCount = pCountObj->GetInteger();
125 if (nStartNum < 0 || nCount <= 0)
126 continue;
127
128 indices.push_back(
129 {static_cast<uint32_t>(nStartNum), static_cast<uint32_t>(nCount)});
133 if (indices.empty())
134 indices.push_back({0, size});
135 return indices;
136}
137
138std::vector<uint32_t> GetFieldWidths(const CPDF_Array* array) {
139 std::vector<uint32_t> results;
140 if (!array)
141 return results;
142
143 CPDF_ArrayLocker locker(array);
144 for (const auto& obj : locker)
145 results.push_back(obj->GetInteger());
146 return results;
147}
148
149class ObjectsHolderStub final : public CPDF_Parser::ParsedObjectsHolder {
150 public:
151 ObjectsHolderStub() = default;
152 ~ObjectsHolderStub() override = default;
153 bool TryInit() override { return true; }
154};
155
156} // namespace
157
161 if (!holder) {
162 m_pOwnedObjectsHolder = std::make_unique<ObjectsHolderStub>();
163 m_pObjectsHolder = m_pOwnedObjectsHolder.get();
164 }
165}
166
168
169CPDF_Parser::~CPDF_Parser() = default;
170
171uint32_t CPDF_Parser::GetLastObjNum() const {
172 return m_CrossRefTable->objects_info().empty()
173 ? 0
174 : m_CrossRefTable->objects_info().rbegin()->first;
175}
176
177bool CPDF_Parser::IsValidObjectNumber(uint32_t objnum) const {
178 return objnum <= GetLastObjNum();
179}
180
182 const auto* info = m_CrossRefTable->GetObjectInfo(objnum);
183 return (info && info->type == ObjectType::kNormal) ? info->pos : 0;
184}
185
186bool CPDF_Parser::IsObjectFree(uint32_t objnum) const {
187 DCHECK(IsValidObjectNumber(objnum));
188 const auto* info = m_CrossRefTable->GetObjectInfo(objnum);
189 return !info || info->type == ObjectType::kFree;
190}
191
192bool CPDF_Parser::InitSyntaxParser(RetainPtr<CPDF_ReadValidator> validator) {
193 const std::optional<FX_FILESIZE> header_offset = GetHeaderOffset(validator);
194 if (!header_offset.has_value())
195 return false;
196 if (validator->GetSize() < header_offset.value() + kPDFHeaderSize)
197 return false;
198
199 m_pSyntax = std::make_unique<CPDF_SyntaxParser>(std::move(validator),
200 header_offset.value());
201 return ParseFileVersion();
202}
203
204bool CPDF_Parser::ParseFileVersion() {
205 m_FileVersion = 0;
206 uint8_t ch;
207 if (!m_pSyntax->GetCharAt(5, ch))
208 return false;
209
210 if (isdigit(ch))
211 m_FileVersion = FXSYS_DecimalCharToInt(static_cast<wchar_t>(ch)) * 10;
212
213 if (!m_pSyntax->GetCharAt(7, ch))
214 return false;
215
216 if (isdigit(ch))
217 m_FileVersion += FXSYS_DecimalCharToInt(static_cast<wchar_t>(ch));
218 return true;
219}
220
223 const ByteString& password) {
224 if (!InitSyntaxParser(pdfium::MakeRetain<CPDF_ReadValidator>(
225 std::move(pFileAccess), nullptr)))
226 return FORMAT_ERROR;
227 SetPassword(password);
229}
230
232 DCHECK(!m_bHasParsed);
233 DCHECK(!m_bXRefTableRebuilt);
234 m_bHasParsed = true;
235 m_bXRefStream = false;
236
237 m_LastXRefOffset = ParseStartXRef();
238 if (m_LastXRefOffset >= kPDFHeaderSize) {
239 if (!LoadAllCrossRefTablesAndStreams(m_LastXRefOffset)) {
241 return FORMAT_ERROR;
242
243 m_bXRefTableRebuilt = true;
244 m_LastXRefOffset = 0;
245 }
246 } else {
248 return FORMAT_ERROR;
249
250 m_bXRefTableRebuilt = true;
251 }
252 Error eRet = SetEncryptHandler();
253 if (eRet != SUCCESS)
254 return eRet;
255
256 if (!GetRoot() || !m_pObjectsHolder->TryInit()) {
257 if (m_bXRefTableRebuilt)
258 return FORMAT_ERROR;
259
260 ReleaseEncryptHandler();
262 return FORMAT_ERROR;
263
264 eRet = SetEncryptHandler();
265 if (eRet != SUCCESS)
266 return eRet;
267
268 m_pObjectsHolder->TryInit();
269 if (!GetRoot())
270 return FORMAT_ERROR;
271 }
273 ReleaseEncryptHandler();
275 return FORMAT_ERROR;
276
277 eRet = SetEncryptHandler();
278 if (eRet != SUCCESS)
279 return eRet;
280 }
281 if (m_pSecurityHandler && !m_pSecurityHandler->IsMetadataEncrypted()) {
282 RetainPtr<const CPDF_Reference> pMetadata =
283 ToReference(GetRoot()->GetObjectFor("Metadata"));
284 if (pMetadata)
285 m_MetadataObjnum = pMetadata->GetRefObjNum();
286 }
287 return SUCCESS;
288}
289
291 static constexpr char kStartXRefKeyword[] = "startxref";
292 m_pSyntax->SetPos(m_pSyntax->GetDocumentSize() - strlen(kStartXRefKeyword));
293 if (!m_pSyntax->BackwardsSearchToWord(kStartXRefKeyword, 4096))
294 return 0;
295
296 // Skip "startxref" keyword.
297 m_pSyntax->GetKeyword();
298
299 // Read XRef offset.
300 const CPDF_SyntaxParser::WordResult xref_offset_result =
301 m_pSyntax->GetNextWord();
302 if (!xref_offset_result.is_number || xref_offset_result.word.IsEmpty())
303 return 0;
304
305 const FX_SAFE_FILESIZE result = FXSYS_atoi64(xref_offset_result.word.c_str());
306 if (!result.IsValid() || result.ValueOrDie() >= m_pSyntax->GetDocumentSize())
307 return 0;
308
309 return result.ValueOrDie();
310}
311
312CPDF_Parser::Error CPDF_Parser::SetEncryptHandler() {
313 ReleaseEncryptHandler();
314 if (!GetTrailer())
315 return FORMAT_ERROR;
316
317 RetainPtr<const CPDF_Dictionary> pEncryptDict = GetEncryptDict();
318 if (!pEncryptDict)
319 return SUCCESS;
320
321 if (pEncryptDict->GetNameFor("Filter") != "Standard")
322 return HANDLER_ERROR;
323
324 auto pSecurityHandler = pdfium::MakeRetain<CPDF_SecurityHandler>();
325 if (!pSecurityHandler->OnInit(pEncryptDict, GetIDArray(), GetPassword()))
326 return PASSWORD_ERROR;
327
328 m_pSecurityHandler = std::move(pSecurityHandler);
329 return SUCCESS;
330}
331
332void CPDF_Parser::ReleaseEncryptHandler() {
333 m_pSecurityHandler.Reset();
334}
335
336// Ideally, all the cross reference entries should be verified.
337// In reality, we rarely see well-formed cross references don't match
338// with the objects. crbug/602650 showed a case where object numbers
339// in the cross reference table are all off by one.
340bool CPDF_Parser::VerifyCrossRefTable() {
341 for (const auto& it : m_CrossRefTable->objects_info()) {
342 if (it.second.pos <= 0)
343 continue;
344 // Find the first non-zero position.
345 FX_FILESIZE SavedPos = m_pSyntax->GetPos();
346 m_pSyntax->SetPos(it.second.pos);
347 CPDF_SyntaxParser::WordResult word_result = m_pSyntax->GetNextWord();
348 m_pSyntax->SetPos(SavedPos);
349 if (!word_result.is_number || word_result.word.IsEmpty() ||
350 FXSYS_atoui(word_result.word.c_str()) != it.first) {
351 // If the object number read doesn't match the one stored,
352 // something is wrong with the cross reference table.
353 return false;
354 }
355 break;
356 }
357 return true;
358}
359
360bool CPDF_Parser::LoadAllCrossRefTablesAndStreams(FX_FILESIZE xref_offset) {
361 const bool is_xref_stream = !LoadCrossRefTable(xref_offset, /*skip=*/true);
362 if (is_xref_stream) {
363 // Use a copy of `xref_offset`, as LoadCrossRefStream() may change it.
364 FX_FILESIZE xref_offset_copy = xref_offset;
365 if (!LoadCrossRefStream(&xref_offset_copy, /*is_main_xref=*/true)) {
366 return false;
367 }
368
369 // LoadCrossRefStream() sets the trailer when `is_main_xref` is true.
370 // Thus no SetTrailer() call like the else-block below. Similarly,
371 // LoadCrossRefStream() also calls SetObjectMapSize() itself, so no need to
372 // call it again here.
373 } else {
374 RetainPtr<CPDF_Dictionary> trailer = LoadTrailer();
375 if (!trailer) {
376 return false;
377 }
378
379 m_CrossRefTable->SetTrailer(std::move(trailer), kNoTrailerObjectNumber);
380
381 const int32_t xrefsize = GetTrailer()->GetDirectIntegerFor("Size");
382 if (xrefsize > 0 && xrefsize <= kMaxXRefSize) {
383 m_CrossRefTable->SetObjectMapSize(xrefsize);
384 }
385 }
386
387 std::vector<FX_FILESIZE> xref_list;
388 std::vector<FX_FILESIZE> xref_stream_list;
389
390 if (is_xref_stream) {
391 xref_list.push_back(0);
392 xref_stream_list.push_back(xref_offset);
393 } else {
394 xref_list.push_back(xref_offset);
395 xref_stream_list.push_back(GetTrailer()->GetDirectIntegerFor("XRefStm"));
396 }
397
398 if (!FindAllCrossReferenceTablesAndStream(xref_offset, xref_list,
399 xref_stream_list)) {
400 return false;
401 }
402
403 if (xref_list.front() > 0) {
404 if (!LoadCrossRefTable(xref_list.front(), /*skip=*/false)) {
405 return false;
406 }
407
408 if (!VerifyCrossRefTable()) {
409 return false;
410 }
411 }
412
413 // Cross reference table entries take precedence over cross reference stream
414 // entries. So process the stream entries first and then give the cross
415 // reference tables a chance to overwrite them.
416 //
417 // XRefStm entries should only be used in update sections, so skip
418 // `xref_stream_list.front()`.
419 //
420 // See details in ISO 32000-1:2008, section 7.5.8.4.
421 for (size_t i = 1; i < xref_list.size(); ++i) {
422 if (xref_stream_list[i] > 0 &&
423 !LoadCrossRefStream(&xref_stream_list[i], /*is_main_xref=*/false)) {
424 return false;
425 }
426 if (xref_list[i] > 0 && !LoadCrossRefTable(xref_list[i], /*skip=*/false)) {
427 return false;
428 }
429 }
430
431 if (is_xref_stream) {
432 m_ObjectStreamMap.clear();
433 m_bXRefStream = true;
434 }
435
436 return true;
437}
438
439bool CPDF_Parser::LoadLinearizedAllCrossRefTable(FX_FILESIZE main_xref_offset) {
440 if (!LoadCrossRefTable(main_xref_offset, /*skip=*/false)) {
441 return false;
442 }
443
444 RetainPtr<CPDF_Dictionary> main_trailer = LoadTrailer();
445 if (!main_trailer)
446 return false;
447
448 // GetTrailer() currently returns the first-page trailer.
450 return false;
451
452 // Read /XRefStm from the first-page trailer. No need to read /Prev for the
453 // first-page trailer, as the caller already did that and passed it in as
454 // |main_xref_offset|.
456 std::vector<FX_FILESIZE> xref_list{main_xref_offset};
457 std::vector<FX_FILESIZE> xref_stream_list{xref_stm};
458
459 // Merge the trailers. Now GetTrailer() returns the merged trailer, where
460 // /Prev is from the main-trailer.
461 m_CrossRefTable = CPDF_CrossRefTable::MergeUp(
462 std::make_unique<CPDF_CrossRefTable>(std::move(main_trailer),
463 kNoTrailerObjectNumber),
464 std::move(m_CrossRefTable));
465
466 if (!FindAllCrossReferenceTablesAndStream(main_xref_offset, xref_list,
467 xref_stream_list)) {
468 return false;
469 }
470
471 // Unlike LoadAllCrossRefTablesAndStreams(), the first XRefStm entry in
472 // `xref_stream_list` should be processed.
473 if (xref_stream_list[0] > 0 &&
474 !LoadCrossRefStream(&xref_stream_list[0], /*is_main_xref=*/false)) {
475 return false;
476 }
477
478 // Cross reference table entries take precedence over cross reference stream
479 // entries. So process the stream entries first and then give the cross
480 // reference tables a chance to overwrite them.
481 for (size_t i = 1; i < xref_list.size(); ++i) {
482 if (xref_stream_list[i] > 0 &&
483 !LoadCrossRefStream(&xref_stream_list[i], /*is_main_xref=*/false)) {
484 return false;
485 }
486 if (xref_list[i] > 0 && !LoadCrossRefTable(xref_list[i], /*skip=*/false)) {
487 return false;
488 }
489 }
490
491 return true;
492}
493
494bool CPDF_Parser::ParseAndAppendCrossRefSubsectionData(
495 uint32_t start_objnum,
496 uint32_t count,
497 std::vector<CrossRefObjData>* out_objects) {
498 if (!count)
499 return true;
500
501 // Each entry shall be exactly 20 byte.
502 // A sample entry looks like:
503 // "0000000000 00007 f\r\n"
504 static constexpr int32_t kEntrySize = 20;
505
506 if (!out_objects) {
507 FX_SAFE_FILESIZE pos = count;
508 pos *= kEntrySize;
509 pos += m_pSyntax->GetPos();
510 if (!pos.IsValid())
511 return false;
512 m_pSyntax->SetPos(pos.ValueOrDie());
513 return true;
514 }
515 const size_t start_obj_index = out_objects->size();
516 FX_SAFE_SIZE_T new_size = start_obj_index;
517 new_size += count;
518 if (!new_size.IsValid())
519 return false;
520
521 if (new_size.ValueOrDie() > kMaxXRefSize)
522 return false;
523
524 const size_t max_entries_in_file = m_pSyntax->GetDocumentSize() / kEntrySize;
525 if (new_size.ValueOrDie() > max_entries_in_file)
526 return false;
527
528 out_objects->resize(new_size.ValueOrDie());
529
530 DataVector<char> buf(1024 * kEntrySize + 1);
531 buf.back() = '\0';
532
533 uint32_t entries_to_read = count;
534 while (entries_to_read > 0) {
535 const uint32_t entries_in_block = std::min(entries_to_read, 1024u);
536 const uint32_t bytes_to_read = entries_in_block * kEntrySize;
537 auto block_span = pdfium::make_span(buf).first(bytes_to_read);
538 if (!m_pSyntax->ReadBlock(pdfium::as_writable_bytes(block_span)))
539 return false;
540
541 for (uint32_t i = 0; i < entries_in_block; i++) {
542 uint32_t iObjectIndex = count - entries_to_read + i;
543 CrossRefObjData& obj_data =
544 (*out_objects)[start_obj_index + iObjectIndex];
545 const uint32_t objnum = start_objnum + iObjectIndex;
546 obj_data.obj_num = objnum;
547 ObjectInfo& info = obj_data.info;
548
549 pdfium::span<const char> pEntry =
550 pdfium::make_span(buf).subspan(i * kEntrySize);
551 if (pEntry[17] == 'f') {
552 info.pos = 0;
554 } else {
555 const FX_SAFE_FILESIZE offset = FXSYS_atoi64(pEntry.data());
556 if (!offset.IsValid())
557 return false;
558
559 if (offset.ValueOrDie() == 0) {
560 for (int32_t c = 0; c < 10; c++) {
561 if (!isdigit(pEntry[c]))
562 return false;
563 }
564 }
565
566 info.pos = offset.ValueOrDie();
567
568 // TODO(art-snake): The info.gennum is uint16_t, but version may be
569 // greated than max<uint16_t>. Needs solve this issue.
570 const int32_t version = FXSYS_atoi(pEntry.subspan(11).data());
571 info.gennum = version;
573 }
574 }
575 entries_to_read -= entries_in_block;
576 }
577 return true;
578}
579
580bool CPDF_Parser::ParseCrossRefTable(
581 std::vector<CrossRefObjData>* out_objects) {
582 if (out_objects)
583 out_objects->clear();
584
585 if (m_pSyntax->GetKeyword() != "xref")
586 return false;
587 std::vector<CrossRefObjData> result_objects;
588 while (true) {
589 FX_FILESIZE saved_pos = m_pSyntax->GetPos();
590 CPDF_SyntaxParser::WordResult word_result = m_pSyntax->GetNextWord();
591 const ByteString& word = word_result.word;
592 if (word.IsEmpty())
593 return false;
594
595 if (!word_result.is_number) {
596 m_pSyntax->SetPos(saved_pos);
597 break;
598 }
599
600 uint32_t start_objnum = FXSYS_atoui(word.c_str());
601 if (start_objnum >= kMaxObjectNumber)
602 return false;
603
604 uint32_t count = m_pSyntax->GetDirectNum();
605 m_pSyntax->ToNextWord();
606
607 if (!ParseAndAppendCrossRefSubsectionData(
608 start_objnum, count, out_objects ? &result_objects : nullptr)) {
609 return false;
610 }
611 }
612 if (out_objects)
613 *out_objects = std::move(result_objects);
614 return true;
615}
616
618 m_pSyntax->SetPos(pos);
619 std::vector<CrossRefObjData> objects;
620 if (!ParseCrossRefTable(skip ? nullptr : &objects)) {
621 return false;
622 }
623
624 MergeCrossRefObjectsData(objects);
625 return true;
626}
627
628void CPDF_Parser::MergeCrossRefObjectsData(
629 const std::vector<CrossRefObjData>& objects) {
630 for (const auto& obj : objects) {
631 switch (obj.info.type) {
632 case ObjectType::kFree:
633 if (obj.info.gennum > 0)
634 m_CrossRefTable->SetFree(obj.obj_num, obj.info.gennum);
635 break;
636 case ObjectType::kNormal:
637 m_CrossRefTable->AddNormal(obj.obj_num, obj.info.gennum,
638 obj.info.is_object_stream_flag,
639 obj.info.pos);
640 break;
641 case ObjectType::kCompressed:
642 m_CrossRefTable->AddCompressed(obj.obj_num, obj.info.archive.obj_num,
643 obj.info.archive.obj_index);
644 break;
645 }
646 }
647}
648
649bool CPDF_Parser::FindAllCrossReferenceTablesAndStream(
650 FX_FILESIZE main_xref_offset,
651 std::vector<FX_FILESIZE>& xref_list,
652 std::vector<FX_FILESIZE>& xref_stream_list) {
653 std::set<FX_FILESIZE> seen_xref_offset{main_xref_offset};
654
655 // When the trailer doesn't have Prev entry or Prev entry value is not
656 // numerical, GetDirectInteger() returns 0. Loading will end.
658 while (xref_offset > 0) {
659 // Check for circular references.
660 if (pdfium::Contains(seen_xref_offset, xref_offset)) {
661 return false;
662 }
663
664 seen_xref_offset.insert(xref_offset);
665
666 // Use a copy of `xref_offset`, as LoadCrossRefStream() may change it.
667 FX_FILESIZE xref_offset_copy = xref_offset;
668 if (LoadCrossRefStream(&xref_offset_copy, /*is_main_xref=*/false)) {
669 // Since `xref_offset` points to a cross reference stream, mark it
670 // accordingly.
671 xref_list.insert(xref_list.begin(), 0);
672 xref_stream_list.insert(xref_stream_list.begin(), xref_offset);
673 xref_offset = xref_offset_copy;
674
675 // On success, LoadCrossRefStream() called CPDF_CrossRefTable::MergeUp()
676 // when `is_main_xref` is false. Thus no explicit call here.
677 } else {
678 // SLOW ...
679 LoadCrossRefTable(xref_offset, /*skip=*/true);
680
681 RetainPtr<CPDF_Dictionary> trailer_dict = LoadTrailer();
682 if (!trailer_dict) {
683 return false;
684 }
685
686 // The trailer for cross reference tables may point to a cross reference
687 // stream as well.
688 xref_list.insert(xref_list.begin(), xref_offset);
689 xref_stream_list.insert(xref_stream_list.begin(),
690 trailer_dict->GetIntegerFor("XRefStm"));
691 xref_offset = trailer_dict->GetDirectIntegerFor("Prev");
692
693 // SLOW ...
694 m_CrossRefTable = CPDF_CrossRefTable::MergeUp(
695 std::make_unique<CPDF_CrossRefTable>(std::move(trailer_dict),
696 kNoTrailerObjectNumber),
697 std::move(m_CrossRefTable));
698 }
699 }
700 return true;
701}
702
704 auto cross_ref_table = std::make_unique<CPDF_CrossRefTable>();
705
706 const uint32_t kBufferSize = 4096;
707 m_pSyntax->SetReadBufferSize(kBufferSize);
708 m_pSyntax->SetPos(0);
709
710 std::vector<std::pair<uint32_t, FX_FILESIZE>> numbers;
711 for (CPDF_SyntaxParser::WordResult result = m_pSyntax->GetNextWord();
712 !result.word.IsEmpty(); result = m_pSyntax->GetNextWord()) {
713 const ByteString& word = result.word;
714 if (result.is_number) {
715 numbers.emplace_back(FXSYS_atoui(word.c_str()),
716 m_pSyntax->GetPos() - word.GetLength());
717 if (numbers.size() > 2u)
718 numbers.erase(numbers.begin());
719 continue;
720 }
721
722 if (word == "(") {
723 m_pSyntax->ReadString();
724 } else if (word == "<") {
725 m_pSyntax->ReadHexString();
726 } else if (word == "trailer") {
727 RetainPtr<CPDF_Object> pTrailer = m_pSyntax->GetObjectBody(nullptr);
728 if (pTrailer) {
729 CPDF_Stream* stream_trailer = pTrailer->AsMutableStream();
730 // Grab the object number from `pTrailer` before potentially calling
731 // std::move(pTrailer) below.
732 const uint32_t trailer_object_number = pTrailer->GetObjNum();
733 RetainPtr<CPDF_Dictionary> trailer_dict =
734 stream_trailer ? stream_trailer->GetMutableDict()
735 : ToDictionary(std::move(pTrailer));
736 cross_ref_table = CPDF_CrossRefTable::MergeUp(
737 std::move(cross_ref_table),
738 std::make_unique<CPDF_CrossRefTable>(std::move(trailer_dict),
739 trailer_object_number));
740 }
741 } else if (word == "obj" && numbers.size() == 2u) {
742 const FX_FILESIZE obj_pos = numbers[0].second;
743 const uint32_t obj_num = numbers[0].first;
744 const uint32_t gen_num = numbers[1].first;
745
746 m_pSyntax->SetPos(obj_pos);
747 RetainPtr<CPDF_Stream> pStream = ToStream(m_pSyntax->GetIndirectObject(
748 nullptr, CPDF_SyntaxParser::ParseType::kStrict));
749
750 if (pStream && pStream->GetDict()->GetNameFor("Type") == "XRef") {
751 cross_ref_table = CPDF_CrossRefTable::MergeUp(
752 std::move(cross_ref_table),
753 std::make_unique<CPDF_CrossRefTable>(
754 ToDictionary(pStream->GetDict()->Clone()),
755 pStream->GetObjNum()));
756 }
757
758 if (obj_num < kMaxObjectNumber) {
759 cross_ref_table->AddNormal(obj_num, gen_num, /*is_object_stream=*/false,
760 obj_pos);
761 const auto object_stream =
762 CPDF_ObjectStream::Create(std::move(pStream));
763 if (object_stream) {
764 const auto& object_info = object_stream->object_info();
765 for (size_t i = 0; i < object_info.size(); ++i) {
766 const auto& info = object_info[i];
767 if (info.obj_num < kMaxObjectNumber)
768 cross_ref_table->AddCompressed(info.obj_num, obj_num, i);
769 }
770 }
771 }
772 }
773 numbers.clear();
774 }
775
776 m_CrossRefTable = CPDF_CrossRefTable::MergeUp(std::move(m_CrossRefTable),
777 std::move(cross_ref_table));
778 // Restore default buffer size.
779 m_pSyntax->SetReadBufferSize(CPDF_Stream::kFileBufSize);
780
781 return GetTrailer() && !m_CrossRefTable->objects_info().empty();
782}
783
784bool CPDF_Parser::LoadCrossRefStream(FX_FILESIZE* pos, bool is_main_xref) {
785 RetainPtr<const CPDF_Stream> pStream =
786 ToStream(ParseIndirectObjectAt(*pos, 0));
787 if (!pStream || !pStream->GetObjNum()) {
788 return false;
789 }
790
791 RetainPtr<const CPDF_Dictionary> pDict = pStream->GetDict();
792 int32_t prev = pDict->GetIntegerFor("Prev");
793 if (prev < 0)
794 return false;
795
796 int32_t size = pDict->GetIntegerFor("Size");
797 if (size < 0)
798 return false;
799
800 *pos = prev;
801
802 auto new_cross_ref_table = std::make_unique<CPDF_CrossRefTable>(
803 /*trailer=*/ToDictionary(pDict->Clone()),
804 /*trailer_object_number=*/pStream->GetObjNum());
805 if (is_main_xref) {
806 m_CrossRefTable = std::move(new_cross_ref_table);
807 m_CrossRefTable->SetObjectMapSize(size);
808 } else {
809 m_CrossRefTable = CPDF_CrossRefTable::MergeUp(
810 std::move(new_cross_ref_table), std::move(m_CrossRefTable));
811 }
812
813 std::vector<CrossRefStreamIndexEntry> indices =
814 GetCrossRefStreamIndices(pDict->GetArrayFor("Index").Get(), size);
815
816 std::vector<uint32_t> field_widths =
817 GetFieldWidths(pDict->GetArrayFor("W").Get());
818 if (field_widths.size() < kMinFieldCount)
819 return false;
820
821 FX_SAFE_UINT32 dwAccWidth;
822 for (uint32_t width : field_widths)
823 dwAccWidth += width;
824 if (!dwAccWidth.IsValid())
825 return false;
826
827 uint32_t total_width = dwAccWidth.ValueOrDie();
828 auto pAcc = pdfium::MakeRetain<CPDF_StreamAcc>(pStream);
829 pAcc->LoadAllDataFiltered();
830
831 pdfium::span<const uint8_t> data_span = pAcc->GetSpan();
832 uint32_t segindex = 0;
833 for (const auto& index : indices) {
834 FX_SAFE_UINT32 seg_end = segindex;
835 seg_end += index.obj_count;
836 seg_end *= total_width;
837 if (!seg_end.IsValid() || seg_end.ValueOrDie() > data_span.size())
838 continue;
839
840 pdfium::span<const uint8_t> seg_span = data_span.subspan(
841 segindex * total_width, index.obj_count * total_width);
842 FX_SAFE_UINT32 safe_new_size = index.start_obj_num;
843 safe_new_size += index.obj_count;
844 if (!safe_new_size.IsValid()) {
845 continue;
846 }
847
848 // Until SetObjectMapSize() below has been called by a prior loop iteration,
849 // `current_size` is based on the /Size value parsed in
850 // LoadCrossRefStream(). PDFs may not always have the correct /Size. In this
851 // case, other PDF implementations ignore the incorrect size, and PDFium
852 // also ignores incorrect size in trailers for cross reference tables.
853 const uint32_t current_size =
854 m_CrossRefTable->objects_info().empty() ? 0 : GetLastObjNum() + 1;
855 // So allow `new_size` to be greater than `current_size`, but avoid going
856 // over `kMaxXRefSize`. This works just fine because the loop below checks
857 // against `kMaxObjectNumber`, and the two "max" constants are in sync.
858 const uint32_t new_size =
859 std::min<uint32_t>(safe_new_size.ValueOrDie(), kMaxXRefSize);
860 if (new_size > current_size) {
861 m_CrossRefTable->SetObjectMapSize(new_size);
862 }
863
864 for (uint32_t i = 0; i < index.obj_count; ++i) {
865 const uint32_t obj_num = index.start_obj_num + i;
866 if (obj_num >= kMaxObjectNumber) {
867 break;
868 }
869
870 ProcessCrossRefStreamEntry(seg_span.subspan(i * total_width, total_width),
871 field_widths, obj_num);
872 }
873
874 segindex += index.obj_count;
875 }
876 return true;
877}
878
879void CPDF_Parser::ProcessCrossRefStreamEntry(
880 pdfium::span<const uint8_t> entry_span,
881 pdfium::span<const uint32_t> field_widths,
882 uint32_t obj_num) {
883 DCHECK_GE(field_widths.size(), kMinFieldCount);
884 ObjectType type;
885 if (field_widths[0]) {
886 const uint32_t cross_ref_stream_obj_type =
887 GetFirstXRefStreamEntry(entry_span, field_widths);
888 std::optional<ObjectType> maybe_type =
889 GetObjectTypeFromCrossRefStreamType(cross_ref_stream_obj_type);
890 if (!maybe_type.has_value()) {
891 return;
892 }
893 type = maybe_type.value();
894 } else {
895 // Per ISO 32000-1:2008 table 17, use the default value of 1 for the xref
896 // stream entry when it is not specified. The `type` assignment is the
897 // equivalent to calling GetObjectTypeFromCrossRefStreamType(1).
898 type = ObjectType::kNormal;
899 }
900
901 if (type == ObjectType::kFree) {
902 const uint32_t gen_num = GetThirdXRefStreamEntry(entry_span, field_widths);
903 if (pdfium::IsValueInRangeForNumericType<uint16_t>(gen_num)) {
904 m_CrossRefTable->SetFree(obj_num, gen_num);
905 }
906 return;
907 }
908
909 if (type == ObjectType::kNormal) {
910 const uint32_t offset = GetSecondXRefStreamEntry(entry_span, field_widths);
911 const uint32_t gen_num = GetThirdXRefStreamEntry(entry_span, field_widths);
912 if (pdfium::IsValueInRangeForNumericType<FX_FILESIZE>(offset) &&
913 pdfium::IsValueInRangeForNumericType<uint16_t>(gen_num)) {
914 m_CrossRefTable->AddNormal(obj_num, gen_num, /*is_object_stream=*/false,
915 offset);
916 }
917 return;
918 }
919
921 const uint32_t archive_obj_num =
922 GetSecondXRefStreamEntry(entry_span, field_widths);
923 if (!IsValidObjectNumber(archive_obj_num)) {
924 return;
925 }
926
927 const uint32_t archive_obj_index =
928 GetThirdXRefStreamEntry(entry_span, field_widths);
929 m_CrossRefTable->AddCompressed(obj_num, archive_obj_num, archive_obj_index);
930}
931
933 return GetTrailer() ? GetTrailer()->GetArrayFor("ID") : nullptr;
934}
935
936RetainPtr<const CPDF_Dictionary> CPDF_Parser::GetRoot() const {
938 m_pObjectsHolder->GetOrParseIndirectObject(GetRootObjNum());
939 return obj ? obj->GetDict() : nullptr;
940}
941
943 if (!GetTrailer())
944 return nullptr;
945
946 RetainPtr<const CPDF_Object> pEncryptObj =
947 GetTrailer()->GetObjectFor("Encrypt");
948 if (!pEncryptObj)
949 return nullptr;
950
951 if (pEncryptObj->IsDictionary())
952 return pdfium::WrapRetain(pEncryptObj->AsDictionary());
953
954 if (pEncryptObj->IsReference()) {
955 return ToDictionary(m_pObjectsHolder->GetOrParseIndirectObject(
956 pEncryptObj->AsReference()->GetRefObjNum()));
957 }
958 return nullptr;
959}
960
962 return GetSecurityHandler()->GetEncodedPassword(GetPassword().AsStringView());
963}
964
966 return m_CrossRefTable->trailer();
967}
968
970 return m_CrossRefTable->GetMutableTrailerForTesting();
971}
972
974 return m_CrossRefTable->trailer_object_number();
975}
976
978 return m_CrossRefTable->trailer()
979 ? ToDictionary(m_CrossRefTable->trailer()->Clone())
980 : RetainPtr<CPDF_Dictionary>();
981}
982
983uint32_t CPDF_Parser::GetInfoObjNum() const {
984 RetainPtr<const CPDF_Reference> pRef =
985 ToReference(m_CrossRefTable->trailer()
986 ? m_CrossRefTable->trailer()->GetObjectFor("Info")
987 : nullptr);
988 return pRef ? pRef->GetRefObjNum() : CPDF_Object::kInvalidObjNum;
989}
990
991uint32_t CPDF_Parser::GetRootObjNum() const {
992 RetainPtr<const CPDF_Reference> pRef =
993 ToReference(m_CrossRefTable->trailer()
994 ? m_CrossRefTable->trailer()->GetObjectFor("Root")
995 : nullptr);
996 return pRef ? pRef->GetRefObjNum() : CPDF_Object::kInvalidObjNum;
997}
998
1000 if (!IsValidObjectNumber(objnum)) {
1001 return nullptr;
1002 }
1003
1004 // Prevent circular parsing the same object.
1005 if (pdfium::Contains(m_ParsingObjNums, objnum)) {
1006 return nullptr;
1007 }
1008
1009 ScopedSetInsertion<uint32_t> local_insert(&m_ParsingObjNums, objnum);
1010 const auto* info = m_CrossRefTable->GetObjectInfo(objnum);
1011 if (!info) {
1012 return nullptr;
1013 }
1014
1015 switch (info->type) {
1016 case ObjectType::kFree: {
1017 return nullptr;
1018 }
1019 case ObjectType::kNormal: {
1020 if (info->pos <= 0) {
1021 return nullptr;
1022 }
1023 return ParseIndirectObjectAt(info->pos, objnum);
1024 }
1026 const auto* obj_stream = GetObjectStream(info->archive.obj_num);
1027 if (!obj_stream) {
1028 return nullptr;
1029 }
1030 return obj_stream->ParseObject(m_pObjectsHolder, objnum,
1031 info->archive.obj_index);
1032 }
1033 }
1034}
1035
1036const CPDF_ObjectStream* CPDF_Parser::GetObjectStream(uint32_t object_number) {
1037 // Prevent circular parsing the same object.
1038 if (pdfium::Contains(m_ParsingObjNums, object_number))
1039 return nullptr;
1040
1041 auto it = m_ObjectStreamMap.find(object_number);
1042 if (it != m_ObjectStreamMap.end())
1043 return it->second.get();
1044
1045 const auto* info = m_CrossRefTable->GetObjectInfo(object_number);
1046 if (!info || !info->is_object_stream_flag) {
1047 return nullptr;
1048 }
1049
1050 const FX_FILESIZE object_pos = info->pos;
1051 if (object_pos <= 0)
1052 return nullptr;
1053
1054 // Keep track of `object_number` before doing more parsing.
1055 ScopedSetInsertion<uint32_t> local_insert(&m_ParsingObjNums, object_number);
1056
1057 RetainPtr<CPDF_Object> object =
1058 ParseIndirectObjectAt(object_pos, object_number);
1059 if (!object)
1060 return nullptr;
1061
1062 std::unique_ptr<CPDF_ObjectStream> objs_stream =
1063 CPDF_ObjectStream::Create(ToStream(object));
1064 const CPDF_ObjectStream* result = objs_stream.get();
1065 m_ObjectStreamMap[object_number] = std::move(objs_stream);
1066
1067 return result;
1068}
1069
1070RetainPtr<CPDF_Object> CPDF_Parser::ParseIndirectObjectAt(FX_FILESIZE pos,
1071 uint32_t objnum) {
1072 const FX_FILESIZE saved_pos = m_pSyntax->GetPos();
1073 m_pSyntax->SetPos(pos);
1074
1075 auto result = m_pSyntax->GetIndirectObject(
1076 m_pObjectsHolder, CPDF_SyntaxParser::ParseType::kLoose);
1077 m_pSyntax->SetPos(saved_pos);
1078 if (result && objnum && result->GetObjNum() != objnum)
1079 return nullptr;
1080
1081 const bool should_decrypt = m_pSecurityHandler &&
1082 m_pSecurityHandler->GetCryptoHandler() &&
1083 objnum != m_MetadataObjnum;
1084 if (should_decrypt &&
1085 !m_pSecurityHandler->GetCryptoHandler()->DecryptObjectTree(result)) {
1086 return nullptr;
1087 }
1088 return result;
1089}
1090
1092 return m_pSyntax->GetDocumentSize();
1093}
1094
1095uint32_t CPDF_Parser::GetFirstPageNo() const {
1096 return m_pLinearized ? m_pLinearized->GetFirstPageNo() : 0;
1097}
1098
1100 std::unique_ptr<CPDF_LinearizedHeader> pLinearized) {
1101 m_pLinearized = std::move(pLinearized);
1102}
1103
1104RetainPtr<CPDF_Dictionary> CPDF_Parser::LoadTrailer() {
1105 if (m_pSyntax->GetKeyword() != "trailer")
1106 return nullptr;
1107
1108 return ToDictionary(m_pSyntax->GetObjectBody(m_pObjectsHolder));
1109}
1110
1111uint32_t CPDF_Parser::GetPermissions(bool get_owner_perms) const {
1112 return m_pSecurityHandler
1113 ? m_pSecurityHandler->GetPermissions(get_owner_perms)
1114 : 0xFFFFFFFF;
1115}
1116
1118 return CPDF_LinearizedHeader::Parse(m_pSyntax.get());
1119}
1120
1122 RetainPtr<CPDF_ReadValidator> validator,
1123 const ByteString& password) {
1124 DCHECK(!m_bHasParsed);
1125 DCHECK(!m_bXRefTableRebuilt);
1126 SetPassword(password);
1127 m_bXRefStream = false;
1128 m_LastXRefOffset = 0;
1129
1130 if (!InitSyntaxParser(std::move(validator)))
1131 return FORMAT_ERROR;
1132
1133 m_pLinearized = ParseLinearizedHeader();
1134 if (!m_pLinearized)
1135 return StartParseInternal();
1136
1137 m_bHasParsed = true;
1138
1139 m_LastXRefOffset = m_pLinearized->GetLastXRefOffset();
1140 FX_FILESIZE dwFirstXRefOffset = m_LastXRefOffset;
1141 const bool loaded_xref_table =
1142 LoadCrossRefTable(dwFirstXRefOffset, /*skip=*/false);
1143 if (!loaded_xref_table &&
1144 !LoadCrossRefStream(&dwFirstXRefOffset, /*is_main_xref=*/true)) {
1145 if (!RebuildCrossRef())
1146 return FORMAT_ERROR;
1147
1148 m_bXRefTableRebuilt = true;
1149 m_LastXRefOffset = 0;
1150 }
1151 if (loaded_xref_table) {
1152 RetainPtr<CPDF_Dictionary> trailer = LoadTrailer();
1153 if (!trailer)
1154 return SUCCESS;
1155
1156 m_CrossRefTable->SetTrailer(std::move(trailer), kNoTrailerObjectNumber);
1157 const int32_t xrefsize = GetTrailer()->GetDirectIntegerFor("Size");
1158 if (xrefsize > 0) {
1159 // Check if `xrefsize` is correct. If it is incorrect, give up and rebuild
1160 // the xref table.
1161 const uint32_t expected_last_obj_num = xrefsize - 1;
1162 if (GetLastObjNum() != expected_last_obj_num && !RebuildCrossRef()) {
1163 return FORMAT_ERROR;
1164 }
1165 }
1166 }
1167
1168 Error eRet = SetEncryptHandler();
1169 if (eRet != SUCCESS)
1170 return eRet;
1171
1172 if (!GetRoot() || !m_pObjectsHolder->TryInit()) {
1173 if (m_bXRefTableRebuilt)
1174 return FORMAT_ERROR;
1175
1176 ReleaseEncryptHandler();
1177 if (!RebuildCrossRef())
1178 return FORMAT_ERROR;
1179
1180 eRet = SetEncryptHandler();
1181 if (eRet != SUCCESS)
1182 return eRet;
1183
1184 m_pObjectsHolder->TryInit();
1185 if (!GetRoot())
1186 return FORMAT_ERROR;
1187 }
1188
1190 ReleaseEncryptHandler();
1192 return FORMAT_ERROR;
1193
1194 eRet = SetEncryptHandler();
1195 if (eRet != SUCCESS)
1196 return eRet;
1197 }
1198
1199 if (m_pSecurityHandler && m_pSecurityHandler->IsMetadataEncrypted()) {
1200 RetainPtr<const CPDF_Reference> pMetadata =
1201 ToReference(GetRoot()->GetObjectFor("Metadata"));
1202 if (pMetadata)
1203 m_MetadataObjnum = pMetadata->GetRefObjNum();
1204 }
1205 return SUCCESS;
1206}
1207
1208bool CPDF_Parser::LoadLinearizedAllCrossRefStream(
1209 FX_FILESIZE main_xref_offset) {
1210 FX_FILESIZE xref_offset = main_xref_offset;
1211 if (!LoadCrossRefStream(&xref_offset, /*is_main_xref=*/false)) {
1212 return false;
1213 }
1214
1215 std::set<FX_FILESIZE> seen_xref_offset;
1216 while (xref_offset) {
1217 seen_xref_offset.insert(xref_offset);
1218 if (!LoadCrossRefStream(&xref_offset, /*is_main_xref=*/false)) {
1219 return false;
1220 }
1221
1222 // Check for circular references.
1223 if (pdfium::Contains(seen_xref_offset, xref_offset))
1224 return false;
1225 }
1226 m_ObjectStreamMap.clear();
1227 m_bXRefStream = true;
1228 return true;
1229}
1230
1231CPDF_Parser::Error CPDF_Parser::LoadLinearizedMainXRefTable() {
1232 const FX_SAFE_FILESIZE prev = GetTrailer()->GetIntegerFor("Prev");
1233 const FX_FILESIZE main_xref_offset = prev.ValueOrDefault(-1);
1234 if (main_xref_offset < 0)
1235 return FORMAT_ERROR;
1236
1237 if (main_xref_offset == 0)
1238 return SUCCESS;
1239
1240 const AutoRestorer<uint32_t> save_metadata_objnum(&m_MetadataObjnum);
1241 m_MetadataObjnum = 0;
1242 m_ObjectStreamMap.clear();
1243
1244 if (!LoadLinearizedAllCrossRefTable(main_xref_offset) &&
1245 !LoadLinearizedAllCrossRefStream(main_xref_offset)) {
1246 m_LastXRefOffset = 0;
1247 return FORMAT_ERROR;
1248 }
1249
1250 return SUCCESS;
1251}
1252
1254 std::unique_ptr<CPDF_SyntaxParser> parser) {
1255 m_pSyntax = std::move(parser);
1256}
1257
1259 std::vector<unsigned int> trailer_ends;
1260 m_pSyntax->SetTrailerEnds(&trailer_ends);
1261
1262 // Traverse the document.
1263 m_pSyntax->SetPos(0);
1264 while (true) {
1265 CPDF_SyntaxParser::WordResult word_result = m_pSyntax->GetNextWord();
1266 if (word_result.is_number) {
1267 // The object number was read. Read the generation number.
1268 word_result = m_pSyntax->GetNextWord();
1269 if (!word_result.is_number)
1270 break;
1271
1272 word_result = m_pSyntax->GetNextWord();
1273 if (word_result.word != "obj")
1274 break;
1275
1276 m_pSyntax->GetObjectBody(nullptr);
1277
1278 word_result = m_pSyntax->GetNextWord();
1279 if (word_result.word != "endobj")
1280 break;
1281 } else if (word_result.word == "trailer") {
1282 m_pSyntax->GetObjectBody(nullptr);
1283 } else if (word_result.word == "startxref") {
1284 m_pSyntax->GetNextWord();
1285 } else if (word_result.word == "xref") {
1286 while (true) {
1287 word_result = m_pSyntax->GetNextWord();
1288 if (word_result.word.IsEmpty() || word_result.word == "startxref")
1289 break;
1290 }
1291 m_pSyntax->GetNextWord();
1292 } else {
1293 break;
1294 }
1295 }
1296
1297 // Stop recording trailer ends.
1298 m_pSyntax->SetTrailerEnds(nullptr);
1299 return trailer_ends;
1300}
1301
1303 FX_FILESIZE src_size) {
1304 static constexpr FX_FILESIZE kBufferSize = 4096;
1305 DataVector<uint8_t> buffer(kBufferSize);
1306 m_pSyntax->SetPos(0);
1307 while (src_size) {
1308 const uint32_t block_size =
1309 static_cast<uint32_t>(std::min(kBufferSize, src_size));
1310 auto block_span = pdfium::make_span(buffer).first(block_size);
1311 if (!m_pSyntax->ReadBlock(block_span))
1312 return false;
1313 if (!archive->WriteBlock(pdfium::make_span(buffer).first(block_size)))
1314 return false;
1315 src_size -= block_size;
1316 }
1317 return true;
1318}
fxcrt::ByteString ByteString
Definition bytestring.h:180
#define DCHECK
Definition check.h:33
#define DCHECK_GE(x, y)
Definition check_op.h:22
#define DCHECK_EQ(x, y)
Definition check_op.h:17
CPDF_ArrayLocker(const CPDF_Array *pArray)
std::vector< RetainPtr< CPDF_Object > >::const_iterator const_iterator
Definition cpdf_array.h:29
int GetDirectIntegerFor(const ByteString &key) const
RetainPtr< const CPDF_Array > GetArrayFor(const ByteString &key) const
std::map< ByteString, RetainPtr< CPDF_Object >, std::less<> > DictMap
static constexpr uint32_t kInvalidObjNum
Definition cpdf_object.h:52
void SetSyntaxParserForTesting(std::unique_ptr< CPDF_SyntaxParser > parser)
bool RebuildCrossRef()
uint32_t GetPermissions(bool get_owner_perms) const
const CPDF_Dictionary * GetTrailer() const
FX_FILESIZE GetObjectPositionOrZero(uint32_t objnum) const
void SetLinearizedHeaderForTesting(std::unique_ptr< CPDF_LinearizedHeader > pLinearized)
Error StartLinearizedParse(RetainPtr< CPDF_ReadValidator > validator, const ByteString &password)
uint32_t GetRootObjNum() const
RetainPtr< const CPDF_Array > GetIDArray() const
RetainPtr< const CPDF_Dictionary > GetEncryptDict() const
std::vector< unsigned int > GetTrailerEnds()
CPDF_Parser(ParsedObjectsHolder *holder)
bool WriteToArchive(IFX_ArchiveStream *archive, FX_FILESIZE src_size)
ByteString GetPassword() const
Definition cpdf_parser.h:70
CPDF_Dictionary * GetMutableTrailerForTesting()
uint32_t GetTrailerObjectNumber() const
static constexpr uint32_t kMaxObjectNumber
Definition cpdf_parser.h:57
uint32_t GetFirstPageNo() const
Error StartParseInternal()
ByteString GetEncodedPassword() const
FX_FILESIZE GetDocumentSize() const
bool IsValidObjectNumber(uint32_t objnum) const
Error StartParse(RetainPtr< IFX_SeekableReadStream > pFile, const ByteString &password)
std::unique_ptr< CPDF_LinearizedHeader > ParseLinearizedHeader()
FX_FILESIZE ParseStartXRef()
bool IsObjectFree(uint32_t objnum) const
RetainPtr< CPDF_Dictionary > GetCombinedTrailer() const
bool LoadCrossRefTable(FX_FILESIZE pos, bool skip)
uint32_t GetLastObjNum() const
RetainPtr< CPDF_Object > ParseIndirectObject(uint32_t objnum)
uint32_t GetInfoObjNum() const
bool operator==(const char *ptr) const
CPDF_CrossRefTable::ObjectType ObjectType
CPDF_CrossRefTable::ObjectInfo ObjectInfo
int FXSYS_DecimalCharToInt(wchar_t c)
pdfium::CheckedNumeric< FX_FILESIZE > FX_SAFE_FILESIZE
pdfium::CheckedNumeric< uint32_t > FX_SAFE_UINT32
#define FX_FILESIZE
Definition fx_types.h:19