Qt
Internal/Contributor docs for the Qt SDK. Note: These are NOT official API docs; those are found at https://doc.qt.io/
Loading...
Searching...
No Matches
cpdf_parser.cpp
Go to the documentation of this file.
1// Copyright 2016 The PDFium Authors
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7#include "core/fpdfapi/parser/cpdf_parser.h"
8
9#include <ctype.h>
10#include <stdint.h>
11
12#include <algorithm>
13#include <utility>
14#include <vector>
15
16#include "core/fpdfapi/parser/cpdf_array.h"
17#include "core/fpdfapi/parser/cpdf_crypto_handler.h"
18#include "core/fpdfapi/parser/cpdf_dictionary.h"
19#include "core/fpdfapi/parser/cpdf_document.h"
20#include "core/fpdfapi/parser/cpdf_linearized_header.h"
21#include "core/fpdfapi/parser/cpdf_number.h"
22#include "core/fpdfapi/parser/cpdf_object_stream.h"
23#include "core/fpdfapi/parser/cpdf_read_validator.h"
24#include "core/fpdfapi/parser/cpdf_reference.h"
25#include "core/fpdfapi/parser/cpdf_security_handler.h"
26#include "core/fpdfapi/parser/cpdf_stream.h"
27#include "core/fpdfapi/parser/cpdf_stream_acc.h"
28#include "core/fpdfapi/parser/cpdf_syntax_parser.h"
29#include "core/fpdfapi/parser/fpdf_parser_utility.h"
30#include "core/fxcrt/autorestorer.h"
31#include "core/fxcrt/data_vector.h"
32#include "core/fxcrt/fx_extension.h"
33#include "core/fxcrt/fx_safe_types.h"
34#include "core/fxcrt/scoped_set_insertion.h"
35#include "third_party/base/check.h"
36#include "third_party/base/check_op.h"
37#include "third_party/base/containers/contains.h"
38#include "third_party/base/containers/span.h"
39#include "third_party/base/notreached.h"
40
41using ObjectType = CPDF_CrossRefTable::ObjectType;
42using ObjectInfo = CPDF_CrossRefTable::ObjectInfo;
43
44namespace {
45
46// A limit on the size of the xref table. Theoretical limits are higher, but
47// this may be large enough in practice. The max size should always be 1 more
48// than the max object number.
49constexpr int32_t kMaxXRefSize = CPDF_Parser::kMaxObjectNumber + 1;
50
51// "%PDF-1.7\n"
52constexpr FX_FILESIZE kPDFHeaderSize = 9;
53
54// The required number of fields in a /W array in a cross-reference stream
55// dictionary.
56constexpr size_t kMinFieldCount = 3;
57
58// V4 trailers are inline.
59constexpr uint32_t kNoV4TrailerObjectNumber = 0;
60
// One subsection of a cross-reference stream: `obj_count` consecutive object
// numbers beginning at `start_obj_num`.
61struct CrossRefV5IndexEntry {
 // Object number of the first entry in the subsection.
62 uint32_t start_obj_num;
 // Number of entries in the subsection.
63 uint32_t obj_count;
64};
65
66ObjectType GetObjectTypeFromCrossRefStreamType(uint32_t cross_ref_stream_type) {
67 switch (cross_ref_stream_type) {
68 case 0:
69 return ObjectType::kFree;
70 case 1:
71 return ObjectType::kNormal;
72 case 2:
73 return ObjectType::kCompressed;
74 default:
75 return ObjectType::kNull;
76 }
77}
78
79// Use the Get*XRefStreamEntry() functions below, instead of calling this
80// directly.
81uint32_t GetVarInt(pdfium::span<const uint8_t> input) {
82 uint32_t result = 0;
83 for (uint8_t c : input)
84 result = result * 256 + c;
85 return result;
86}
87
88// The following 3 functions retrieve variable length entries from
89// cross-reference streams, as described in ISO 32000-1:2008 table 18. There are
90// only 3 fields for any given entry.
91uint32_t GetFirstXRefStreamEntry(pdfium::span<const uint8_t> entry_span,
92 pdfium::span<const uint32_t> field_widths) {
93 return GetVarInt(entry_span.first(field_widths[0]));
94}
95
96uint32_t GetSecondXRefStreamEntry(pdfium::span<const uint8_t> entry_span,
97 pdfium::span<const uint32_t> field_widths) {
98 return GetVarInt(entry_span.subspan(field_widths[0], field_widths[1]));
99}
100
101uint32_t GetThirdXRefStreamEntry(pdfium::span<const uint8_t> entry_span,
102 pdfium::span<const uint32_t> field_widths) {
103 return GetVarInt(
104 entry_span.subspan(field_widths[0] + field_widths[1], field_widths[2]));
105}
106
107std::vector<CrossRefV5IndexEntry> GetCrossRefV5Indices(const CPDF_Array* array,
108 uint32_t size) {
109 std::vector<CrossRefV5IndexEntry> indices;
110 if (array) {
111 for (size_t i = 0; i < array->size() / 2; i++) {
112 RetainPtr<const CPDF_Number> pStartNumObj = array->GetNumberAt(i * 2);
113 if (!pStartNumObj)
114 continue;
115
116 RetainPtr<const CPDF_Number> pCountObj = array->GetNumberAt(i * 2 + 1);
117 if (!pCountObj)
118 continue;
119
120 int nStartNum = pStartNumObj->GetInteger();
121 int nCount = pCountObj->GetInteger();
122 if (nStartNum < 0 || nCount <= 0)
123 continue;
124
125 indices.push_back(
126 {static_cast<uint32_t>(nStartNum), static_cast<uint32_t>(nCount)});
127 }
128 }
129
130 if (indices.empty())
131 indices.push_back({0, size});
132 return indices;
135std::vector<uint32_t> GetFieldWidths(const CPDF_Array* array) {
136 std::vector<uint32_t> results;
137 if (!array)
138 return results;
139
140 CPDF_ArrayLocker locker(array);
141 for (const auto& obj : locker)
142 results.push_back(obj->GetInteger());
143 return results;
144}
145
// Minimal CPDF_Parser::ParsedObjectsHolder whose TryInit() always reports
// success.
146class ObjectsHolderStub final : public CPDF_Parser::ParsedObjectsHolder {
147 public:
148 ObjectsHolderStub() = default;
149 ~ObjectsHolderStub() override = default;
 // Nothing to initialize; always succeeds.
150 bool TryInit() override { return true; }
151};
152
153} // namespace
154
158 if (!holder) {
159 m_pOwnedObjectsHolder = std::make_unique<ObjectsHolderStub>();
160 m_pObjectsHolder = m_pOwnedObjectsHolder.get();
161 }
162}
163
165
// Defaulted destructor, defined out of line in this translation unit. No
// explicit cleanup is performed here.
166CPDF_Parser::~CPDF_Parser() = default;
167
168uint32_t CPDF_Parser::GetLastObjNum() const {
169 return m_CrossRefTable->objects_info().empty()
170 ? 0
171 : m_CrossRefTable->objects_info().rbegin()->first;
172}
173
// Returns true when `objnum` does not exceed GetLastObjNum(). Because
// GetLastObjNum() is 0 for an empty table, object number 0 is always reported
// as valid.
174bool CPDF_Parser::IsValidObjectNumber(uint32_t objnum) const {
175 return objnum <= GetLastObjNum();
176}
177
179 const auto* info = m_CrossRefTable->GetObjectInfo(objnum);
180 return (info && info->type == ObjectType::kNormal) ? info->pos : 0;
181}
182
183ObjectType CPDF_Parser::GetObjectType(uint32_t objnum) const {
184 DCHECK(IsValidObjectNumber(objnum));
185 const auto* info = m_CrossRefTable->GetObjectInfo(objnum);
186 return info ? info->type : ObjectType::kFree;
187}
188
189bool CPDF_Parser::IsObjectFreeOrNull(uint32_t objnum) const {
190 switch (GetObjectType(objnum)) {
191 case ObjectType::kFree:
192 case ObjectType::kNull:
193 return true;
194 case ObjectType::kNormal:
195 case ObjectType::kCompressed:
196 return false;
197 }
198 NOTREACHED_NORETURN();
199}
200
// Returns true when GetObjectType(objnum) is ObjectType::kFree. Since
// GetObjectType() reports kFree for object numbers without a table entry,
// those are treated as free as well.
201bool CPDF_Parser::IsObjectFree(uint32_t objnum) const {
202 return GetObjectType(objnum) == ObjectType::kFree;
203}
204
205bool CPDF_Parser::InitSyntaxParser(RetainPtr<CPDF_ReadValidator> validator) {
206 const absl::optional<FX_FILESIZE> header_offset = GetHeaderOffset(validator);
207 if (!header_offset.has_value())
208 return false;
209 if (validator->GetSize() < header_offset.value() + kPDFHeaderSize)
210 return false;
211
212 m_pSyntax = std::make_unique<CPDF_SyntaxParser>(std::move(validator),
213 header_offset.value());
214 return ParseFileVersion();
215}
216
217bool CPDF_Parser::ParseFileVersion() {
218 m_FileVersion = 0;
219 uint8_t ch;
220 if (!m_pSyntax->GetCharAt(5, ch))
221 return false;
222
223 if (isdigit(ch))
224 m_FileVersion = FXSYS_DecimalCharToInt(static_cast<wchar_t>(ch)) * 10;
225
226 if (!m_pSyntax->GetCharAt(7, ch))
227 return false;
228
229 if (isdigit(ch))
230 m_FileVersion += FXSYS_DecimalCharToInt(static_cast<wchar_t>(ch));
231 return true;
232}
233
236 const ByteString& password) {
237 if (!InitSyntaxParser(pdfium::MakeRetain<CPDF_ReadValidator>(
238 std::move(pFileAccess), nullptr)))
239 return FORMAT_ERROR;
240 SetPassword(password);
242}
243
245 DCHECK(!m_bHasParsed);
246 DCHECK(!m_bXRefTableRebuilt);
247 m_bHasParsed = true;
248 m_bXRefStream = false;
249
250 m_LastXRefOffset = ParseStartXRef();
251 if (m_LastXRefOffset >= kPDFHeaderSize) {
252 if (!LoadAllCrossRefV4(m_LastXRefOffset) &&
253 !LoadAllCrossRefV5(m_LastXRefOffset)) {
255 return FORMAT_ERROR;
256
257 m_bXRefTableRebuilt = true;
258 m_LastXRefOffset = 0;
259 }
260 } else {
262 return FORMAT_ERROR;
263
264 m_bXRefTableRebuilt = true;
265 }
266 Error eRet = SetEncryptHandler();
267 if (eRet != SUCCESS)
268 return eRet;
269
270 if (!GetRoot() || !m_pObjectsHolder->TryInit()) {
271 if (m_bXRefTableRebuilt)
272 return FORMAT_ERROR;
273
274 ReleaseEncryptHandler();
276 return FORMAT_ERROR;
277
278 eRet = SetEncryptHandler();
279 if (eRet != SUCCESS)
280 return eRet;
281
282 m_pObjectsHolder->TryInit();
283 if (!GetRoot())
284 return FORMAT_ERROR;
285 }
287 ReleaseEncryptHandler();
289 return FORMAT_ERROR;
290
291 eRet = SetEncryptHandler();
292 if (eRet != SUCCESS)
293 return eRet;
294 }
295 if (m_pSecurityHandler && !m_pSecurityHandler->IsMetadataEncrypted()) {
296 RetainPtr<const CPDF_Reference> pMetadata =
297 ToReference(GetRoot()->GetObjectFor("Metadata"));
298 if (pMetadata)
299 m_MetadataObjnum = pMetadata->GetRefObjNum();
300 }
301 return SUCCESS;
302}
303
305 static constexpr char kStartXRefKeyword[] = "startxref";
306 m_pSyntax->SetPos(m_pSyntax->GetDocumentSize() - strlen(kStartXRefKeyword));
307 if (!m_pSyntax->BackwardsSearchToWord(kStartXRefKeyword, 4096))
308 return 0;
309
310 // Skip "startxref" keyword.
311 m_pSyntax->GetKeyword();
312
313 // Read XRef offset.
314 const CPDF_SyntaxParser::WordResult xref_offset_result =
315 m_pSyntax->GetNextWord();
316 if (!xref_offset_result.is_number || xref_offset_result.word.IsEmpty())
317 return 0;
318
319 const FX_SAFE_FILESIZE result = FXSYS_atoi64(xref_offset_result.word.c_str());
320 if (!result.IsValid() || result.ValueOrDie() >= m_pSyntax->GetDocumentSize())
321 return 0;
322
323 return result.ValueOrDie();
324}
325
326CPDF_Parser::Error CPDF_Parser::SetEncryptHandler() {
327 ReleaseEncryptHandler();
328 if (!GetTrailer())
329 return FORMAT_ERROR;
330
331 RetainPtr<const CPDF_Dictionary> pEncryptDict = GetEncryptDict();
332 if (!pEncryptDict)
333 return SUCCESS;
334
335 if (pEncryptDict->GetNameFor("Filter") != "Standard")
336 return HANDLER_ERROR;
337
338 auto pSecurityHandler = pdfium::MakeRetain<CPDF_SecurityHandler>();
339 if (!pSecurityHandler->OnInit(pEncryptDict, GetIDArray(), GetPassword()))
340 return PASSWORD_ERROR;
341
342 m_pSecurityHandler = std::move(pSecurityHandler);
343 return SUCCESS;
344}
345
// Resets `m_pSecurityHandler`, so the parser no longer holds a security
// handler.
346void CPDF_Parser::ReleaseEncryptHandler() {
347 m_pSecurityHandler.Reset();
348}
349
350// Ideally, all the cross reference entries should be verified.
351// In reality, we rarely see well-formed cross references don't match
352// with the objects. crbug/602650 showed a case where object numbers
353// in the cross reference table are all off by one.
354bool CPDF_Parser::VerifyCrossRefV4() {
355 for (const auto& it : m_CrossRefTable->objects_info()) {
356 if (it.second.pos <= 0)
357 continue;
358 // Find the first non-zero position.
359 FX_FILESIZE SavedPos = m_pSyntax->GetPos();
360 m_pSyntax->SetPos(it.second.pos);
361 CPDF_SyntaxParser::WordResult word_result = m_pSyntax->GetNextWord();
362 m_pSyntax->SetPos(SavedPos);
363 if (!word_result.is_number || word_result.word.IsEmpty() ||
364 FXSYS_atoui(word_result.word.c_str()) != it.first) {
365 // If the object number read doesn't match the one stored,
366 // something is wrong with the cross reference table.
367 return false;
368 }
369 break;
370 }
371 return true;
372}
373
374bool CPDF_Parser::LoadAllCrossRefV4(FX_FILESIZE xref_offset) {
375 if (!LoadCrossRefV4(xref_offset, true))
376 return false;
377
378 RetainPtr<CPDF_Dictionary> trailer = LoadTrailerV4();
379 if (!trailer)
380 return false;
381
382 m_CrossRefTable->SetTrailer(std::move(trailer), kNoV4TrailerObjectNumber);
383 const int32_t xrefsize = GetTrailer()->GetDirectIntegerFor("Size");
384 if (xrefsize > 0 && xrefsize <= kMaxXRefSize)
385 m_CrossRefTable->SetObjectMapSize(xrefsize);
386
388 std::vector<FX_FILESIZE> xref_stream_list{xref_stm};
389 std::vector<FX_FILESIZE> xref_list{xref_offset};
390 std::set<FX_FILESIZE> seen_xref_offset{xref_offset};
391
392 // When the trailer doesn't have Prev entry or Prev entry value is not
393 // numerical, GetDirectInteger() returns 0. Loading will end.
394 xref_offset = GetTrailer()->GetDirectIntegerFor("Prev");
395 while (xref_offset > 0) {
396 // Check for circular references.
397 if (pdfium::Contains(seen_xref_offset, xref_offset))
398 return false;
399
400 seen_xref_offset.insert(xref_offset);
401 xref_list.insert(xref_list.begin(), xref_offset);
402
403 // SLOW ...
404 LoadCrossRefV4(xref_offset, true);
405
406 RetainPtr<CPDF_Dictionary> pDict(LoadTrailerV4());
407 if (!pDict)
408 return false;
409
410 xref_offset = pDict->GetDirectIntegerFor("Prev");
411 xref_stm = pDict->GetIntegerFor("XRefStm");
412 xref_stream_list.insert(xref_stream_list.begin(), xref_stm);
413
414 // SLOW ...
415 m_CrossRefTable = CPDF_CrossRefTable::MergeUp(
416 std::make_unique<CPDF_CrossRefTable>(std::move(pDict),
417 kNoV4TrailerObjectNumber),
418 std::move(m_CrossRefTable));
419 }
420
421 // Traverse the xref data structures from oldest to newest. So entries from
422 // later iterations should overwrite existing entries.
423 for (size_t i = 0; i < xref_list.size(); ++i) {
424 if (xref_list[i] > 0 && !LoadCrossRefV4(xref_list[i], false))
425 return false;
426
427 if (xref_stream_list[i] > 0 &&
428 !LoadCrossRefV5(&xref_stream_list[i], /*is_main_xref=*/false,
429 /*overwrite_existing=*/true)) {
430 return false;
431 }
432
433 if (i == 0 && !VerifyCrossRefV4())
434 return false;
435 }
436 return true;
437}
438
439bool CPDF_Parser::LoadLinearizedAllCrossRefV4(FX_FILESIZE main_xref_offset) {
440 if (!LoadCrossRefV4(main_xref_offset, false))
441 return false;
442
443 RetainPtr<CPDF_Dictionary> main_trailer = LoadTrailerV4();
444 if (!main_trailer)
445 return false;
446
447 // GetTrailer() currently returns the first-page trailer.
449 return false;
450
451 // Read /XRefStm from the first-page trailer. No need to read /Prev for the
452 // first-page trailer, as the caller already did that and passed it in as
453 // |main_xref_offset|.
455 std::vector<FX_FILESIZE> xref_stream_list{xref_stm};
456 std::vector<FX_FILESIZE> xref_list{main_xref_offset};
457 std::set<FX_FILESIZE> seen_xref_offset{main_xref_offset};
458
459 // Merge the trailers.
460 m_CrossRefTable = CPDF_CrossRefTable::MergeUp(
461 std::make_unique<CPDF_CrossRefTable>(std::move(main_trailer),
462 kNoV4TrailerObjectNumber),
463 std::move(m_CrossRefTable));
464
465 // Now GetTrailer() returns the merged trailer, where /Prev is from the
466 // main-trailer.
468 while (xref_offset > 0) {
469 // Check for circular references.
470 if (pdfium::Contains(seen_xref_offset, xref_offset))
471 return false;
472
473 seen_xref_offset.insert(xref_offset);
474 xref_list.insert(xref_list.begin(), xref_offset);
475
476 // SLOW ...
477 LoadCrossRefV4(xref_offset, true);
478
479 RetainPtr<CPDF_Dictionary> pDict(LoadTrailerV4());
480 if (!pDict)
481 return false;
482
483 xref_offset = pDict->GetDirectIntegerFor("Prev");
484 xref_stm = pDict->GetIntegerFor("XRefStm");
485 xref_stream_list.insert(xref_stream_list.begin(), xref_stm);
486
487 // SLOW ...
488 m_CrossRefTable = CPDF_CrossRefTable::MergeUp(
489 std::make_unique<CPDF_CrossRefTable>(std::move(pDict),
490 kNoV4TrailerObjectNumber),
491 std::move(m_CrossRefTable));
492 }
493
494 if (xref_stream_list[0] > 0 &&
495 !LoadCrossRefV5(&xref_stream_list[0], /*is_main_xref=*/false,
496 /*overwrite_existing=*/true)) {
497 return false;
498 }
499
500 // Traverse the xref data structures from oldest to newest. So entries from
501 // later iterations should overwrite existing entries.
502 for (size_t i = 1; i < xref_list.size(); ++i) {
503 if (xref_list[i] > 0 && !LoadCrossRefV4(xref_list[i], false))
504 return false;
505
506 if (xref_stream_list[i] > 0 &&
507 !LoadCrossRefV5(&xref_stream_list[i], /*is_main_xref=*/false,
508 /*overwrite_existing=*/true)) {
509 return false;
510 }
511 }
512 return true;
513}
514
515bool CPDF_Parser::ParseAndAppendCrossRefSubsectionData(
516 uint32_t start_objnum,
517 uint32_t count,
518 std::vector<CrossRefObjData>* out_objects) {
519 if (!count)
520 return true;
521
522 // Each entry shall be exactly 20 byte.
523 // A sample entry looks like:
524 // "0000000000 00007 f\r\n"
525 static constexpr int32_t kEntrySize = 20;
526
527 if (!out_objects) {
528 FX_SAFE_FILESIZE pos = count;
529 pos *= kEntrySize;
530 pos += m_pSyntax->GetPos();
531 if (!pos.IsValid())
532 return false;
533 m_pSyntax->SetPos(pos.ValueOrDie());
534 return true;
535 }
536 const size_t start_obj_index = out_objects->size();
537 FX_SAFE_SIZE_T new_size = start_obj_index;
538 new_size += count;
539 if (!new_size.IsValid())
540 return false;
541
542 if (new_size.ValueOrDie() > kMaxXRefSize)
543 return false;
544
545 const size_t max_entries_in_file = m_pSyntax->GetDocumentSize() / kEntrySize;
546 if (new_size.ValueOrDie() > max_entries_in_file)
547 return false;
548
549 out_objects->resize(new_size.ValueOrDie());
550
551 DataVector<char> buf(1024 * kEntrySize + 1);
552 buf.back() = '\0';
553
554 uint32_t entries_to_read = count;
555 while (entries_to_read > 0) {
556 const uint32_t entries_in_block = std::min(entries_to_read, 1024u);
557 const uint32_t bytes_to_read = entries_in_block * kEntrySize;
558 auto block_span = pdfium::make_span(buf).first(bytes_to_read);
559 if (!m_pSyntax->ReadBlock(pdfium::as_writable_bytes(block_span)))
560 return false;
561
562 for (uint32_t i = 0; i < entries_in_block; i++) {
563 uint32_t iObjectIndex = count - entries_to_read + i;
564 CrossRefObjData& obj_data =
565 (*out_objects)[start_obj_index + iObjectIndex];
566 const uint32_t objnum = start_objnum + iObjectIndex;
567 obj_data.obj_num = objnum;
568 ObjectInfo& info = obj_data.info;
569
570 const char* pEntry = &buf[i * kEntrySize];
571 if (pEntry[17] == 'f') {
572 info.pos = 0;
573 info.type = ObjectType::kFree;
574 } else {
575 const FX_SAFE_FILESIZE offset = FXSYS_atoi64(pEntry);
576 if (!offset.IsValid())
577 return false;
578
579 if (offset.ValueOrDie() == 0) {
580 for (int32_t c = 0; c < 10; c++) {
581 if (!isdigit(pEntry[c]))
582 return false;
583 }
584 }
585
586 info.pos = offset.ValueOrDie();
587
588 // TODO(art-snake): The info.gennum is uint16_t, but version may be
589 // greated than max<uint16_t>. Needs solve this issue.
590 const int32_t version = FXSYS_atoi(pEntry + 11);
591 info.gennum = version;
592 info.type = ObjectType::kNormal;
593 }
594 }
595 entries_to_read -= entries_in_block;
596 }
597 return true;
598}
599
600bool CPDF_Parser::ParseCrossRefV4(std::vector<CrossRefObjData>* out_objects) {
601 if (out_objects)
602 out_objects->clear();
603
604 if (m_pSyntax->GetKeyword() != "xref")
605 return false;
606 std::vector<CrossRefObjData> result_objects;
607 while (true) {
608 FX_FILESIZE saved_pos = m_pSyntax->GetPos();
609 CPDF_SyntaxParser::WordResult word_result = m_pSyntax->GetNextWord();
610 const ByteString& word = word_result.word;
611 if (word.IsEmpty())
612 return false;
613
614 if (!word_result.is_number) {
615 m_pSyntax->SetPos(saved_pos);
616 break;
617 }
618
619 uint32_t start_objnum = FXSYS_atoui(word.c_str());
620 if (start_objnum >= kMaxObjectNumber)
621 return false;
622
623 uint32_t count = m_pSyntax->GetDirectNum();
624 m_pSyntax->ToNextWord();
625
626 if (!ParseAndAppendCrossRefSubsectionData(
627 start_objnum, count, out_objects ? &result_objects : nullptr)) {
628 return false;
629 }
630 }
631 if (out_objects)
632 *out_objects = std::move(result_objects);
633 return true;
634}
635
636bool CPDF_Parser::LoadCrossRefV4(FX_FILESIZE pos, bool bSkip) {
637 m_pSyntax->SetPos(pos);
638 std::vector<CrossRefObjData> objects;
639 if (!ParseCrossRefV4(bSkip ? nullptr : &objects))
640 return false;
641
642 MergeCrossRefObjectsData(objects);
643 return true;
644}
645
646void CPDF_Parser::MergeCrossRefObjectsData(
647 const std::vector<CrossRefObjData>& objects) {
648 for (const auto& obj : objects) {
649 switch (obj.info.type) {
650 case ObjectType::kFree:
651 if (obj.info.gennum > 0)
652 m_CrossRefTable->SetFree(obj.obj_num);
653 break;
654 case ObjectType::kNormal:
655 m_CrossRefTable->AddNormal(obj.obj_num, obj.info.gennum,
656 obj.info.is_object_stream_flag,
657 obj.info.pos);
658 break;
659 case ObjectType::kCompressed:
660 m_CrossRefTable->AddCompressed(obj.obj_num, obj.info.archive.obj_num,
661 obj.info.archive.obj_index);
662 break;
663 case ObjectType::kNull:
664 // Ignored.
665 break;
666 }
667 }
668}
669
670bool CPDF_Parser::LoadAllCrossRefV5(FX_FILESIZE xref_offset) {
671 if (!LoadCrossRefV5(&xref_offset, /*is_main_xref=*/true,
672 /*overwrite_existing=*/false)) {
673 return false;
674 }
675
676 // Traverse the xref objects from newest to older. So entries from later
677 // iterations should not overwrite existing entries.
678 std::set<FX_FILESIZE> seen_xref_offset;
679 while (xref_offset > 0) {
680 seen_xref_offset.insert(xref_offset);
681 if (!LoadCrossRefV5(&xref_offset, /*is_main_xref=*/false,
682 /*overwrite_existing=*/false)) {
683 return false;
684 }
685
686 // Check for circular references.
687 if (pdfium::Contains(seen_xref_offset, xref_offset))
688 return false;
689 }
690 m_ObjectStreamMap.clear();
691 m_bXRefStream = true;
692 return true;
693}
694
696 auto cross_ref_table = std::make_unique<CPDF_CrossRefTable>();
697
698 const uint32_t kBufferSize = 4096;
699 m_pSyntax->SetReadBufferSize(kBufferSize);
700 m_pSyntax->SetPos(0);
701
702 std::vector<std::pair<uint32_t, FX_FILESIZE>> numbers;
703 for (CPDF_SyntaxParser::WordResult result = m_pSyntax->GetNextWord();
704 !result.word.IsEmpty(); result = m_pSyntax->GetNextWord()) {
705 const ByteString& word = result.word;
706 if (result.is_number) {
707 numbers.emplace_back(FXSYS_atoui(word.c_str()),
708 m_pSyntax->GetPos() - word.GetLength());
709 if (numbers.size() > 2u)
710 numbers.erase(numbers.begin());
711 continue;
712 }
713
714 if (word == "(") {
715 m_pSyntax->ReadString();
716 } else if (word == "<") {
717 m_pSyntax->ReadHexString();
718 } else if (word == "trailer") {
719 RetainPtr<CPDF_Object> pTrailer = m_pSyntax->GetObjectBody(nullptr);
720 if (pTrailer) {
721 CPDF_Stream* stream_trailer = pTrailer->AsMutableStream();
722 // Grab the object number from `pTrailer` before potentially calling
723 // std::move(pTrailer) below.
724 const uint32_t trailer_object_number = pTrailer->GetObjNum();
725 RetainPtr<CPDF_Dictionary> trailer_dict =
726 stream_trailer ? stream_trailer->GetMutableDict()
727 : ToDictionary(std::move(pTrailer));
728 cross_ref_table = CPDF_CrossRefTable::MergeUp(
729 std::move(cross_ref_table),
730 std::make_unique<CPDF_CrossRefTable>(std::move(trailer_dict),
731 trailer_object_number));
732 }
733 } else if (word == "obj" && numbers.size() == 2u) {
734 const FX_FILESIZE obj_pos = numbers[0].second;
735 const uint32_t obj_num = numbers[0].first;
736 const uint32_t gen_num = numbers[1].first;
737
738 m_pSyntax->SetPos(obj_pos);
739 const RetainPtr<CPDF_Stream> pStream =
740 ToStream(m_pSyntax->GetIndirectObject(
741 nullptr, CPDF_SyntaxParser::ParseType::kStrict));
742
743 if (pStream && pStream->GetDict()->GetNameFor("Type") == "XRef") {
744 cross_ref_table = CPDF_CrossRefTable::MergeUp(
745 std::move(cross_ref_table),
746 std::make_unique<CPDF_CrossRefTable>(
747 ToDictionary(pStream->GetDict()->Clone()),
748 pStream->GetObjNum()));
749 }
750
751 if (obj_num < kMaxObjectNumber) {
752 cross_ref_table->AddNormal(obj_num, gen_num, /*is_object_stream=*/false,
753 obj_pos);
754 const auto object_stream =
755 CPDF_ObjectStream::Create(std::move(pStream));
756 if (object_stream) {
757 const auto& object_info = object_stream->object_info();
758 for (size_t i = 0; i < object_info.size(); ++i) {
759 const auto& info = object_info[i];
760 if (info.obj_num < kMaxObjectNumber)
761 cross_ref_table->AddCompressed(info.obj_num, obj_num, i);
762 }
763 }
764 }
765 }
766 numbers.clear();
767 }
768
769 m_CrossRefTable = CPDF_CrossRefTable::MergeUp(std::move(m_CrossRefTable),
770 std::move(cross_ref_table));
771 // Restore default buffer size.
772 m_pSyntax->SetReadBufferSize(CPDF_Stream::kFileBufSize);
773
774 return GetTrailer() && !m_CrossRefTable->objects_info().empty();
775}
776
// Loads one cross-reference stream and applies its entries to
// `m_CrossRefTable`.
//
// `pos` - in: file offset of the indirect object holding the stream; out: the
//         stream dictionary's /Prev value. It is only written once the object
//         has parsed as a stream with a non-zero object number and /Prev and
//         /Size are both non-negative.
// `is_main_xref` - when true, `m_CrossRefTable` is replaced by a table built
//         from this stream's dictionary and its object map size is set to
//         /Size; when false the new table is combined with the existing one
//         via CPDF_CrossRefTable::MergeUp().
// `overwrite_existing` - forwarded to ProcessCrossRefV5Entry() for each entry.
//
// Returns false if the object is not a stream or has object number 0, if /Prev
// or /Size is negative, if /W has fewer than `kMinFieldCount` values, or if the
// accumulated /W widths are not a valid FX_SAFE_UINT32. The /W checks run after
// `*pos` and `m_CrossRefTable` have already been updated.
777bool CPDF_Parser::LoadCrossRefV5(FX_FILESIZE* pos,
778 bool is_main_xref,
779 bool overwrite_existing) {
780 RetainPtr<const CPDF_Stream> pStream =
781 ToStream(ParseIndirectObjectAt(*pos, 0));
782 if (!pStream || !pStream->GetObjNum()) {
783 return false;
784 }
785
786 RetainPtr<const CPDF_Dictionary> pDict = pStream->GetDict();
787 int32_t prev = pDict->GetIntegerFor("Prev");
788 if (prev < 0)
789 return false;
790
791 int32_t size = pDict->GetIntegerFor("Size");
792 if (size < 0)
793 return false;
794
 // Hand the /Prev offset back to the caller so it can continue with the
 // previous cross-reference stream.
795 *pos = prev;
796
797 auto new_cross_ref_table = std::make_unique<CPDF_CrossRefTable>(
798 /*trailer=*/ToDictionary(pDict->Clone()),
799 /*trailer_object_number=*/pStream->GetObjNum());
800 if (is_main_xref) {
801 m_CrossRefTable = std::move(new_cross_ref_table);
802 m_CrossRefTable->SetObjectMapSize(size);
803 } else {
804 m_CrossRefTable = CPDF_CrossRefTable::MergeUp(
805 std::move(new_cross_ref_table), std::move(m_CrossRefTable));
806 }
807
808 std::vector<CrossRefV5IndexEntry> indices =
809 GetCrossRefV5Indices(pDict->GetArrayFor("Index").Get(), size);
810
811 std::vector<uint32_t> field_widths =
812 GetFieldWidths(pDict->GetArrayFor("W").Get());
813 if (field_widths.size() < kMinFieldCount)
814 return false;
815
 // The byte size of one entry is the sum of all field widths.
816 FX_SAFE_UINT32 dwAccWidth;
817 for (uint32_t width : field_widths)
818 dwAccWidth += width;
819 if (!dwAccWidth.IsValid())
820 return false;
821
822 uint32_t total_width = dwAccWidth.ValueOrDie();
823 auto pAcc = pdfium::MakeRetain<CPDF_StreamAcc>(pStream);
824 pAcc->LoadAllDataFiltered();
825
826 pdfium::span<const uint8_t> data_span = pAcc->GetSpan();
 // `segindex` counts the entries consumed by the subsections processed so
 // far; it is the entry index at which the current subsection's data starts.
827 uint32_t segindex = 0;
828 for (const auto& index : indices) {
 // Skip subsections whose data would run past the end of the stream data.
 // `segindex` is not advanced for a skipped subsection.
829 FX_SAFE_UINT32 seg_end = segindex;
830 seg_end += index.obj_count;
831 seg_end *= total_width;
832 if (!seg_end.IsValid() || seg_end.ValueOrDie() > data_span.size())
833 continue;
834
835 pdfium::span<const uint8_t> seg_span = data_span.subspan(
836 segindex * total_width, index.obj_count * total_width);
837 FX_SAFE_UINT32 safe_new_size = index.start_obj_num;
838 safe_new_size += index.obj_count;
839 if (!safe_new_size.IsValid()) {
840 continue;
841 }
842
843 // Until SetObjectMapSize() below has been called by a prior loop iteration,
844 // `current_size` is based on the /Size value parsed in LoadCrossRefV5().
845 // PDFs may not always have the correct /Size. In this case, other PDF
846 // implementations ignore the incorrect size, and PDFium also ignores
847 // incorrect size in trailers for V4 xrefs.
848 const uint32_t current_size =
849 m_CrossRefTable->objects_info().empty() ? 0 : GetLastObjNum() + 1;
850 // So allow `new_size` to be greater than `current_size`, but avoid going
851 // over `kMaxXRefSize`. This works just fine because the loop below checks
852 // against `kMaxObjectNumber`, and the two "max" constants are in sync.
853 const uint32_t new_size =
854 std::min<uint32_t>(safe_new_size.ValueOrDie(), kMaxXRefSize);
855 if (new_size > current_size) {
856 m_CrossRefTable->SetObjectMapSize(new_size);
857 }
858
859 for (uint32_t i = 0; i < index.obj_count; ++i) {
860 const uint32_t obj_num = index.start_obj_num + i;
 // Object numbers only grow from here, so the rest of the subsection is
 // out of range as well.
861 if (obj_num >= kMaxObjectNumber) {
862 break;
863 }
864
865 ProcessCrossRefV5Entry(seg_span.subspan(i * total_width, total_width),
866 field_widths, obj_num, overwrite_existing);
867 }
868
869 segindex += index.obj_count;
870 }
871 return true;
872}
873
874void CPDF_Parser::ProcessCrossRefV5Entry(
875 pdfium::span<const uint8_t> entry_span,
876 pdfium::span<const uint32_t> field_widths,
877 uint32_t obj_num,
878 bool overwrite_existing) {
879 DCHECK_GE(field_widths.size(), kMinFieldCount);
880 ObjectType type;
881 if (field_widths[0]) {
882 const uint32_t cross_ref_stream_obj_type =
883 GetFirstXRefStreamEntry(entry_span, field_widths);
884 type = GetObjectTypeFromCrossRefStreamType(cross_ref_stream_obj_type);
885 if (type == ObjectType::kNull) {
886 return;
887 }
888 } else {
889 // Per ISO 32000-1:2008 table 17, use the default value of 1 for the xref
890 // stream entry when it is not specified. The `type` assignment is the
891 // equivalent to calling GetObjectTypeFromCrossRefStreamType(1).
892 type = ObjectType::kNormal;
893 }
894
895 const ObjectType existing_type = GetObjectType(obj_num);
896 if (existing_type == ObjectType::kNull) {
897 const uint32_t offset = GetSecondXRefStreamEntry(entry_span, field_widths);
898 if (pdfium::base::IsValueInRangeForNumericType<FX_FILESIZE>(offset))
899 m_CrossRefTable->AddNormal(obj_num, 0, /*is_object_stream=*/false,
900 offset);
901 return;
902 }
903
904 if (!overwrite_existing && existing_type != ObjectType::kFree) {
905 return;
906 }
907
908 if (type == ObjectType::kFree) {
909 m_CrossRefTable->SetFree(obj_num);
910 return;
911 }
912
913 if (type == ObjectType::kNormal) {
914 const uint32_t offset = GetSecondXRefStreamEntry(entry_span, field_widths);
915 if (pdfium::base::IsValueInRangeForNumericType<FX_FILESIZE>(offset))
916 m_CrossRefTable->AddNormal(obj_num, 0, /*is_object_stream=*/false,
917 offset);
918 return;
919 }
920
921 DCHECK_EQ(type, ObjectType::kCompressed);
922 const uint32_t archive_obj_num =
923 GetSecondXRefStreamEntry(entry_span, field_widths);
924 if (!IsValidObjectNumber(archive_obj_num)) {
925 return;
926 }
927
928 const uint32_t archive_obj_index =
929 GetThirdXRefStreamEntry(entry_span, field_widths);
930 m_CrossRefTable->AddCompressed(obj_num, archive_obj_num, archive_obj_index);
931}
932
// Returns the trailer's "ID" array as reported by GetArrayFor("ID"), or null
// when there is no trailer.
933RetainPtr<const CPDF_Array> CPDF_Parser::GetIDArray() const {
934 return GetTrailer() ? GetTrailer()->GetArrayFor("ID") : nullptr;
935}
936
937RetainPtr<const CPDF_Dictionary> CPDF_Parser::GetRoot() const {
939 m_pObjectsHolder->GetOrParseIndirectObject(GetRootObjNum());
940 return obj ? obj->GetDict() : nullptr;
941}
942
943RetainPtr<const CPDF_Dictionary> CPDF_Parser::GetEncryptDict() const {
944 if (!GetTrailer())
945 return nullptr;
946
947 RetainPtr<const CPDF_Object> pEncryptObj =
948 GetTrailer()->GetObjectFor("Encrypt");
949 if (!pEncryptObj)
950 return nullptr;
951
952 if (pEncryptObj->IsDictionary())
953 return pdfium::WrapRetain(pEncryptObj->AsDictionary());
954
955 if (pEncryptObj->IsReference()) {
956 return ToDictionary(m_pObjectsHolder->GetOrParseIndirectObject(
957 pEncryptObj->AsReference()->GetRefObjNum()));
958 }
959 return nullptr;
960}
961
// Returns the result of passing the current password to the security handler's
// GetEncodedPassword().
// NOTE(review): GetSecurityHandler() is dereferenced without a null check;
// presumably callers only use this once a security handler is installed -
// confirm.
962ByteString CPDF_Parser::GetEncodedPassword() const {
963 return GetSecurityHandler()->GetEncodedPassword(GetPassword().AsStringView());
964}
965
// Returns the trailer dictionary currently held by the cross-reference table.
// Callers in this file treat a null result as "no trailer".
966const CPDF_Dictionary* CPDF_Parser::GetTrailer() const {
967 return m_CrossRefTable->trailer();
968}
969
971 return m_CrossRefTable->GetMutableTrailerForTesting();
972}
973
975 return m_CrossRefTable->trailer_object_number();
976}
977
978RetainPtr<CPDF_Dictionary> CPDF_Parser::GetCombinedTrailer() const {
979 return m_CrossRefTable->trailer()
980 ? ToDictionary(m_CrossRefTable->trailer()->Clone())
981 : RetainPtr<CPDF_Dictionary>();
982}
983
984uint32_t CPDF_Parser::GetInfoObjNum() const {
985 RetainPtr<const CPDF_Reference> pRef =
986 ToReference(m_CrossRefTable->trailer()
987 ? m_CrossRefTable->trailer()->GetObjectFor("Info")
988 : nullptr);
989 return pRef ? pRef->GetRefObjNum() : CPDF_Object::kInvalidObjNum;
990}
991
992uint32_t CPDF_Parser::GetRootObjNum() const {
993 RetainPtr<const CPDF_Reference> pRef =
994 ToReference(m_CrossRefTable->trailer()
995 ? m_CrossRefTable->trailer()->GetObjectFor("Root")
996 : nullptr);
997 return pRef ? pRef->GetRefObjNum() : CPDF_Object::kInvalidObjNum;
998}
999
1001 if (!IsValidObjectNumber(objnum))
1002 return nullptr;
1003
1004 // Prevent circular parsing the same object.
1005 if (pdfium::Contains(m_ParsingObjNums, objnum))
1006 return nullptr;
1007
1008 ScopedSetInsertion<uint32_t> local_insert(&m_ParsingObjNums, objnum);
1009 if (GetObjectType(objnum) == ObjectType::kNormal) {
1011 if (pos <= 0)
1012 return nullptr;
1013 return ParseIndirectObjectAt(pos, objnum);
1014 }
1015 if (GetObjectType(objnum) != ObjectType::kCompressed) {
1016 return nullptr;
1017 }
1018
1019 const auto& info = *m_CrossRefTable->GetObjectInfo(objnum);
1020 const CPDF_ObjectStream* pObjStream = GetObjectStream(info.archive.obj_num);
1021 if (!pObjStream)
1022 return nullptr;
1023
1024 return pObjStream->ParseObject(m_pObjectsHolder, objnum,
1025 info.archive.obj_index);
1026}
1027
1028const CPDF_ObjectStream* CPDF_Parser::GetObjectStream(uint32_t object_number) {
1029 // Prevent circular parsing the same object.
1030 if (pdfium::Contains(m_ParsingObjNums, object_number))
1031 return nullptr;
1032
1033 auto it = m_ObjectStreamMap.find(object_number);
1034 if (it != m_ObjectStreamMap.end())
1035 return it->second.get();
1036
1037 const auto* info = m_CrossRefTable->GetObjectInfo(object_number);
1038 if (!info || !info->is_object_stream_flag) {
1039 return nullptr;
1040 }
1041
1042 const FX_FILESIZE object_pos = info->pos;
1043 if (object_pos <= 0)
1044 return nullptr;
1045
1046 // Keep track of `object_number` before doing more parsing.
1047 ScopedSetInsertion<uint32_t> local_insert(&m_ParsingObjNums, object_number);
1048
1049 RetainPtr<CPDF_Object> object =
1050 ParseIndirectObjectAt(object_pos, object_number);
1051 if (!object)
1052 return nullptr;
1053
1054 std::unique_ptr<CPDF_ObjectStream> objs_stream =
1055 CPDF_ObjectStream::Create(ToStream(object));
1056 const CPDF_ObjectStream* result = objs_stream.get();
1057 m_ObjectStreamMap[object_number] = std::move(objs_stream);
1058
1059 return result;
1060}
1061
1063 uint32_t objnum) {
1064 const FX_FILESIZE saved_pos = m_pSyntax->GetPos();
1065 m_pSyntax->SetPos(pos);
1066
1067 auto result = m_pSyntax->GetIndirectObject(
1068 m_pObjectsHolder, CPDF_SyntaxParser::ParseType::kLoose);
1069 m_pSyntax->SetPos(saved_pos);
1070 if (result && objnum && result->GetObjNum() != objnum)
1071 return nullptr;
1072
1073 const bool should_decrypt = m_pSecurityHandler &&
1074 m_pSecurityHandler->GetCryptoHandler() &&
1075 objnum != m_MetadataObjnum;
1076 if (should_decrypt &&
1077 !m_pSecurityHandler->GetCryptoHandler()->DecryptObjectTree(result)) {
1078 return nullptr;
1079 }
1080 return result;
1081}
1082
1084 return m_pSyntax->GetDocumentSize();
1085}
1086
1087uint32_t CPDF_Parser::GetFirstPageNo() const {
1088 return m_pLinearized ? m_pLinearized->GetFirstPageNo() : 0;
1089}
1090
1092 std::unique_ptr<CPDF_LinearizedHeader> pLinearized) {
1093 m_pLinearized = std::move(pLinearized);
1094}
1095
1096RetainPtr<CPDF_Dictionary> CPDF_Parser::LoadTrailerV4() {
1097 if (m_pSyntax->GetKeyword() != "trailer")
1098 return nullptr;
1099
1100 return ToDictionary(m_pSyntax->GetObjectBody(m_pObjectsHolder));
1101}
1102
1103uint32_t CPDF_Parser::GetPermissions(bool get_owner_perms) const {
1104 return m_pSecurityHandler
1105 ? m_pSecurityHandler->GetPermissions(get_owner_perms)
1106 : 0xFFFFFFFF;
1107}
1108
1110 return CPDF_LinearizedHeader::Parse(m_pSyntax.get());
1111}
1112
1114 RetainPtr<CPDF_ReadValidator> validator,
1115 const ByteString& password) {
1116 DCHECK(!m_bHasParsed);
1117 DCHECK(!m_bXRefTableRebuilt);
1118 SetPassword(password);
1119 m_bXRefStream = false;
1120 m_LastXRefOffset = 0;
1121
1122 if (!InitSyntaxParser(std::move(validator)))
1123 return FORMAT_ERROR;
1124
1125 m_pLinearized = ParseLinearizedHeader();
1126 if (!m_pLinearized)
1127 return StartParseInternal();
1128
1129 m_bHasParsed = true;
1130
1131 m_LastXRefOffset = m_pLinearized->GetLastXRefOffset();
1132 FX_FILESIZE dwFirstXRefOffset = m_LastXRefOffset;
1133 bool bLoadV4 = LoadCrossRefV4(dwFirstXRefOffset, false);
1134 if (!bLoadV4 && !LoadCrossRefV5(&dwFirstXRefOffset, /*is_main_xref=*/true,
1135 /*overwrite_existing=*/false)) {
1136 if (!RebuildCrossRef())
1137 return FORMAT_ERROR;
1138
1139 m_bXRefTableRebuilt = true;
1140 m_LastXRefOffset = 0;
1141 }
1142 if (bLoadV4) {
1143 RetainPtr<CPDF_Dictionary> trailer = LoadTrailerV4();
1144 if (!trailer)
1145 return SUCCESS;
1146
1147 m_CrossRefTable->SetTrailer(std::move(trailer), kNoV4TrailerObjectNumber);
1148 const int32_t xrefsize = GetTrailer()->GetDirectIntegerFor("Size");
1149 if (xrefsize > 0) {
1150 // Check if `xrefsize` is correct. If it is incorrect, give up and rebuild
1151 // the xref table.
1152 const uint32_t expected_last_obj_num = xrefsize - 1;
1153 if (GetLastObjNum() != expected_last_obj_num && !RebuildCrossRef()) {
1154 return FORMAT_ERROR;
1155 }
1156 }
1157 }
1158
1159 Error eRet = SetEncryptHandler();
1160 if (eRet != SUCCESS)
1161 return eRet;
1162
1163 if (!GetRoot() || !m_pObjectsHolder->TryInit()) {
1164 if (m_bXRefTableRebuilt)
1165 return FORMAT_ERROR;
1166
1167 ReleaseEncryptHandler();
1168 if (!RebuildCrossRef())
1169 return FORMAT_ERROR;
1170
1171 eRet = SetEncryptHandler();
1172 if (eRet != SUCCESS)
1173 return eRet;
1174
1175 m_pObjectsHolder->TryInit();
1176 if (!GetRoot())
1177 return FORMAT_ERROR;
1178 }
1179
1181 ReleaseEncryptHandler();
1183 return FORMAT_ERROR;
1184
1185 eRet = SetEncryptHandler();
1186 if (eRet != SUCCESS)
1187 return eRet;
1188 }
1189
1190 if (m_pSecurityHandler && m_pSecurityHandler->IsMetadataEncrypted()) {
1191 RetainPtr<const CPDF_Reference> pMetadata =
1192 ToReference(GetRoot()->GetObjectFor("Metadata"));
1193 if (pMetadata)
1194 m_MetadataObjnum = pMetadata->GetRefObjNum();
1195 }
1196 return SUCCESS;
1197}
1198
1199bool CPDF_Parser::LoadLinearizedAllCrossRefV5(FX_FILESIZE main_xref_offset) {
1200 FX_FILESIZE xref_offset = main_xref_offset;
1201 if (!LoadCrossRefV5(&xref_offset, /*is_main_xref=*/false,
1202 /*overwrite_existing=*/false)) {
1203 return false;
1204 }
1205
1206 // Traverse the xref objects from newest to older. So entries from later
1207 // iterations should not overwrite existing entries.
1208 std::set<FX_FILESIZE> seen_xref_offset;
1209 while (xref_offset) {
1210 seen_xref_offset.insert(xref_offset);
1211 if (!LoadCrossRefV5(&xref_offset, /*is_main_xref=*/false,
1212 /*overwrite_existing=*/false)) {
1213 return false;
1214 }
1215
1216 // Check for circular references.
1217 if (pdfium::Contains(seen_xref_offset, xref_offset))
1218 return false;
1219 }
1220 m_ObjectStreamMap.clear();
1221 m_bXRefStream = true;
1222 return true;
1223}
1224
1225CPDF_Parser::Error CPDF_Parser::LoadLinearizedMainXRefTable() {
1226 const FX_SAFE_FILESIZE prev = GetTrailer()->GetIntegerFor("Prev");
1227 const FX_FILESIZE main_xref_offset = prev.ValueOrDefault(-1);
1228 if (main_xref_offset < 0)
1229 return FORMAT_ERROR;
1230
1231 if (main_xref_offset == 0)
1232 return SUCCESS;
1233
1234 const AutoRestorer<uint32_t> save_metadata_objnum(&m_MetadataObjnum);
1235 m_MetadataObjnum = 0;
1236 m_ObjectStreamMap.clear();
1237
1238 if (!LoadLinearizedAllCrossRefV4(main_xref_offset) &&
1239 !LoadLinearizedAllCrossRefV5(main_xref_offset)) {
1240 m_LastXRefOffset = 0;
1241 return FORMAT_ERROR;
1242 }
1243
1244 return SUCCESS;
1245}
1246
1248 std::unique_ptr<CPDF_SyntaxParser> parser) {
1249 m_pSyntax = std::move(parser);
1250}
1251
1253 std::vector<unsigned int> trailer_ends;
1254 m_pSyntax->SetTrailerEnds(&trailer_ends);
1255
1256 // Traverse the document.
1257 m_pSyntax->SetPos(0);
1258 while (true) {
1259 CPDF_SyntaxParser::WordResult word_result = m_pSyntax->GetNextWord();
1260 if (word_result.is_number) {
1261 // The object number was read. Read the generation number.
1262 word_result = m_pSyntax->GetNextWord();
1263 if (!word_result.is_number)
1264 break;
1265
1266 word_result = m_pSyntax->GetNextWord();
1267 if (word_result.word != "obj")
1268 break;
1269
1270 m_pSyntax->GetObjectBody(nullptr);
1271
1272 word_result = m_pSyntax->GetNextWord();
1273 if (word_result.word != "endobj")
1274 break;
1275 } else if (word_result.word == "trailer") {
1276 m_pSyntax->GetObjectBody(nullptr);
1277 } else if (word_result.word == "startxref") {
1278 m_pSyntax->GetNextWord();
1279 } else if (word_result.word == "xref") {
1280 while (true) {
1281 word_result = m_pSyntax->GetNextWord();
1282 if (word_result.word.IsEmpty() || word_result.word == "startxref")
1283 break;
1284 }
1285 m_pSyntax->GetNextWord();
1286 } else {
1287 break;
1288 }
1289 }
1290
1291 // Stop recording trailer ends.
1292 m_pSyntax->SetTrailerEnds(nullptr);
1293 return trailer_ends;
1294}
1295
1297 FX_FILESIZE src_size) {
1298 static constexpr FX_FILESIZE kBufferSize = 4096;
1299 DataVector<uint8_t> buffer(kBufferSize);
1300 m_pSyntax->SetPos(0);
1301 while (src_size) {
1302 const uint32_t block_size =
1303 static_cast<uint32_t>(std::min(kBufferSize, src_size));
1304 auto block_span = pdfium::make_span(buffer).first(block_size);
1305 if (!m_pSyntax->ReadBlock(block_span))
1306 return false;
1307 if (!archive->WriteBlock(pdfium::make_span(buffer).first(block_size)))
1308 return false;
1309 src_size -= block_size;
1310 }
1311 return true;
1312}
CPDF_ArrayLocker(const CPDF_Array *pArray)
int GetDirectIntegerFor(const ByteString &key) const
RetainPtr< const CPDF_Array > GetArrayFor(const ByteString &key) const
static constexpr uint32_t kInvalidObjNum
Definition cpdf_object.h:52
bool IsObjectFreeOrNull(uint32_t objnum) const
void SetSyntaxParserForTesting(std::unique_ptr< CPDF_SyntaxParser > parser)
bool RebuildCrossRef()
uint32_t GetPermissions(bool get_owner_perms) const
const CPDF_Dictionary * GetTrailer() const
FX_FILESIZE GetObjectPositionOrZero(uint32_t objnum) const
void SetLinearizedHeaderForTesting(std::unique_ptr< CPDF_LinearizedHeader > pLinearized)
Error StartLinearizedParse(RetainPtr< CPDF_ReadValidator > validator, const ByteString &password)
uint32_t GetRootObjNum() const
RetainPtr< const CPDF_Array > GetIDArray() const
RetainPtr< const CPDF_Dictionary > GetEncryptDict() const
std::vector< unsigned int > GetTrailerEnds()
CPDF_Parser(ParsedObjectsHolder *holder)
RetainPtr< const CPDF_Dictionary > GetRoot() const
bool WriteToArchive(IFX_ArchiveStream *archive, FX_FILESIZE src_size)
void SetPassword(const ByteString &password)
Definition cpdf_parser.h:70
ByteString GetPassword() const
Definition cpdf_parser.h:71
CPDF_Dictionary * GetMutableTrailerForTesting()
uint32_t GetTrailerObjectNumber() const
RetainPtr< CPDF_Object > ParseIndirectObjectAt(FX_FILESIZE pos, uint32_t objnum)
static constexpr uint32_t kMaxObjectNumber
Definition cpdf_parser.h:57
uint32_t GetFirstPageNo() const
Error StartParseInternal()
ByteString GetEncodedPassword() const
FX_FILESIZE GetDocumentSize() const
bool IsValidObjectNumber(uint32_t objnum) const
Error StartParse(RetainPtr< IFX_SeekableReadStream > pFile, const ByteString &password)
std::unique_ptr< CPDF_LinearizedHeader > ParseLinearizedHeader()
FX_FILESIZE ParseStartXRef()
bool IsObjectFree(uint32_t objnum) const
RetainPtr< CPDF_Dictionary > GetCombinedTrailer() const
uint32_t GetLastObjNum() const
RetainPtr< CPDF_Object > ParseIndirectObject(uint32_t objnum)
bool LoadCrossRefV4(FX_FILESIZE pos, bool bSkip)
uint32_t GetInfoObjNum() const
bool operator==(const char *ptr) const
const char * c_str() const
Definition bytestring.h:76
bool IsEmpty() const
Definition bytestring.h:119
int FXSYS_DecimalCharToInt(wchar_t c)
int32_t FXSYS_atoi(const char *str)
uint32_t FXSYS_atoui(const char *str)
#define FX_FILESIZE
Definition fx_types.h:19