135std::vector<uint32_t> GetFieldWidths(
const CPDF_Array* array) {
136 std::vector<uint32_t> results;
141 for (
const auto& obj : locker)
142 results.push_back(obj->GetInteger());
148 ObjectsHolderStub() =
default;
149 ~ObjectsHolderStub()
override =
default;
150 bool TryInit()
override {
return true; }
159 m_pOwnedObjectsHolder = std::make_unique<ObjectsHolderStub>();
160 m_pObjectsHolder = m_pOwnedObjectsHolder.get();
169 return m_CrossRefTable->objects_info().empty()
171 : m_CrossRefTable->objects_info().rbegin()->first;
179 const auto* info = m_CrossRefTable->GetObjectInfo(objnum);
180 return (info && info->type == ObjectType
::kNormal) ? info->pos : 0;
183ObjectType
CPDF_Parser::GetObjectType(uint32_t objnum)
const {
184 DCHECK(IsValidObjectNumber(objnum));
185 const auto* info = m_CrossRefTable->GetObjectInfo(objnum);
186 return info ? info->type : ObjectType
::kFree;
190 switch (GetObjectType(objnum)) {
198 NOTREACHED_NORETURN();
202 return GetObjectType(objnum) == ObjectType
::kFree;
206 const absl::optional<
FX_FILESIZE> header_offset = GetHeaderOffset(validator);
207 if (!header_offset.has_value())
209 if (validator->GetSize() < header_offset.value() + kPDFHeaderSize)
212 m_pSyntax = std::make_unique<CPDF_SyntaxParser>(std::move(validator),
213 header_offset.value());
214 return ParseFileVersion();
220 if (!m_pSyntax->GetCharAt(5, ch))
226 if (!m_pSyntax->GetCharAt(7, ch))
236 const ByteString& password) {
238 std::move(pFileAccess),
nullptr)))
245 DCHECK(!m_bHasParsed);
246 DCHECK(!m_bXRefTableRebuilt);
248 m_bXRefStream =
false;
251 if (m_LastXRefOffset >= kPDFHeaderSize) {
252 if (!LoadAllCrossRefV4(m_LastXRefOffset) &&
253 !LoadAllCrossRefV5(m_LastXRefOffset)) {
257 m_bXRefTableRebuilt =
true;
258 m_LastXRefOffset = 0;
264 m_bXRefTableRebuilt =
true;
266 Error eRet = SetEncryptHandler();
270 if (!GetRoot() || !m_pObjectsHolder->TryInit()) {
271 if (m_bXRefTableRebuilt)
274 ReleaseEncryptHandler();
278 eRet = SetEncryptHandler();
282 m_pObjectsHolder->TryInit();
287 ReleaseEncryptHandler();
291 eRet = SetEncryptHandler();
295 if (m_pSecurityHandler && !m_pSecurityHandler->IsMetadataEncrypted()) {
296 RetainPtr<
const CPDF_Reference> pMetadata =
297 ToReference(GetRoot()->GetObjectFor(
"Metadata"));
299 m_MetadataObjnum = pMetadata->GetRefObjNum();
305 static constexpr char kStartXRefKeyword[] =
"startxref";
306 m_pSyntax->SetPos(m_pSyntax->GetDocumentSize() - strlen(kStartXRefKeyword));
307 if (!m_pSyntax->BackwardsSearchToWord(kStartXRefKeyword, 4096))
311 m_pSyntax->GetKeyword();
315 m_pSyntax->GetNextWord();
316 if (!xref_offset_result
.is_number || xref_offset_result.word.IsEmpty())
319 const FX_SAFE_FILESIZE result = FXSYS_atoi64(xref_offset_result.word.c_str());
320 if (!result.IsValid() || result.ValueOrDie() >= m_pSyntax->GetDocumentSize())
323 return result.ValueOrDie();
327 ReleaseEncryptHandler();
331 RetainPtr<
const CPDF_Dictionary> pEncryptDict = GetEncryptDict();
335 if (pEncryptDict->GetNameFor(
"Filter") !=
"Standard")
338 auto pSecurityHandler = pdfium::MakeRetain<CPDF_SecurityHandler>();
342 m_pSecurityHandler = std::move(pSecurityHandler);
347 m_pSecurityHandler.Reset();
355 for (
const auto& it : m_CrossRefTable->objects_info()) {
356 if (it.second.pos <= 0)
360 m_pSyntax->SetPos(it.second.pos);
361 CPDF_SyntaxParser::WordResult word_result = m_pSyntax->GetNextWord();
362 m_pSyntax->SetPos(SavedPos);
363 if (!word_result.is_number || word_result.word.IsEmpty() ||
364 FXSYS_atoui(word_result.word.c_str()) != it.first) {
378 RetainPtr<CPDF_Dictionary> trailer = LoadTrailerV4();
382 m_CrossRefTable->SetTrailer(std::move(trailer), kNoV4TrailerObjectNumber);
384 if (xrefsize > 0 && xrefsize <= kMaxXRefSize)
385 m_CrossRefTable->SetObjectMapSize(xrefsize);
388 std::vector<
FX_FILESIZE> xref_stream_list{xref_stm};
390 std::set<
FX_FILESIZE> seen_xref_offset{xref_offset};
395 while (xref_offset > 0) {
397 if (pdfium::Contains(seen_xref_offset, xref_offset))
400 seen_xref_offset.insert(xref_offset);
401 xref_list.insert(xref_list.begin(), xref_offset);
406 RetainPtr<CPDF_Dictionary> pDict(LoadTrailerV4());
410 xref_offset = pDict->GetDirectIntegerFor(
"Prev");
411 xref_stm = pDict->GetIntegerFor(
"XRefStm");
412 xref_stream_list.insert(xref_stream_list.begin(), xref_stm);
415 m_CrossRefTable = CPDF_CrossRefTable::MergeUp(
416 std::make_unique<CPDF_CrossRefTable>(std::move(pDict),
417 kNoV4TrailerObjectNumber),
418 std::move(m_CrossRefTable));
423 for (size_t i = 0; i < xref_list.size(); ++i) {
427 if (xref_stream_list[i] > 0 &&
428 !LoadCrossRefV5(&xref_stream_list[i],
false,
433 if (i == 0 && !VerifyCrossRefV4())
443 RetainPtr<CPDF_Dictionary> main_trailer = LoadTrailerV4();
455 std::vector<
FX_FILESIZE> xref_stream_list{xref_stm};
456 std::vector<
FX_FILESIZE> xref_list{main_xref_offset};
457 std::set<
FX_FILESIZE> seen_xref_offset{main_xref_offset};
460 m_CrossRefTable = CPDF_CrossRefTable::MergeUp(
461 std::make_unique<CPDF_CrossRefTable>(std::move(main_trailer),
462 kNoV4TrailerObjectNumber),
463 std::move(m_CrossRefTable));
468 while (xref_offset > 0) {
470 if (pdfium::Contains(seen_xref_offset, xref_offset))
473 seen_xref_offset.insert(xref_offset);
474 xref_list.insert(xref_list.begin(), xref_offset);
479 RetainPtr<CPDF_Dictionary> pDict(LoadTrailerV4());
483 xref_offset = pDict->GetDirectIntegerFor(
"Prev");
484 xref_stm = pDict->GetIntegerFor(
"XRefStm");
485 xref_stream_list.insert(xref_stream_list.begin(), xref_stm);
488 m_CrossRefTable = CPDF_CrossRefTable::MergeUp(
489 std::make_unique<CPDF_CrossRefTable>(std::move(pDict),
490 kNoV4TrailerObjectNumber),
491 std::move(m_CrossRefTable));
494 if (xref_stream_list[0] > 0 &&
495 !LoadCrossRefV5(&xref_stream_list[0],
false,
502 for (size_t i = 1; i < xref_list.size(); ++i) {
506 if (xref_stream_list[i] > 0 &&
507 !LoadCrossRefV5(&xref_stream_list[i],
false,
515bool CPDF_Parser::ParseAndAppendCrossRefSubsectionData(
516 uint32_t start_objnum,
518 std::vector<CrossRefObjData>* out_objects) {
525 static constexpr int32_t kEntrySize = 20;
528 FX_SAFE_FILESIZE pos = count;
530 pos += m_pSyntax->GetPos();
533 m_pSyntax->SetPos(pos.ValueOrDie());
536 const size_t start_obj_index = out_objects->size();
537 FX_SAFE_SIZE_T new_size = start_obj_index;
539 if (!new_size.IsValid())
542 if (new_size.ValueOrDie() > kMaxXRefSize)
545 const size_t max_entries_in_file = m_pSyntax->GetDocumentSize() / kEntrySize;
546 if (new_size.ValueOrDie() > max_entries_in_file)
549 out_objects->resize(new_size.ValueOrDie());
551 DataVector<
char> buf(1024 * kEntrySize + 1);
554 uint32_t entries_to_read = count;
555 while (entries_to_read > 0) {
556 const uint32_t entries_in_block =
std::min(entries_to_read, 1024u);
557 const uint32_t bytes_to_read = entries_in_block * kEntrySize;
558 auto block_span = pdfium::make_span(buf).first(bytes_to_read);
559 if (!m_pSyntax->ReadBlock(pdfium::as_writable_bytes(block_span)))
562 for (uint32_t i = 0; i < entries_in_block; i++) {
563 uint32_t iObjectIndex = count - entries_to_read + i;
564 CrossRefObjData& obj_data =
565 (*out_objects)[start_obj_index + iObjectIndex];
566 const uint32_t objnum = start_objnum + iObjectIndex;
567 obj_data.obj_num = objnum;
568 ObjectInfo& info = obj_data.info;
570 const char* pEntry = &buf[i * kEntrySize];
571 if (pEntry[17] ==
'f') {
575 const FX_SAFE_FILESIZE offset = FXSYS_atoi64(pEntry);
576 if (!offset.IsValid())
579 if (offset.ValueOrDie() == 0) {
580 for (int32_t c = 0; c < 10; c++) {
581 if (!isdigit(pEntry[c]))
586 info
.pos = offset.ValueOrDie();
595 entries_to_read -= entries_in_block;
600bool CPDF_Parser::ParseCrossRefV4(std::vector<CrossRefObjData>* out_objects) {
602 out_objects->clear();
604 if (m_pSyntax->GetKeyword() !=
"xref")
606 std::vector<CrossRefObjData> result_objects;
610 const ByteString& word = word_result.word;
615 m_pSyntax->SetPos(saved_pos);
623 uint32_t count = m_pSyntax->GetDirectNum();
624 m_pSyntax->ToNextWord();
626 if (!ParseAndAppendCrossRefSubsectionData(
627 start_objnum, count, out_objects ? &result_objects :
nullptr)) {
632 *out_objects =
std::move(result_objects);
637 m_pSyntax->SetPos(pos);
638 std::vector<CrossRefObjData> objects;
639 if (!ParseCrossRefV4(bSkip ?
nullptr : &objects))
642 MergeCrossRefObjectsData(objects);
647 const std::vector<CrossRefObjData>& objects) {
648 for (
const auto& obj : objects) {
649 switch (obj.info.type) {
650 case ObjectType::kFree:
651 if (obj.info.gennum > 0)
652 m_CrossRefTable->SetFree(obj.obj_num);
654 case ObjectType::kNormal:
655 m_CrossRefTable->AddNormal(obj.obj_num, obj.info.gennum,
656 obj.info.is_object_stream_flag,
659 case ObjectType::kCompressed:
660 m_CrossRefTable->AddCompressed(obj.obj_num, obj.info.archive.obj_num,
661 obj.info.archive.obj_index);
663 case ObjectType::kNull:
671 if (!LoadCrossRefV5(&xref_offset,
true,
679 while (xref_offset > 0) {
680 seen_xref_offset.insert(xref_offset);
681 if (!LoadCrossRefV5(&xref_offset,
false,
687 if (pdfium::Contains(seen_xref_offset, xref_offset))
690 m_ObjectStreamMap.clear();
691 m_bXRefStream =
true;
698 const uint32_t kBufferSize = 4096;
699 m_pSyntax->SetReadBufferSize(kBufferSize);
700 m_pSyntax->SetPos(0);
702 std::vector<std::pair<uint32_t,
FX_FILESIZE>> numbers;
704 !result.word.IsEmpty(); result = m_pSyntax->GetNextWord()) {
705 const ByteString& word = result.word;
707 numbers.emplace_back(FXSYS_atoui(word.c_str()),
708 m_pSyntax->GetPos() - word.GetLength());
709 if (numbers.size() > 2u)
710 numbers.erase(numbers.begin());
715 m_pSyntax->ReadString();
716 }
else if (word
== "<") {
717 m_pSyntax->ReadHexString();
718 }
else if (word
== "trailer") {
721 CPDF_Stream* stream_trailer = pTrailer->AsMutableStream();
724 const uint32_t trailer_object_number = pTrailer->GetObjNum();
725 RetainPtr<CPDF_Dictionary> trailer_dict =
726 stream_trailer ? stream_trailer->GetMutableDict()
727 : ToDictionary(std::move(pTrailer));
728 cross_ref_table = CPDF_CrossRefTable::MergeUp(
729 std::move(cross_ref_table),
731 trailer_object_number));
733 }
else if (word
== "obj" && numbers.size() == 2u) {
735 const uint32_t obj_num = numbers[0].first;
736 const uint32_t gen_num = numbers[1].first;
738 m_pSyntax->SetPos(obj_pos);
740 ToStream(m_pSyntax->GetIndirectObject(
741 nullptr, CPDF_SyntaxParser::ParseType::kStrict));
743 if (pStream && pStream->GetDict()->GetNameFor(
"Type") ==
"XRef") {
744 cross_ref_table = CPDF_CrossRefTable::MergeUp(
745 std::move(cross_ref_table),
747 ToDictionary(pStream->GetDict()->Clone()),
748 pStream->GetObjNum()));
752 cross_ref_table->AddNormal(obj_num, gen_num,
false,
754 const auto object_stream =
755 CPDF_ObjectStream::Create(
std::move(pStream));
757 const auto& object_info = object_stream->object_info();
758 for (size_t i = 0; i < object_info.size(); ++i) {
759 const auto& info = object_info[i];
761 cross_ref_table->AddCompressed(info.obj_num, obj_num, i);
769 m_CrossRefTable = CPDF_CrossRefTable::MergeUp(std::move(m_CrossRefTable),
770 std::move(cross_ref_table));
772 m_pSyntax->SetReadBufferSize(CPDF_Stream::kFileBufSize);
774 return GetTrailer() && !m_CrossRefTable->objects_info().empty();
779 bool overwrite_existing) {
781 ToStream(ParseIndirectObjectAt(*pos, 0));
782 if (!pStream || !pStream->GetObjNum()) {
786 RetainPtr<
const CPDF_Dictionary> pDict = pStream->GetDict();
787 int32_t prev = pDict->GetIntegerFor(
"Prev");
791 int32_t size = pDict->GetIntegerFor(
"Size");
798 ToDictionary(pDict->Clone()),
799 pStream->GetObjNum());
801 m_CrossRefTable = std::move(new_cross_ref_table);
802 m_CrossRefTable->SetObjectMapSize(size);
804 m_CrossRefTable = CPDF_CrossRefTable::MergeUp(
805 std::move(new_cross_ref_table), std::move(m_CrossRefTable));
808 std::vector<CrossRefV5IndexEntry> indices =
809 GetCrossRefV5Indices(pDict->GetArrayFor(
"Index").Get(), size);
811 std::vector<uint32_t> field_widths =
812 GetFieldWidths(pDict->GetArrayFor(
"W").Get());
813 if (field_widths.size() < kMinFieldCount)
816 FX_SAFE_UINT32 dwAccWidth;
817 for (uint32_t width : field_widths)
819 if (!dwAccWidth.IsValid())
822 uint32_t total_width = dwAccWidth.ValueOrDie();
823 auto pAcc = pdfium::MakeRetain<CPDF_StreamAcc>(pStream);
824 pAcc->LoadAllDataFiltered();
826 pdfium::span<
const uint8_t> data_span = pAcc->GetSpan();
827 uint32_t segindex = 0;
828 for (
const auto& index : indices) {
829 FX_SAFE_UINT32 seg_end = segindex;
830 seg_end += index.obj_count;
831 seg_end *= total_width;
832 if (!seg_end.IsValid() || seg_end.ValueOrDie() > data_span.size())
835 pdfium::span<
const uint8_t> seg_span = data_span.subspan(
836 segindex * total_width, index.obj_count * total_width);
837 FX_SAFE_UINT32 safe_new_size = index.start_obj_num;
838 safe_new_size += index.obj_count;
839 if (!safe_new_size.IsValid()) {
848 const uint32_t current_size =
849 m_CrossRefTable->objects_info().empty() ? 0 : GetLastObjNum() + 1;
853 const uint32_t new_size =
854 std::min<uint32_t>(safe_new_size.ValueOrDie(), kMaxXRefSize);
855 if (new_size > current_size) {
856 m_CrossRefTable->SetObjectMapSize(new_size);
859 for (uint32_t i = 0; i < index.obj_count; ++i) {
860 const uint32_t obj_num = index.start_obj_num + i;
861 if (obj_num >= kMaxObjectNumber) {
865 ProcessCrossRefV5Entry(seg_span.subspan(i * total_width, total_width),
866 field_widths, obj_num, overwrite_existing);
869 segindex += index.obj_count;
875 pdfium::span<
const uint8_t> entry_span,
876 pdfium::span<
const uint32_t> field_widths,
878 bool overwrite_existing) {
879 DCHECK_GE(field_widths.size(), kMinFieldCount);
881 if (field_widths[0]) {
882 const uint32_t cross_ref_stream_obj_type =
883 GetFirstXRefStreamEntry(entry_span, field_widths);
884 type = GetObjectTypeFromCrossRefStreamType(cross_ref_stream_obj_type);
885 if (type == ObjectType
::kNull) {
895 const ObjectType existing_type = GetObjectType(obj_num);
896 if (existing_type == ObjectType
::kNull) {
897 const uint32_t offset = GetSecondXRefStreamEntry(entry_span, field_widths);
898 if (pdfium::base::IsValueInRangeForNumericType<
FX_FILESIZE>(offset))
899 m_CrossRefTable->AddNormal(obj_num, 0,
false,
904 if (!overwrite_existing && existing_type != ObjectType
::kFree) {
908 if (type == ObjectType
::kFree) {
909 m_CrossRefTable->SetFree(obj_num);
914 const uint32_t offset = GetSecondXRefStreamEntry(entry_span, field_widths);
915 if (pdfium::base::IsValueInRangeForNumericType<
FX_FILESIZE>(offset))
916 m_CrossRefTable->AddNormal(obj_num, 0,
false,
922 const uint32_t archive_obj_num =
923 GetSecondXRefStreamEntry(entry_span, field_widths);
928 const uint32_t archive_obj_index =
929 GetThirdXRefStreamEntry(entry_span, field_widths);
930 m_CrossRefTable->AddCompressed(obj_num, archive_obj_num, archive_obj_index);
939 m_pObjectsHolder->GetOrParseIndirectObject(GetRootObjNum());
940 return obj ? obj->GetDict() :
nullptr;
948 GetTrailer()->GetObjectFor(
"Encrypt");
952 if (pEncryptObj->IsDictionary())
953 return pdfium::WrapRetain(pEncryptObj->AsDictionary());
955 if (pEncryptObj->IsReference()) {
956 return ToDictionary(m_pObjectsHolder->GetOrParseIndirectObject(
957 pEncryptObj->AsReference()->GetRefObjNum()));
963 return GetSecurityHandler()->GetEncodedPassword(GetPassword().AsStringView());
967 return m_CrossRefTable->trailer();
971 return m_CrossRefTable->GetMutableTrailerForTesting();
975 return m_CrossRefTable->trailer_object_number();
979 return m_CrossRefTable->trailer()
980 ? ToDictionary(m_CrossRefTable->trailer()->Clone())
981 : RetainPtr<CPDF_Dictionary>();
986 ToReference(m_CrossRefTable->trailer()
987 ? m_CrossRefTable->trailer()->GetObjectFor(
"Info")
994 ToReference(m_CrossRefTable->trailer()
995 ? m_CrossRefTable->trailer()->GetObjectFor(
"Root")
1005 if (pdfium::Contains(m_ParsingObjNums, objnum))
1009 if (GetObjectType(objnum) == ObjectType
::kNormal) {
1019 const auto& info = *m_CrossRefTable->GetObjectInfo(objnum);
1024 return pObjStream->ParseObject(m_pObjectsHolder, objnum,
1025 info.archive.obj_index);
1030 if (pdfium::Contains(m_ParsingObjNums, object_number))
1033 auto it = m_ObjectStreamMap.find(object_number);
1034 if (it != m_ObjectStreamMap.end())
1035 return it->second.get();
1037 const auto* info = m_CrossRefTable->GetObjectInfo(object_number);
1038 if (!info || !info->is_object_stream_flag) {
1043 if (object_pos <= 0)
1050 ParseIndirectObjectAt(object_pos, object_number);
1054 std::unique_ptr<CPDF_ObjectStream> objs_stream =
1055 CPDF_ObjectStream::Create(ToStream(object));
1057 m_ObjectStreamMap[object_number] = std::move(objs_stream);
1064 const FX_FILESIZE saved_pos = m_pSyntax->GetPos();
1065 m_pSyntax->SetPos(pos);
1067 auto result = m_pSyntax->GetIndirectObject(
1068 m_pObjectsHolder, CPDF_SyntaxParser::ParseType::kLoose);
1069 m_pSyntax->SetPos(saved_pos);
1070 if (result && objnum && result->GetObjNum() != objnum)
1073 const bool should_decrypt = m_pSecurityHandler &&
1074 m_pSecurityHandler->GetCryptoHandler() &&
1075 objnum != m_MetadataObjnum;
1076 if (should_decrypt &&
1077 !m_pSecurityHandler->GetCryptoHandler()->DecryptObjectTree(result)) {
1084 return m_pSyntax->GetDocumentSize();
1088 return m_pLinearized ? m_pLinearized->GetFirstPageNo() : 0;
1092 std::unique_ptr<CPDF_LinearizedHeader> pLinearized) {
1093 m_pLinearized = std::move(pLinearized);
1097 if (m_pSyntax->GetKeyword() !=
"trailer")
1100 return ToDictionary(m_pSyntax->GetObjectBody(m_pObjectsHolder));
1104 return m_pSecurityHandler
1105 ? m_pSecurityHandler->GetPermissions(get_owner_perms)
1110 return CPDF_LinearizedHeader::Parse(m_pSyntax.get());