134 indices.push_back({0, size});
138std::vector<uint32_t> GetFieldWidths(
const CPDF_Array* array) {
139 std::vector<uint32_t> results;
144 for (
const auto& obj : locker)
145 results.push_back(obj->GetInteger());
// Defaulted default constructor: the stub adds no construction logic of its own.
151 ObjectsHolderStub() =
default;
// Defaulted destructor. The `override` specifier only compiles if the base
// class declares a virtual destructor.
152 ~ObjectsHolderStub()
override =
default;
// Overrides the base-class TryInit(). This stub performs no work and
// unconditionally reports success by returning true.
153 bool TryInit()
override {
return true; }
162 m_pOwnedObjectsHolder = std::make_unique<ObjectsHolderStub>();
163 m_pObjectsHolder = m_pOwnedObjectsHolder.get();
172 return m_CrossRefTable->objects_info().empty()
174 : m_CrossRefTable->objects_info().rbegin()->first;
182 const auto* info = m_CrossRefTable->GetObjectInfo(objnum);
187 DCHECK(IsValidObjectNumber(objnum));
188 const auto* info = m_CrossRefTable->GetObjectInfo(objnum);
193 const std::optional<
FX_FILESIZE> header_offset = GetHeaderOffset(validator);
194 if (!header_offset.has_value())
196 if (validator->GetSize() < header_offset.value() + kPDFHeaderSize)
199 m_pSyntax = std::make_unique<CPDF_SyntaxParser>(std::move(validator),
200 header_offset.value());
201 return ParseFileVersion();
207 if (!m_pSyntax->GetCharAt(5, ch))
213 if (!m_pSyntax->GetCharAt(7, ch))
225 std::move(pFileAccess),
nullptr)))
227 SetPassword(password);
233 DCHECK(!m_bXRefTableRebuilt);
235 m_bXRefStream =
false;
238 if (m_LastXRefOffset >= kPDFHeaderSize) {
239 if (!LoadAllCrossRefTablesAndStreams(m_LastXRefOffset)) {
243 m_bXRefTableRebuilt =
true;
244 m_LastXRefOffset = 0;
250 m_bXRefTableRebuilt =
true;
252 Error eRet = SetEncryptHandler();
256 if (!GetRoot() || !m_pObjectsHolder->TryInit()) {
257 if (m_bXRefTableRebuilt)
260 ReleaseEncryptHandler();
264 eRet = SetEncryptHandler();
268 m_pObjectsHolder->TryInit();
273 ReleaseEncryptHandler();
277 eRet = SetEncryptHandler();
281 if (m_pSecurityHandler && !m_pSecurityHandler->IsMetadataEncrypted()) {
282 RetainPtr<
const CPDF_Reference> pMetadata =
283 ToReference(GetRoot()->GetObjectFor(
"Metadata"));
285 m_MetadataObjnum = pMetadata->GetRefObjNum();
291 static constexpr char kStartXRefKeyword[] =
"startxref";
292 m_pSyntax->SetPos(m_pSyntax->GetDocumentSize() - strlen(kStartXRefKeyword));
293 if (!m_pSyntax->BackwardsSearchToWord(kStartXRefKeyword, 4096))
297 m_pSyntax->GetKeyword();
301 m_pSyntax->GetNextWord();
302 if (!xref_offset_result
.is_number || xref_offset_result.word.IsEmpty())
305 const FX_SAFE_FILESIZE result = FXSYS_atoi64(xref_offset_result.word.c_str());
306 if (!result.IsValid() || result.ValueOrDie() >= m_pSyntax->GetDocumentSize())
309 return result.ValueOrDie();
313 ReleaseEncryptHandler();
321 if (pEncryptDict->GetNameFor(
"Filter") !=
"Standard")
324 auto pSecurityHandler =
pdfium::MakeRetain<CPDF_SecurityHandler>();
328 m_pSecurityHandler = std::move(pSecurityHandler);
333 m_pSecurityHandler.Reset();
341 for (
const auto& it : m_CrossRefTable->objects_info()) {
342 if (it.second.pos <= 0)
346 m_pSyntax->SetPos(it.second.pos);
347 CPDF_SyntaxParser::WordResult word_result = m_pSyntax->GetNextWord();
348 m_pSyntax->SetPos(SavedPos);
349 if (!word_result.is_number || word_result.word.IsEmpty() ||
350 FXSYS_atoui(word_result.word.c_str()) != it.first) {
362 if (is_xref_stream) {
365 if (!LoadCrossRefStream(&xref_offset_copy,
true)) {
379 m_CrossRefTable->SetTrailer(std::move(trailer), kNoTrailerObjectNumber);
382 if (xrefsize > 0 && xrefsize <= kMaxXRefSize) {
383 m_CrossRefTable->SetObjectMapSize(xrefsize);
390 if (is_xref_stream) {
391 xref_list.push_back(0);
392 xref_stream_list.push_back(xref_offset);
394 xref_list.push_back(xref_offset);
398 if (!FindAllCrossReferenceTablesAndStream(xref_offset, xref_list,
403 if (xref_list.front() > 0) {
408 if (!VerifyCrossRefTable()) {
421 for (size_t i = 1; i < xref_list.size(); ++i) {
422 if (xref_stream_list[i] > 0 &&
423 !LoadCrossRefStream(&xref_stream_list[i],
false)) {
431 if (is_xref_stream) {
432 m_ObjectStreamMap.clear();
433 m_bXRefStream =
true;
456 std::vector<
FX_FILESIZE> xref_list{main_xref_offset};
457 std::vector<
FX_FILESIZE> xref_stream_list{xref_stm};
461 m_CrossRefTable = CPDF_CrossRefTable::MergeUp(
462 std::make_unique<CPDF_CrossRefTable>(std::move(main_trailer),
463 kNoTrailerObjectNumber),
464 std::move(m_CrossRefTable));
466 if (!FindAllCrossReferenceTablesAndStream(main_xref_offset, xref_list,
473 if (xref_stream_list[0] > 0 &&
474 !LoadCrossRefStream(&xref_stream_list[0],
false)) {
481 for (size_t i = 1; i < xref_list.size(); ++i) {
482 if (xref_stream_list[i] > 0 &&
483 !LoadCrossRefStream(&xref_stream_list[i],
false)) {
494bool CPDF_Parser::ParseAndAppendCrossRefSubsectionData(
495 uint32_t start_objnum,
497 std::vector<CrossRefObjData>* out_objects) {
504 static constexpr int32_t kEntrySize = 20;
509 pos += m_pSyntax->GetPos();
512 m_pSyntax->SetPos(pos.ValueOrDie());
515 const size_t start_obj_index = out_objects->size();
516 FX_SAFE_SIZE_T new_size = start_obj_index;
518 if (!new_size.IsValid())
521 if (new_size.ValueOrDie() > kMaxXRefSize)
524 const size_t max_entries_in_file = m_pSyntax->GetDocumentSize() / kEntrySize;
525 if (new_size.ValueOrDie() > max_entries_in_file)
528 out_objects->resize(new_size.ValueOrDie());
530 DataVector<
char> buf(1024 * kEntrySize + 1);
533 uint32_t entries_to_read = count;
534 while (entries_to_read > 0) {
535 const uint32_t entries_in_block =
std::min(entries_to_read, 1024u);
536 const uint32_t bytes_to_read = entries_in_block * kEntrySize;
537 auto block_span = pdfium::make_span(buf).first(bytes_to_read);
538 if (!m_pSyntax->ReadBlock(pdfium::as_writable_bytes(block_span)))
541 for (uint32_t i = 0; i < entries_in_block; i++) {
542 uint32_t iObjectIndex = count - entries_to_read + i;
543 CrossRefObjData& obj_data =
544 (*out_objects)[start_obj_index + iObjectIndex];
545 const uint32_t objnum = start_objnum + iObjectIndex;
546 obj_data.obj_num = objnum;
549 pdfium::span<
const char> pEntry =
550 pdfium::make_span(buf).subspan(i * kEntrySize);
551 if (pEntry[17] ==
'f') {
556 if (!offset.IsValid())
559 if (offset.ValueOrDie() == 0) {
560 for (int32_t c = 0; c < 10; c++) {
561 if (!isdigit(pEntry[c]))
566 info
.pos = offset.ValueOrDie();
570 const int32_t version = FXSYS_atoi(pEntry.subspan(11).data());
575 entries_to_read -= entries_in_block;
581 std::vector<CrossRefObjData>* out_objects) {
583 out_objects->clear();
585 if (m_pSyntax->GetKeyword() !=
"xref")
587 std::vector<CrossRefObjData> result_objects;
596 m_pSyntax->SetPos(saved_pos);
600 uint32_t start_objnum = FXSYS_atoui(word.c_str());
604 uint32_t count = m_pSyntax->GetDirectNum();
605 m_pSyntax->ToNextWord();
607 if (!ParseAndAppendCrossRefSubsectionData(
608 start_objnum, count, out_objects ? &result_objects :
nullptr)) {
613 *out_objects =
std::move(result_objects);
618 m_pSyntax->SetPos(pos);
619 std::vector<CrossRefObjData> objects;
620 if (!ParseCrossRefTable(skip ?
nullptr : &objects)) {
624 MergeCrossRefObjectsData(objects);
629 const std::vector<CrossRefObjData>& objects) {
630 for (
const auto& obj : objects) {
631 switch (obj.info.type) {
632 case ObjectType::kFree:
633 if (obj.info.gennum > 0)
634 m_CrossRefTable->SetFree(obj.obj_num, obj.info.gennum);
636 case ObjectType::kNormal:
637 m_CrossRefTable->AddNormal(obj.obj_num, obj.info.gennum,
638 obj.info.is_object_stream_flag,
641 case ObjectType::kCompressed:
642 m_CrossRefTable->AddCompressed(obj.obj_num, obj.info.archive.obj_num,
643 obj.info.archive.obj_index);
649bool CPDF_Parser::FindAllCrossReferenceTablesAndStream(
653 std::set<
FX_FILESIZE> seen_xref_offset{main_xref_offset};
658 while (xref_offset > 0) {
660 if (
pdfium::Contains(seen_xref_offset, xref_offset)) {
664 seen_xref_offset.insert(xref_offset);
668 if (LoadCrossRefStream(&xref_offset_copy,
false)) {
671 xref_list.insert(xref_list.begin(), 0);
672 xref_stream_list.insert(xref_stream_list.begin(), xref_offset);
673 xref_offset = xref_offset_copy;
688 xref_list.insert(xref_list.begin(), xref_offset);
689 xref_stream_list.insert(xref_stream_list.begin(),
690 trailer_dict->GetIntegerFor(
"XRefStm"));
691 xref_offset = trailer_dict->GetDirectIntegerFor(
"Prev");
694 m_CrossRefTable = CPDF_CrossRefTable::MergeUp(
695 std::make_unique<CPDF_CrossRefTable>(std::move(trailer_dict),
696 kNoTrailerObjectNumber),
697 std::move(m_CrossRefTable));
706 const uint32_t kBufferSize = 4096;
707 m_pSyntax->SetReadBufferSize(kBufferSize);
708 m_pSyntax->SetPos(0);
710 std::vector<std::pair<uint32_t,
FX_FILESIZE>> numbers;
712 !result.word.IsEmpty(); result = m_pSyntax->GetNextWord()) {
715 numbers.emplace_back(FXSYS_atoui(word.c_str()),
716 m_pSyntax->GetPos() - word.GetLength());
717 if (numbers.size() > 2u)
718 numbers.erase(numbers.begin());
723 m_pSyntax->ReadString();
724 }
else if (word
== "<") {
725 m_pSyntax->ReadHexString();
726 }
else if (word
== "trailer") {
729 CPDF_Stream* stream_trailer = pTrailer->AsMutableStream();
732 const uint32_t trailer_object_number = pTrailer->GetObjNum();
734 stream_trailer ? stream_trailer->GetMutableDict()
735 : ToDictionary(std::move(pTrailer));
736 cross_ref_table = CPDF_CrossRefTable::MergeUp(
737 std::move(cross_ref_table),
739 trailer_object_number));
741 }
else if (word
== "obj" && numbers.size() == 2u) {
743 const uint32_t obj_num = numbers[0].first;
744 const uint32_t gen_num = numbers[1].first;
746 m_pSyntax->SetPos(obj_pos);
747 RetainPtr<CPDF_Stream> pStream = ToStream(m_pSyntax->GetIndirectObject(
748 nullptr, CPDF_SyntaxParser::ParseType::kStrict));
750 if (pStream && pStream->GetDict()->GetNameFor(
"Type") ==
"XRef") {
751 cross_ref_table = CPDF_CrossRefTable::MergeUp(
752 std::move(cross_ref_table),
754 ToDictionary(pStream->GetDict()->Clone()),
755 pStream->GetObjNum()));
759 cross_ref_table->AddNormal(obj_num, gen_num,
false,
761 const auto object_stream =
762 CPDF_ObjectStream::Create(
std::move(pStream));
764 const auto& object_info = object_stream->object_info();
765 for (size_t i = 0; i < object_info.size(); ++i) {
766 const auto& info = object_info[i];
768 cross_ref_table->AddCompressed(info.obj_num, obj_num, i);
776 m_CrossRefTable = CPDF_CrossRefTable::MergeUp(std::move(m_CrossRefTable),
777 std::move(cross_ref_table));
779 m_pSyntax->SetReadBufferSize(CPDF_Stream::kFileBufSize);
781 return GetTrailer() && !m_CrossRefTable->objects_info().empty();
786 ToStream(ParseIndirectObjectAt(*pos, 0));
787 if (!pStream || !pStream->GetObjNum()) {
792 int32_t prev = pDict->GetIntegerFor(
"Prev");
796 int32_t size = pDict->GetIntegerFor(
"Size");
803 ToDictionary(pDict->Clone()),
804 pStream->GetObjNum());
806 m_CrossRefTable = std::move(new_cross_ref_table);
807 m_CrossRefTable->SetObjectMapSize(size);
809 m_CrossRefTable = CPDF_CrossRefTable::MergeUp(
810 std::move(new_cross_ref_table), std::move(m_CrossRefTable));
813 std::vector<CrossRefStreamIndexEntry> indices =
814 GetCrossRefStreamIndices(pDict->GetArrayFor(
"Index").Get(), size);
816 std::vector<uint32_t> field_widths =
817 GetFieldWidths(pDict->GetArrayFor(
"W").Get());
818 if (field_widths.size() < kMinFieldCount)
822 for (uint32_t width : field_widths)
824 if (!dwAccWidth.IsValid())
827 uint32_t total_width = dwAccWidth.ValueOrDie();
828 auto pAcc =
pdfium::MakeRetain<CPDF_StreamAcc>(pStream);
829 pAcc->LoadAllDataFiltered();
831 pdfium::span<
const uint8_t> data_span = pAcc->GetSpan();
832 uint32_t segindex = 0;
833 for (
const auto& index : indices) {
834 FX_SAFE_UINT32 seg_end = segindex;
835 seg_end += index.obj_count;
836 seg_end *= total_width;
837 if (!seg_end.IsValid() || seg_end.ValueOrDie() > data_span.size())
840 pdfium::span<
const uint8_t> seg_span = data_span.subspan(
841 segindex * total_width, index.obj_count * total_width);
842 FX_SAFE_UINT32 safe_new_size = index.start_obj_num;
843 safe_new_size += index.obj_count;
844 if (!safe_new_size.IsValid()) {
853 const uint32_t current_size =
854 m_CrossRefTable->objects_info().empty() ? 0 : GetLastObjNum() + 1;
858 const uint32_t new_size =
859 std::min<uint32_t>(safe_new_size.ValueOrDie(), kMaxXRefSize);
860 if (new_size > current_size) {
861 m_CrossRefTable->SetObjectMapSize(new_size);
864 for (uint32_t i = 0; i < index.obj_count; ++i) {
865 const uint32_t obj_num = index.start_obj_num + i;
866 if (obj_num >= kMaxObjectNumber) {
870 ProcessCrossRefStreamEntry(seg_span.subspan(i * total_width, total_width),
871 field_widths, obj_num);
874 segindex += index.obj_count;
880 pdfium::span<
const uint8_t> entry_span,
881 pdfium::span<
const uint32_t> field_widths,
883 DCHECK_GE(field_widths.size(), kMinFieldCount);
885 if (field_widths[0]) {
886 const uint32_t cross_ref_stream_obj_type =
887 GetFirstXRefStreamEntry(entry_span, field_widths);
889 GetObjectTypeFromCrossRefStreamType(cross_ref_stream_obj_type);
890 if (!maybe_type.has_value()) {
893 type = maybe_type.value();
902 const uint32_t gen_num = GetThirdXRefStreamEntry(entry_span, field_widths);
903 if (
pdfium::IsValueInRangeForNumericType<uint16_t>(gen_num)) {
904 m_CrossRefTable->SetFree(obj_num, gen_num);
910 const uint32_t offset = GetSecondXRefStreamEntry(entry_span, field_widths);
911 const uint32_t gen_num = GetThirdXRefStreamEntry(entry_span, field_widths);
913 pdfium::IsValueInRangeForNumericType<uint16_t>(gen_num)) {
914 m_CrossRefTable->AddNormal(obj_num, gen_num,
false,
921 const uint32_t archive_obj_num =
922 GetSecondXRefStreamEntry(entry_span, field_widths);
927 const uint32_t archive_obj_index =
928 GetThirdXRefStreamEntry(entry_span, field_widths);
929 m_CrossRefTable->AddCompressed(obj_num, archive_obj_num, archive_obj_index);
938 m_pObjectsHolder->GetOrParseIndirectObject(GetRootObjNum());
939 return obj ? obj->GetDict() :
nullptr;
947 GetTrailer()->GetObjectFor(
"Encrypt");
951 if (pEncryptObj->IsDictionary())
952 return pdfium::WrapRetain(pEncryptObj->AsDictionary());
954 if (pEncryptObj->IsReference()) {
955 return ToDictionary(m_pObjectsHolder->GetOrParseIndirectObject(
956 pEncryptObj->AsReference()->GetRefObjNum()));
962 return GetSecurityHandler()->GetEncodedPassword(GetPassword().AsStringView());
966 return m_CrossRefTable->trailer();
970 return m_CrossRefTable->GetMutableTrailerForTesting();
974 return m_CrossRefTable->trailer_object_number();
978 return m_CrossRefTable->trailer()
979 ? ToDictionary(m_CrossRefTable->trailer()->Clone())
980 : RetainPtr<CPDF_Dictionary>();
985 ToReference(m_CrossRefTable->trailer()
986 ? m_CrossRefTable->trailer()->GetObjectFor(
"Info")
993 ToReference(m_CrossRefTable->trailer()
994 ? m_CrossRefTable->trailer()->GetObjectFor(
"Root")
1005 if (pdfium::Contains(m_ParsingObjNums, objnum)) {
1010 const auto* info = m_CrossRefTable->GetObjectInfo(objnum);
1015 switch (info->type) {
1020 if (info->pos <= 0) {
1023 return ParseIndirectObjectAt(info->pos, objnum);
1026 const auto* obj_stream = GetObjectStream(info->archive.obj_num);
1030 return obj_stream->ParseObject(m_pObjectsHolder, objnum,
1031 info->archive.obj_index);
1038 if (pdfium::Contains(m_ParsingObjNums, object_number))
1041 auto it = m_ObjectStreamMap.find(object_number);
1042 if (it != m_ObjectStreamMap.end())
1043 return it->second.get();
1045 const auto* info = m_CrossRefTable->GetObjectInfo(object_number);
1046 if (!info || !info->is_object_stream_flag) {
1051 if (object_pos <= 0)
1058 ParseIndirectObjectAt(object_pos, object_number);
1062 std::unique_ptr<CPDF_ObjectStream> objs_stream =
1063 CPDF_ObjectStream::Create(ToStream(object));
1065 m_ObjectStreamMap[object_number] = std::move(objs_stream);
1072 const FX_FILESIZE saved_pos = m_pSyntax->GetPos();
1073 m_pSyntax->SetPos(pos);
1075 auto result = m_pSyntax->GetIndirectObject(
1076 m_pObjectsHolder, CPDF_SyntaxParser::ParseType::kLoose);
1077 m_pSyntax->SetPos(saved_pos);
1078 if (result && objnum && result->GetObjNum() != objnum)
1081 const bool should_decrypt = m_pSecurityHandler &&
1082 m_pSecurityHandler->GetCryptoHandler() &&
1083 objnum != m_MetadataObjnum;
1084 if (should_decrypt &&
1085 !m_pSecurityHandler->GetCryptoHandler()->DecryptObjectTree(result)) {
1092 return m_pSyntax->GetDocumentSize();
1096 return m_pLinearized ? m_pLinearized->GetFirstPageNo() : 0;
1100 std::unique_ptr<CPDF_LinearizedHeader> pLinearized) {
1101 m_pLinearized = std::move(pLinearized);
1105 if (m_pSyntax->GetKeyword() !=
"trailer")
1108 return ToDictionary(m_pSyntax->GetObjectBody(m_pObjectsHolder));
1112 return m_pSecurityHandler
1113 ? m_pSecurityHandler->GetPermissions(get_owner_perms)
1118 return CPDF_LinearizedHeader::Parse(m_pSyntax.get());