7#include "core/fpdfapi/parser/cpdf_data_avail.h"
13#include "core/fpdfapi/parser/cpdf_array.h"
14#include "core/fpdfapi/parser/cpdf_cross_ref_avail.h"
15#include "core/fpdfapi/parser/cpdf_dictionary.h"
16#include "core/fpdfapi/parser/cpdf_document.h"
17#include "core/fpdfapi/parser/cpdf_hint_tables.h"
18#include "core/fpdfapi/parser/cpdf_linearized_header.h"
19#include "core/fpdfapi/parser/cpdf_name.h"
20#include "core/fpdfapi/parser/cpdf_number.h"
21#include "core/fpdfapi/parser/cpdf_page_object_avail.h"
22#include "core/fpdfapi/parser/cpdf_read_validator.h"
23#include "core/fpdfapi/parser/cpdf_reference.h"
24#include "core/fpdfapi/parser/cpdf_stream.h"
25#include "core/fpdfapi/parser/cpdf_syntax_parser.h"
26#include "core/fpdfapi/parser/fpdf_parser_utility.h"
27#include "core/fxcrt/autorestorer.h"
28#include "core/fxcrt/fx_extension.h"
29#include "core/fxcrt/fx_safe_types.h"
30#include "core/fxcrt/stl_util.h"
31#include "third_party/base/check.h"
32#include "third_party/base/containers/contains.h"
33#include "third_party/base/notreached.h"
34#include "third_party/base/numerics/safe_conversions.h"
// Upper bound for a depth counter: the visible use compares `++depth`
// against this value.
39 constexpr size_t kMaxHierarchyDepth = 64;
46 if (++depth > kMaxHierarchyDepth) {
51 pDict = parent ? parent->GetMutableDict() :
nullptr;
60 : validator_(std::move(validator)) {
62 validator_->SetDownloadHints(hints);
// Destructor: clears the validator's download hints by passing nullptr to
// SetDownloadHints().
65 ~HintsScope() { validator_->SetDownloadHints(
nullptr); }
84 m_pHintTables.reset();
86 m_pDocument->RemoveObserver(
this);
90 m_pDocument =
nullptr;
93 m_PagesObjAvail.clear();
94 m_PagesResourcesAvail.clear();
102 DCHECK(m_SeenPageObjList.empty());
103 AutoRestorer<std::set<uint32_t>> seen_objects_restorer(&m_SeenPageObjList);
105 while (!m_bDocAvail) {
106 if (!CheckDocStatus())
// Dispatches on m_internalStatus and runs the check that belongs to the
// current state.
// NOTE(review): this listing is missing statements (the embedded numbering
// skips e.g. 126, 128, 131, 135, 137 and everything after 141), so several
// case bodies below are only partially visible.
113bool CPDF_DataAvail::CheckDocStatus() {
114 switch (m_internalStatus) {
115 case InternalStatus::kHeader:
116 return CheckHeader();
117 case InternalStatus::kFirstPage:
118 return CheckFirstPage();
119 case InternalStatus::kHintTable:
120 return CheckHintTables();
121 case InternalStatus::kLoadAllCrossRef:
122 return CheckAndLoadAllXref();
123 case InternalStatus::kLoadAllFile:
124 return LoadAllFile();
// The statements for kRoot and kInfo are not visible in this listing.
125 case InternalStatus::kRoot:
127 case InternalStatus::kInfo:
129 case InternalStatus::kPageTree:
// The statement guarded by m_bTotalLoadPageTree is not visible here.
130 if (m_bTotalLoadPageTree)
132 return LoadDocPages();
133 case InternalStatus::kPage:
134 if (m_bTotalLoadPageTree)
136 m_internalStatus = InternalStatus::kPageLaterLoad;
138 case InternalStatus::kError:
// The error state is handled by LoadAllFile().
139 return LoadAllFile();
140 case InternalStatus::kPageLaterLoad:
// Switches the state back to kPage.
141 m_internalStatus = InternalStatus::kPage;
// Dispatches on m_internalStatus for the page-tree loading phase.
// NOTE(review): the bodies of the kPageTree and kPage cases, and the label
// that guards the final assignment, are missing from this listing.
149bool CPDF_DataAvail::CheckPageStatus() {
150 switch (m_internalStatus) {
151 case InternalStatus::kPageTree:
153 case InternalStatus::kPage:
155 case InternalStatus::kError:
// The error state is handled by LoadAllFile().
156 return LoadAllFile();
// Marks the page tree as loaded (presumably in a default branch -- confirm
// against the full file).
158 m_bPagesTreeLoad =
true;
// Asks the validator to check the whole file, requesting it if unavailable.
// When that call returns true, the state machine moves to kDone.
// NOTE(review): the return statements are not visible in this listing.
164bool CPDF_DataAvail::LoadAllFile() {
165 if (GetValidator()->CheckWholeFileAndRequestIfUnavailable()) {
166 m_internalStatus = InternalStatus::kDone;
// Locates and loads the cross-reference data.
// NOTE(review): return statements, the case labels of the switch and some
// arguments are missing from this listing.
172bool CPDF_DataAvail::CheckAndLoadAllXref() {
// First call: m_pCrossRefAvail has not been created yet.
173 if (!m_pCrossRefAvail) {
175 const FX_FILESIZE last_xref_offset = m_parser.ParseStartXRef();
// Checks the validator for read problems after the parse.
176 if (GetValidator()->has_read_problems())
// A non-positive offset moves the state machine to kError.
179 if (last_xref_offset <= 0) {
180 m_internalStatus = InternalStatus::kError;
184 m_pCrossRefAvail = std::make_unique<CPDF_CrossRefAvail>(GetSyntaxParser(),
// Acts on the availability result; one branch (label not visible) sets
// kError.
188 switch (m_pCrossRefAvail->CheckAvail()) {
194 m_internalStatus = InternalStatus::kError;
// If neither the V4 nor the V5 loader succeeds, switch to kLoadAllFile.
198 if (!m_parser.LoadAllCrossRefV4(m_pCrossRefAvail->last_crossref_offset()) &&
199 !m_parser.LoadAllCrossRefV5(m_pCrossRefAvail->last_crossref_offset())) {
200 m_internalStatus = InternalStatus::kLoadAllFile;
// Next state: kRoot.
204 m_internalStatus = InternalStatus::kRoot;
209 bool* pExistInFile) {
210 *pExistInFile =
false;
211 CPDF_Parser* pParser = m_pDocument ? m_pDocument->GetParser() : &m_parser;
220 *pExistInFile =
true;
221 if (GetValidator()->has_read_problems())
// Handles the info object whose number is reported by the parser, then
// advances the state machine to kPageTree.
// NOTE(review): the conditions guarding each assignment and the return
// statements are missing from this listing.
227bool CPDF_DataAvail::CheckInfo() {
228 const uint32_t dwInfoObjNum = m_parser.GetInfoObjNum();
// Presumably the "no info object" shortcut -- confirm against the full file.
230 m_internalStatus = InternalStatus::kPageTree;
// Parses the info object; the result is not used on this line.
235 m_parser.ParseIndirectObject(dwInfoObjNum);
236 if (GetValidator()->has_read_problems())
239 m_internalStatus = InternalStatus::kPageTree;
// Parses the root object as a dictionary, reads its "Pages" reference,
// records the referenced object number in m_PagesObjNum and advances the
// state machine to kInfo. Several paths set kError instead.
// NOTE(review): the conditions guarding the kError assignments, the
// declaration of pRef and the return statements are missing from this
// listing.
243bool CPDF_DataAvail::CheckRoot() {
244 const uint32_t dwRootObjNum = m_parser.GetRootObjNum();
246 m_internalStatus = InternalStatus::kError;
// Parses the root object and converts it with ToDictionary().
251 m_pRoot = ToDictionary(m_parser.ParseIndirectObject(dwRootObjNum));
252 if (GetValidator()->has_read_problems())
256 m_internalStatus = InternalStatus::kError;
// Converts the "Pages" entry with ToReference().
261 ToReference(m_pRoot->GetObjectFor(
"Pages"));
263 m_internalStatus = InternalStatus::kError;
267 m_PagesObjNum = pRef->GetRefObjNum();
268 m_internalStatus = InternalStatus::kInfo;
// Reads the "Pages" reference from the document's root dictionary, records
// the referenced object number in m_PagesObjNum and sets the state to
// kPageTree. Two paths set kError instead.
// NOTE(review): the conditions guarding the kError assignments, the
// declaration of pRef and the return statements are missing from this
// listing.
272bool CPDF_DataAvail::PreparePageItem() {
273 const CPDF_Dictionary* pRoot = m_pDocument->GetRoot();
275 m_internalStatus = InternalStatus::kError;
280 ToReference(pRoot->GetObjectFor(
"Pages"));
282 m_internalStatus = InternalStatus::kError;
286 m_PagesObjNum = pRef->GetRefObjNum();
287 m_internalStatus = InternalStatus::kPageTree;
// Inserts dwPage into m_pageMapCheckState and returns the `.second` member
// of the insert() result, i.e. whether the page number was newly inserted.
291bool CPDF_DataAvail::IsFirstCheck(uint32_t dwPage) {
292 return m_pageMapCheckState.insert(dwPage).second;
// Removes dwPage from m_pageMapCheckState, undoing the insertion done by
// IsFirstCheck() for that page.
295void CPDF_DataAvail::ResetFirstCheck(uint32_t dwPage) {
296 m_pageMapCheckState.erase(dwPage);
// Walks the object numbers queued in m_PageObjList. Object numbers that
// still need work are collected in UnavailObjList, and dictionaries whose
// "Type" is "Pages" are collected in m_PagesArray. The collected "Pages"
// nodes are then expanded through GetPageKids().
// NOTE(review): conditions, `break`/`return` statements and closing braces
// are missing from this listing.
299bool CPDF_DataAvail::CheckPage() {
300 std::vector<uint32_t> UnavailObjList;
301 for (uint32_t dwPageObjNum : m_PageObjList) {
302 bool bExists =
false;
303 RetainPtr<CPDF_Object> pObj = GetObject(dwPageObjNum, &bExists);
// Re-queues the object number (the guarding condition is not visible).
306 UnavailObjList.push_back(dwPageObjNum);
310 switch (pObj->GetType()) {
311 case CPDF_Object::kArray: {
// Array: queues the object numbers of its reference elements.
312 CPDF_ArrayLocker locker(pObj->AsArray());
313 for (
const auto& pArrayObj : locker) {
314 const CPDF_Reference* pRef = ToReference(pArrayObj.Get());
316 UnavailObjList.push_back(pRef->GetRefObjNum());
320 case CPDF_Object::kDictionary:
// Dictionary of type "Pages": keep it for expansion below.
321 if (pObj->GetDict()->GetNameFor(
"Type") ==
"Pages")
322 m_PagesArray.push_back(std::move(pObj));
// Replaces the work list with the still-pending object numbers.
328 m_PageObjList.clear();
329 if (!UnavailObjList.empty()) {
330 m_PageObjList = std::move(UnavailObjList);
333 size_t iPages = m_PagesArray.size();
334 for (size_t i = 0; i < iPages; ++i) {
// A failing GetPageKids() discards the collected nodes and sets kError.
336 if (pPages && !GetPageKids(pPages.Get())) {
337 m_PagesArray.clear();
338 m_internalStatus = InternalStatus::kError;
342 m_PagesArray.clear();
// Nothing left to check: the state machine is done.
343 if (m_PageObjList.empty())
344 m_internalStatus = InternalStatus::kDone;
// Collects the object numbers referenced by a pages node and appends them
// to m_PageObjList. pPages is the object whose dictionary is inspected.
// NOTE(review): the lookup that produces pKids, the case labels, the
// condition using `inserted` and the return statements are missing from
// this listing.
349bool CPDF_DataAvail::GetPageKids(
CPDF_Object* pPages) {
350 RetainPtr<
const CPDF_Dictionary> pDict = pPages->GetDict();
358 std::vector<uint32_t> object_numbers;
359 switch (pKids->GetType()) {
// pKids used as a single reference.
361 object_numbers.push_back(pKids->AsReference()->GetRefObjNum());
// pKids iterated element by element, collecting reference object numbers.
365 for (
const auto& pArrayObj : locker) {
366 const CPDF_Reference* pRef = ToReference(pArrayObj.Get());
368 object_numbers.push_back(pRef->GetRefObjNum());
373 m_internalStatus = InternalStatus::kError;
// m_SeenPageObjList records every object number encountered; `inserted`
// is true only the first time a number is seen.
377 for (uint32_t num : object_numbers) {
378 bool inserted = m_SeenPageObjList.insert(num).second;
380 m_PageObjList.push_back(num);
// Expands a pages object through GetPageKids() and, on the visible success
// path, sets the state to kPage. Other visible paths set kLoadAllFile or
// kError.
// NOTE(review): the code that obtains pPages, the guarding conditions and
// the return statements are missing from this listing.
385bool CPDF_DataAvail::CheckPages() {
386 bool bExists =
false;
389 m_internalStatus = InternalStatus::kLoadAllFile;
// An error state is turned into a request to load the whole file.
394 if (m_internalStatus == InternalStatus::kError) {
395 m_internalStatus = InternalStatus::kLoadAllFile;
401 if (!GetPageKids(pPages.Get())) {
402 m_internalStatus = InternalStatus::kError;
406 m_internalStatus = InternalStatus::kPage;
// Runs CheckHeaderAndLinearized() and picks the next state from its result.
// NOTE(review): the case labels and return statements are missing from
// this listing.
410bool CPDF_DataAvail::CheckHeader() {
411 switch (CheckHeaderAndLinearized()) {
// With a linearized header continue at kFirstPage, otherwise at
// kLoadAllCrossRef.
413 m_internalStatus = m_pLinearized ? InternalStatus::kFirstPage
414 : InternalStatus::kLoadAllCrossRef;
419 m_internalStatus = InternalStatus::kError;
// Validates the linearized header values, computes a start position and a
// data size from the first-page end offset, passes them to the validator's
// range check, and sets the state to kHintTable.
// NOTE(review): return statements, the condition before the clamp on the
// line numbered 435, and the remaining arguments of the range check are
// missing from this listing.
424bool CPDF_DataAvail::CheckFirstPage() {
// A zero first-page end offset, file size or main-xref offset is an error.
425 if (!m_pLinearized->GetFirstPageEndOffset() ||
426 !m_pLinearized->GetFileSize() ||
427 !m_pLinearized->GetMainXRefTableFirstEntryOffset()) {
428 m_internalStatus = InternalStatus::kError;
432 uint32_t dwEnd = m_pLinearized->GetFirstPageEndOffset();
// Replaces dwEnd with the file length (presumably a clamp -- confirm).
435 dwEnd = (uint32_t)m_dwFileLen;
// start_pos is min(1024, m_dwFileLen); data_size is dwEnd - 1024, or 0 when
// dwEnd does not exceed 1024.
437 const FX_FILESIZE start_pos = m_dwFileLen > 1024 ? 1024 : m_dwFileLen;
438 const size_t data_size = dwEnd > 1024 ?
static_cast<size_t>(dwEnd - 1024) : 0;
439 if (!GetValidator()->CheckDataRangeAndRequestIfUnavailable(start_pos,
443 m_internalStatus = InternalStatus::kHintTable;
// Parses the hint tables through CPDF_HintTables::Parse() and then
// inspects the validator: a read error sets kError; otherwise the visible
// final assignment sets kDone.
// NOTE(review): the assignment target of Parse(), the statement guarded by
// has_unavailable_data() and the return statements are missing from this
// listing.
447bool CPDF_DataAvail::CheckHintTables() {
450 CPDF_HintTables::Parse(GetSyntaxParser(), m_pLinearized.get());
452 if (GetValidator()->read_error()) {
453 m_internalStatus = InternalStatus::kError;
456 if (GetValidator()->has_unavailable_data())
459 m_internalStatus = InternalStatus::kDone;
470 pObjList, CPDF_SyntaxParser::ParseType::kLoose);
472 return (result && (!objnum || result->GetObjNum() == objnum))
478 switch (CheckHeaderAndLinearized()) {
480 return m_pLinearized ? kLinearized : kNotLinearized;
// Finds the header offset, creates the syntax parser at that offset,
// parses the linearized header and records that the header is available.
// NOTE(review): early-exit checks, the declaration of header_offset and
// all return statements are missing from this listing.
488CPDF_DataAvail::
DocAvailStatus CPDF_DataAvail::CheckHeaderAndLinearized() {
494 GetHeaderOffset(GetValidator());
495 if (GetValidator()->has_read_problems())
// header_offset has has_value()/value(), i.e. it is optional-like.
498 if (!header_offset.has_value())
// The syntax parser is created on the validator at the header offset.
501 m_parser.m_pSyntax = std::make_unique<CPDF_SyntaxParser>(
502 GetValidator(), header_offset.value());
503 m_pLinearized = m_parser.ParseLinearizedHeader();
504 if (GetValidator()->has_read_problems())
507 m_bHeaderAvail =
true;
// Per-page overload: dispatches on m_internalStatus for page dwPage.
// NOTE(review): the enclosing loop (if any), the kPageTree body, the label
// guarding the final assignments and all return statements are missing
// from this listing.
511bool CPDF_DataAvail::CheckPage(uint32_t dwPage) {
513 switch (m_internalStatus) {
514 case InternalStatus::kPageTree:
518 case InternalStatus::kPage:
519 if (!LoadDocPage(dwPage))
522 case InternalStatus::kError:
// The error state is handled by LoadAllFile().
523 return LoadAllFile();
// Marks the page tree and the current page dictionary as loaded and sets
// the state to kPage.
525 m_bPagesTreeLoad =
true;
527 m_bCurPageDictLoadOK =
true;
528 m_internalStatus = InternalStatus::kPage;
// Handles a page-tree node that is an array: marks pPageNode as kPages and
// appends one child PageNode per reference element, storing the referenced
// object number in the child's m_dwPageNo.
// NOTE(review): the code that obtains pPages, the guarding conditions for
// the kError assignments and the return statements are missing from this
// listing.
534bool CPDF_DataAvail::CheckArrayPageNode(uint32_t dwPageNo,
535 PageNode* pPageNode) {
536 bool bExists =
false;
539 m_internalStatus = InternalStatus::kError;
546 const CPDF_Array* pArray = pPages->AsArray();
548 m_internalStatus = InternalStatus::kError;
552 pPageNode->m_type = PageNode::Type::kPages;
553 for (size_t i = 0; i < pArray->size(); ++i) {
554 RetainPtr<
const CPDF_Reference> pKid = ToReference(pArray->GetObjectAt(i));
// The new child node carries the referenced object number.
558 auto pNode =
std::make_unique<PageNode>();
559 pNode->m_dwPageNo = pKid->GetRefObjNum();
560 pPageNode->m_ChildNodes.push_back(
std::move(pNode));
// Classifies the page-tree object dwPageNo and fills in pPageNode:
//   - array                  -> Type::kArray
//   - dictionary, /Type Page  -> Type::kPage
//   - dictionary, /Type Pages -> Type::kPages, followed by child handling
//   - anything else           -> kError
// NOTE(review): the code that obtains pPage and pKids, the case labels,
// some conditions and all return statements are missing from this listing.
565bool CPDF_DataAvail::CheckUnknownPageNode(uint32_t dwPageNo,
566 PageNode* pPageNode) {
567 bool bExists =
false;
570 m_internalStatus = InternalStatus::kError;
577 if (pPage->IsArray()) {
578 pPageNode->m_dwPageNo = dwPageNo;
579 pPageNode->m_type = PageNode::Type::kArray;
// Past this point only dictionaries are accepted.
583 if (!pPage->IsDictionary()) {
584 m_internalStatus = InternalStatus::kError;
588 pPageNode->m_dwPageNo = dwPageNo;
589 RetainPtr<CPDF_Dictionary> pDict = pPage->GetMutableDict();
590 const ByteString type = pDict->GetNameFor(
"Type");
591 if (type
== "Page") {
592 pPageNode->m_type = PageNode::Type::kPage;
// Any type other than "Page" or "Pages" is an error.
596 if (type
!= "Pages") {
597 m_internalStatus = InternalStatus::kError;
601 pPageNode->m_type = PageNode::Type::kPages;
604 m_internalStatus = InternalStatus::kPage;
// Adds child nodes depending on the type of pKids.
608 switch (pKids->GetType()) {
610 const CPDF_Reference* pKid = pKids->AsReference();
611 auto pNode =
std::make_unique<PageNode>();
613 pPageNode->m_ChildNodes.push_back(
std::move(pNode));
// Array form: one child per element converted with ToReference().
617 const CPDF_Array* pKidsArray = pKids->AsArray();
618 for (size_t i = 0; i < pKidsArray->size(); ++i) {
620 ToReference(pKidsArray->GetObjectAt(i));
624 auto pNode =
std::make_unique<PageNode>();
625 pNode->m_dwPageNo = pKid->GetRefObjNum();
626 pPageNode->m_ChildNodes.push_back(
std::move(pNode));
// Recursively walks the children of pageNode. Nodes of type kUnknown or
// kArray are resolved first; kPage nodes are compared against the wanted
// page; kPages nodes are descended into with level + 1.
// NOTE(review): the remaining parameters (iPage, iCount and level are used
// below), the updates of iCount, `break` statements and return statements
// are missing from this listing.
636bool CPDF_DataAvail::CheckPageNode(
const CPDF_DataAvail::PageNode& pageNode,
// Checks the recursion depth against kMaxPageRecursionDepth.
640 if (level >= kMaxPageRecursionDepth)
643 int32_t iSize =
fxcrt::CollectionSize<int32_t>(pageNode.m_ChildNodes);
// No children, or a page index not below the child count, is an error.
644 if (iSize <= 0 || iPage >= iSize) {
645 m_internalStatus = InternalStatus::kError;
648 for (int32_t i = 0; i < iSize; ++i) {
649 PageNode* pNode = pageNode.m_ChildNodes[i].get();
// Resolve nodes whose type has not been determined yet.
653 if (pNode->m_type == PageNode::Type::kUnknown) {
655 if (!CheckUnknownPageNode(pNode->m_dwPageNo, pNode))
658 if (pNode->m_type == PageNode::Type::kArray) {
660 if (!CheckArrayPageNode(pNode->m_dwPageNo, pNode))
663 switch (pNode->m_type) {
664 case PageNode::Type::kPage:
// When the counter matches the wanted page, record its object number.
666 if (iPage == iCount && m_pDocument)
667 m_pDocument->SetPageObjNum(iPage, pNode->m_dwPageNo);
669 case PageNode::Type::kPages:
670 if (!CheckPageNode(*pNode, iPage, iCount, level + 1))
673 case PageNode::Type::kUnknown:
674 case PageNode::Type::kArray:
// The wanted page has been reached: the state machine is done.
678 if (iPage == iCount) {
679 m_internalStatus = InternalStatus::kDone;
// Resolves page dwPage through the page-node tree rooted at m_PageNode.
// NOTE(review): the declaration of iCount, the assignment target on the
// line numbered 695 and the return statements are missing from this
// listing.
686bool CPDF_DataAvail::LoadDocPage(uint32_t dwPage) {
// checked_cast converts the unsigned page number to int.
687 int iPage = pdfium::base::checked_cast<
int>(dwPage);
// A page index at or past the page count, or an already loaded page,
// finishes immediately with kDone.
688 if (m_pDocument->GetPageCount() <= iPage ||
689 m_pDocument->IsPageLoaded(iPage)) {
690 m_internalStatus = InternalStatus::kDone;
// When the root node is itself a page, only index 0 yields kDone.
693 if (m_PageNode.m_type == PageNode::Type::kPage) {
695 iPage == 0 ? InternalStatus::kDone : InternalStatus::kError;
699 return CheckPageNode(m_PageNode, iPage, iCount, 0);
// Inspects the pages dictionary and, on the visible final path, returns
// whether its "Count" entry is greater than zero.
// NOTE(review): the code that obtains pPages, the conditions guarding the
// kError assignments, and the value returned when "Kids" is absent are
// missing from this listing.
702bool CPDF_DataAvail::CheckPageCount() {
703 bool bExists =
false;
706 m_internalStatus = InternalStatus::kError;
712 RetainPtr<
const CPDF_Dictionary> pPagesDict = pPages->GetDict();
714 m_internalStatus = InternalStatus::kError;
717 if (!pPagesDict->KeyExist(
"Kids"))
720 return pPagesDict->GetIntegerFor(
"Count") > 0;
// Classifies the root pages node (m_PagesObjNum) into m_PageNode. If
// CheckPageCount() succeeds the state becomes kPage; the visible final
// assignment sets m_bTotalLoadPageTree to true.
// NOTE(review): the return statements are missing from this listing, so
// which paths reach the final assignment is not visible.
723bool CPDF_DataAvail::LoadDocPages() {
724 if (!CheckUnknownPageNode(m_PagesObjNum, &m_PageNode))
727 if (CheckPageCount()) {
728 m_internalStatus = InternalStatus::kPage;
732 m_bTotalLoadPageTree =
true;
// Runs CheckPageStatus() repeatedly until m_bPagesTreeLoad is set, then
// calls LoadPages() on the document.
// NOTE(review): the statement executed when CheckPageStatus() fails, any
// checks before the document call and the return statements are missing
// from this listing.
736bool CPDF_DataAvail::LoadPages() {
737 while (!m_bPagesTreeLoad) {
738 if (!CheckPageStatus())
745 m_pDocument->LoadPages();
// Makes sure the main cross-reference table of a linearized document is
// loaded and the page item is prepared. The result is cached through
// m_bLinearedDataOK.
// NOTE(review): every return statement is missing from this listing, so
// the status returned on each path is not visible.
749CPDF_DataAvail::
DocAvailStatus CPDF_DataAvail::CheckLinearizedData() {
// Tests the cached result first.
750 if (m_bLinearedDataOK)
752 DCHECK(m_pLinearized);
// Needs a main-xref offset, a document, its parser and the trailer.
753 if (!m_pLinearized->GetMainXRefTableFirstEntryOffset() || !m_pDocument ||
754 !m_pDocument->GetParser() || !m_pDocument->GetParser()->GetTrailer()) {
// The main xref load is attempted once, tracked by m_bMainXRefLoadTried.
758 if (!m_bMainXRefLoadTried) {
// The trailer's "Prev" entry is used as the main xref offset; -1 stands in
// for an invalid checked value.
759 const FX_SAFE_FILESIZE prev =
760 m_pDocument->GetParser()->GetTrailer()->GetIntegerFor(
"Prev");
761 const FX_FILESIZE main_xref_offset = prev.ValueOrDefault(-1);
762 if (main_xref_offset < 0)
765 if (main_xref_offset == 0)
// Size of the range from the main xref offset to the end of the file,
// computed with checked arithmetic.
768 FX_SAFE_SIZE_T data_size = m_dwFileLen;
769 data_size -= main_xref_offset;
770 if (!data_size.IsValid())
773 if (!GetValidator()->CheckDataRangeAndRequestIfUnavailable(
774 main_xref_offset, data_size.ValueOrDie()))
778 m_pDocument->GetParser()->LoadLinearizedMainXRefTable();
779 m_bMainXRefLoadTried =
true;
783 if (!PreparePageItem())
786 m_bMainXRefLoadedOK =
true;
787 m_bLinearedDataOK =
true;
799 const int iPage = pdfium::base::checked_cast<
int>(dwPage);
800 if (iPage >= m_pDocument->GetPageCount()) {
805 if (IsFirstCheck(dwPage)) {
806 m_bCurPageDictLoadOK =
false;
809 if (pdfium::Contains(m_pagesLoadState, dwPage))
814 if (dwPage == m_pLinearized->GetFirstPageNo()) {
815 RetainPtr<
const CPDF_Dictionary> pPageDict =
816 m_pDocument->GetPageDictionary(iPage);
821 std::make_pair(dwPage, std::make_unique<CPDF_PageObjectAvail>(
822 GetValidator(), m_pDocument, pPageDict));
824 CPDF_PageObjectAvail* page_obj_avail =
825 m_PagesObjAvail.insert(std::move(page_num_obj)).first->second.get();
835 nResult = m_pHintTables->CheckPage(dwPage);
839 m_pagesLoadState.insert(dwPage);
844 if (!m_bMainXRefLoadedOK) {
847 m_pDocument->GetParser()->RebuildCrossRef();
848 ResetFirstCheck(dwPage);
851 if (m_bTotalLoadPageTree) {
855 if (!m_bCurPageDictLoadOK && !CheckPage(dwPage))
859 if (!m_bTotalLoadPageTree && !m_bCurPageDictLoadOK && !CheckPage(dwPage)) {
868 m_pDocument->GetMutablePageDictionary(iPage);
874 std::make_pair(dwPage, std::make_unique<CPDF_PageObjectAvail>(
875 GetValidator(), m_pDocument, pPageDict));
876 CPDF_PageObjectAvail* page_obj_avail =
877 m_PagesObjAvail.insert(std::move(page_num_obj)).first->second.get();
885 return resources_status;
887 m_bCurPageDictLoadOK =
false;
888 ResetFirstCheck(dwPage);
889 m_pagesLoadState.insert(dwPage);
898 if (GetValidator()->has_read_problems())
904 CPDF_PageObjectAvail* resource_avail =
905 m_PagesResourcesAvail
906 .insert(std::make_pair(resources,
907 std::make_unique<CPDF_PageObjectAvail>(
908 GetValidator(), m_pDocument, resources)))
909 .first->second.get();
918 return m_pDocument ? m_pDocument->GetParser()->m_pSyntax.get()
919 : m_parser.m_pSyntax.get();
924 return m_pLinearized->GetPageCount();
925 return m_pDocument ? m_pDocument->GetPageCount() : 0;
930 if (!m_pDocument || index < 0 || index >= GetPageCount())
932 RetainPtr<
const CPDF_Dictionary> page = m_pDocument->GetPageDictionary(index);
935 if (!m_pLinearized || !m_pHintTables)
938 if (index ==
static_cast<
int>(m_pLinearized->GetFirstPageNo()))
942 uint32_t dwObjNum = 0;
943 const bool bPagePosGot = m_pHintTables->GetPagePos(index, &szPageStartPos,
944 &szPageLength, &dwObjNum);
945 if (!bPagePosGot || !dwObjNum)
948 m_pDocument->SetPageObjNum(index, dwObjNum);
950 if (!m_pDocument->GetIndirectObject(dwObjNum)) {
951 m_pDocument->ReplaceIndirectObjectIfHigherGeneration(
952 dwObjNum, ParseIndirectObjectAt(szPageStartPos, dwObjNum, m_pDocument));
954 if (!ValidatePage(index))
956 return m_pDocument->GetPageDictionary(index);
961 const HintsScope hints_scope(GetValidator(), pHints);
962 return CheckAcroForm();
// Builds m_pFormAvail, a CPDF_PageObjectAvail over the pAcroForm object,
// and switches on its CheckAvail() result.
// NOTE(review): the early checks, the lookup that produces pAcroForm, the
// case labels and the return statements are missing from this listing.
965CPDF_DataAvail::
DocFormStatus CPDF_DataAvail::CheckAcroForm() {
978 const CPDF_Dictionary* pRoot = m_pDocument->GetRoot();
986 m_pFormAvail = std::make_unique<CPDF_PageObjectAvail>(
987 GetValidator(), m_pDocument, std::move(pAcroForm));
989 switch (m_pFormAvail->CheckAvail()) {
// Looks up the dictionary of page dwPage and constructs a temporary
// CPDF_PageObjectAvail over it.
// NOTE(review): the null check on pPageDict (if any) and the return
// statement are missing from this listing, so how obj_avail is evaluated
// is not visible.
999bool CPDF_DataAvail::ValidatePage(uint32_t dwPage)
const {
// checked_cast converts the unsigned page number to int.
1000 int iPage = pdfium::base::checked_cast<
int>(dwPage);
1001 RetainPtr<
const CPDF_Dictionary> pPageDict =
1002 m_pDocument->GetPageDictionary(iPage);
1006 CPDF_PageObjectAvail obj_avail(GetValidator(), m_pDocument,
1007 std::move(pPageDict));
1013 std::unique_ptr<CPDF_Document::RenderDataIface> pRenderData,
1014 std::unique_ptr<CPDF_Document::PageDataIface> pPageData,
1015 const ByteString& password) {
1021 std::move(pPageData));
1022 document->AddObserver(
this);
1026 document->LoadLinearizedDoc(GetValidator(), password);
1029 if (GetValidator()->has_read_problems()) {
1035 return std::make_pair(error,
nullptr);
1037 m_pDocument = document.get();
// Out-of-line defaulted default constructor for PageNode.
1041CPDF_DataAvail::PageNode::PageNode() =
default;
// Out-of-line defaulted destructor for PageNode.
1043CPDF_DataAvail::PageNode::~PageNode() =
default;
CPDF_DataAvail(FileAvail *pFileAvail, RetainPtr< IFX_SeekableReadStream > pFileRead)
~CPDF_DataAvail() override
DocAvailStatus IsPageAvail(uint32_t dwPage, DownloadHints *pHints)
std::pair< CPDF_Parser::Error, std::unique_ptr< CPDF_Document > > ParseDocument(std::unique_ptr< CPDF_Document::RenderDataIface > pRenderData, std::unique_ptr< CPDF_Document::PageDataIface > pPageData, const ByteString &password)
RetainPtr< const CPDF_Dictionary > GetPageDictionary(int index) const
RetainPtr< CPDF_ReadValidator > GetValidator() const
DocAvailStatus IsDocAvail(DownloadHints *pHints)
void OnObservableDestroyed() override
DocLinearizationStatus IsLinearizedPDF()
DocFormStatus IsFormAvail(DownloadHints *pHints)
CPDF_DataAvail::DocAvailStatus CheckAvail()
static constexpr uint32_t kInvalidObjNum
uint32_t GetRefObjNum() const
FX_FILESIZE GetPos() const
void SetPos(FX_FILESIZE pos)
bool operator==(const char *ptr) const
bool operator!=(const char *ptr) const