Qt
Internal/Contributor docs for the Qt SDK. Note: These are NOT official API docs; those are found at https://doc.qt.io/
Loading...
Searching...
No Matches
cpdf_data_avail.cpp
Go to the documentation of this file.
1// Copyright 2016 The PDFium Authors
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7#include "core/fpdfapi/parser/cpdf_data_avail.h"
8
9#include <algorithm>
10#include <memory>
11#include <utility>
12
13#include "core/fpdfapi/parser/cpdf_array.h"
14#include "core/fpdfapi/parser/cpdf_cross_ref_avail.h"
15#include "core/fpdfapi/parser/cpdf_dictionary.h"
16#include "core/fpdfapi/parser/cpdf_document.h"
17#include "core/fpdfapi/parser/cpdf_hint_tables.h"
18#include "core/fpdfapi/parser/cpdf_linearized_header.h"
19#include "core/fpdfapi/parser/cpdf_name.h"
20#include "core/fpdfapi/parser/cpdf_number.h"
21#include "core/fpdfapi/parser/cpdf_page_object_avail.h"
22#include "core/fpdfapi/parser/cpdf_read_validator.h"
23#include "core/fpdfapi/parser/cpdf_reference.h"
24#include "core/fpdfapi/parser/cpdf_stream.h"
25#include "core/fpdfapi/parser/cpdf_syntax_parser.h"
26#include "core/fpdfapi/parser/fpdf_parser_utility.h"
27#include "core/fxcrt/autorestorer.h"
28#include "core/fxcrt/fx_extension.h"
29#include "core/fxcrt/fx_safe_types.h"
30#include "core/fxcrt/stl_util.h"
31#include "third_party/base/check.h"
32#include "third_party/base/containers/contains.h"
33#include "third_party/base/notreached.h"
34#include "third_party/base/numerics/safe_conversions.h"
35
36namespace {
37
38RetainPtr<CPDF_Object> GetResourceObject(RetainPtr<CPDF_Dictionary> pDict) {
39 constexpr size_t kMaxHierarchyDepth = 64;
40 size_t depth = 0;
41
42 while (pDict) {
43 RetainPtr<CPDF_Object> result = pDict->GetMutableObjectFor("Resources");
44 if (result)
45 return result;
46 if (++depth > kMaxHierarchyDepth) {
47 // We have cycle in parents hierarchy.
48 return nullptr;
49 }
50 RetainPtr<CPDF_Object> parent = pDict->GetMutableObjectFor("Parent");
51 pDict = parent ? parent->GetMutableDict() : nullptr;
52 }
53 return nullptr;
54}
55
56class HintsScope {
57 public:
58 HintsScope(RetainPtr<CPDF_ReadValidator> validator,
59 CPDF_DataAvail::DownloadHints* hints)
60 : validator_(std::move(validator)) {
61 DCHECK(validator_);
62 validator_->SetDownloadHints(hints);
63 }
64
65 ~HintsScope() { validator_->SetDownloadHints(nullptr); }
66
67 private:
68 RetainPtr<CPDF_ReadValidator> validator_;
69};
70
71} // namespace
72
73CPDF_DataAvail::FileAvail::~FileAvail() = default;
74
75CPDF_DataAvail::DownloadHints::~DownloadHints() = default;
76
82
83CPDF_DataAvail::~CPDF_DataAvail() {
84 m_pHintTables.reset();
85 if (m_pDocument)
86 m_pDocument->RemoveObserver(this);
87}
88
89void CPDF_DataAvail::OnObservableDestroyed() {
90 m_pDocument = nullptr;
91 m_pFormAvail.reset();
92 m_PagesArray.clear();
93 m_PagesObjAvail.clear();
94 m_PagesResourcesAvail.clear();
95}
96
97CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::IsDocAvail(
98 DownloadHints* pHints) {
99 if (!m_dwFileLen)
100 return kDataError;
101
102 DCHECK(m_SeenPageObjList.empty());
103 AutoRestorer<std::set<uint32_t>> seen_objects_restorer(&m_SeenPageObjList);
104 const HintsScope hints_scope(GetValidator(), pHints);
105 while (!m_bDocAvail) {
106 if (!CheckDocStatus())
107 return kDataNotAvailable;
108 }
109
110 return kDataAvailable;
111}
112
113bool CPDF_DataAvail::CheckDocStatus() {
114 switch (m_internalStatus) {
115 case InternalStatus::kHeader:
116 return CheckHeader();
117 case InternalStatus::kFirstPage:
118 return CheckFirstPage();
119 case InternalStatus::kHintTable:
120 return CheckHintTables();
121 case InternalStatus::kLoadAllCrossRef:
122 return CheckAndLoadAllXref();
123 case InternalStatus::kLoadAllFile:
124 return LoadAllFile();
125 case InternalStatus::kRoot:
126 return CheckRoot();
127 case InternalStatus::kInfo:
128 return CheckInfo();
129 case InternalStatus::kPageTree:
130 if (m_bTotalLoadPageTree)
131 return CheckPages();
132 return LoadDocPages();
133 case InternalStatus::kPage:
134 if (m_bTotalLoadPageTree)
135 return CheckPage();
136 m_internalStatus = InternalStatus::kPageLaterLoad;
137 return true;
138 case InternalStatus::kError:
139 return LoadAllFile();
140 case InternalStatus::kPageLaterLoad:
141 m_internalStatus = InternalStatus::kPage;
142 [[fallthrough]];
143 default:
144 m_bDocAvail = true;
145 return true;
146 }
147}
148
149bool CPDF_DataAvail::CheckPageStatus() {
150 switch (m_internalStatus) {
151 case InternalStatus::kPageTree:
152 return CheckPages();
153 case InternalStatus::kPage:
154 return CheckPage();
155 case InternalStatus::kError:
156 return LoadAllFile();
157 default:
158 m_bPagesTreeLoad = true;
159 m_bPagesLoad = true;
160 return true;
161 }
162}
163
164bool CPDF_DataAvail::LoadAllFile() {
165 if (GetValidator()->CheckWholeFileAndRequestIfUnavailable()) {
166 m_internalStatus = InternalStatus::kDone;
167 return true;
168 }
169 return false;
170}
171
172bool CPDF_DataAvail::CheckAndLoadAllXref() {
173 if (!m_pCrossRefAvail) {
175 const FX_FILESIZE last_xref_offset = m_parser.ParseStartXRef();
176 if (GetValidator()->has_read_problems())
177 return false;
178
179 if (last_xref_offset <= 0) {
180 m_internalStatus = InternalStatus::kError;
181 return false;
182 }
183
184 m_pCrossRefAvail = std::make_unique<CPDF_CrossRefAvail>(GetSyntaxParser(),
185 last_xref_offset);
186 }
187
188 switch (m_pCrossRefAvail->CheckAvail()) {
189 case kDataAvailable:
190 break;
192 return false;
193 case kDataError:
194 m_internalStatus = InternalStatus::kError;
195 return false;
196 }
197
198 if (!m_parser.LoadAllCrossRefV4(m_pCrossRefAvail->last_crossref_offset()) &&
199 !m_parser.LoadAllCrossRefV5(m_pCrossRefAvail->last_crossref_offset())) {
200 m_internalStatus = InternalStatus::kLoadAllFile;
201 return false;
202 }
203
204 m_internalStatus = InternalStatus::kRoot;
205 return true;
206}
207
208RetainPtr<CPDF_Object> CPDF_DataAvail::GetObject(uint32_t objnum,
209 bool* pExistInFile) {
210 *pExistInFile = false;
211 CPDF_Parser* pParser = m_pDocument ? m_pDocument->GetParser() : &m_parser;
212 if (!pParser)
213 return nullptr;
214
216 RetainPtr<CPDF_Object> pRet = pParser->ParseIndirectObject(objnum);
217 if (!pRet)
218 return nullptr;
219
220 *pExistInFile = true;
221 if (GetValidator()->has_read_problems())
222 return nullptr;
223
224 return pRet;
225}
226
227bool CPDF_DataAvail::CheckInfo() {
228 const uint32_t dwInfoObjNum = m_parser.GetInfoObjNum();
229 if (dwInfoObjNum == CPDF_Object::kInvalidObjNum) {
230 m_internalStatus = InternalStatus::kPageTree;
231 return true;
232 }
233
235 m_parser.ParseIndirectObject(dwInfoObjNum);
236 if (GetValidator()->has_read_problems())
237 return false;
238
239 m_internalStatus = InternalStatus::kPageTree;
240 return true;
241}
242
243bool CPDF_DataAvail::CheckRoot() {
244 const uint32_t dwRootObjNum = m_parser.GetRootObjNum();
245 if (dwRootObjNum == CPDF_Object::kInvalidObjNum) {
246 m_internalStatus = InternalStatus::kError;
247 return true;
248 }
249
251 m_pRoot = ToDictionary(m_parser.ParseIndirectObject(dwRootObjNum));
252 if (GetValidator()->has_read_problems())
253 return false;
254
255 if (!m_pRoot) {
256 m_internalStatus = InternalStatus::kError;
257 return false;
258 }
259
260 RetainPtr<const CPDF_Reference> pRef =
261 ToReference(m_pRoot->GetObjectFor("Pages"));
262 if (!pRef) {
263 m_internalStatus = InternalStatus::kError;
264 return false;
265 }
266
267 m_PagesObjNum = pRef->GetRefObjNum();
268 m_internalStatus = InternalStatus::kInfo;
269 return true;
270}
271
272bool CPDF_DataAvail::PreparePageItem() {
273 const CPDF_Dictionary* pRoot = m_pDocument->GetRoot();
274 if (!pRoot) {
275 m_internalStatus = InternalStatus::kError;
276 return false;
277 }
278
279 RetainPtr<const CPDF_Reference> pRef =
280 ToReference(pRoot->GetObjectFor("Pages"));
281 if (!pRef) {
282 m_internalStatus = InternalStatus::kError;
283 return false;
284 }
285
286 m_PagesObjNum = pRef->GetRefObjNum();
287 m_internalStatus = InternalStatus::kPageTree;
288 return true;
289}
290
291bool CPDF_DataAvail::IsFirstCheck(uint32_t dwPage) {
292 return m_pageMapCheckState.insert(dwPage).second;
293}
294
295void CPDF_DataAvail::ResetFirstCheck(uint32_t dwPage) {
296 m_pageMapCheckState.erase(dwPage);
297}
298
299bool CPDF_DataAvail::CheckPage() {
300 std::vector<uint32_t> UnavailObjList;
301 for (uint32_t dwPageObjNum : m_PageObjList) {
302 bool bExists = false;
303 RetainPtr<CPDF_Object> pObj = GetObject(dwPageObjNum, &bExists);
304 if (!pObj) {
305 if (bExists)
306 UnavailObjList.push_back(dwPageObjNum);
307 continue;
308 }
309
310 switch (pObj->GetType()) {
311 case CPDF_Object::kArray: {
312 CPDF_ArrayLocker locker(pObj->AsArray());
313 for (const auto& pArrayObj : locker) {
314 const CPDF_Reference* pRef = ToReference(pArrayObj.Get());
315 if (pRef)
316 UnavailObjList.push_back(pRef->GetRefObjNum());
317 }
318 break;
319 }
320 case CPDF_Object::kDictionary:
321 if (pObj->GetDict()->GetNameFor("Type") == "Pages")
322 m_PagesArray.push_back(std::move(pObj));
323 break;
324 default:
325 break;
326 }
327 }
328 m_PageObjList.clear();
329 if (!UnavailObjList.empty()) {
330 m_PageObjList = std::move(UnavailObjList);
331 return false;
332 }
333 size_t iPages = m_PagesArray.size();
334 for (size_t i = 0; i < iPages; ++i) {
335 RetainPtr<CPDF_Object> pPages = std::move(m_PagesArray[i]);
336 if (pPages && !GetPageKids(pPages.Get())) {
337 m_PagesArray.clear();
338 m_internalStatus = InternalStatus::kError;
339 return false;
340 }
341 }
342 m_PagesArray.clear();
343 if (m_PageObjList.empty())
344 m_internalStatus = InternalStatus::kDone;
345
346 return true;
347}
348
349bool CPDF_DataAvail::GetPageKids(CPDF_Object* pPages) {
350 RetainPtr<const CPDF_Dictionary> pDict = pPages->GetDict();
351 if (!pDict)
352 return true;
353
354 RetainPtr<const CPDF_Object> pKids = pDict->GetObjectFor("Kids");
355 if (!pKids)
356 return true;
357
358 std::vector<uint32_t> object_numbers;
359 switch (pKids->GetType()) {
361 object_numbers.push_back(pKids->AsReference()->GetRefObjNum());
362 break;
363 case CPDF_Object::kArray: {
364 CPDF_ArrayLocker locker(pKids->AsArray());
365 for (const auto& pArrayObj : locker) {
366 const CPDF_Reference* pRef = ToReference(pArrayObj.Get());
367 if (pRef)
368 object_numbers.push_back(pRef->GetRefObjNum());
369 }
370 break;
371 }
372 default:
373 m_internalStatus = InternalStatus::kError;
374 return false;
375 }
376
377 for (uint32_t num : object_numbers) {
378 bool inserted = m_SeenPageObjList.insert(num).second;
379 if (inserted)
380 m_PageObjList.push_back(num);
381 }
382 return true;
383}
384
385bool CPDF_DataAvail::CheckPages() {
386 bool bExists = false;
387 RetainPtr<CPDF_Object> pPages = GetObject(m_PagesObjNum, &bExists);
388 if (!bExists) {
389 m_internalStatus = InternalStatus::kLoadAllFile;
390 return true;
391 }
392
393 if (!pPages) {
394 if (m_internalStatus == InternalStatus::kError) {
395 m_internalStatus = InternalStatus::kLoadAllFile;
396 return true;
397 }
398 return false;
399 }
400
401 if (!GetPageKids(pPages.Get())) {
402 m_internalStatus = InternalStatus::kError;
403 return false;
404 }
405
406 m_internalStatus = InternalStatus::kPage;
407 return true;
408}
409
410bool CPDF_DataAvail::CheckHeader() {
411 switch (CheckHeaderAndLinearized()) {
412 case kDataAvailable:
413 m_internalStatus = m_pLinearized ? InternalStatus::kFirstPage
414 : InternalStatus::kLoadAllCrossRef;
415 return true;
417 return false;
418 case kDataError:
419 m_internalStatus = InternalStatus::kError;
420 return true;
421 }
422}
423
424bool CPDF_DataAvail::CheckFirstPage() {
425 if (!m_pLinearized->GetFirstPageEndOffset() ||
426 !m_pLinearized->GetFileSize() ||
427 !m_pLinearized->GetMainXRefTableFirstEntryOffset()) {
428 m_internalStatus = InternalStatus::kError;
429 return false;
430 }
431
432 uint32_t dwEnd = m_pLinearized->GetFirstPageEndOffset();
433 dwEnd += 512;
434 if ((FX_FILESIZE)dwEnd > m_dwFileLen)
435 dwEnd = (uint32_t)m_dwFileLen;
436
437 const FX_FILESIZE start_pos = m_dwFileLen > 1024 ? 1024 : m_dwFileLen;
438 const size_t data_size = dwEnd > 1024 ? static_cast<size_t>(dwEnd - 1024) : 0;
439 if (!GetValidator()->CheckDataRangeAndRequestIfUnavailable(start_pos,
440 data_size))
441 return false;
442
443 m_internalStatus = InternalStatus::kHintTable;
444 return true;
445}
446
447bool CPDF_DataAvail::CheckHintTables() {
449 m_pHintTables =
450 CPDF_HintTables::Parse(GetSyntaxParser(), m_pLinearized.get());
451
452 if (GetValidator()->read_error()) {
453 m_internalStatus = InternalStatus::kError;
454 return true;
455 }
456 if (GetValidator()->has_unavailable_data())
457 return false;
458
459 m_internalStatus = InternalStatus::kDone;
460 return true;
461}
462
463RetainPtr<CPDF_Object> CPDF_DataAvail::ParseIndirectObjectAt(
464 FX_FILESIZE pos,
465 uint32_t objnum,
466 CPDF_IndirectObjectHolder* pObjList) const {
467 const FX_FILESIZE SavedPos = GetSyntaxParser()->GetPos();
468 GetSyntaxParser()->SetPos(pos);
469 RetainPtr<CPDF_Object> result = GetSyntaxParser()->GetIndirectObject(
470 pObjList, CPDF_SyntaxParser::ParseType::kLoose);
471 GetSyntaxParser()->SetPos(SavedPos);
472 return (result && (!objnum || result->GetObjNum() == objnum))
473 ? std::move(result)
474 : nullptr;
475}
476
477CPDF_DataAvail::DocLinearizationStatus CPDF_DataAvail::IsLinearizedPDF() {
478 switch (CheckHeaderAndLinearized()) {
479 case kDataAvailable:
480 return m_pLinearized ? kLinearized : kNotLinearized;
483 case kDataError:
484 return kNotLinearized;
485 }
486}
487
488CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckHeaderAndLinearized() {
489 if (m_bHeaderAvail)
490 return kDataAvailable;
491
493 const absl::optional<FX_FILESIZE> header_offset =
494 GetHeaderOffset(GetValidator());
495 if (GetValidator()->has_read_problems())
496 return kDataNotAvailable;
497
498 if (!header_offset.has_value())
499 return kDataError;
500
501 m_parser.m_pSyntax = std::make_unique<CPDF_SyntaxParser>(
502 GetValidator(), header_offset.value());
503 m_pLinearized = m_parser.ParseLinearizedHeader();
504 if (GetValidator()->has_read_problems())
505 return kDataNotAvailable;
506
507 m_bHeaderAvail = true;
508 return kDataAvailable;
509}
510
511bool CPDF_DataAvail::CheckPage(uint32_t dwPage) {
512 while (true) {
513 switch (m_internalStatus) {
514 case InternalStatus::kPageTree:
515 if (!LoadDocPages())
516 return false;
517 break;
518 case InternalStatus::kPage:
519 if (!LoadDocPage(dwPage))
520 return false;
521 break;
522 case InternalStatus::kError:
523 return LoadAllFile();
524 default:
525 m_bPagesTreeLoad = true;
526 m_bPagesLoad = true;
527 m_bCurPageDictLoadOK = true;
528 m_internalStatus = InternalStatus::kPage;
529 return true;
530 }
531 }
532}
533
534bool CPDF_DataAvail::CheckArrayPageNode(uint32_t dwPageNo,
535 PageNode* pPageNode) {
536 bool bExists = false;
537 RetainPtr<CPDF_Object> pPages = GetObject(dwPageNo, &bExists);
538 if (!bExists) {
539 m_internalStatus = InternalStatus::kError;
540 return false;
541 }
542
543 if (!pPages)
544 return false;
545
546 const CPDF_Array* pArray = pPages->AsArray();
547 if (!pArray) {
548 m_internalStatus = InternalStatus::kError;
549 return false;
550 }
551
552 pPageNode->m_type = PageNode::Type::kPages;
553 for (size_t i = 0; i < pArray->size(); ++i) {
554 RetainPtr<const CPDF_Reference> pKid = ToReference(pArray->GetObjectAt(i));
555 if (!pKid)
556 continue;
557
558 auto pNode = std::make_unique<PageNode>();
559 pNode->m_dwPageNo = pKid->GetRefObjNum();
560 pPageNode->m_ChildNodes.push_back(std::move(pNode));
561 }
562 return true;
563}
564
565bool CPDF_DataAvail::CheckUnknownPageNode(uint32_t dwPageNo,
566 PageNode* pPageNode) {
567 bool bExists = false;
568 RetainPtr<CPDF_Object> pPage = GetObject(dwPageNo, &bExists);
569 if (!bExists) {
570 m_internalStatus = InternalStatus::kError;
571 return false;
572 }
573
574 if (!pPage)
575 return false;
576
577 if (pPage->IsArray()) {
578 pPageNode->m_dwPageNo = dwPageNo;
579 pPageNode->m_type = PageNode::Type::kArray;
580 return true;
581 }
582
583 if (!pPage->IsDictionary()) {
584 m_internalStatus = InternalStatus::kError;
585 return false;
586 }
587
588 pPageNode->m_dwPageNo = dwPageNo;
589 RetainPtr<CPDF_Dictionary> pDict = pPage->GetMutableDict();
590 const ByteString type = pDict->GetNameFor("Type");
591 if (type == "Page") {
592 pPageNode->m_type = PageNode::Type::kPage;
593 return true;
594 }
595
596 if (type != "Pages") {
597 m_internalStatus = InternalStatus::kError;
598 return false;
599 }
600
601 pPageNode->m_type = PageNode::Type::kPages;
602 RetainPtr<CPDF_Object> pKids = pDict->GetMutableObjectFor("Kids");
603 if (!pKids) {
604 m_internalStatus = InternalStatus::kPage;
605 return true;
606 }
607
608 switch (pKids->GetType()) {
610 const CPDF_Reference* pKid = pKids->AsReference();
611 auto pNode = std::make_unique<PageNode>();
612 pNode->m_dwPageNo = pKid->GetRefObjNum();
613 pPageNode->m_ChildNodes.push_back(std::move(pNode));
614 break;
615 }
616 case CPDF_Object::kArray: {
617 const CPDF_Array* pKidsArray = pKids->AsArray();
618 for (size_t i = 0; i < pKidsArray->size(); ++i) {
619 RetainPtr<const CPDF_Reference> pKid =
620 ToReference(pKidsArray->GetObjectAt(i));
621 if (!pKid)
622 continue;
623
624 auto pNode = std::make_unique<PageNode>();
625 pNode->m_dwPageNo = pKid->GetRefObjNum();
626 pPageNode->m_ChildNodes.push_back(std::move(pNode));
627 }
628 break;
629 }
630 default:
631 break;
632 }
633 return true;
634}
635
636bool CPDF_DataAvail::CheckPageNode(const CPDF_DataAvail::PageNode& pageNode,
637 int32_t iPage,
638 int32_t& iCount,
639 int level) {
640 if (level >= kMaxPageRecursionDepth)
641 return false;
642
643 int32_t iSize = fxcrt::CollectionSize<int32_t>(pageNode.m_ChildNodes);
644 if (iSize <= 0 || iPage >= iSize) {
645 m_internalStatus = InternalStatus::kError;
646 return false;
647 }
648 for (int32_t i = 0; i < iSize; ++i) {
649 PageNode* pNode = pageNode.m_ChildNodes[i].get();
650 if (!pNode)
651 continue;
652
653 if (pNode->m_type == PageNode::Type::kUnknown) {
654 // Updates the type for the unknown page node.
655 if (!CheckUnknownPageNode(pNode->m_dwPageNo, pNode))
656 return false;
657 }
658 if (pNode->m_type == PageNode::Type::kArray) {
659 // Updates a more specific type for the array page node.
660 if (!CheckArrayPageNode(pNode->m_dwPageNo, pNode))
661 return false;
662 }
663 switch (pNode->m_type) {
664 case PageNode::Type::kPage:
665 iCount++;
666 if (iPage == iCount && m_pDocument)
667 m_pDocument->SetPageObjNum(iPage, pNode->m_dwPageNo);
668 break;
669 case PageNode::Type::kPages:
670 if (!CheckPageNode(*pNode, iPage, iCount, level + 1))
671 return false;
672 break;
673 case PageNode::Type::kUnknown:
674 case PageNode::Type::kArray:
675 // Already converted above, error if we get here.
676 return false;
677 }
678 if (iPage == iCount) {
679 m_internalStatus = InternalStatus::kDone;
680 return true;
681 }
682 }
683 return true;
684}
685
686bool CPDF_DataAvail::LoadDocPage(uint32_t dwPage) {
687 int iPage = pdfium::base::checked_cast<int>(dwPage);
688 if (m_pDocument->GetPageCount() <= iPage ||
689 m_pDocument->IsPageLoaded(iPage)) {
690 m_internalStatus = InternalStatus::kDone;
691 return true;
692 }
693 if (m_PageNode.m_type == PageNode::Type::kPage) {
694 m_internalStatus =
695 iPage == 0 ? InternalStatus::kDone : InternalStatus::kError;
696 return true;
697 }
698 int32_t iCount = -1;
699 return CheckPageNode(m_PageNode, iPage, iCount, 0);
700}
701
702bool CPDF_DataAvail::CheckPageCount() {
703 bool bExists = false;
704 RetainPtr<CPDF_Object> pPages = GetObject(m_PagesObjNum, &bExists);
705 if (!bExists) {
706 m_internalStatus = InternalStatus::kError;
707 return false;
708 }
709 if (!pPages)
710 return false;
711
712 RetainPtr<const CPDF_Dictionary> pPagesDict = pPages->GetDict();
713 if (!pPagesDict) {
714 m_internalStatus = InternalStatus::kError;
715 return false;
716 }
717 if (!pPagesDict->KeyExist("Kids"))
718 return true;
719
720 return pPagesDict->GetIntegerFor("Count") > 0;
721}
722
723bool CPDF_DataAvail::LoadDocPages() {
724 if (!CheckUnknownPageNode(m_PagesObjNum, &m_PageNode))
725 return false;
726
727 if (CheckPageCount()) {
728 m_internalStatus = InternalStatus::kPage;
729 return true;
730 }
731
732 m_bTotalLoadPageTree = true;
733 return false;
734}
735
736bool CPDF_DataAvail::LoadPages() {
737 while (!m_bPagesTreeLoad) {
738 if (!CheckPageStatus())
739 return false;
740 }
741
742 if (m_bPagesLoad)
743 return true;
744
745 m_pDocument->LoadPages();
746 return false;
747}
748
749CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckLinearizedData() {
750 if (m_bLinearedDataOK)
751 return kDataAvailable;
752 DCHECK(m_pLinearized);
753 if (!m_pLinearized->GetMainXRefTableFirstEntryOffset() || !m_pDocument ||
754 !m_pDocument->GetParser() || !m_pDocument->GetParser()->GetTrailer()) {
755 return kDataError;
756 }
757
758 if (!m_bMainXRefLoadTried) {
759 const FX_SAFE_FILESIZE prev =
760 m_pDocument->GetParser()->GetTrailer()->GetIntegerFor("Prev");
761 const FX_FILESIZE main_xref_offset = prev.ValueOrDefault(-1);
762 if (main_xref_offset < 0)
763 return kDataError;
764
765 if (main_xref_offset == 0)
766 return kDataAvailable;
767
768 FX_SAFE_SIZE_T data_size = m_dwFileLen;
769 data_size -= main_xref_offset;
770 if (!data_size.IsValid())
771 return kDataError;
772
773 if (!GetValidator()->CheckDataRangeAndRequestIfUnavailable(
774 main_xref_offset, data_size.ValueOrDie()))
775 return kDataNotAvailable;
776
777 CPDF_Parser::Error eRet =
778 m_pDocument->GetParser()->LoadLinearizedMainXRefTable();
779 m_bMainXRefLoadTried = true;
780 if (eRet != CPDF_Parser::SUCCESS)
781 return kDataError;
782
783 if (!PreparePageItem())
784 return kDataNotAvailable;
785
786 m_bMainXRefLoadedOK = true;
787 m_bLinearedDataOK = true;
788 }
789
790 return m_bLinearedDataOK ? kDataAvailable : kDataNotAvailable;
791}
792
793CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::IsPageAvail(
794 uint32_t dwPage,
795 DownloadHints* pHints) {
796 if (!m_pDocument)
797 return kDataError;
798
799 const int iPage = pdfium::base::checked_cast<int>(dwPage);
800 if (iPage >= m_pDocument->GetPageCount()) {
801 // This is XFA page.
802 return kDataAvailable;
803 }
804
805 if (IsFirstCheck(dwPage)) {
806 m_bCurPageDictLoadOK = false;
807 }
808
809 if (pdfium::Contains(m_pagesLoadState, dwPage))
810 return kDataAvailable;
811
812 const HintsScope hints_scope(GetValidator(), pHints);
813 if (m_pLinearized) {
814 if (dwPage == m_pLinearized->GetFirstPageNo()) {
815 RetainPtr<const CPDF_Dictionary> pPageDict =
816 m_pDocument->GetPageDictionary(iPage);
817 if (!pPageDict)
818 return kDataError;
819
820 auto page_num_obj =
821 std::make_pair(dwPage, std::make_unique<CPDF_PageObjectAvail>(
822 GetValidator(), m_pDocument, pPageDict));
823
824 CPDF_PageObjectAvail* page_obj_avail =
825 m_PagesObjAvail.insert(std::move(page_num_obj)).first->second.get();
826 // TODO(art-snake): Check resources.
827 return page_obj_avail->CheckAvail();
828 }
829
830 DocAvailStatus nResult = CheckLinearizedData();
831 if (nResult != kDataAvailable)
832 return nResult;
833
834 if (m_pHintTables) {
835 nResult = m_pHintTables->CheckPage(dwPage);
836 if (nResult != kDataAvailable)
837 return nResult;
838 if (GetPageDictionary(dwPage)) {
839 m_pagesLoadState.insert(dwPage);
840 return kDataAvailable;
841 }
842 }
843
844 if (!m_bMainXRefLoadedOK) {
845 if (!LoadAllFile())
846 return kDataNotAvailable;
847 m_pDocument->GetParser()->RebuildCrossRef();
848 ResetFirstCheck(dwPage);
849 return kDataAvailable;
850 }
851 if (m_bTotalLoadPageTree) {
852 if (!LoadPages())
853 return kDataNotAvailable;
854 } else {
855 if (!m_bCurPageDictLoadOK && !CheckPage(dwPage))
856 return kDataNotAvailable;
857 }
858 } else {
859 if (!m_bTotalLoadPageTree && !m_bCurPageDictLoadOK && !CheckPage(dwPage)) {
860 return kDataNotAvailable;
861 }
862 }
863
864 if (CheckAcroForm() == kFormNotAvailable)
865 return kDataNotAvailable;
866
867 RetainPtr<CPDF_Dictionary> pPageDict =
868 m_pDocument->GetMutablePageDictionary(iPage);
869 if (!pPageDict)
870 return kDataError;
871
872 {
873 auto page_num_obj =
874 std::make_pair(dwPage, std::make_unique<CPDF_PageObjectAvail>(
875 GetValidator(), m_pDocument, pPageDict));
876 CPDF_PageObjectAvail* page_obj_avail =
877 m_PagesObjAvail.insert(std::move(page_num_obj)).first->second.get();
878 const DocAvailStatus status = page_obj_avail->CheckAvail();
879 if (status != kDataAvailable)
880 return status;
881 }
882
883 const DocAvailStatus resources_status = CheckResources(std::move(pPageDict));
884 if (resources_status != kDataAvailable)
885 return resources_status;
886
887 m_bCurPageDictLoadOK = false;
888 ResetFirstCheck(dwPage);
889 m_pagesLoadState.insert(dwPage);
890 return kDataAvailable;
891}
892
893CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckResources(
894 RetainPtr<CPDF_Dictionary> page) {
895 DCHECK(page);
897 RetainPtr<CPDF_Object> resources = GetResourceObject(std::move(page));
898 if (GetValidator()->has_read_problems())
899 return kDataNotAvailable;
900
901 if (!resources)
902 return kDataAvailable;
903
904 CPDF_PageObjectAvail* resource_avail =
905 m_PagesResourcesAvail
906 .insert(std::make_pair(resources,
907 std::make_unique<CPDF_PageObjectAvail>(
908 GetValidator(), m_pDocument, resources)))
909 .first->second.get();
910 return resource_avail->CheckAvail();
911}
912
913RetainPtr<CPDF_ReadValidator> CPDF_DataAvail::GetValidator() const {
914 return m_pFileRead;
915}
916
917CPDF_SyntaxParser* CPDF_DataAvail::GetSyntaxParser() const {
918 return m_pDocument ? m_pDocument->GetParser()->m_pSyntax.get()
919 : m_parser.m_pSyntax.get();
920}
921
922int CPDF_DataAvail::GetPageCount() const {
923 if (m_pLinearized)
924 return m_pLinearized->GetPageCount();
925 return m_pDocument ? m_pDocument->GetPageCount() : 0;
926}
927
928RetainPtr<const CPDF_Dictionary> CPDF_DataAvail::GetPageDictionary(
929 int index) const {
930 if (!m_pDocument || index < 0 || index >= GetPageCount())
931 return nullptr;
932 RetainPtr<const CPDF_Dictionary> page = m_pDocument->GetPageDictionary(index);
933 if (page)
934 return page;
935 if (!m_pLinearized || !m_pHintTables)
936 return nullptr;
937
938 if (index == static_cast<int>(m_pLinearized->GetFirstPageNo()))
939 return nullptr;
940 FX_FILESIZE szPageStartPos = 0;
941 FX_FILESIZE szPageLength = 0;
942 uint32_t dwObjNum = 0;
943 const bool bPagePosGot = m_pHintTables->GetPagePos(index, &szPageStartPos,
944 &szPageLength, &dwObjNum);
945 if (!bPagePosGot || !dwObjNum)
946 return nullptr;
947 // We should say to the document, which object is the page.
948 m_pDocument->SetPageObjNum(index, dwObjNum);
949 // Page object already can be parsed in document.
950 if (!m_pDocument->GetIndirectObject(dwObjNum)) {
951 m_pDocument->ReplaceIndirectObjectIfHigherGeneration(
952 dwObjNum, ParseIndirectObjectAt(szPageStartPos, dwObjNum, m_pDocument));
953 }
954 if (!ValidatePage(index))
955 return nullptr;
956 return m_pDocument->GetPageDictionary(index);
957}
958
959CPDF_DataAvail::DocFormStatus CPDF_DataAvail::IsFormAvail(
960 DownloadHints* pHints) {
961 const HintsScope hints_scope(GetValidator(), pHints);
962 return CheckAcroForm();
963}
964
965CPDF_DataAvail::DocFormStatus CPDF_DataAvail::CheckAcroForm() {
966 if (!m_pDocument)
967 return kFormAvailable;
968
969 if (m_pLinearized) {
970 DocAvailStatus nDocStatus = CheckLinearizedData();
971 if (nDocStatus == kDataError)
972 return kFormError;
973 if (nDocStatus == kDataNotAvailable)
974 return kFormNotAvailable;
975 }
976
977 if (!m_pFormAvail) {
978 const CPDF_Dictionary* pRoot = m_pDocument->GetRoot();
979 if (!pRoot)
980 return kFormAvailable;
981
982 RetainPtr<const CPDF_Object> pAcroForm = pRoot->GetObjectFor("AcroForm");
983 if (!pAcroForm)
984 return kFormNotExist;
985
986 m_pFormAvail = std::make_unique<CPDF_PageObjectAvail>(
987 GetValidator(), m_pDocument, std::move(pAcroForm));
988 }
989 switch (m_pFormAvail->CheckAvail()) {
990 case kDataError:
991 return kFormError;
993 return kFormNotAvailable;
994 case kDataAvailable:
995 return kFormAvailable;
996 }
997}
998
999bool CPDF_DataAvail::ValidatePage(uint32_t dwPage) const {
1000 int iPage = pdfium::base::checked_cast<int>(dwPage);
1001 RetainPtr<const CPDF_Dictionary> pPageDict =
1002 m_pDocument->GetPageDictionary(iPage);
1003 if (!pPageDict)
1004 return false;
1005
1006 CPDF_PageObjectAvail obj_avail(GetValidator(), m_pDocument,
1007 std::move(pPageDict));
1008 return obj_avail.CheckAvail() == kDataAvailable;
1009}
1010
1012CPDF_DataAvail::ParseDocument(
1013 std::unique_ptr<CPDF_Document::RenderDataIface> pRenderData,
1014 std::unique_ptr<CPDF_Document::PageDataIface> pPageData,
1015 const ByteString& password) {
1016 if (m_pDocument) {
1017 // We already returned parsed document.
1018 return std::make_pair(CPDF_Parser::HANDLER_ERROR, nullptr);
1019 }
1020 auto document = std::make_unique<CPDF_Document>(std::move(pRenderData),
1021 std::move(pPageData));
1022 document->AddObserver(this);
1023
1024 CPDF_ReadValidator::ScopedSession read_session(GetValidator());
1025 CPDF_Parser::Error error =
1026 document->LoadLinearizedDoc(GetValidator(), password);
1027
1028 // Additional check, that all ok.
1029 if (GetValidator()->has_read_problems()) {
1030 NOTREACHED();
1031 return std::make_pair(CPDF_Parser::HANDLER_ERROR, nullptr);
1032 }
1033
1034 if (error != CPDF_Parser::SUCCESS)
1035 return std::make_pair(error, nullptr);
1036
1037 m_pDocument = document.get();
1038 return std::make_pair(CPDF_Parser::SUCCESS, std::move(document));
1039}
1040
1041CPDF_DataAvail::PageNode::PageNode() = default;
1042
1043CPDF_DataAvail::PageNode::~PageNode() = default;
CPDF_DataAvail(FileAvail *pFileAvail, RetainPtr< IFX_SeekableReadStream > pFileRead)
~CPDF_DataAvail() override
DocAvailStatus IsPageAvail(uint32_t dwPage, DownloadHints *pHints)
std::pair< CPDF_Parser::Error, std::unique_ptr< CPDF_Document > > ParseDocument(std::unique_ptr< CPDF_Document::RenderDataIface > pRenderData, std::unique_ptr< CPDF_Document::PageDataIface > pPageData, const ByteString &password)
RetainPtr< const CPDF_Dictionary > GetPageDictionary(int index) const
RetainPtr< CPDF_ReadValidator > GetValidator() const
DocAvailStatus IsDocAvail(DownloadHints *pHints)
void OnObservableDestroyed() override
DocLinearizationStatus IsLinearizedPDF()
int GetPageCount() const
DocFormStatus IsFormAvail(DownloadHints *pHints)
CPDF_DataAvail::DocAvailStatus CheckAvail()
static constexpr uint32_t kInvalidObjNum
Definition cpdf_object.h:52
uint32_t GetRefObjNum() const
FX_FILESIZE GetPos() const
void SetPos(FX_FILESIZE pos)
bool operator==(const char *ptr) const
bool operator!=(const char *ptr) const
Definition bytestring.h:130
#define FX_FILESIZE
Definition fx_types.h:19