7#include "core/fpdfapi/parser/cpdf_document.h"
14#include "core/fpdfapi/parser/cpdf_array.h"
15#include "core/fpdfapi/parser/cpdf_dictionary.h"
16#include "core/fpdfapi/parser/cpdf_linearized_header.h"
17#include "core/fpdfapi/parser/cpdf_name.h"
18#include "core/fpdfapi/parser/cpdf_null.h"
19#include "core/fpdfapi/parser/cpdf_number.h"
20#include "core/fpdfapi/parser/cpdf_parser.h"
21#include "core/fpdfapi/parser/cpdf_read_validator.h"
22#include "core/fpdfapi/parser/cpdf_reference.h"
23#include "core/fpdfapi/parser/cpdf_stream.h"
24#include "core/fpdfapi/parser/cpdf_stream_acc.h"
25#include "core/fpdfapi/parser/fpdf_parser_utility.h"
26#include "core/fxcodec/jbig2/JBig2_DocumentContext.h"
27#include "core/fxcrt/check.h"
28#include "core/fxcrt/check_op.h"
29#include "core/fxcrt/containers/contains.h"
30#include "core/fxcrt/fx_codepage.h"
31#include "core/fxcrt/scoped_set_insertion.h"
32#include "core/fxcrt/span.h"
33#include "core/fxcrt/stl_util.h"
37const int kMaxPageLevel = 1024;
39enum class NodeType :
bool {
47 const ByteString kid_type_value = kid_dict->GetNameFor(
"Type");
48 if (kid_type_value
== "Pages") {
49 return NodeType::kBranch;
51 if (kid_type_value
== "Page") {
52 return NodeType::kLeaf;
58 const bool has_kids = kid_dict->KeyExist(
"Kids");
59 kid_dict->SetNewFor<CPDF_Name>(
"Type", has_kids ?
"Pages" :
"Page");
60 return has_kids ? NodeType::kBranch : NodeType::kLeaf;
68std::optional<
int> CountPages(
70 std::set<RetainPtr<CPDF_Dictionary>>* visited_pages) {
73 int count_from_dict = pages_dict->GetIntegerFor(
"Count");
75 return count_from_dict;
84 for (size_t i = 0; i < kids_array->size(); i++) {
86 if (!kid_dict ||
pdfium::Contains(*visited_pages, kid_dict)) {
90 NodeType kid_type = GetNodeType(kid_dict);
91 if (kid_type == NodeType::kBranch) {
95 std::optional<
int> local_count =
96 CountPages(std::move(kid_dict), visited_pages);
97 if (!local_count.has_value()) {
100 count += local_count.value();
102 CHECK_EQ(kid_type, NodeType::kLeaf);
111 pages_dict->SetNewFor<CPDF_Number>(
"Count", count);
116 uint32_t* skip_count,
124 if (*skip_count != 0)
135 if (level >= kMaxPageLevel)
139 if (count <= *skip_count) {
140 (*skip_count) -= count;
145 if (count && count == pKidList->size()) {
146 for (size_t i = 0; i < count; i++) {
148 ToReference(pKidList->GetObjectAt(i));
149 if (pKid && pKid->GetRefObjNum() == objnum)
150 return static_cast<
int>(*index + i);
154 for (size_t i = 0; i < pKidList->size(); i++) {
156 if (!pKid || pKid == pNode)
160 FindPageIndex(pKid.Get(), skip_count, objnum, index, level + 1);
161 if (found_index >= 0)
170 std::unique_ptr<PageDataIface> pPageData)
174 m_pDocRender->SetDocument(
this);
175 m_pDocPage->SetDocument(
this);
183 m_pExtension.reset();
193 return m_pParser ? m_pParser->ParseIndirectObject(objnum) :
nullptr;
197 SetLastObjNum(m_pParser->GetLastObjNum());
200 GetOrParseIndirectObject(m_pParser->GetRootObjNum());
202 m_pRootDict = pRootObj->GetMutableDict();
212 SetParser(std::make_unique<CPDF_Parser>(
this));
214 return HandleLoadResult(
215 m_pParser->StartParse(std::move(pFileAccess), password));
222 SetParser(std::make_unique<CPDF_Parser>(
this));
224 return HandleLoadResult(
225 m_pParser->StartLinearizedParse(std::move(validator), password));
230 m_pParser->GetLinearizedHeader();
231 if (!linearized_header) {
232 m_PageList.resize(RetrievePageCount());
238 m_PageList.resize(RetrievePageCount());
244 DCHECK(first_page_num < page_count);
245 m_PageList.resize(page_count);
246 m_PageList[first_page_num] = objnum;
252 if (*nPagesToGo < 0 || m_bReachedMaxPageLevel)
258 m_pTreeTraversal.pop_back();
259 if (*nPagesToGo != 1)
261 m_PageList[iPage] = pPages->GetObjNum();
264 if (level >= kMaxPageLevel) {
265 m_pTreeTraversal.pop_back();
266 m_bReachedMaxPageLevel =
true;
270 for (size_t i = m_pTreeTraversal[level].second; i < pKidList->size(); i++) {
271 if (*nPagesToGo == 0)
273 pKidList->ConvertToIndirectObjectAt(i,
this);
277 m_pTreeTraversal[level].second++;
280 if (pKid == pPages) {
281 m_pTreeTraversal[level].second++;
284 if (!pKid->KeyExist(
"Kids")) {
285 m_PageList[iPage - (*nPagesToGo) + 1] = pKid->GetObjNum();
287 m_pTreeTraversal[level].second++;
288 if (*nPagesToGo == 0) {
289 page =
std::move(pKid);
294 if (m_pTreeTraversal.size() == level + 1)
295 m_pTreeTraversal.emplace_back(std::move(pKid), 0);
298 TraversePDFPages(iPage, nPagesToGo, level + 1);
300 if (m_pTreeTraversal.size() == level + 1)
301 m_pTreeTraversal[level].second++;
303 if (m_pTreeTraversal.size() != level + 1 || *nPagesToGo == 0 ||
304 m_bReachedMaxPageLevel) {
305 page =
std::move(pPageKid);
310 if (m_pTreeTraversal[level].second == pKidList->size())
311 m_pTreeTraversal.pop_back();
316 m_iNextPageToTraverse = 0;
317 m_bReachedMaxPageLevel =
false;
318 m_pTreeTraversal.clear();
323 m_pParser = std::move(pParser);
327 if (error == CPDF_Parser::SUCCESS)
328 m_bHasValidCrossReferenceTable = !m_pParser->xref_table_rebuilt();
338 return pdfium::WrapRetain(
343 return !!m_PageList[iPage];
347 if (!fxcrt::IndexInBounds(m_PageList, iPage))
350 const uint32_t objnum = m_PageList[iPage];
353 ToDictionary(GetOrParseIndirectObject(objnum));
362 if (m_pTreeTraversal.empty()) {
364 m_pTreeTraversal.emplace_back(std::move(pPages), 0);
366 int nPagesToGo = iPage - m_iNextPageToTraverse + 1;
368 m_iNextPageToTraverse = iPage + 1;
373 return pdfium::WrapRetain(
374 const_cast<CPDF_Dictionary*>(GetPageDictionary(iPage).Get()));
378 m_PageList[iPage] = objNum;
382 if (!m_pCodecContext)
383 m_pCodecContext = std::make_unique<JBig2_DocumentContext>();
384 return m_pCodecContext.get();
389 auto stream = NewIndirect<CPDF_Stream>(
std::move(dict));
390 m_ModifiedAPStreamIDs.insert(stream->GetObjNum());
395 return stream && pdfium::Contains(m_ModifiedAPStreamIDs, stream->GetObjNum());
399 uint32_t skip_count = 0;
400 bool bSkipped =
false;
401 for (uint32_t i = 0; i < m_PageList.size(); ++i) {
402 if (m_PageList[i] == objnum)
405 if (!bSkipped && m_PageList[i] == 0) {
415 int found_index = FindPageIndex(pPages, &skip_count, objnum, &start_index, 0);
418 if (!fxcrt::IndexInBounds(m_PageList, found_index))
422 if (IsValidPageObject(GetOrParseIndirectObject(objnum).Get()))
423 m_PageList[found_index] = objnum;
428 return fxcrt::CollectionSize<
int>(m_PageList);
436 if (!pPages->KeyExist(
"Kids"))
439 std::set<RetainPtr<CPDF_Dictionary>> visited_pages = {pPages};
440 return CountPages(std::move(pPages), &visited_pages).value_or(0);
444 return m_pParser ? m_pParser->GetPermissions(get_owner_perms) : 0;
448 RetainPtr<
const CPDF_Stream> pFontStream) {
449 return m_pDocPage->GetFontFileStreamAcc(std::move(pFontStream));
453 RetainPtr<CPDF_StreamAcc>&& pStreamAcc) {
454 m_pDocPage->MaybePurgeFontFileStreamAcc(std::move(pStreamAcc));
458 m_pDocPage->MaybePurgeImage(objnum);
464 m_pRootDict = NewIndirect<CPDF_Dictionary>();
465 m_pRootDict->SetNewFor<CPDF_Name>(
"Type",
"Catalog");
468 pPages->SetNewFor<CPDF_Name>(
"Type",
"Pages");
469 pPages->SetNewFor<CPDF_Number>(
"Count", 0);
471 m_pRootDict->SetNewFor<CPDF_Reference>(
"Pages",
this, pPages->GetObjNum());
472 m_pInfoDict = NewIndirect<CPDF_Dictionary>();
477 pDict->SetNewFor<CPDF_Name>(
"Type",
"Page");
478 uint32_t dwObjNum = pDict->GetObjNum();
479 if (!InsertNewPage(iPage, pDict)) {
491 std::set<RetainPtr<CPDF_Dictionary>>* visited) {
497 for (size_t i = 0; i < kids_list->size(); i++) {
499 NodeType kid_type = GetNodeType(kid_dict);
500 if (kid_type == NodeType::kLeaf) {
501 if (pages_to_go != 0) {
506 kids_list->InsertNewAt<CPDF_Reference>(i,
this, page_dict->GetObjNum());
507 page_dict->SetNewFor<CPDF_Reference>(
"Parent",
this,
508 pages_dict->GetObjNum());
510 kids_list->RemoveAt(i);
512 pages_dict->SetNewFor<CPDF_Number>(
513 "Count", pages_dict->GetIntegerFor(
"Count") + (is_insert ? 1 : -1));
518 CHECK_EQ(kid_type, NodeType::kBranch);
519 int page_count = kid_dict->GetIntegerFor(
"Count");
520 if (pages_to_go >= page_count) {
521 pages_to_go -= page_count;
524 if (
pdfium::Contains(*visited, kid_dict)) {
529 if (!InsertDeletePDFPage(std::move(kid_dict), pages_to_go, page_dict,
530 is_insert, visited)) {
533 pages_dict->SetNewFor<CPDF_Number>(
534 "Count", pages_dict->GetIntegerFor(
"Count") + (is_insert ? 1 : -1));
551 if (iPage < 0 || iPage > nPages)
554 if (iPage == nPages) {
556 pPagesList->AppendNew<CPDF_Reference>(
this, pPageDict->GetObjNum());
557 pPages->SetNewFor<CPDF_Number>(
"Count", nPages + 1);
558 pPageDict->SetNewFor<CPDF_Reference>(
"Parent",
this, pPages->GetObjNum());
561 std::set<RetainPtr<CPDF_Dictionary>> stack = {pPages};
562 if (!InsertDeletePDFPage(std::move(pPages), iPage, pPageDict,
true, &stack))
565 m_PageList.insert(m_PageList.begin() + iPage, pPageDict->GetObjNum());
576 uint32_t info_obj_num = m_pParser->GetInfoObjNum();
577 if (info_obj_num == 0)
580 auto ref =
pdfium::MakeRetain<CPDF_Reference>(
this, info_obj_num);
581 m_pInfoDict = ToDictionary(ref->GetMutableDirect());
586 return m_pParser ? m_pParser->GetIDArray() :
nullptr;
595 int nPages = pPages->GetIntegerFor(
"Count");
596 if (iPage < 0 || iPage >= nPages) {
605 std::set<RetainPtr<CPDF_Dictionary>> stack = {pPages};
606 if (!InsertDeletePDFPage(std::move(pPages), iPage,
nullptr,
false, &stack)) {
610 m_PageList.erase(m_PageList.begin() + iPage);
611 return page_dict->GetObjNum();
615 if (!page_obj_num || m_PageList.empty()) {
620 for (size_t i = 0; i < m_PageList.size(); ++i) {
621 GetPageDictionary(i);
624 if (pdfium::Contains(m_PageList, page_obj_num)) {
635 page_obj_num
, pdfium::MakeRetain<CPDF_Null>()
);
640 m_pRootDict = std::move(root);
644 int dest_page_index) {
647 if (num_pages_signed <= 0) {
650 const size_t num_pages = num_pages_signed;
653 if (page_indices.empty() || page_indices.size() > num_pages) {
658 if (dest_page_index < 0 ||
659 static_cast<size_t>(dest_page_index) > num_pages - page_indices.size()) {
671 std::set<
int> unique_page_indices;
673 std::vector<RetainPtr<CPDF_Dictionary>> pages_to_move;
674 pages_to_move.reserve(page_indices.size());
676 std::vector<
int> page_indices_to_delete;
677 page_indices_to_delete.reserve(page_indices.size());
678 for (
const int page_index : page_indices) {
679 bool inserted = unique_page_indices.insert(page_index).second;
684 RetainPtr<CPDF_Dictionary> page = GetMutablePageDictionary(page_index);
689 pages_to_move.push_back(std::move(page));
690 page_indices_to_delete.push_back(page_index);
694 std::sort(page_indices_to_delete.begin(), page_indices_to_delete.end(),
695 std::greater<
int>());
698 for (
int page_index : page_indices_to_delete) {
699 extension->DeletePage(page_index);
702 for (
int page_index : page_indices_to_delete) {
703 DeletePage(page_index);
709 for (size_t i = 0; i < pages_to_move.size(); ++i) {
710 if (!InsertNewPage(i + dest_page_index, pages_to_move[i])) {
720 m_PageList.resize(size);
725 : m_pPageData(pPageData) {}
728 m_pPageData->ClearStockFont();
fxcrt::ByteString ByteString
std::vector< RetainPtr< CPDF_Object > >::const_iterator const_iterator
bool KeyExist(const ByteString &key) const
int GetIntegerFor(const ByteString &key) const
RetainPtr< const CPDF_Dictionary > GetDictFor(const ByteString &key) const
std::map< ByteString, RetainPtr< CPDF_Object >, std::less<> > DictMap
virtual bool ContainsExtensionForm() const =0
virtual ~RenderDataIface()
~CPDF_Document() override
JBig2_DocumentContext * GetOrCreateCodecContext()
CPDF_Parser::Error LoadLinearizedDoc(RetainPtr< CPDF_ReadValidator > validator, const ByteString &password)
RetainPtr< CPDF_Object > ParseIndirectObject(uint32_t objnum) override
bool IsPageLoaded(int iPage) const
RetainPtr< const CPDF_Dictionary > GetPageDictionary(int iPage)
RetainPtr< CPDF_Dictionary > CreateNewPage(int iPage)
bool IsModifiedAPStream(const CPDF_Stream *stream) const
RetainPtr< const CPDF_Array > GetFileIdentifier() const
void MaybePurgeImage(uint32_t objnum)
void MaybePurgeFontFileStreamAcc(RetainPtr< CPDF_StreamAcc > &&pStreamAcc)
RetainPtr< CPDF_Dictionary > GetInfo()
CPDF_Document(std::unique_ptr< RenderDataIface > pRenderData, std::unique_ptr< PageDataIface > pPageData)
CPDF_Parser::Error LoadDoc(RetainPtr< IFX_SeekableReadStream > pFileAccess, const ByteString &password)
void ResizePageListForTesting(size_t size)
RetainPtr< CPDF_Dictionary > GetMutablePageDictionary(int iPage)
bool MovePages(pdfium::span< const int > page_indices, int dest_page_index)
int GetPageIndex(uint32_t objnum)
Extension * GetExtension() const
uint32_t GetUserPermissions(bool get_owner_perms) const
void SetPageToNullObject(uint32_t page_obj_num)
RetainPtr< CPDF_StreamAcc > GetFontFileStreamAcc(RetainPtr< const CPDF_Stream > pFontStream)
RetainPtr< CPDF_Stream > CreateModifiedAPStream(RetainPtr< CPDF_Dictionary > dict)
void SetRootForTesting(RetainPtr< CPDF_Dictionary > root)
static bool IsValidPageObject(const CPDF_Object *obj)
const CPDF_Dictionary * GetRoot() const
void SetParser(std::unique_ptr< CPDF_Parser > pParser)
uint32_t DeletePage(int iPage)
static constexpr int kPageMaxNum
void SetPageObjNum(int iPage, uint32_t objNum)
void DeleteIndirectObject(uint32_t objnum)
RetainPtr< CPDF_Object > GetOrParseIndirectObject(uint32_t objnum)
bool ReplaceIndirectObjectIfHigherGeneration(uint32_t objnum, RetainPtr< CPDF_Object > pObj)
uint32_t GetObjNum() const
bool operator==(const char *ptr) const
const CPDF_Dictionary * ToDictionary(const CPDF_Object *obj)