7#include "core/fpdfapi/parser/cpdf_document.h"
13#include "core/fpdfapi/parser/cpdf_array.h"
14#include "core/fpdfapi/parser/cpdf_dictionary.h"
15#include "core/fpdfapi/parser/cpdf_linearized_header.h"
16#include "core/fpdfapi/parser/cpdf_name.h"
17#include "core/fpdfapi/parser/cpdf_number.h"
18#include "core/fpdfapi/parser/cpdf_parser.h"
19#include "core/fpdfapi/parser/cpdf_read_validator.h"
20#include "core/fpdfapi/parser/cpdf_reference.h"
21#include "core/fpdfapi/parser/cpdf_stream.h"
22#include "core/fpdfapi/parser/cpdf_stream_acc.h"
23#include "core/fpdfapi/parser/fpdf_parser_utility.h"
24#include "core/fxcodec/jbig2/JBig2_DocumentContext.h"
25#include "core/fxcrt/fx_codepage.h"
26#include "core/fxcrt/scoped_set_insertion.h"
27#include "core/fxcrt/stl_util.h"
28#include "third_party/abseil-cpp/absl/types/optional.h"
29#include "third_party/base/check.h"
30#include "third_party/base/check_op.h"
31#include "third_party/base/containers/contains.h"
32#include "third_party/base/containers/span.h"
36const int kMaxPageLevel = 1024;
38enum class NodeType :
bool {
45NodeType GetNodeType(
RetainPtr<CPDF_Dictionary> kid_dict) {
46 const ByteString kid_type_value = kid_dict->GetNameFor(
"Type");
47 if (kid_type_value
== "Pages") {
48 return NodeType::kBranch;
50 if (kid_type_value
== "Page") {
51 return NodeType::kLeaf;
57 const bool has_kids = kid_dict->KeyExist(
"Kids");
58 kid_dict->SetNewFor<CPDF_Name>(
"Type", has_kids ?
"Pages" :
"Page");
59 return has_kids ? NodeType::kBranch : NodeType::kLeaf;
67absl::optional<
int> CountPages(
69 std::set<RetainPtr<CPDF_Dictionary>>* visited_pages) {
72 int count_from_dict = pages_dict->GetIntegerFor(
"Count");
74 return count_from_dict;
77 RetainPtr<CPDF_Array> kids_array = pages_dict->GetMutableArrayFor(
"Kids");
83 for (size_t i = 0; i < kids_array->size(); i++) {
84 RetainPtr<CPDF_Dictionary> kid_dict = kids_array->GetMutableDictAt(i);
85 if (!kid_dict || pdfium::Contains(*visited_pages, kid_dict)) {
89 NodeType kid_type = GetNodeType(kid_dict);
90 if (kid_type == NodeType::kBranch) {
94 absl::optional<
int> local_count =
95 CountPages(std::move(kid_dict), visited_pages);
96 if (!local_count.has_value()) {
99 count += local_count.value();
101 CHECK_EQ(kid_type, NodeType::kLeaf);
106 return absl::nullopt;
110 pages_dict->SetNewFor<CPDF_Number>(
"Count", count);
114int FindPageIndex(
const CPDF_Dictionary* pNode,
115 uint32_t* skip_count,
123 if (*skip_count != 0)
130 RetainPtr<
const CPDF_Array> pKidList = pNode->GetArrayFor(
"Kids");
134 if (level >= kMaxPageLevel)
137 size_t count = pNode->GetIntegerFor(
"Count");
138 if (count <= *skip_count) {
139 (*skip_count) -= count;
144 if (count && count == pKidList->size()) {
145 for (size_t i = 0; i < count; i++) {
147 ToReference(pKidList->GetObjectAt(i));
148 if (pKid && pKid->GetRefObjNum() == objnum)
149 return static_cast<
int>(*index + i);
153 for (size_t i = 0; i < pKidList->size(); i++) {
154 RetainPtr<
const CPDF_Dictionary> pKid = pKidList->GetDictAt(i);
155 if (!pKid || pKid == pNode)
159 FindPageIndex(pKid.Get(), skip_count, objnum, index, level + 1);
160 if (found_index >= 0)
169 std::unique_ptr<PageDataIface> pPageData)
173 m_pDocRender->SetDocument(
this);
174 m_pDocPage->SetDocument(
this);
182 m_pExtension.reset();
192 return m_pParser ? m_pParser->ParseIndirectObject(objnum) :
nullptr;
196 SetLastObjNum(m_pParser->GetLastObjNum());
199 GetOrParseIndirectObject(m_pParser->GetRootObjNum());
201 m_pRootDict = pRootObj->GetMutableDict();
209 const ByteString& password) {
211 SetParser(std::make_unique<CPDF_Parser>(
this));
213 return HandleLoadResult(
214 m_pParser->StartParse(std::move(pFileAccess), password));
219 const ByteString& password) {
221 SetParser(std::make_unique<CPDF_Parser>(
this));
223 return HandleLoadResult(
224 m_pParser->StartLinearizedParse(std::move(validator), password));
229 m_pParser->GetLinearizedHeader();
230 if (!linearized_header) {
231 m_PageList.resize(RetrievePageCount());
237 m_PageList.resize(RetrievePageCount());
243 DCHECK(first_page_num < page_count);
244 m_PageList.resize(page_count);
245 m_PageList[first_page_num] = objnum;
251 if (*nPagesToGo < 0 || m_bReachedMaxPageLevel)
254 RetainPtr<CPDF_Dictionary> pPages = m_pTreeTraversal[level].first;
255 RetainPtr<CPDF_Array> pKidList = pPages->GetMutableArrayFor(
"Kids");
257 m_pTreeTraversal.pop_back();
258 if (*nPagesToGo != 1)
260 m_PageList[iPage] = pPages->GetObjNum();
263 if (level >= kMaxPageLevel) {
264 m_pTreeTraversal.pop_back();
265 m_bReachedMaxPageLevel =
true;
269 for (size_t i = m_pTreeTraversal[level].second; i < pKidList->size(); i++) {
270 if (*nPagesToGo == 0)
272 pKidList->ConvertToIndirectObjectAt(i,
this);
273 RetainPtr<CPDF_Dictionary> pKid = pKidList->GetMutableDictAt(i);
276 m_pTreeTraversal[level].second++;
279 if (pKid == pPages) {
280 m_pTreeTraversal[level].second++;
283 if (!pKid->KeyExist(
"Kids")) {
284 m_PageList[iPage - (*nPagesToGo) + 1] = pKid->GetObjNum();
286 m_pTreeTraversal[level].second++;
287 if (*nPagesToGo == 0) {
288 page =
std::move(pKid);
293 if (m_pTreeTraversal.size() == level + 1)
294 m_pTreeTraversal.emplace_back(std::move(pKid), 0);
297 TraversePDFPages(iPage, nPagesToGo, level + 1);
299 if (m_pTreeTraversal.size() == level + 1)
300 m_pTreeTraversal[level].second++;
302 if (m_pTreeTraversal.size() != level + 1 || *nPagesToGo == 0 ||
303 m_bReachedMaxPageLevel) {
304 page =
std::move(pPageKid);
309 if (m_pTreeTraversal[level].second == pKidList->size())
310 m_pTreeTraversal.pop_back();
315 m_iNextPageToTraverse = 0;
316 m_bReachedMaxPageLevel =
false;
317 m_pTreeTraversal.clear();
322 m_pParser = std::move(pParser);
326 if (error == CPDF_Parser::SUCCESS)
327 m_bHasValidCrossReferenceTable = !m_pParser->xref_table_rebuilt();
332 const CPDF_Dictionary* pRoot =
GetRoot();
337 return pdfium::WrapRetain(
338 const_cast<CPDF_Dictionary*>(
this->GetPagesDict().Get()));
342 return !!m_PageList[iPage];
346 if (!fxcrt::IndexInBounds(m_PageList, iPage))
349 const uint32_t objnum = m_PageList[iPage];
352 ToDictionary(GetOrParseIndirectObject(objnum));
357 RetainPtr<CPDF_Dictionary> pPages = GetMutablePagesDict();
361 if (m_pTreeTraversal.empty()) {
363 m_pTreeTraversal.emplace_back(std::move(pPages), 0);
365 int nPagesToGo = iPage - m_iNextPageToTraverse + 1;
366 RetainPtr<CPDF_Dictionary> pPage = TraversePDFPages(iPage, &nPagesToGo, 0);
367 m_iNextPageToTraverse = iPage + 1;
372 return pdfium::WrapRetain(
373 const_cast<CPDF_Dictionary*>(GetPageDictionary(iPage).Get()));
377 m_PageList[iPage] = objNum;
381 if (!m_pCodecContext)
382 m_pCodecContext = std::make_unique<JBig2_DocumentContext>();
383 return m_pCodecContext.get();
387 auto stream = NewIndirect<CPDF_Stream>();
388 m_ModifiedAPStreamIDs.insert(stream->GetObjNum());
393 return stream && pdfium::Contains(m_ModifiedAPStreamIDs, stream->GetObjNum());
397 uint32_t skip_count = 0;
398 bool bSkipped =
false;
399 for (uint32_t i = 0; i < m_PageList.size(); ++i) {
400 if (m_PageList[i] == objnum)
403 if (!bSkipped && m_PageList[i] == 0) {
408 RetainPtr<
const CPDF_Dictionary> pPages = GetPagesDict();
413 int found_index = FindPageIndex(pPages, &skip_count, objnum, &start_index, 0);
416 if (!fxcrt::IndexInBounds(m_PageList, found_index))
420 if (IsValidPageObject(GetOrParseIndirectObject(objnum).Get()))
421 m_PageList[found_index] = objnum;
426 return fxcrt::CollectionSize<
int>(m_PageList);
430 RetainPtr<CPDF_Dictionary> pPages = GetMutablePagesDict();
434 if (!pPages->KeyExist(
"Kids"))
437 std::set<RetainPtr<CPDF_Dictionary>> visited_pages = {pPages};
438 return CountPages(std::move(pPages), &visited_pages).value_or(0);
442 return m_pParser ? m_pParser->GetPermissions(get_owner_perms) : 0;
446 RetainPtr<
const CPDF_Stream> pFontStream) {
447 return m_pDocPage->GetFontFileStreamAcc(std::move(pFontStream));
451 RetainPtr<CPDF_StreamAcc>&& pStreamAcc) {
453 m_pDocPage->MaybePurgeFontFileStreamAcc(std::move(pStreamAcc));
458 m_pDocPage->MaybePurgeImage(objnum);
462 DCHECK(!m_pRootDict);
463 DCHECK(!m_pInfoDict);
464 m_pRootDict = NewIndirect<CPDF_Dictionary>();
465 m_pRootDict->SetNewFor<CPDF_Name>(
"Type",
"Catalog");
467 auto pPages = NewIndirect<CPDF_Dictionary>();
468 pPages->SetNewFor<CPDF_Name>(
"Type",
"Pages");
469 pPages->SetNewFor<CPDF_Number>(
"Count", 0);
470 pPages->SetNewFor<CPDF_Array>(
"Kids");
471 m_pRootDict->SetNewFor<CPDF_Reference>(
"Pages",
this, pPages->GetObjNum());
472 m_pInfoDict = NewIndirect<CPDF_Dictionary>();
476 auto pDict = NewIndirect<CPDF_Dictionary>();
477 pDict->SetNewFor<CPDF_Name>(
"Type",
"Page");
478 uint32_t dwObjNum = pDict->GetObjNum();
479 if (!InsertNewPage(iPage, pDict)) {
491 std::set<RetainPtr<CPDF_Dictionary>>* visited) {
492 RetainPtr<CPDF_Array> kids_list = pages_dict->GetMutableArrayFor(
"Kids");
497 for (size_t i = 0; i < kids_list->size(); i++) {
498 RetainPtr<CPDF_Dictionary> kid_dict = kids_list->GetMutableDictAt(i);
499 NodeType kid_type = GetNodeType(kid_dict);
500 if (kid_type == NodeType::kLeaf) {
501 if (pages_to_go != 0) {
506 kids_list->InsertNewAt<CPDF_Reference>(i,
this, page_dict->GetObjNum());
507 page_dict->SetNewFor<CPDF_Reference>(
"Parent",
this,
508 pages_dict->GetObjNum());
510 kids_list->RemoveAt(i);
512 pages_dict->SetNewFor<CPDF_Number>(
513 "Count", pages_dict->GetIntegerFor(
"Count") + (is_insert ? 1 : -1));
518 CHECK_EQ(kid_type, NodeType::kBranch);
519 int page_count = kid_dict->GetIntegerFor(
"Count");
520 if (pages_to_go >= page_count) {
521 pages_to_go -= page_count;
524 if (pdfium::Contains(*visited, kid_dict)) {
529 if (!InsertDeletePDFPage(std::move(kid_dict), pages_to_go, page_dict,
530 is_insert, visited)) {
533 pages_dict->SetNewFor<CPDF_Number>(
534 "Count", pages_dict->GetIntegerFor(
"Count") + (is_insert ? 1 : -1));
542 RetainPtr<CPDF_Dictionary> pRoot = GetMutableRoot();
546 RetainPtr<CPDF_Dictionary> pPages = pRoot->GetMutableDictFor(
"Pages");
551 if (iPage < 0 || iPage > nPages)
554 if (iPage == nPages) {
555 RetainPtr<CPDF_Array> pPagesList = pPages->GetOrCreateArrayFor(
"Kids");
556 pPagesList->AppendNew<CPDF_Reference>(
this, pPageDict->GetObjNum());
557 pPages->SetNewFor<CPDF_Number>(
"Count", nPages + 1);
558 pPageDict->SetNewFor<CPDF_Reference>(
"Parent",
this, pPages->GetObjNum());
561 std::set<RetainPtr<CPDF_Dictionary>> stack = {pPages};
562 if (!InsertDeletePDFPage(std::move(pPages), iPage, pPageDict,
true, &stack))
565 m_PageList.insert(m_PageList.begin() + iPage, pPageDict->GetObjNum());
576 uint32_t info_obj_num = m_pParser->GetInfoObjNum();
577 if (info_obj_num == 0)
580 auto ref = pdfium::MakeRetain<CPDF_Reference>(
this, info_obj_num);
581 m_pInfoDict = ToDictionary(ref->GetMutableDirect());
586 return m_pParser ? m_pParser->GetIDArray() :
nullptr;
590 RetainPtr<CPDF_Dictionary> pPages = GetMutablePagesDict();
594 int nPages = pPages->GetIntegerFor(
"Count");
595 if (iPage < 0 || iPage >= nPages)
598 std::set<RetainPtr<CPDF_Dictionary>> stack = {pPages};
599 if (!InsertDeletePDFPage(std::move(pPages), iPage,
nullptr,
false, &stack))
602 m_PageList.erase(m_PageList.begin() + iPage);
606 m_pRootDict = std::move(root);
610 int dest_page_index) {
611 const CPDF_Dictionary* pages = GetPagesDict();
613 if (num_pages_signed <= 0) {
616 const size_t num_pages = num_pages_signed;
619 if (page_indices.empty() || page_indices.size() > num_pages) {
624 if (dest_page_index < 0 ||
625 static_cast<size_t>(dest_page_index) > num_pages - page_indices.size()) {
637 std::set<
int> unique_page_indices;
639 std::vector<RetainPtr<CPDF_Dictionary>> pages_to_move;
640 pages_to_move.reserve(page_indices.size());
642 std::vector<
int> page_indices_to_delete;
643 page_indices_to_delete.reserve(page_indices.size());
644 for (
const int page_index : page_indices) {
645 bool inserted = unique_page_indices.insert(page_index).second;
650 RetainPtr<CPDF_Dictionary> page = GetMutablePageDictionary(page_index);
655 pages_to_move.push_back(std::move(page));
656 page_indices_to_delete.push_back(page_index);
660 std::sort(page_indices_to_delete.begin(), page_indices_to_delete.end(),
661 std::greater<
int>());
664 for (
int page_index : page_indices_to_delete) {
665 extension->DeletePage(page_index);
668 for (
int page_index : page_indices_to_delete) {
669 DeletePage(page_index);
675 for (size_t i = 0; i < pages_to_move.size(); ++i) {
676 if (!InsertNewPage(i + dest_page_index, pages_to_move[i])) {
686 m_PageList.resize(size);
691 : m_pPageData(pPageData) {}
694 m_pPageData->ClearStockFont();
bool KeyExist(const ByteString &key) const
int GetIntegerFor(const ByteString &key) const
RetainPtr< const CPDF_Dictionary > GetDictFor(const ByteString &key) const
virtual bool ContainsExtensionForm() const =0
virtual ~RenderDataIface()
~CPDF_Document() override
JBig2_DocumentContext * GetOrCreateCodecContext()
CPDF_Parser::Error LoadLinearizedDoc(RetainPtr< CPDF_ReadValidator > validator, const ByteString &password)
RetainPtr< CPDF_Object > ParseIndirectObject(uint32_t objnum) override
bool IsPageLoaded(int iPage) const
RetainPtr< const CPDF_Dictionary > GetPageDictionary(int iPage)
RetainPtr< CPDF_Dictionary > CreateNewPage(int iPage)
void DeletePage(int iPage)
bool IsModifiedAPStream(const CPDF_Stream *stream) const
RetainPtr< const CPDF_Array > GetFileIdentifier() const
void MaybePurgeImage(uint32_t objnum)
void MaybePurgeFontFileStreamAcc(RetainPtr< CPDF_StreamAcc > &&pStreamAcc)
RetainPtr< CPDF_Dictionary > GetInfo()
CPDF_Document(std::unique_ptr< RenderDataIface > pRenderData, std::unique_ptr< PageDataIface > pPageData)
CPDF_Parser::Error LoadDoc(RetainPtr< IFX_SeekableReadStream > pFileAccess, const ByteString &password)
void ResizePageListForTesting(size_t size)
RetainPtr< CPDF_Dictionary > GetMutablePageDictionary(int iPage)
bool MovePages(pdfium::span< const int > page_indices, int dest_page_index)
int GetPageIndex(uint32_t objnum)
Extension * GetExtension() const
uint32_t GetUserPermissions(bool get_owner_perms) const
RetainPtr< CPDF_StreamAcc > GetFontFileStreamAcc(RetainPtr< const CPDF_Stream > pFontStream)
void SetRootForTesting(RetainPtr< CPDF_Dictionary > root)
static bool IsValidPageObject(const CPDF_Object *obj)
const CPDF_Dictionary * GetRoot() const
void SetParser(std::unique_ptr< CPDF_Parser > pParser)
static constexpr int kPageMaxNum
void SetPageObjNum(int iPage, uint32_t objNum)
RetainPtr< CPDF_Stream > CreateModifiedAPStream()
void DeleteIndirectObject(uint32_t objnum)
RetainPtr< CPDF_Object > GetOrParseIndirectObject(uint32_t objnum)
uint32_t GetObjNum() const
bool operator==(const char *ptr) const
const CPDF_Dictionary * ToDictionary(const CPDF_Object *obj)