Qt
Internal/Contributor docs for the Qt SDK. Note: These are NOT official API docs; those are found at https://doc.qt.io/
Loading...
Searching...
No Matches
object_tree_traversal_util.cpp
Go to the documentation of this file.
1// Copyright 2023 The PDFium Authors
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "core/fpdfapi/parser/object_tree_traversal_util.h"
6
7#include <stdint.h>
8
9#include <map>
10#include <queue>
11#include <set>
12#include <utility>
13#include <vector>
14
15#include "core/fpdfapi/parser/cpdf_array.h"
16#include "core/fpdfapi/parser/cpdf_dictionary.h"
17#include "core/fpdfapi/parser/cpdf_document.h"
18#include "core/fpdfapi/parser/cpdf_reference.h"
19#include "core/fpdfapi/parser/cpdf_stream.h"
20#include "core/fxcrt/unowned_ptr.h"
21#include "third_party/base/check.h"
22#include "third_party/base/containers/contains.h"
23
24namespace {
25
26class ObjectTreeTraverser {
27 public:
28 explicit ObjectTreeTraverser(const CPDF_Document* document)
29 : document_(document) {
30 const CPDF_Parser* parser = document_->GetParser();
31 const CPDF_Dictionary* trailer = parser ? parser->GetTrailer() : nullptr;
32 const CPDF_Dictionary* root = trailer ? trailer : document_->GetRoot();
33 const uint32_t root_object_number =
34 trailer ? parser->GetTrailerObjectNumber() : root->GetObjNum();
35 // If `root` is a trailer, then it may not have an object number, as many
36 // trailers are inlined.
37 if (root_object_number) {
38 referenced_objects_[root_object_number] = 1;
39 object_number_map_[root] = root_object_number;
40 }
41
42 object_queue_.push(pdfium::WrapRetain(root));
43 seen_objects_.insert(root);
44 }
45 ~ObjectTreeTraverser() = default;
46
47 void Traverse() { CalculateReferenceCounts(GetReferenceEntries()); }
48
49 const std::map<uint32_t, int>& referenced_objects() {
50 return referenced_objects_;
51 }
52
53 private:
54 struct ReferenceEntry {
55 uint32_t ref_object_number;
56 uint32_t referenced_object_number;
57 };
58
59 std::vector<ReferenceEntry> GetReferenceEntries() {
60 std::vector<ReferenceEntry> reference_entries;
61 while (!object_queue_.empty()) {
62 RetainPtr<const CPDF_Object> current_object = object_queue_.front();
63 object_queue_.pop();
64
65 switch (current_object->GetType()) {
67 CPDF_ArrayLocker locker(current_object->AsArray());
68 for (const auto& it : locker) {
69 PushNewObject(current_object, it);
70 }
71 break;
72 }
74 CPDF_DictionaryLocker locker(current_object->AsDictionary());
75 for (const auto& it : locker) {
76 PushNewObject(current_object, it.second);
77 }
78 break;
79 }
81 const CPDF_Reference* ref_object = current_object->AsReference();
82 const uint32_t ref_object_number = GetObjectNumber(ref_object);
83 const uint32_t referenced_object_number = ref_object->GetRefObjNum();
84
85 RetainPtr<const CPDF_Object> referenced_object;
86 if (ref_object->HasIndirectObjectHolder()) {
87 // Calling GetIndirectObject() does not work for normal references.
88 referenced_object = ref_object->GetDirect();
89 } else {
90 // Calling GetDirect() does not work for references from trailers.
91 referenced_object =
92 document_->GetIndirectObject(referenced_object_number);
93 }
94 // Unlike the other object types, CPDF_Reference can point at nullptr.
95 if (referenced_object) {
96 CHECK(referenced_object_number);
97 reference_entries.push_back(
98 {ref_object_number, referenced_object_number});
99 PushNewObject(ref_object, referenced_object);
100 }
101 break;
102 }
104 RetainPtr<const CPDF_Dictionary> dict =
105 current_object->AsStream()->GetDict();
106 CHECK(dict->IsInline()); // i.e. No object number.
107 CPDF_DictionaryLocker locker(dict);
108 for (const auto& it : locker) {
109 PushNewObject(current_object, it.second);
110 }
111 break;
112 }
113 default: {
114 break;
115 }
116 }
117 }
118 return reference_entries;
119 }
120
121 void CalculateReferenceCounts(
122 const std::vector<ReferenceEntry>& reference_entries) {
123 // Tracks PDF objects that referenced other PDF objects, identified by their
124 // object numbers. Never 0.
125 std::set<uint32_t> seen_ref_objects;
126
127 for (const ReferenceEntry& entry : reference_entries) {
128 // Make sure this is not a self-reference.
129 if (entry.referenced_object_number == entry.ref_object_number) {
130 continue;
131 }
132
133 // Make sure this is not a circular reference.
134 if (pdfium::Contains(seen_ref_objects, entry.ref_object_number) &&
135 pdfium::Contains(seen_ref_objects, entry.referenced_object_number)) {
136 continue;
137 }
138
139 ++referenced_objects_[entry.referenced_object_number];
140 if (entry.ref_object_number) {
141 seen_ref_objects.insert(entry.ref_object_number);
142 }
143 }
144 }
145
146 void PushNewObject(const CPDF_Object* parent_object,
147 RetainPtr<const CPDF_Object> child_object) {
148 CHECK(parent_object);
149 CHECK(child_object);
150 const bool inserted = seen_objects_.insert(child_object).second;
151 if (!inserted) {
152 return;
153 }
154 const uint32_t child_object_number = child_object->GetObjNum();
155 if (child_object_number) {
156 object_number_map_[child_object] = child_object_number;
157 } else {
158 // This search can fail for inlined trailers.
159 auto it = object_number_map_.find(parent_object);
160 if (it != object_number_map_.end()) {
161 object_number_map_[child_object] = it->second;
162 }
163 }
164 object_queue_.push(std::move(child_object));
165 }
166
167 // Returns 0 if not found.
168 uint32_t GetObjectNumber(const CPDF_Object* object) const {
169 auto it = object_number_map_.find(object);
170 return it != object_number_map_.end() ? it->second : 0;
171 }
172
173 UnownedPtr<const CPDF_Document> const document_;
174
175 // Queue of objects to traverse.
176 // - Pointers in the queue are non-null.
177 // - The same pointer never enters the queue twice.
178 std::queue<RetainPtr<const CPDF_Object>> object_queue_;
179
180 // Map of objects to "top-level" object numbers. For inline objects, this is
181 // the ancestor object with an object number. The keys are non-null and the
182 // values are never 0.
183 // This is used to prevent self-references, as a single PDF object, with
184 // inlined objects, is represented by multiple CPDF_Objects.
185 std::map<const CPDF_Object*, uint32_t> object_number_map_;
186
187 // Tracks traversed objects to prevent duplicates from getting into
188 // `object_queue_` and `object_number_map_`.
189 std::set<const CPDF_Object*> seen_objects_;
190
191 // Tracks which PDF objects are referenced.
192 // Key: object number
193 // Value: number of times referenced
194 std::map<uint32_t, int> referenced_objects_;
195};
196
197} // namespace
198
200 ObjectTreeTraverser traverser(document);
201 traverser.Traverse();
202
203 std::set<uint32_t> results;
204 for (const auto& it : traverser.referenced_objects()) {
205 results.insert(it.first);
206 }
207 return results;
208}
209
211 const CPDF_Document* document) {
212 ObjectTreeTraverser traverser(document);
213 traverser.Traverse();
214
215 std::set<uint32_t> results;
216 for (const auto& it : traverser.referenced_objects()) {
217 if (it.second > 1) {
218 results.insert(it.first);
219 }
220 }
221 return results;
222}
RetainPtr< const CPDF_Object > GetDirect() const
uint32_t GetObjNum() const
Definition cpdf_object.h:65
const CPDF_Dictionary * GetTrailer() const
uint32_t GetTrailerObjectNumber() const
bool HasIndirectObjectHolder() const
uint32_t GetRefObjNum() const
std::set< uint32_t > GetObjectsWithReferences(const CPDF_Document *document)
std::set< uint32_t > GetObjectsWithMultipleReferences(const CPDF_Document *document)
#define CHECK(cvref)