Qt
Internal/Contributor docs for the Qt SDK. Note: These are NOT official API docs; those are found at https://doc.qt.io/
Loading...
Searching...
No Matches
fpdf_dataavail_embeddertest.cpp
Go to the documentation of this file.
1// Copyright 2015 The PDFium Authors
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include <algorithm>
6#include <memory>
7#include <string>
8#include <utility>
9#include <vector>
10
11#include "core/fxcrt/bytestring.h"
12#include "core/fxcrt/span_util.h"
13#include "public/fpdf_doc.h"
14#include "public/fpdfview.h"
15#include "testing/embedder_test.h"
16#include "testing/fx_string_testhelpers.h"
17#include "testing/gtest/include/gtest/gtest.h"
18#include "testing/range_set.h"
19#include "testing/utils/file_util.h"
20#include "testing/utils/path_service.h"
21#include "third_party/base/numerics/safe_conversions.h"
22
23namespace {
24
25class MockDownloadHints final : public FX_DOWNLOADHINTS {
26 public:
27 static void SAddSegment(FX_DOWNLOADHINTS* pThis, size_t offset, size_t size) {
28 }
29
30 MockDownloadHints() {
31 FX_DOWNLOADHINTS::version = 1;
32 FX_DOWNLOADHINTS::AddSegment = SAddSegment;
33 }
34
35 ~MockDownloadHints() = default;
36};
37
38class TestAsyncLoader final : public FX_DOWNLOADHINTS, FX_FILEAVAIL {
39 public:
40 explicit TestAsyncLoader(const std::string& file_name) {
41 std::string file_path = PathService::GetTestFilePath(file_name);
42 if (file_path.empty()) {
43 return;
44 }
45 file_contents_ = GetFileContents(file_path.c_str());
46 if (file_contents_.empty()) {
47 return;
48 }
49
50 file_access_.m_FileLen =
51 pdfium::base::checked_cast<unsigned long>(file_contents_.size());
52 file_access_.m_GetBlock = SGetBlock;
53 file_access_.m_Param = this;
54
55 FX_DOWNLOADHINTS::version = 1;
56 FX_DOWNLOADHINTS::AddSegment = SAddSegment;
57
58 FX_FILEAVAIL::version = 1;
59 FX_FILEAVAIL::IsDataAvail = SIsDataAvail;
60 }
61
62 bool IsOpened() const { return !file_contents_.empty(); }
63
64 FPDF_FILEACCESS* file_access() { return &file_access_; }
65 FX_DOWNLOADHINTS* hints() { return this; }
66 FX_FILEAVAIL* file_avail() { return this; }
67
68 const std::vector<std::pair<size_t, size_t>>& requested_segments() const {
69 return requested_segments_;
70 }
71
72 size_t max_requested_bound() const { return max_requested_bound_; }
73
74 void ClearRequestedSegments() {
75 requested_segments_.clear();
76 max_requested_bound_ = 0;
77 }
78
79 bool is_new_data_available() const { return is_new_data_available_; }
80 void set_is_new_data_available(bool is_new_data_available) {
81 is_new_data_available_ = is_new_data_available;
82 }
83
84 size_t max_already_available_bound() const {
85 return available_ranges_.IsEmpty()
86 ? 0
87 : available_ranges_.ranges().rbegin()->second;
88 }
89
90 void FlushRequestedData() {
91 for (const auto& it : requested_segments_) {
92 SetDataAvailable(it.first, it.second);
93 }
94 ClearRequestedSegments();
95 }
96
97 pdfium::span<const uint8_t> file_contents() const { return file_contents_; }
98 pdfium::span<uint8_t> mutable_file_contents() { return file_contents_; }
99
100 private:
101 void SetDataAvailable(size_t start, size_t size) {
102 available_ranges_.Union(RangeSet::Range(start, start + size));
103 }
104
105 bool CheckDataAlreadyAvailable(size_t start, size_t size) const {
106 return available_ranges_.Contains(RangeSet::Range(start, start + size));
107 }
108
109 int GetBlockImpl(unsigned long pos, unsigned char* pBuf, unsigned long size) {
110 if (!IsDataAvailImpl(pos, size))
111 return 0;
112 const unsigned long end = std::min(
113 pdfium::base::checked_cast<unsigned long>(file_contents_.size()),
114 pos + size);
115 if (end <= pos)
116 return 0;
117 const unsigned long bytes_to_copy = end - pos;
118 fxcrt::spancpy(pdfium::make_span(pBuf, size),
119 file_contents().subspan(pos, bytes_to_copy));
120 SetDataAvailable(pos, bytes_to_copy);
121 return static_cast<int>(bytes_to_copy);
122 }
123
124 void AddSegmentImpl(size_t offset, size_t size) {
125 requested_segments_.emplace_back(offset, size);
126 max_requested_bound_ = std::max(max_requested_bound_, offset + size);
127 }
128
129 bool IsDataAvailImpl(size_t offset, size_t size) {
130 if (offset + size > file_contents_.size()) {
131 return false;
132 }
133 if (is_new_data_available_) {
134 SetDataAvailable(offset, size);
135 return true;
136 }
137 return CheckDataAlreadyAvailable(offset, size);
138 }
139
140 static int SGetBlock(void* param,
141 unsigned long pos,
142 unsigned char* pBuf,
143 unsigned long size) {
144 return static_cast<TestAsyncLoader*>(param)->GetBlockImpl(pos, pBuf, size);
145 }
146
147 static void SAddSegment(FX_DOWNLOADHINTS* pThis, size_t offset, size_t size) {
148 return static_cast<TestAsyncLoader*>(pThis)->AddSegmentImpl(offset, size);
149 }
150
151 static FPDF_BOOL SIsDataAvail(FX_FILEAVAIL* pThis,
152 size_t offset,
153 size_t size) {
154 return static_cast<TestAsyncLoader*>(pThis)->IsDataAvailImpl(offset, size);
155 }
156
157 FPDF_FILEACCESS file_access_;
158
159 std::vector<uint8_t> file_contents_;
160 std::vector<std::pair<size_t, size_t>> requested_segments_;
161 size_t max_requested_bound_ = 0;
162 bool is_new_data_available_ = true;
163
164 RangeSet available_ranges_;
165};
166
167} // namespace
168
170
172 // Document must load without crashing but is too malformed to be available.
173 EXPECT_FALSE(OpenDocument("trailer_unterminated.pdf"));
174 MockDownloadHints hints;
175 EXPECT_FALSE(FPDFAvail_IsDocAvail(avail(), &hints));
176}
177
179 // Document must load without crashing but is too malformed to be available.
180 EXPECT_FALSE(OpenDocument("trailer_as_hexstring.pdf"));
181 MockDownloadHints hints;
182 EXPECT_FALSE(FPDFAvail_IsDocAvail(avail(), &hints));
183}
184
186 TestAsyncLoader loader("feature_linearized_loading.pdf");
187 CreateAvail(loader.file_avail(), loader.file_access());
188 ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsDocAvail(avail(), loader.hints()));
189 SetDocumentFromAvail();
190 ASSERT_TRUE(document());
191 ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsPageAvail(avail(), 1, loader.hints()));
192
193 // No new data available, to prevent load "Pages" node.
194 loader.set_is_new_data_available(false);
195 ScopedFPDFPage page(FPDF_LoadPage(document(), 1));
196 EXPECT_TRUE(page);
197}
198
200 TestAsyncLoader loader("feature_linearized_loading.pdf");
201 CreateAvail(loader.file_avail(), loader.file_access());
202 ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsDocAvail(avail(), loader.hints()));
203 SetDocumentFromAvail();
204 ASSERT_TRUE(document());
205
206 // Prevent access to non-requested data to coerce the parser to send new
207 // request for non available (non-requested before) data.
208 loader.set_is_new_data_available(false);
209 loader.ClearRequestedSegments();
210
211 int status = PDF_FORM_NOTAVAIL;
212 while (status == PDF_FORM_NOTAVAIL) {
213 loader.FlushRequestedData();
214 status = FPDFAvail_IsFormAvail(avail(), loader.hints());
215 }
216 EXPECT_NE(PDF_FORM_ERROR, status);
217}
218
221 TestAsyncLoader loader("feature_linearized_loading.pdf");
222 CreateAvail(loader.file_avail(), loader.file_access());
223 ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsDocAvail(avail(), loader.hints()));
224 SetDocumentFromAvail();
225 ASSERT_TRUE(document());
226 const int first_page_num = FPDFAvail_GetFirstPageNum(document());
227
228 // The main cross ref table should not be processed.
229 // (It is always at file end)
230 EXPECT_GT(loader.file_access()->m_FileLen,
231 loader.max_already_available_bound());
232
233 // Prevent access to non-requested data to coerce the parser to send new
234 // request for non available (non-requested before) data.
235 loader.set_is_new_data_available(false);
236 FPDFAvail_IsPageAvail(avail(), first_page_num, loader.hints());
237
238 // The main cross ref table should not be requested.
239 // (It is always at file end)
240 EXPECT_GT(loader.file_access()->m_FileLen, loader.max_requested_bound());
241
242 // Allow parse page.
243 loader.set_is_new_data_available(true);
244 ASSERT_EQ(PDF_DATA_AVAIL,
245 FPDFAvail_IsPageAvail(avail(), first_page_num, loader.hints()));
246
247 // The main cross ref table should not be processed.
248 // (It is always at file end)
249 EXPECT_GT(loader.file_access()->m_FileLen,
250 loader.max_already_available_bound());
251
252 // Prevent loading data, while page loading.
253 loader.set_is_new_data_available(false);
254 ScopedFPDFPage page(FPDF_LoadPage(document(), first_page_num));
255 EXPECT_TRUE(page);
256}
257
259 TestAsyncLoader loader("feature_linearized_loading.pdf");
260 CreateAvail(loader.file_avail(), loader.file_access());
261 ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsDocAvail(avail(), loader.hints()));
262 SetDocumentFromAvail();
263 ASSERT_TRUE(document());
264
265 static constexpr uint32_t kSecondPageNum = 1;
266
267 // Prevent access to non-requested data to coerce the parser to send new
268 // request for non available (non-requested before) data.
269 loader.set_is_new_data_available(false);
270 loader.ClearRequestedSegments();
271
272 int status = PDF_DATA_NOTAVAIL;
273 while (status == PDF_DATA_NOTAVAIL) {
274 loader.FlushRequestedData();
275 status = FPDFAvail_IsPageAvail(avail(), kSecondPageNum, loader.hints());
276 }
277 EXPECT_EQ(PDF_DATA_AVAIL, status);
278
279 // Prevent loading data, while page loading.
280 loader.set_is_new_data_available(false);
281 ScopedFPDFPage page(FPDF_LoadPage(document(), kSecondPageNum));
282 EXPECT_TRUE(page);
283}
284
286 TestAsyncLoader loader("linearized.pdf");
287 loader.set_is_new_data_available(false);
288 CreateAvail(loader.file_avail(), loader.file_access());
289 while (PDF_DATA_AVAIL != FPDFAvail_IsDocAvail(avail(), loader.hints())) {
290 loader.FlushRequestedData();
291 }
292
293 SetDocumentFromAvail();
294 ASSERT_TRUE(document());
295
296 // The "info" dictionary should still be unavailable.
297 EXPECT_FALSE(FPDF_GetMetaText(document(), "CreationDate", nullptr, 0));
298
299 // Simulate receiving whole file.
300 loader.set_is_new_data_available(true);
301 // Load second page, to parse additional crossref sections.
302 EXPECT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsPageAvail(avail(), 1, loader.hints()));
303
304 EXPECT_TRUE(FPDF_GetMetaText(document(), "CreationDate", nullptr, 0));
305}
306
308 TestAsyncLoader loader("linearized.pdf");
309 // Map "Info" to an object within the first section without breaking
310 // linearization.
311 ByteString data(ByteStringView(loader.file_contents()));
312 absl::optional<size_t> index = data.Find("/Info 27 0 R");
313 ASSERT_TRUE(index.has_value());
314 auto span = loader.mutable_file_contents().subspan(index.value()).subspan(7);
315 ASSERT_FALSE(span.empty());
316 EXPECT_EQ('7', span[0]);
317 span[0] = '9';
318
319 loader.set_is_new_data_available(false);
320 CreateAvail(loader.file_avail(), loader.file_access());
321 while (PDF_DATA_AVAIL != FPDFAvail_IsDocAvail(avail(), loader.hints())) {
322 loader.FlushRequestedData();
323 }
324
325 SetDocumentFromAvail();
326 ASSERT_TRUE(document());
327
328 // The "Info" dictionary should be available for the linearized document, if
329 // it is located in the first page section.
330 // Info was remapped to a dictionary with Type "Catalog"
331 unsigned short buffer[100] = {0};
332 EXPECT_TRUE(FPDF_GetMetaText(document(), "Type", buffer, sizeof(buffer)));
333 EXPECT_EQ(L"Catalog", GetPlatformWString(buffer));
334}
335
337 TestAsyncLoader loader("linearized.pdf");
338 // Map "Info" to an invalid object without breaking linearization.
339 ByteString data(ByteStringView(loader.file_contents()));
340 absl::optional<size_t> index = data.Find("/Info 27 0 R");
341 ASSERT_TRUE(index.has_value());
342 auto span = loader.mutable_file_contents().subspan(index.value()).subspan(6);
343 ASSERT_GE(span.size(), 2u);
344 EXPECT_EQ('2', span[0]);
345 EXPECT_EQ('7', span[1]);
346 span[0] = '9';
347 span[1] = '9';
348
349 loader.set_is_new_data_available(false);
350 CreateAvail(loader.file_avail(), loader.file_access());
351 while (PDF_DATA_AVAIL != FPDFAvail_IsDocAvail(avail(), loader.hints())) {
352 loader.FlushRequestedData();
353 }
354
355 SetDocumentFromAvail();
356 ASSERT_TRUE(document());
357
358 // Set all data available.
359 loader.set_is_new_data_available(true);
360 // Check second page, to load additional crossrefs.
361 ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsPageAvail(avail(), 0, loader.hints()));
362
363 // Test that api is robust enough to handle the bad case.
364 EXPECT_FALSE(FPDF_GetMetaText(document(), "Type", nullptr, 0));
365}
366
368 TestAsyncLoader loader("linearized.pdf");
369 // Break the "Info" parameter without breaking linearization.
370 ByteString data(ByteStringView(loader.file_contents()));
371 absl::optional<size_t> index = data.Find("/Info 27 0 R");
372 ASSERT_TRUE(index.has_value());
373 auto span = loader.mutable_file_contents().subspan(index.value()).subspan(2);
374 ASSERT_FALSE(span.empty());
375 EXPECT_EQ('n', span[0]);
376 span[0] = '_';
377
378 loader.set_is_new_data_available(false);
379 CreateAvail(loader.file_avail(), loader.file_access());
380 while (PDF_DATA_AVAIL != FPDFAvail_IsDocAvail(avail(), loader.hints())) {
381 loader.FlushRequestedData();
382 }
383
384 SetDocumentFromAvail();
385 ASSERT_TRUE(document());
386
387 // Set all data available.
388 loader.set_is_new_data_available(true);
389 // Check second page, to load additional crossrefs.
390 ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsPageAvail(avail(), 0, loader.hints()));
391
392 // Test that api is robust enough to handle the bad case.
393 EXPECT_FALSE(FPDF_GetMetaText(document(), "Type", nullptr, 0));
394}
395
397 EXPECT_EQ(PDF_DATA_ERROR, FPDFAvail_IsDocAvail(nullptr, nullptr));
398 EXPECT_FALSE(FPDFAvail_GetDocument(nullptr, nullptr));
399 EXPECT_EQ(0, FPDFAvail_GetFirstPageNum(nullptr));
400 EXPECT_EQ(PDF_DATA_ERROR, FPDFAvail_IsPageAvail(nullptr, 0, nullptr));
401 EXPECT_EQ(PDF_FORM_ERROR, FPDFAvail_IsFormAvail(nullptr, nullptr));
403}
404
406 TestAsyncLoader loader("linearized.pdf");
407 CreateAvail(loader.file_avail(), loader.file_access());
408 ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsDocAvail(avail(), loader.hints()));
409 EXPECT_EQ(PDF_DATA_NOTAVAIL,
410 FPDFAvail_IsPageAvail(avail(), -1, loader.hints()));
411}
412
414 // Test passes if it doesn't crash.
415 TestAsyncLoader loader("bug_1324189.pdf");
416 CreateAvail(loader.file_avail(), loader.file_access());
417 ASSERT_EQ(PDF_DATA_NOTAVAIL, FPDFAvail_IsDocAvail(avail(), loader.hints()));
418}
419
421 // Test passes if it doesn't crash.
422 TestAsyncLoader loader("bug_1324503.pdf");
423 CreateAvail(loader.file_avail(), loader.file_access());
424 ASSERT_EQ(PDF_DATA_NOTAVAIL, FPDFAvail_IsDocAvail(avail(), loader.hints()));
425}
static std::string GetTestFilePath(const std::string &file_name)
#define PDF_DATA_AVAIL
#define PDF_FORM_NOTAVAIL
#define PDF_DATA_NOTAVAIL
#define PDF_LINEARIZATION_UNKNOWN
#define PDF_DATA_ERROR
FPDF_EXPORT int FPDF_CALLCONV FPDFAvail_IsFormAvail(FPDF_AVAIL avail, FX_DOWNLOADHINTS *hints)
#define PDF_FORM_ERROR
FPDF_EXPORT int FPDF_CALLCONV FPDFAvail_IsPageAvail(FPDF_AVAIL avail, int page_index, FX_DOWNLOADHINTS *hints)
FPDF_EXPORT FPDF_DOCUMENT FPDF_CALLCONV FPDFAvail_GetDocument(FPDF_AVAIL avail, FPDF_BYTESTRING password)
FPDF_EXPORT int FPDF_CALLCONV FPDFAvail_IsDocAvail(FPDF_AVAIL avail, FX_DOWNLOADHINTS *hints)
FPDF_EXPORT int FPDF_CALLCONV FPDFAvail_GetFirstPageNum(FPDF_DOCUMENT doc)
FPDF_EXPORT int FPDF_CALLCONV FPDFAvail_IsLinearized(FPDF_AVAIL avail)
TEST_F(FPDFDataAvailEmbedderTest, TrailerUnterminated)
std::wstring GetPlatformWString(FPDF_WIDESTRING wstr)
void(* AddSegment)(struct _FX_DOWNLOADHINTS *pThis, size_t offset, size_t size)