Qt
Internal/Contributor docs for the Qt SDK. Note: These are NOT official API docs; those are found at https://doc.qt.io/
Loading...
Searching...
No Matches
fpdf_parser_decode_unittest.cpp
Go to the documentation of this file.
1// Copyright 2015 The PDFium Authors
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "core/fpdfapi/parser/fpdf_parser_decode.h"
6
7#include <stddef.h>
8#include <stdint.h>
9
10#include <iterator>
11
12#include "core/fpdfapi/parser/cpdf_array.h"
13#include "core/fpdfapi/parser/cpdf_dictionary.h"
14#include "core/fpdfapi/parser/cpdf_indirect_object_holder.h"
15#include "core/fpdfapi/parser/cpdf_name.h"
16#include "core/fpdfapi/parser/cpdf_reference.h"
17#include "core/fpdfapi/parser/cpdf_string.h"
18#include "core/fxcrt/bytestring.h"
19#include "core/fxcrt/fx_memory_wrappers.h"
20#include "core/fxcrt/string_view_template.h"
21#include "core/fxcrt/widestring.h"
22#include "testing/gtest/include/gtest/gtest.h"
23#include "testing/test_support.h"
24#include "third_party/base/containers/span.h"
25
26namespace {
27
28// Converts a string literal into a `uint8_t` span.
29template <size_t N>
30pdfium::span<const uint8_t> ToSpan(const char (&array)[N]) {
31 return pdfium::as_bytes(ByteStringView(array, N - 1).span());
32}
33
34// Converts a string literal into a `ByteString`.
35template <size_t N>
36ByteString ToByteString(const char (&array)[N]) {
37 return ByteString(array, N - 1);
38}
39
40} // namespace
41
42TEST(ParserDecodeTest, ValidateDecoderPipeline) {
43 {
44 // Empty decoder list is always valid.
45 auto decoders = pdfium::MakeRetain<CPDF_Array>();
46 EXPECT_TRUE(ValidateDecoderPipeline(decoders.Get()));
47 }
48 {
49 // 1 decoder is almost always valid.
50 auto decoders = pdfium::MakeRetain<CPDF_Array>();
51 decoders->AppendNew<CPDF_Name>("FlateEncode");
52 EXPECT_TRUE(ValidateDecoderPipeline(decoders.Get()));
53 }
54 {
55 // 1 decoder is almost always valid, even with an unknown decoder.
56 auto decoders = pdfium::MakeRetain<CPDF_Array>();
57 decoders->AppendNew<CPDF_Name>("FooBar");
58 EXPECT_TRUE(ValidateDecoderPipeline(decoders.Get()));
59 }
60 {
61 // Valid 2 decoder pipeline.
62 auto decoders = pdfium::MakeRetain<CPDF_Array>();
63 decoders->AppendNew<CPDF_Name>("AHx");
64 decoders->AppendNew<CPDF_Name>("LZWDecode");
65 EXPECT_TRUE(ValidateDecoderPipeline(decoders.Get()));
66 }
67 {
68 // Valid 2 decoder pipeline.
69 auto decoders = pdfium::MakeRetain<CPDF_Array>();
70 decoders->AppendNew<CPDF_Name>("ASCII85Decode");
71 decoders->AppendNew<CPDF_Name>("ASCII85Decode");
72 EXPECT_TRUE(ValidateDecoderPipeline(decoders.Get()));
73 }
74 {
75 // Valid 5 decoder pipeline.
76 auto decoders = pdfium::MakeRetain<CPDF_Array>();
77 decoders->AppendNew<CPDF_Name>("ASCII85Decode");
78 decoders->AppendNew<CPDF_Name>("A85");
79 decoders->AppendNew<CPDF_Name>("RunLengthDecode");
80 decoders->AppendNew<CPDF_Name>("FlateDecode");
81 decoders->AppendNew<CPDF_Name>("RL");
82 EXPECT_TRUE(ValidateDecoderPipeline(decoders.Get()));
83 }
84 {
85 // Valid 5 decoder pipeline, with an image decoder at the end.
86 auto decoders = pdfium::MakeRetain<CPDF_Array>();
87 decoders->AppendNew<CPDF_Name>("RunLengthDecode");
88 decoders->AppendNew<CPDF_Name>("ASCII85Decode");
89 decoders->AppendNew<CPDF_Name>("FlateDecode");
90 decoders->AppendNew<CPDF_Name>("LZW");
91 decoders->AppendNew<CPDF_Name>("DCTDecode");
92 EXPECT_TRUE(ValidateDecoderPipeline(decoders.Get()));
93 }
94 {
95 // Invalid 1 decoder pipeline due to wrong type.
96 auto decoders = pdfium::MakeRetain<CPDF_Array>();
97 decoders->AppendNew<CPDF_String>("FlateEncode", false);
98 EXPECT_FALSE(ValidateDecoderPipeline(decoders.Get()));
99 }
100 {
101 // Invalid 2 decoder pipeline, with 2 image decoders.
102 auto decoders = pdfium::MakeRetain<CPDF_Array>();
103 decoders->AppendNew<CPDF_Name>("DCTDecode");
104 decoders->AppendNew<CPDF_Name>("CCITTFaxDecode");
105 EXPECT_FALSE(ValidateDecoderPipeline(decoders.Get()));
106 }
107 {
108 // Invalid 2 decoder pipeline, with 1 image decoder at the start.
109 auto decoders = pdfium::MakeRetain<CPDF_Array>();
110 decoders->AppendNew<CPDF_Name>("DCTDecode");
111 decoders->AppendNew<CPDF_Name>("FlateDecode");
112 EXPECT_FALSE(ValidateDecoderPipeline(decoders.Get()));
113 }
114 {
115 // Invalid 2 decoder pipeline due to wrong type.
116 auto decoders = pdfium::MakeRetain<CPDF_Array>();
117 decoders->AppendNew<CPDF_String>("AHx", false);
118 decoders->AppendNew<CPDF_Name>("LZWDecode");
119 EXPECT_FALSE(ValidateDecoderPipeline(decoders.Get()));
120 }
121 {
122 // Invalid 5 decoder pipeline.
123 auto decoders = pdfium::MakeRetain<CPDF_Array>();
124 decoders->AppendNew<CPDF_Name>("FlateDecode");
125 decoders->AppendNew<CPDF_Name>("FlateDecode");
126 decoders->AppendNew<CPDF_Name>("DCTDecode");
127 decoders->AppendNew<CPDF_Name>("FlateDecode");
128 decoders->AppendNew<CPDF_Name>("FlateDecode");
129 EXPECT_FALSE(ValidateDecoderPipeline(decoders.Get()));
130 }
131 {
132 // Invalid 5 decoder pipeline due to wrong type.
133 auto decoders = pdfium::MakeRetain<CPDF_Array>();
134 decoders->AppendNew<CPDF_Name>("ASCII85Decode");
135 decoders->AppendNew<CPDF_Name>("A85");
136 decoders->AppendNew<CPDF_Name>("RunLengthDecode");
137 decoders->AppendNew<CPDF_Name>("FlateDecode");
138 decoders->AppendNew<CPDF_String>("RL", false);
139 EXPECT_FALSE(ValidateDecoderPipeline(decoders.Get()));
140 }
141}
142
144 {
145 // Valid 2 decoder pipeline with indirect objects.
146 CPDF_IndirectObjectHolder objects_holder;
147 auto decoder = pdfium::MakeRetain<CPDF_Name>(nullptr, "FlateDecode");
148 uint32_t decoder_number =
149 objects_holder.AddIndirectObject(std::move(decoder));
150
151 auto decoders = pdfium::MakeRetain<CPDF_Array>();
152 decoders->AppendNew<CPDF_Reference>(&objects_holder, decoder_number);
153 decoders->AppendNew<CPDF_Name>("LZW");
154 EXPECT_TRUE(ValidateDecoderPipeline(decoders.Get()));
155 }
156 {
157 // Valid 5 decoder pipeline with indirect objects, with an image decoder at
158 // the end.
159 CPDF_IndirectObjectHolder objects_holder;
160 auto decoder = pdfium::MakeRetain<CPDF_Name>(nullptr, "LZW");
161 uint32_t decoder_number =
162 objects_holder.AddIndirectObject(std::move(decoder));
163
164 auto decoders = pdfium::MakeRetain<CPDF_Array>();
165 decoders->AppendNew<CPDF_Name>("RunLengthDecode");
166 decoders->AppendNew<CPDF_Name>("ASCII85Decode");
167 decoders->AppendNew<CPDF_Name>("FlateDecode");
168 decoders->AppendNew<CPDF_Reference>(&objects_holder, decoder_number);
169 decoders->AppendNew<CPDF_Name>("DCTDecode");
170 EXPECT_TRUE(ValidateDecoderPipeline(decoders.Get()));
171 }
172 {
173 // Invalid 2 decoder pipeline due to wrong type indirect object.
174 CPDF_IndirectObjectHolder objects_holder;
175 auto decoder =
176 pdfium::MakeRetain<CPDF_String>(nullptr, "FlateDecode", false);
177 uint32_t decoder_number =
178 objects_holder.AddIndirectObject(std::move(decoder));
179
180 auto decoders = pdfium::MakeRetain<CPDF_Array>();
181 decoders->AppendNew<CPDF_Reference>(&objects_holder, decoder_number);
182 decoders->AppendNew<CPDF_Name>("LZW");
183 EXPECT_FALSE(ValidateDecoderPipeline(decoders.Get()));
184 }
185 {
186 // Invalid 2 decoder pipeline due to invalid indirect object.
187 CPDF_IndirectObjectHolder objects_holder;
188 auto decoder = pdfium::MakeRetain<CPDF_Name>(nullptr, "DCTDecode");
189 uint32_t decoder_number =
190 objects_holder.AddIndirectObject(std::move(decoder));
191
192 auto decoders = pdfium::MakeRetain<CPDF_Array>();
193 decoders->AppendNew<CPDF_Reference>(&objects_holder, decoder_number);
194 decoders->AppendNew<CPDF_Name>("LZW");
195 EXPECT_FALSE(ValidateDecoderPipeline(decoders.Get()));
196 }
197}
198
199// TODO(thestig): Test decoder params.
200TEST(ParserDecodeTest, GetDecoderArray) {
201 {
202 // Treat no filter as an empty filter array.
203 auto dict = pdfium::MakeRetain<CPDF_Dictionary>();
204 absl::optional<DecoderArray> decoder_array = GetDecoderArray(dict);
205 ASSERT_TRUE(decoder_array.has_value());
206 EXPECT_TRUE(decoder_array.value().empty());
207 }
208 {
209 // Wrong filter type.
210 auto dict = pdfium::MakeRetain<CPDF_Dictionary>();
211 dict->SetNewFor<CPDF_String>("Filter", "RL", false);
212 absl::optional<DecoderArray> decoder_array = GetDecoderArray(dict);
213 EXPECT_FALSE(decoder_array.has_value());
214 }
215 {
216 // Filter name.
217 auto dict = pdfium::MakeRetain<CPDF_Dictionary>();
218 dict->SetNewFor<CPDF_Name>("Filter", "RL");
219 absl::optional<DecoderArray> decoder_array = GetDecoderArray(dict);
220 ASSERT_TRUE(decoder_array.has_value());
221 ASSERT_EQ(1u, decoder_array.value().size());
222 EXPECT_EQ("RL", decoder_array.value()[0].first);
223 }
224 {
225 // Empty filter array.
226 auto dict = pdfium::MakeRetain<CPDF_Dictionary>();
227 dict->SetNewFor<CPDF_Array>("Filter");
228 absl::optional<DecoderArray> decoder_array = GetDecoderArray(dict);
229 ASSERT_TRUE(decoder_array.has_value());
230 EXPECT_TRUE(decoder_array.value().empty());
231 }
232 {
233 // Valid 1 element filter array.
234 auto dict = pdfium::MakeRetain<CPDF_Dictionary>();
235 auto filter_array = dict->SetNewFor<CPDF_Array>("Filter");
236 filter_array->AppendNew<CPDF_Name>("FooBar");
237 absl::optional<DecoderArray> decoder_array = GetDecoderArray(dict);
238 ASSERT_TRUE(decoder_array.has_value());
239 ASSERT_EQ(1u, decoder_array.value().size());
240 EXPECT_EQ("FooBar", decoder_array.value()[0].first);
241 }
242 {
243 // Valid 2 element filter array.
244 auto dict = pdfium::MakeRetain<CPDF_Dictionary>();
245 auto filter_array = dict->SetNewFor<CPDF_Array>("Filter");
246 filter_array->AppendNew<CPDF_Name>("AHx");
247 filter_array->AppendNew<CPDF_Name>("LZWDecode");
248 absl::optional<DecoderArray> decoder_array = GetDecoderArray(dict);
249 ASSERT_TRUE(decoder_array.has_value());
250 ASSERT_EQ(2u, decoder_array.value().size());
251 EXPECT_EQ("AHx", decoder_array.value()[0].first);
252 EXPECT_EQ("LZWDecode", decoder_array.value()[1].first);
253 }
254 {
255 // Invalid 2 element filter array.
256 auto dict = pdfium::MakeRetain<CPDF_Dictionary>();
257 auto invalid_filter_array = dict->SetNewFor<CPDF_Array>("Filter");
258 invalid_filter_array->AppendNew<CPDF_Name>("DCTDecode");
259 invalid_filter_array->AppendNew<CPDF_Name>("CCITTFaxDecode");
260 absl::optional<DecoderArray> decoder_array = GetDecoderArray(dict);
261 EXPECT_FALSE(decoder_array.has_value());
262 }
263}
264
265TEST(ParserDecodeTest, A85Decode) {
266 const pdfium::DecodeTestData kTestData[] = {
267 // Empty src string.
268 STR_IN_OUT_CASE("", "", 0),
269 // Empty content in src string.
270 STR_IN_OUT_CASE("~>", "", 0),
271 // Regular conversion.
272 STR_IN_OUT_CASE("FCfN8~>", "test", 7),
273 // End at the ending mark.
274 STR_IN_OUT_CASE("FCfN8~>FCfN8", "test", 7),
275 // Skip whitespaces.
276 STR_IN_OUT_CASE("\t F C\r\n \tf N 8 ~>", "test", 17),
277 // No ending mark.
278 STR_IN_OUT_CASE("@3B0)DJj_BF*)>@Gp#-s", "a funny story :)", 20),
279 // Non-multiple length.
280 STR_IN_OUT_CASE("12A", "2k", 3),
281 // Stop at unknown characters.
282 STR_IN_OUT_CASE("FCfN8FCfN8vw", "testtest", 11),
283 };
284 for (const auto& test_case : kTestData) {
285 std::unique_ptr<uint8_t, FxFreeDeleter> result;
286 uint32_t result_size = 0;
287 EXPECT_EQ(test_case.processed_size,
288 A85Decode({test_case.input, test_case.input_size}, &result,
289 &result_size))
290 << "for case " << test_case.input;
291 ASSERT_EQ(test_case.expected_size, result_size);
292 const uint8_t* result_ptr = result.get();
293 for (size_t j = 0; j < result_size; ++j) {
294 EXPECT_EQ(test_case.expected[j], result_ptr[j])
295 << "for case " << test_case.input << " char " << j;
296 }
297 }
298}
299
300TEST(ParserDecodeTest, HexDecode) {
301 const pdfium::DecodeTestData kTestData[] = {
302 // Empty src string.
303 STR_IN_OUT_CASE("", "", 0),
304 // Empty content in src string.
305 STR_IN_OUT_CASE(">", "", 1),
306 // Only whitespaces in src string.
307 STR_IN_OUT_CASE("\t \r\n>", "", 7),
308 // Regular conversion.
309 STR_IN_OUT_CASE("12Ac>zzz", "\x12\xac", 5),
310 // Skip whitespaces.
311 STR_IN_OUT_CASE("12 Ac\t02\r\nBF>zzz>", "\x12\xac\x02\xbf", 13),
312 // Non-multiple length.
313 STR_IN_OUT_CASE("12A>zzz", "\x12\xa0", 4),
314 // Skips unknown characters.
315 STR_IN_OUT_CASE("12tk \tAc>zzz", "\x12\xac", 10),
316 // No ending mark.
317 STR_IN_OUT_CASE("12AcED3c3456", "\x12\xac\xed\x3c\x34\x56", 12),
318 };
319 for (const auto& test_case : kTestData) {
320 std::unique_ptr<uint8_t, FxFreeDeleter> result;
321 uint32_t result_size = 0;
322 EXPECT_EQ(test_case.processed_size,
323 HexDecode({test_case.input, test_case.input_size}, &result,
324 &result_size))
325 << "for case " << test_case.input;
326 ASSERT_EQ(test_case.expected_size, result_size);
327 const uint8_t* result_ptr = result.get();
328 for (size_t j = 0; j < result_size; ++j) {
329 EXPECT_EQ(test_case.expected[j], result_ptr[j])
330 << "for case " << test_case.input << " char " << j;
331 }
332 }
333}
334
336 // Empty src string.
337 EXPECT_EQ(L"", PDF_DecodeText(ToSpan("")));
338
339 // ASCII text.
340 EXPECT_EQ(L"the quick\tfox", PDF_DecodeText(ToSpan("the quick\tfox")));
341
342 // UTF-8 text.
343 EXPECT_EQ(L"\x0330\x0331",
344 PDF_DecodeText(ToSpan("\xEF\xBB\xBF\xCC\xB0\xCC\xB1")));
345
346 // UTF-16BE text.
347 EXPECT_EQ(L"\x0330\x0331",
348 PDF_DecodeText(ToSpan("\xFE\xFF\x03\x30\x03\x31")));
349
350 // More UTF-16BE text.
351 EXPECT_EQ(
352 L"\x7F51\x9875\x0020\x56FE\x7247\x0020"
353 L"\x8D44\x8BAF\x66F4\x591A\x0020\x00BB",
354 PDF_DecodeText(
355 ToSpan("\xFE\xFF\x7F\x51\x98\x75\x00\x20\x56\xFE\x72\x47\x00"
356 "\x20\x8D\x44\x8B\xAF\x66\xF4\x59\x1A\x00\x20\x00\xBB")));
357
358 // Supplementary UTF-8 text.
359 EXPECT_EQ(L"🎨", PDF_DecodeText(ToSpan("\xEF\xBB\xBF\xF0\x9F\x8E\xA8")));
360
361 // Supplementary UTF-16BE text.
362 EXPECT_EQ(L"🎨", PDF_DecodeText(ToSpan("\xFE\xFF\xD8\x3C\xDF\xA8")));
363}
364
365// https://crbug.com/pdfium/182
367 EXPECT_EQ(L"\x0020\x5370\x5237",
368 PDF_DecodeText(ToSpan(
369 "\xEF\xBB\xBF\x1B\x6A\x61\x1B\x20\xE5\x8D\xB0\xE5\x88\xB7")));
370 EXPECT_EQ(L"\x0020\x5370\x5237",
371 PDF_DecodeText(ToSpan(
372 "\xFE\xFF\x00\x1B\x6A\x61\x00\x1B\x00\x20\x53\x70\x52\x37")));
373 EXPECT_EQ(
374 L"\x0020\x5370\x5237",
375 PDF_DecodeText(ToSpan(
376 "\xFE\xFF\x00\x1B\x6A\x61\x00\x1B\x00\x20\x53\x70\x52\x37\x29")));
377 EXPECT_EQ(
378 L"\x0020\x5370\x5237",
379 PDF_DecodeText(ToSpan(
380 "\xFE\xFF\x00\x1B\x6A\x61\x4A\x50\x00\x1B\x00\x20\x53\x70\x52\x37")));
381 EXPECT_EQ(L"\x0020\x5237",
382 PDF_DecodeText(ToSpan(
383 "\xFE\xFF\x00\x20\x00\x1B\x6A\x61\x4A\x50\x00\x1B\x52\x37")));
384}
385
386// https://crbug.com/1001159
388 EXPECT_EQ(L"", PDF_DecodeText(ToSpan("\xEF\xBB\xBF\x1B\x1B")));
389 EXPECT_EQ(L"", PDF_DecodeText(ToSpan("\xFE\xFF\x00\x1B\x00\x1B")));
390 EXPECT_EQ(L"", PDF_DecodeText(ToSpan("\xFE\xFF\x00\x1B\x00\x1B\x20")));
391 EXPECT_EQ(L"\x0020", PDF_DecodeText(ToSpan("\xEF\xBB\xBF\x1B\x1B\x20")));
392 EXPECT_EQ(L"\x0020",
393 PDF_DecodeText(ToSpan("\xFE\xFF\x00\x1B\x00\x1B\x00\x20")));
394}
395
397 EXPECT_EQ(L"\xD800", PDF_DecodeText(ToSpan("\xFE\xFF\xD8\x00"))) << "High";
398 EXPECT_EQ(L"\xDC00", PDF_DecodeText(ToSpan("\xFE\xFF\xDC\x00"))) << "Low";
399 EXPECT_EQ(L"\xD800🎨",
400 PDF_DecodeText(ToSpan("\xFE\xFF\xD8\x00\xD8\x3C\xDF\xA8")))
401 << "High-high";
402 EXPECT_EQ(L"🎨\xDC00",
403 PDF_DecodeText(ToSpan("\xFE\xFF\xD8\x3C\xDF\xA8\xDC\x00")))
404 << "Low-low";
405}
406
408 // Empty src string.
409 EXPECT_EQ("", PDF_EncodeText(L""));
410
411 // ASCII text.
412 EXPECT_EQ("the quick\tfox", PDF_EncodeText(L"the quick\tfox"));
413
414 // Unicode text.
415 EXPECT_EQ("\xFE\xFF\x03\x30\x03\x31", PDF_EncodeText(L"\x0330\x0331"));
416
417 // More Unicode text.
418 EXPECT_EQ(
419 ToByteString("\xFE\xFF\x7F\x51\x98\x75\x00\x20\x56\xFE\x72\x47\x00"
420 "\x20\x8D\x44\x8B\xAF\x66\xF4\x59\x1A\x00\x20\x00\xBB"),
421 PDF_EncodeText(L"\x7F51\x9875\x0020\x56FE\x7247\x0020"
422 L"\x8D44\x8BAF\x66F4\x591A\x0020\x00BB"));
423
424 // Supplementary Unicode text.
425 EXPECT_EQ("\xFE\xFF\xD8\x3C\xDF\xA8", PDF_EncodeText(L"🎨"));
426}
427
429 for (int pdf_code_point = 0; pdf_code_point < 256; ++pdf_code_point) {
430 ByteString original(static_cast<char>(pdf_code_point));
431 ByteString reencoded =
432 PDF_EncodeText(PDF_DecodeText(original.raw_span()).AsStringView());
433
434 switch (pdf_code_point) {
435 case 0x7F:
436 case 0x9F:
437 case 0xAD:
438 EXPECT_EQ(ByteString('\0'), reencoded) << "PDFDocEncoding undefined";
439 break;
440
441 default:
442 EXPECT_EQ(original, reencoded) << "PDFDocEncoding: " << pdf_code_point;
443 break;
444 }
445 }
446}
uint32_t AddIndirectObject(RetainPtr< CPDF_Object > pObj)
TEST(FXCRYPT, CryptToBase16)
TEST(FXCRYPT, MD5GenerateEmtpyData)
#define STR_IN_OUT_CASE(input_literal, expected_literal,...)