Qt
Internal/Contributor docs for the Qt SDK. Note: These are NOT official API docs; those are found at https://doc.qt.io/
Loading...
Searching...
No Matches
fpdf_parser_decode_unittest.cpp
Go to the documentation of this file.
1// Copyright 2015 The PDFium Authors
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "core/fpdfapi/parser/fpdf_parser_decode.h"
6
7#include <stddef.h>
8#include <stdint.h>
9
10#include <iterator>
11
12#include "core/fpdfapi/parser/cpdf_array.h"
13#include "core/fpdfapi/parser/cpdf_dictionary.h"
14#include "core/fpdfapi/parser/cpdf_indirect_object_holder.h"
15#include "core/fpdfapi/parser/cpdf_name.h"
16#include "core/fpdfapi/parser/cpdf_reference.h"
17#include "core/fpdfapi/parser/cpdf_string.h"
18#include "core/fxcodec/data_and_bytes_consumed.h"
19#include "core/fxcrt/bytestring.h"
20#include "core/fxcrt/span.h"
21#include "core/fxcrt/string_view_template.h"
22#include "core/fxcrt/widestring.h"
23#include "testing/gmock/include/gmock/gmock.h"
24#include "testing/gtest/include/gtest/gtest.h"
25#include "testing/test_support.h"
26
27using ::testing::ElementsAreArray;
28
29namespace {
30
31// Converts a string literal into a `uint8_t` span.
32template <size_t N>
33pdfium::span<const uint8_t> ToSpan(const char (&array)[N]) {
34 return pdfium::as_bytes(UNSAFE_BUFFERS(ByteStringView(array, N - 1).span()));
35}
36
37// Converts a string literal into a `ByteString`.
38template <size_t N>
39ByteString ToByteString(const char (&array)[N]) {
40 // SAFETY: compiler correctly infers size.
41 return UNSAFE_BUFFERS(ByteString(array, N - 1));
42}
43
44} // namespace
45
46TEST(ParserDecodeTest, ValidateDecoderPipeline) {
47 {
48 // Empty decoder list is always valid.
49 auto decoders = pdfium::MakeRetain<CPDF_Array>();
50 EXPECT_TRUE(ValidateDecoderPipeline(decoders.Get()));
51 }
52 {
53 // 1 decoder is almost always valid.
54 auto decoders = pdfium::MakeRetain<CPDF_Array>();
55 decoders->AppendNew<CPDF_Name>("FlateEncode");
56 EXPECT_TRUE(ValidateDecoderPipeline(decoders.Get()));
57 }
58 {
59 // 1 decoder is almost always valid, even with an unknown decoder.
60 auto decoders = pdfium::MakeRetain<CPDF_Array>();
61 decoders->AppendNew<CPDF_Name>("FooBar");
62 EXPECT_TRUE(ValidateDecoderPipeline(decoders.Get()));
63 }
64 {
65 // Valid 2 decoder pipeline.
66 auto decoders = pdfium::MakeRetain<CPDF_Array>();
67 decoders->AppendNew<CPDF_Name>("AHx");
68 decoders->AppendNew<CPDF_Name>("LZWDecode");
69 EXPECT_TRUE(ValidateDecoderPipeline(decoders.Get()));
70 }
71 {
72 // Valid 2 decoder pipeline.
73 auto decoders = pdfium::MakeRetain<CPDF_Array>();
74 decoders->AppendNew<CPDF_Name>("ASCII85Decode");
75 decoders->AppendNew<CPDF_Name>("ASCII85Decode");
76 EXPECT_TRUE(ValidateDecoderPipeline(decoders.Get()));
77 }
78 {
79 // Valid 5 decoder pipeline.
80 auto decoders = pdfium::MakeRetain<CPDF_Array>();
81 decoders->AppendNew<CPDF_Name>("ASCII85Decode");
82 decoders->AppendNew<CPDF_Name>("A85");
83 decoders->AppendNew<CPDF_Name>("RunLengthDecode");
84 decoders->AppendNew<CPDF_Name>("FlateDecode");
85 decoders->AppendNew<CPDF_Name>("RL");
86 EXPECT_TRUE(ValidateDecoderPipeline(decoders.Get()));
87 }
88 {
89 // Valid 5 decoder pipeline, with an image decoder at the end.
90 auto decoders = pdfium::MakeRetain<CPDF_Array>();
91 decoders->AppendNew<CPDF_Name>("RunLengthDecode");
92 decoders->AppendNew<CPDF_Name>("ASCII85Decode");
93 decoders->AppendNew<CPDF_Name>("FlateDecode");
94 decoders->AppendNew<CPDF_Name>("LZW");
95 decoders->AppendNew<CPDF_Name>("DCTDecode");
96 EXPECT_TRUE(ValidateDecoderPipeline(decoders.Get()));
97 }
98 {
99 // Invalid 1 decoder pipeline due to wrong type.
100 auto decoders = pdfium::MakeRetain<CPDF_Array>();
101 decoders->AppendNew<CPDF_String>("FlateEncode");
102 EXPECT_FALSE(ValidateDecoderPipeline(decoders.Get()));
103 }
104 {
105 // Invalid 2 decoder pipeline, with 2 image decoders.
106 auto decoders = pdfium::MakeRetain<CPDF_Array>();
107 decoders->AppendNew<CPDF_Name>("DCTDecode");
108 decoders->AppendNew<CPDF_Name>("CCITTFaxDecode");
109 EXPECT_FALSE(ValidateDecoderPipeline(decoders.Get()));
110 }
111 {
112 // Invalid 2 decoder pipeline, with 1 image decoder at the start.
113 auto decoders = pdfium::MakeRetain<CPDF_Array>();
114 decoders->AppendNew<CPDF_Name>("DCTDecode");
115 decoders->AppendNew<CPDF_Name>("FlateDecode");
116 EXPECT_FALSE(ValidateDecoderPipeline(decoders.Get()));
117 }
118 {
119 // Invalid 2 decoder pipeline due to wrong type.
120 auto decoders = pdfium::MakeRetain<CPDF_Array>();
121 decoders->AppendNew<CPDF_String>("AHx");
122 decoders->AppendNew<CPDF_Name>("LZWDecode");
123 EXPECT_FALSE(ValidateDecoderPipeline(decoders.Get()));
124 }
125 {
126 // Invalid 5 decoder pipeline.
127 auto decoders = pdfium::MakeRetain<CPDF_Array>();
128 decoders->AppendNew<CPDF_Name>("FlateDecode");
129 decoders->AppendNew<CPDF_Name>("FlateDecode");
130 decoders->AppendNew<CPDF_Name>("DCTDecode");
131 decoders->AppendNew<CPDF_Name>("FlateDecode");
132 decoders->AppendNew<CPDF_Name>("FlateDecode");
133 EXPECT_FALSE(ValidateDecoderPipeline(decoders.Get()));
134 }
135 {
136 // Invalid 5 decoder pipeline due to wrong type.
137 auto decoders = pdfium::MakeRetain<CPDF_Array>();
138 decoders->AppendNew<CPDF_Name>("ASCII85Decode");
139 decoders->AppendNew<CPDF_Name>("A85");
140 decoders->AppendNew<CPDF_Name>("RunLengthDecode");
141 decoders->AppendNew<CPDF_Name>("FlateDecode");
142 decoders->AppendNew<CPDF_String>("RL");
143 EXPECT_FALSE(ValidateDecoderPipeline(decoders.Get()));
144 }
145}
146
148 {
149 // Valid 2 decoder pipeline with indirect objects.
150 CPDF_IndirectObjectHolder objects_holder;
151 auto decoder = pdfium::MakeRetain<CPDF_Name>(nullptr, "FlateDecode");
152 uint32_t decoder_number =
153 objects_holder.AddIndirectObject(std::move(decoder));
154
155 auto decoders = pdfium::MakeRetain<CPDF_Array>();
156 decoders->AppendNew<CPDF_Reference>(&objects_holder, decoder_number);
157 decoders->AppendNew<CPDF_Name>("LZW");
158 EXPECT_TRUE(ValidateDecoderPipeline(decoders.Get()));
159 }
160 {
161 // Valid 5 decoder pipeline with indirect objects, with an image decoder at
162 // the end.
163 CPDF_IndirectObjectHolder objects_holder;
164 auto decoder = pdfium::MakeRetain<CPDF_Name>(nullptr, "LZW");
165 uint32_t decoder_number =
166 objects_holder.AddIndirectObject(std::move(decoder));
167
168 auto decoders = pdfium::MakeRetain<CPDF_Array>();
169 decoders->AppendNew<CPDF_Name>("RunLengthDecode");
170 decoders->AppendNew<CPDF_Name>("ASCII85Decode");
171 decoders->AppendNew<CPDF_Name>("FlateDecode");
172 decoders->AppendNew<CPDF_Reference>(&objects_holder, decoder_number);
173 decoders->AppendNew<CPDF_Name>("DCTDecode");
174 EXPECT_TRUE(ValidateDecoderPipeline(decoders.Get()));
175 }
176 {
177 // Invalid 2 decoder pipeline due to wrong type indirect object.
178 CPDF_IndirectObjectHolder objects_holder;
179 auto decoder = pdfium::MakeRetain<CPDF_String>(nullptr, "FlateDecode");
180 uint32_t decoder_number =
181 objects_holder.AddIndirectObject(std::move(decoder));
182
183 auto decoders = pdfium::MakeRetain<CPDF_Array>();
184 decoders->AppendNew<CPDF_Reference>(&objects_holder, decoder_number);
185 decoders->AppendNew<CPDF_Name>("LZW");
186 EXPECT_FALSE(ValidateDecoderPipeline(decoders.Get()));
187 }
188 {
189 // Invalid 2 decoder pipeline due to invalid indirect object.
190 CPDF_IndirectObjectHolder objects_holder;
191 auto decoder = pdfium::MakeRetain<CPDF_Name>(nullptr, "DCTDecode");
192 uint32_t decoder_number =
193 objects_holder.AddIndirectObject(std::move(decoder));
194
195 auto decoders = pdfium::MakeRetain<CPDF_Array>();
196 decoders->AppendNew<CPDF_Reference>(&objects_holder, decoder_number);
197 decoders->AppendNew<CPDF_Name>("LZW");
198 EXPECT_FALSE(ValidateDecoderPipeline(decoders.Get()));
199 }
200}
201
202// TODO(thestig): Test decoder params.
203TEST(ParserDecodeTest, GetDecoderArray) {
204 {
205 // Treat no filter as an empty filter array.
206 auto dict = pdfium::MakeRetain<CPDF_Dictionary>();
207 std::optional<DecoderArray> decoder_array = GetDecoderArray(dict);
208 ASSERT_TRUE(decoder_array.has_value());
209 EXPECT_TRUE(decoder_array.value().empty());
210 }
211 {
212 // Wrong filter type.
213 auto dict = pdfium::MakeRetain<CPDF_Dictionary>();
214 dict->SetNewFor<CPDF_String>("Filter", "RL");
215 std::optional<DecoderArray> decoder_array = GetDecoderArray(dict);
216 EXPECT_FALSE(decoder_array.has_value());
217 }
218 {
219 // Filter name.
220 auto dict = pdfium::MakeRetain<CPDF_Dictionary>();
221 dict->SetNewFor<CPDF_Name>("Filter", "RL");
222 std::optional<DecoderArray> decoder_array = GetDecoderArray(dict);
223 ASSERT_TRUE(decoder_array.has_value());
224 ASSERT_EQ(1u, decoder_array.value().size());
225 EXPECT_EQ("RL", decoder_array.value()[0].first);
226 }
227 {
228 // Empty filter array.
229 auto dict = pdfium::MakeRetain<CPDF_Dictionary>();
230 dict->SetNewFor<CPDF_Array>("Filter");
231 std::optional<DecoderArray> decoder_array = GetDecoderArray(dict);
232 ASSERT_TRUE(decoder_array.has_value());
233 EXPECT_TRUE(decoder_array.value().empty());
234 }
235 {
236 // Valid 1 element filter array.
237 auto dict = pdfium::MakeRetain<CPDF_Dictionary>();
238 auto filter_array = dict->SetNewFor<CPDF_Array>("Filter");
239 filter_array->AppendNew<CPDF_Name>("FooBar");
240 std::optional<DecoderArray> decoder_array = GetDecoderArray(dict);
241 ASSERT_TRUE(decoder_array.has_value());
242 ASSERT_EQ(1u, decoder_array.value().size());
243 EXPECT_EQ("FooBar", decoder_array.value()[0].first);
244 }
245 {
246 // Valid 2 element filter array.
247 auto dict = pdfium::MakeRetain<CPDF_Dictionary>();
248 auto filter_array = dict->SetNewFor<CPDF_Array>("Filter");
249 filter_array->AppendNew<CPDF_Name>("AHx");
250 filter_array->AppendNew<CPDF_Name>("LZWDecode");
251 std::optional<DecoderArray> decoder_array = GetDecoderArray(dict);
252 ASSERT_TRUE(decoder_array.has_value());
253 ASSERT_EQ(2u, decoder_array.value().size());
254 EXPECT_EQ("AHx", decoder_array.value()[0].first);
255 EXPECT_EQ("LZWDecode", decoder_array.value()[1].first);
256 }
257 {
258 // Invalid 2 element filter array.
259 auto dict = pdfium::MakeRetain<CPDF_Dictionary>();
260 auto invalid_filter_array = dict->SetNewFor<CPDF_Array>("Filter");
261 invalid_filter_array->AppendNew<CPDF_Name>("DCTDecode");
262 invalid_filter_array->AppendNew<CPDF_Name>("CCITTFaxDecode");
263 std::optional<DecoderArray> decoder_array = GetDecoderArray(dict);
264 EXPECT_FALSE(decoder_array.has_value());
265 }
266}
267
268TEST(ParserDecodeTest, A85Decode) {
269 const pdfium::DecodeTestData kTestData[] = {
270 // Empty src string.
271 STR_IN_OUT_CASE("", "", 0),
272 // Empty content in src string.
273 STR_IN_OUT_CASE("~>", "", 0),
274 // Regular conversion.
275 STR_IN_OUT_CASE("FCfN8~>", "test", 7),
276 // End at the ending mark.
277 STR_IN_OUT_CASE("FCfN8~>FCfN8", "test", 7),
278 // Skip whitespaces.
279 STR_IN_OUT_CASE("\t F C\r\n \tf N 8 ~>", "test", 17),
280 // No ending mark.
281 STR_IN_OUT_CASE("@3B0)DJj_BF*)>@Gp#-s", "a funny story :)", 20),
282 // Non-multiple length.
283 STR_IN_OUT_CASE("12A", "2k", 3),
284 // Stop at unknown characters.
285 STR_IN_OUT_CASE("FCfN8FCfN8vw", "testtest", 11),
286 };
287 for (const auto& test_case : kTestData) {
288 DataAndBytesConsumed result = A85Decode(test_case.input_span());
289 EXPECT_EQ(test_case.processed_size, result.bytes_consumed)
290 << "for case " << test_case.input;
291 EXPECT_THAT(result.data, ElementsAreArray(test_case.expected_span()))
292 << "for case " << test_case.input;
293 }
294}
295
296TEST(ParserDecodeTest, HexDecode) {
297 const pdfium::DecodeTestData kTestData[] = {
298 // Empty src string.
299 STR_IN_OUT_CASE("", "", 0),
300 // Empty content in src string.
301 STR_IN_OUT_CASE(">", "", 1),
302 // Only whitespaces in src string.
303 STR_IN_OUT_CASE("\t \r\n>", "", 7),
304 // Regular conversion.
305 STR_IN_OUT_CASE("12Ac>zzz", "\x12\xac", 5),
306 // Skip whitespaces.
307 STR_IN_OUT_CASE("12 Ac\t02\r\nBF>zzz>", "\x12\xac\x02\xbf", 13),
308 // Non-multiple length.
309 STR_IN_OUT_CASE("12A>zzz", "\x12\xa0", 4),
310 // Skips unknown characters.
311 STR_IN_OUT_CASE("12tk \tAc>zzz", "\x12\xac", 10),
312 // No ending mark.
313 STR_IN_OUT_CASE("12AcED3c3456", "\x12\xac\xed\x3c\x34\x56", 12),
314 };
315 for (const auto& test_case : kTestData) {
316 DataAndBytesConsumed result = HexDecode(
317 UNSAFE_TODO(pdfium::make_span(test_case.input, test_case.input_size)));
318 EXPECT_EQ(test_case.processed_size, result.bytes_consumed)
319 << "for case " << test_case.input;
320 EXPECT_THAT(result.data, ElementsAreArray(test_case.expected_span()))
321 << "for case " << test_case.input;
322 }
323}
324
326 // Empty src string.
327 EXPECT_EQ(L"", PDF_DecodeText(ToSpan("")));
328
329 // ASCII text.
330 EXPECT_EQ(L"the quick\tfox", PDF_DecodeText(ToSpan("the quick\tfox")));
331
332 // UTF-8 text.
333 EXPECT_EQ(L"\x0330\x0331",
334 PDF_DecodeText(ToSpan("\xEF\xBB\xBF\xCC\xB0\xCC\xB1")));
335
336 // UTF-16BE text.
337 EXPECT_EQ(L"\x0330\x0331",
338 PDF_DecodeText(ToSpan("\xFE\xFF\x03\x30\x03\x31")));
339
340 // More UTF-16BE text.
341 EXPECT_EQ(
342 L"\x7F51\x9875\x0020\x56FE\x7247\x0020"
343 L"\x8D44\x8BAF\x66F4\x591A\x0020\x00BB",
344 PDF_DecodeText(
345 ToSpan("\xFE\xFF\x7F\x51\x98\x75\x00\x20\x56\xFE\x72\x47\x00"
346 "\x20\x8D\x44\x8B\xAF\x66\xF4\x59\x1A\x00\x20\x00\xBB")));
347
348 // Supplementary UTF-8 text.
349 EXPECT_EQ(L"🎨", PDF_DecodeText(ToSpan("\xEF\xBB\xBF\xF0\x9F\x8E\xA8")));
350
351 // Supplementary UTF-16BE text.
352 EXPECT_EQ(L"🎨", PDF_DecodeText(ToSpan("\xFE\xFF\xD8\x3C\xDF\xA8")));
353}
354
355// https://crbug.com/pdfium/182
357 EXPECT_EQ(L"\x0020\x5370\x5237",
358 PDF_DecodeText(ToSpan(
359 "\xEF\xBB\xBF\x1B\x6A\x61\x1B\x20\xE5\x8D\xB0\xE5\x88\xB7")));
360 EXPECT_EQ(L"\x0020\x5370\x5237",
361 PDF_DecodeText(ToSpan(
362 "\xFE\xFF\x00\x1B\x6A\x61\x00\x1B\x00\x20\x53\x70\x52\x37")));
363 EXPECT_EQ(
364 L"\x0020\x5370\x5237",
365 PDF_DecodeText(ToSpan(
366 "\xFE\xFF\x00\x1B\x6A\x61\x00\x1B\x00\x20\x53\x70\x52\x37\x29")));
367 EXPECT_EQ(
368 L"\x0020\x5370\x5237",
369 PDF_DecodeText(ToSpan(
370 "\xFE\xFF\x00\x1B\x6A\x61\x4A\x50\x00\x1B\x00\x20\x53\x70\x52\x37")));
371 EXPECT_EQ(L"\x0020\x5237",
372 PDF_DecodeText(ToSpan(
373 "\xFE\xFF\x00\x20\x00\x1B\x6A\x61\x4A\x50\x00\x1B\x52\x37")));
374}
375
376// https://crbug.com/1001159
378 EXPECT_EQ(L"", PDF_DecodeText(ToSpan("\xEF\xBB\xBF\x1B\x1B")));
379 EXPECT_EQ(L"", PDF_DecodeText(ToSpan("\xFE\xFF\x00\x1B\x00\x1B")));
380 EXPECT_EQ(L"", PDF_DecodeText(ToSpan("\xFE\xFF\x00\x1B\x00\x1B\x20")));
381 EXPECT_EQ(L"\x0020", PDF_DecodeText(ToSpan("\xEF\xBB\xBF\x1B\x1B\x20")));
382 EXPECT_EQ(L"\x0020",
383 PDF_DecodeText(ToSpan("\xFE\xFF\x00\x1B\x00\x1B\x00\x20")));
384}
385
387 EXPECT_EQ(L"\xD800", PDF_DecodeText(ToSpan("\xFE\xFF\xD8\x00"))) << "High";
388 EXPECT_EQ(L"\xDC00", PDF_DecodeText(ToSpan("\xFE\xFF\xDC\x00"))) << "Low";
389 EXPECT_EQ(L"\xD800🎨",
390 PDF_DecodeText(ToSpan("\xFE\xFF\xD8\x00\xD8\x3C\xDF\xA8")))
391 << "High-high";
392 EXPECT_EQ(L"🎨\xDC00",
393 PDF_DecodeText(ToSpan("\xFE\xFF\xD8\x3C\xDF\xA8\xDC\x00")))
394 << "Low-low";
395}
396
398 // Empty src string.
399 EXPECT_EQ("", PDF_EncodeText(L""));
400
401 // ASCII text.
402 EXPECT_EQ("the quick\tfox", PDF_EncodeText(L"the quick\tfox"));
403
404 // Unicode text.
405 EXPECT_EQ("\xFE\xFF\x03\x30\x03\x31", PDF_EncodeText(L"\x0330\x0331"));
406
407 // More Unicode text.
408 EXPECT_EQ(
409 ToByteString("\xFE\xFF\x7F\x51\x98\x75\x00\x20\x56\xFE\x72\x47\x00"
410 "\x20\x8D\x44\x8B\xAF\x66\xF4\x59\x1A\x00\x20\x00\xBB"),
411 PDF_EncodeText(L"\x7F51\x9875\x0020\x56FE\x7247\x0020"
412 L"\x8D44\x8BAF\x66F4\x591A\x0020\x00BB"));
413
414 // Supplementary Unicode text.
415 EXPECT_EQ("\xFE\xFF\xD8\x3C\xDF\xA8", PDF_EncodeText(L"🎨"));
416}
417
419 for (int pdf_code_point = 0; pdf_code_point < 256; ++pdf_code_point) {
420 ByteString original(static_cast<char>(pdf_code_point));
421 ByteString reencoded =
422 PDF_EncodeText(PDF_DecodeText(original.unsigned_span()).AsStringView());
423
424 switch (pdf_code_point) {
425 case 0x7F:
426 case 0x9F:
427 case 0xAD:
428 EXPECT_EQ(ByteString('\0'), reencoded) << "PDFDocEncoding undefined";
429 break;
430
431 default:
432 EXPECT_EQ(original, reencoded) << "PDFDocEncoding: " << pdf_code_point;
433 break;
434 }
435 }
436}
fxcrt::ByteString ByteString
Definition bytestring.h:180
std::vector< RetainPtr< CPDF_Object > >::const_iterator const_iterator
Definition cpdf_array.h:29
uint32_t AddIndirectObject(RetainPtr< CPDF_Object > pObj)
#define UNSAFE_BUFFERS(...)
#define UNSAFE_TODO(...)
TEST(FXCRYPT, CryptToBase16)
TEST(FXCRYPT, MD5GenerateEmtpyData)
fxcrt::ByteStringView ByteStringView
#define STR_IN_OUT_CASE(input_literal, expected_literal,...)