7#include "core/fpdfapi/page/cpdf_streamparser.h"
15#include "constants/stream_dict_common.h"
16#include "core/fpdfapi/page/cpdf_docpagedata.h"
17#include "core/fpdfapi/parser/cpdf_array.h"
18#include "core/fpdfapi/parser/cpdf_boolean.h"
19#include "core/fpdfapi/parser/cpdf_dictionary.h"
20#include "core/fpdfapi/parser/cpdf_name.h"
21#include "core/fpdfapi/parser/cpdf_null.h"
22#include "core/fpdfapi/parser/cpdf_number.h"
23#include "core/fpdfapi/parser/cpdf_stream.h"
24#include "core/fpdfapi/parser/cpdf_string.h"
25#include "core/fpdfapi/parser/fpdf_parser_decode.h"
26#include "core/fpdfapi/parser/fpdf_parser_utility.h"
27#include "core/fxcodec/jpeg/jpegmodule.h"
28#include "core/fxcodec/scanlinedecoder.h"
29#include "core/fxcrt/data_vector.h"
30#include "core/fxcrt/fx_extension.h"
31#include "core/fxcrt/fx_memory_wrappers.h"
32#include "core/fxcrt/fx_safe_types.h"
33#include "core/fxcrt/span_util.h"
34#include "core/fxge/calculate_pitch.h"
35#include "third_party/base/check.h"
39const uint32_t kMaxNestedParsingLevel = 512;
40const size_t kMaxStringLength = 32767;
42const char kTrue[] =
"true";
43const char kFalse[] =
"false";
44const char kNull[] =
"null";
46uint32_t DecodeAllScanlines(std::unique_ptr<ScanlineDecoder> pDecoder) {
50 int ncomps = pDecoder->CountComps();
51 int bpc = pDecoder->GetBPC();
52 int width = pDecoder->GetWidth();
53 int height = pDecoder->GetHeight();
54 if (width <= 0 || height <= 0)
57 absl::optional<uint32_t> maybe_size =
58 fxge::CalculatePitch8(bpc, ncomps, width);
59 if (!maybe_size.has_value())
62 FX_SAFE_UINT32 size = maybe_size.value();
64 if (size.ValueOrDefault(0) == 0)
67 for (
int row = 0; row < height; ++row) {
68 if (pDecoder->GetScanline(row).empty())
71 return pDecoder->GetSrcOffset();
74uint32_t DecodeInlineStream(pdfium::span<
const uint8_t> src_span,
77 const ByteString& decoder,
81 DCHECK(decoder !=
"A85");
82 DCHECK(decoder !=
"AHx");
83 DCHECK(decoder !=
"CCF");
84 DCHECK(decoder !=
"DCT");
85 DCHECK(decoder !=
"Fl");
86 DCHECK(decoder !=
"LZW");
87 DCHECK(decoder !=
"RL");
90 uint32_t ignored_size;
91 if (decoder
== "FlateDecode") {
92 return FlateOrLZWDecode(
false, src_span, pParam.Get(), orig_size,
93 &ignored_result, &ignored_size);
95 if (decoder
== "LZWDecode") {
96 return FlateOrLZWDecode(
true, src_span, pParam.Get(), 0, &ignored_result,
99 if (decoder
== "DCTDecode") {
100 std::unique_ptr<ScanlineDecoder> pDecoder = JpegModule::CreateDecoder(
101 src_span, width, height, 0,
102 !pParam || pParam->GetIntegerFor(
"ColorTransform", 1));
103 return DecodeAllScanlines(std::move(pDecoder));
105 if (decoder
== "CCITTFaxDecode") {
106 std::unique_ptr<ScanlineDecoder> pDecoder =
107 CreateFaxDecoder(src_span, width, height, pParam.Get());
108 return DecodeAllScanlines(std::move(pDecoder));
111 if (decoder
== "ASCII85Decode")
112 return A85Decode(src_span, &ignored_result, &ignored_size);
113 if (decoder
== "ASCIIHexDecode")
114 return HexDecode(src_span, &ignored_result, &ignored_size);
115 if (decoder
== "RunLengthDecode")
116 return RunLengthDecode(src_span, &ignored_result, &ignored_size);
127 const WeakPtr<ByteStringPool>& pPool)
136 if (m_Pos < m_pBuf.size() && PDFCharIsWhitespace(m_pBuf[m_Pos]))
139 if (m_Pos == m_pBuf.size())
146 const CPDF_Array* pArray = pFilter->AsArray();
148 decoder = pArray->GetByteStringAt(0);
150 pDict->GetArrayFor(pdfium::stream::kDecodeParms);
152 pParam = pParams->GetDictAt(0);
154 decoder = pFilter->GetString();
158 uint32_t width = pDict->GetIntegerFor(
"Width");
159 uint32_t height = pDict->GetIntegerFor(
"Height");
161 uint32_t nComponents = 1;
164 CPDF_DocPageData::FromDocument(pDoc)->GetColorSpace(pCSObj,
nullptr);
165 nComponents = pCS ? pCS->CountComponents() : 3;
166 bpc = pDict->GetIntegerFor(
"BitsPerComponent");
168 absl::optional<uint32_t> maybe_size =
169 fxge::CalculatePitch8(bpc, nComponents, width);
170 if (!maybe_size.has_value())
173 FX_SAFE_UINT32 size = maybe_size.value();
178 uint32_t dwOrigSize = size.ValueOrDie();
179 DataVector<uint8_t> data;
180 uint32_t dwStreamSize;
182 dwOrigSize = std::min<uint32_t>(dwOrigSize, m_pBuf.size() - m_Pos);
183 auto src_span = m_pBuf.subspan(m_Pos, dwOrigSize);
184 data = DataVector<uint8_t>(src_span.begin(), src_span.end());
185 dwStreamSize = dwOrigSize;
188 dwStreamSize = DecodeInlineStream(m_pBuf.subspan(m_Pos), width, height,
189 decoder, std::move(pParam), dwOrigSize);
190 if (!pdfium::base::IsValueInRangeForNumericType<
int>(dwStreamSize))
193 uint32_t dwSavePos = m_Pos;
194 m_Pos += dwStreamSize;
196 uint32_t dwPrevPos = m_Pos;
202 dwStreamSize += m_Pos - dwPrevPos;
205 if (GetWord() ==
"EI") {
209 dwStreamSize += m_Pos - dwPrevPos;
212 auto src_span = m_pBuf.subspan(m_Pos, dwStreamSize);
213 data = DataVector<uint8_t>(src_span.begin(), src_span.end());
214 m_Pos += dwStreamSize;
216 pDict->SetNewFor<CPDF_Number>(
"Length",
static_cast<
int>(dwStreamSize));
217 return pdfium::MakeRetain<CPDF_Stream>(
std::move(data),
std::move(pDict));
223 if (!PositionIsInBounds())
226 uint8_t ch = m_pBuf[m_Pos++];
229 if (!PositionIsInBounds())
232 ch = m_pBuf[m_Pos++];
239 if (!PositionIsInBounds())
242 ch = m_pBuf[m_Pos++];
250 m_pLastObj = ReadNextObject(
false,
false, 0);
254 bool bIsNumber =
true;
256 if (m_WordSize < kMaxWordLength)
257 m_WordBuffer[m_WordSize++] = ch;
262 if (!PositionIsInBounds())
265 ch = m_pBuf[m_Pos++];
273 m_WordBuffer[m_WordSize] = 0;
277 if (m_WordBuffer[0] ==
'/')
280 if (m_WordSize == 4) {
281 if (GetWord() == kTrue) {
282 m_pLastObj = pdfium::MakeRetain<CPDF_Boolean>(
true);
285 if (GetWord() == kNull) {
286 m_pLastObj = pdfium::MakeRetain<CPDF_Null>();
289 }
else if (m_WordSize == 5) {
290 if (GetWord() == kFalse) {
291 m_pLastObj = pdfium::MakeRetain<CPDF_Boolean>(
false);
299 bool bAllowNestedArray,
301 uint32_t dwRecursionLevel) {
304 GetNextWord(bIsNumber);
305 if (!m_WordSize || dwRecursionLevel > kMaxNestedParsingLevel)
309 m_WordBuffer[m_WordSize] = 0;
310 return pdfium::MakeRetain<CPDF_Number>(GetWord());
313 int first_char = m_WordBuffer[0];
314 if (first_char ==
'/') {
315 ByteString name = PDF_NameDecode(GetWord().Substr(1));
316 return pdfium::MakeRetain<CPDF_Name>(m_pPool, name);
319 if (first_char ==
'(') {
320 ByteString str = ReadString();
321 return pdfium::MakeRetain<CPDF_String>(m_pPool, str,
false);
324 if (first_char ==
'<') {
326 return pdfium::MakeRetain<CPDF_String>(m_pPool, ReadHexString(),
true);
328 auto pDict = pdfium::MakeRetain<CPDF_Dictionary>(m_pPool);
330 GetNextWord(bIsNumber);
331 if (m_WordSize == 2 && m_WordBuffer[0] ==
'>')
334 if (!m_WordSize || m_WordBuffer[0] !=
'/')
337 ByteString key = PDF_NameDecode(GetWord().Substr(1));
339 ReadNextObject(
true, bInArray, dwRecursionLevel + 1);
343 pDict->SetFor(key,
std::move(pObj));
348 if (first_char ==
'[') {
349 if ((!bAllowNestedArray && bInArray))
352 auto pArray = pdfium::MakeRetain<CPDF_Array>();
355 ReadNextObject(bAllowNestedArray,
true, dwRecursionLevel + 1);
357 pArray->Append(
std::move(pObj));
360 if (!m_WordSize || m_WordBuffer[0] ==
']')
366 if (GetWord() == kFalse)
367 return pdfium::MakeRetain<CPDF_Boolean>(
false);
368 if (GetWord() == kTrue)
369 return pdfium::MakeRetain<CPDF_Boolean>(
true);
370 if (GetWord() == kNull)
371 return pdfium::MakeRetain<CPDF_Null>();
379 if (!PositionIsInBounds())
382 uint8_t ch = m_pBuf[m_Pos++];
385 if (!PositionIsInBounds()) {
388 ch = m_pBuf[m_Pos++];
395 if (!PositionIsInBounds())
397 ch = m_pBuf[m_Pos++];
405 m_WordBuffer[m_WordSize++] = ch;
408 if (!PositionIsInBounds())
410 ch = m_pBuf[m_Pos++];
415 if (m_WordSize < kMaxWordLength)
416 m_WordBuffer[m_WordSize++] = ch;
418 }
else if (ch ==
'<') {
419 if (!PositionIsInBounds())
421 ch = m_pBuf[m_Pos++];
423 m_WordBuffer[m_WordSize++] = ch;
426 }
else if (ch ==
'>') {
427 if (!PositionIsInBounds())
429 ch = m_pBuf[m_Pos++];
431 m_WordBuffer[m_WordSize++] = ch;
439 if (m_WordSize < kMaxWordLength)
440 m_WordBuffer[m_WordSize++] = ch;
443 if (!PositionIsInBounds())
446 ch = m_pBuf[m_Pos++];
455 if (!PositionIsInBounds())
462 uint8_t ch = m_pBuf[m_Pos++];
468 return buf.First(
std::min(buf.GetLength(), kMaxStringLength));
472 }
else if (ch ==
'(') {
475 }
else if (ch ==
'\\') {
478 buf
+= static_cast<
char>(ch);
493 }
else if (ch ==
'n') {
495 }
else if (ch ==
'r') {
497 }
else if (ch ==
't') {
499 }
else if (ch ==
'b') {
501 }
else if (ch ==
'f') {
504 buf
+= static_cast<
char>(ch);
514 buf
+= static_cast<
char>(iEscCode);
523 buf
+= static_cast<
char>(iEscCode);
526 buf
+= static_cast<
char>(iEscCode);
537 if (!PositionIsInBounds())
538 return buf.First(
std::min(buf.GetLength(), kMaxStringLength));
540 ch = m_pBuf[m_Pos++];
545 if (!PositionIsInBounds())
551 while (PositionIsInBounds()) {
552 uint8_t ch = m_pBuf[m_Pos++];
564 buf
+= static_cast<uint8_t>(code);
569 buf
+= static_cast<
char>(code);
571 return buf.First(std::min<size_t>(buf.GetLength(), kMaxStringLength));
575 return m_Pos < m_pBuf.size();
ElementType ParseNextElement()
RetainPtr< CPDF_Object > ReadNextObject(bool bAllowNestedArray, bool bInArray, uint32_t dwRecursionLevel)
RetainPtr< CPDF_Stream > ReadInlineStream(CPDF_Document *pDoc, RetainPtr< CPDF_Dictionary > pDict, const CPDF_Object *pCSObj)
CPDF_StreamParser(pdfium::span< const uint8_t > span, const WeakPtr< ByteStringPool > &pPool)
CPDF_StreamParser(pdfium::span< const uint8_t > span)
ByteString & operator+=(char ch)
bool operator==(const char *ptr) const
bool PDFCharIsWhitespace(uint8_t c)
bool PDFCharIsOther(uint8_t c)
bool PDFCharIsNumeric(uint8_t c)
bool PDFCharIsDelimiter(uint8_t c)
bool PDFCharIsLineEnding(uint8_t c)
int FXSYS_DecimalCharToInt(char c)
bool FXSYS_IsOctalDigit(char c)
int FXSYS_HexCharToInt(char c)
#define FX_INVALID_OFFSET
const char kDecodeParms[]