7#include "core/fpdfapi/parser/cpdf_syntax_parser.h"
14#include "core/fpdfapi/parser/cpdf_array.h"
15#include "core/fpdfapi/parser/cpdf_boolean.h"
16#include "core/fpdfapi/parser/cpdf_crypto_handler.h"
17#include "core/fpdfapi/parser/cpdf_dictionary.h"
18#include "core/fpdfapi/parser/cpdf_name.h"
19#include "core/fpdfapi/parser/cpdf_null.h"
20#include "core/fpdfapi/parser/cpdf_number.h"
21#include "core/fpdfapi/parser/cpdf_read_validator.h"
22#include "core/fpdfapi/parser/cpdf_reference.h"
23#include "core/fpdfapi/parser/cpdf_stream.h"
24#include "core/fpdfapi/parser/cpdf_string.h"
25#include "core/fpdfapi/parser/fpdf_parser_utility.h"
26#include "core/fxcrt/autorestorer.h"
27#include "core/fxcrt/cfx_read_only_vector_stream.h"
28#include "core/fxcrt/fixed_size_data_vector.h"
29#include "core/fxcrt/fx_extension.h"
30#include "core/fxcrt/fx_safe_types.h"
31#include "third_party/base/check.h"
32#include "third_party/base/check_op.h"
36enum class ReadStatus {
49 : m_pFileRead(std::move(pFileRead)),
50 m_PartOffset(part_offset),
51 m_PartSize(part_size) {}
53 ~ReadableSubStream()
override =
default;
56 bool ReadBlockAtOffset(pdfium::span<uint8_t> buffer,
58 FX_SAFE_FILESIZE safe_end = offset;
59 safe_end += buffer.size();
62 if (!safe_end.IsValid() || safe_end.ValueOrDie() > m_PartSize)
65 return m_pFileRead->ReadBlockAtOffset(buffer, m_PartOffset + offset);
68 FX_FILESIZE GetSize()
override {
return m_PartSize; }
100 m_HeaderOffset(HeaderOffset),
102 DCHECK(m_HeaderOffset <= m_FileLen);
114 if (read_pos >= m_FileLen)
116 size_t read_size = m_ReadBufferSize;
117 FX_SAFE_FILESIZE safe_end = read_pos;
118 safe_end += read_size;
119 if (!safe_end.IsValid() || safe_end.ValueOrDie() > m_FileLen)
120 read_size = m_FileLen - read_pos;
122 m_pFileBuf.resize(read_size);
123 if (!m_pFileAccess->ReadBlockAtOffset(m_pFileBuf, read_pos)) {
128 m_BufOffset = read_pos;
134 if (pos >= m_FileLen)
137 if (!IsPositionRead(pos) && !ReadBlockAt(pos))
140 ch = m_pFileBuf[pos - m_BufOffset];
146 return m_FileLen - m_HeaderOffset;
150 pos += m_HeaderOffset;
151 if (pos >= m_FileLen)
154 if (!IsPositionRead(pos)) {
158 if (!ReadBlockAt(block_start) || !IsPositionRead(pos))
161 *ch = m_pFileBuf[pos - m_BufOffset];
166 if (!m_pFileAccess->ReadBlockAtOffset(buffer, m_Pos + m_HeaderOffset))
168 m_Pos += buffer.size();
174 WordType word_type = WordType::kNumber;
182 word_type = WordType::kWord;
184 m_WordBuffer[m_WordSize++] = ch;
195 if (m_WordSize <
sizeof(m_WordBuffer) - 1)
196 m_WordBuffer[m_WordSize++] = ch;
198 }
else if (ch ==
'<') {
203 m_WordBuffer[m_WordSize++] = ch;
206 }
else if (ch ==
'>') {
211 m_WordBuffer[m_WordSize++] = ch;
219 if (m_WordSize <
sizeof(m_WordBuffer) - 1)
220 m_WordBuffer[m_WordSize++] = ch;
223 word_type = WordType::kWord;
242 int32_t parlevel = 0;
243 ReadStatus status = ReadStatus::kNormal;
244 int32_t iEscCode = 0;
247 case ReadStatus::kNormal:
250 return ByteString
(buf
);
252 }
else if (ch ==
'(') {
256 status = ReadStatus::kBackslash;
258 buf
+= static_cast<
char>(ch);
260 case ReadStatus::kBackslash:
263 status = ReadStatus::kOctal;
267 status = ReadStatus::kCarriageReturn;
272 }
else if (ch ==
'r') {
274 }
else if (ch ==
't') {
276 }
else if (ch ==
'b') {
278 }
else if (ch ==
'f') {
280 }
else if (ch !=
'\n') {
281 buf
+= static_cast<
char>(ch);
283 status = ReadStatus::kNormal;
285 case ReadStatus::kOctal:
289 status = ReadStatus::kFinishOctal;
291 buf
+= static_cast<
char>(iEscCode);
292 status = ReadStatus::kNormal;
296 case ReadStatus::kFinishOctal:
297 status = ReadStatus::kNormal;
301 buf
+= static_cast<
char>(iEscCode);
303 buf
+= static_cast<
char>(iEscCode);
307 case ReadStatus::kCarriageReturn:
308 status = ReadStatus::kNormal;
340 buf
+= static_cast<
char>(code);
349 buf
+= static_cast<
char>(code);
410 DCHECK(m_TrailerEnds);
448 if (ch ==
'\r' || ch ==
'\n')
449 m_TrailerEnds->push_back(m_Pos);
465 WordType word_type = GetNextWordInternal();
467 if (!GetValidator()->has_read_problems())
468 word
= ByteString(m_WordBuffer, m_WordSize);
469 return {word, word_type == WordType::kNumber};
483 m_Pos =
std::min(pos, m_FileLen);
490 if (GetValidator()->has_read_problems())
498 AutoRestorer<
int> depth_restorer(&s_CurrentRecursionDepth);
499 if (++s_CurrentRecursionDepth > kParserMaxRecursionDepth)
504 const ByteString& word = word_result.word;
512 return pdfium::MakeRetain<CPDF_Number>(word.AsStringView());
515 if (nextword2.word !=
"R")
516 return pdfium::MakeRetain<CPDF_Number>(word.AsStringView());
518 pos_restorer.AbandonRestoration();
523 return pdfium::MakeRetain<CPDF_Reference>(pObjList, refnum);
526 if (word
== "true" || word
== "false")
527 return pdfium::MakeRetain<CPDF_Boolean>(word
== "true");
530 return pdfium::MakeRetain<CPDF_Null>();
534 return pdfium::MakeRetain<CPDF_String>(m_pPool, str,
false);
538 return pdfium::MakeRetain<CPDF_String>(m_pPool, str,
true);
541 auto pArray = pdfium::MakeRetain<CPDF_Array>();
542 while (RetainPtr<CPDF_Object> pObj =
543 GetObjectBodyInternal(pObjList, ParseType::kLoose)) {
544 pArray->Append(
std::move(pObj));
550 if (word
[0
] ==
'/') {
551 return pdfium::MakeRetain<CPDF_Name>(
553 PDF_NameDecode(ByteStringView(m_WordBuffer + 1, m_WordSize - 1)));
557 pdfium::MakeRetain<CPDF_Dictionary>(m_pPool);
560 const ByteString& inner_word = inner_word_result.word;
564 FX_FILESIZE SavedPos = m_Pos - inner_word.GetLength();
565 if (inner_word
== ">>")
568 if (inner_word
== "endobj") {
572 if (inner_word
[0
] !=
'/')
575 ByteString key = PDF_NameDecode(inner_word.AsStringView());
580 GetObjectBodyInternal(pObjList, ParseType::kLoose);
591 if (key.GetLength() > 1 && !pObj->IsStream()) {
592 pDict->SetFor(key.Substr(1),
std::move(pObj));
599 pos_restorer.AbandonRestoration();
600 return ReadStream(
std::move(pDict));
615 if (!objnum_word_result
.is_number || objnum_word_result.word.IsEmpty()) {
619 const uint32_t parser_objnum = FXSYS_atoui(objnum_word_result.word.c_str());
622 const ByteString& gennum_word = gennum_word_result.word;
636 pObj->SetObjNum(parser_objnum);
637 pObj->SetGenNum(parser_gennum);
640 return GetValidator()->has_read_problems() ?
nullptr : std::move(pObj);
644 unsigned char byte1 = 0;
645 unsigned char byte2 = 0;
650 if (byte1 ==
'\r' && byte2 ==
'\n')
653 if (byte1 ==
'\r' || byte1 ==
'\n')
662 while (end_offset >= 0) {
664 if (IsWholeWord(
GetPos() - word.GetLength(), m_FileLen, word,
true))
673 const ByteStringView kEndStreamStr(
"endstream");
674 const ByteStringView kEndObjStr(
"endobj");
676 FX_FILESIZE endStreamWordOffset = FindWordPos(kEndStreamStr);
677 FX_FILESIZE endObjWordOffset = FindWordPos(kEndObjStr);
680 if (endStreamWordOffset < 0 && endObjWordOffset < 0) {
684 if (endStreamWordOffset < 0 && endObjWordOffset >= 0) {
686 endStreamWordOffset = endObjWordOffset;
687 }
else if (endStreamWordOffset >= 0 && endObjWordOffset < 0) {
689 endObjWordOffset = endStreamWordOffset;
690 }
else if (endStreamWordOffset > endObjWordOffset) {
691 endStreamWordOffset = endObjWordOffset;
694 int numMarkers = ReadEOLMarkers(endStreamWordOffset - 2);
695 if (numMarkers == 2) {
696 endStreamWordOffset -= 2;
698 numMarkers = ReadEOLMarkers(endStreamWordOffset - 1);
699 if (numMarkers == 1) {
700 endStreamWordOffset -= 1;
703 if (endStreamWordOffset <
GetPos()) {
706 return endStreamWordOffset;
712 ToNumber(pDict->GetDirectObjectFor(
"Length"));
713 FX_FILESIZE len = pLenObj ? pLenObj->GetInteger() : -1;
720 FX_SAFE_FILESIZE pos = GetPos();
722 if (!pos.IsValid() || pos.ValueOrDie() >= m_FileLen)
730 if (!GetValidator()->CheckDataRangeAndRequestIfUnavailable(
731 m_HeaderOffset + GetPos(), len)) {
735 substream = pdfium::MakeRetain<ReadableSubStream>(
740 const ByteStringView kEndStreamStr(
"endstream");
741 const ByteStringView kEndObjStr(
"endobj");
748 memset(m_WordBuffer, 0, kEndStreamStr.GetLength() + 1);
749 GetNextWordInternal();
750 if (GetValidator()->has_read_problems())
756 if (memcmp(m_WordBuffer, kEndStreamStr.raw_str(),
757 kEndStreamStr.GetLength()) != 0) {
767 const FX_FILESIZE streamEndPos = FindStreamEndPos();
768 if (streamEndPos < 0)
771 len = streamEndPos - streamStartPos;
777 if (!GetValidator()->CheckDataRangeAndRequestIfUnavailable(
778 m_HeaderOffset + GetPos(), len)) {
782 substream = pdfium::MakeRetain<ReadableSubStream>(
794 auto data = FixedSizeDataVector<uint8_t>::Uninit(substream->GetSize());
795 bool did_read = substream->ReadBlockAtOffset(data.span(), 0);
797 auto data_as_stream =
798 pdfium::MakeRetain<CFX_ReadOnlyVectorStream>(
std::move(data));
800 pStream = pdfium::MakeRetain<CPDF_Stream>();
801 pStream->InitStreamFromFile(
std::move(data_as_stream),
std::move(pDict));
804 pStream = pdfium::MakeRetain<CPDF_Stream>(
std::move(pDict));
807 memset(m_WordBuffer, 0, kEndObjStr.GetLength() + 1);
808 GetNextWordInternal();
811 unsigned char ch = 0;
818 int numMarkers = ReadEOLMarkers(
GetPos());
819 if (m_WordSize ==
static_cast<
unsigned int>(kEndObjStr.GetLength()) &&
821 memcmp(m_WordBuffer, kEndObjStr.raw_str(), kEndObjStr.GetLength()) == 0) {
828 if (GetNextWordInternal() != WordType::kNumber)
831 m_WordBuffer[m_WordSize] = 0;
832 return FXSYS_atoui(reinterpret_cast<
const char*>(m_WordBuffer)
);
836 return m_pFileAccess;
843 const uint32_t taglen = tag.GetLength();
845 bool bCheckLeft = !PDFCharIsDelimiter(tag[0]) && !PDFCharIsWhitespace(tag[0]);
846 bool bCheckRight = !PDFCharIsDelimiter(tag[taglen - 1]) &&
847 !PDFCharIsWhitespace(tag[taglen - 1]);
850 if (bCheckRight && startpos +
static_cast<int32_t>(taglen) <= limit &&
851 GetCharAt(startpos +
static_cast<int32_t>(taglen)
, ch
)) {
858 if (bCheckLeft && startpos > 0 &&
GetCharAt(startpos - 1
, ch
)) {
869 int32_t taglen = word.GetLength();
874 int32_t offset = taglen - 1;
876 if (limit && pos <= m_Pos - limit)
880 if (!GetCharAtBackward(pos, &byte))
883 if (byte == word[offset]) {
889 if (IsWholeWord(pos, limit, word,
false)) {
894 offset = byte == word[taglen - 1] ? taglen - 2 : taglen - 1;
903 const int32_t taglen = tag.GetLength();
904 DCHECK_GT(taglen, 0);
912 if (ch == tag[match]) {
915 return GetPos() - startpos - taglen;
917 match = ch == tag[0] ? 1 : 0;
923 return m_BufOffset <= pos &&
924 pos <
static_cast<
FX_FILESIZE>(m_BufOffset + m_pFileBuf.size());
static constexpr uint32_t kInvalidObjNum
static constexpr int kFileBufSize
RetainPtr< CPDF_Object > GetIndirectObject(CPDF_IndirectObjectHolder *pObjList, ParseType parse_type)
bool BackwardsSearchToWord(ByteStringView word, FX_FILESIZE limit)
RetainPtr< CPDF_ReadValidator > GetValidator() const
ByteString PeekNextWord()
void RecordingToNextWord()
CPDF_SyntaxParser(RetainPtr< IFX_SeekableReadStream > pFileAccess)
FX_FILESIZE FindTag(ByteStringView tag)
FX_FILESIZE GetPos() const
ByteString ReadHexString()
FX_FILESIZE GetDocumentSize() const
CPDF_SyntaxParser(RetainPtr< CPDF_ReadValidator > pValidator, FX_FILESIZE HeaderOffset)
bool GetNextChar(uint8_t &ch)
bool ReadBlock(pdfium::span< uint8_t > buffer)
void SetPos(FX_FILESIZE pos)
RetainPtr< CPDF_Object > GetObjectBody(CPDF_IndirectObjectHolder *pObjList)
bool GetCharAt(FX_FILESIZE pos, uint8_t &ch)
ByteString & operator+=(char ch)
bool operator==(const char *ptr) const
ByteString(const ByteString &other)
const char * c_str() const
ByteString & operator=(ByteString &&that) noexcept
CharType operator[](const size_t index) const
bool operator!=(const char *ptr) const
bool PDFCharIsWhitespace(uint8_t c)
bool PDFCharIsOther(uint8_t c)
bool PDFCharIsNumeric(uint8_t c)
bool PDFCharIsDelimiter(uint8_t c)
bool PDFCharIsLineEnding(uint8_t c)
bool FXSYS_IsOctalDigit(char c)
int FXSYS_DecimalCharToInt(wchar_t c)
int FXSYS_HexCharToInt(char c)
uint32_t FXSYS_atoui(const char *str)