7#include "core/fpdfapi/parser/cpdf_syntax_parser.h"
14#include "core/fpdfapi/parser/cpdf_array.h"
15#include "core/fpdfapi/parser/cpdf_boolean.h"
16#include "core/fpdfapi/parser/cpdf_crypto_handler.h"
17#include "core/fpdfapi/parser/cpdf_dictionary.h"
18#include "core/fpdfapi/parser/cpdf_name.h"
19#include "core/fpdfapi/parser/cpdf_null.h"
20#include "core/fpdfapi/parser/cpdf_number.h"
21#include "core/fpdfapi/parser/cpdf_read_validator.h"
22#include "core/fpdfapi/parser/cpdf_reference.h"
23#include "core/fpdfapi/parser/cpdf_stream.h"
24#include "core/fpdfapi/parser/cpdf_string.h"
25#include "core/fpdfapi/parser/fpdf_parser_utility.h"
26#include "core/fxcrt/autorestorer.h"
27#include "core/fxcrt/cfx_read_only_vector_stream.h"
28#include "core/fxcrt/check.h"
29#include "core/fxcrt/check_op.h"
30#include "core/fxcrt/data_vector.h"
31#include "core/fxcrt/fixed_size_data_vector.h"
32#include "core/fxcrt/fx_extension.h"
33#include "core/fxcrt/fx_memcpy_wrappers.h"
34#include "core/fxcrt/fx_safe_types.h"
35#include "core/fxcrt/stl_util.h"
39enum class ReadStatus {
52 : m_pFileRead(std::move(pFileRead)),
53 m_PartOffset(part_offset),
54 m_PartSize(part_size) {}
56 ~ReadableSubStream()
override =
default;
59 bool ReadBlockAtOffset(pdfium::span<uint8_t> buffer,
62 safe_end += buffer.size();
65 if (!safe_end.IsValid() || safe_end.ValueOrDie() > m_PartSize)
68 return m_pFileRead->ReadBlockAtOffset(buffer, m_PartOffset + offset);
71 FX_FILESIZE GetSize()
override {
return m_PartSize; }
103 m_HeaderOffset(HeaderOffset),
105 DCHECK(m_HeaderOffset <= m_FileLen);
117 if (read_pos >= m_FileLen)
119 size_t read_size = m_ReadBufferSize;
121 safe_end += read_size;
122 if (!safe_end.IsValid() || safe_end.ValueOrDie() > m_FileLen)
123 read_size = m_FileLen - read_pos;
125 m_pFileBuf.resize(read_size);
126 if (!m_pFileAccess->ReadBlockAtOffset(m_pFileBuf, read_pos)) {
131 m_BufOffset = read_pos;
137 if (pos >= m_FileLen)
140 if (!IsPositionRead(pos) && !ReadBlockAt(pos))
143 ch = m_pFileBuf[pos - m_BufOffset];
149 return m_FileLen - m_HeaderOffset;
153 pos += m_HeaderOffset;
154 if (pos >= m_FileLen)
157 if (!IsPositionRead(pos)) {
161 if (!ReadBlockAt(block_start) || !IsPositionRead(pos))
164 *ch = m_pFileBuf[pos - m_BufOffset];
169 if (!m_pFileAccess->ReadBlockAtOffset(buffer, m_Pos + m_HeaderOffset))
171 m_Pos += buffer.size();
177 WordType word_type = WordType::kNumber;
185 word_type = WordType::kWord;
187 m_WordBuffer[m_WordSize++] = ch;
198 if (m_WordSize <
sizeof(m_WordBuffer) - 1)
199 m_WordBuffer[m_WordSize++] = ch;
201 }
else if (ch ==
'<') {
206 m_WordBuffer[m_WordSize++] = ch;
209 }
else if (ch ==
'>') {
214 m_WordBuffer[m_WordSize++] = ch;
222 if (m_WordSize <
sizeof(m_WordBuffer) - 1)
223 m_WordBuffer[m_WordSize++] = ch;
226 word_type = WordType::kWord;
245 int32_t parlevel = 0;
246 ReadStatus status = ReadStatus::kNormal;
247 int32_t iEscCode = 0;
250 case ReadStatus::kNormal:
255 }
else if (ch ==
'(') {
259 status = ReadStatus::kBackslash;
261 buf
+= static_cast<
char>(ch);
263 case ReadStatus::kBackslash:
266 status = ReadStatus::kOctal;
270 status = ReadStatus::kCarriageReturn;
275 }
else if (ch ==
'r') {
277 }
else if (ch ==
't') {
279 }
else if (ch ==
'b') {
281 }
else if (ch ==
'f') {
283 }
else if (ch !=
'\n') {
284 buf
+= static_cast<
char>(ch);
286 status = ReadStatus::kNormal;
288 case ReadStatus::kOctal:
292 status = ReadStatus::kFinishOctal;
294 buf
+= static_cast<
char>(iEscCode);
295 status = ReadStatus::kNormal;
299 case ReadStatus::kFinishOctal:
300 status = ReadStatus::kNormal;
304 buf
+= static_cast<
char>(iEscCode);
306 buf
+= static_cast<
char>(iEscCode);
310 case ReadStatus::kCarriageReturn:
311 status = ReadStatus::kNormal;
328 return DataVector<uint8_t>();
331 DataVector<uint8_t> buf;
454 if (ch ==
'\r' || ch ==
'\n')
455 m_TrailerEnds->push_back(m_Pos);
471 WordType word_type = GetNextWordInternal();
473 if (!GetValidator()->has_read_problems()) {
474 word = ByteStringView(pdfium::make_span(m_WordBuffer).first(m_WordSize));
476 return {
ByteString(word), word_type == WordType::kNumber};
490 m_Pos =
std::min(pos, m_FileLen);
497 if (GetValidator()->has_read_problems())
505 AutoRestorer<
int> depth_restorer(&s_CurrentRecursionDepth);
506 if (++s_CurrentRecursionDepth > kParserMaxRecursionDepth)
519 return pdfium::MakeRetain<CPDF_Number>(word.AsStringView());
522 if (nextword2.word !=
"R")
523 return pdfium::MakeRetain<CPDF_Number>(word.AsStringView());
525 pos_restorer.AbandonRestoration();
526 uint32_t refnum = FXSYS_atoui(word.c_str());
530 return pdfium::MakeRetain<CPDF_Reference>(pObjList, refnum);
533 if (word
== "true" || word
== "false")
534 return pdfium::MakeRetain<CPDF_Boolean>(word
== "true");
537 return pdfium::MakeRetain<CPDF_Null>();
540 return pdfium::MakeRetain<CPDF_String>(m_pPool, ReadString());
543 return pdfium::MakeRetain<CPDF_String>(m_pPool, ReadHexString(),
544 CPDF_String::DataType::kIsHex);
548 while (RetainPtr<CPDF_Object> pObj =
549 GetObjectBodyInternal(pObjList, ParseType::kLoose)) {
551 if (!pObj->IsStream()) {
552 pArray->Append(
std::move(pObj));
555 return (parse_type == ParseType::kLoose || m_WordBuffer[0] ==
']')
559 if (word[0] ==
'/') {
560 auto word_span = pdfium::make_span(m_WordBuffer).first(m_WordSize);
561 return pdfium::MakeRetain<CPDF_Name>(
562 m_pPool, PDF_NameDecode(ByteStringView(word_span).Substr(1)));
566 pdfium::MakeRetain<CPDF_Dictionary>(m_pPool);
569 const ByteString& inner_word = inner_word_result.word;
570 if (inner_word.IsEmpty())
573 FX_FILESIZE SavedPos = m_Pos - inner_word.GetLength();
574 if (inner_word
== ">>")
577 if (inner_word
== "endobj") {
581 if (inner_word[0] !=
'/')
584 ByteString key = PDF_NameDecode(inner_word.AsStringView());
589 GetObjectBodyInternal(pObjList, ParseType::kLoose);
600 if (key.GetLength() > 1 && !pObj->IsStream()) {
601 pDict->SetFor(key.Substr(1),
std::move(pObj));
608 pos_restorer.AbandonRestoration();
609 return ReadStream(
std::move(pDict));
624 if (!objnum_word_result
.is_number || objnum_word_result.word.IsEmpty()) {
628 const uint32_t parser_objnum = FXSYS_atoui(objnum_word_result.word.c_str());
631 const ByteString& gennum_word = gennum_word_result.word;
632 if (!gennum_word_result
.is_number || gennum_word.IsEmpty()) {
636 const uint32_t parser_gennum = FXSYS_atoui(gennum_word.c_str());
645 pObj->SetObjNum(parser_objnum);
646 pObj->SetGenNum(parser_gennum);
649 return GetValidator()->has_read_problems() ?
nullptr : pObj;
653 unsigned char byte1 = 0;
654 unsigned char byte2 = 0;
659 if (byte1 ==
'\r' && byte2 ==
'\n')
662 if (byte1 ==
'\r' || byte1 ==
'\n')
671 while (end_offset >= 0) {
673 if (IsWholeWord(
GetPos() - word.GetLength(), m_FileLen, word,
true))
685 FX_FILESIZE endStreamWordOffset = FindWordPos(kEndStreamStr);
686 FX_FILESIZE endObjWordOffset = FindWordPos(kEndObjStr);
689 if (endStreamWordOffset < 0 && endObjWordOffset < 0) {
693 if (endStreamWordOffset < 0 && endObjWordOffset >= 0) {
695 endStreamWordOffset = endObjWordOffset;
696 }
else if (endStreamWordOffset >= 0 && endObjWordOffset < 0) {
698 endObjWordOffset = endStreamWordOffset;
699 }
else if (endStreamWordOffset > endObjWordOffset) {
700 endStreamWordOffset = endObjWordOffset;
703 int numMarkers = ReadEOLMarkers(endStreamWordOffset - 2);
704 if (numMarkers == 2) {
705 endStreamWordOffset -= 2;
707 numMarkers = ReadEOLMarkers(endStreamWordOffset - 1);
708 if (numMarkers == 1) {
709 endStreamWordOffset -= 1;
712 if (endStreamWordOffset <
GetPos()) {
715 return endStreamWordOffset;
721 ToNumber(pDict->GetDirectObjectFor(
"Length"));
722 FX_FILESIZE len = pLenObj ? pLenObj->GetInteger() : -1;
731 if (!pos.IsValid() || pos.ValueOrDie() >= m_FileLen)
739 if (!GetValidator()->CheckDataRangeAndRequestIfUnavailable(
740 m_HeaderOffset + GetPos(), len)) {
744 substream =
pdfium::MakeRetain<ReadableSubStream>(
757 const size_t zap_length = kEndStreamStr.GetLength() + 1;
758 fxcrt::Fill(pdfium::make_span(m_WordBuffer).first(zap_length), 0);
759 GetNextWordInternal();
760 if (GetValidator()->has_read_problems())
766 if (memcmp(m_WordBuffer.data(), kEndStreamStr.unterminated_unsigned_str(),
767 kEndStreamStr.GetLength()) != 0) {
777 const FX_FILESIZE streamEndPos = FindStreamEndPos();
778 if (streamEndPos < 0)
781 len = streamEndPos - streamStartPos;
787 if (!GetValidator()->CheckDataRangeAndRequestIfUnavailable(
788 m_HeaderOffset + GetPos(), len)) {
792 substream =
pdfium::MakeRetain<ReadableSubStream>(
804 auto data = FixedSizeDataVector<uint8_t>::Uninit(substream->GetSize());
805 bool did_read = substream->ReadBlockAtOffset(data.span(), 0);
807 auto data_as_stream =
808 pdfium::MakeRetain<CFX_ReadOnlyVectorStream>(
std::move(data));
810 stream =
pdfium::MakeRetain<CPDF_Stream>(
std::move(data_as_stream),
814 stream =
pdfium::MakeRetain<CPDF_Stream>(
std::move(pDict));
817 const size_t zap_length = kEndObjStr.GetLength() + 1;
818 fxcrt::Fill(pdfium::make_span(m_WordBuffer).first(zap_length), 0);
819 GetNextWordInternal();
822 unsigned char ch = 0;
829 int numMarkers = ReadEOLMarkers(
GetPos());
830 if (m_WordSize ==
static_cast<
unsigned int>(kEndObjStr.GetLength()) &&
832 memcmp(m_WordBuffer.data(), kEndObjStr.unterminated_unsigned_str(),
833 kEndObjStr.GetLength()) == 0) {
840 if (GetNextWordInternal() != WordType::kNumber)
843 m_WordBuffer[m_WordSize] = 0;
844 return FXSYS_atoui(pdfium::as_chars(pdfium::make_span(m_WordBuffer)).data());
848 return m_pFileAccess;
855 const uint32_t taglen = tag.GetLength();
857 bool bCheckLeft = !PDFCharIsDelimiter(tag[0]) && !PDFCharIsWhitespace(tag[0]);
858 bool bCheckRight = !PDFCharIsDelimiter(tag[taglen - 1]) &&
859 !PDFCharIsWhitespace(tag[taglen - 1]);
862 if (bCheckRight && startpos +
static_cast<int32_t>(taglen) <= limit &&
863 GetCharAt(startpos +
static_cast<int32_t>(taglen)
, ch
)) {
870 if (bCheckLeft && startpos > 0 &&
GetCharAt(startpos - 1
, ch
)) {
881 int32_t taglen = word.GetLength();
886 int32_t offset = taglen - 1;
888 if (limit && pos <= m_Pos - limit)
892 if (!GetCharAtBackward(pos, &byte))
895 if (byte == word[offset]) {
901 if (IsWholeWord(pos, limit, word,
false)) {
906 offset = byte == word[taglen - 1] ? taglen - 2 : taglen - 1;
915 const int32_t taglen = tag.GetLength();
924 if (ch == tag[match]) {
927 return GetPos() - startpos - taglen;
929 match = ch == tag[0] ? 1 : 0;
935 return m_BufOffset <= pos &&
936 pos <
static_cast<
FX_FILESIZE>(m_BufOffset + m_pFileBuf.size());
fxcrt::ByteString ByteString
std::vector< RetainPtr< CPDF_Object > >::const_iterator const_iterator
std::map< ByteString, RetainPtr< CPDF_Object >, std::less<> > DictMap
static constexpr uint32_t kInvalidObjNum
static constexpr int kFileBufSize
RetainPtr< CPDF_Object > GetIndirectObject(CPDF_IndirectObjectHolder *pObjList, ParseType parse_type)
DataVector< uint8_t > ReadHexString()
bool BackwardsSearchToWord(ByteStringView word, FX_FILESIZE limit)
RetainPtr< CPDF_ReadValidator > GetValidator() const
ByteString PeekNextWord()
void RecordingToNextWord()
CPDF_SyntaxParser(RetainPtr< IFX_SeekableReadStream > pFileAccess)
FX_FILESIZE FindTag(ByteStringView tag)
FX_FILESIZE GetPos() const
FX_FILESIZE GetDocumentSize() const
CPDF_SyntaxParser(RetainPtr< CPDF_ReadValidator > pValidator, FX_FILESIZE HeaderOffset)
bool GetNextChar(uint8_t &ch)
bool ReadBlock(pdfium::span< uint8_t > buffer)
void SetPos(FX_FILESIZE pos)
RetainPtr< CPDF_Object > GetObjectBody(CPDF_IndirectObjectHolder *pObjList)
bool GetCharAt(FX_FILESIZE pos, uint8_t &ch)
ByteString & operator+=(char ch)
bool operator==(const char *ptr) const
bool operator!=(const char *ptr) const
ByteString(const ByteString &other)=default
bool PDFCharIsWhitespace(uint8_t c)
bool PDFCharIsOther(uint8_t c)
bool PDFCharIsNumeric(uint8_t c)
bool PDFCharIsDelimiter(uint8_t c)
bool PDFCharIsLineEnding(uint8_t c)
bool FXSYS_IsOctalDigit(char c)
int FXSYS_DecimalCharToInt(wchar_t c)
int FXSYS_HexCharToInt(char c)
pdfium::CheckedNumeric< FX_FILESIZE > FX_SAFE_FILESIZE
fxcrt::ByteStringView ByteStringView