Qt
Internal/Contributor docs for the Qt SDK. Note: These are NOT official API docs; those are found at https://doc.qt.io/
Loading...
Searching...
No Matches
cpdf_syntax_parser.h
Go to the documentation of this file.
1// Copyright 2016 The PDFium Authors
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7#ifndef CORE_FPDFAPI_PARSER_CPDF_SYNTAX_PARSER_H_
8#define CORE_FPDFAPI_PARSER_CPDF_SYNTAX_PARSER_H_
9
10#include <stdint.h>
11
12#include <array>
13#include <memory>
14#include <vector>
15
16#include "core/fpdfapi/parser/cpdf_stream.h"
17#include "core/fxcrt/data_vector.h"
18#include "core/fxcrt/fx_types.h"
19#include "core/fxcrt/retain_ptr.h"
20#include "core/fxcrt/span.h"
21#include "core/fxcrt/string_pool_template.h"
22#include "core/fxcrt/unowned_ptr.h"
23#include "core/fxcrt/weak_ptr.h"
24
25class CPDF_Dictionary;
27class CPDF_Object;
29class CPDF_Stream;
31
33 public:
// Parsing mode selector with two values, kStrict and kLoose; taken as the
// |parse_type| argument by the object-reading methods of this class.
// NOTE(review): the precise behavioral difference between the two modes is
// defined in the implementation file and is not visible here - confirm there.
34 enum class ParseType : bool { kStrict, kLoose };
35
40
43 FX_FILESIZE HeaderOffset);
44
47 FX_FILESIZE HeaderOffset);
49
// Stores |read_buffer_size| in m_ReadBufferSize, replacing the default value
// of CPDF_Stream::kFileBufSize that the member is initialized with.
50 void SetReadBufferSize(uint32_t read_buffer_size) {
51 m_ReadBufferSize = read_buffer_size;
52 }
53
// Returns m_Pos, the parser's current position.
// NOTE(review): presumably relative to the header offset, as described on
// m_HeaderOffset - confirm against the implementation.
54 FX_FILESIZE GetPos() const { return m_Pos; }
55 void SetPos(FX_FILESIZE pos);
56
59 ParseType parse_type);
60
62 void ToNextLine();
63 void ToNextWord();
67 bool ReadBlock(pdfium::span<uint8_t> buffer);
68 bool GetCharAt(FX_FILESIZE pos, uint8_t& ch);
71
73 uint32_t GetDirectNum();
74 bool GetNextChar(uint8_t& ch);
75
76 // The document size may be smaller than the file size.
77 // The syntax parser uses positions relative to the document
78 // offset (|m_HeaderOffset|).
79 // The document size is FileSize - "Header offset".
80 // All offsets read from the document should not be greater than the document
81 // size. Use it for checks instead of the real file size.
83
86
// Stores |trailer_ends| in m_TrailerEnds, the vector in which the parser
// records traversed trailer end byte offsets. The pointer is held as an
// UnownedPtr, so this class does not take ownership of the vector.
// NOTE(review): the vector presumably must outlive this parser (or be reset
// before it is destroyed) - confirm with callers.
87 void SetTrailerEnds(std::vector<unsigned int>* trailer_ends) {
88 m_TrailerEnds = trailer_ends;
89 }
90
91 private:
// Two-valued classification (kWord or kNumber) returned by
// GetNextWordInternal().
92 enum class WordType : bool { kWord, kNumber };
93
94 friend class CPDF_DataAvail;
95 friend class cpdf_syntax_parser_ReadHexString_Test;
96
97 static constexpr int kParserMaxRecursionDepth = 64;
98 static int s_CurrentRecursionDepth;
99
100 bool ReadBlockAt(FX_FILESIZE read_pos);
101 bool GetCharAtBackward(FX_FILESIZE pos, uint8_t* ch);
102 WordType GetNextWordInternal();
103 bool IsWholeWord(FX_FILESIZE startpos,
104 FX_FILESIZE limit,
105 ByteStringView tag,
106 bool checkKeyword);
107
108 unsigned int ReadEOLMarkers(FX_FILESIZE pos);
109 FX_FILESIZE FindWordPos(ByteStringView word);
110 FX_FILESIZE FindStreamEndPos();
111 RetainPtr<CPDF_Stream> ReadStream(RetainPtr<CPDF_Dictionary> pDict);
112
113 bool IsPositionRead(FX_FILESIZE pos) const;
114
115 RetainPtr<CPDF_Object> GetObjectBodyInternal(
117 ParseType parse_type);
118
119 RetainPtr<CPDF_ReadValidator> m_pFileAccess;
120 // The syntax parser uses positions relative to the header offset.
121 // The header is normally at the start of the file, but it may be preceded by
122 // some other data. We ignore that data.
123 const FX_FILESIZE m_HeaderOffset;
124 const FX_FILESIZE m_FileLen;
125 FX_FILESIZE m_Pos = 0;
126 WeakPtr<ByteStringPool> m_pPool;
127 DataVector<uint8_t> m_pFileBuf;
128 FX_FILESIZE m_BufOffset = 0;
129 uint32_t m_WordSize = 0;
130 uint32_t m_ReadBufferSize = CPDF_Stream::kFileBufSize;
131 std::array<uint8_t, 257> m_WordBuffer = {};
132
133 // The syntax parser records traversed trailer end byte offsets here.
134 UnownedPtr<std::vector<unsigned int>> m_TrailerEnds;
135};
136
137#endif // CORE_FPDFAPI_PARSER_CPDF_SYNTAX_PARSER_H_
fxcrt::ByteString ByteString
Definition bytestring.h:180
static std::unique_ptr< CFDF_Document > CreateNewDoc()
const CPDF_Dictionary * GetRoot() const
static std::unique_ptr< CFDF_Document > ParseMemory(pdfium::span< const uint8_t > span)
RetainPtr< CPDF_Dictionary > GetMutableRoot() const
~CFDF_Document() override
ByteString WriteToString() const
std::map< ByteString, RetainPtr< CPDF_Object >, std::less<> > DictMap
bool ReplaceIndirectObjectIfHigherGeneration(uint32_t objnum, RetainPtr< CPDF_Object > pObj)
static constexpr int kFileBufSize
Definition cpdf_stream.h:25
RetainPtr< CPDF_Object > GetIndirectObject(CPDF_IndirectObjectHolder *pObjList, ParseType parse_type)
static std::unique_ptr< CPDF_SyntaxParser > CreateForTesting(RetainPtr< IFX_SeekableReadStream > pFileAccess, FX_FILESIZE HeaderOffset)
DataVector< uint8_t > ReadHexString()
bool BackwardsSearchToWord(ByteStringView word, FX_FILESIZE limit)
RetainPtr< CPDF_ReadValidator > GetValidator() const
CPDF_SyntaxParser(RetainPtr< IFX_SeekableReadStream > pFileAccess)
void SetReadBufferSize(uint32_t read_buffer_size)
FX_FILESIZE FindTag(ByteStringView tag)
FX_FILESIZE GetPos() const
FX_FILESIZE GetDocumentSize() const
CPDF_SyntaxParser(RetainPtr< CPDF_ReadValidator > pValidator, FX_FILESIZE HeaderOffset)
bool GetNextChar(uint8_t &ch)
bool ReadBlock(pdfium::span< uint8_t > buffer)
void SetPos(FX_FILESIZE pos)
RetainPtr< CPDF_Object > GetObjectBody(CPDF_IndirectObjectHolder *pObjList)
void SetTrailerEnds(std::vector< unsigned int > *trailer_ends)
bool GetCharAt(FX_FILESIZE pos, uint8_t &ch)
ByteString()=default
#define FX_FILESIZE
Definition fx_types.h:19
fxcrt::ByteStringView ByteStringView