Qt
Internal/Contributor docs for the Qt SDK. Note: These are NOT official API docs; those are found at https://doc.qt.io/
Loading...
Searching...
No Matches
cpdf_syntax_parser.h
Go to the documentation of this file.
1// Copyright 2016 The PDFium Authors
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7#ifndef CORE_FPDFAPI_PARSER_CPDF_SYNTAX_PARSER_H_
8#define CORE_FPDFAPI_PARSER_CPDF_SYNTAX_PARSER_H_
9
10#include <stdint.h>
11
12#include <memory>
13#include <vector>
14
15#include "core/fpdfapi/parser/cpdf_stream.h"
16#include "core/fxcrt/data_vector.h"
17#include "core/fxcrt/fx_types.h"
18#include "core/fxcrt/retain_ptr.h"
19#include "core/fxcrt/string_pool_template.h"
20#include "core/fxcrt/unowned_ptr.h"
21#include "core/fxcrt/weak_ptr.h"
22#include "third_party/base/containers/span.h"
23
24class CPDF_Dictionary;
26class CPDF_Object;
28class CPDF_Stream;
30
32 public:
33 enum class ParseType : bool { kStrict, kLoose };
34
35 struct WordResult {
36 ByteString word;
38 };
39
42 FX_FILESIZE HeaderOffset);
43
46 FX_FILESIZE HeaderOffset);
48
// Stores |read_buffer_size| in m_ReadBufferSize, replacing its in-class
// default of CPDF_Stream::kFileBufSize.
49 void SetReadBufferSize(uint32_t read_buffer_size) {
50 m_ReadBufferSize = read_buffer_size;
51 }
52
53 FX_FILESIZE GetPos() const { return m_Pos; }
54 void SetPos(FX_FILESIZE pos);
55
58 ParseType parse_type);
59
60 ByteString GetKeyword();
61 void ToNextLine();
62 void ToNextWord();
64 bool BackwardsSearchToWord(ByteStringView word, FX_FILESIZE limit);
65 FX_FILESIZE FindTag(ByteStringView tag);
66 bool ReadBlock(pdfium::span<uint8_t> buffer);
67 bool GetCharAt(FX_FILESIZE pos, uint8_t& ch);
69 ByteString PeekNextWord();
70
72 uint32_t GetDirectNum();
73 bool GetNextChar(uint8_t& ch);
74
75 // The document size may be smaller than the file size.
76 // The syntax parser uses positions relative to the document
77 // offset (|m_HeaderOffset|).
78 // The document size is FileSize - "Header offset".
79 // All offsets read from the document should not be greater than the document
80 // size. Use it for checks instead of the real file size.
82
83 ByteString ReadString();
84 ByteString ReadHexString();
85
// Stores |trailer_ends| in m_TrailerEnds, the vector in which the syntax
// parser records traversed trailer end byte offsets. The pointer is held
// through an UnownedPtr; presumably the caller must keep the vector alive
// while the parser uses it (TODO confirm against callers).
86 void SetTrailerEnds(std::vector<unsigned int>* trailer_ends) {
87 m_TrailerEnds = trailer_ends;
88 }
89
90 private:
91 enum class WordType : bool { kWord, kNumber };
92
93 friend class CPDF_DataAvail;
94 friend class cpdf_syntax_parser_ReadHexString_Test;
95
96 static constexpr int kParserMaxRecursionDepth = 64;
97 static int s_CurrentRecursionDepth;
98
99 bool ReadBlockAt(FX_FILESIZE read_pos);
100 bool GetCharAtBackward(FX_FILESIZE pos, uint8_t* ch);
101 WordType GetNextWordInternal();
102 bool IsWholeWord(FX_FILESIZE startpos,
103 FX_FILESIZE limit,
104 ByteStringView tag,
105 bool checkKeyword);
106
107 unsigned int ReadEOLMarkers(FX_FILESIZE pos);
108 FX_FILESIZE FindWordPos(ByteStringView word);
109 FX_FILESIZE FindStreamEndPos();
110 RetainPtr<CPDF_Stream> ReadStream(RetainPtr<CPDF_Dictionary> pDict);
111
112 bool IsPositionRead(FX_FILESIZE pos) const;
113
114 RetainPtr<CPDF_Object> GetObjectBodyInternal(
116 ParseType parse_type);
117
118 RetainPtr<CPDF_ReadValidator> m_pFileAccess;
119 // The syntax parser uses positions relative to the header offset.
120 // The header is normally at the start of the file, but it may be preceded by
121 // other data, which we ignore.
122 const FX_FILESIZE m_HeaderOffset;
123 const FX_FILESIZE m_FileLen;
124 FX_FILESIZE m_Pos = 0;
125 WeakPtr<ByteStringPool> m_pPool;
126 DataVector<uint8_t> m_pFileBuf;
127 FX_FILESIZE m_BufOffset = 0;
128 uint32_t m_WordSize = 0;
129 uint8_t m_WordBuffer[257] = {};
130 uint32_t m_ReadBufferSize = CPDF_Stream::kFileBufSize;
131
132 // The syntax parser records traversed trailer end byte offsets here.
133 UnownedPtr<std::vector<unsigned int>> m_TrailerEnds;
134};
135
136#endif // CORE_FPDFAPI_PARSER_CPDF_SYNTAX_PARSER_H_
static std::unique_ptr< CFDF_Document > CreateNewDoc()
const CPDF_Dictionary * GetRoot() const
static std::unique_ptr< CFDF_Document > ParseMemory(pdfium::span< const uint8_t > span)
RetainPtr< CPDF_Dictionary > GetMutableRoot() const
~CFDF_Document() override
ByteString WriteToString() const
bool ReplaceIndirectObjectIfHigherGeneration(uint32_t objnum, RetainPtr< CPDF_Object > pObj)
static constexpr int kFileBufSize
Definition cpdf_stream.h:25
RetainPtr< CPDF_Object > GetIndirectObject(CPDF_IndirectObjectHolder *pObjList, ParseType parse_type)
static std::unique_ptr< CPDF_SyntaxParser > CreateForTesting(RetainPtr< IFX_SeekableReadStream > pFileAccess, FX_FILESIZE HeaderOffset)
bool BackwardsSearchToWord(ByteStringView word, FX_FILESIZE limit)
RetainPtr< CPDF_ReadValidator > GetValidator() const
CPDF_SyntaxParser(RetainPtr< IFX_SeekableReadStream > pFileAccess)
void SetReadBufferSize(uint32_t read_buffer_size)
FX_FILESIZE FindTag(ByteStringView tag)
FX_FILESIZE GetPos() const
FX_FILESIZE GetDocumentSize() const
CPDF_SyntaxParser(RetainPtr< CPDF_ReadValidator > pValidator, FX_FILESIZE HeaderOffset)
bool GetNextChar(uint8_t &ch)
bool ReadBlock(pdfium::span< uint8_t > buffer)
void SetPos(FX_FILESIZE pos)
RetainPtr< CPDF_Object > GetObjectBody(CPDF_IndirectObjectHolder *pObjList)
void SetTrailerEnds(std::vector< unsigned int > *trailer_ends)
bool GetCharAt(FX_FILESIZE pos, uint8_t &ch)
#define FX_FILESIZE
Definition fx_types.h:19