Qt
Internal/Contributor docs for the Qt SDK. Note: These are NOT official API docs; those are found at https://doc.qt.io/
Loading...
Searching...
No Matches
cpdf_simple_parser.cpp
Go to the documentation of this file.
1// Copyright 2016 The PDFium Authors
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7#include "core/fpdfapi/parser/cpdf_simple_parser.h"
8
9#include <stdint.h>
10
11#include <optional>
12
13#include "core/fpdfapi/parser/fpdf_parser_utility.h"
14#include "core/fxcrt/check_op.h"
15
16CPDF_SimpleParser::CPDF_SimpleParser(pdfium::span<const uint8_t> input)
17 : data_(input) {}
18
20
22 std::optional<uint8_t> start_char = SkipSpacesAndComments();
23 if (!start_char.has_value()) {
24 return ByteStringView();
25 }
26
27 CHECK_GT(cur_position_, 0);
28 uint32_t start_position = cur_position_ - 1;
29 CHECK_LT(start_position, data_.size());
30
31 if (!PDFCharIsDelimiter(start_char.value())) {
32 return HandleNonDelimiter();
33 }
34
35 switch (start_char.value()) {
36 case '/':
37 return HandleName();
38 case '<':
39 return HandleBeginAngleBracket();
40 case '>':
41 return HandleEndAngleBracket();
42 case '(':
43 return HandleParentheses();
44 default:
45 return GetDataToCurrentPosition(start_position);
46 }
47}
48
49ByteStringView CPDF_SimpleParser::GetDataToCurrentPosition(
50 uint32_t start_position) const {
51 return ByteStringView(
52 data_.subspan(start_position, cur_position_ - start_position));
53}
54
55std::optional<uint8_t> CPDF_SimpleParser::SkipSpacesAndComments() {
56 while (true) {
57 if (cur_position_ >= data_.size()) {
58 return std::nullopt;
59 }
60
61 // Skip whitespaces.
62 uint8_t cur_char = data_[cur_position_++];
63 while (PDFCharIsWhitespace(cur_char)) {
64 if (cur_position_ >= data_.size()) {
65 return std::nullopt;
66 }
67 cur_char = data_[cur_position_++];
68 }
69
70 if (cur_char != '%') {
71 return cur_char;
72 }
73
74 // Skip comments.
75 while (true) {
76 if (cur_position_ >= data_.size()) {
77 return std::nullopt;
78 }
79
80 cur_char = data_[cur_position_++];
81 if (PDFCharIsLineEnding(cur_char)) {
82 break;
83 }
84 }
85 }
86}
87
89 uint32_t start_position = cur_position_ - 1;
90 while (cur_position_ < data_.size()) {
91 uint8_t cur_char = data_[cur_position_];
92 // Stop parsing after encountering a whitespace or delimiter.
93 if (PDFCharIsWhitespace(cur_char) || PDFCharIsDelimiter(cur_char)) {
94 return GetDataToCurrentPosition(start_position);
95 }
96 ++cur_position_;
97 }
98 return ByteStringView();
99}
100
101ByteStringView CPDF_SimpleParser::HandleBeginAngleBracket() {
102 uint32_t start_position = cur_position_ - 1;
103 if (cur_position_ >= data_.size()) {
104 return GetDataToCurrentPosition(start_position);
105 }
106
107 uint8_t cur_char = data_[cur_position_++];
108 // Stop parsing if encountering "<<".
109 if (cur_char == '<') {
110 return GetDataToCurrentPosition(start_position);
111 }
112
113 // Continue parsing until end of `data_` or closing bracket.
114 while (cur_position_ < data_.size() && cur_char != '>') {
115 cur_char = data_[cur_position_++];
116 }
117 return GetDataToCurrentPosition(start_position);
118}
119
120ByteStringView CPDF_SimpleParser::HandleEndAngleBracket() {
121 uint32_t start_position = cur_position_ - 1;
122 if (cur_position_ < data_.size() && data_[cur_position_] == '>') {
123 ++cur_position_;
124 }
125 return GetDataToCurrentPosition(start_position);
126}
127
128ByteStringView CPDF_SimpleParser::HandleParentheses() {
129 uint32_t start_position = cur_position_ - 1;
130 int level = 1;
131 while (cur_position_ < data_.size() && level > 0) {
132 uint8_t cur_char = data_[cur_position_++];
133 if (cur_char == '(') {
134 ++level;
135 } else if (cur_char == ')') {
136 --level;
137 }
138 }
139 return GetDataToCurrentPosition(start_position);
140}
141
142ByteStringView CPDF_SimpleParser::HandleNonDelimiter() {
143 uint32_t start_position = cur_position_ - 1;
144 while (cur_position_ < data_.size()) {
145 uint8_t cur_char = data_[cur_position_];
146 if (PDFCharIsDelimiter(cur_char) || PDFCharIsWhitespace(cur_char)) {
147 break;
148 }
149 ++cur_position_;
150 }
151 return GetDataToCurrentPosition(start_position);
152}
#define CHECK_GT(x, y)
Definition check_op.h:13
#define CHECK_LT(x, y)
Definition check_op.h:12
CPDF_SimpleParser(pdfium::span< const uint8_t > input)
ByteStringView GetWord()
bool PDFCharIsWhitespace(uint8_t c)
bool PDFCharIsDelimiter(uint8_t c)
bool PDFCharIsLineEnding(uint8_t c)
fxcrt::ByteStringView ByteStringView