Qt
Internal/Contributor docs for the Qt SDK. Note: These are NOT official API docs; those are found at https://doc.qt.io/
Loading...
Searching...
No Matches
cxfa_fmlexer.cpp
Go to the documentation of this file.
1// Copyright 2014 The PDFium Authors
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7#include "xfa/fxfa/formcalc/cxfa_fmlexer.h"
8
9#include <algorithm>
10
11#include "core/fxcrt/compiler_specific.h"
12#include "core/fxcrt/fx_extension.h"
13#include "core/fxcrt/stl_util.h"
14
15namespace {
16
// Returns true when |c| falls inside one of the three code-point ranges the
// lexer accepts: 0x09-0x0D, 0x20-0xD7FF or 0xE000-0xFFFD.
bool IsFormCalcCharacter(wchar_t c) {
  if (c >= 0x09 && c <= 0x0D) {
    return true;
  }
  if (c >= 0x20 && c <= 0xd7FF) {
    return true;
  }
  return c >= 0xE000 && c <= 0xFFFD;
}
21
22bool IsIdentifierCharacter(wchar_t c) {
23 return FXSYS_iswalnum(c) || c == 0x005F || // '_'
24 c == 0x0024; // '$'
25}
26
27bool IsInitialIdentifierCharacter(wchar_t c) {
28 return FXSYS_iswalpha(c) || c == 0x005F || // '_'
29 c == 0x0024 || // '$'
30 c == 0x0021; // '!'
31}
32
// Returns true for the four in-line blank characters: horizontal tab,
// vertical tab, form feed and space. Line feed and carriage return are not
// included.
bool IsWhitespaceCharacter(wchar_t c) {
  switch (c) {
    case 0x0009:  // Horizontal tab
    case 0x000B:  // Vertical tab
    case 0x000C:  // Form feed
    case 0x0020:  // Space
      return true;
    default:
      return false;
  }
}
39
40struct XFA_FMKeyword {
41 XFA_FM_TOKEN m_type;
42 const char* m_keyword; // Raw, POD struct.
43};
44
45const XFA_FMKeyword kKeyWords[] = {
46 {TOKdo, "do"},
47 {TOKkseq, "eq"},
48 {TOKksge, "ge"},
49 {TOKksgt, "gt"},
50 {TOKif, "if"},
51 {TOKin, "in"},
52 {TOKksle, "le"},
53 {TOKkslt, "lt"},
54 {TOKksne, "ne"},
55 {TOKksor, "or"},
56 {TOKnull, "null"},
57 {TOKbreak, "break"},
58 {TOKksand, "and"},
59 {TOKend, "end"},
60 {TOKeof, "eof"},
61 {TOKfor, "for"},
62 {TOKnan, "nan"},
63 {TOKksnot, "not"},
64 {TOKvar, "var"},
65 {TOKthen, "then"},
66 {TOKelse, "else"},
67 {TOKexit, "exit"},
68 {TOKdownto, "downto"},
69 {TOKreturn, "return"},
70 {TOKinfinity, "infinity"},
71 {TOKendwhile, "endwhile"},
72 {TOKforeach, "foreach"},
73 {TOKendfunc, "endfunc"},
74 {TOKelseif, "elseif"},
75 {TOKwhile, "while"},
76 {TOKendfor, "endfor"},
77 {TOKthrow, "throw"},
78 {TOKstep, "step"},
79 {TOKupto, "upto"},
80 {TOKcontinue, "continue"},
81 {TOKfunc, "func"},
82 {TOKendif, "endif"},
83};
84
85#ifndef NDEBUG
86constexpr auto kTokenStrings = fxcrt::ToArray<const char*>({
87 "TOKand", "TOKlparen", "TOKrparen", "TOKmul",
88 "TOKplus", "TOKcomma", "TOKminus", "TOKdot",
89 "TOKdiv", "TOKlt", "TOKassign", "TOKgt",
90 "TOKlbracket", "TOKrbracket", "TOKor", "TOKdotscream",
91 "TOKdotstar", "TOKdotdot", "TOKle", "TOKne",
92 "TOKeq", "TOKge", "TOKdo", "TOKkseq",
93 "TOKksge", "TOKksgt", "TOKif", "TOKin",
94 "TOKksle", "TOKkslt", "TOKksne", "TOKksor",
95 "TOKnull", "TOKbreak", "TOKksand", "TOKend",
96 "TOKeof", "TOKfor", "TOKnan", "TOKksnot",
97 "TOKvar", "TOKthen", "TOKelse", "TOKexit",
98 "TOKdownto", "TOKreturn", "TOKinfinity", "TOKendwhile",
99 "TOKforeach", "TOKendfunc", "TOKelseif", "TOKwhile",
100 "TOKendfor", "TOKthrow", "TOKstep", "TOKupto",
101 "TOKcontinue", "TOKfunc", "TOKendif", "TOKstar",
102 "TOKidentifier", "TOKunderscore", "TOKdollar", "TOKexclamation",
103 "TOKcall", "TOKstring", "TOKnumber", "TOKreserver",
104});
105#endif // NDEBUG
106
107XFA_FM_TOKEN TokenizeIdentifier(WideStringView str) {
108 const XFA_FMKeyword* result =
109 std::find_if(std::begin(kKeyWords), std::end(kKeyWords),
110 [str](const XFA_FMKeyword& iter) {
111 return str.EqualsASCII(iter.m_keyword);
112 });
113 if (result != std::end(kKeyWords) && str.EqualsASCII(result->m_keyword)) {
114 return result->m_type;
115 }
116 return TOKidentifier;
117}
118
119} // namespace
120
121CXFA_FMLexer::Token::Token() = default;
122
123CXFA_FMLexer::Token::Token(XFA_FM_TOKEN token) : m_type(token) {}
124
126 : m_type(token), m_string(str) {}
127
128CXFA_FMLexer::Token::Token(const Token& that) = default;
129
130CXFA_FMLexer::Token::~Token() = default;
131
132#ifndef NDEBUG
134 WideString str = WideString::FromASCII("type = ");
135 str += WideString::FromASCII(kTokenStrings[m_type]);
136 str += WideString::FromASCII(", string = ");
137 str += m_string;
138 return str;
139}
140#endif // NDEBUG
141
144
145CXFA_FMLexer::~CXFA_FMLexer() = default;
146
148 if (m_bLexerError)
149 return Token();
150
151 while (!IsComplete() && m_spInput[m_nCursor]) {
152 if (!IsFormCalcCharacter(m_spInput[m_nCursor])) {
153 RaiseError();
154 return Token();
155 }
156
157 switch (m_spInput[m_nCursor]) {
158 case '\n':
159 ++m_nCursor;
160 break;
161 case '\r':
162 ++m_nCursor;
163 break;
164 case ';':
165 AdvanceForComment();
166 break;
167 case '"':
168 return AdvanceForString();
169 case '0':
170 case '1':
171 case '2':
172 case '3':
173 case '4':
174 case '5':
175 case '6':
176 case '7':
177 case '8':
178 case '9':
179 return AdvanceForNumber();
180 case '=':
181 ++m_nCursor;
182 if (m_nCursor >= m_spInput.size())
183 return Token(TOKassign);
184
185 if (!IsFormCalcCharacter(m_spInput[m_nCursor])) {
186 RaiseError();
187 return Token();
188 }
189 if (m_spInput[m_nCursor] == '=') {
190 ++m_nCursor;
191 return Token(TOKeq);
192 }
193 return Token(TOKassign);
194 case '<':
195 ++m_nCursor;
196 if (m_nCursor >= m_spInput.size())
197 return Token(TOKlt);
198
199 if (!IsFormCalcCharacter(m_spInput[m_nCursor])) {
200 RaiseError();
201 return Token();
202 }
203 if (m_spInput[m_nCursor] == '=') {
204 ++m_nCursor;
205 return Token(TOKle);
206 }
207 if (m_spInput[m_nCursor] == '>') {
208 ++m_nCursor;
209 return Token(TOKne);
210 }
211 return Token(TOKlt);
212 case '>':
213 ++m_nCursor;
214 if (m_nCursor >= m_spInput.size())
215 return Token(TOKgt);
216
217 if (!IsFormCalcCharacter(m_spInput[m_nCursor])) {
218 RaiseError();
219 return Token();
220 }
221 if (m_spInput[m_nCursor] == '=') {
222 ++m_nCursor;
223 return Token(TOKge);
224 }
225 return Token(TOKgt);
226 case ',':
227 ++m_nCursor;
228 return Token(TOKcomma);
229 case '(':
230 ++m_nCursor;
231 return Token(TOKlparen);
232 case ')':
233 ++m_nCursor;
234 return Token(TOKrparen);
235 case '[':
236 ++m_nCursor;
237 return Token(TOKlbracket);
238 case ']':
239 ++m_nCursor;
240 return Token(TOKrbracket);
241 case '&':
242 ++m_nCursor;
243 return Token(TOKand);
244 case '|':
245 ++m_nCursor;
246 return Token(TOKor);
247 case '+':
248 ++m_nCursor;
249 return Token(TOKplus);
250 case '-':
251 ++m_nCursor;
252 return Token(TOKminus);
253 case '*':
254 ++m_nCursor;
255 return Token(TOKmul);
256 case '/': {
257 ++m_nCursor;
258 if (m_nCursor >= m_spInput.size())
259 return Token(TOKdiv);
260
261 if (!IsFormCalcCharacter(m_spInput[m_nCursor])) {
262 RaiseError();
263 return Token();
264 }
265 if (m_spInput[m_nCursor] != '/')
266 return Token(TOKdiv);
267
268 AdvanceForComment();
269 break;
270 }
271 case '.':
272 ++m_nCursor;
273 if (m_nCursor >= m_spInput.size())
274 return Token(TOKdot);
275
276 if (!IsFormCalcCharacter(m_spInput[m_nCursor])) {
277 RaiseError();
278 return Token();
279 }
280
281 if (m_spInput[m_nCursor] == '.') {
282 ++m_nCursor;
283 return Token(TOKdotdot);
284 }
285 if (m_spInput[m_nCursor] == '*') {
286 ++m_nCursor;
287 return Token(TOKdotstar);
288 }
289 if (m_spInput[m_nCursor] == '#') {
290 ++m_nCursor;
291 return Token(TOKdotscream);
292 }
293 if (FXSYS_IsDecimalDigit(m_spInput[m_nCursor])) {
294 --m_nCursor;
295 return AdvanceForNumber();
296 }
297 return Token(TOKdot);
298 default:
299 if (IsWhitespaceCharacter(m_spInput[m_nCursor])) {
300 ++m_nCursor;
301 break;
302 }
303 if (!IsInitialIdentifierCharacter(m_spInput[m_nCursor])) {
304 RaiseError();
305 return Token();
306 }
307 return AdvanceForIdentifier();
308 }
309 }
310 return Token(TOKeof);
311}
312
313CXFA_FMLexer::Token CXFA_FMLexer::AdvanceForNumber() {
314 // This will set end to the character after the end of the number.
315 size_t used_length = 0;
316 if (m_nCursor < m_spInput.size()) {
317 FXSYS_wcstof(WideStringView(m_spInput.subspan(m_nCursor)), &used_length);
318 }
319 size_t end = m_nCursor + used_length;
320 if (used_length == 0 ||
321 (end < m_spInput.size() && FXSYS_iswalpha(m_spInput[end]))) {
322 RaiseError();
323 return Token();
324 }
325 WideStringView str(m_spInput.subspan(m_nCursor, end - m_nCursor));
326 m_nCursor = end;
327 return Token(TOKnumber, str);
328}
329
330CXFA_FMLexer::Token CXFA_FMLexer::AdvanceForString() {
331 size_t start = m_nCursor;
332 ++m_nCursor;
333 while (!IsComplete() && m_spInput[m_nCursor]) {
334 if (!IsFormCalcCharacter(m_spInput[m_nCursor]))
335 break;
336
337 if (m_spInput[m_nCursor] == '"') {
338 // Check for escaped "s, i.e. "".
339 ++m_nCursor;
340 // If the end of the input has been reached it was not escaped.
341 if (m_nCursor >= m_spInput.size()) {
342 return Token(TOKstring, WideStringView(m_spInput.subspan(
343 start, m_nCursor - start)));
344 }
345 // If the next character is not a " then the end of the string has been
346 // found.
347 if (m_spInput[m_nCursor] != '"') {
348 if (!IsFormCalcCharacter(m_spInput[m_nCursor]))
349 break;
350
351 return Token(TOKstring, WideStringView(m_spInput.subspan(
352 start, m_nCursor - start)));
353 }
354 }
355 ++m_nCursor;
356 }
357
358 // Didn't find the end of the string.
359 RaiseError();
360 return Token();
361}
362
363CXFA_FMLexer::Token CXFA_FMLexer::AdvanceForIdentifier() {
364 size_t start = m_nCursor;
365 ++m_nCursor;
366 while (!IsComplete() && m_spInput[m_nCursor]) {
367 if (!IsFormCalcCharacter(m_spInput[m_nCursor])) {
368 RaiseError();
369 return Token();
370 }
371 if (!IsIdentifierCharacter(m_spInput[m_nCursor]))
372 break;
373
374 ++m_nCursor;
375 }
376
377 WideStringView str(m_spInput.subspan(start, m_nCursor - start));
378 return Token(TokenizeIdentifier(str), str);
379}
380
381void CXFA_FMLexer::AdvanceForComment() {
382 ++m_nCursor;
383 while (!IsComplete() && m_spInput[m_nCursor]) {
384 if (!IsFormCalcCharacter(m_spInput[m_nCursor])) {
385 RaiseError();
386 return;
387 }
388 if (m_spInput[m_nCursor] == L'\r') {
389 ++m_nCursor;
390 return;
391 }
392 if (m_spInput[m_nCursor] == L'\n') {
393 ++m_nCursor;
394 return;
395 }
396 ++m_nCursor;
397 }
398}
WideString ToDebugString() const
Token(const Token &that)
Token(XFA_FM_TOKEN token, WideStringView str)
Token(XFA_FM_TOKEN token)
CXFA_FMLexer(WideStringView wsFormcalc)
static WideString FromASCII(ByteStringView str)
XFA_FM_TOKEN
@ TOKdotstar
@ TOKnan
@ TOKeof
@ TOKge
@ TOKbreak
@ TOKminus
@ TOKnumber
@ TOKendfor
@ TOKor
@ TOKeq
@ TOKksle
@ TOKfor
@ TOKdo
@ TOKupto
@ TOKlbracket
@ TOKexit
@ TOKplus
@ TOKkslt
@ TOKnull
@ TOKthrow
@ TOKksgt
@ TOKendif
@ TOKgt
@ TOKcontinue
@ TOKcomma
@ TOKle
@ TOKksnot
@ TOKrparen
@ TOKlparen
@ TOKksand
@ TOKidentifier
@ TOKksor
@ TOKelseif
@ TOKassign
@ TOKendfunc
@ TOKelse
@ TOKksge
@ TOKkseq
@ TOKvar
@ TOKendwhile
@ TOKfunc
@ TOKksne
@ TOKreturn
@ TOKforeach
@ TOKif
@ TOKinfinity
@ TOKin
@ TOKdotscream
@ TOKdiv
@ TOKne
@ TOKrbracket
@ TOKmul
@ TOKlt
@ TOKdownto
@ TOKthen
@ TOKdotdot
@ TOKstep
@ TOKand
@ TOKdot
@ TOKend
@ TOKwhile
bool FXSYS_iswalpha(wchar_t c)
bool FXSYS_iswalnum(wchar_t c)
fxcrt::WideStringView WideStringView
fxcrt::WideString WideString
Definition widestring.h:207