Qt
Internal/Contributor docs for the Qt SDK. Note: These are NOT official API docs; those are found at https://doc.qt.io/
Loading...
Searching...
No Matches
cxfa_fmlexer.cpp
Go to the documentation of this file.
// Copyright 2014 The PDFium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7#include "xfa/fxfa/formcalc/cxfa_fmlexer.h"
8
9#include <algorithm>
10
11#include "core/fxcrt/fx_extension.h"
12
13namespace {
14
// Returns true when |c| falls inside one of the three code-point ranges this
// lexer accepts: 0x09-0x0D, 0x20-0xD7FF, or 0xE000-0xFFFD. Anything else
// (NUL, the remaining values below 0x20, 0xD800-0xDFFF, and 0xFFFE upward) is
// rejected.
constexpr bool IsFormCalcCharacter(wchar_t c) {
  return (c >= 0x09 && c <= 0x0D) || (c >= 0x20 && c <= 0xD7FF) ||
         (c >= 0xE000 && c <= 0xFFFD);
}
19
20bool IsIdentifierCharacter(wchar_t c) {
21 return FXSYS_iswalnum(c) || c == 0x005F || // '_'
22 c == 0x0024; // '$'
23}
24
25bool IsInitialIdentifierCharacter(wchar_t c) {
26 return FXSYS_iswalpha(c) || c == 0x005F || // '_'
27 c == 0x0024 || // '$'
28 c == 0x0021; // '!'
29}
30
// Returns true for the four characters this helper treats as whitespace:
// horizontal tab, vertical tab, form feed and space. Line feed (0x0A) and
// carriage return (0x0D) are not part of this set.
constexpr bool IsWhitespaceCharacter(wchar_t c) {
  switch (c) {
    case 0x0009:  // Horizontal tab
    case 0x000B:  // Vertical tab
    case 0x000C:  // Form feed
    case 0x0020:  // Space
      return true;
    default:
      return false;
  }
}
37
// One row of the keyword table: a token type paired with the ASCII spelling
// that produces it.
38struct XFA_FMKeyword {
39 XFA_FM_TOKEN m_type;
40 const char* m_keyword; // Raw, POD struct.
41};
42
// Keyword table consulted by TokenizeIdentifier(). It is searched linearly
// with std::find_if, so the rows do not need to be in any particular order.
43const XFA_FMKeyword keyWords[] = {
44 {TOKdo, "do"},
45 {TOKkseq, "eq"},
46 {TOKksge, "ge"},
47 {TOKksgt, "gt"},
48 {TOKif, "if"},
49 {TOKin, "in"},
50 {TOKksle, "le"},
51 {TOKkslt, "lt"},
52 {TOKksne, "ne"},
53 {TOKksor, "or"},
54 {TOKnull, "null"},
55 {TOKbreak, "break"},
56 {TOKksand, "and"},
57 {TOKend, "end"},
58 {TOKeof, "eof"},
59 {TOKfor, "for"},
60 {TOKnan, "nan"},
61 {TOKksnot, "not"},
62 {TOKvar, "var"},
63 {TOKthen, "then"},
64 {TOKelse, "else"},
65 {TOKexit, "exit"},
66 {TOKdownto, "downto"},
67 {TOKreturn, "return"},
68 {TOKinfinity, "infinity"},
69 {TOKendwhile, "endwhile"},
70 {TOKforeach, "foreach"},
71 {TOKendfunc, "endfunc"},
72 {TOKelseif, "elseif"},
73 {TOKwhile, "while"},
74 {TOKendfor, "endfor"},
75 {TOKthrow, "throw"},
76 {TOKstep, "step"},
77 {TOKupto, "upto"},
78 {TOKcontinue, "continue"},
79 {TOKfunc, "func"},
80 {TOKendif, "endif"},
81};
82
// Debug-only table of printable token names. Token::ToDebugString() indexes it
// directly with the token's m_type value.
// NOTE(review): that indexing presumably relies on the entries being in the
// same order as the XFA_FM_TOKEN enumerators; the enum declaration is not
// visible here, so confirm against the header before reordering.
83#ifndef NDEBUG
84const char* const tokenStrings[] = {
85 "TOKand", "TOKlparen", "TOKrparen", "TOKmul",
86 "TOKplus", "TOKcomma", "TOKminus", "TOKdot",
87 "TOKdiv", "TOKlt", "TOKassign", "TOKgt",
88 "TOKlbracket", "TOKrbracket", "TOKor", "TOKdotscream",
89 "TOKdotstar", "TOKdotdot", "TOKle", "TOKne",
90 "TOKeq", "TOKge", "TOKdo", "TOKkseq",
91 "TOKksge", "TOKksgt", "TOKif", "TOKin",
92 "TOKksle", "TOKkslt", "TOKksne", "TOKksor",
93 "TOKnull", "TOKbreak", "TOKksand", "TOKend",
94 "TOKeof", "TOKfor", "TOKnan", "TOKksnot",
95 "TOKvar", "TOKthen", "TOKelse", "TOKexit",
96 "TOKdownto", "TOKreturn", "TOKinfinity", "TOKendwhile",
97 "TOKforeach", "TOKendfunc", "TOKelseif", "TOKwhile",
98 "TOKendfor", "TOKthrow", "TOKstep", "TOKupto",
99 "TOKcontinue", "TOKfunc", "TOKendif", "TOKstar",
100 "TOKidentifier", "TOKunderscore", "TOKdollar", "TOKexclamation",
101 "TOKcall", "TOKstring", "TOKnumber", "TOKreserver",
102};
103#endif // NDEBUG
104
105XFA_FM_TOKEN TokenizeIdentifier(WideStringView str) {
106 const XFA_FMKeyword* result =
107 std::find_if(std::begin(keyWords), std::end(keyWords),
108 [str](const XFA_FMKeyword& iter) {
109 return str.EqualsASCII(iter.m_keyword);
110 });
111 if (result != std::end(keyWords) && str.EqualsASCII(result->m_keyword))
112 return result->m_type;
113 return TOKidentifier;
114}
115
116} // namespace
117
// Default-constructed token. Member values come from whatever defaults the
// class declaration provides (the declaration is not visible here).
118CXFA_FMLexer::Token::Token() = default;
119
// Token that carries only a type.
120CXFA_FMLexer::Token::Token(XFA_FM_TOKEN token) : m_type(token) {}
121
// Token that carries a type together with the string view |str|.
122CXFA_FMLexer::Token::Token(XFA_FM_TOKEN token, WideStringView str)
123 : m_type(token), m_string(str) {}
124
// Copy construction and destruction use the compiler-generated versions.
125CXFA_FMLexer::Token::Token(const Token& that) = default;
126
127CXFA_FMLexer::Token::~Token() = default;
128
129#ifndef NDEBUG
130WideString CXFA_FMLexer::Token::ToDebugString() const {
131 WideString str = WideString::FromASCII("type = ");
132 str += WideString::FromASCII(tokenStrings[m_type]);
133 str += WideString::FromASCII(", string = ");
134 str += m_string;
135 return str;
136}
137#endif // NDEBUG
138
139CXFA_FMLexer::CXFA_FMLexer(WideStringView wsFormCalc)
141
// The compiler-generated destructor is used; no explicit cleanup is written
// here.
142CXFA_FMLexer::~CXFA_FMLexer() = default;
143
// Body of the lexer's main token-scanning routine.
// NOTE(review): the line carrying this function's signature (listing line 144)
// is absent from this listing; only the body is visible.
//
// Returns an empty Token straight away when m_bLexerError is already set.
// Otherwise it walks the input from m_nCursor, skipping newlines, whitespace
// and comments, and returns the first token it recognises. A character that
// IsFormCalcCharacter() rejects, or one that cannot begin any token, calls
// RaiseError() and yields an empty Token. When the loop ends without
// producing a token, TOKeof is returned.
145 if (m_bLexerError)
146 return Token();
147
148 while (!IsComplete() && m_spInput[m_nCursor]) {
149 if (!IsFormCalcCharacter(m_spInput[m_nCursor])) {
150 RaiseError();
151 return Token();
152 }
153
154 switch (m_spInput[m_nCursor]) {
// Line terminators produce no token; step over them and keep scanning.
155 case '\n':
156 ++m_nCursor;
157 break;
158 case '\r':
159 ++m_nCursor;
160 break;
// ';' begins a comment; AdvanceForComment() moves the cursor past it.
161 case ';':
162 AdvanceForComment();
163 break;
164 case '"':
165 return AdvanceForString();
166 case '0':
167 case '1':
168 case '2':
169 case '3':
170 case '4':
171 case '5':
172 case '6':
173 case '7':
174 case '8':
175 case '9':
176 return AdvanceForNumber();
// '=' is assignment on its own and equality when doubled ("==").
177 case '=':
178 ++m_nCursor;
179 if (m_nCursor >= m_spInput.size())
180 return Token(TOKassign);
181
182 if (!IsFormCalcCharacter(m_spInput[m_nCursor])) {
183 RaiseError();
184 return Token();
185 }
186 if (m_spInput[m_nCursor] == '=') {
187 ++m_nCursor;
188 return Token(TOKeq);
189 }
190 return Token(TOKassign);
// '<' may be followed by '=' (TOKle) or '>' (TOKne); otherwise it is TOKlt.
191 case '<':
192 ++m_nCursor;
193 if (m_nCursor >= m_spInput.size())
194 return Token(TOKlt);
195
196 if (!IsFormCalcCharacter(m_spInput[m_nCursor])) {
197 RaiseError();
198 return Token();
199 }
200 if (m_spInput[m_nCursor] == '=') {
201 ++m_nCursor;
202 return Token(TOKle);
203 }
204 if (m_spInput[m_nCursor] == '>') {
205 ++m_nCursor;
206 return Token(TOKne);
207 }
208 return Token(TOKlt);
// '>' may be followed by '=' (TOKge); otherwise it is TOKgt.
209 case '>':
210 ++m_nCursor;
211 if (m_nCursor >= m_spInput.size())
212 return Token(TOKgt);
213
214 if (!IsFormCalcCharacter(m_spInput[m_nCursor])) {
215 RaiseError();
216 return Token();
217 }
218 if (m_spInput[m_nCursor] == '=') {
219 ++m_nCursor;
220 return Token(TOKge);
221 }
222 return Token(TOKgt);
// Single-character tokens: consume the character and return its token.
223 case ',':
224 ++m_nCursor;
225 return Token(TOKcomma);
226 case '(':
227 ++m_nCursor;
228 return Token(TOKlparen);
229 case ')':
230 ++m_nCursor;
231 return Token(TOKrparen);
232 case '[':
233 ++m_nCursor;
234 return Token(TOKlbracket);
235 case ']':
236 ++m_nCursor;
237 return Token(TOKrbracket);
238 case '&':
239 ++m_nCursor;
240 return Token(TOKand);
241 case '|':
242 ++m_nCursor;
243 return Token(TOKor);
244 case '+':
245 ++m_nCursor;
246 return Token(TOKplus);
247 case '-':
248 ++m_nCursor;
249 return Token(TOKminus);
250 case '*':
251 ++m_nCursor;
252 return Token(TOKmul);
// A lone '/' is division; "//" begins a comment, which is skipped before
// scanning continues.
253 case '/': {
254 ++m_nCursor;
255 if (m_nCursor >= m_spInput.size())
256 return Token(TOKdiv);
257
258 if (!IsFormCalcCharacter(m_spInput[m_nCursor])) {
259 RaiseError();
260 return Token();
261 }
262 if (m_spInput[m_nCursor] != '/')
263 return Token(TOKdiv);
264
265 AdvanceForComment();
266 break;
267 }
// '.' can begin "..", ".*", ".#", or a number such as ".5"; otherwise it is
// a plain TOKdot.
268 case '.':
269 ++m_nCursor;
270 if (m_nCursor >= m_spInput.size())
271 return Token(TOKdot);
272
273 if (!IsFormCalcCharacter(m_spInput[m_nCursor])) {
274 RaiseError();
275 return Token();
276 }
277
278 if (m_spInput[m_nCursor] == '.') {
279 ++m_nCursor;
280 return Token(TOKdotdot);
281 }
282 if (m_spInput[m_nCursor] == '*') {
283 ++m_nCursor;
284 return Token(TOKdotstar);
285 }
286 if (m_spInput[m_nCursor] == '#') {
287 ++m_nCursor;
288 return Token(TOKdotscream);
289 }
// A digit follows the '.': step back onto the '.' so AdvanceForNumber()
// starts at the beginning of the literal.
290 if (FXSYS_IsDecimalDigit(m_spInput[m_nCursor])) {
291 --m_nCursor;
292 return AdvanceForNumber();
293 }
294 return Token(TOKdot);
// Anything else is whitespace (skipped), the start of an identifier or
// keyword, or an error.
295 default:
296 if (IsWhitespaceCharacter(m_spInput[m_nCursor])) {
297 ++m_nCursor;
298 break;
299 }
300 if (!IsInitialIdentifierCharacter(m_spInput[m_nCursor])) {
301 RaiseError();
302 return Token();
303 }
304 return AdvanceForIdentifier();
305 }
306 }
307 return Token(TOKeof);
308}
309
310CXFA_FMLexer::Token CXFA_FMLexer::AdvanceForNumber() {
311 // This will set end to the character after the end of the number.
312 size_t used_length = 0;
313 if (m_nCursor < m_spInput.size()) {
314 FXSYS_wcstof(&m_spInput[m_nCursor], m_spInput.size() - m_nCursor,
315 &used_length);
316 }
317 size_t end = m_nCursor + used_length;
318 if (used_length == 0 ||
319 (end < m_spInput.size() && FXSYS_iswalpha(m_spInput[end]))) {
320 RaiseError();
321 return Token();
322 }
323 WideStringView str(m_spInput.subspan(m_nCursor, end - m_nCursor));
324 m_nCursor = end;
325 return Token(TOKnumber, str);
326}
327
328CXFA_FMLexer::Token CXFA_FMLexer::AdvanceForString() {
329 size_t start = m_nCursor;
330 ++m_nCursor;
331 while (!IsComplete() && m_spInput[m_nCursor]) {
332 if (!IsFormCalcCharacter(m_spInput[m_nCursor]))
333 break;
334
335 if (m_spInput[m_nCursor] == '"') {
336 // Check for escaped "s, i.e. "".
337 ++m_nCursor;
338 // If the end of the input has been reached it was not escaped.
339 if (m_nCursor >= m_spInput.size()) {
340 return Token(TOKstring, WideStringView(m_spInput.subspan(
341 start, m_nCursor - start)));
342 }
343 // If the next character is not a " then the end of the string has been
344 // found.
345 if (m_spInput[m_nCursor] != '"') {
346 if (!IsFormCalcCharacter(m_spInput[m_nCursor]))
347 break;
348
349 return Token(TOKstring, WideStringView(m_spInput.subspan(
350 start, m_nCursor - start)));
351 }
352 }
353 ++m_nCursor;
354 }
355
356 // Didn't find the end of the string.
357 RaiseError();
358 return Token();
359}
360
361CXFA_FMLexer::Token CXFA_FMLexer::AdvanceForIdentifier() {
362 size_t start = m_nCursor;
363 ++m_nCursor;
364 while (!IsComplete() && m_spInput[m_nCursor]) {
365 if (!IsFormCalcCharacter(m_spInput[m_nCursor])) {
366 RaiseError();
367 return Token();
368 }
369 if (!IsIdentifierCharacter(m_spInput[m_nCursor]))
370 break;
371
372 ++m_nCursor;
373 }
374
375 WideStringView str(m_spInput.subspan(start, m_nCursor - start));
376 return Token(TokenizeIdentifier(str), str);
377}
378
379void CXFA_FMLexer::AdvanceForComment() {
380 ++m_nCursor;
381 while (!IsComplete() && m_spInput[m_nCursor]) {
382 if (!IsFormCalcCharacter(m_spInput[m_nCursor])) {
383 RaiseError();
384 return;
385 }
386 if (m_spInput[m_nCursor] == L'\r') {
387 ++m_nCursor;
388 return;
389 }
390 if (m_spInput[m_nCursor] == L'\n') {
391 ++m_nCursor;
392 return;
393 }
394 ++m_nCursor;
395 }
396}
WideString ToDebugString() const
Token(const Token &that)
Token(XFA_FM_TOKEN token, WideStringView str)
Token(XFA_FM_TOKEN token)
CXFA_FMLexer(WideStringView wsFormcalc)
static WideString FromASCII(ByteStringView str)
XFA_FM_TOKEN
@ TOKdotstar
@ TOKnan
@ TOKeof
@ TOKge
@ TOKbreak
@ TOKminus
@ TOKnumber
@ TOKendfor
@ TOKor
@ TOKeq
@ TOKksle
@ TOKfor
@ TOKdo
@ TOKupto
@ TOKlbracket
@ TOKexit
@ TOKplus
@ TOKkslt
@ TOKnull
@ TOKthrow
@ TOKksgt
@ TOKendif
@ TOKgt
@ TOKcontinue
@ TOKcomma
@ TOKle
@ TOKksnot
@ TOKrparen
@ TOKlparen
@ TOKksand
@ TOKidentifier
@ TOKksor
@ TOKelseif
@ TOKassign
@ TOKendfunc
@ TOKelse
@ TOKksge
@ TOKkseq
@ TOKvar
@ TOKendwhile
@ TOKfunc
@ TOKksne
@ TOKreturn
@ TOKforeach
@ TOKif
@ TOKinfinity
@ TOKin
@ TOKdotscream
@ TOKdiv
@ TOKne
@ TOKrbracket
@ TOKmul
@ TOKlt
@ TOKdownto
@ TOKthen
@ TOKdotdot
@ TOKstep
@ TOKand
@ TOKdot
@ TOKend
@ TOKwhile
bool FXSYS_iswalpha(wchar_t c)
bool FXSYS_iswalnum(wchar_t c)