Qt
Internal/Contributor docs for the Qt SDK. Note: These are NOT official API docs; those are found at https://doc.qt.io/
Loading...
Searching...
No Matches
qqmljslexer.cpp
Go to the documentation of this file.
1// Copyright (C) 2016 The Qt Company Ltd.
2// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
3// Qt-Security score:critical reason:dataparser
4
8
9#include <private/qqmljsdiagnosticmessage_p.h>
10#include <private/qqmljsmemorypool_p.h>
11#include <private/qlocale_tools_p.h>
12
13
14#include <QtCore/qcoreapplication.h>
15#include <QtCore/qvarlengtharray.h>
16#include <QtCore/qdebug.h>
17#include <QtCore/QScopedValueRollback>
18
19#include <optional>
20
21QT_BEGIN_NAMESPACE
22using namespace QQmlJS;
23using namespace Qt::StringLiterals;
24
25static inline int regExpFlagFromChar(const QChar &ch)
26{
27 switch (ch.unicode()) {
28 case 'g': return Lexer::RegExp_Global;
29 case 'i': return Lexer::RegExp_IgnoreCase;
30 case 'm': return Lexer::RegExp_Multiline;
31 case 'u': return Lexer::RegExp_Unicode;
32 case 'y': return Lexer::RegExp_Sticky;
33 }
34 return 0;
35}
36
// Returns the numeric value of the hexadecimal digit \a c.
//
// No validation is performed: anything that is neither '0'-'9' nor 'a'-'f'
// is handled as if it were an upper-case hex digit.
static inline unsigned char convertHex(ushort c)
{
    const bool isDecimalDigit = c >= '0' && c <= '9';
    if (isDecimalDigit)
        return c - '0';

    const bool isLowerCaseDigit = c >= 'a' && c <= 'f';
    if (isLowerCaseDigit)
        return c - 'a' + 10;

    return c - 'A' + 10;
}
46
47static inline QChar convertHex(QChar c1, QChar c2)
48{
49 return QChar((convertHex(c1.unicode()) << 4) + convertHex(c2.unicode()));
50}
51
52Lexer::Lexer(Engine *engine, LexMode lexMode)
53 : _engine(engine), _lexMode(lexMode), _endPtr(nullptr), _qmlMode(true)
54{
55 if (engine)
56 engine->setLexer(this);
57}
58
59bool Lexer::qmlMode() const
60{
61 return _qmlMode;
62}
63
64QString Lexer::code() const
65{
66 return _code;
67}
68
69std::optional<DiagnosticMessage> Lexer::illegalFileLengthError() const
70{
71 Q_ASSERT(_currentOffset >= 0);
72
73 constexpr bool quint32IsBigger = sizeof(qsizetype) <= sizeof(quint32);
74 using BiggerInt = std::conditional_t<quint32IsBigger, quint32, qsizetype>;
75 using SmallerInt = std::conditional_t<!quint32IsBigger, quint32, qsizetype>;
76
77 const BiggerInt codeLength = BiggerInt(_currentOffset) + BiggerInt(_code.size());
78 const BiggerInt maxLength = BiggerInt(std::numeric_limits<SmallerInt>::max());
79 if (codeLength < maxLength)
80 return {};
81
82 constexpr int limit = quint32IsBigger ? 2 : 4;
83 return DiagnosticMessage{ u"File exceeds maximum length (%1GB)."_s.arg(limit), QtCriticalMsg,
84 SourceLocation{ 0, 1, 1, 1 } };
85}
86
87void Lexer::setCode(const QString &code, int lineno, bool qmlMode,
88 Lexer::CodeContinuation codeContinuation)
89{
90 if (codeContinuation == Lexer::CodeContinuation::Continue)
91 _currentOffset += _code.size();
92 else
93 _currentOffset = 0;
94 if (_engine)
95 _engine->setCode(code);
96
97 _qmlMode = qmlMode;
98 _code = code;
99 _skipLinefeed = false;
100
101 _tokenText.clear();
102 _tokenText.reserve(1024);
103 _errorMessage.clear();
104 _tokenSpell = QStringView();
105 _rawString = QStringView();
106
107 _codePtr = code.unicode();
108 _endPtr = _codePtr + code.size();
109 _tokenStartPtr = _codePtr;
110
111 if (lineno >= 0)
112 _currentLineNumber = lineno;
113 _currentColumnNumber = 0;
114 _tokenLine = _currentLineNumber;
115 _tokenColumn = 0;
116 _tokenLength = 0;
117
118 if (codeContinuation == Lexer::CodeContinuation::Reset)
119 _state = State {};
120}
121
122void Lexer::scanChar()
123{
124 if (_skipLinefeed) {
125 Q_ASSERT(*_codePtr == u'\n');
126 ++_codePtr;
127 _skipLinefeed = false;
128 }
129 _state.currentChar = *_codePtr++;
130 ++_currentColumnNumber;
131
132 if (isLineTerminator()) {
133 if (_state.currentChar == u'\r') {
134 if (_codePtr < _endPtr && *_codePtr == u'\n')
135 _skipLinefeed = true;
136 _state.currentChar = u'\n';
137 }
138 ++_currentLineNumber;
139 _currentColumnNumber = 0;
140 }
141}
142
143QChar Lexer::peekChar()
144{
145 auto peekPtr = _codePtr;
146 if (peekPtr < _endPtr)
147 return *peekPtr;
148 return QChar();
149}
150
151namespace {
152inline bool isBinop(int tok)
153{
154 switch (tok) {
155 case Lexer::T_AND:
156 case Lexer::T_AND_AND:
157 case Lexer::T_AND_EQ:
158 case Lexer::T_DIVIDE_:
159 case Lexer::T_DIVIDE_EQ:
160 case Lexer::T_EQ:
161 case Lexer::T_EQ_EQ:
162 case Lexer::T_EQ_EQ_EQ:
163 case Lexer::T_GE:
164 case Lexer::T_GT:
165 case Lexer::T_GT_GT:
166 case Lexer::T_GT_GT_EQ:
167 case Lexer::T_GT_GT_GT:
168 case Lexer::T_GT_GT_GT_EQ:
169 case Lexer::T_LE:
170 case Lexer::T_LT:
171 case Lexer::T_LT_LT:
172 case Lexer::T_LT_LT_EQ:
173 case Lexer::T_MINUS:
174 case Lexer::T_MINUS_EQ:
175 case Lexer::T_NOT_EQ:
176 case Lexer::T_NOT_EQ_EQ:
177 case Lexer::T_OR:
178 case Lexer::T_OR_EQ:
179 case Lexer::T_OR_OR:
180 case Lexer::T_PLUS:
181 case Lexer::T_PLUS_EQ:
182 case Lexer::T_REMAINDER:
183 case Lexer::T_REMAINDER_EQ:
184 case Lexer::T_RETURN:
185 case Lexer::T_STAR:
186 case Lexer::T_STAR_EQ:
187 case Lexer::T_XOR:
188 case Lexer::T_XOR_EQ:
189 return true;
190
191 default:
192 return false;
193 }
194}
195
196int hexDigit(QChar c)
197{
198 if (c >= u'0' && c <= u'9')
199 return c.unicode() - u'0';
200 if (c >= u'a' && c <= u'f')
201 return c.unicode() - u'a' + 10;
202 if (c >= u'A' && c <= u'F')
203 return c.unicode() - u'A' + 10;
204 return -1;
205}
206
207int octalDigit(QChar c)
208{
209 if (c >= u'0' && c <= u'7')
210 return c.unicode() - u'0';
211 return -1;
212}
213
214} // anonymous namespace
215
// Returns the kind of the next token and updates _state to describe it.
//
// On the first pass, when no token is stacked, a block comment or a
// string/template literal that an earlier call reported as partial
// (CommentState::InMultilineComment, or a previous T_PARTIAL_* token kind) is
// resumed first. Every other case is delegated to scanToken().
//
// T_EOL, T_COMMENT and T_PARTIAL_COMMENT are returned early and do not replace
// _state.tokenKind. For all other kinds the delimited / restrictedKeyword
// flags, the import state, the brace count and the parentheses state are
// refreshed before the kind is returned.
216int Lexer::lex()
217{
218 const int previousTokenKind = _state.tokenKind;
219 int tokenKind;
220 bool firstPass = true;
221
 // Re-entered from the T_RBRACE case below when the brace count is zero.
222 again:
223 tokenKind = T_ERROR;
224 _tokenSpell = QStringView();
225 _rawString = QStringView();
226 if (firstPass && _state.stackToken == -1) {
227 firstPass = false;
 // Line-by-line mode with non-empty code that has been fully consumed.
228 if (_codePtr > _endPtr && _lexMode == LexMode::LineByLine && !_code.isEmpty())
229 return T_EOL;
230
231 if (_state.comments == CommentState::InMultilineComment) {
 // Resume a block comment that was reported as T_PARTIAL_COMMENT earlier.
232 scanChar();
233 _tokenStartPtr = _codePtr - 1;
234 _tokenLine = _currentLineNumber;
235 _tokenColumn = _currentColumnNumber;
236 while (_codePtr <= _endPtr) {
237 if (_state.currentChar == u'*') {
238 scanChar();
239 if (_state.currentChar == u'/') {
240 scanChar();
241 if (_engine) {
242 _engine->addComment(tokenOffset() + 2,
243 _codePtr - _tokenStartPtr - 1 - 4,
244 tokenStartLine(), tokenStartColumn() + 2);
245 }
246 tokenKind = T_COMMENT;
247 break;
248 }
249 } else {
250 scanChar();
251 }
252 }
 // The loop ended without seeing "*/": the comment is still open.
253 if (tokenKind == T_ERROR)
254 tokenKind = T_PARTIAL_COMMENT;
255 } else {
256 // handle multiline continuation
257 std::optional<ScanStringMode> scanMode;
258 switch (previousTokenKind) {
259 case T_PARTIAL_SINGLE_QUOTE_STRING_LITERAL:
260 scanMode = ScanStringMode::SingleQuote;
261 break;
262 case T_PARTIAL_DOUBLE_QUOTE_STRING_LITERAL:
263 scanMode = ScanStringMode::DoubleQuote;
264 break;
265 case T_PARTIAL_TEMPLATE_HEAD:
266 scanMode = ScanStringMode::TemplateHead;
267 break;
268 case T_PARTIAL_TEMPLATE_MIDDLE:
269 scanMode = ScanStringMode::TemplateContinuation;
270 break;
271 default:
272 break;
273 }
274 if (scanMode) {
275 scanChar();
276 _tokenStartPtr = _codePtr - 1;
277 _tokenLine = _currentLineNumber;
278 _tokenColumn = _currentColumnNumber;
279 tokenKind = scanString(*scanMode);
280 }
281 }
282 }
 // Nothing was resumed above: scan a regular token.
283 if (tokenKind == T_ERROR)
284 tokenKind = scanToken();
285 _tokenLength = _codePtr - _tokenStartPtr - 1;
286 switch (tokenKind) {
287 // end of line and comments should not "overwrite" the old token type...
288 case T_EOL:
289 return tokenKind;
290 case T_COMMENT:
291 _state.comments = CommentState::HadComment;
292 return tokenKind;
293 case T_PARTIAL_COMMENT:
294 _state.comments = CommentState::InMultilineComment;
295 return tokenKind;
296 default:
297 _state.comments = CommentState::NoComment;
298 break;
299 }
300 _state.tokenKind = tokenKind;
301
302 _state.delimited = false;
303 _state.restrictedKeyword = false;
304 _state.followsClosingBrace = (previousTokenKind == T_RBRACE);
305
306 // update the flags
307 switch (_state.tokenKind) {
308 case T_LBRACE:
 // Braces are only counted while the count is positive.
309 if (_state.bracesCount > 0)
310 ++_state.bracesCount;
311 Q_FALLTHROUGH();
312 case T_SEMICOLON:
313 _state.importState = ImportState::NoQmlImport;
314 Q_FALLTHROUGH();
315 case T_QUESTION:
316 case T_COLON:
317 case T_TILDE:
318 _state.delimited = true;
319 break;
320 case T_AUTOMATIC_SEMICOLON:
321 case T_AS:
322 _state.importState = ImportState::NoQmlImport;
323 Q_FALLTHROUGH();
324 default:
325 if (isBinop(_state.tokenKind))
326 _state.delimited = true;
327 break;
328
329 case T_IMPORT:
330 if (qmlMode() || (_state.handlingDirectives && previousTokenKind == T_DOT))
331 _state.importState = ImportState::SawImport;
332 if (isBinop(_state.tokenKind))
333 _state.delimited = true;
334 break;
335
336 case T_IF:
337 case T_FOR:
338 case T_WHILE:
339 case T_WITH:
 // Start counting the parentheses that follow these keywords.
340 _state.parenthesesState = CountParentheses;
341 _state.parenthesesCount = 0;
342 break;
343
344 case T_ELSE:
345 case T_DO:
346 _state.parenthesesState = BalancedParentheses;
347 break;
348
349 case T_CONTINUE:
350 case T_BREAK:
351 case T_RETURN:
352 case T_YIELD:
353 case T_THROW:
354 _state.restrictedKeyword = true;
355 break;
356 case T_RBRACE:
357 if (_state.bracesCount > 0)
358 --_state.bracesCount;
 // With the count at zero, rescan: scanToken() treats bracesCount == 0
 // as being inside a template string and continues scanning it.
359 if (_state.bracesCount == 0)
360 goto again;
361 } // switch
362
363 // update the parentheses state
364 switch (_state.parenthesesState) {
365 case IgnoreParentheses:
366 break;
367
368 case CountParentheses:
369 if (_state.tokenKind == T_RPAREN) {
370 --_state.parenthesesCount;
371 if (_state.parenthesesCount == 0)
372 _state.parenthesesState = BalancedParentheses;
373 } else if (_state.tokenKind == T_LPAREN) {
374 ++_state.parenthesesCount;
375 }
376 break;
377
378 case BalancedParentheses:
 // Any token other than "do" / "else" ends the balanced state.
379 if (_state.tokenKind != T_DO && _state.tokenKind != T_ELSE)
380 _state.parenthesesState = IgnoreParentheses;
381 break;
382 } // switch
383
384 return _state.tokenKind;
385}
386
387uint Lexer::decodeUnicodeEscapeCharacter(bool *ok)
388{
389 Q_ASSERT(_state.currentChar == u'u');
390 scanChar(); // skip u
391 constexpr int distanceFromFirstHexToLastHex = 3;
392 if (_codePtr + distanceFromFirstHexToLastHex <= _endPtr && isHexDigit(_state.currentChar)) {
393 uint codePoint = 0;
394 for (int i = 0; i < 4; ++i) {
395 int digit = hexDigit(_state.currentChar);
396 if (digit < 0)
397 goto error;
398 codePoint *= 16;
399 codePoint += digit;
400 scanChar();
401 }
402
403 *ok = true;
404 return codePoint;
405 } else if (_codePtr < _endPtr && _state.currentChar == u'{') {
406 scanChar(); // skip '{'
407 uint codePoint = 0;
408 if (!isHexDigit(_state.currentChar))
409 // need at least one hex digit
410 goto error;
411
412 while (_codePtr <= _endPtr) {
413 int digit = hexDigit(_state.currentChar);
414 if (digit < 0)
415 break;
416 codePoint *= 16;
417 codePoint += digit;
418 if (codePoint > 0x10ffff)
419 goto error;
420 scanChar();
421 }
422
423 if (_state.currentChar != u'}')
424 goto error;
425
426 scanChar(); // skip '}'
427
428
429 *ok = true;
430 return codePoint;
431 }
432
433error:
434 _state.errorCode = IllegalUnicodeEscapeSequence;
435 _errorMessage = QCoreApplication::translate("QQmlParser", "Illegal unicode escape sequence");
436
437 *ok = false;
438 return 0;
439}
440
441QChar Lexer::decodeHexEscapeCharacter(bool *ok)
442{
443 if (isHexDigit(_codePtr[0]) && isHexDigit(_codePtr[1])) {
444 scanChar();
445
446 const QChar c1 = _state.currentChar;
447 scanChar();
448
449 const QChar c2 = _state.currentChar;
450 scanChar();
451
452 if (ok)
453 *ok = true;
454
455 return convertHex(c1, c2);
456 }
457
458 *ok = false;
459 return QChar();
460}
461
462namespace QQmlJS {
463QDebug operator<<(QDebug dbg, const Lexer &l)
464{
465 dbg << "{\n"
466 << " engine:" << qsizetype(l._engine) << ",\n"
467 << " lexMode:" << int(l._lexMode) << ",\n"
468 << " code.size:" << qsizetype(l._code.unicode()) << "+" << l._code.size() << ",\n"
469 << " endPtr: codePtr + " << (l._endPtr - l._codePtr) << ",\n"
470 << " qmlMode:" << l._qmlMode << ",\n"
471 << " staticIsKeyword:" << l._staticIsKeyword << ",\n"
472 << " currentLineNumber:" << l._currentLineNumber << ",\n"
473 << " currentColumnNumber:" << l._currentColumnNumber << ",\n"
474 << " currentOffset:" << l._currentOffset << ",\n"
475 << " tokenLength:" << l._tokenLength << ",\n"
476 << " tokenLine:" << l._tokenLine << ",\n"
477 << " tokenColumn:" << l._tokenColumn << ",\n"
478 << " tokenText:" << l._tokenText << ",\n"
479 << " skipLinefeed:" << l._skipLinefeed << ",\n"
480 << " errorMessage:" << l._errorMessage << ",\n"
481 << " tokenSpell:" << l._tokenSpell << ",\n"
482 << " rawString:" << l._rawString << ",\n";
483 if (l._codePtr)
484 dbg << " codePtr: code.unicode()+" << (l._codePtr - l._code.unicode()) << ",\n";
485 else
486 dbg << " codePtr: *null*,\n";
487 if (l._tokenStartPtr)
488 dbg << " tokenStartPtr: codePtr " << (l._tokenStartPtr - l._codePtr) << ",\n";
489 else
490 dbg << " tokenStartPtr: *null*,\n";
491 dbg << " state:" << l._state << "\n}";
492 return dbg;
493}
494}
495
496static inline bool isIdentifierStart(uint ch)
497{
498 // fast path for ascii
499 if ((ch >= u'a' && ch <= u'z') ||
500 (ch >= u'A' && ch <= u'Z') ||
501 ch == u'$' || ch == u'_')
502 return true;
503
504 switch (QChar::category(ch)) {
505 case QChar::Number_Letter:
506 case QChar::Letter_Uppercase:
507 case QChar::Letter_Lowercase:
508 case QChar::Letter_Titlecase:
509 case QChar::Letter_Modifier:
510 case QChar::Letter_Other:
511 return true;
512 default:
513 break;
514 }
515 return false;
516}
517
518static bool isIdentifierPart(uint ch)
519{
520 // fast path for ascii
521 if ((ch >= u'a' && ch <= u'z') ||
522 (ch >= u'A' && ch <= u'Z') ||
523 (ch >= u'0' && ch <= u'9') ||
524 ch == u'$' || ch == u'_' ||
525 ch == 0x200c /* ZWNJ */ || ch == 0x200d /* ZWJ */)
526 return true;
527
528 switch (QChar::category(ch)) {
529 case QChar::Mark_NonSpacing:
530 case QChar::Mark_SpacingCombining:
531
532 case QChar::Number_DecimalDigit:
533 case QChar::Number_Letter:
534
535 case QChar::Letter_Uppercase:
536 case QChar::Letter_Lowercase:
537 case QChar::Letter_Titlecase:
538 case QChar::Letter_Modifier:
539 case QChar::Letter_Other:
540
541 case QChar::Punctuation_Connector:
542 return true;
543 default:
544 break;
545 }
546 return false;
547}
548
// Scans a single token beginning at _state.currentChar and returns its kind.
//
// A token stacked by an earlier call is returned first. If _state.bracesCount
// is zero the scan continues a template string instead. Otherwise whitespace
// is skipped (a line terminator after a restricted keyword yields
// T_SEMICOLON), the token start position is recorded, and the first character
// selects the token. Comments are returned as T_COMMENT / T_PARTIAL_COMMENT in
// LexMode::LineByLine and skipped in other modes. T_ERROR is returned for
// input that does not start any token handled here.
549int Lexer::scanToken()
550{
 // Hand out the token queued by a previous call (see the "++" / "--" cases).
551 if (_state.stackToken != -1) {
552 int tk = _state.stackToken;
553 _state.stackToken = -1;
554 return tk;
555 }
556
557 if (_state.bracesCount == 0) {
558 // we're inside a Template string
559 return scanString(TemplateContinuation);
560 }
561
562 if (_state.comments == CommentState::NoComment)
563 _state.terminator = false;
564
 // Comments jump back here in non line-by-line mode so the next real token is scanned.
565again:
566 _state.validTokenText = false;
567
568 while (_state.currentChar.isSpace()) {
569 if (isLineTerminator()) {
570 bool isAtEnd = (_codePtr + (_skipLinefeed ? 1 : 0)) == _endPtr;
571 if (_state.restrictedKeyword) {
572 // automatic semicolon insertion
573 _tokenLine = _currentLineNumber;
574 _tokenColumn = _currentColumnNumber;
575 _tokenStartPtr = _codePtr - 1;
576 return T_SEMICOLON;
577 } else if (_lexMode == LexMode::WholeCode || !isAtEnd) {
578 _state.terminator = true;
579 syncProhibitAutomaticSemicolon();
580 } // else we will do the previous things at the start of next line...
581 }
582
583 scanChar();
584 }
585
 // The token starts at the character most recently read by scanChar().
586 _tokenStartPtr = _codePtr - 1;
587 _tokenLine = _currentLineNumber;
588 _tokenColumn = _currentColumnNumber;
589
 // At or past the end of the input.
590 if (_codePtr >= _endPtr) {
591 if (_lexMode == LexMode::LineByLine) {
592 if (!_code.isEmpty()) {
593 _state.currentChar = *(_codePtr - 2);
594 return T_EOL;
595 } else {
596 return EOF_SYMBOL;
597 }
598 } else if (_codePtr > _endPtr) {
599 return EOF_SYMBOL;
600 }
601 }
602
 // ch is the token's first character; _state.currentChar now holds the one after it.
603 const QChar ch = _state.currentChar;
604 scanChar();
605
606 switch (ch.unicode()) {
607 case u'~': return T_TILDE;
608 case u'}': return T_RBRACE;
609
610 case u'|':
611 if (_state.currentChar == u'|') {
612 scanChar();
613 return T_OR_OR;
614 } else if (_state.currentChar == u'=') {
615 scanChar();
616 return T_OR_EQ;
617 }
618 return T_OR;
619
620 case u'{': return T_LBRACE;
621
622 case u'^':
623 if (_state.currentChar == u'=') {
624 scanChar();
625 return T_XOR_EQ;
626 }
627 return T_XOR;
628
629 case u']': return T_RBRACKET;
630 case u'[': return T_LBRACKET;
631 case u'?': {
632 if (_state.currentChar == u'?') {
633 scanChar();
634 return T_QUESTION_QUESTION;
635 }
 // "?." only forms T_QUESTION_DOT when the character after the dot is not a digit.
636 if (_state.currentChar == u'.' && !peekChar().isDigit()) {
637 scanChar();
638 return T_QUESTION_DOT;
639 }
640
641 return T_QUESTION;
642 }
643
644 case u'>':
645 if (_state.currentChar == u'>') {
646 scanChar();
647 if (_state.currentChar == u'>') {
648 scanChar();
649 if (_state.currentChar == u'=') {
650 scanChar();
651 return T_GT_GT_GT_EQ;
652 }
653 return T_GT_GT_GT;
654 } else if (_state.currentChar == u'=') {
655 scanChar();
656 return T_GT_GT_EQ;
657 }
658 return T_GT_GT;
659 } else if (_state.currentChar == u'=') {
660 scanChar();
661 return T_GE;
662 }
663 return T_GT;
664
665 case u'=':
666 if (_state.currentChar == u'=') {
667 scanChar();
668 if (_state.currentChar == u'=') {
669 scanChar();
670 return T_EQ_EQ_EQ;
671 }
672 return T_EQ_EQ;
673 } else if (_state.currentChar == u'>') {
674 scanChar();
675 return T_ARROW;
676 }
677 return T_EQ;
678
679 case u'<':
680 if (_state.currentChar == u'=') {
681 scanChar();
682 return T_LE;
683 } else if (_state.currentChar == u'<') {
684 scanChar();
685 if (_state.currentChar == u'=') {
686 scanChar();
687 return T_LT_LT_EQ;
688 }
689 return T_LT_LT;
690 }
691 return T_LT;
692
693 case u';': return T_SEMICOLON;
694 case u':': return T_COLON;
695
 // '/' starts a block comment, a line comment, "/=" or a plain divide token.
696 case u'/':
697 switch (_state.currentChar.unicode()) {
698 case u'*':
699 scanChar();
700 while (_codePtr <= _endPtr) {
701 if (_state.currentChar == u'*') {
702 scanChar();
703 if (_state.currentChar == u'/') {
704 scanChar();
705 if (_engine) {
706 _engine->addComment(tokenOffset() + 2,
707 _codePtr - _tokenStartPtr - 1 - 4, tokenStartLine(),
708 tokenStartColumn() + 2);
709 }
710 if (_lexMode == LexMode::LineByLine)
711 return T_COMMENT;
712 else
713 goto again;
714 }
715 } else {
716 scanChar();
717 }
718 }
 // The input ended before the closing "*/".
719 if (_lexMode == LexMode::LineByLine)
720 return T_PARTIAL_COMMENT;
721 else
722 goto again;
723 case u'/':
724 while (_codePtr <= _endPtr && !isLineTerminator()) {
725 scanChar();
726 }
727 if (_engine) {
728 _engine->addComment(tokenOffset() + 2, _codePtr - _tokenStartPtr - 1 - 2,
729 tokenStartLine(), tokenStartColumn() + 2);
730 }
731 if (_lexMode == LexMode::LineByLine)
732 return T_COMMENT;
733 else
734 goto again;
735 case u'=':
736 scanChar();
737 return T_DIVIDE_EQ;
738 default:
739 return T_DIVIDE_;
740 }
741 case u'.':
 // After "import" a dot is always T_DOT; it never starts a number or "...".
742 if (_state.importState == ImportState::SawImport)
743 return T_DOT;
744 if (isDecimalDigit(_state.currentChar.unicode()))
745 return scanNumber(ch);
746 if (_state.currentChar == u'.') {
747 scanChar();
748 if (_state.currentChar == u'.') {
749 scanChar();
750 return T_ELLIPSIS;
751 } else {
752 _state.errorCode = IllegalCharacter;
753 _errorMessage = QCoreApplication::translate("QQmlParser", "Unexpected token '.'");
754 return T_ERROR;
755 }
756 }
757 return T_DOT;
758
759 case u'-':
760 if (_state.currentChar == u'=') {
761 scanChar();
762 return T_MINUS_EQ;
763 } else if (_state.currentChar == u'-') {
764 scanChar();
765
 // Under these conditions a semicolon is returned first and the
 // operator is queued in stackToken for the next call.
766 if (_state.terminator && !_state.delimited && !_state.prohibitAutomaticSemicolon
767 && _state.tokenKind != T_LPAREN) {
768 _state.stackToken = T_MINUS_MINUS;
769 return T_SEMICOLON;
770 }
771
772 return T_MINUS_MINUS;
773 }
774 return T_MINUS;
775
776 case u',': return T_COMMA;
777
778 case u'+':
779 if (_state.currentChar == u'=') {
780 scanChar();
781 return T_PLUS_EQ;
782 } else if (_state.currentChar == u'+') {
783 scanChar();
784
 // Same semicolon-then-operator handling as for "--" above.
785 if (_state.terminator && !_state.delimited && !_state.prohibitAutomaticSemicolon
786 && _state.tokenKind != T_LPAREN) {
787 _state.stackToken = T_PLUS_PLUS;
788 return T_SEMICOLON;
789 }
790
791 return T_PLUS_PLUS;
792 }
793 return T_PLUS;
794
795 case u'*':
796 if (_state.currentChar == u'=') {
797 scanChar();
798 return T_STAR_EQ;
799 } else if (_state.currentChar == u'*') {
800 scanChar();
801 if (_state.currentChar == u'=') {
802 scanChar();
803 return T_STAR_STAR_EQ;
804 }
805 return T_STAR_STAR;
806 }
807 return T_STAR;
808
809 case u')': return T_RPAREN;
810 case u'(': return T_LPAREN;
811
812 case u'@': return T_AT;
813
814 case u'&':
815 if (_state.currentChar == u'=') {
816 scanChar();
817 return T_AND_EQ;
818 } else if (_state.currentChar == u'&') {
819 scanChar();
820 return T_AND_AND;
821 }
822 return T_AND;
823
824 case u'%':
825 if (_state.currentChar == u'=') {
826 scanChar();
827 return T_REMAINDER_EQ;
828 }
829 return T_REMAINDER;
830
831 case u'!':
832 if (_state.currentChar == u'=') {
833 scanChar();
834 if (_state.currentChar == u'=') {
835 scanChar();
836 return T_NOT_EQ_EQ;
837 }
838 return T_NOT_EQ;
839 }
840 return T_NOT;
841
842 case u'`':
 // Save the current brace count; scanString() pops it when the template is closed.
843 _state.outerTemplateBraceCount.push(_state.bracesCount);
844 Q_FALLTHROUGH();
845 case u'\'':
846 case u'"':
847 return scanString(ScanStringMode(ch.unicode()));
848 case u'0':
849 case u'1':
850 case u'2':
851 case u'3':
852 case u'4':
853 case u'5':
854 case u'6':
855 case u'7':
856 case u'8':
857 case u'9':
 // After "import", digits are scanned as a version number rather than a numeric literal.
858 if (_state.importState == ImportState::SawImport)
859 return scanVersionNumber(ch);
860 else
861 return scanNumber(ch);
862
863 case '#':
864 if (_currentLineNumber == 1 && _currentColumnNumber == 2) {
865 // shebang support
866 while (_codePtr <= _endPtr && !isLineTerminator()) {
867 scanChar();
868 }
869 if (_engine) {
870 _engine->addComment(tokenOffset(), _codePtr - _tokenStartPtr - 1, tokenStartLine(),
871 tokenStartColumn());
872 }
873 if (_lexMode == LexMode::LineByLine)
874 return T_COMMENT;
875 else
876 goto again;
877 }
878 Q_FALLTHROUGH();
879
 // Everything else: an identifier or keyword, possibly containing
 // surrogate pairs and \u escapes; anything that is not an identifier
 // start falls out of the switch to T_ERROR.
880 default: {
881 uint c = ch.unicode();
882 bool identifierWithEscapeChars = false;
883 if (QChar::isHighSurrogate(c) && QChar::isLowSurrogate(_state.currentChar.unicode())) {
884 c = QChar::surrogateToUcs4(ushort(c), _state.currentChar.unicode());
885 scanChar();
886 } else if (c == '\\' && _state.currentChar == u'u') {
887 identifierWithEscapeChars = true;
888 bool ok = false;
889 c = decodeUnicodeEscapeCharacter(&ok);
890 if (!ok)
891 return T_ERROR;
892 }
893 if (isIdentifierStart(c)) {
894 if (identifierWithEscapeChars) {
 // The decoded spelling is collected in _tokenText.
895 _tokenText.resize(0);
896 if (QChar::requiresSurrogates(c)) {
897 _tokenText += QChar(QChar::highSurrogate(c));
898 _tokenText += QChar(QChar::lowSurrogate(c));
899 } else {
900 _tokenText += QChar(c);
901 }
902 _state.validTokenText = true;
903 }
904 while (_codePtr <= _endPtr) {
905 c = _state.currentChar.unicode();
906 if (QChar::isHighSurrogate(c) && QChar::isLowSurrogate(_codePtr->unicode())) {
907 scanChar();
908 c = QChar::surrogateToUcs4(ushort(c), _state.currentChar.unicode());
909 } else if (_state.currentChar == u'\\' && _codePtr[0] == u'u') {
910 if (!identifierWithEscapeChars) {
 // First escape in this identifier: seed _tokenText with the raw text scanned so far.
911 identifierWithEscapeChars = true;
912 _tokenText.resize(0);
913 _tokenText.insert(0, _tokenStartPtr, _codePtr - _tokenStartPtr - 1);
914 _state.validTokenText = true;
915 }
916
917 scanChar(); // skip '\\'
918 bool ok = false;
919 c = decodeUnicodeEscapeCharacter(&ok);
920 if (!ok)
921 return T_ERROR;
922
923 if (!isIdentifierPart(c))
924 break;
925
926 if (QChar::requiresSurrogates(c)) {
927 _tokenText += QChar(QChar::highSurrogate(c));
928 _tokenText += QChar(QChar::lowSurrogate(c));
929 } else {
930 _tokenText += QChar(c);
931 }
932 continue;
933 }
934
935 if (!isIdentifierPart(c))
936 break;
937
938 if (identifierWithEscapeChars) {
939 if (QChar::requiresSurrogates(c)) {
940 _tokenText += QChar(QChar::highSurrogate(c));
941 _tokenText += QChar(QChar::lowSurrogate(c));
942 } else {
943 _tokenText += QChar(c);
944 }
945 }
946 scanChar();
947 }
948
949 const auto token = QStringView(_tokenStartPtr, _codePtr - 1);
950 _tokenLength = token.size();
951 int kind = T_IDENTIFIER;
952
 // Identifiers containing escapes are never classified; they stay T_IDENTIFIER.
953 if (!identifierWithEscapeChars)
954 kind = classify(token, parseModeFlags());
955
956 if (_engine) {
957 if (kind == T_IDENTIFIER && identifierWithEscapeChars)
958 _tokenSpell = _engine->newStringRef(_tokenText);
959 else
960 _tokenSpell = _engine->midRef(_tokenStartPtr - _code.unicode(), _tokenLength);
961 }
962
963 return kind;
964 }
965 }
966
967 break;
968 }
969
970 return T_ERROR;
971}
972
// Scans a string or template literal and returns its token kind.
//
// The quote character is derived from mode; TemplateContinuation uses the
// same quote as TemplateHead. When an engine is set, a first loop looks for
// the closing quote and, if it is reached without meeting a backslash, a "$"
// inside a template or a line terminator, returns a reference into the source
// text. Otherwise the literal is rebuilt in _tokenText with escape sequences
// decoded.
//
// Returns T_STRING_LITERAL, T_NO_SUBSTITUTION_TEMPLATE, T_TEMPLATE_HEAD,
// T_TEMPLATE_MIDDLE or T_TEMPLATE_TAIL on success. When the input ends inside
// the literal, the matching T_PARTIAL_* kind is returned in
// LexMode::LineByLine with non-empty code, and T_ERROR with
// UnclosedStringLiteral otherwise. Invalid escapes and, outside QML mode, a
// raw newline in a non-template string also return T_ERROR.
973int Lexer::scanString(ScanStringMode mode)
974{
975 const char16_t quote = mode == TemplateContinuation ? TemplateHead : mode;
976 // we actually use T_STRING_LITERAL also for multiline strings, should we want to
977 // change that we should set it to:
978 // _state.tokenKind == T_PARTIAL_SINGLE_QUOTE_STRING_LITERAL ||
979 // _state.tokenKind == T_PARTIAL_DOUBLE_QUOTE_STRING_LITERAL
980 // here and uncomment the multilineStringLiteral = true below.
981 bool multilineStringLiteral = false;
982
983 const QChar *startCode = _codePtr - 1;
984 // in case we just parsed a \r, we need to reset this flag to get things working
985 // correctly in the loop below and afterwards
986 _skipLinefeed = false;
987 bool first = true;
988
 // First loop: runs only with an engine, because the result is a reference obtained from it.
989 if (_engine) {
990 while (_codePtr <= _endPtr) {
991 if (isLineTerminator()) {
 // Templates and QML-mode strings may contain newlines: leave this loop for the second one.
992 if ((quote == u'`' || qmlMode())) {
993 if (first)
994 --_currentLineNumber; // will be read again in scanChar()
995 break;
996 }
997 _state.errorCode = IllegalCharacter;
998 _errorMessage = QCoreApplication::translate("QQmlParser",
999 "Stray newline in string literal");
1000 return T_ERROR;
1001 } else if (_state.currentChar == u'\\') {
1002 break;
1003 } else if (_state.currentChar == u'$' && quote == u'`') {
1004 break;
1005 } else if (_state.currentChar == quote) {
1006 _tokenSpell =
1007 _engine->midRef(startCode - _code.unicode(), _codePtr - startCode - 1);
1008 _rawString = _tokenSpell;
1009 scanChar();
1010
 // Closing a template restores the brace count saved when it was opened.
1011 if (quote == u'`')
1012 _state.bracesCount = _state.outerTemplateBraceCount.pop();
1013 if (mode == TemplateHead)
1014 return T_NO_SUBSTITUTION_TEMPLATE;
1015 else if (mode == TemplateContinuation)
1016 return T_TEMPLATE_TAIL;
1017 else if (multilineStringLiteral)
1018 return T_MULTILINE_STRING_LITERAL;
1019 else
1020 return T_STRING_LITERAL;
1021 }
1022 // don't use scanChar() here, that would transform \r sequences and the midRef() call would create the wrong result
1023 _state.currentChar = *_codePtr++;
1024 ++_currentColumnNumber;
1025 first = false;
1026 }
1027 }
1028
1029 // rewind by one char, so things gets scanned correctly
1030 --_codePtr;
1031 --_currentColumnNumber;
1032
 // Second loop: seed _tokenText with what was scanned so far, then append character by character.
1033 _state.validTokenText = true;
1034 _tokenText = QString(startCode, _codePtr - startCode);
1035
 // Stores the text from startCode up to (end - 1) as _rawString, with
 // "\r\n" and '\r' replaced by '\n'. Only called below when _engine is set.
1036 auto setRawString = [&](const QChar *end) {
1037 QString raw(startCode, end - startCode - 1);
1038 raw.replace(QLatin1String("\r\n"), QLatin1String("\n"));
1039 raw.replace(u'\r', u'\n');
1040 _rawString = _engine->newStringRef(raw);
1041 };
1042
1043 scanChar();
1044
1045 while (_codePtr <= _endPtr) {
1046 if (_state.currentChar == quote) {
1047 scanChar();
1048
1049 if (_engine) {
1050 _tokenSpell = _engine->newStringRef(_tokenText);
1051 if (quote == u'`')
1052 setRawString(_codePtr - 1);
1053 }
1054
1055 if (quote == u'`')
1056 _state.bracesCount = _state.outerTemplateBraceCount.pop();
1057
1058 if (mode == TemplateContinuation)
1059 return T_TEMPLATE_TAIL;
1060 else if (mode == TemplateHead)
1061 return T_NO_SUBSTITUTION_TEMPLATE;
1062
1063 return multilineStringLiteral ? T_MULTILINE_STRING_LITERAL : T_STRING_LITERAL;
1064 } else if (quote == u'`' && _state.currentChar == u'$' && *_codePtr == u'{') {
 // "${" starts a substitution: consume both characters and set the brace count to 1.
1065 scanChar();
1066 scanChar();
1067 _state.bracesCount = 1;
1068 if (_engine) {
1069 _tokenSpell = _engine->newStringRef(_tokenText);
1070 setRawString(_codePtr - 2);
1071 }
1072
1073 return (mode == TemplateHead ? T_TEMPLATE_HEAD : T_TEMPLATE_MIDDLE);
1074 } else if (_state.currentChar == u'\\') {
1075 scanChar();
1076 if (_codePtr > _endPtr) {
1077 _state.errorCode = IllegalEscapeSequence;
1078 _errorMessage = QCoreApplication::translate(
1079 "QQmlParser", "End of file reached at escape sequence");
1080 return T_ERROR;
1081 }
1082
 // u receives the decoded character and is appended to _tokenText after the switch.
1083 QChar u;
1084
1085 switch (_state.currentChar.unicode()) {
1086 // unicode escape sequence
1087 case u'u': {
1088 bool ok = false;
1089 uint codePoint = decodeUnicodeEscapeCharacter(&ok);
1090 if (!ok)
1091 return T_ERROR;
1092 if (QChar::requiresSurrogates(codePoint)) {
1093 // need to use a surrogate pair
1094 _tokenText += QChar(QChar::highSurrogate(codePoint));
1095 u = QChar::lowSurrogate(codePoint);
1096 } else {
1097 u = QChar(codePoint);
1098 }
1099 } break;
1100
1101 // hex escape sequence
1102 case u'x': {
1103 bool ok = false;
1104 u = decodeHexEscapeCharacter(&ok);
1105 if (!ok) {
1106 _state.errorCode = IllegalHexadecimalEscapeSequence;
1107 _errorMessage = QCoreApplication::translate(
1108 "QQmlParser", "Illegal hexadecimal escape sequence");
1109 return T_ERROR;
1110 }
1111 } break;
1112
1113 // single character escape sequence
1114 case u'\\': u = u'\\'; scanChar(); break;
1115 case u'\'': u = u'\''; scanChar(); break;
1116 case u'\"': u = u'\"'; scanChar(); break;
1117 case u'b': u = u'\b'; scanChar(); break;
1118 case u'f': u = u'\f'; scanChar(); break;
1119 case u'n': u = u'\n'; scanChar(); break;
1120 case u'r': u = u'\r'; scanChar(); break;
1121 case u't': u = u'\t'; scanChar(); break;
1122 case u'v': u = u'\v'; scanChar(); break;
1123
1124 case u'0':
 // "\0" not followed by a digit is the NUL character; otherwise it is rejected as octal below.
1125 if (!_codePtr->isDigit()) {
1126 scanChar();
1127 u = u'\0';
1128 break;
1129 }
1130 Q_FALLTHROUGH();
1131 case u'1':
1132 case u'2':
1133 case u'3':
1134 case u'4':
1135 case u'5':
1136 case u'6':
1137 case u'7':
1138 case u'8':
1139 case u'9':
1140 _state.errorCode = IllegalEscapeSequence;
1141 _errorMessage = QCoreApplication::translate(
1142 "QQmlParser", "Octal escape sequences are not allowed");
1143 return T_ERROR;
1144
 // Backslash followed by a line terminator: nothing is appended to _tokenText.
1145 case u'\r':
1146 case u'\n':
1147 case 0x2028u:
1148 case 0x2029u:
1149 // uncomment the following to use T_MULTILINE_STRING_LITERAL
1150 // multilineStringLiteral = true;
1151 scanChar();
1152 continue;
1153
1154 default:
1155 // non escape character
1156 u = _state.currentChar;
1157 scanChar();
1158 }
1159
1160 _tokenText += u;
1161 } else {
1162 _tokenText += _state.currentChar;
1163 scanChar();
1164 }
1165 }
 // The input ended before the closing quote was found.
1166 if (_lexMode == LexMode::LineByLine && !_code.isEmpty()) {
1167 if (mode == TemplateContinuation)
1168 return T_PARTIAL_TEMPLATE_MIDDLE;
1169 else if (mode == TemplateHead)
1170 return T_PARTIAL_TEMPLATE_HEAD;
1171 else if (mode == SingleQuote)
1172 return T_PARTIAL_SINGLE_QUOTE_STRING_LITERAL;
1173 return T_PARTIAL_DOUBLE_QUOTE_STRING_LITERAL;
1174 }
1175 _state.errorCode = UnclosedStringLiteral;
1176 _errorMessage = QCoreApplication::translate("QQmlParser", "Unclosed string at end of line");
1177 return T_ERROR;
1178}
1179
1180int Lexer::scanNumber(QChar ch)
1181{
1182 auto scanOptionalNumericSeparator = [this](auto isNextCharacterValid){
1183 if (_state.currentChar == u'_') {
1184 if (peekChar() == u'_') {
1185 _state.errorCode = IllegalNumber;
1186 _errorMessage = QCoreApplication::translate(
1187 "QQmlParser",
1188 "There can be at most one numeric separator between digits"
1189 );
1190 return false;
1191 }
1192
1193 if (!isNextCharacterValid()) {
1194 _state.errorCode = IllegalNumber;
1195 _errorMessage = QCoreApplication::translate(
1196 "QQmlParser",
1197 "A trailing numeric separator is not allowed in numeric literals"
1198 );
1199 return false;
1200 }
1201
1202 scanChar();
1203 }
1204
1205 return true;
1206 };
1207
1208 if (ch == u'0') {
1209 if (_state.currentChar == u'x' || _state.currentChar == u'X') {
1210 ch = _state.currentChar; // remember the x or X to use it in the error message below.
1211
1212 // parse hex integer literal
1213 scanChar(); // consume 'x'
1214
1215 if (!isHexDigit(_state.currentChar)) {
1216 _state.errorCode = IllegalNumber;
1217 _errorMessage = QCoreApplication::translate(
1218 "QQmlParser",
1219 "At least one hexadecimal digit is required after '0%1'")
1220 .arg(ch);
1221 return T_ERROR;
1222 }
1223
1224 double d = 0.;
1225 while (1) {
1226 int digit = ::hexDigit(_state.currentChar);
1227 if (digit < 0)
1228 break;
1229 d *= 16;
1230 d += digit;
1231 scanChar();
1232
1233 if (!scanOptionalNumericSeparator([this](){ return isHexDigit(peekChar()); }))
1234 return T_ERROR;
1235 }
1236
1237 _state.tokenValue = d;
1238 return T_NUMERIC_LITERAL;
1239 } else if (_state.currentChar == u'o' || _state.currentChar == u'O') {
1240 ch = _state.currentChar; // remember the o or O to use it in the error message below.
1241
1242 // parse octal integer literal
1243 scanChar(); // consume 'o'
1244
1245 if (!isOctalDigit(_state.currentChar.unicode())) {
1246 _state.errorCode = IllegalNumber;
1247 _errorMessage =
1248 QCoreApplication::translate(
1249 "QQmlParser", "At least one octal digit is required after '0%1'")
1250 .arg(ch);
1251 return T_ERROR;
1252 }
1253
1254 double d = 0.;
1255 while (1) {
1256 int digit = ::octalDigit(_state.currentChar);
1257 if (digit < 0)
1258 break;
1259 d *= 8;
1260 d += digit;
1261 scanChar();
1262
1263 if (!scanOptionalNumericSeparator([this](){
1264 return isOctalDigit(peekChar().unicode());
1265 })) {
1266 return T_ERROR;
1267 }
1268 }
1269
1270 _state.tokenValue = d;
1271 return T_NUMERIC_LITERAL;
1272 } else if (_state.currentChar == u'b' || _state.currentChar == u'B') {
1273 ch = _state.currentChar; // remember the b or B to use it in the error message below.
1274
1275 // parse binary integer literal
1276 scanChar(); // consume 'b'
1277
1278 if (_state.currentChar.unicode() != u'0' && _state.currentChar.unicode() != u'1') {
1279 _state.errorCode = IllegalNumber;
1280 _errorMessage =
1281 QCoreApplication::translate(
1282 "QQmlParser", "At least one binary digit is required after '0%1'")
1283 .arg(ch);
1284 return T_ERROR;
1285 }
1286
1287 double d = 0.;
1288 while (1) {
1289 int digit = 0;
1290 if (_state.currentChar.unicode() == u'1')
1291 digit = 1;
1292 else if (_state.currentChar.unicode() != u'0')
1293 break;
1294 d *= 2;
1295 d += digit;
1296 scanChar();
1297
1298 if (!scanOptionalNumericSeparator([this](){
1299 return peekChar().unicode() == u'0' || peekChar().unicode() == u'1';
1300 })) {
1301 return T_ERROR;
1302 }
1303 }
1304
1305 _state.tokenValue = d;
1306 return T_NUMERIC_LITERAL;
1307 } else if (_state.currentChar.isDigit() && !qmlMode()) {
1308 _state.errorCode = IllegalCharacter;
1309 _errorMessage = QCoreApplication::translate("QQmlParser",
1310 "Decimal numbers can't start with '0'");
1311 return T_ERROR;
1312 }
1313 }
1314
1315 // decimal integer literal
1316 QVarLengthArray<char,32> chars;
1317 chars.append(ch.unicode());
1318
1319 if (ch != u'.') {
1320 if (!scanOptionalNumericSeparator([this](){ return peekChar().isDigit(); }))
1321 return T_ERROR;
1322
1323 while (_state.currentChar.isDigit()) {
1324 chars.append(_state.currentChar.unicode());
1325 scanChar(); // consume the digit
1326
1327 if (!scanOptionalNumericSeparator([this](){ return peekChar().isDigit(); }))
1328 return T_ERROR;
1329 }
1330
1331 if (_state.currentChar == u'.') {
1332 chars.append(_state.currentChar.unicode());
1333 scanChar(); // consume `.'
1334 }
1335 }
1336
1337 while (_state.currentChar.isDigit()) {
1338 chars.append(_state.currentChar.unicode());
1339 scanChar();
1340
1341 if (!scanOptionalNumericSeparator([this](){ return peekChar().isDigit(); }))
1342 return T_ERROR;
1343 }
1344
1345 if (_state.currentChar == u'e' || _state.currentChar == u'E') {
1346 if (_codePtr[0].isDigit()
1347 || ((_codePtr[0] == u'+' || _codePtr[0] == u'-') && _codePtr[1].isDigit())) {
1348
1349 chars.append(_state.currentChar.unicode());
1350 scanChar(); // consume `e'
1351
1352 if (_state.currentChar == u'+' || _state.currentChar == u'-') {
1353 chars.append(_state.currentChar.unicode());
1354 scanChar(); // consume the sign
1355 }
1356
1357 while (_state.currentChar.isDigit()) {
1358 chars.append(_state.currentChar.unicode());
1359 scanChar();
1360
1361 if (!scanOptionalNumericSeparator([this](){ return peekChar().isDigit(); }))
1362 return T_ERROR;
1363 }
1364 }
1365 }
1366
1367 const char *begin = chars.constData();
1368 const char *end = nullptr;
1369 bool ok = false;
1370
1371 _state.tokenValue = qstrntod(begin, chars.size(), &end, &ok);
1372
1373 if (end - begin != chars.size()) {
1374 _state.errorCode = IllegalExponentIndicator;
1375 _errorMessage =
1376 QCoreApplication::translate("QQmlParser", "Illegal syntax for exponential number");
1377 return T_ERROR;
1378 }
1379
1380 return T_NUMERIC_LITERAL;
1381}
1382
1383int Lexer::scanVersionNumber(QChar ch)
1384{
1385 if (ch == u'0') {
1386 _state.tokenValue = 0;
1387 return T_VERSION_NUMBER;
1388 }
1389
1390 int acc = 0;
1391 acc += ch.digitValue();
1392
1393 while (_state.currentChar.isDigit()) {
1394 acc *= 10;
1395 acc += _state.currentChar.digitValue();
1396 scanChar(); // consume the digit
1397 }
1398
1399 _state.tokenValue = acc;
1400 return T_VERSION_NUMBER;
1401}
1402
1403bool Lexer::scanRegExp(RegExpBodyPrefix prefix)
1404{
1405 _tokenText.resize(0);
1406 _state.validTokenText = true;
1407 _state.patternFlags = 0;
1408
1409 if (prefix == EqualPrefix)
1410 _tokenText += u'=';
1411
1412 while (true) {
1413 switch (_state.currentChar.unicode()) {
1414 case u'/':
1415 scanChar();
1416
1417 // scan the flags
1418 _state.patternFlags = 0;
1419 while (isIdentLetter(_state.currentChar)) {
1420 int flag = regExpFlagFromChar(_state.currentChar);
1421 if (flag == 0 || _state.patternFlags & flag) {
1422 _errorMessage = QCoreApplication::translate(
1423 "QQmlParser", "Invalid regular expression flag '%0'")
1424 .arg(QChar(_state.currentChar));
1425 return false;
1426 }
1427 _state.patternFlags |= flag;
1428 scanChar();
1429 }
1430
1431 _tokenLength = _codePtr - _tokenStartPtr - 1;
1432 return true;
1433
1434 case u'\\':
1435 // regular expression backslash sequence
1436 _tokenText += _state.currentChar;
1437 scanChar();
1438
1439 if (_codePtr > _endPtr || isLineTerminator()) {
1440 _errorMessage = QCoreApplication::translate(
1441 "QQmlParser", "Unterminated regular expression backslash sequence");
1442 return false;
1443 }
1444
1445 _tokenText += _state.currentChar;
1446 scanChar();
1447 break;
1448
1449 case u'[':
1450 // regular expression class
1451 _tokenText += _state.currentChar;
1452 scanChar();
1453
1454 while (_codePtr <= _endPtr && !isLineTerminator()) {
1455 if (_state.currentChar == u']')
1456 break;
1457 else if (_state.currentChar == u'\\') {
1458 // regular expression backslash sequence
1459 _tokenText += _state.currentChar;
1460 scanChar();
1461
1462 if (_codePtr > _endPtr || isLineTerminator()) {
1463 _errorMessage = QCoreApplication::translate(
1464 "QQmlParser", "Unterminated regular expression backslash sequence");
1465 return false;
1466 }
1467
1468 _tokenText += _state.currentChar;
1469 scanChar();
1470 } else {
1471 _tokenText += _state.currentChar;
1472 scanChar();
1473 }
1474 }
1475
1476 if (_state.currentChar != u']') {
1477 _errorMessage = QCoreApplication::translate(
1478 "QQmlParser", "Unterminated regular expression class");
1479 return false;
1480 }
1481
1482 _tokenText += _state.currentChar;
1483 scanChar(); // skip ]
1484 break;
1485
1486 default:
1487 if (_codePtr > _endPtr || isLineTerminator()) {
1488 _errorMessage = QCoreApplication::translate(
1489 "QQmlParser", "Unterminated regular expression literal");
1490 return false;
1491 } else {
1492 _tokenText += _state.currentChar;
1493 scanChar();
1494 }
1495 } // switch
1496 } // while
1497
1498 return false;
1499}
1500
1501bool Lexer::isLineTerminator() const
1502{
1503 const ushort unicode = _state.currentChar.unicode();
1504 return unicode == 0x000Au
1505 || unicode == 0x000Du
1506 || unicode == 0x2028u
1507 || unicode == 0x2029u;
1508}
1509
1510unsigned Lexer::isLineTerminatorSequence() const
1511{
1512 switch (_state.currentChar.unicode()) {
1513 case 0x000Au:
1514 case 0x2028u:
1515 case 0x2029u:
1516 return 1;
1517 case 0x000Du:
1518 if (_codePtr->unicode() == 0x000Au)
1519 return 2;
1520 else
1521 return 1;
1522 default:
1523 return 0;
1524 }
1525}
1526
1527bool Lexer::isIdentLetter(QChar ch)
1528{
1529 // ASCII-biased, since all reserved words are ASCII, aand hence the
1530 // bulk of content to be parsed.
1531 if ((ch >= u'a' && ch <= u'z')
1532 || (ch >= u'A' && ch <= u'Z')
1533 || ch == u'$' || ch == u'_')
1534 return true;
1535 if (ch.unicode() < 128)
1536 return false;
1537 return ch.isLetterOrNumber();
1538}
1539
1540bool Lexer::isDecimalDigit(ushort c)
1541{
1542 return (c >= u'0' && c <= u'9');
1543}
1544
1545bool Lexer::isHexDigit(QChar c)
1546{
1547 return ((c >= u'0' && c <= u'9')
1548 || (c >= u'a' && c <= u'f')
1549 || (c >= u'A' && c <= u'F'));
1550}
1551
1552bool Lexer::isOctalDigit(ushort c)
1553{
1554 return (c >= u'0' && c <= u'7');
1555}
1556
1557QString Lexer::tokenText() const
1558{
1559 if (_state.validTokenText)
1560 return _tokenText;
1561
1562 if (_state.tokenKind == T_STRING_LITERAL)
1563 return QString(_tokenStartPtr + 1, _tokenLength - 2);
1564
1565 return QString(_tokenStartPtr, _tokenLength);
1566}
1567
1568Lexer::Error Lexer::errorCode() const
1569{
1570 return _state.errorCode;
1571}
1572
1573QString Lexer::errorMessage() const
1574{
1575 return _errorMessage;
1576}
1577
1578void Lexer::syncProhibitAutomaticSemicolon()
1579{
1580 if (_state.parenthesesState == BalancedParentheses) {
1581 // we have seen something like "if (foo)", which means we should
1582 // never insert an automatic semicolon at this point, since it would
1583 // then be expanded into an empty statement (ECMA-262 7.9.1)
1584 _state.prohibitAutomaticSemicolon = true;
1585 _state.parenthesesState = IgnoreParentheses;
1586 } else {
1587 _state.prohibitAutomaticSemicolon = false;
1588 }
1589}
1590
1591bool Lexer::prevTerminator() const
1592{
1593 return _state.terminator;
1594}
1595
1596bool Lexer::followsClosingBrace() const
1597{
1598 return _state.followsClosingBrace;
1599}
1600
1601bool Lexer::canInsertAutomaticSemicolon(int token) const
1602{
1603 return token == T_RBRACE || token == EOF_SYMBOL || _state.terminator
1604 || _state.followsClosingBrace;
1605}
1606
1607static const int uriTokens[] = {
1608 QQmlJSGrammar::T_IDENTIFIER,
1609 QQmlJSGrammar::T_PROPERTY,
1610 QQmlJSGrammar::T_SIGNAL,
1611 QQmlJSGrammar::T_READONLY,
1612 QQmlJSGrammar::T_ON,
1613 QQmlJSGrammar::T_BREAK,
1614 QQmlJSGrammar::T_CASE,
1615 QQmlJSGrammar::T_CATCH,
1616 QQmlJSGrammar::T_CONTINUE,
1617 QQmlJSGrammar::T_DEFAULT,
1618 QQmlJSGrammar::T_DELETE,
1619 QQmlJSGrammar::T_DO,
1620 QQmlJSGrammar::T_ELSE,
1621 QQmlJSGrammar::T_FALSE,
1622 QQmlJSGrammar::T_FINAL,
1623 QQmlJSGrammar::T_FINALLY,
1624 QQmlJSGrammar::T_FOR,
1625 QQmlJSGrammar::T_FUNCTION,
1626 QQmlJSGrammar::T_IF,
1627 QQmlJSGrammar::T_IN,
1628 QQmlJSGrammar::T_OF,
1629 QQmlJSGrammar::T_INSTANCEOF,
1630 QQmlJSGrammar::T_NEW,
1631 QQmlJSGrammar::T_NULL,
1632 QQmlJSGrammar::T_RETURN,
1633 QQmlJSGrammar::T_SWITCH,
1634 QQmlJSGrammar::T_THIS,
1635 QQmlJSGrammar::T_THROW,
1636 QQmlJSGrammar::T_TRUE,
1637 QQmlJSGrammar::T_TRY,
1638 QQmlJSGrammar::T_TYPEOF,
1639 QQmlJSGrammar::T_VAR,
1640 QQmlJSGrammar::T_VOID,
1641 QQmlJSGrammar::T_WHILE,
1642 QQmlJSGrammar::T_CONST,
1643 QQmlJSGrammar::T_DEBUGGER,
1644 QQmlJSGrammar::T_ENUM,
1645 QQmlJSGrammar::T_PACKAGE,
1646 QQmlJSGrammar::T_ABSTRACT,
1647 QQmlJSGrammar::T_INTERFACE,
1648 QQmlJSGrammar::T_IMPLEMENTS,
1649 QQmlJSGrammar::T_PUBLIC,
1650 QQmlJSGrammar::T_PROTECTED,
1651 QQmlJSGrammar::T_PRIVATE,
1652 QQmlJSGrammar::T_NATIVE,
1653 QQmlJSGrammar::T_VOLATILE,
1654 QQmlJSGrammar::T_TRANSIENT,
1655 QQmlJSGrammar::T_SYNCHRONIZED,
1656 QQmlJSGrammar::T_THROWS,
1657 QQmlJSGrammar::T_WITH,
1658
1659 QQmlJSGrammar::EOF_SYMBOL
1660};
1661static inline bool isUriToken(int token)
1662{
1663 const int *current = uriTokens;
1664 while (*current != QQmlJSGrammar::EOF_SYMBOL) {
1665 if (*current == token)
1666 return true;
1667 ++current;
1668 }
1669 return false;
1670}
1671
1672bool Lexer::scanDirectives(Directives *directives, DiagnosticMessage *error)
1673{
1674 auto setError = [error, this](QString message) {
1675 error->message = std::move(message);
1676 error->loc.startLine = tokenStartLine();
1677 error->loc.startColumn = tokenStartColumn();
1678 };
1679
1680 QScopedValueRollback<bool> directivesGuard(_state.handlingDirectives, true);
1681 Q_ASSERT(!_qmlMode);
1682
1683 lex(); // fetch the first token
1684
1685 if (_state.tokenKind != T_DOT)
1686 return true;
1687
1688 do {
1689 const int lineNumber = tokenStartLine();
1690 const int column = tokenStartColumn();
1691
1692 lex(); // skip T_DOT
1693
1694 if (!(_state.tokenKind == T_IDENTIFIER || _state.tokenKind == T_IMPORT))
1695 return true; // expected a valid QML/JS directive
1696
1697 const QString directiveName = tokenText();
1698
1699 if (! (directiveName == QLatin1String("pragma") ||
1700 directiveName == QLatin1String("import"))) {
1701 setError(QCoreApplication::translate("QQmlParser", "Syntax error"));
1702 return false; // not a valid directive name
1703 }
1704
1705 // it must be a pragma or an import directive.
1706 if (directiveName == QLatin1String("pragma")) {
1707 // .pragma library
1708 if (! (lex() == T_IDENTIFIER && tokenText() == QLatin1String("library"))) {
1709 setError(QCoreApplication::translate("QQmlParser", "Syntax error"));
1710 return false; // expected `library
1711 }
1712
1713 // we found a .pragma library directive
1714 directives->pragmaLibrary();
1715
1716 } else {
1717 Q_ASSERT(directiveName == QLatin1String("import"));
1718 lex(); // skip .import
1719
1720 QString pathOrUri;
1721 QString version;
1722 bool fileImport = false; // file or uri import
1723
1724 if (_state.tokenKind == T_STRING_LITERAL) {
1725 // .import T_STRING_LITERAL as T_IDENTIFIER
1726
1727 fileImport = true;
1728 pathOrUri = tokenText();
1729
1730 if (!pathOrUri.endsWith(QLatin1String("js"))) {
1731 setError(QCoreApplication::translate("QQmlParser","Imported file must be a script"));
1732 return false;
1733 }
1734 lex();
1735
1736 } else if (_state.tokenKind == T_IDENTIFIER) {
1737 // .import T_IDENTIFIER (. T_IDENTIFIER)* (T_VERSION_NUMBER (. T_VERSION_NUMBER)?)? as T_IDENTIFIER
1738 while (true) {
1739 if (!isUriToken(_state.tokenKind)) {
1740 setError(QCoreApplication::translate("QQmlParser","Invalid module URI"));
1741 return false;
1742 }
1743
1744 pathOrUri.append(tokenText());
1745
1746 lex();
1747 if (tokenStartLine() != lineNumber) {
1748 setError(QCoreApplication::translate("QQmlParser","Invalid module URI"));
1749 return false;
1750 }
1751 if (_state.tokenKind != QQmlJSGrammar::T_DOT)
1752 break;
1753
1754 pathOrUri.append(u'.');
1755
1756 lex();
1757 if (tokenStartLine() != lineNumber) {
1758 setError(QCoreApplication::translate("QQmlParser","Invalid module URI"));
1759 return false;
1760 }
1761 }
1762
1763 if (_state.tokenKind == T_VERSION_NUMBER) {
1764 version = tokenText();
1765 lex();
1766 if (_state.tokenKind == T_DOT) {
1767 version += u'.';
1768 lex();
1769 if (_state.tokenKind != T_VERSION_NUMBER) {
1770 setError(QCoreApplication::translate(
1771 "QQmlParser", "Incomplete version number (dot but no minor)"));
1772 return false; // expected the module version number
1773 }
1774 version += tokenText();
1775 lex();
1776 }
1777 }
1778 }
1779
1780 //
1781 // recognize the mandatory `as' followed by the module name
1782 //
1783 if (!(_state.tokenKind == T_AS && tokenStartLine() == lineNumber)) {
1784 if (fileImport)
1785 setError(QCoreApplication::translate("QQmlParser", "File import requires a qualifier"));
1786 else
1787 setError(QCoreApplication::translate("QQmlParser", "Module import requires a qualifier"));
1788 if (tokenStartLine() != lineNumber) {
1789 error->loc.startLine = lineNumber;
1790 error->loc.startColumn = column;
1791 }
1792 return false; // expected `as'
1793 }
1794
1795 if (lex() != T_IDENTIFIER || tokenStartLine() != lineNumber) {
1796 if (fileImport)
1797 setError(QCoreApplication::translate("QQmlParser", "File import requires a qualifier"));
1798 else
1799 setError(QCoreApplication::translate("QQmlParser", "Module import requires a qualifier"));
1800 return false; // expected module name
1801 }
1802
1803 const QString module = tokenText();
1804 if (!module.at(0).isUpper()) {
1805 setError(QCoreApplication::translate("QQmlParser","Invalid import qualifier"));
1806 return false;
1807 }
1808
1809 if (fileImport)
1810 directives->importFile(pathOrUri, module, lineNumber, column);
1811 else
1812 directives->importModule(pathOrUri, version, module, lineNumber, column);
1813 }
1814
1815 if (tokenStartLine() != lineNumber) {
1816 setError(QCoreApplication::translate("QQmlParser", "Syntax error"));
1817 return false; // the directives cannot span over multiple lines
1818 }
1819
1820 // fetch the first token after the .pragma/.import directive
1821 lex();
1822 } while (_state.tokenKind == T_DOT);
1823
1824 return true;
1825}
1826
1827const Lexer::State &Lexer::state() const
1828{
1829 return _state;
1830}
1831void Lexer::setState(const Lexer::State &state)
1832{
1833 _state = state;
1834}
1835
1836int Lexer::parseModeFlags() const {
1837 int flags = 0;
1838 if (qmlMode())
1839 flags |= QmlMode|StaticIsKeyword;
1840 if (yieldIsKeyWord())
1841 flags |= YieldIsKeyword;
1842 if (_staticIsKeyword)
1843 flags |= StaticIsKeyword;
1844 return flags;
1845}
1846
1847namespace QQmlJS {
1848QDebug operator<<(QDebug dbg, const Lexer::State &s)
1849{
1850 dbg << "{\n"
1851 << " errorCode:" << int(s.errorCode) << ",\n"
1852 << " currentChar:" << s.currentChar << ",\n"
1853 << " tokenValue:" << s.tokenValue << ",\n"
1854 << " parenthesesState:" << s.parenthesesState << ",\n"
1855 << " parenthesesCount:" << s.parenthesesCount << ",\n"
1856 << " outerTemplateBraceCount:" << s.outerTemplateBraceCount << ",\n"
1857 << " bracesCount:" << s.bracesCount << ",\n"
1858 << " stackToken:" << s.stackToken << ",\n"
1859 << " patternFlags:" << s.patternFlags << ",\n"
1860 << " tokenKind:" << s.tokenKind << ",\n"
1861 << " importState:" << int(s.importState) << ",\n"
1862 << " validTokenText:" << s.validTokenText << ",\n"
1863 << " prohibitAutomaticSemicolon:" << s.prohibitAutomaticSemicolon << ",\n"
1864 << " restrictedKeyword:" << s.restrictedKeyword << ",\n"
1865 << " terminator:" << s.terminator << ",\n"
1866 << " followsClosingBrace:" << s.followsClosingBrace << ",\n"
1867 << " delimited:" << s.delimited << ",\n"
1868 << " handlingDirectives:" << s.handlingDirectives << ",\n"
1869 << " generatorLevel:" << s.generatorLevel << "\n}";
1870 return dbg;
1871}
1872}
1873
1874QT_END_NAMESPACE
static int regExpFlagFromChar(const QChar &ch)
static bool isIdentifierPart(uint ch)
static QChar convertHex(QChar c1, QChar c2)
static const int uriTokens[]
static bool isUriToken(int token)
static unsigned char convertHex(ushort c)
static bool isIdentifierStart(uint ch)