Qt
Internal/Contributor docs for the Qt SDK. <b>Note:</b> These are NOT official API docs; those are found <a href='https://doc.qt.io/'>here</a>.
Loading...
Searching...
No Matches
qqmljslexer.cpp
Go to the documentation of this file.
1// Copyright (C) 2016 The Qt Company Ltd.
2// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
3
4#include "qqmljslexer_p.h"
5#include "qqmljsengine_p.h"
6#include "qqmljskeywords_p.h"
7
8#include <private/qqmljsdiagnosticmessage_p.h>
9#include <private/qqmljsmemorypool_p.h>
10#include <private/qlocale_tools_p.h>
11
12
13#include <QtCore/qcoreapplication.h>
14#include <QtCore/qvarlengtharray.h>
15#include <QtCore/qdebug.h>
16#include <QtCore/QScopedValueRollback>
17
18#include <optional>
19
21using namespace QQmlJS;
22
23static inline int regExpFlagFromChar(const QChar &ch)
24{
25 switch (ch.unicode()) {
26 case 'g': return Lexer::RegExp_Global;
27 case 'i': return Lexer::RegExp_IgnoreCase;
28 case 'm': return Lexer::RegExp_Multiline;
29 case 'u': return Lexer::RegExp_Unicode;
30 case 'y': return Lexer::RegExp_Sticky;
31 }
32 return 0;
33}
34
// Converts a single hexadecimal digit character to its numeric value.
// No validation is done: anything that is neither '0'-'9' nor 'a'-'f' is
// handled as if it were an upper-case hex digit, so callers are expected to
// pass hex digits only.
static inline unsigned char convertHex(ushort c)
{
    if (c >= '0' && c <= '9')
        return c - '0';

    // Pick the letter the value is measured from; 'a'/'A' both stand for 10.
    const int base = (c >= 'a' && c <= 'f') ? 'a' : 'A';
    return c - base + 10;
}
44
45static inline QChar convertHex(QChar c1, QChar c2)
46{
47 return QChar((convertHex(c1.unicode()) << 4) + convertHex(c2.unicode()));
48}
49
51 : _engine(engine), _lexMode(lexMode), _endPtr(nullptr), _qmlMode(true)
52{
53 if (engine)
54 engine->setLexer(this);
55}
56
// Returns the value of the _qmlMode flag, i.e. the qmlMode argument most
// recently passed to setCode().
57bool Lexer::qmlMode() const
58{
59 return _qmlMode;
60}
61
63{
64 return _code;
65}
66
67void Lexer::setCode(const QString &code, int lineno, bool qmlMode,
68 Lexer::CodeContinuation codeContinuation)
69{
70 if (codeContinuation == Lexer::CodeContinuation::Continue)
71 _currentOffset += _code.size();
72 else
73 _currentOffset = 0;
74 if (_engine)
75 _engine->setCode(code);
76
77 _qmlMode = qmlMode;
78 _code = code;
79 _skipLinefeed = false;
80
81 _tokenText.clear();
82 _tokenText.reserve(1024);
83 _errorMessage.clear();
84 _tokenSpell = QStringView();
85 _rawString = QStringView();
86
87 _codePtr = code.unicode();
88 _endPtr = _codePtr + code.size();
89 _tokenStartPtr = _codePtr;
90
91 if (lineno >= 0)
92 _currentLineNumber = lineno;
93 _currentColumnNumber = 0;
94 _tokenLine = _currentLineNumber;
95 _tokenColumn = 0;
96 _tokenLength = 0;
97
98 if (codeContinuation == Lexer::CodeContinuation::Reset)
99 _state = State {};
100}
101
102void Lexer::scanChar()
103{
104 if (_skipLinefeed) {
105 Q_ASSERT(*_codePtr == u'\n');
106 ++_codePtr;
107 _skipLinefeed = false;
108 }
109 _state.currentChar = *_codePtr++;
110 ++_currentColumnNumber;
111
112 if (isLineTerminator()) {
113 if (_state.currentChar == u'\r') {
114 if (_codePtr < _endPtr && *_codePtr == u'\n')
115 _skipLinefeed = true;
116 _state.currentChar = u'\n';
117 }
118 ++_currentLineNumber;
119 _currentColumnNumber = 0;
120 }
121}
122
123QChar Lexer::peekChar()
124{
125 auto peekPtr = _codePtr;
126 if (peekPtr < _endPtr)
127 return *peekPtr;
128 return QChar();
129}
130
131namespace {
132inline bool isBinop(int tok)
133{
134 switch (tok) {
135 case Lexer::T_AND:
136 case Lexer::T_AND_AND:
137 case Lexer::T_AND_EQ:
138 case Lexer::T_DIVIDE_:
139 case Lexer::T_DIVIDE_EQ:
140 case Lexer::T_EQ:
141 case Lexer::T_EQ_EQ:
142 case Lexer::T_EQ_EQ_EQ:
143 case Lexer::T_GE:
144 case Lexer::T_GT:
145 case Lexer::T_GT_GT:
146 case Lexer::T_GT_GT_EQ:
147 case Lexer::T_GT_GT_GT:
148 case Lexer::T_GT_GT_GT_EQ:
149 case Lexer::T_LE:
150 case Lexer::T_LT:
151 case Lexer::T_LT_LT:
152 case Lexer::T_LT_LT_EQ:
153 case Lexer::T_MINUS:
154 case Lexer::T_MINUS_EQ:
155 case Lexer::T_NOT_EQ:
156 case Lexer::T_NOT_EQ_EQ:
157 case Lexer::T_OR:
158 case Lexer::T_OR_EQ:
159 case Lexer::T_OR_OR:
160 case Lexer::T_PLUS:
161 case Lexer::T_PLUS_EQ:
162 case Lexer::T_REMAINDER:
163 case Lexer::T_REMAINDER_EQ:
164 case Lexer::T_RETURN:
165 case Lexer::T_STAR:
166 case Lexer::T_STAR_EQ:
167 case Lexer::T_XOR:
168 case Lexer::T_XOR_EQ:
169 return true;
170
171 default:
172 return false;
173 }
174}
175
176int hexDigit(QChar c)
177{
178 if (c >= u'0' && c <= u'9')
179 return c.unicode() - u'0';
180 if (c >= u'a' && c <= u'f')
181 return c.unicode() - u'a' + 10;
182 if (c >= u'A' && c <= u'F')
183 return c.unicode() - u'A' + 10;
184 return -1;
185}
186
187int octalDigit(QChar c)
188{
189 if (c >= u'0' && c <= u'7')
190 return c.unicode() - u'0';
191 return -1;
192}
193
194} // anonymous namespace
195
197{
198 const int previousTokenKind = _state.tokenKind;
199 int tokenKind;
200 bool firstPass = true;
201
202 again:
203 tokenKind = T_ERROR;
204 _tokenSpell = QStringView();
205 _rawString = QStringView();
206 if (firstPass && _state.stackToken == -1) {
207 firstPass = false;
208 if (_codePtr > _endPtr && _lexMode == LexMode::LineByLine && !_code.isEmpty())
209 return T_EOL;
210
212 scanChar();
213 _tokenStartPtr = _codePtr - 1;
214 _tokenLine = _currentLineNumber;
215 _tokenColumn = _currentColumnNumber;
216 while (_codePtr <= _endPtr) {
217 if (_state.currentChar == u'*') {
218 scanChar();
219 if (_state.currentChar == u'/') {
220 scanChar();
221 if (_engine) {
222 _engine->addComment(tokenOffset() + 2,
223 _codePtr - _tokenStartPtr - 1 - 4,
225 }
226 tokenKind = T_COMMENT;
227 break;
228 }
229 } else {
230 scanChar();
231 }
232 }
233 if (tokenKind == T_ERROR)
234 tokenKind = T_PARTIAL_COMMENT;
235 } else {
236 // handle multiline continuation
237 std::optional<ScanStringMode> scanMode;
238 switch (previousTokenKind) {
239 case T_PARTIAL_SINGLE_QUOTE_STRING_LITERAL:
240 scanMode = ScanStringMode::SingleQuote;
241 break;
242 case T_PARTIAL_DOUBLE_QUOTE_STRING_LITERAL:
243 scanMode = ScanStringMode::DoubleQuote;
244 break;
245 case T_PARTIAL_TEMPLATE_HEAD:
246 scanMode = ScanStringMode::TemplateHead;
247 break;
248 case T_PARTIAL_TEMPLATE_MIDDLE:
249 scanMode = ScanStringMode::TemplateContinuation;
250 break;
251 default:
252 break;
253 }
254 if (scanMode) {
255 scanChar();
256 _tokenStartPtr = _codePtr - 1;
257 _tokenLine = _currentLineNumber;
258 _tokenColumn = _currentColumnNumber;
259 tokenKind = scanString(*scanMode);
260 }
261 }
262 }
263 if (tokenKind == T_ERROR)
264 tokenKind = scanToken();
265 _tokenLength = _codePtr - _tokenStartPtr - 1;
266 switch (tokenKind) {
267 // end of line and comments should not "overwrite" the old token type...
268 case T_EOL:
269 return tokenKind;
270 case T_COMMENT:
272 return tokenKind;
273 case T_PARTIAL_COMMENT:
275 return tokenKind;
276 default:
278 break;
279 }
280 _state.tokenKind = tokenKind;
281
282 _state.delimited = false;
283 _state.restrictedKeyword = false;
284 _state.followsClosingBrace = (previousTokenKind == T_RBRACE);
285
286 // update the flags
287 switch (_state.tokenKind) {
288 case T_LBRACE:
289 if (_state.bracesCount > 0)
290 ++_state.bracesCount;
292 case T_SEMICOLON:
295 case T_QUESTION:
296 case T_COLON:
297 case T_TILDE:
298 _state.delimited = true;
299 break;
300 case T_AUTOMATIC_SEMICOLON:
301 case T_AS:
304 default:
305 if (isBinop(_state.tokenKind))
306 _state.delimited = true;
307 break;
308
309 case T_IMPORT:
310 if (qmlMode() || (_state.handlingDirectives && previousTokenKind == T_DOT))
312 if (isBinop(_state.tokenKind))
313 _state.delimited = true;
314 break;
315
316 case T_IF:
317 case T_FOR:
318 case T_WHILE:
319 case T_WITH:
321 _state.parenthesesCount = 0;
322 break;
323
324 case T_ELSE:
325 case T_DO:
327 break;
328
329 case T_CONTINUE:
330 case T_BREAK:
331 case T_RETURN:
332 case T_YIELD:
333 case T_THROW:
334 _state.restrictedKeyword = true;
335 break;
336 case T_RBRACE:
337 if (_state.bracesCount > 0)
338 --_state.bracesCount;
339 if (_state.bracesCount == 0)
340 goto again;
341 } // switch
342
343 // update the parentheses state
344 switch (_state.parenthesesState) {
346 break;
347
348 case CountParentheses:
349 if (_state.tokenKind == T_RPAREN) {
350 --_state.parenthesesCount;
351 if (_state.parenthesesCount == 0)
353 } else if (_state.tokenKind == T_LPAREN) {
354 ++_state.parenthesesCount;
355 }
356 break;
357
359 if (_state.tokenKind != T_DO && _state.tokenKind != T_ELSE)
361 break;
362 } // switch
363
364 return _state.tokenKind;
365}
366
// Decodes a unicode escape whose 'u' is the current character and returns
// the resulting code point.
//
// Two spellings are handled: exactly four hex digits, or '{' followed by one
// or more hex digits and a closing '}'.
//
// ok: written unconditionally (it must not be null); true on success, false
//     on failure.
// Returns the decoded code point, or 0 after a failure. A failure also
// stores a translated message in _errorMessage. Characters scanned before
// the failure was detected stay consumed.
367uint Lexer::decodeUnicodeEscapeCharacter(bool *ok)
368{
369 Q_ASSERT(_state.currentChar == u'u');
370 scanChar(); // skip u
// The first digit is already the current character, so the remaining three
// digits start at _codePtr; the test below requires the position just past
// the fourth digit (_codePtr + 3) not to lie beyond _endPtr.
371 constexpr int distanceFromFirstHexToLastHex = 3;
372 if (_codePtr + distanceFromFirstHexToLastHex <= _endPtr && isHexDigit(_state.currentChar)) {
// Fixed-width form: accumulate exactly four hex digits, most significant first.
373 uint codePoint = 0;
374 for (int i = 0; i < 4; ++i) {
375 int digit = hexDigit(_state.currentChar);
376 if (digit < 0)
377 goto error;
378 codePoint *= 16;
379 codePoint += digit;
380 scanChar();
381 }
382
383 *ok = true;
384 return codePoint;
385 } else if (_codePtr < _endPtr && _state.currentChar == u'{') {
// Braced form: any number of hex digits (at least one) up to the closing '}'.
386 scanChar(); // skip '{'
387 uint codePoint = 0;
388 if (!isHexDigit(_state.currentChar))
389 // need at least one hex digit
390 goto error;
391
392 while (_codePtr <= _endPtr) {
393 int digit = hexDigit(_state.currentChar);
394 if (digit < 0)
395 break;
396 codePoint *= 16;
397 codePoint += digit;
// Checked after every digit, so the accumulated value is rejected as soon as
// it exceeds 0x10ffff instead of being allowed to keep growing.
398 if (codePoint > 0x10ffff)
399 goto error;
400 scanChar();
401 }
402
// The digit run must be terminated by '}'; anything else is an error.
403 if (_state.currentChar != u'}')
404 goto error;
405
406 scanChar(); // skip '}'
407
408
409 *ok = true;
410 return codePoint;
411 }
412
// Shared failure exit, also reached when neither spelling matched above.
413error:
415 _errorMessage = QCoreApplication::translate("QQmlParser", "Illegal unicode escape sequence");
416
417 *ok = false;
418 return 0;
419}
420
421QChar Lexer::decodeHexEscapeCharacter(bool *ok)
422{
423 if (isHexDigit(_codePtr[0]) && isHexDigit(_codePtr[1])) {
424 scanChar();
425
426 const QChar c1 = _state.currentChar;
427 scanChar();
428
429 const QChar c2 = _state.currentChar;
430 scanChar();
431
432 if (ok)
433 *ok = true;
434
435 return convertHex(c1, c2);
436 }
437
438 *ok = false;
439 return QChar();
440}
441
442namespace QQmlJS {
444{
445 dbg << "{\n"
446 << " engine:" << qsizetype(l._engine) << ",\n"
447 << " lexMode:" << int(l._lexMode) << ",\n"
448 << " code.size:" << qsizetype(l._code.unicode()) << "+" << l._code.size() << ",\n"
449 << " endPtr: codePtr + " << (l._endPtr - l._codePtr) << ",\n"
450 << " qmlMode:" << l._qmlMode << ",\n"
451 << " staticIsKeyword:" << l._staticIsKeyword << ",\n"
452 << " currentLineNumber:" << l._currentLineNumber << ",\n"
453 << " currentColumnNumber:" << l._currentColumnNumber << ",\n"
454 << " currentOffset:" << l._currentOffset << ",\n"
455 << " tokenLength:" << l._tokenLength << ",\n"
456 << " tokenLine:" << l._tokenLine << ",\n"
457 << " tokenColumn:" << l._tokenColumn << ",\n"
458 << " tokenText:" << l._tokenText << ",\n"
459 << " skipLinefeed:" << l._skipLinefeed << ",\n"
460 << " errorMessage:" << l._errorMessage << ",\n"
461 << " tokenSpell:" << l._tokenSpell << ",\n"
462 << " rawString:" << l._rawString << ",\n";
463 if (l._codePtr)
464 dbg << " codePtr: code.unicode()+" << (l._codePtr - l._code.unicode()) << ",\n";
465 else
466 dbg << " codePtr: *null*,\n";
467 if (l._tokenStartPtr)
468 dbg << " tokenStartPtr: codePtr " << (l._tokenStartPtr - l._codePtr) << ",\n";
469 else
470 dbg << " tokenStartPtr: *null*,\n";
471 dbg << " state:" << l._state << "\n}";
472 return dbg;
473}
474}
475
476static inline bool isIdentifierStart(uint ch)
477{
478 // fast path for ascii
479 if ((ch >= u'a' && ch <= u'z') ||
480 (ch >= u'A' && ch <= u'Z') ||
481 ch == u'$' || ch == u'_')
482 return true;
483
484 switch (QChar::category(ch)) {
485 case QChar::Number_Letter:
486 case QChar::Letter_Uppercase:
487 case QChar::Letter_Lowercase:
488 case QChar::Letter_Titlecase:
489 case QChar::Letter_Modifier:
490 case QChar::Letter_Other:
491 return true;
492 default:
493 break;
494 }
495 return false;
496}
497
499{
500 // fast path for ascii
501 if ((ch >= u'a' && ch <= u'z') ||
502 (ch >= u'A' && ch <= u'Z') ||
503 (ch >= u'0' && ch <= u'9') ||
504 ch == u'$' || ch == u'_' ||
505 ch == 0x200c /* ZWNJ */ || ch == 0x200d /* ZWJ */)
506 return true;
507
508 switch (QChar::category(ch)) {
509 case QChar::Mark_NonSpacing:
510 case QChar::Mark_SpacingCombining:
511
512 case QChar::Number_DecimalDigit:
513 case QChar::Number_Letter:
514
515 case QChar::Letter_Uppercase:
516 case QChar::Letter_Lowercase:
517 case QChar::Letter_Titlecase:
518 case QChar::Letter_Modifier:
519 case QChar::Letter_Other:
520
521 case QChar::Punctuation_Connector:
522 return true;
523 default:
524 break;
525 }
526 return false;
527}
528
529int Lexer::scanToken()
530{
531 if (_state.stackToken != -1) {
532 int tk = _state.stackToken;
533 _state.stackToken = -1;
534 return tk;
535 }
536
537 if (_state.bracesCount == 0) {
538 // we're inside a Template string
539 return scanString(TemplateContinuation);
540 }
541
542 if (_state.comments == CommentState::NoComment)
543 _state.terminator = false;
544
545again:
546 _state.validTokenText = false;
547
548 while (_state.currentChar.isSpace()) {
549 if (isLineTerminator()) {
550 bool isAtEnd = (_codePtr + (_skipLinefeed ? 1 : 0)) == _endPtr;
551 if (_state.restrictedKeyword) {
552 // automatic semicolon insertion
553 _tokenLine = _currentLineNumber;
554 _tokenColumn = _currentColumnNumber;
555 _tokenStartPtr = _codePtr - 1;
556 return T_SEMICOLON;
557 } else if (_lexMode == LexMode::WholeCode || !isAtEnd) {
558 _state.terminator = true;
559 syncProhibitAutomaticSemicolon();
560 } // else we will do the previous things at the start of next line...
561 }
562
563 scanChar();
564 }
565
566 _tokenStartPtr = _codePtr - 1;
567 _tokenLine = _currentLineNumber;
568 _tokenColumn = _currentColumnNumber;
569
570 if (_codePtr >= _endPtr) {
571 if (_lexMode == LexMode::LineByLine) {
572 if (!_code.isEmpty()) {
573 _state.currentChar = *(_codePtr - 2);
574 return T_EOL;
575 } else {
576 return EOF_SYMBOL;
577 }
578 } else if (_codePtr > _endPtr) {
579 return EOF_SYMBOL;
580 }
581 }
582
583 const QChar ch = _state.currentChar;
584 scanChar();
585
586 switch (ch.unicode()) {
587 case u'~': return T_TILDE;
588 case u'}': return T_RBRACE;
589
590 case u'|':
591 if (_state.currentChar == u'|') {
592 scanChar();
593 return T_OR_OR;
594 } else if (_state.currentChar == u'=') {
595 scanChar();
596 return T_OR_EQ;
597 }
598 return T_OR;
599
600 case u'{': return T_LBRACE;
601
602 case u'^':
603 if (_state.currentChar == u'=') {
604 scanChar();
605 return T_XOR_EQ;
606 }
607 return T_XOR;
608
609 case u']': return T_RBRACKET;
610 case u'[': return T_LBRACKET;
611 case u'?': {
612 if (_state.currentChar == u'?') {
613 scanChar();
614 return T_QUESTION_QUESTION;
615 }
616 if (_state.currentChar == u'.' && !peekChar().isDigit()) {
617 scanChar();
618 return T_QUESTION_DOT;
619 }
620
621 return T_QUESTION;
622 }
623
624 case u'>':
625 if (_state.currentChar == u'>') {
626 scanChar();
627 if (_state.currentChar == u'>') {
628 scanChar();
629 if (_state.currentChar == u'=') {
630 scanChar();
631 return T_GT_GT_GT_EQ;
632 }
633 return T_GT_GT_GT;
634 } else if (_state.currentChar == u'=') {
635 scanChar();
636 return T_GT_GT_EQ;
637 }
638 return T_GT_GT;
639 } else if (_state.currentChar == u'=') {
640 scanChar();
641 return T_GE;
642 }
643 return T_GT;
644
645 case u'=':
646 if (_state.currentChar == u'=') {
647 scanChar();
648 if (_state.currentChar == u'=') {
649 scanChar();
650 return T_EQ_EQ_EQ;
651 }
652 return T_EQ_EQ;
653 } else if (_state.currentChar == u'>') {
654 scanChar();
655 return T_ARROW;
656 }
657 return T_EQ;
658
659 case u'<':
660 if (_state.currentChar == u'=') {
661 scanChar();
662 return T_LE;
663 } else if (_state.currentChar == u'<') {
664 scanChar();
665 if (_state.currentChar == u'=') {
666 scanChar();
667 return T_LT_LT_EQ;
668 }
669 return T_LT_LT;
670 }
671 return T_LT;
672
673 case u';': return T_SEMICOLON;
674 case u':': return T_COLON;
675
676 case u'/':
677 switch (_state.currentChar.unicode()) {
678 case u'*':
679 scanChar();
680 while (_codePtr <= _endPtr) {
681 if (_state.currentChar == u'*') {
682 scanChar();
683 if (_state.currentChar == u'/') {
684 scanChar();
685 if (_engine) {
686 _engine->addComment(tokenOffset() + 2,
687 _codePtr - _tokenStartPtr - 1 - 4, tokenStartLine(),
688 tokenStartColumn() + 2);
689 }
690 if (_lexMode == LexMode::LineByLine)
691 return T_COMMENT;
692 else
693 goto again;
694 }
695 } else {
696 scanChar();
697 }
698 }
699 if (_lexMode == LexMode::LineByLine)
700 return T_PARTIAL_COMMENT;
701 else
702 goto again;
703 case u'/':
704 while (_codePtr <= _endPtr && !isLineTerminator()) {
705 scanChar();
706 }
707 if (_engine) {
708 _engine->addComment(tokenOffset() + 2, _codePtr - _tokenStartPtr - 1 - 2,
710 }
711 if (_lexMode == LexMode::LineByLine)
712 return T_COMMENT;
713 else
714 goto again;
715 case u'=':
716 scanChar();
717 return T_DIVIDE_EQ;
718 default:
719 return T_DIVIDE_;
720 }
721 case u'.':
723 return T_DOT;
724 if (isDecimalDigit(_state.currentChar.unicode()))
725 return scanNumber(ch);
726 if (_state.currentChar == u'.') {
727 scanChar();
728 if (_state.currentChar == u'.') {
729 scanChar();
730 return T_ELLIPSIS;
731 } else {
733 _errorMessage = QCoreApplication::translate("QQmlParser", "Unexpected token '.'");
734 return T_ERROR;
735 }
736 }
737 return T_DOT;
738
739 case u'-':
740 if (_state.currentChar == u'=') {
741 scanChar();
742 return T_MINUS_EQ;
743 } else if (_state.currentChar == u'-') {
744 scanChar();
745
746 if (_state.terminator && !_state.delimited && !_state.prohibitAutomaticSemicolon
747 && _state.tokenKind != T_LPAREN) {
748 _state.stackToken = T_MINUS_MINUS;
749 return T_SEMICOLON;
750 }
751
752 return T_MINUS_MINUS;
753 }
754 return T_MINUS;
755
756 case u',': return T_COMMA;
757
758 case u'+':
759 if (_state.currentChar == u'=') {
760 scanChar();
761 return T_PLUS_EQ;
762 } else if (_state.currentChar == u'+') {
763 scanChar();
764
765 if (_state.terminator && !_state.delimited && !_state.prohibitAutomaticSemicolon
766 && _state.tokenKind != T_LPAREN) {
767 _state.stackToken = T_PLUS_PLUS;
768 return T_SEMICOLON;
769 }
770
771 return T_PLUS_PLUS;
772 }
773 return T_PLUS;
774
775 case u'*':
776 if (_state.currentChar == u'=') {
777 scanChar();
778 return T_STAR_EQ;
779 } else if (_state.currentChar == u'*') {
780 scanChar();
781 if (_state.currentChar == u'=') {
782 scanChar();
783 return T_STAR_STAR_EQ;
784 }
785 return T_STAR_STAR;
786 }
787 return T_STAR;
788
789 case u')': return T_RPAREN;
790 case u'(': return T_LPAREN;
791
792 case u'@': return T_AT;
793
794 case u'&':
795 if (_state.currentChar == u'=') {
796 scanChar();
797 return T_AND_EQ;
798 } else if (_state.currentChar == u'&') {
799 scanChar();
800 return T_AND_AND;
801 }
802 return T_AND;
803
804 case u'%':
805 if (_state.currentChar == u'=') {
806 scanChar();
807 return T_REMAINDER_EQ;
808 }
809 return T_REMAINDER;
810
811 case u'!':
812 if (_state.currentChar == u'=') {
813 scanChar();
814 if (_state.currentChar == u'=') {
815 scanChar();
816 return T_NOT_EQ_EQ;
817 }
818 return T_NOT_EQ;
819 }
820 return T_NOT;
821
822 case u'`':
825 case u'\'':
826 case u'"':
827 return scanString(ScanStringMode(ch.unicode()));
828 case u'0':
829 case u'1':
830 case u'2':
831 case u'3':
832 case u'4':
833 case u'5':
834 case u'6':
835 case u'7':
836 case u'8':
837 case u'9':
839 return scanVersionNumber(ch);
840 else
841 return scanNumber(ch);
842
843 case '#':
844 if (_currentLineNumber == 1 && _currentColumnNumber == 2) {
845 // shebang support
846 while (_codePtr <= _endPtr && !isLineTerminator()) {
847 scanChar();
848 }
849 if (_engine) {
850 _engine->addComment(tokenOffset(), _codePtr - _tokenStartPtr - 1, tokenStartLine(),
852 }
853 if (_lexMode == LexMode::LineByLine)
854 return T_COMMENT;
855 else
856 goto again;
857 }
859
860 default: {
861 uint c = ch.unicode();
862 bool identifierWithEscapeChars = false;
863 if (QChar::isHighSurrogate(c) && QChar::isLowSurrogate(_state.currentChar.unicode())) {
864 c = QChar::surrogateToUcs4(ushort(c), _state.currentChar.unicode());
865 scanChar();
866 } else if (c == '\\' && _state.currentChar == u'u') {
867 identifierWithEscapeChars = true;
868 bool ok = false;
869 c = decodeUnicodeEscapeCharacter(&ok);
870 if (!ok)
871 return T_ERROR;
872 }
873 if (isIdentifierStart(c)) {
874 if (identifierWithEscapeChars) {
875 _tokenText.resize(0);
876 if (QChar::requiresSurrogates(c)) {
877 _tokenText += QChar(QChar::highSurrogate(c));
878 _tokenText += QChar(QChar::lowSurrogate(c));
879 } else {
880 _tokenText += QChar(c);
881 }
882 _state.validTokenText = true;
883 }
884 while (_codePtr <= _endPtr) {
885 c = _state.currentChar.unicode();
886 if (QChar::isHighSurrogate(c) && QChar::isLowSurrogate(_codePtr->unicode())) {
887 scanChar();
888 c = QChar::surrogateToUcs4(ushort(c), _state.currentChar.unicode());
889 } else if (_state.currentChar == u'\\' && _codePtr[0] == u'u') {
890 if (!identifierWithEscapeChars) {
891 identifierWithEscapeChars = true;
892 _tokenText.resize(0);
893 _tokenText.insert(0, _tokenStartPtr, _codePtr - _tokenStartPtr - 1);
894 _state.validTokenText = true;
895 }
896
897 scanChar(); // skip '\\'
898 bool ok = false;
899 c = decodeUnicodeEscapeCharacter(&ok);
900 if (!ok)
901 return T_ERROR;
902
903 if (!isIdentifierPart(c))
904 break;
905
906 if (QChar::requiresSurrogates(c)) {
907 _tokenText += QChar(QChar::highSurrogate(c));
908 _tokenText += QChar(QChar::lowSurrogate(c));
909 } else {
910 _tokenText += QChar(c);
911 }
912 continue;
913 }
914
915 if (!isIdentifierPart(c))
916 break;
917
918 if (identifierWithEscapeChars) {
919 if (QChar::requiresSurrogates(c)) {
920 _tokenText += QChar(QChar::highSurrogate(c));
921 _tokenText += QChar(QChar::lowSurrogate(c));
922 } else {
923 _tokenText += QChar(c);
924 }
925 }
926 scanChar();
927 }
928
929 _tokenLength = _codePtr - _tokenStartPtr - 1;
930
931 int kind = T_IDENTIFIER;
932
933 if (!identifierWithEscapeChars)
934 kind = classify(_tokenStartPtr, _tokenLength, parseModeFlags());
935
936 if (kind == T_FUNCTION) {
937 continue_skipping:
938 while (_codePtr < _endPtr && _state.currentChar.isSpace())
939 scanChar();
940 if (_state.currentChar == u'*') {
941 _tokenLength = _codePtr - _tokenStartPtr - 1;
942 kind = T_FUNCTION_STAR;
943 scanChar();
944 } else if (_state.currentChar == u'/') {
945 scanChar();
946 switch (_state.currentChar.unicode()) {
947 case u'*':
948 scanChar();
949 while (_codePtr <= _endPtr) {
950 if (_state.currentChar == u'*') {
951 scanChar();
952 if (_state.currentChar == u'/') {
953 scanChar();
954 if (_engine) {
955 _engine->addComment(tokenOffset() + 2,
956 _codePtr - _tokenStartPtr - 1 - 4,
958 tokenStartColumn() + 2);
959 }
960 if (_lexMode == LexMode::LineByLine)
961 return T_COMMENT;
962 goto continue_skipping;
963 }
964 } else {
965 scanChar();
966 }
967 }
968 if (_lexMode == LexMode::LineByLine)
969 return T_PARTIAL_COMMENT;
970 else
971 goto continue_skipping;
972 case u'/':
973 while (_codePtr <= _endPtr && !isLineTerminator()) {
974 scanChar();
975 }
976 if (_engine) {
977 _engine->addComment(tokenOffset() + 2,
978 _codePtr - _tokenStartPtr - 1 - 2,
980 }
981 if (_lexMode == LexMode::LineByLine)
982 return T_COMMENT;
983 else
984 goto continue_skipping;
985 default:
986 break;
987 }
988 }
989 }
990
991 if (_engine) {
992 if (kind == T_IDENTIFIER && identifierWithEscapeChars)
993 _tokenSpell = _engine->newStringRef(_tokenText);
994 else
995 _tokenSpell = _engine->midRef(_tokenStartPtr - _code.unicode(), _tokenLength);
996 }
997
998 return kind;
999 }
1000 }
1001
1002 break;
1003 }
1004
1005 return T_ERROR;
1006}
1007
1008int Lexer::scanString(ScanStringMode mode)
1009{
1010 QChar quote = (mode == TemplateContinuation) ? QChar(TemplateHead) : QChar(mode);
1011 // we actually use T_STRING_LITERAL also for multiline strings, should we want to
1012 // change that we should set it to:
1013 // _state.tokenKind == T_PARTIAL_SINGLE_QUOTE_STRING_LITERAL ||
1014 // _state.tokenKind == T_PARTIAL_DOUBLE_QUOTE_STRING_LITERAL
1015 // here and uncomment the multilineStringLiteral = true below.
1016 bool multilineStringLiteral = false;
1017
1018 const QChar *startCode = _codePtr - 1;
1019 // in case we just parsed a \r, we need to reset this flag to get things working
1020 // correctly in the loop below and afterwards
1021 _skipLinefeed = false;
1022 bool first = true;
1023
1024 if (_engine) {
1025 while (_codePtr <= _endPtr) {
1026 if (isLineTerminator()) {
1027 if ((quote == u'`' || qmlMode())) {
1028 if (first)
1029 --_currentLineNumber; // will be read again in scanChar()
1030 break;
1031 }
1032 _state.errorCode = IllegalCharacter;
1033 _errorMessage = QCoreApplication::translate("QQmlParser",
1034 "Stray newline in string literal");
1035 return T_ERROR;
1036 } else if (_state.currentChar == u'\\') {
1037 break;
1038 } else if (_state.currentChar == u'$' && quote == u'`') {
1039 break;
1040 } else if (_state.currentChar == quote) {
1041 _tokenSpell =
1042 _engine->midRef(startCode - _code.unicode(), _codePtr - startCode - 1);
1043 _rawString = _tokenSpell;
1044 scanChar();
1045
1046 if (quote == u'`')
1047 _state.bracesCount = _state.outerTemplateBraceCount.pop();
1048 if (mode == TemplateHead)
1049 return T_NO_SUBSTITUTION_TEMPLATE;
1050 else if (mode == TemplateContinuation)
1051 return T_TEMPLATE_TAIL;
1052 else if (multilineStringLiteral)
1053 return T_MULTILINE_STRING_LITERAL;
1054 else
1055 return T_STRING_LITERAL;
1056 }
1057 // don't use scanChar() here, that would transform \r sequences and the midRef() call would create the wrong result
1058 _state.currentChar = *_codePtr++;
1059 ++_currentColumnNumber;
1060 first = false;
1061 }
1062 }
1063
1064 // rewind by one char, so things gets scanned correctly
1065 --_codePtr;
1066 --_currentColumnNumber;
1067
1068 _state.validTokenText = true;
1069 _tokenText = QString(startCode, _codePtr - startCode);
1070
1071 auto setRawString = [&](const QChar *end) {
1072 QString raw(startCode, end - startCode - 1);
1073 raw.replace(QLatin1String("\r\n"), QLatin1String("\n"));
1074 raw.replace(u'\r', u'\n');
1075 _rawString = _engine->newStringRef(raw);
1076 };
1077
1078 scanChar();
1079
1080 while (_codePtr <= _endPtr) {
1081 if (_state.currentChar == quote) {
1082 scanChar();
1083
1084 if (_engine) {
1085 _tokenSpell = _engine->newStringRef(_tokenText);
1086 if (quote == u'`')
1087 setRawString(_codePtr - 1);
1088 }
1089
1090 if (quote == u'`')
1091 _state.bracesCount = _state.outerTemplateBraceCount.pop();
1092
1093 if (mode == TemplateContinuation)
1094 return T_TEMPLATE_TAIL;
1095 else if (mode == TemplateHead)
1096 return T_NO_SUBSTITUTION_TEMPLATE;
1097
1098 return multilineStringLiteral ? T_MULTILINE_STRING_LITERAL : T_STRING_LITERAL;
1099 } else if (quote == u'`' && _state.currentChar == u'$' && *_codePtr == u'{') {
1100 scanChar();
1101 scanChar();
1102 _state.bracesCount = 1;
1103 if (_engine) {
1104 _tokenSpell = _engine->newStringRef(_tokenText);
1105 setRawString(_codePtr - 2);
1106 }
1107
1108 return (mode == TemplateHead ? T_TEMPLATE_HEAD : T_TEMPLATE_MIDDLE);
1109 } else if (_state.currentChar == u'\\') {
1110 scanChar();
1111 if (_codePtr > _endPtr) {
1113 _errorMessage = QCoreApplication::translate(
1114 "QQmlParser", "End of file reached at escape sequence");
1115 return T_ERROR;
1116 }
1117
1118 QChar u;
1119
1120 switch (_state.currentChar.unicode()) {
1121 // unicode escape sequence
1122 case u'u': {
1123 bool ok = false;
1124 uint codePoint = decodeUnicodeEscapeCharacter(&ok);
1125 if (!ok)
1126 return T_ERROR;
1127 if (QChar::requiresSurrogates(codePoint)) {
1128 // need to use a surrogate pair
1129 _tokenText += QChar(QChar::highSurrogate(codePoint));
1130 u = QChar::lowSurrogate(codePoint);
1131 } else {
1132 u = QChar(codePoint);
1133 }
1134 } break;
1135
1136 // hex escape sequence
1137 case u'x': {
1138 bool ok = false;
1139 u = decodeHexEscapeCharacter(&ok);
1140 if (!ok) {
1142 _errorMessage = QCoreApplication::translate(
1143 "QQmlParser", "Illegal hexadecimal escape sequence");
1144 return T_ERROR;
1145 }
1146 } break;
1147
1148 // single character escape sequence
1149 case u'\\': u = u'\\'; scanChar(); break;
1150 case u'\'': u = u'\''; scanChar(); break;
1151 case u'\"': u = u'\"'; scanChar(); break;
1152 case u'b': u = u'\b'; scanChar(); break;
1153 case u'f': u = u'\f'; scanChar(); break;
1154 case u'n': u = u'\n'; scanChar(); break;
1155 case u'r': u = u'\r'; scanChar(); break;
1156 case u't': u = u'\t'; scanChar(); break;
1157 case u'v': u = u'\v'; scanChar(); break;
1158
1159 case u'0':
1160 if (!_codePtr->isDigit()) {
1161 scanChar();
1162 u = u'\0';
1163 break;
1164 }
1165 Q_FALLTHROUGH();
1166 case u'1':
1167 case u'2':
1168 case u'3':
1169 case u'4':
1170 case u'5':
1171 case u'6':
1172 case u'7':
1173 case u'8':
1174 case u'9':
1176 _errorMessage = QCoreApplication::translate(
1177 "QQmlParser", "Octal escape sequences are not allowed");
1178 return T_ERROR;
1179
1180 case u'\r':
1181 case u'\n':
1182 case 0x2028u:
1183 case 0x2029u:
1184 // uncomment the following to use T_MULTILINE_STRING_LITERAL
1185 // multilineStringLiteral = true;
1186 scanChar();
1187 continue;
1188
1189 default:
1190 // non escape character
1191 u = _state.currentChar;
1192 scanChar();
1193 }
1194
1195 _tokenText += u;
1196 } else {
1197 _tokenText += _state.currentChar;
1198 scanChar();
1199 }
1200 }
1201 if (_lexMode == LexMode::LineByLine && !_code.isEmpty()) {
1202 if (mode == TemplateContinuation)
1203 return T_PARTIAL_TEMPLATE_MIDDLE;
1204 else if (mode == TemplateHead)
1205 return T_PARTIAL_TEMPLATE_HEAD;
1206 else if (mode == SingleQuote)
1207 return T_PARTIAL_SINGLE_QUOTE_STRING_LITERAL;
1208 return T_PARTIAL_DOUBLE_QUOTE_STRING_LITERAL;
1209 }
1211 _errorMessage = QCoreApplication::translate("QQmlParser", "Unclosed string at end of line");
1212 return T_ERROR;
1213}
1214
1215int Lexer::scanNumber(QChar ch)
1216{
1217 auto scanOptionalNumericSeparator = [this](auto isNextCharacterValid){
1218 if (_state.currentChar == u'_') {
1219 if (peekChar() == u'_') {
1220 _state.errorCode = IllegalNumber;
1221 _errorMessage = QCoreApplication::translate(
1222 "QQmlParser",
1223 "There can be at most one numeric separator beetwen digits"
1224 );
1225 return false;
1226 }
1227
1228 if (!isNextCharacterValid()) {
1229 _state.errorCode = IllegalNumber;
1230 _errorMessage = QCoreApplication::translate(
1231 "QQmlParser",
1232 "A trailing numeric separator is not allowed in numeric literals"
1233 );
1234 return false;
1235 }
1236
1237 scanChar();
1238 }
1239
1240 return true;
1241 };
1242
1243 if (ch == u'0') {
1244 if (_state.currentChar == u'x' || _state.currentChar == u'X') {
1245 ch = _state.currentChar; // remember the x or X to use it in the error message below.
1246
1247 // parse hex integer literal
1248 scanChar(); // consume 'x'
1249
1250 if (!isHexDigit(_state.currentChar)) {
1251 _state.errorCode = IllegalNumber;
1252 _errorMessage = QCoreApplication::translate(
1253 "QQmlParser",
1254 "At least one hexadecimal digit is required after '0%1'")
1255 .arg(ch);
1256 return T_ERROR;
1257 }
1258
1259 double d = 0.;
1260 while (1) {
1261 int digit = ::hexDigit(_state.currentChar);
1262 if (digit < 0)
1263 break;
1264 d *= 16;
1265 d += digit;
1266 scanChar();
1267
1268 if (!scanOptionalNumericSeparator([this](){ return isHexDigit(peekChar()); }))
1269 return T_ERROR;
1270 }
1271
1272 _state.tokenValue = d;
1273 return T_NUMERIC_LITERAL;
1274 } else if (_state.currentChar == u'o' || _state.currentChar == u'O') {
1275 ch = _state.currentChar; // remember the o or O to use it in the error message below.
1276
1277 // parse octal integer literal
1278 scanChar(); // consume 'o'
1279
1280 if (!isOctalDigit(_state.currentChar.unicode())) {
1281 _state.errorCode = IllegalNumber;
1282 _errorMessage =
1284 "QQmlParser", "At least one octal digit is required after '0%1'")
1285 .arg(ch);
1286 return T_ERROR;
1287 }
1288
1289 double d = 0.;
1290 while (1) {
1291 int digit = ::octalDigit(_state.currentChar);
1292 if (digit < 0)
1293 break;
1294 d *= 8;
1295 d += digit;
1296 scanChar();
1297
1298 if (!scanOptionalNumericSeparator([this](){
1299 return isOctalDigit(peekChar().unicode());
1300 })) {
1301 return T_ERROR;
1302 }
1303 }
1304
1305 _state.tokenValue = d;
1306 return T_NUMERIC_LITERAL;
1307 } else if (_state.currentChar == u'b' || _state.currentChar == u'B') {
1308 ch = _state.currentChar; // remember the b or B to use it in the error message below.
1309
1310 // parse binary integer literal
1311 scanChar(); // consume 'b'
1312
1313 if (_state.currentChar.unicode() != u'0' && _state.currentChar.unicode() != u'1') {
1314 _state.errorCode = IllegalNumber;
1315 _errorMessage =
1317 "QQmlParser", "At least one binary digit is required after '0%1'")
1318 .arg(ch);
1319 return T_ERROR;
1320 }
1321
1322 double d = 0.;
1323 while (1) {
1324 int digit = 0;
1325 if (_state.currentChar.unicode() == u'1')
1326 digit = 1;
1327 else if (_state.currentChar.unicode() != u'0')
1328 break;
1329 d *= 2;
1330 d += digit;
1331 scanChar();
1332
1333 if (!scanOptionalNumericSeparator([this](){
1334 return peekChar().unicode() == u'0' || peekChar().unicode() == u'1';
1335 })) {
1336 return T_ERROR;
1337 }
1338 }
1339
1340 _state.tokenValue = d;
1341 return T_NUMERIC_LITERAL;
1342 } else if (_state.currentChar.isDigit() && !qmlMode()) {
1343 _state.errorCode = IllegalCharacter;
1344 _errorMessage = QCoreApplication::translate("QQmlParser",
1345 "Decimal numbers can't start with '0'");
1346 return T_ERROR;
1347 }
1348 }
1349
1350 // decimal integer literal
1351 QVarLengthArray<char,32> chars;
1352 chars.append(ch.unicode());
1353
1354 if (ch != u'.') {
1355 if (!scanOptionalNumericSeparator([this](){ return peekChar().isDigit(); }))
1356 return T_ERROR;
1357
1358 while (_state.currentChar.isDigit()) {
1359 chars.append(_state.currentChar.unicode());
1360 scanChar(); // consume the digit
1361
1362 if (!scanOptionalNumericSeparator([this](){ return peekChar().isDigit(); }))
1363 return T_ERROR;
1364 }
1365
1366 if (_state.currentChar == u'.') {
1367 chars.append(_state.currentChar.unicode());
1368 scanChar(); // consume `.'
1369 }
1370 }
1371
1372 while (_state.currentChar.isDigit()) {
1373 chars.append(_state.currentChar.unicode());
1374 scanChar();
1375
1376 if (!scanOptionalNumericSeparator([this](){ return peekChar().isDigit(); }))
1377 return T_ERROR;
1378 }
1379
1380 if (_state.currentChar == u'e' || _state.currentChar == u'E') {
1381 if (_codePtr[0].isDigit()
1382 || ((_codePtr[0] == u'+' || _codePtr[0] == u'-') && _codePtr[1].isDigit())) {
1383
1384 chars.append(_state.currentChar.unicode());
1385 scanChar(); // consume `e'
1386
1387 if (_state.currentChar == u'+' || _state.currentChar == u'-') {
1388 chars.append(_state.currentChar.unicode());
1389 scanChar(); // consume the sign
1390 }
1391
1392 while (_state.currentChar.isDigit()) {
1393 chars.append(_state.currentChar.unicode());
1394 scanChar();
1395
1396 if (!scanOptionalNumericSeparator([this](){ return peekChar().isDigit(); }))
1397 return T_ERROR;
1398 }
1399 }
1400 }
1401
1402 const char *begin = chars.constData();
1403 const char *end = nullptr;
1404 bool ok = false;
1405
1406 _state.tokenValue = qstrntod(begin, chars.size(), &end, &ok);
1407
1408 if (end - begin != chars.size()) {
1410 _errorMessage =
1411 QCoreApplication::translate("QQmlParser", "Illegal syntax for exponential number");
1412 return T_ERROR;
1413 }
1414
1415 return T_NUMERIC_LITERAL;
1416}
1417
1418int Lexer::scanVersionNumber(QChar ch)
1419{
1420 if (ch == u'0') {
1421 _state.tokenValue = 0;
1422 return T_VERSION_NUMBER;
1423 }
1424
1425 int acc = 0;
1426 acc += ch.digitValue();
1427
1428 while (_state.currentChar.isDigit()) {
1429 acc *= 10;
1430 acc += _state.currentChar.digitValue();
1431 scanChar(); // consume the digit
1432 }
1433
1434 _state.tokenValue = acc;
1435 return T_VERSION_NUMBER;
1436}
1437
1439{
1440 _tokenText.resize(0);
1441 _state.validTokenText = true;
1442 _state.patternFlags = 0;
1443
1444 if (prefix == EqualPrefix)
1445 _tokenText += u'=';
1446
1447 while (true) {
1448 switch (_state.currentChar.unicode()) {
1449 case u'/':
1450 scanChar();
1451
1452 // scan the flags
1453 _state.patternFlags = 0;
1454 while (isIdentLetter(_state.currentChar)) {
1455 int flag = regExpFlagFromChar(_state.currentChar);
1456 if (flag == 0 || _state.patternFlags & flag) {
1457 _errorMessage = QCoreApplication::translate(
1458 "QQmlParser", "Invalid regular expression flag '%0'")
1459 .arg(QChar(_state.currentChar));
1460 return false;
1461 }
1462 _state.patternFlags |= flag;
1463 scanChar();
1464 }
1465
1466 _tokenLength = _codePtr - _tokenStartPtr - 1;
1467 return true;
1468
1469 case u'\\':
1470 // regular expression backslash sequence
1471 _tokenText += _state.currentChar;
1472 scanChar();
1473
1474 if (_codePtr > _endPtr || isLineTerminator()) {
1475 _errorMessage = QCoreApplication::translate(
1476 "QQmlParser", "Unterminated regular expression backslash sequence");
1477 return false;
1478 }
1479
1480 _tokenText += _state.currentChar;
1481 scanChar();
1482 break;
1483
1484 case u'[':
1485 // regular expression class
1486 _tokenText += _state.currentChar;
1487 scanChar();
1488
1489 while (_codePtr <= _endPtr && !isLineTerminator()) {
1490 if (_state.currentChar == u']')
1491 break;
1492 else if (_state.currentChar == u'\\') {
1493 // regular expression backslash sequence
1494 _tokenText += _state.currentChar;
1495 scanChar();
1496
1497 if (_codePtr > _endPtr || isLineTerminator()) {
1498 _errorMessage = QCoreApplication::translate(
1499 "QQmlParser", "Unterminated regular expression backslash sequence");
1500 return false;
1501 }
1502
1503 _tokenText += _state.currentChar;
1504 scanChar();
1505 } else {
1506 _tokenText += _state.currentChar;
1507 scanChar();
1508 }
1509 }
1510
1511 if (_state.currentChar != u']') {
1512 _errorMessage = QCoreApplication::translate(
1513 "QQmlParser", "Unterminated regular expression class");
1514 return false;
1515 }
1516
1517 _tokenText += _state.currentChar;
1518 scanChar(); // skip ]
1519 break;
1520
1521 default:
1522 if (_codePtr > _endPtr || isLineTerminator()) {
1523 _errorMessage = QCoreApplication::translate(
1524 "QQmlParser", "Unterminated regular expression literal");
1525 return false;
1526 } else {
1527 _tokenText += _state.currentChar;
1528 scanChar();
1529 }
1530 } // switch
1531 } // while
1532
1533 return false;
1534}
1535
1536bool Lexer::isLineTerminator() const
1537{
1538 const ushort unicode = _state.currentChar.unicode();
1539 return unicode == 0x000Au
1540 || unicode == 0x000Du
1541 || unicode == 0x2028u
1542 || unicode == 0x2029u;
1543}
1544
1545unsigned Lexer::isLineTerminatorSequence() const
1546{
1547 switch (_state.currentChar.unicode()) {
1548 case 0x000Au:
1549 case 0x2028u:
1550 case 0x2029u:
1551 return 1;
1552 case 0x000Du:
1553 if (_codePtr->unicode() == 0x000Au)
1554 return 2;
1555 else
1556 return 1;
1557 default:
1558 return 0;
1559 }
1560}
1561
1562bool Lexer::isIdentLetter(QChar ch)
1563{
1564 // ASCII-biased, since all reserved words are ASCII, aand hence the
1565 // bulk of content to be parsed.
1566 if ((ch >= u'a' && ch <= u'z')
1567 || (ch >= u'A' && ch <= u'Z')
1568 || ch == u'$' || ch == u'_')
1569 return true;
1570 if (ch.unicode() < 128)
1571 return false;
1572 return ch.isLetterOrNumber();
1573}
1574
1575bool Lexer::isDecimalDigit(ushort c)
1576{
1577 return (c >= u'0' && c <= u'9');
1578}
1579
1580bool Lexer::isHexDigit(QChar c)
1581{
1582 return ((c >= u'0' && c <= u'9')
1583 || (c >= u'a' && c <= u'f')
1584 || (c >= u'A' && c <= u'F'));
1585}
1586
1587bool Lexer::isOctalDigit(ushort c)
1588{
1589 return (c >= u'0' && c <= u'7');
1590}
1591
1593{
1594 if (_state.validTokenText)
1595 return _tokenText;
1596
1597 if (_state.tokenKind == T_STRING_LITERAL)
1598 return QString(_tokenStartPtr + 1, _tokenLength - 2);
1599
1600 return QString(_tokenStartPtr, _tokenLength);
1601}
1602
1604{
1605 return _state.errorCode;
1606}
1607
1609{
1610 return _errorMessage;
1611}
1612
1613void Lexer::syncProhibitAutomaticSemicolon()
1614{
1615 if (_state.parenthesesState == BalancedParentheses) {
1616 // we have seen something like "if (foo)", which means we should
1617 // never insert an automatic semicolon at this point, since it would
1618 // then be expanded into an empty statement (ECMA-262 7.9.1)
1619 _state.prohibitAutomaticSemicolon = true;
1621 } else {
1622 _state.prohibitAutomaticSemicolon = false;
1623 }
1624}
1625
1626bool Lexer::prevTerminator() const
1627{
1628 return _state.terminator;
1629}
1630
1631bool Lexer::followsClosingBrace() const
1632{
1633 return _state.followsClosingBrace;
1634}
1635
1637{
1638 return token == T_RBRACE || token == EOF_SYMBOL || _state.terminator
1639 || _state.followsClosingBrace;
1640}
1641
1642static const int uriTokens[] = {
1643 QQmlJSGrammar::T_IDENTIFIER,
1644 QQmlJSGrammar::T_PROPERTY,
1645 QQmlJSGrammar::T_SIGNAL,
1646 QQmlJSGrammar::T_READONLY,
1647 QQmlJSGrammar::T_ON,
1648 QQmlJSGrammar::T_BREAK,
1649 QQmlJSGrammar::T_CASE,
1650 QQmlJSGrammar::T_CATCH,
1651 QQmlJSGrammar::T_CONTINUE,
1652 QQmlJSGrammar::T_DEFAULT,
1653 QQmlJSGrammar::T_DELETE,
1654 QQmlJSGrammar::T_DO,
1655 QQmlJSGrammar::T_ELSE,
1656 QQmlJSGrammar::T_FALSE,
1657 QQmlJSGrammar::T_FINALLY,
1658 QQmlJSGrammar::T_FOR,
1659 QQmlJSGrammar::T_FUNCTION,
1660 QQmlJSGrammar::T_FUNCTION_STAR,
1661 QQmlJSGrammar::T_IF,
1662 QQmlJSGrammar::T_IN,
1663 QQmlJSGrammar::T_OF,
1664 QQmlJSGrammar::T_INSTANCEOF,
1665 QQmlJSGrammar::T_NEW,
1666 QQmlJSGrammar::T_NULL,
1667 QQmlJSGrammar::T_RETURN,
1668 QQmlJSGrammar::T_SWITCH,
1669 QQmlJSGrammar::T_THIS,
1670 QQmlJSGrammar::T_THROW,
1671 QQmlJSGrammar::T_TRUE,
1672 QQmlJSGrammar::T_TRY,
1673 QQmlJSGrammar::T_TYPEOF,
1674 QQmlJSGrammar::T_VAR,
1675 QQmlJSGrammar::T_VOID,
1676 QQmlJSGrammar::T_WHILE,
1677 QQmlJSGrammar::T_CONST,
1678 QQmlJSGrammar::T_DEBUGGER,
1679 QQmlJSGrammar::T_RESERVED_WORD,
1680 QQmlJSGrammar::T_WITH,
1681
1682 QQmlJSGrammar::EOF_SYMBOL
1683};
1684static inline bool isUriToken(int token)
1685{
1686 const int *current = uriTokens;
1687 while (*current != QQmlJSGrammar::EOF_SYMBOL) {
1688 if (*current == token)
1689 return true;
1690 ++current;
1691 }
1692 return false;
1693}
1694
1696{
1697 auto setError = [error, this](QString message) {
1698 error->message = std::move(message);
1699 error->loc.startLine = tokenStartLine();
1700 error->loc.startColumn = tokenStartColumn();
1701 };
1702
1703 QScopedValueRollback<bool> directivesGuard(_state.handlingDirectives, true);
1704 Q_ASSERT(!_qmlMode);
1705
1706 lex(); // fetch the first token
1707
1708 if (_state.tokenKind != T_DOT)
1709 return true;
1710
1711 do {
1712 const int lineNumber = tokenStartLine();
1713 const int column = tokenStartColumn();
1714
1715 lex(); // skip T_DOT
1716
1717 if (!(_state.tokenKind == T_IDENTIFIER || _state.tokenKind == T_IMPORT))
1718 return true; // expected a valid QML/JS directive
1719
1720 const QString directiveName = tokenText();
1721
1722 if (! (directiveName == QLatin1String("pragma") ||
1723 directiveName == QLatin1String("import"))) {
1724 setError(QCoreApplication::translate("QQmlParser", "Syntax error"));
1725 return false; // not a valid directive name
1726 }
1727
1728 // it must be a pragma or an import directive.
1729 if (directiveName == QLatin1String("pragma")) {
1730 // .pragma library
1731 if (! (lex() == T_IDENTIFIER && tokenText() == QLatin1String("library"))) {
1732 setError(QCoreApplication::translate("QQmlParser", "Syntax error"));
1733 return false; // expected `library
1734 }
1735
1736 // we found a .pragma library directive
1737 directives->pragmaLibrary();
1738
1739 } else {
1740 Q_ASSERT(directiveName == QLatin1String("import"));
1741 lex(); // skip .import
1742
1743 QString pathOrUri;
1744 QString version;
1745 bool fileImport = false; // file or uri import
1746
1747 if (_state.tokenKind == T_STRING_LITERAL) {
1748 // .import T_STRING_LITERAL as T_IDENTIFIER
1749
1750 fileImport = true;
1751 pathOrUri = tokenText();
1752
1753 if (!pathOrUri.endsWith(QLatin1String("js"))) {
1754 setError(QCoreApplication::translate("QQmlParser","Imported file must be a script"));
1755 return false;
1756 }
1757 lex();
1758
1759 } else if (_state.tokenKind == T_IDENTIFIER) {
1760 // .import T_IDENTIFIER (. T_IDENTIFIER)* (T_VERSION_NUMBER (. T_VERSION_NUMBER)?)? as T_IDENTIFIER
1761 while (true) {
1762 if (!isUriToken(_state.tokenKind)) {
1763 setError(QCoreApplication::translate("QQmlParser","Invalid module URI"));
1764 return false;
1765 }
1766
1767 pathOrUri.append(tokenText());
1768
1769 lex();
1770 if (tokenStartLine() != lineNumber) {
1771 setError(QCoreApplication::translate("QQmlParser","Invalid module URI"));
1772 return false;
1773 }
1774 if (_state.tokenKind != QQmlJSGrammar::T_DOT)
1775 break;
1776
1777 pathOrUri.append(u'.');
1778
1779 lex();
1780 if (tokenStartLine() != lineNumber) {
1781 setError(QCoreApplication::translate("QQmlParser","Invalid module URI"));
1782 return false;
1783 }
1784 }
1785
1786 if (_state.tokenKind == T_VERSION_NUMBER) {
1787 version = tokenText();
1788 lex();
1789 if (_state.tokenKind == T_DOT) {
1790 version += u'.';
1791 lex();
1792 if (_state.tokenKind != T_VERSION_NUMBER) {
1794 "QQmlParser", "Incomplete version number (dot but no minor)"));
1795 return false; // expected the module version number
1796 }
1797 version += tokenText();
1798 lex();
1799 }
1800 }
1801 }
1802
1803 //
1804 // recognize the mandatory `as' followed by the module name
1805 //
1806 if (!(_state.tokenKind == T_AS && tokenStartLine() == lineNumber)) {
1807 if (fileImport)
1808 setError(QCoreApplication::translate("QQmlParser", "File import requires a qualifier"));
1809 else
1810 setError(QCoreApplication::translate("QQmlParser", "Module import requires a qualifier"));
1811 if (tokenStartLine() != lineNumber) {
1812 error->loc.startLine = lineNumber;
1813 error->loc.startColumn = column;
1814 }
1815 return false; // expected `as'
1816 }
1817
1818 if (lex() != T_IDENTIFIER || tokenStartLine() != lineNumber) {
1819 if (fileImport)
1820 setError(QCoreApplication::translate("QQmlParser", "File import requires a qualifier"));
1821 else
1822 setError(QCoreApplication::translate("QQmlParser", "Module import requires a qualifier"));
1823 return false; // expected module name
1824 }
1825
1826 const QString module = tokenText();
1827 if (!module.at(0).isUpper()) {
1828 setError(QCoreApplication::translate("QQmlParser","Invalid import qualifier"));
1829 return false;
1830 }
1831
1832 if (fileImport)
1833 directives->importFile(pathOrUri, module, lineNumber, column);
1834 else
1835 directives->importModule(pathOrUri, version, module, lineNumber, column);
1836 }
1837
1838 if (tokenStartLine() != lineNumber) {
1839 setError(QCoreApplication::translate("QQmlParser", "Syntax error"));
1840 return false; // the directives cannot span over multiple lines
1841 }
1842
1843 // fetch the first token after the .pragma/.import directive
1844 lex();
1845 } while (_state.tokenKind == T_DOT);
1846
1847 return true;
1848}
1849
1851{
1852 return _state;
1853}
1855{
1856 _state = state;
1857}
1858
// Computes an integer flag set from qmlMode(), yieldIsKeyWord() and
// _staticIsKeyword, starting from 0, and returns it.
// NOTE(review): the statement that should follow each `if` below is not
// present in this listing (its numbering skips a line after every condition).
// Presumably each one ORs a mode flag into `flags`; as written, the `if`s nest
// and the function only returns when all three conditions hold. Restore the
// missing statements from the upstream file before relying on this function.
1859int Lexer::parseModeFlags() const {
1860 int flags = 0;
1861 if (qmlMode())
1863 if (yieldIsKeyWord())
1865 if (_staticIsKeyword)
1867 return flags;
1868}
1869
1870namespace QQmlJS {
1872{
1873 dbg << "{\n"
1874 << " errorCode:" << int(s.errorCode) << ",\n"
1875 << " currentChar:" << s.currentChar << ",\n"
1876 << " tokenValue:" << s.tokenValue << ",\n"
1877 << " parenthesesState:" << s.parenthesesState << ",\n"
1878 << " parenthesesCount:" << s.parenthesesCount << ",\n"
1879 << " outerTemplateBraceCount:" << s.outerTemplateBraceCount << ",\n"
1880 << " bracesCount:" << s.bracesCount << ",\n"
1881 << " stackToken:" << s.stackToken << ",\n"
1882 << " patternFlags:" << s.patternFlags << ",\n"
1883 << " tokenKind:" << s.tokenKind << ",\n"
1884 << " importState:" << int(s.importState) << ",\n"
1885 << " validTokenText:" << s.validTokenText << ",\n"
1886 << " prohibitAutomaticSemicolon:" << s.prohibitAutomaticSemicolon << ",\n"
1887 << " restrictedKeyword:" << s.restrictedKeyword << ",\n"
1888 << " terminator:" << s.terminator << ",\n"
1889 << " followsClosingBrace:" << s.followsClosingBrace << ",\n"
1890 << " delimited:" << s.delimited << ",\n"
1891 << " handlingDirectives:" << s.handlingDirectives << ",\n"
1892 << " generatorLevel:" << s.generatorLevel << "\n}";
1893 return dbg;
1894}
1895}
1896
\inmodule QtCore
static QString translate(const char *context, const char *key, const char *disambiguation=nullptr, int n=-1)
\threadsafe
\inmodule QtCore
virtual void pragmaLibrary()
virtual void importFile(const QString &jsfile, const QString &module, int line, int column)
virtual void importModule(const QString &uri, const QString &version, const QString &module, int line, int column)
void addComment(int pos, int len, int line, int col)
QStringView midRef(int position, int size)
QStringView newStringRef(const QString &text)
void setCode(const QString &code)
int tokenKind() const
void setCode(const QString &code, int lineno, bool qmlMode=true, CodeContinuation codeContinuation=CodeContinuation::Reset)
static int classify(const QChar *s, int n, int parseModeFlags)
QString code() const
int tokenStartColumn() const
QString tokenText() const
@ IllegalUnicodeEscapeSequence
@ IllegalHexadecimalEscapeSequence
bool qmlMode() const
bool scanDirectives(Directives *directives, DiagnosticMessage *error)
Error errorCode() const
bool scanRegExp(RegExpBodyPrefix prefix=NoPrefix)
int tokenOffset() const
const State & state() const
bool yieldIsKeyWord() const
int tokenStartLine() const
void setState(const State &state)
Lexer(Engine *engine, LexMode lexMode=LexMode::WholeCode)
QString errorMessage() const
bool canInsertAutomaticSemicolon(int token) const
T pop()
Removes the top item from the stack and returns it.
Definition qstack.h:18
void push(const T &t)
Adds element t to the top of the stack.
Definition qstack.h:17
\inmodule QtCore
Definition qstringview.h:78
\macro QT_RESTRICTED_CAST_FROM_ASCII
Definition qstring.h:129
void reserve(qsizetype size)
Ensures the string has space for at least size characters.
Definition qstring.h:1325
bool isEmpty() const noexcept
Returns true if the string has no characters; otherwise returns false.
Definition qstring.h:192
void clear()
Clears the contents of the string and makes it null.
Definition qstring.h:1252
qsizetype size() const noexcept
Returns the number of characters in this string.
Definition qstring.h:186
QString & insert(qsizetype i, QChar c)
Definition qstring.cpp:3132
const QChar * unicode() const
Returns a Unicode representation of the string.
Definition qstring.h:1230
void resize(qsizetype size)
Sets the size of the string to size characters.
Definition qstring.cpp:2668
else opt state
[0]
Token token
Definition keywords.cpp:444
QDebug operator<<(QDebug dbg, const Lexer &l)
Combined button and popup list for selecting options.
#define Q_FALLTHROUGH()
DBusConnection const char DBusError * error
double qstrntod(const char *s00, qsizetype len, const char **se, bool *ok)
GLenum mode
GLuint GLuint end
GLbitfield flags
GLuint GLsizei const GLchar * message
GLint first
GLenum GLenum GLsizei void GLsizei void * column
GLdouble s
[6]
Definition qopenglext.h:235
const GLubyte * c
static int regExpFlagFromChar(const QChar &ch)
static const int uriTokens[]
static bool isIdentifierPart(uint ch)
static bool isUriToken(int token)
static unsigned char convertHex(ushort c)
static bool isIdentifierStart(uint ch)
static void setError(QJsonObject *response, const QString &msg)
static QT_BEGIN_NAMESPACE bool isDigit(ushort ch)
#define Q_ASSERT(cond)
Definition qrandom.cpp:47
QtPrivate::QRegularExpressionMatchIteratorRangeBasedForIterator begin(const QRegularExpressionMatchIterator &iterator)
QLatin1StringView QLatin1String
Definition qstringfwd.h:31
ptrdiff_t qsizetype
Definition qtypes.h:165
unsigned int uint
Definition qtypes.h:34
unsigned short ushort
Definition qtypes.h:33
static QString quote(const QString &str)
QObject::connect nullptr
MyCustomStruct c2
QJSEngine engine
[0]
ParenthesesState parenthesesState
QStack< int > outerTemplateBraceCount