9#include <private/qqmljsdiagnosticmessage_p.h>
10#include <private/qqmljsmemorypool_p.h>
11#include <private/qlocale_tools_p.h>
14#include <QtCore/qcoreapplication.h>
15#include <QtCore/qvarlengtharray.h>
16#include <QtCore/qdebug.h>
17#include <QtCore/QScopedValueRollback>
22using namespace QQmlJS;
23using namespace Qt::StringLiterals;
27 switch (ch.unicode()) {
28 case 'g':
return Lexer::RegExp_Global;
29 case 'i':
return Lexer::RegExp_IgnoreCase;
30 case 'm':
return Lexer::RegExp_Multiline;
31 case 'u':
return Lexer::RegExp_Unicode;
32 case 'y':
return Lexer::RegExp_Sticky;
39 if (c >=
'0' && c <=
'9')
41 else if (c >=
'a' && c <=
'f')
42 return (c -
'a' + 10);
44 return (c -
'A' + 10);
49 return QChar((convertHex(c1.unicode()) << 4) + convertHex(c2.unicode()));
// Constructs a lexer for \a engine operating in \a lexMode. The end-of-input
// pointer starts out null and QML mode is switched on initially.
52Lexer::Lexer(Engine *engine, LexMode lexMode)
53 : _engine(engine), _lexMode(lexMode), _endPtr(
nullptr), _qmlMode(
true)
// Registers this lexer with the engine.
// NOTE(review): any guard around this call is not visible in this excerpt.
56 engine->setLexer(
this);
59bool Lexer::qmlMode()
const
64QString Lexer::code()
const
// Compares the total amount of code seen so far (_currentOffset plus the size
// of the current _code) against a maximum length and, when the comparison
// below does not hold, returns a critical diagnostic located at
// SourceLocation{ 0, 1, 1, 1 }.
69std::optional<DiagnosticMessage> Lexer::illegalFileLengthError()
const
71 Q_ASSERT(_currentOffset >= 0);
// Pick the wider of quint32 / qsizetype for the arithmetic and the narrower
// one for the limit, so that the sum and the comparison are carried out in
// the wider type.
73 constexpr bool quint32IsBigger =
sizeof(qsizetype) <=
sizeof(quint32);
74 using BiggerInt = std::conditional_t<quint32IsBigger, quint32, qsizetype>;
75 using SmallerInt = std::conditional_t<!quint32IsBigger, quint32, qsizetype>;
77 const BiggerInt codeLength = BiggerInt(_currentOffset) + BiggerInt(_code.size());
78 const BiggerInt maxLength = BiggerInt(std::numeric_limits<SmallerInt>::max());
// NOTE(review): the statement executed when the length is below the limit is
// not visible in this excerpt; presumably it returns an empty optional.
79 if (codeLength < maxLength)
// The limit reported in the message is 2 (GB) when quint32 is the wider type
// and 4 (GB) otherwise.
82 constexpr int limit = quint32IsBigger ? 2 : 4;
83 return DiagnosticMessage{ u"File exceeds maximum length (%1GB)."_s.arg(limit), QtCriticalMsg,
84 SourceLocation{ 0, 1, 1, 1 } };
// Installs \a code as the text to be scanned, with line numbering starting at
// \a lineno. \a codeContinuation selects between continuing after previously
// supplied code and resetting.
// NOTE(review): how \a qmlMode is stored is not visible in this excerpt.
87void Lexer::setCode(
const QString &code,
int lineno,
bool qmlMode,
88 Lexer::CodeContinuation codeContinuation)
// When continuing, the size of the code held so far is added to the running
// offset before the new code is installed.
90 if (codeContinuation == Lexer::CodeContinuation::Continue)
91 _currentOffset += _code.size();
95 _engine->setCode(code);
99 _skipLinefeed =
false;
// Reset the per-token buffers and views.
102 _tokenText.reserve(1024);
103 _errorMessage.clear();
104 _tokenSpell = QStringView();
105 _rawString = QStringView();
// Point the scan, end and token-start pointers at the new code.
107 _codePtr = code.unicode();
108 _endPtr = _codePtr + code.size();
109 _tokenStartPtr = _codePtr;
// Position tracking restarts at the given line, column 0.
112 _currentLineNumber = lineno;
113 _currentColumnNumber = 0;
114 _tokenLine = _currentLineNumber;
// NOTE(review): the action taken for CodeContinuation::Reset is not visible
// in this excerpt.
118 if (codeContinuation == Lexer::CodeContinuation::Reset)
// Advances the lexer by one character: the character at _codePtr becomes
// _state.currentChar, the pointer moves forward and the column counter is
// incremented. Line terminators additionally update the line/column tracking.
122void Lexer::scanChar()
// NOTE(review): the guard around the next two statements is not visible in
// this excerpt; presumably they run only while _skipLinefeed is set, i.e. when
// the '\n' of a "\r\n" pair still has to be stepped over — confirm.
125 Q_ASSERT(*_codePtr == u'\n');
127 _skipLinefeed =
false;
129 _state.currentChar = *_codePtr++;
130 ++_currentColumnNumber;
132 if (isLineTerminator()) {
// A carriage return is reported as '\n'; when a '\n' follows directly,
// _skipLinefeed records that it still has to be skipped.
133 if (_state.currentChar == u'\r') {
134 if (_codePtr < _endPtr && *_codePtr == u'\n')
135 _skipLinefeed =
true;
136 _state.currentChar = u'\n';
// A new line starts: bump the line number and restart the column count.
138 ++_currentLineNumber;
139 _currentColumnNumber = 0;
143QChar Lexer::peekChar()
145 auto peekPtr = _codePtr;
146 if (peekPtr < _endPtr)
152inline bool isBinop(
int tok)
156 case Lexer::T_AND_AND:
157 case Lexer::T_AND_EQ:
158 case Lexer::T_DIVIDE_:
159 case Lexer::T_DIVIDE_EQ:
162 case Lexer::T_EQ_EQ_EQ:
166 case Lexer::T_GT_GT_EQ:
167 case Lexer::T_GT_GT_GT:
168 case Lexer::T_GT_GT_GT_EQ:
172 case Lexer::T_LT_LT_EQ:
174 case Lexer::T_MINUS_EQ:
175 case Lexer::T_NOT_EQ:
176 case Lexer::T_NOT_EQ_EQ:
181 case Lexer::T_PLUS_EQ:
182 case Lexer::T_REMAINDER:
183 case Lexer::T_REMAINDER_EQ:
184 case Lexer::T_RETURN:
186 case Lexer::T_STAR_EQ:
188 case Lexer::T_XOR_EQ:
198 if (c >= u'0' && c <= u'9')
199 return c.unicode() - u'0';
200 if (c >= u'a' && c <= u'f')
201 return c.unicode() - u'a' + 10;
202 if (c >= u'A' && c <= u'F')
203 return c.unicode() - u'A' + 10;
207int octalDigit(QChar c)
209 if (c >= u'0' && c <= u'7')
210 return c.unicode() - u'0';
218 const int previousTokenKind = _state.tokenKind;
220 bool firstPass =
true;
224 _tokenSpell = QStringView();
225 _rawString = QStringView();
226 if (firstPass && _state.stackToken == -1) {
228 if (_codePtr > _endPtr && _lexMode == LexMode::LineByLine && !_code.isEmpty())
231 if (_state.comments == CommentState::InMultilineComment) {
233 _tokenStartPtr = _codePtr - 1;
234 _tokenLine = _currentLineNumber;
235 _tokenColumn = _currentColumnNumber;
236 while (_codePtr <= _endPtr) {
237 if (_state.currentChar == u'*') {
239 if (_state.currentChar == u'/') {
242 _engine->addComment(tokenOffset() + 2,
243 _codePtr - _tokenStartPtr - 1 - 4,
244 tokenStartLine(), tokenStartColumn() + 2);
246 tokenKind = T_COMMENT;
253 if (tokenKind == T_ERROR)
254 tokenKind = T_PARTIAL_COMMENT;
257 std::optional<ScanStringMode> scanMode;
258 switch (previousTokenKind) {
259 case T_PARTIAL_SINGLE_QUOTE_STRING_LITERAL:
260 scanMode = ScanStringMode::SingleQuote;
262 case T_PARTIAL_DOUBLE_QUOTE_STRING_LITERAL:
263 scanMode = ScanStringMode::DoubleQuote;
265 case T_PARTIAL_TEMPLATE_HEAD:
266 scanMode = ScanStringMode::TemplateHead;
268 case T_PARTIAL_TEMPLATE_MIDDLE:
269 scanMode = ScanStringMode::TemplateContinuation;
276 _tokenStartPtr = _codePtr - 1;
277 _tokenLine = _currentLineNumber;
278 _tokenColumn = _currentColumnNumber;
279 tokenKind = scanString(*scanMode);
283 if (tokenKind == T_ERROR)
284 tokenKind = scanToken();
285 _tokenLength = _codePtr - _tokenStartPtr - 1;
291 _state.comments = CommentState::HadComment;
293 case T_PARTIAL_COMMENT:
294 _state.comments = CommentState::InMultilineComment;
297 _state.comments = CommentState::NoComment;
300 _state.tokenKind = tokenKind;
302 _state.delimited =
false;
303 _state.restrictedKeyword =
false;
304 _state.followsClosingBrace = (previousTokenKind == T_RBRACE);
307 switch (_state.tokenKind) {
309 if (_state.bracesCount > 0)
310 ++_state.bracesCount;
313 _state.importState = ImportState::NoQmlImport;
318 _state.delimited =
true;
320 case T_AUTOMATIC_SEMICOLON:
322 _state.importState = ImportState::NoQmlImport;
325 if (isBinop(_state.tokenKind))
326 _state.delimited =
true;
330 if (qmlMode() || (_state.handlingDirectives && previousTokenKind == T_DOT))
331 _state.importState = ImportState::SawImport;
332 if (isBinop(_state.tokenKind))
333 _state.delimited =
true;
340 _state.parenthesesState = CountParentheses;
341 _state.parenthesesCount = 0;
346 _state.parenthesesState = BalancedParentheses;
354 _state.restrictedKeyword =
true;
357 if (_state.bracesCount > 0)
358 --_state.bracesCount;
359 if (_state.bracesCount == 0)
364 switch (_state.parenthesesState) {
365 case IgnoreParentheses:
368 case CountParentheses:
369 if (_state.tokenKind == T_RPAREN) {
370 --_state.parenthesesCount;
371 if (_state.parenthesesCount == 0)
372 _state.parenthesesState = BalancedParentheses;
373 }
else if (_state.tokenKind == T_LPAREN) {
374 ++_state.parenthesesCount;
378 case BalancedParentheses:
379 if (_state.tokenKind != T_DO && _state.tokenKind != T_ELSE)
380 _state.parenthesesState = IgnoreParentheses;
384 return _state.tokenKind;
387uint Lexer::decodeUnicodeEscapeCharacter(
bool *ok)
389 Q_ASSERT(_state.currentChar == u'u');
391 constexpr int distanceFromFirstHexToLastHex = 3;
392 if (_codePtr + distanceFromFirstHexToLastHex <= _endPtr && isHexDigit(_state.currentChar)) {
394 for (
int i = 0; i < 4; ++i) {
395 int digit = hexDigit(_state.currentChar);
405 }
else if (_codePtr < _endPtr && _state.currentChar == u'{') {
408 if (!isHexDigit(_state.currentChar))
412 while (_codePtr <= _endPtr) {
413 int digit = hexDigit(_state.currentChar);
418 if (codePoint > 0x10ffff)
423 if (_state.currentChar != u'}')
434 _state.errorCode = IllegalUnicodeEscapeSequence;
435 _errorMessage = QCoreApplication::translate(
"QQmlParser",
"Illegal unicode escape sequence");
// Decodes a two-digit hexadecimal escape into a single QChar.
// NOTE(review): how \a ok is set and what is returned on failure are not
// visible in this excerpt.
441QChar Lexer::decodeHexEscapeCharacter(
bool *ok)
// Only proceed when the next two characters at _codePtr are both hex digits.
443 if (isHexDigit(_codePtr[0]) && isHexDigit(_codePtr[1])) {
// The two digits are captured from _state.currentChar.
// NOTE(review): the calls that advance the lexer between the two captures are
// not visible in this excerpt.
446 const QChar c1 = _state.currentChar;
449 const QChar c2 = _state.currentChar;
// Combine the high digit c1 and the low digit c2 into one character.
455 return convertHex(c1, c2);
463QDebug operator<<(QDebug dbg,
const Lexer &l)
466 <<
" engine:" << qsizetype(l._engine) <<
",\n"
467 <<
" lexMode:" <<
int(l._lexMode) <<
",\n"
468 <<
" code.size:" << qsizetype(l._code.unicode()) <<
"+" << l._code.size() <<
",\n"
469 <<
" endPtr: codePtr + " << (l._endPtr - l._codePtr) <<
",\n"
470 <<
" qmlMode:" << l._qmlMode <<
",\n"
471 <<
" staticIsKeyword:" << l._staticIsKeyword <<
",\n"
472 <<
" currentLineNumber:" << l._currentLineNumber <<
",\n"
473 <<
" currentColumnNumber:" << l._currentColumnNumber <<
",\n"
474 <<
" currentOffset:" << l._currentOffset <<
",\n"
475 <<
" tokenLength:" << l._tokenLength <<
",\n"
476 <<
" tokenLine:" << l._tokenLine <<
",\n"
477 <<
" tokenColumn:" << l._tokenColumn <<
",\n"
478 <<
" tokenText:" << l._tokenText <<
",\n"
479 <<
" skipLinefeed:" << l._skipLinefeed <<
",\n"
480 <<
" errorMessage:" << l._errorMessage <<
",\n"
481 <<
" tokenSpell:" << l._tokenSpell <<
",\n"
482 <<
" rawString:" << l._rawString <<
",\n";
484 dbg <<
" codePtr: code.unicode()+" << (l._codePtr - l._code.unicode()) <<
",\n";
486 dbg <<
" codePtr: *null*,\n";
487 if (l._tokenStartPtr)
488 dbg <<
" tokenStartPtr: codePtr " << (l._tokenStartPtr - l._codePtr) <<
",\n";
490 dbg <<
" tokenStartPtr: *null*,\n";
491 dbg <<
" state:" << l._state <<
"\n}";
499 if ((ch >= u'a' && ch <= u'z') ||
500 (ch >= u'A' && ch <= u'Z') ||
501 ch == u'$' || ch == u'_')
504 switch (QChar::category(ch)) {
505 case QChar::Number_Letter:
506 case QChar::Letter_Uppercase:
507 case QChar::Letter_Lowercase:
508 case QChar::Letter_Titlecase:
509 case QChar::Letter_Modifier:
510 case QChar::Letter_Other:
521 if ((ch >= u'a' && ch <= u'z') ||
522 (ch >= u'A' && ch <= u'Z') ||
523 (ch >= u'0' && ch <= u'9') ||
524 ch == u'$' || ch == u'_' ||
525 ch == 0x200c || ch == 0x200d )
528 switch (QChar::category(ch)) {
529 case QChar::Mark_NonSpacing:
530 case QChar::Mark_SpacingCombining:
532 case QChar::Number_DecimalDigit:
533 case QChar::Number_Letter:
535 case QChar::Letter_Uppercase:
536 case QChar::Letter_Lowercase:
537 case QChar::Letter_Titlecase:
538 case QChar::Letter_Modifier:
539 case QChar::Letter_Other:
541 case QChar::Punctuation_Connector:
549int Lexer::scanToken()
551 if (_state.stackToken != -1) {
552 int tk = _state.stackToken;
553 _state.stackToken = -1;
557 if (_state.bracesCount == 0) {
559 return scanString(TemplateContinuation);
562 if (_state.comments == CommentState::NoComment)
563 _state.terminator =
false;
566 _state.validTokenText =
false;
568 while (_state.currentChar.isSpace()) {
569 if (isLineTerminator()) {
570 bool isAtEnd = (_codePtr + (_skipLinefeed ? 1 : 0)) == _endPtr;
571 if (_state.restrictedKeyword) {
573 _tokenLine = _currentLineNumber;
574 _tokenColumn = _currentColumnNumber;
575 _tokenStartPtr = _codePtr - 1;
577 }
else if (_lexMode == LexMode::WholeCode || !isAtEnd) {
578 _state.terminator =
true;
579 syncProhibitAutomaticSemicolon();
586 _tokenStartPtr = _codePtr - 1;
587 _tokenLine = _currentLineNumber;
588 _tokenColumn = _currentColumnNumber;
590 if (_codePtr >= _endPtr) {
591 if (_lexMode == LexMode::LineByLine) {
592 if (!_code.isEmpty()) {
593 _state.currentChar = *(_codePtr - 2);
598 }
else if (_codePtr > _endPtr) {
603 const QChar ch = _state.currentChar;
606 switch (ch.unicode()) {
607 case u'~':
return T_TILDE;
608 case u'}':
return T_RBRACE;
611 if (_state.currentChar == u'|') {
614 }
else if (_state.currentChar == u'=') {
620 case u'{':
return T_LBRACE;
623 if (_state.currentChar == u'=') {
629 case u']':
return T_RBRACKET;
630 case u'[':
return T_LBRACKET;
632 if (_state.currentChar == u'?') {
634 return T_QUESTION_QUESTION;
636 if (_state.currentChar == u'.' && !peekChar().isDigit()) {
638 return T_QUESTION_DOT;
645 if (_state.currentChar == u'>') {
647 if (_state.currentChar == u'>') {
649 if (_state.currentChar == u'=') {
651 return T_GT_GT_GT_EQ;
654 }
else if (_state.currentChar == u'=') {
659 }
else if (_state.currentChar == u'=') {
666 if (_state.currentChar == u'=') {
668 if (_state.currentChar == u'=') {
673 }
else if (_state.currentChar == u'>') {
680 if (_state.currentChar == u'=') {
683 }
else if (_state.currentChar == u'<') {
685 if (_state.currentChar == u'=') {
693 case u';':
return T_SEMICOLON;
694 case u':':
return T_COLON;
697 switch (_state.currentChar.unicode()) {
700 while (_codePtr <= _endPtr) {
701 if (_state.currentChar == u'*') {
703 if (_state.currentChar == u'/') {
706 _engine->addComment(tokenOffset() + 2,
707 _codePtr - _tokenStartPtr - 1 - 4, tokenStartLine(),
708 tokenStartColumn() + 2);
710 if (_lexMode == LexMode::LineByLine)
719 if (_lexMode == LexMode::LineByLine)
720 return T_PARTIAL_COMMENT;
724 while (_codePtr <= _endPtr && !isLineTerminator()) {
728 _engine->addComment(tokenOffset() + 2, _codePtr - _tokenStartPtr - 1 - 2,
729 tokenStartLine(), tokenStartColumn() + 2);
731 if (_lexMode == LexMode::LineByLine)
742 if (_state.importState == ImportState::SawImport)
744 if (isDecimalDigit(_state.currentChar.unicode()))
745 return scanNumber(ch);
746 if (_state.currentChar == u'.') {
748 if (_state.currentChar == u'.') {
752 _state.errorCode = IllegalCharacter;
753 _errorMessage = QCoreApplication::translate(
"QQmlParser",
"Unexpected token '.'");
760 if (_state.currentChar == u'=') {
763 }
else if (_state.currentChar == u'-') {
766 if (_state.terminator && !_state.delimited && !_state.prohibitAutomaticSemicolon
767 && _state.tokenKind != T_LPAREN) {
768 _state.stackToken = T_MINUS_MINUS;
772 return T_MINUS_MINUS;
776 case u',':
return T_COMMA;
779 if (_state.currentChar == u'=') {
782 }
else if (_state.currentChar == u'+') {
785 if (_state.terminator && !_state.delimited && !_state.prohibitAutomaticSemicolon
786 && _state.tokenKind != T_LPAREN) {
787 _state.stackToken = T_PLUS_PLUS;
796 if (_state.currentChar == u'=') {
799 }
else if (_state.currentChar == u'*') {
801 if (_state.currentChar == u'=') {
803 return T_STAR_STAR_EQ;
809 case u')':
return T_RPAREN;
810 case u'(':
return T_LPAREN;
812 case u'@':
return T_AT;
815 if (_state.currentChar == u'=') {
818 }
else if (_state.currentChar == u'&') {
825 if (_state.currentChar == u'=') {
827 return T_REMAINDER_EQ;
832 if (_state.currentChar == u'=') {
834 if (_state.currentChar == u'=') {
843 _state.outerTemplateBraceCount.push(_state.bracesCount);
847 return scanString(ScanStringMode(ch.unicode()));
858 if (_state.importState == ImportState::SawImport)
859 return scanVersionNumber(ch);
861 return scanNumber(ch);
864 if (_currentLineNumber == 1 && _currentColumnNumber == 2) {
866 while (_codePtr <= _endPtr && !isLineTerminator()) {
870 _engine->addComment(tokenOffset(), _codePtr - _tokenStartPtr - 1, tokenStartLine(),
873 if (_lexMode == LexMode::LineByLine)
881 uint c = ch.unicode();
882 bool identifierWithEscapeChars =
false;
883 if (QChar::isHighSurrogate(c) && QChar::isLowSurrogate(_state.currentChar.unicode())) {
884 c = QChar::surrogateToUcs4(ushort(c), _state.currentChar.unicode());
886 }
else if (c ==
'\\' && _state.currentChar == u'u') {
887 identifierWithEscapeChars =
true;
889 c = decodeUnicodeEscapeCharacter(&ok);
893 if (isIdentifierStart(c)) {
894 if (identifierWithEscapeChars) {
895 _tokenText.resize(0);
896 if (QChar::requiresSurrogates(c)) {
897 _tokenText += QChar(QChar::highSurrogate(c));
898 _tokenText += QChar(QChar::lowSurrogate(c));
900 _tokenText += QChar(c);
902 _state.validTokenText =
true;
904 while (_codePtr <= _endPtr) {
905 c = _state.currentChar.unicode();
906 if (QChar::isHighSurrogate(c) && QChar::isLowSurrogate(_codePtr->unicode())) {
908 c = QChar::surrogateToUcs4(ushort(c), _state.currentChar.unicode());
909 }
else if (_state.currentChar == u'\\' && _codePtr[0] == u'u') {
910 if (!identifierWithEscapeChars) {
911 identifierWithEscapeChars =
true;
912 _tokenText.resize(0);
913 _tokenText.insert(0, _tokenStartPtr, _codePtr - _tokenStartPtr - 1);
914 _state.validTokenText =
true;
919 c = decodeUnicodeEscapeCharacter(&ok);
923 if (!isIdentifierPart(c))
926 if (QChar::requiresSurrogates(c)) {
927 _tokenText += QChar(QChar::highSurrogate(c));
928 _tokenText += QChar(QChar::lowSurrogate(c));
930 _tokenText += QChar(c);
935 if (!isIdentifierPart(c))
938 if (identifierWithEscapeChars) {
939 if (QChar::requiresSurrogates(c)) {
940 _tokenText += QChar(QChar::highSurrogate(c));
941 _tokenText += QChar(QChar::lowSurrogate(c));
943 _tokenText += QChar(c);
949 const auto token = QStringView(_tokenStartPtr, _codePtr - 1);
950 _tokenLength = token.size();
951 int kind = T_IDENTIFIER;
953 if (!identifierWithEscapeChars)
954 kind = classify(token, parseModeFlags());
957 if (kind == T_IDENTIFIER && identifierWithEscapeChars)
958 _tokenSpell = _engine->newStringRef(_tokenText);
960 _tokenSpell = _engine->midRef(_tokenStartPtr - _code.unicode(), _tokenLength);
973int Lexer::scanString(ScanStringMode mode)
975 const char16_t quote = mode == TemplateContinuation ? TemplateHead : mode;
981 bool multilineStringLiteral =
false;
983 const QChar *startCode = _codePtr - 1;
986 _skipLinefeed =
false;
990 while (_codePtr <= _endPtr) {
991 if (isLineTerminator()) {
992 if ((quote == u'`' || qmlMode())) {
994 --_currentLineNumber;
997 _state.errorCode = IllegalCharacter;
998 _errorMessage = QCoreApplication::translate(
"QQmlParser",
999 "Stray newline in string literal");
1001 }
else if (_state.currentChar == u'\\') {
1003 }
else if (_state.currentChar == u'$' && quote == u'`') {
1005 }
else if (_state.currentChar == quote) {
1007 _engine->midRef(startCode - _code.unicode(), _codePtr - startCode - 1);
1008 _rawString = _tokenSpell;
1012 _state.bracesCount = _state.outerTemplateBraceCount.pop();
1013 if (mode == TemplateHead)
1014 return T_NO_SUBSTITUTION_TEMPLATE;
1015 else if (mode == TemplateContinuation)
1016 return T_TEMPLATE_TAIL;
1017 else if (multilineStringLiteral)
1018 return T_MULTILINE_STRING_LITERAL;
1020 return T_STRING_LITERAL;
1023 _state.currentChar = *_codePtr++;
1024 ++_currentColumnNumber;
1031 --_currentColumnNumber;
1033 _state.validTokenText =
true;
1034 _tokenText = QString(startCode, _codePtr - startCode);
1036 auto setRawString = [&](
const QChar *end) {
1037 QString raw(startCode, end - startCode - 1);
1038 raw.replace(QLatin1String(
"\r\n"), QLatin1String(
"\n"));
1039 raw.replace(u'\r', u'\n');
1040 _rawString = _engine->newStringRef(raw);
1045 while (_codePtr <= _endPtr) {
1046 if (_state.currentChar == quote) {
1050 _tokenSpell = _engine->newStringRef(_tokenText);
1052 setRawString(_codePtr - 1);
1056 _state.bracesCount = _state.outerTemplateBraceCount.pop();
1058 if (mode == TemplateContinuation)
1059 return T_TEMPLATE_TAIL;
1060 else if (mode == TemplateHead)
1061 return T_NO_SUBSTITUTION_TEMPLATE;
1063 return multilineStringLiteral ? T_MULTILINE_STRING_LITERAL : T_STRING_LITERAL;
1064 }
else if (quote == u'`' && _state.currentChar == u'$' && *_codePtr == u'{') {
1067 _state.bracesCount = 1;
1069 _tokenSpell = _engine->newStringRef(_tokenText);
1070 setRawString(_codePtr - 2);
1073 return (mode == TemplateHead ? T_TEMPLATE_HEAD : T_TEMPLATE_MIDDLE);
1074 }
else if (_state.currentChar == u'\\') {
1076 if (_codePtr > _endPtr) {
1077 _state.errorCode = IllegalEscapeSequence;
1078 _errorMessage = QCoreApplication::translate(
1079 "QQmlParser",
"End of file reached at escape sequence");
1085 switch (_state.currentChar.unicode()) {
1089 uint codePoint = decodeUnicodeEscapeCharacter(&ok);
1092 if (QChar::requiresSurrogates(codePoint)) {
1094 _tokenText += QChar(QChar::highSurrogate(codePoint));
1095 u = QChar::lowSurrogate(codePoint);
1097 u = QChar(codePoint);
1104 u = decodeHexEscapeCharacter(&ok);
1106 _state.errorCode = IllegalHexadecimalEscapeSequence;
1107 _errorMessage = QCoreApplication::translate(
1108 "QQmlParser",
"Illegal hexadecimal escape sequence");
1114 case u'\\': u = u'\\'; scanChar();
break;
1115 case u'\'': u = u'\''; scanChar();
break;
1116 case u'\"': u = u'\"'; scanChar();
break;
1117 case u'b': u = u'\b'; scanChar();
break;
1118 case u'f': u = u'\f'; scanChar();
break;
1119 case u'n': u = u'\n'; scanChar();
break;
1120 case u'r': u = u'\r'; scanChar();
break;
1121 case u't': u = u'\t'; scanChar();
break;
1122 case u'v': u = u'\v'; scanChar();
break;
1125 if (!_codePtr->isDigit()) {
1140 _state.errorCode = IllegalEscapeSequence;
1141 _errorMessage = QCoreApplication::translate(
1142 "QQmlParser",
"Octal escape sequences are not allowed");
1156 u = _state.currentChar;
1162 _tokenText += _state.currentChar;
1166 if (_lexMode == LexMode::LineByLine && !_code.isEmpty()) {
1167 if (mode == TemplateContinuation)
1168 return T_PARTIAL_TEMPLATE_MIDDLE;
1169 else if (mode == TemplateHead)
1170 return T_PARTIAL_TEMPLATE_HEAD;
1171 else if (mode == SingleQuote)
1172 return T_PARTIAL_SINGLE_QUOTE_STRING_LITERAL;
1173 return T_PARTIAL_DOUBLE_QUOTE_STRING_LITERAL;
1175 _state.errorCode = UnclosedStringLiteral;
1176 _errorMessage = QCoreApplication::translate(
"QQmlParser",
"Unclosed string at end of line");
1180int Lexer::scanNumber(QChar ch)
1182 auto scanOptionalNumericSeparator = [
this](
auto isNextCharacterValid){
1183 if (_state.currentChar == u'_') {
1184 if (peekChar() == u'_') {
1185 _state.errorCode = IllegalNumber;
1186 _errorMessage = QCoreApplication::translate(
1188 "There can be at most one numeric separator between digits"
1193 if (!isNextCharacterValid()) {
1194 _state.errorCode = IllegalNumber;
1195 _errorMessage = QCoreApplication::translate(
1197 "A trailing numeric separator is not allowed in numeric literals"
1209 if (_state.currentChar == u'x' || _state.currentChar == u'X') {
1210 ch = _state.currentChar;
1215 if (!isHexDigit(_state.currentChar)) {
1216 _state.errorCode = IllegalNumber;
1217 _errorMessage = QCoreApplication::translate(
1219 "At least one hexadecimal digit is required after '0%1'")
1226 int digit = ::hexDigit(_state.currentChar);
1233 if (!scanOptionalNumericSeparator([
this](){
return isHexDigit(peekChar()); }))
1237 _state.tokenValue = d;
1238 return T_NUMERIC_LITERAL;
1239 }
else if (_state.currentChar == u'o' || _state.currentChar == u'O') {
1240 ch = _state.currentChar;
1245 if (!isOctalDigit(_state.currentChar.unicode())) {
1246 _state.errorCode = IllegalNumber;
1248 QCoreApplication::translate(
1249 "QQmlParser",
"At least one octal digit is required after '0%1'")
1256 int digit = ::octalDigit(_state.currentChar);
1263 if (!scanOptionalNumericSeparator([
this](){
1264 return isOctalDigit(peekChar().unicode());
1270 _state.tokenValue = d;
1271 return T_NUMERIC_LITERAL;
1272 }
else if (_state.currentChar == u'b' || _state.currentChar == u'B') {
1273 ch = _state.currentChar;
1278 if (_state.currentChar.unicode() != u'0' && _state.currentChar.unicode() != u'1') {
1279 _state.errorCode = IllegalNumber;
1281 QCoreApplication::translate(
1282 "QQmlParser",
"At least one binary digit is required after '0%1'")
1290 if (_state.currentChar.unicode() == u'1')
1292 else if (_state.currentChar.unicode() != u'0')
1298 if (!scanOptionalNumericSeparator([
this](){
1299 return peekChar().unicode() == u'0' || peekChar().unicode() == u'1';
1305 _state.tokenValue = d;
1306 return T_NUMERIC_LITERAL;
1307 }
else if (_state.currentChar.isDigit() && !qmlMode()) {
1308 _state.errorCode = IllegalCharacter;
1309 _errorMessage = QCoreApplication::translate(
"QQmlParser",
1310 "Decimal numbers can't start with '0'");
1316 QVarLengthArray<
char,32> chars;
1317 chars.append(ch.unicode());
1320 if (!scanOptionalNumericSeparator([
this](){
return peekChar().isDigit(); }))
1323 while (_state.currentChar.isDigit()) {
1324 chars.append(_state.currentChar.unicode());
1327 if (!scanOptionalNumericSeparator([
this](){
return peekChar().isDigit(); }))
1331 if (_state.currentChar == u'.') {
1332 chars.append(_state.currentChar.unicode());
1337 while (_state.currentChar.isDigit()) {
1338 chars.append(_state.currentChar.unicode());
1341 if (!scanOptionalNumericSeparator([
this](){
return peekChar().isDigit(); }))
1345 if (_state.currentChar == u'e' || _state.currentChar == u'E') {
1346 if (_codePtr[0].isDigit()
1347 || ((_codePtr[0] == u'+' || _codePtr[0] == u'-') && _codePtr[1].isDigit())) {
1349 chars.append(_state.currentChar.unicode());
1352 if (_state.currentChar == u'+' || _state.currentChar == u'-') {
1353 chars.append(_state.currentChar.unicode());
1357 while (_state.currentChar.isDigit()) {
1358 chars.append(_state.currentChar.unicode());
1361 if (!scanOptionalNumericSeparator([
this](){
return peekChar().isDigit(); }))
1367 const char *begin = chars.constData();
1368 const char *end =
nullptr;
1371 _state.tokenValue = qstrntod(begin, chars.size(), &end, &ok);
1373 if (end - begin != chars.size()) {
1374 _state.errorCode = IllegalExponentIndicator;
1376 QCoreApplication::translate(
"QQmlParser",
"Illegal syntax for exponential number");
1380 return T_NUMERIC_LITERAL;
// Scans a run of digits starting with \a ch, stores the accumulated value in
// _state.tokenValue and returns T_VERSION_NUMBER.
1383int Lexer::scanVersionNumber(QChar ch)
// Early exit storing the value 0.
// NOTE(review): the condition guarding this exit is not visible in this
// excerpt.
1386 _state.tokenValue = 0;
1387 return T_VERSION_NUMBER;
// Seed the accumulator with the value of the first digit.
// NOTE(review): the declaration of acc is not visible in this excerpt.
1391 acc += ch.digitValue();
// Fold in every following digit.
// NOTE(review): any scaling of acc and the advance to the next character
// inside the loop are not visible in this excerpt.
1393 while (_state.currentChar.isDigit()) {
1395 acc += _state.currentChar.digitValue();
1399 _state.tokenValue = acc;
1400 return T_VERSION_NUMBER;
1403bool Lexer::scanRegExp(RegExpBodyPrefix prefix)
1405 _tokenText.resize(0);
1406 _state.validTokenText =
true;
1407 _state.patternFlags = 0;
1409 if (prefix == EqualPrefix)
1413 switch (_state.currentChar.unicode()) {
1418 _state.patternFlags = 0;
1419 while (isIdentLetter(_state.currentChar)) {
1420 int flag = regExpFlagFromChar(_state.currentChar);
1421 if (flag == 0 || _state.patternFlags & flag) {
1422 _errorMessage = QCoreApplication::translate(
1423 "QQmlParser",
"Invalid regular expression flag '%0'")
1424 .arg(QChar(_state.currentChar));
1427 _state.patternFlags |= flag;
1431 _tokenLength = _codePtr - _tokenStartPtr - 1;
1436 _tokenText += _state.currentChar;
1439 if (_codePtr > _endPtr || isLineTerminator()) {
1440 _errorMessage = QCoreApplication::translate(
1441 "QQmlParser",
"Unterminated regular expression backslash sequence");
1445 _tokenText += _state.currentChar;
1451 _tokenText += _state.currentChar;
1454 while (_codePtr <= _endPtr && !isLineTerminator()) {
1455 if (_state.currentChar == u']')
1457 else if (_state.currentChar == u'\\') {
1459 _tokenText += _state.currentChar;
1462 if (_codePtr > _endPtr || isLineTerminator()) {
1463 _errorMessage = QCoreApplication::translate(
1464 "QQmlParser",
"Unterminated regular expression backslash sequence");
1468 _tokenText += _state.currentChar;
1471 _tokenText += _state.currentChar;
1476 if (_state.currentChar != u']') {
1477 _errorMessage = QCoreApplication::translate(
1478 "QQmlParser",
"Unterminated regular expression class");
1482 _tokenText += _state.currentChar;
1487 if (_codePtr > _endPtr || isLineTerminator()) {
1488 _errorMessage = QCoreApplication::translate(
1489 "QQmlParser",
"Unterminated regular expression literal");
1492 _tokenText += _state.currentChar;
1501bool Lexer::isLineTerminator()
const
1503 const ushort unicode = _state.currentChar.unicode();
1504 return unicode == 0x000Au
1505 || unicode == 0x000Du
1506 || unicode == 0x2028u
1507 || unicode == 0x2029u;
1510unsigned Lexer::isLineTerminatorSequence()
const
1512 switch (_state.currentChar.unicode()) {
1518 if (_codePtr->unicode() == 0x000Au)
// Classifies \a ch as an identifier letter.
// NOTE(review): the values returned by the first two checks are not visible
// in this excerpt.
1527bool Lexer::isIdentLetter(QChar ch)
// ASCII fast path: 'a'-'z', 'A'-'Z', '$' and '_'.
1531 if ((ch >= u'a' && ch <= u'z')
1532 || (ch >= u'A' && ch <= u'Z')
1533 || ch == u'$' || ch == u'_')
// Any other code unit below 128 is decided here without consulting the
// Unicode tables.
1535 if (ch.unicode() < 128)
// Everything else is decided by QChar::isLetterOrNumber().
1537 return ch.isLetterOrNumber();
1540bool Lexer::isDecimalDigit(ushort c)
1542 return (c >= u'0' && c <= u'9');
1545bool Lexer::isHexDigit(QChar c)
1547 return ((c >= u'0' && c <= u'9')
1548 || (c >= u'a' && c <= u'f')
1549 || (c >= u'A' && c <= u'F'));
1552bool Lexer::isOctalDigit(ushort c)
1554 return (c >= u'0' && c <= u'7');
// Returns the text of the current token as a QString.
1557QString Lexer::tokenText()
const
// NOTE(review): the statement executed when validTokenText is set is not
// visible in this excerpt; presumably it returns the buffered _tokenText.
1559 if (_state.validTokenText)
// For a string literal the first and the last character of the token span
// are left out (presumably the surrounding quotes).
1562 if (_state.tokenKind == T_STRING_LITERAL)
1563 return QString(_tokenStartPtr + 1, _tokenLength - 2);
// Otherwise the token span is copied as is.
1565 return QString(_tokenStartPtr, _tokenLength);
1568Lexer::Error Lexer::errorCode()
const
1570 return _state.errorCode;
1573QString Lexer::errorMessage()
const
1575 return _errorMessage;
// Updates _state.prohibitAutomaticSemicolon from the parentheses tracking
// state.
1578void Lexer::syncProhibitAutomaticSemicolon()
// In the BalancedParentheses state the flag is raised and the tracking
// returns to IgnoreParentheses.
1580 if (_state.parenthesesState == BalancedParentheses) {
1584 _state.prohibitAutomaticSemicolon =
true;
1585 _state.parenthesesState = IgnoreParentheses;
// NOTE(review): the branch header for the assignment below is not visible in
// this excerpt; presumably it is the else branch that clears the flag in
// every other state.
1587 _state.prohibitAutomaticSemicolon =
false;
1591bool Lexer::prevTerminator()
const
1593 return _state.terminator;
1596bool Lexer::followsClosingBrace()
const
1598 return _state.followsClosingBrace;
1601bool Lexer::canInsertAutomaticSemicolon(
int token)
const
1603 return token == T_RBRACE || token == EOF_SYMBOL || _state.terminator
1604 || _state.followsClosingBrace;
1608 QQmlJSGrammar::T_IDENTIFIER,
1609 QQmlJSGrammar::T_PROPERTY,
1610 QQmlJSGrammar::T_SIGNAL,
1611 QQmlJSGrammar::T_READONLY,
1612 QQmlJSGrammar::T_ON,
1613 QQmlJSGrammar::T_BREAK,
1614 QQmlJSGrammar::T_CASE,
1615 QQmlJSGrammar::T_CATCH,
1616 QQmlJSGrammar::T_CONTINUE,
1617 QQmlJSGrammar::T_DEFAULT,
1618 QQmlJSGrammar::T_DELETE,
1619 QQmlJSGrammar::T_DO,
1620 QQmlJSGrammar::T_ELSE,
1621 QQmlJSGrammar::T_FALSE,
1622 QQmlJSGrammar::T_FINAL,
1623 QQmlJSGrammar::T_FINALLY,
1624 QQmlJSGrammar::T_FOR,
1625 QQmlJSGrammar::T_FUNCTION,
1626 QQmlJSGrammar::T_IF,
1627 QQmlJSGrammar::T_IN,
1628 QQmlJSGrammar::T_OF,
1629 QQmlJSGrammar::T_INSTANCEOF,
1630 QQmlJSGrammar::T_NEW,
1631 QQmlJSGrammar::T_NULL,
1632 QQmlJSGrammar::T_RETURN,
1633 QQmlJSGrammar::T_SWITCH,
1634 QQmlJSGrammar::T_THIS,
1635 QQmlJSGrammar::T_THROW,
1636 QQmlJSGrammar::T_TRUE,
1637 QQmlJSGrammar::T_TRY,
1638 QQmlJSGrammar::T_TYPEOF,
1639 QQmlJSGrammar::T_VAR,
1640 QQmlJSGrammar::T_VOID,
1641 QQmlJSGrammar::T_WHILE,
1642 QQmlJSGrammar::T_CONST,
1643 QQmlJSGrammar::T_DEBUGGER,
1644 QQmlJSGrammar::T_ENUM,
1645 QQmlJSGrammar::T_PACKAGE,
1646 QQmlJSGrammar::T_ABSTRACT,
1647 QQmlJSGrammar::T_INTERFACE,
1648 QQmlJSGrammar::T_IMPLEMENTS,
1649 QQmlJSGrammar::T_PUBLIC,
1650 QQmlJSGrammar::T_PROTECTED,
1651 QQmlJSGrammar::T_PRIVATE,
1652 QQmlJSGrammar::T_NATIVE,
1653 QQmlJSGrammar::T_VOLATILE,
1654 QQmlJSGrammar::T_TRANSIENT,
1655 QQmlJSGrammar::T_SYNCHRONIZED,
1656 QQmlJSGrammar::T_THROWS,
1657 QQmlJSGrammar::T_WITH,
1659 QQmlJSGrammar::EOF_SYMBOL
1664 while (*current != QQmlJSGrammar::EOF_SYMBOL) {
1665 if (*current == token)
1672bool Lexer::scanDirectives(Directives *directives, DiagnosticMessage *error)
1674 auto setError = [error,
this](QString message) {
1675 error->message = std::move(message);
1676 error->loc.startLine = tokenStartLine();
1677 error->loc.startColumn = tokenStartColumn();
1680 QScopedValueRollback<
bool> directivesGuard(_state.handlingDirectives,
true);
1681 Q_ASSERT(!_qmlMode);
1685 if (_state.tokenKind != T_DOT)
1689 const int lineNumber = tokenStartLine();
1690 const int column = tokenStartColumn();
1694 if (!(_state.tokenKind == T_IDENTIFIER || _state.tokenKind == T_IMPORT))
1697 const QString directiveName = tokenText();
1699 if (! (directiveName == QLatin1String(
"pragma") ||
1700 directiveName == QLatin1String(
"import"))) {
1701 setError(QCoreApplication::translate(
"QQmlParser",
"Syntax error"));
1706 if (directiveName == QLatin1String(
"pragma")) {
1708 if (! (lex() == T_IDENTIFIER && tokenText() == QLatin1String(
"library"))) {
1709 setError(QCoreApplication::translate(
"QQmlParser",
"Syntax error"));
1714 directives->pragmaLibrary();
1717 Q_ASSERT(directiveName == QLatin1String(
"import"));
1722 bool fileImport =
false;
1724 if (_state.tokenKind == T_STRING_LITERAL) {
1728 pathOrUri = tokenText();
1730 if (!pathOrUri.endsWith(QLatin1String(
"js"))) {
1731 setError(QCoreApplication::translate(
"QQmlParser",
"Imported file must be a script"));
1736 }
else if (_state.tokenKind == T_IDENTIFIER) {
1739 if (!isUriToken(_state.tokenKind)) {
1740 setError(QCoreApplication::translate(
"QQmlParser",
"Invalid module URI"));
1744 pathOrUri.append(tokenText());
1747 if (tokenStartLine() != lineNumber) {
1748 setError(QCoreApplication::translate(
"QQmlParser",
"Invalid module URI"));
1751 if (_state.tokenKind != QQmlJSGrammar::T_DOT)
1754 pathOrUri.append(u'.');
1757 if (tokenStartLine() != lineNumber) {
1758 setError(QCoreApplication::translate(
"QQmlParser",
"Invalid module URI"));
1763 if (_state.tokenKind == T_VERSION_NUMBER) {
1764 version = tokenText();
1766 if (_state.tokenKind == T_DOT) {
1769 if (_state.tokenKind != T_VERSION_NUMBER) {
1770 setError(QCoreApplication::translate(
1771 "QQmlParser",
"Incomplete version number (dot but no minor)"));
1774 version += tokenText();
1783 if (!(_state.tokenKind == T_AS && tokenStartLine() == lineNumber)) {
1785 setError(QCoreApplication::translate(
"QQmlParser",
"File import requires a qualifier"));
1787 setError(QCoreApplication::translate(
"QQmlParser",
"Module import requires a qualifier"));
1788 if (tokenStartLine() != lineNumber) {
1789 error->loc.startLine = lineNumber;
1790 error->loc.startColumn = column;
1795 if (lex() != T_IDENTIFIER || tokenStartLine() != lineNumber) {
1797 setError(QCoreApplication::translate(
"QQmlParser",
"File import requires a qualifier"));
1799 setError(QCoreApplication::translate(
"QQmlParser",
"Module import requires a qualifier"));
1803 const QString module = tokenText();
1804 if (!module.at(0).isUpper()) {
1805 setError(QCoreApplication::translate(
"QQmlParser",
"Invalid import qualifier"));
1810 directives->importFile(pathOrUri, module, lineNumber, column);
1812 directives->importModule(pathOrUri, version, module, lineNumber, column);
1815 if (tokenStartLine() != lineNumber) {
1816 setError(QCoreApplication::translate(
"QQmlParser",
"Syntax error"));
1822 }
while (_state.tokenKind == T_DOT);
1827const Lexer::State &Lexer::state()
const
1831void Lexer::setState(
const Lexer::State &state)
// Assembles the parse-mode flag set that scanToken() hands to classify().
// NOTE(review): the declaration and the return of flags are not visible in
// this excerpt.
1836int Lexer::parseModeFlags()
const {
// NOTE(review): the guard for this statement is not visible in this excerpt;
// presumably it applies in QML mode only.
1839 flags |= QmlMode|StaticIsKeyword;
1840 if (yieldIsKeyWord())
1841 flags |= YieldIsKeyword;
1842 if (_staticIsKeyword)
1843 flags |= StaticIsKeyword;
1848QDebug operator<<(QDebug dbg,
const Lexer::State &s)
1851 <<
" errorCode:" <<
int(s.errorCode) <<
",\n"
1852 <<
" currentChar:" << s.currentChar <<
",\n"
1853 <<
" tokenValue:" << s.tokenValue <<
",\n"
1854 <<
" parenthesesState:" << s.parenthesesState <<
",\n"
1855 <<
" parenthesesCount:" << s.parenthesesCount <<
",\n"
1856 <<
" outerTemplateBraceCount:" << s.outerTemplateBraceCount <<
",\n"
1857 <<
" bracesCount:" << s.bracesCount <<
",\n"
1858 <<
" stackToken:" << s.stackToken <<
",\n"
1859 <<
" patternFlags:" << s.patternFlags <<
",\n"
1860 <<
" tokenKind:" << s.tokenKind <<
",\n"
1861 <<
" importState:" <<
int(s.importState) <<
",\n"
1862 <<
" validTokenText:" << s.validTokenText <<
",\n"
1863 <<
" prohibitAutomaticSemicolon:" << s.prohibitAutomaticSemicolon <<
",\n"
1864 <<
" restrictedKeyword:" << s.restrictedKeyword <<
",\n"
1865 <<
" terminator:" << s.terminator <<
",\n"
1866 <<
" followsClosingBrace:" << s.followsClosingBrace <<
",\n"
1867 <<
" delimited:" << s.delimited <<
",\n"
1868 <<
" handlingDirectives:" << s.handlingDirectives <<
",\n"
1869 <<
" generatorLevel:" << s.generatorLevel <<
"\n}";
static int regExpFlagFromChar(const QChar &ch)
static bool isIdentifierPart(uint ch)
static QChar convertHex(QChar c1, QChar c2)
static const int uriTokens[]
static bool isUriToken(int token)
static unsigned char convertHex(ushort c)
static bool isIdentifierStart(uint ch)