Qt
Internal/Contributor docs for the Qt SDK. Note: These are NOT official API docs; those are found at https://doc.qt.io/
Loading...
Searching...
No Matches
preprocessor.cpp
Go to the documentation of this file.
1// Copyright (C) 2016 The Qt Company Ltd.
2// Copyright (C) 2014 Olivier Goffart <ogoffart@woboq.org>
3// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
4
5#include "preprocessor.h"
6#include "utils.h"
7#include <qstringlist.h>
8#include <qfile.h>
9#include <qdir.h>
10#include <qfileinfo.h>
11#include <qvarlengtharray.h>
12
14
15using namespace QtMiscUtils;
16
17#include "ppkeywords.cpp"
18#include "keywords.cpp"
19
20// transform \r\n into \n
21// \r into \n (os9 style)
22// backslash-newlines into newlines
23static QByteArray cleaned(const QByteArray &input)
24{
25 QByteArray result;
26 result.resize(input.size());
27 const char *data = input.constData();
28 const char *end = input.constData() + input.size();
29 char *output = result.data();
30
31 int newlines = 0;
32 while (data != end) {
33 while (data != end && is_space(*data))
34 ++data;
35 bool takeLine = (*data == '#');
36 if (*data == '%' && *(data+1) == ':') {
37 takeLine = true;
38 ++data;
39 }
40 if (takeLine) {
41 *output = '#';
42 ++output;
43 do ++data; while (data != end && is_space(*data));
44 }
45 while (data != end) {
46 // handle \\\n, \\\r\n and \\\r
47 if (*data == '\\') {
48 if (*(data + 1) == '\r') {
49 ++data;
50 }
51 if (data != end && (*(data + 1) == '\n' || (*data) == '\r')) {
52 ++newlines;
53 data += 1;
54 if (data != end && *data != '\r')
55 data += 1;
56 continue;
57 }
58 } else if (*data == '\r' && *(data + 1) == '\n') { // reduce \r\n to \n
59 ++data;
60 }
61 if (data == end)
62 break;
63
64 char ch = *data;
65 if (ch == '\r') // os9: replace \r with \n
66 ch = '\n';
67 *output = ch;
68 ++output;
69
70 if (*data == '\n') {
71 // output additional newlines to keep the correct line-numbering
72 // for the lines following the backslash-newline sequence(s)
73 while (newlines) {
74 *output = '\n';
75 ++output;
76 --newlines;
77 }
78 ++data;
79 break;
80 }
81 ++data;
82 }
83 }
84 result.resize(output - result.constData());
85 return result;
86}
87
// When true, tokenize() keeps WHITESPACE tokens (in its C++/define modes)
// instead of discarding them. Defaults to false.
88bool Preprocessor::preprocessOnly = false;
90{
91 while(index < symbols.size() - 1 && symbols.at(index).token != PP_ENDIF){
92 switch (symbols.at(index).token) {
93 case PP_IF:
94 case PP_IFDEF:
95 case PP_IFNDEF:
96 ++index;
98 break;
99 default:
100 ;
101 }
102 ++index;
103 }
104}
105
107{
108 while (index < symbols.size() - 1
109 && (symbols.at(index).token != PP_ENDIF
110 && symbols.at(index).token != PP_ELIF
111 && symbols.at(index).token != PP_ELSE)
112 ){
113 switch (symbols.at(index).token) {
114 case PP_IF:
115 case PP_IFDEF:
116 case PP_IFNDEF:
117 ++index;
119 break;
120 default:
121 ;
122 }
123 ++index;
124 }
125 return (index < symbols.size() - 1);
126}
127
128
129Symbols Preprocessor::tokenize(const QByteArray& input, int lineNum, Preprocessor::TokenizeMode mode)
130{
131 Symbols symbols;
132 // Preallocate some space to speed up the code below.
133 // The magic divisor value was found by calculating the average ratio between
134 // input size and the final size of symbols.
135 // This yielded a value of 16.x when compiling Qt Base.
136 symbols.reserve(input.size() / 16);
137 const char *begin = input.constData();
138 const char *data = begin;
139 while (*data) {
140 if (mode == TokenizeCpp || mode == TokenizeDefine) {
141 int column = 0;
142
143 const char *lexem = data;
144 int state = 0;
145 Token token = NOTOKEN;
146 for (;;) {
147 if (static_cast<signed char>(*data) < 0) {
148 ++data;
149 continue;
150 }
151 int nextindex = keywords[state].next;
152 int next = 0;
153 if (*data == keywords[state].defchar)
154 next = keywords[state].defnext;
155 else if (!state || nextindex)
156 next = keyword_trans[nextindex][(int)*data];
157 if (!next)
158 break;
159 state = next;
160 token = keywords[state].token;
161 ++data;
162 }
163
164 // suboptimal, is_ident_char should use a table
165 if (keywords[state].ident && is_ident_char(*data))
166 token = keywords[state].ident;
167
168 if (token == NOTOKEN) {
169 if (*data)
170 ++data;
171 // an error really, but let's ignore this input
172 // to not confuse moc later. However in pre-processor
173 // only mode let's continue.
175 continue;
176 }
177
178 ++column;
179
180 if (token > SPECIAL_TREATMENT_MARK) {
181 switch (token) {
182 case QUOTE:
183 data = skipQuote(data);
184 token = STRING_LITERAL;
185 // concatenate multi-line strings for easier
186 // STRING_LITERAL handling in moc
188 && !symbols.isEmpty()
189 && symbols.constLast().token == STRING_LITERAL) {
190
191 const QByteArray newString
192 = '\"'
193 + symbols.constLast().unquotedLexemView()
194 + input.mid(lexem - begin + 1, data - lexem - 2)
195 + '\"';
196 symbols.last() = Symbol(symbols.constLast().lineNum,
197 STRING_LITERAL,
198 newString);
199 continue;
200 }
201 break;
202 case SINGLEQUOTE:
203 while (*data && (*data != '\''
204 || (*(data-1)=='\\'
205 && *(data-2)!='\\')))
206 ++data;
207 if (*data)
208 ++data;
209 token = CHARACTER_LITERAL;
210 break;
211 case LANGLE_SCOPE:
212 // split <:: into two tokens, < and ::
213 token = LANGLE;
214 data -= 2;
215 break;
216 case DIGIT:
217 {
218 bool hasSeenTokenSeparator = false;;
219 while (isAsciiDigit(*data) || (hasSeenTokenSeparator = *data == '\''))
220 ++data;
221 if (!*data || *data != '.') {
222 token = INTEGER_LITERAL;
223 if (data - lexem == 1 &&
224 (*data == 'x' || *data == 'X'
225 || *data == 'b' || *data == 'B')
226 && *lexem == '0') {
227 ++data;
228 while (isHexDigit(*data) || (hasSeenTokenSeparator = *data == '\''))
229 ++data;
230 } else if (*data == 'L') // TODO: handle other suffixes
231 ++data;
232 if (!hasSeenTokenSeparator) {
233 while (is_ident_char(*data)) {
234 ++data;
235 token = IDENTIFIER;
236 }
237 }
238 break;
239 }
240 token = FLOATING_LITERAL;
241 ++data;
242 Q_FALLTHROUGH();
243 }
244 case FLOATING_LITERAL:
245 while (isAsciiDigit(*data) || *data == '\'')
246 ++data;
247 if (*data == '+' || *data == '-')
248 ++data;
249 if (*data == 'e' || *data == 'E') {
250 ++data;
251 while (isAsciiDigit(*data) || *data == '\'')
252 ++data;
253 }
254 if (*data == 'f' || *data == 'F'
255 || *data == 'l' || *data == 'L')
256 ++data;
257 break;
258 case HASH:
259 if (column == 1 && mode == TokenizeCpp) {
261 while (*data && (*data == ' ' || *data == '\t'))
262 ++data;
263 if (is_ident_char(*data))
265 continue;
266 }
267 break;
268 case PP_HASHHASH:
269 if (mode == TokenizeCpp)
270 continue;
271 break;
272 case NEWLINE:
273 ++lineNum;
274 if (mode == TokenizeDefine) {
275 mode = TokenizeCpp;
276 // emit the newline token
277 break;
278 }
279 continue;
280 case BACKSLASH:
281 {
282 const char *rewind = data;
283 while (*data && (*data == ' ' || *data == '\t'))
284 ++data;
285 if (*data && *data == '\n') {
286 ++data;
287 continue;
288 }
289 data = rewind;
290 } break;
291 case CHARACTER:
292 while (is_ident_char(*data))
293 ++data;
294 token = IDENTIFIER;
295 break;
296 case C_COMMENT:
297 if (*data) {
298 if (*data == '\n')
299 ++lineNum;
300 ++data;
301 if (*data) {
302 if (*data == '\n')
303 ++lineNum;
304 ++data;
305 }
306 }
307 while (*data && (*(data-1) != '/' || *(data-2) != '*')) {
308 if (*data == '\n')
309 ++lineNum;
310 ++data;
311 }
312 token = WHITESPACE; // one comment, one whitespace
313 Q_FALLTHROUGH();
314 case WHITESPACE:
315 if (column == 1)
316 column = 0;
317 while (*data && (*data == ' ' || *data == '\t'))
318 ++data;
319 if (Preprocessor::preprocessOnly) // tokenize whitespace
320 break;
321 continue;
322 case CPP_COMMENT:
323 while (*data && *data != '\n')
324 ++data;
325 continue; // ignore safely, the newline is a separator
326 default:
327 continue; //ignore
328 }
329 }
330 symbols += Symbol(lineNum, token, input, lexem-begin, data-lexem);
331
332 } else { // Preprocessor
333
334 const char *lexem = data;
335 int state = 0;
336 Token token = NOTOKEN;
337 if (mode == TokenizePreprocessorStatement) {
338 state = pp_keyword_trans[0][(int)'#'];
340 }
341 for (;;) {
342 if (static_cast<signed char>(*data) < 0) {
343 ++data;
344 continue;
345 }
346 int nextindex = pp_keywords[state].next;
347 int next = 0;
348 if (*data == pp_keywords[state].defchar)
349 next = pp_keywords[state].defnext;
350 else if (!state || nextindex)
351 next = pp_keyword_trans[nextindex][(int)*data];
352 if (!next)
353 break;
354 state = next;
355 token = pp_keywords[state].token;
356 ++data;
357 }
358 // suboptimal, is_ident_char should use a table
359 if (pp_keywords[state].ident && is_ident_char(*data))
360 token = pp_keywords[state].ident;
361
362 switch (token) {
363 case NOTOKEN:
364 if (*data)
365 ++data;
366 break;
367 case PP_DEFINE:
368 mode = PrepareDefine;
369 break;
370 case PP_IFDEF:
371 symbols += Symbol(lineNum, PP_IF);
372 symbols += Symbol(lineNum, PP_DEFINED);
373 continue;
374 case PP_IFNDEF:
375 symbols += Symbol(lineNum, PP_IF);
376 symbols += Symbol(lineNum, PP_NOT);
377 symbols += Symbol(lineNum, PP_DEFINED);
378 continue;
379 case PP_INCLUDE:
380 mode = TokenizeInclude;
381 break;
382 case PP_QUOTE:
383 data = skipQuote(data);
384 token = PP_STRING_LITERAL;
385 break;
386 case PP_SINGLEQUOTE:
387 while (*data && (*data != '\''
388 || (*(data-1)=='\\'
389 && *(data-2)!='\\')))
390 ++data;
391 if (*data)
392 ++data;
393 token = PP_CHARACTER_LITERAL;
394 break;
395 case PP_DIGIT:
396 while (isAsciiDigit(*data) || *data == '\'')
397 ++data;
398 if (!*data || *data != '.') {
399 token = PP_INTEGER_LITERAL;
400 if (data - lexem == 1 &&
401 (*data == 'x' || *data == 'X')
402 && *lexem == '0') {
403 ++data;
404 while (isHexDigit(*data) || *data == '\'')
405 ++data;
406 } else if (*data == 'L') // TODO: handle other suffixes
407 ++data;
408 break;
409 }
410 token = PP_FLOATING_LITERAL;
411 ++data;
412 Q_FALLTHROUGH();
413 case PP_FLOATING_LITERAL:
414 while (isAsciiDigit(*data) || *data == '\'')
415 ++data;
416 if (*data == '+' || *data == '-')
417 ++data;
418 if (*data == 'e' || *data == 'E') {
419 ++data;
420 while (isAsciiDigit(*data) || *data == '\'')
421 ++data;
422 }
423 if (*data == 'f' || *data == 'F'
424 || *data == 'l' || *data == 'L')
425 ++data;
426 break;
427 case PP_CHARACTER:
428 if (mode == PreparePreprocessorStatement) {
429 // rewind entire token to begin
430 data = lexem;
432 continue;
433 }
434 while (is_ident_char(*data))
435 ++data;
436 token = PP_IDENTIFIER;
437
438 if (mode == PrepareDefine) {
439 symbols += Symbol(lineNum, token, input, lexem-begin, data-lexem);
440 // make sure we explicitly add the whitespace here if the next char
441 // is not an opening brace, so we can distinguish correctly between
442 // regular and function macros
443 if (*data != '(')
444 symbols += Symbol(lineNum, WHITESPACE);
445 mode = TokenizeDefine;
446 continue;
447 }
448 break;
449 case PP_C_COMMENT:
450 if (*data) {
451 if (*data == '\n')
452 ++lineNum;
453 ++data;
454 if (*data) {
455 if (*data == '\n')
456 ++lineNum;
457 ++data;
458 }
459 }
460 while (*data && (*(data-1) != '/' || *(data-2) != '*')) {
461 if (*data == '\n')
462 ++lineNum;
463 ++data;
464 }
465 token = PP_WHITESPACE; // one comment, one whitespace
466 Q_FALLTHROUGH();
467 case PP_WHITESPACE:
468 while (*data && (*data == ' ' || *data == '\t'))
469 ++data;
470 continue; // the preprocessor needs no whitespace
471 case PP_CPP_COMMENT:
472 while (*data && *data != '\n')
473 ++data;
474 continue; // ignore safely, the newline is a separator
475 case PP_NEWLINE:
476 ++lineNum;
477 mode = TokenizeCpp;
478 break;
479 case PP_BACKSLASH:
480 {
481 const char *rewind = data;
482 while (*data && (*data == ' ' || *data == '\t'))
483 ++data;
484 if (*data && *data == '\n') {
485 ++data;
486 continue;
487 }
488 data = rewind;
489 } break;
490 case PP_LANGLE:
491 if (mode != TokenizeInclude)
492 break;
493 token = PP_STRING_LITERAL;
494 while (*data && *data != '\n' && *(data-1) != '>')
495 ++data;
496 break;
497 default:
498 break;
499 }
501 continue;
502 symbols += Symbol(lineNum, token, input, lexem-begin, data-lexem);
503 }
504 }
505 symbols += Symbol(); // eof symbol
506 return symbols;
507}
508
509void Preprocessor::macroExpand(Symbols *into, Preprocessor *that, const Symbols &toExpand, qsizetype &index,
510 int lineNum, bool one, const QSet<QByteArray> &excludeSymbols)
511{
512 SymbolStack symbols;
513 symbols.reserve(8);
514 SafeSymbols sf;
515 sf.symbols = toExpand;
516 sf.index = index;
517 sf.excludedSymbols = excludeSymbols;
518 symbols.push(std::move(sf));
519
520 if (toExpand.isEmpty())
521 return;
522
523 for (;;) {
524 QByteArray macro;
525 Symbols newSyms = macroExpandIdentifier(that, symbols, lineNum, &macro);
526
527 if (macro.isEmpty()) {
528 // not a macro
529 Symbol s = symbols.symbol();
530 s.lineNum = lineNum;
531 *into += s;
532 } else {
533 SafeSymbols sf;
534 sf.symbols = newSyms;
535 sf.index = 0;
536 sf.expandedMacro = macro;
537 symbols.push(std::move(sf));
538 }
539 if (!symbols.hasNext() || (one && symbols.size() == 1))
540 break;
541 symbols.next();
542 }
543
544 if (symbols.size())
545 index = symbols.top().index;
546 else
547 index = toExpand.size();
548}
549
550
// Tries to expand the symbol the stack `symbols` currently points at as a macro
// of `that`.
//
// Returns an empty list and leaves *macroName untouched when the symbol is not
// a PP_IDENTIFIER, is not a known macro, or symbols.dontReplaceSymbol() rejects
// it. Otherwise *macroName is set to the macro's name and the replacement list
// is returned: macro.symbols for an object-like macro, or the body with the
// call's arguments substituted ('#' and '##' applied) for a function-like one.
//
// A function-like macro name that is not followed by '(' is not expanded:
// *macroName is reset to empty and the identifier itself is returned, preceded
// by a whitespace symbol if whitespace was consumed while looking for '('.
//
// `lineNum` is the line number given to symbols created here.
551Symbols Preprocessor::macroExpandIdentifier(Preprocessor *that, SymbolStack &symbols, int lineNum, QByteArray *macroName)
552{
553 Symbol s = symbols.symbol();
554
555 // not a macro
556 if (s.token != PP_IDENTIFIER || !that->macros.contains(s) || symbols.dontReplaceSymbol(s.lexem())) {
557 return Symbols();
558 }
559
560 const Macro &macro = that->macros.value(s);
561 *macroName = s.lexem();
562
563 Symbols expansion;
564 if (!macro.isFunction) {
565 expansion = macro.symbols;
566 } else {
567 bool haveSpace = false;
// consume whitespace between the macro name and a possible '('
568 while (symbols.test(PP_WHITESPACE)) { haveSpace = true; }
569 if (!symbols.test(PP_LPAREN)) {
// function-like macro used without an argument list: leave it unexpanded
570 *macroName = QByteArray();
571 Symbols syms;
572 if (haveSpace)
573 syms += Symbol(lineNum, PP_WHITESPACE);
574 syms += s;
575 syms.last().lineNum = lineNum;
576 return syms;
577 }
// collect the call's arguments, one Symbols list per argument
578 QVarLengthArray<Symbols, 5> arguments;
579 while (symbols.hasNext()) {
580 Symbols argument;
581 // strip leading space
582 while (symbols.test(PP_WHITESPACE)) {}
// nesting counts '(' ... ')' pairs inside the argument; it drops below zero
// on the ')' that closes the macro call itself
583 int nesting = 0;
// the last parameter of a variadic macro also takes top-level commas
584 bool vararg = macro.isVariadic && (arguments.size() == macro.arguments.size() - 1);
585 while (symbols.hasNext()) {
586 Token t = symbols.next();
587 if (t == PP_LPAREN) {
588 ++nesting;
589 } else if (t == PP_RPAREN) {
590 --nesting;
591 if (nesting < 0)
592 break;
593 } else if (t == PP_COMMA && nesting == 0) {
594 if (!vararg)
595 break;
596 }
597 argument += symbols.symbol();
598 }
599 arguments += argument;
600
601 if (nesting < 0)
602 break;
603 else if (!symbols.hasNext())
604 that->error("missing ')' in macro usage");
605 }
606
607 // empty VA_ARGS
608 if (macro.isVariadic && arguments.size() == macro.arguments.size() - 1)
609 arguments += Symbols();
610
611 // now replace the macro arguments with the expanded arguments
// `mode` records whether the previous body symbol was '#' (Hash) or '##'
// (HashHash); it is reset to Normal after the symbol following the operator
612 enum Mode {
613 Normal,
614 Hash,
615 HashHash
616 } mode = Normal;
617
618 const auto end = macro.symbols.cend();
619 auto it = macro.symbols.cbegin();
// iterator to the last body symbol (equals cend() when the body is empty)
620 const auto lastSym = std::prev(macro.symbols.cend(), !macro.symbols.isEmpty() ? 1 : 0);
621 for (; it != end; ++it) {
622 const Symbol &s = *it;
623 if (s.token == HASH || s.token == PP_HASHHASH) {
624 mode = (s.token == HASH ? Hash : HashHash);
625 continue;
626 }
// position of this body symbol in the parameter list, or -1 if it is not a parameter
627 const qsizetype index = macro.arguments.indexOf(s);
628 if (mode == Normal) {
629 if (index >= 0 && index < arguments.size()) {
630 // each argument undergoes macro expansion if it's not used as part of a # or ##
631 if (it == lastSym || std::next(it)->token != PP_HASHHASH) {
632 Symbols arg = arguments.at(index);
633 qsizetype idx = 1;
634 macroExpand(&expansion, that, arg, idx, lineNum, false, symbols.excludeSymbols());
635 } else {
// operand of a following '##': inserted without being expanded
636 expansion += arguments.at(index);
637 }
638 } else {
639 expansion += s;
640 }
641 } else if (mode == Hash) {
642 if (index < 0) {
643 that->error("'#' is not followed by a macro parameter");
644 continue;
645 } else if (index >= arguments.size()) {
646 that->error("Macro invoked with too few parameters for a use of '#'");
647 continue;
648 }
649
// stringify: join the argument's lexems, escape '"' and wrap in quotes
650 const Symbols &arg = arguments.at(index);
651 QByteArray stringified;
652 for (const Symbol &sym : arg)
653 stringified += sym.lexemView();
654
655 stringified.replace('"', "\\\"");
656 stringified.prepend('"');
657 stringified.append('"');
658 expansion += Symbol(lineNum, STRING_LITERAL, stringified);
659 } else if (mode == HashHash){
660 if (s.token == WHITESPACE)
661 continue;
662
// drop whitespace already emitted in front of the '##'
663 while (expansion.size() && expansion.constLast().token == PP_WHITESPACE)
664 expansion.pop_back();
665
// `next` is the right-hand operand: the body symbol itself, or the first
// symbol of the argument when the body symbol is a parameter
666 Symbol next = s;
667 if (index >= 0 && index < arguments.size()) {
668 const Symbols &arg = arguments.at(index);
669 if (arg.size() == 0) {
670 mode = Normal;
671 continue;
672 }
673 next = arg.at(0);
674 }
675
// paste onto the previous symbol when the token kinds match and it is not a
// string literal; otherwise simply append the operand
676 if (!expansion.isEmpty() && expansion.constLast().token == s.token
677 && expansion.constLast().token != STRING_LITERAL) {
678 Symbol last = expansion.takeLast();
679
680 QByteArray lexem = last.lexem() + next.lexem();
681 expansion += Symbol(lineNum, last.token, lexem);
682 } else {
683 expansion += next;
684 }
685
// the remaining symbols of the argument follow unchanged
686 if (index >= 0 && index < arguments.size()) {
687 const Symbols &arg = arguments.at(index);
688 if (!arg.isEmpty())
689 expansion.append(arg.cbegin() + 1, arg.cend());
690 }
691 }
692 mode = Normal;
693 }
// a '#' or '##' was the last symbol of the body and has no operand
694 if (mode != Normal)
695 that->error("'#' or '##' found at the end of a macro argument");
696
697 }
698
699 return expansion;
700}
701
703{
704 while (hasNext()) {
705 Token token = next();
706 if (token == PP_IDENTIFIER) {
707 macroExpand(&substituted, this, symbols, index, symbol().lineNum, true);
708 } else if (token == PP_DEFINED) {
709 bool braces = test(PP_LPAREN);
710 if (test(PP_HAS_INCLUDE)) {
711 // __has_include is always supported
712 Symbol definedOrNotDefined = symbol();
713 definedOrNotDefined.token = PP_MOC_TRUE;
714 substituted += definedOrNotDefined;
715 } else {
716 next(PP_IDENTIFIER);
717 Symbol definedOrNotDefined = symbol();
718 definedOrNotDefined.token = macros.contains(definedOrNotDefined)? PP_MOC_TRUE : PP_MOC_FALSE;
719 substituted += definedOrNotDefined;
720 }
721 if (braces)
722 test(PP_RPAREN);
723 continue;
724 } else if (token == PP_NEWLINE) {
725 substituted += symbol();
726 break;
727 } else if (token == PP_HAS_INCLUDE) {
728 next(LPAREN);
729 Token tok = next(); // quote or LANGLE
730 bool usesAngleInclude = false;
731 QByteArray includeAsString;
732 Symbols innerSymbols;
733 if (tok == PP_LANGLE) {
734 usesAngleInclude = true;
735 next();
736 do {
737 Symbol currentSymbol = symbol();
738 includeAsString += currentSymbol.lexem();
739 if (currentSymbol.token == PP_IDENTIFIER)
740 macroExpand(&innerSymbols, this, symbols, index, symbol().lineNum, true);
741 else
742 innerSymbols.append(currentSymbol);
743 } while (next() != PP_RANGLE);
744 } else {
745 includeAsString = unquotedLexem();
746 }
747 next(RPAREN);
748 const QByteArray &relative = usesAngleInclude ? QByteArray() : currentFilenames.top();
749 bool result = !resolveInclude(includeAsString, relative).isNull();
750 if (usesAngleInclude && !result) {
751 // try with expansion
752 includeAsString = {};
753 for (const auto &innerSymbol: innerSymbols)
754 includeAsString.append(innerSymbol.lexem());
755 result = !resolveInclude(includeAsString, relative).isNull();
756 }
757 Symbol definedOrNotDefined = symbol();
758 definedOrNotDefined.token = result ? PP_MOC_TRUE : PP_MOC_FALSE;
759 substituted += definedOrNotDefined;
760 } else {
761 substituted += symbol();
762 }
763 }
764}
765
766
788
790{
791 int value = logical_OR_expression();
792 if (test(PP_QUESTION)) {
793 int alt1 = conditional_expression();
794 int alt2 = test(PP_COLON) ? conditional_expression() : 0;
795 return value ? alt1 : alt2;
796 }
797 return value;
798}
799
801{
802 int value = logical_AND_expression();
803 if (test(PP_OROR))
804 return logical_OR_expression() || value;
805 return value;
806}
807
809{
810 int value = inclusive_OR_expression();
811 if (test(PP_ANDAND))
812 return logical_AND_expression() && value;
813 return value;
814}
815
817{
818 int value = exclusive_OR_expression();
819 if (test(PP_OR))
820 return value | inclusive_OR_expression();
821 return value;
822}
823
825{
826 int value = AND_expression();
827 if (test(PP_HAT))
828 return value ^ exclusive_OR_expression();
829 return value;
830}
831
833{
834 int value = equality_expression();
835 if (test(PP_AND))
836 return value & AND_expression();
837 return value;
838}
839
841{
842 int value = relational_expression();
843 switch (next()) {
844 case PP_EQEQ:
845 return value == equality_expression();
846 case PP_NE:
847 return value != equality_expression();
848 default:
849 prev();
850 return value;
851 }
852}
853
855{
856 int value = shift_expression();
857 switch (next()) {
858 case PP_LANGLE:
859 return value < relational_expression();
860 case PP_RANGLE:
861 return value > relational_expression();
862 case PP_LE:
863 return value <= relational_expression();
864 case PP_GE:
865 return value >= relational_expression();
866 default:
867 prev();
868 return value;
869 }
870}
871
873{
874 int value = additive_expression();
875 switch (next()) {
876 case PP_LTLT:
877 return value << shift_expression();
878 case PP_GTGT:
879 return value >> shift_expression();
880 default:
881 prev();
882 return value;
883 }
884}
885
887{
888 int value = multiplicative_expression();
889 switch (next()) {
890 case PP_PLUS:
891 return value + additive_expression();
892 case PP_MINUS:
893 return value - additive_expression();
894 default:
895 prev();
896 return value;
897 }
898}
899
901{
902 int value = unary_expression();
903 switch (next()) {
904 case PP_STAR:
905 {
906 // get well behaved overflow behavior by converting to long
907 // and then back to int
908 // NOTE: A conformant preprocessor would need to work intmax_t/
909 // uintmax_t according to [cpp.cond], 19.1 §10
910 // But we're not compliant anyway
911 qint64 result = qint64(value) * qint64(multiplicative_expression());
912 return int(result);
913 }
914 case PP_PERCENT:
915 {
916 int remainder = multiplicative_expression();
917 return remainder ? value % remainder : 0;
918 }
919 case PP_SLASH:
920 {
922 return div ? value / div : 0;
923 }
924 default:
925 prev();
926 return value;
927 };
928}
929
931{
932 switch (next()) {
933 case PP_PLUS:
934 return unary_expression();
935 case PP_MINUS:
936 return -unary_expression();
937 case PP_NOT:
938 return !unary_expression();
939 case PP_TILDE:
940 return ~unary_expression();
941 case PP_MOC_TRUE:
942 return 1;
943 case PP_MOC_FALSE:
944 return 0;
945 default:
946 prev();
948 }
949}
950
952{
953 Token t = lookup();
955 || t == PP_PLUS
956 || t == PP_MINUS
957 || t == PP_NOT
958 || t == PP_TILDE
959 || t == PP_DEFINED);
960}
961
963{
964 int value;
965 if (test(PP_LPAREN)) {
967 test(PP_RPAREN);
968 } else {
969 next();
970 auto lexView = lexemView();
971 if (lexView.endsWith('L'))
972 lexView.chop(1);
973 value = lexView.toInt(nullptr, 0);
974 }
975 return value;
976}
977
979{
980 Token t = lookup();
981 return (t == PP_IDENTIFIER
982 || t == PP_INTEGER_LITERAL
983 || t == PP_FLOATING_LITERAL
984 || t == PP_MOC_TRUE
985 || t == PP_MOC_FALSE
986 || t == PP_LPAREN);
987}
988
990{
991 PP_Expression expression;
992 expression.currentFilenames = currentFilenames;
993
994 substituteUntilNewline(expression.symbols);
995
996 return expression.value();
997}
998
999static QByteArray readOrMapFile(QFile *file)
1000{
1001 const qint64 size = file->size();
1002 char *rawInput = reinterpret_cast<char*>(file->map(0, size));
1003 return rawInput ? QByteArray::fromRawData(rawInput, size) : file->readAll();
1004}
1005
1007{
1008 Q_ASSERT(len >= 2); // at least `""`
1009 Q_ASSERT(from + len <= lex.size());
1010 Q_ASSERT(next.len >= 2); // at least `""`
1011 Q_ASSERT(next.from + next.len <= next.lex.size());
1012
1013 if (len != lex.size()) {
1014 // "rubbish" around lexem() in `lex`: clean up (`lex` may be the whole file)
1015 QByteArray l = lexemView().chopped(1) % next.lexemView().sliced(1);
1016 lex = std::move(l); // lexemView() aliases `lex`; only clobber it now
1017 from = 0;
1018 } else {
1019 // like QByteArray::append(), but dealing with the "" around each lexem:
1020 const auto unquoted = next.unquotedLexemView();
1021 lex.insert(from + len - 1, // before closing `"`
1022 unquoted);
1023 }
1024 len = lex.size();
1025}
1026
1027static void mergeStringLiterals(Symbols &symbols)
1028{
1029 // like std::unique, but merges instead of skips adjacent STRING_LITERALs:
1030
1031 const auto mergeable = [](const Symbol &lhs, const Symbol &rhs) {
1032 return lhs.token == STRING_LITERAL && rhs.token == STRING_LITERAL;
1033 };
1034
1035 auto end = symbols.end();
1036 auto it = std::adjacent_find(symbols.begin(), symbols.end(), mergeable);
1037 if (it == end) // none found
1038 return;
1039
1040 // we know `it`, `it + 1` are both STRING_LITERAL (adjacent_find post-condition)
1041 // in particular: it + 1 < end
1042
1043 auto dst = it;
1044 auto lit = dst;
1045 ++it;
1046 lit->mergeStringLiteral(*it);
1047
1048 while (++it != end) {
1049 // Loop Invariants:
1050 // - [begin(), dst] is already processed
1051 // - `lit` is the last string literal
1052 // - we can merge if lit == dst
1053 // - [it, end[ still to be checked
1054 if (it->token == STRING_LITERAL) {
1055 if (lit == dst) { // can merge
1056 lit->mergeStringLiteral(*it);
1057 } else { // can't merge: not adjacent to previous STRING_LITERAL
1058 *++dst = std::move(*it);
1059 lit = dst; // remember that this was a literal
1060 }
1061 } else {
1062 *++dst = std::move(*it);
1063 }
1064 }
1065
1066 ++dst;
1067
1068 symbols.erase(dst, end);
1069}
1070
1071static QByteArray searchIncludePaths(const QList<Parser::IncludePath> &includepaths,
1072 const QByteArray &include,
1073 const bool debugIncludes)
1074{
1075 QFileInfo fi;
1076
1077 if (Q_UNLIKELY(debugIncludes)) {
1078 fprintf(stderr, "debug-includes: searching for '%s'\n", include.constData());
1079 }
1080
1081 for (const Parser::IncludePath &p : includepaths) {
1082 if (fi.exists())
1083 break;
1084
1085 if (p.isFrameworkPath) {
1086 const qsizetype slashPos = include.indexOf('/');
1087 if (slashPos == -1)
1088 continue;
1089 fi.setFile(QString::fromLocal8Bit(p.path + '/' + include.left(slashPos) + ".framework/Headers/"),
1090 QString::fromLocal8Bit(include.mid(slashPos + 1)));
1091 } else {
1092 fi.setFile(QString::fromLocal8Bit(p.path), QString::fromLocal8Bit(include));
1093 }
1094
1095 if (Q_UNLIKELY(debugIncludes)) {
1096 const auto candidate = fi.filePath().toLocal8Bit();
1097 fprintf(stderr, "debug-includes: considering '%s'\n", candidate.constData());
1098 }
1099
1100 // try again, maybe there's a file later in the include paths with the same name
1101 // (186067)
1102 if (fi.isDir()) {
1103 fi = QFileInfo();
1104 continue;
1105 }
1106 }
1107
1108 if (!fi.exists() || fi.isDir()) {
1109 if (Q_UNLIKELY(debugIncludes)) {
1110 fprintf(stderr, "debug-includes: can't find '%s'\n", include.constData());
1111 }
1112 return QByteArray();
1113 }
1114
1115 const auto result = fi.canonicalFilePath().toLocal8Bit();
1116
1117 if (Q_UNLIKELY(debugIncludes)) {
1118 fprintf(stderr, "debug-includes: found '%s'\n", result.constData());
1119 }
1120
1121 return result;
1122}
1123
1124QByteArray Preprocessor::resolveInclude(const QByteArray &include, const QByteArray &relativeTo)
1125{
1126 if (!relativeTo.isEmpty()) {
1127 QFileInfo fi;
1128 fi.setFile(QFileInfo(QString::fromLocal8Bit(relativeTo)).dir(), QString::fromLocal8Bit(include));
1129 if (fi.exists() && !fi.isDir())
1130 return fi.canonicalFilePath().toLocal8Bit();
1131 }
1132
1133 auto it = nonlocalIncludePathResolutionCache.find(include);
1134 if (it == nonlocalIncludePathResolutionCache.end())
1135 it = nonlocalIncludePathResolutionCache.insert(include,
1136 searchIncludePaths(
1137 includes,
1138 include,
1139 debugIncludes));
1140 return it.value();
1141}
1142
1143void Preprocessor::preprocess(const QByteArray &filename, Symbols &preprocessed)
1144{
1145 currentFilenames.push(filename);
1146 preprocessed.reserve(preprocessed.size() + symbols.size());
1147 while (hasNext()) {
1148 Token token = next();
1149
1150 switch (token) {
1151 case PP_INCLUDE:
1152 {
1153 int lineNum = symbol().lineNum;
1154 QByteArray include;
1155 bool local = false;
1156 if (test(PP_STRING_LITERAL)) {
1157 local = lexemView().startsWith('\"');
1158 include = unquotedLexem();
1159 } else
1160 continue;
1161 until(PP_NEWLINE);
1162
1163 include = resolveInclude(include, local ? filename : QByteArray());
1164 if (include.isNull())
1165 continue;
1166
1167 if (Preprocessor::preprocessedIncludes.contains(include))
1168 continue;
1169 Preprocessor::preprocessedIncludes.insert(include);
1170
1171 QFile file(QString::fromLocal8Bit(include.constData()));
1172 if (!file.open(QFile::ReadOnly))
1173 continue;
1174
1175 QByteArray input = readOrMapFile(&file);
1176
1177 file.close();
1178 if (input.isEmpty())
1179 continue;
1180
1181 Symbols saveSymbols = symbols;
1182 qsizetype saveIndex = index;
1183
1184 // phase 1: get rid of backslash-newlines
1185 input = cleaned(input);
1186
1187 // phase 2: tokenize for the preprocessor
1188 symbols = tokenize(input);
1189 input.clear();
1190
1191 index = 0;
1192
1193 // phase 3: preprocess conditions and substitute macros
1194 preprocessed += Symbol(0, MOC_INCLUDE_BEGIN, include);
1195 preprocess(include, preprocessed);
1196 preprocessed += Symbol(lineNum, MOC_INCLUDE_END, include);
1197
1198 symbols = saveSymbols;
1199 index = saveIndex;
1200 continue;
1201 }
1202 case PP_DEFINE:
1203 {
1204 next();
1205 QByteArray name = lexem();
1206 if (name.isEmpty() || !is_ident_start(name[0]))
1207 error();
1208 Macro macro;
1209 macro.isVariadic = false;
1210 if (test(LPAREN)) {
1211 // we have a function macro
1212 macro.isFunction = true;
1214 } else {
1215 macro.isFunction = false;
1216 }
1217 qsizetype start = index;
1218 until(PP_NEWLINE);
1219 macro.symbols.reserve(index - start - 1);
1220
1221 // remove whitespace where there shouldn't be any:
1222 // Before and after the macro, after a # and around ##
1223 Token lastToken = HASH; // skip whitespace at the beginning
1224 for (qsizetype i = start; i < index - 1; ++i) {
1225 Token token = symbols.at(i).token;
1226 if (token == WHITESPACE) {
1227 if (lastToken == PP_HASH || lastToken == HASH ||
1228 lastToken == PP_HASHHASH ||
1229 lastToken == WHITESPACE)
1230 continue;
1231 } else if (token == PP_HASHHASH) {
1232 if (!macro.symbols.isEmpty() &&
1233 lastToken == WHITESPACE)
1234 macro.symbols.pop_back();
1235 }
1236 macro.symbols.append(symbols.at(i));
1237 lastToken = token;
1238 }
1239 // remove trailing whitespace
1240 while (!macro.symbols.isEmpty() &&
1241 (macro.symbols.constLast().token == PP_WHITESPACE || macro.symbols.constLast().token == WHITESPACE))
1242 macro.symbols.pop_back();
1243
1244 if (!macro.symbols.isEmpty()) {
1245 if (macro.symbols.constFirst().token == PP_HASHHASH ||
1246 macro.symbols.constLast().token == PP_HASHHASH) {
1247 error("'##' cannot appear at either end of a macro expansion");
1248 }
1249 }
1250 macros.insert(name, macro);
1251 continue;
1252 }
1253 case PP_UNDEF: {
1254 next();
1255 QByteArray name = lexem();
1256 until(PP_NEWLINE);
1257 macros.remove(name);
1258 continue;
1259 }
1260 case PP_IDENTIFIER: {
1261 // substitute macros
1262 macroExpand(&preprocessed, this, symbols, index, symbol().lineNum, true);
1263 continue;
1264 }
1265 case PP_HASH:
1266 until(PP_NEWLINE);
1267 continue; // skip unknown preprocessor statement
1268 case PP_IFDEF:
1269 case PP_IFNDEF:
1270 case PP_IF:
1271 while (!evaluateCondition()) {
1272 if (!skipBranch())
1273 break;
1274 if (test(PP_ELIF)) {
1275 } else {
1276 until(PP_NEWLINE);
1277 break;
1278 }
1279 }
1280 continue;
1281 case PP_ELIF:
1282 case PP_ELSE:
1284 Q_FALLTHROUGH();
1285 case PP_ENDIF:
1286 until(PP_NEWLINE);
1287 continue;
1288 case PP_NEWLINE:
1289 continue;
1290 case SIGNALS:
1291 case SLOTS: {
1292 Symbol sym = symbol();
1293 if (macros.contains("QT_NO_KEYWORDS"))
1294 sym.token = IDENTIFIER;
1295 else
1296 sym.token = (token == SIGNALS ? Q_SIGNALS_TOKEN : Q_SLOTS_TOKEN);
1297 preprocessed += sym;
1298 } continue;
1299 default:
1300 break;
1301 }
1302 preprocessed += symbol();
1303 }
1304
1305 currentFilenames.pop();
1306}
1307
1308Symbols Preprocessor::preprocessed(const QByteArray &filename, QFile *file)
1309{
1310 QByteArray input = readOrMapFile(file);
1311
1312 if (input.isEmpty())
1313 return symbols;
1314
1315 // phase 1: get rid of backslash-newlines
1316 input = cleaned(input);
1317
1318 // phase 2: tokenize for the preprocessor
1319 index = 0;
1320 symbols = tokenize(input);
1321
1322#if 0
1323 for (int j = 0; j < symbols.size(); ++j)
1324 fprintf(stderr, "line %d: %s(%s)\n",
1325 symbols[j].lineNum,
1326 symbols[j].lexem().constData(),
1327 tokenTypeName(symbols[j].token));
1328#endif
1329
1330 // phase 3: preprocess conditions and substitute macros
1331 Symbols result;
1332 // Preallocate some space to speed up the code below.
1333 // The magic value was found by logging the final size
1334 // and calculating an average when running moc over FOSS projects.
1335 result.reserve(file->size() / 300000);
1336 preprocess(filename, result);
1337 mergeStringLiterals(result);
1338
1339#if 0
1340 for (int j = 0; j < result.size(); ++j)
1341 fprintf(stderr, "line %d: %s(%s)\n",
1342 result[j].lineNum,
1343 result[j].lexem().constData(),
1344 tokenTypeName(result[j].token));
1345#endif
1346
1347 return result;
1348}
1349
1351{
1352 Symbols arguments;
1353 while (hasNext()) {
1354 while (test(PP_WHITESPACE)) {}
1355 Token t = next();
1356 if (t == PP_RPAREN)
1357 break;
1358 if (t != PP_IDENTIFIER) {
1359 QByteArrayView l = lexemView();
1360 if (l == "...") {
1361 m->isVariadic = true;
1362 arguments += Symbol(symbol().lineNum, PP_IDENTIFIER, "__VA_ARGS__");
1363 while (test(PP_WHITESPACE)) {}
1364 if (!test(PP_RPAREN))
1365 error("missing ')' in macro argument list");
1366 break;
1367 } else if (!is_identifier(l.constData(), l.size())) {
1368 error("Unexpected character in macro argument list.");
1369 }
1370 }
1371
1372 Symbol arg = symbol();
1373 if (arguments.contains(arg))
1374 error("Duplicate macro parameter.");
1375 arguments += symbol();
1376
1377 while (test(PP_WHITESPACE)) {}
1378 t = next();
1379 if (t == PP_RPAREN)
1380 break;
1381 if (t == PP_COMMA)
1382 continue;
1383 if (lexemView() == "...") {
1384 //GCC extension: #define FOO(x, y...) x(y)
1385 // The last argument was already parsed. Just mark the macro as variadic.
1386 m->isVariadic = true;
1387 while (test(PP_WHITESPACE)) {}
1388 if (!test(PP_RPAREN))
1389 error("missing ')' in macro argument list");
1390 break;
1391 }
1392 error("Unexpected character in macro argument list.");
1393 }
1394 m->arguments = arguments;
1395 while (test(PP_WHITESPACE)) {}
1396}
1397
1398void Preprocessor::until(Token t)
1399{
1400 while(hasNext() && next() != t)
1401 ;
1402}
1403
1405{
1406 debugIncludes = value;
1407}
1408
1409
1410QT_END_NAMESPACE
int relational_expression()
int exclusive_OR_expression()
bool unary_expression_lookup()
int logical_OR_expression()
int equality_expression()
int logical_AND_expression()
int additive_expression()
int multiplicative_expression()
int conditional_expression()
bool primary_expression_lookup()
int inclusive_OR_expression()
int evaluateCondition()
void setDebugIncludes(bool value)
void parseDefineArguments(Macro *m)
void skipUntilEndif()
Symbols preprocessed(const QByteArray &filename, QFile *device)
void substituteUntilNewline(Symbols &substituted)
static bool preprocessOnly
QByteArray resolveInclude(const QByteArray &filename, const QByteArray &relativeTo)
@ PreparePreprocessorStatement
@ TokenizePreprocessorStatement
Definition qlist.h:80
const Symbol & symbol() const
Definition symbols.h:102
bool hasNext()
Definition symbols.h:89
Token next()
Definition symbols.h:94
bool test(Token)
Definition symbols.h:111
short defnext
Definition keywords.cpp:455
static const short keyword_trans[][128]
Definition keywords.cpp:7
Token token
Definition keywords.cpp:452
Token ident
Definition keywords.cpp:456
short next
Definition keywords.cpp:453
char defchar
Definition keywords.cpp:454
bool is_ident_char(char s)
Definition utils.h:30
const char * skipQuote(const char *data)
Definition utils.h:42
bool is_space(char s)
Definition utils.h:19
short next
static const short pp_keyword_trans[][128]
Definition ppkeywords.cpp:7
PP_Token ident
short defnext
PP_Token token
char defchar
static QByteArray readOrMapFile(QFile *file)
static QByteArray searchIncludePaths(const QList< Parser::IncludePath > &includepaths, const QByteArray &include, const bool debugIncludes)
static QByteArray cleaned(const QByteArray &input)
static void mergeStringLiterals(Symbols &symbols)
Simple structure used by the Doc and DocParser classes.
bool isVariadic
bool isFunction
Symbol(int lineNum, Token token)
Definition symbols.h:48
Token token
Definition symbols.h:58
void mergeStringLiteral(const Symbol &next)
int lineNum
Definition symbols.h:57
Symbol()=default
QList< Symbol > Symbols
Definition symbols.h:75