Qt
Internal/Contributor docs for the Qt SDK. Note: These are NOT official API docs; those are found at https://doc.qt.io/
Loading...
Searching...
No Matches
preprocessor.cpp
Go to the documentation of this file.
1// Copyright (C) 2016 The Qt Company Ltd.
2// Copyright (C) 2014 Olivier Goffart <ogoffart@woboq.org>
3// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
4
5#include "preprocessor.h"
6#include "utils.h"
7#include <qstringlist.h>
8#include <qfile.h>
9#include <qdir.h>
10#include <qfileinfo.h>
11#include <qvarlengtharray.h>
12
14
15using namespace QtMiscUtils;
16
17#include "ppkeywords.cpp"
18#include "keywords.cpp"
19
20// transform \r\n into \n
21// \r into \n (os9 style)
22// backslash-newlines into newlines
23static QByteArray cleaned(const QByteArray &input)
24{
25 QByteArray result;
26 result.resize(input.size());
27 const char *data = input.constData();
28 const char *end = input.constData() + input.size();
29 char *output = result.data();
30
31 int newlines = 0;
32 while (data != end) {
33 while (data != end && is_space(*data))
34 ++data;
35 bool takeLine = (*data == '#');
36 if (*data == '%' && *(data+1) == ':') {
37 takeLine = true;
38 ++data;
39 }
40 if (takeLine) {
41 *output = '#';
42 ++output;
43 do ++data; while (data != end && is_space(*data));
44 }
45 while (data != end) {
46 // handle \\\n, \\\r\n and \\\r
47 if (*data == '\\') {
48 if (*(data + 1) == '\r') {
49 ++data;
50 }
51 if (data != end && (*(data + 1) == '\n' || (*data) == '\r')) {
52 ++newlines;
53 data += 1;
54 if (data != end && *data != '\r')
55 data += 1;
56 continue;
57 }
58 } else if (*data == '\r' && *(data + 1) == '\n') { // reduce \r\n to \n
59 ++data;
60 }
61 if (data == end)
62 break;
63
64 char ch = *data;
65 if (ch == '\r') // os9: replace \r with \n
66 ch = '\n';
67 *output = ch;
68 ++output;
69
70 if (*data == '\n') {
71 // output additional newlines to keep the correct line-numbering
72 // for the lines following the backslash-newline sequence(s)
73 while (newlines) {
74 *output = '\n';
75 ++output;
76 --newlines;
77 }
78 ++data;
79 break;
80 }
81 ++data;
82 }
83 }
84 result.resize(output - result.constData());
85 return result;
86}
87
88bool Preprocessor::preprocessOnly = false;
90{
91 while(index < symbols.size() - 1 && symbols.at(index).token != PP_ENDIF){
92 switch (symbols.at(index).token) {
93 case PP_IF:
94 case PP_IFDEF:
95 case PP_IFNDEF:
96 ++index;
98 break;
99 default:
100 ;
101 }
102 ++index;
103 }
104}
105
107{
108 while (index < symbols.size() - 1
109 && (symbols.at(index).token != PP_ENDIF
110 && symbols.at(index).token != PP_ELIF
111 && symbols.at(index).token != PP_ELSE)
112 ){
113 switch (symbols.at(index).token) {
114 case PP_IF:
115 case PP_IFDEF:
116 case PP_IFNDEF:
117 ++index;
119 break;
120 default:
121 ;
122 }
123 ++index;
124 }
125 return (index < symbols.size() - 1);
126}
127
128
129Symbols Preprocessor::tokenize(const QByteArray& input, int lineNum, Preprocessor::TokenizeMode mode)
130{
131 Symbols symbols;
132 // Preallocate some space to speed up the code below.
133 // The magic divisor value was found by calculating the average ratio between
134 // input size and the final size of symbols.
135 // This yielded a value of 16.x when compiling Qt Base.
136 symbols.reserve(input.size() / 16);
137 const char *begin = input.constData();
138 const char *data = begin;
139 while (*data) {
140 if (mode == TokenizeCpp || mode == TokenizeDefine) {
141 int column = 0;
142
143 const char *lexem = data;
144 int state = 0;
145 Token token = NOTOKEN;
146 for (;;) {
147 if (static_cast<signed char>(*data) < 0) {
148 ++data;
149 continue;
150 }
151 int nextindex = keywords[state].next;
152 int next = 0;
153 if (*data == keywords[state].defchar)
154 next = keywords[state].defnext;
155 else if (!state || nextindex)
156 next = keyword_trans[nextindex][(int)*data];
157 if (!next)
158 break;
159 state = next;
160 token = keywords[state].token;
161 ++data;
162 }
163
164 // suboptimal, is_ident_char should use a table
165 if (keywords[state].ident && is_ident_char(*data))
166 token = keywords[state].ident;
167
168 if (token == NOTOKEN) {
169 if (*data)
170 ++data;
171 // an error really, but let's ignore this input
172 // to not confuse moc later. However in pre-processor
173 // only mode let's continue.
175 continue;
176 }
177
178 ++column;
179
180 if (token > SPECIAL_TREATMENT_MARK) {
181 switch (token) {
182 case QUOTE:
183 data = skipQuote(data);
184 token = STRING_LITERAL;
185 // concatenate multi-line strings for easier
186 // STRING_LITERAL handling in moc
188 && !symbols.isEmpty()
189 && symbols.constLast().token == STRING_LITERAL) {
190
191 const QByteArray newString
192 = '\"'
193 + symbols.constLast().unquotedLexemView()
194 + input.mid(lexem - begin + 1, data - lexem - 2)
195 + '\"';
196 symbols.last() = Symbol(symbols.constLast().lineNum,
197 STRING_LITERAL,
198 newString);
199 continue;
200 }
201 break;
202 case SINGLEQUOTE:
203 while (*data && (*data != '\''
204 || (*(data-1)=='\\'
205 && *(data-2)!='\\')))
206 ++data;
207 if (*data)
208 ++data;
209 token = CHARACTER_LITERAL;
210 break;
211 case LANGLE_SCOPE:
212 // split <:: into two tokens, < and ::
213 token = LANGLE;
214 data -= 2;
215 break;
216 case DIGIT:
217 {
218 bool hasSeenTokenSeparator = false;;
219 while (isAsciiDigit(*data) || (hasSeenTokenSeparator = *data == '\''))
220 ++data;
221 if (!*data || *data != '.') {
222 token = INTEGER_LITERAL;
223 if (data - lexem == 1 &&
224 (*data == 'x' || *data == 'X'
225 || *data == 'b' || *data == 'B')
226 && *lexem == '0') {
227 ++data;
228 while (isHexDigit(*data) || (hasSeenTokenSeparator = *data == '\''))
229 ++data;
230 } else if (*data == 'L') // TODO: handle other suffixes
231 ++data;
232 if (!hasSeenTokenSeparator) {
233 while (is_ident_char(*data)) {
234 ++data;
235 token = IDENTIFIER;
236 }
237 }
238 break;
239 }
240 token = FLOATING_LITERAL;
241 ++data;
242 Q_FALLTHROUGH();
243 }
244 case FLOATING_LITERAL:
245 while (isAsciiDigit(*data) || *data == '\'')
246 ++data;
247 if (*data == '+' || *data == '-')
248 ++data;
249 if (*data == 'e' || *data == 'E') {
250 ++data;
251 while (isAsciiDigit(*data) || *data == '\'')
252 ++data;
253 }
254 if (*data == 'f' || *data == 'F'
255 || *data == 'l' || *data == 'L')
256 ++data;
257 break;
258 case HASH:
259 if (column == 1 && mode == TokenizeCpp) {
261 while (*data && (*data == ' ' || *data == '\t'))
262 ++data;
263 if (is_ident_char(*data))
265 continue;
266 }
267 break;
268 case PP_HASHHASH:
269 if (mode == TokenizeCpp)
270 continue;
271 break;
272 case NEWLINE:
273 ++lineNum;
274 if (mode == TokenizeDefine) {
275 mode = TokenizeCpp;
276 // emit the newline token
277 break;
278 }
279 continue;
280 case BACKSLASH:
281 {
282 const char *rewind = data;
283 while (*data && (*data == ' ' || *data == '\t'))
284 ++data;
285 if (*data && *data == '\n') {
286 ++data;
287 continue;
288 }
289 data = rewind;
290 } break;
291 case CHARACTER:
292 while (is_ident_char(*data))
293 ++data;
294 token = IDENTIFIER;
295 break;
296 case C_COMMENT:
297 if (*data) {
298 if (*data == '\n')
299 ++lineNum;
300 ++data;
301 if (*data) {
302 if (*data == '\n')
303 ++lineNum;
304 ++data;
305 }
306 }
307 while (*data && (*(data-1) != '/' || *(data-2) != '*')) {
308 if (*data == '\n')
309 ++lineNum;
310 ++data;
311 }
312 token = WHITESPACE; // one comment, one whitespace
313 Q_FALLTHROUGH();
314 case WHITESPACE:
315 if (column == 1)
316 column = 0;
317 while (*data && (*data == ' ' || *data == '\t'))
318 ++data;
319 if (Preprocessor::preprocessOnly) // tokenize whitespace
320 break;
321 continue;
322 case CPP_COMMENT:
323 while (*data && *data != '\n')
324 ++data;
325 continue; // ignore safely, the newline is a separator
326 default:
327 continue; //ignore
328 }
329 }
330 symbols += Symbol(lineNum, token, input, lexem-begin, data-lexem);
331
332 } else { // Preprocessor
333
334 const char *lexem = data;
335 int state = 0;
336 Token token = NOTOKEN;
337 if (mode == TokenizePreprocessorStatement) {
338 state = pp_keyword_trans[0][(int)'#'];
340 }
341 for (;;) {
342 if (static_cast<signed char>(*data) < 0) {
343 ++data;
344 continue;
345 }
346 int nextindex = pp_keywords[state].next;
347 int next = 0;
348 if (*data == pp_keywords[state].defchar)
349 next = pp_keywords[state].defnext;
350 else if (!state || nextindex)
351 next = pp_keyword_trans[nextindex][(int)*data];
352 if (!next)
353 break;
354 state = next;
355 token = pp_keywords[state].token;
356 ++data;
357 }
358 // suboptimal, is_ident_char should use a table
359 if (pp_keywords[state].ident && is_ident_char(*data))
360 token = pp_keywords[state].ident;
361
362 switch (token) {
363 case NOTOKEN:
364 if (*data)
365 ++data;
366 break;
367 case PP_DEFINE:
368 mode = PrepareDefine;
369 break;
370 case PP_IFDEF:
371 symbols += Symbol(lineNum, PP_IF);
372 symbols += Symbol(lineNum, PP_DEFINED);
373 continue;
374 case PP_IFNDEF:
375 symbols += Symbol(lineNum, PP_IF);
376 symbols += Symbol(lineNum, PP_NOT);
377 symbols += Symbol(lineNum, PP_DEFINED);
378 continue;
379 case PP_INCLUDE:
380 mode = TokenizeInclude;
381 break;
382 case PP_QUOTE:
383 data = skipQuote(data);
384 token = PP_STRING_LITERAL;
385 break;
386 case PP_SINGLEQUOTE:
387 while (*data && (*data != '\''
388 || (*(data-1)=='\\'
389 && *(data-2)!='\\')))
390 ++data;
391 if (*data)
392 ++data;
393 token = PP_CHARACTER_LITERAL;
394 break;
395 case PP_DIGIT:
396 while (isAsciiDigit(*data) || *data == '\'')
397 ++data;
398 if (!*data || *data != '.') {
399 token = PP_INTEGER_LITERAL;
400 if (data - lexem == 1 &&
401 (*data == 'x' || *data == 'X')
402 && *lexem == '0') {
403 ++data;
404 while (isHexDigit(*data) || *data == '\'')
405 ++data;
406 } else if (*data == 'L') // TODO: handle other suffixes
407 ++data;
408 break;
409 }
410 token = PP_FLOATING_LITERAL;
411 ++data;
412 Q_FALLTHROUGH();
413 case PP_FLOATING_LITERAL:
414 while (isAsciiDigit(*data) || *data == '\'')
415 ++data;
416 if (*data == '+' || *data == '-')
417 ++data;
418 if (*data == 'e' || *data == 'E') {
419 ++data;
420 while (isAsciiDigit(*data) || *data == '\'')
421 ++data;
422 }
423 if (*data == 'f' || *data == 'F'
424 || *data == 'l' || *data == 'L')
425 ++data;
426 break;
427 case PP_CHARACTER:
428 if (mode == PreparePreprocessorStatement) {
429 // rewind entire token to begin
430 data = lexem;
432 continue;
433 }
434 while (is_ident_char(*data))
435 ++data;
436 token = PP_IDENTIFIER;
437
438 if (mode == PrepareDefine) {
439 symbols += Symbol(lineNum, token, input, lexem-begin, data-lexem);
440 // make sure we explicitly add the whitespace here if the next char
441 // is not an opening brace, so we can distinguish correctly between
442 // regular and function macros
443 if (*data != '(')
444 symbols += Symbol(lineNum, WHITESPACE);
445 mode = TokenizeDefine;
446 continue;
447 }
448 break;
449 case PP_C_COMMENT:
450 if (*data) {
451 if (*data == '\n')
452 ++lineNum;
453 ++data;
454 if (*data) {
455 if (*data == '\n')
456 ++lineNum;
457 ++data;
458 }
459 }
460 while (*data && (*(data-1) != '/' || *(data-2) != '*')) {
461 if (*data == '\n')
462 ++lineNum;
463 ++data;
464 }
465 token = PP_WHITESPACE; // one comment, one whitespace
466 Q_FALLTHROUGH();
467 case PP_WHITESPACE:
468 while (*data && (*data == ' ' || *data == '\t'))
469 ++data;
470 continue; // the preprocessor needs no whitespace
471 case PP_CPP_COMMENT:
472 while (*data && *data != '\n')
473 ++data;
474 continue; // ignore safely, the newline is a separator
475 case PP_NEWLINE:
476 ++lineNum;
477 mode = TokenizeCpp;
478 break;
479 case PP_BACKSLASH:
480 {
481 const char *rewind = data;
482 while (*data && (*data == ' ' || *data == '\t'))
483 ++data;
484 if (*data && *data == '\n') {
485 ++data;
486 continue;
487 }
488 data = rewind;
489 } break;
490 case PP_LANGLE:
491 if (mode != TokenizeInclude)
492 break;
493 token = PP_STRING_LITERAL;
494 while (*data && *data != '\n' && *(data-1) != '>')
495 ++data;
496 break;
497 default:
498 break;
499 }
501 continue;
502 symbols += Symbol(lineNum, token, input, lexem-begin, data-lexem);
503 }
504 }
505 symbols += Symbol(); // eof symbol
506 return symbols;
507}
508
509void Preprocessor::macroExpand(Symbols *into, Preprocessor *that, const Symbols &toExpand, qsizetype &index,
510 int lineNum, bool one, const QSet<QByteArray> &excludeSymbols)
511{
512 SymbolStack symbols;
513 symbols.reserve(8);
514 SafeSymbols sf;
515 sf.symbols = toExpand;
516 sf.index = index;
517 sf.excludedSymbols = excludeSymbols;
518 symbols.push(std::move(sf));
519
520 if (toExpand.isEmpty())
521 return;
522
523 for (;;) {
524 QByteArray macro;
525 Symbols newSyms = macroExpandIdentifier(that, symbols, lineNum, &macro);
526
527 if (macro.isEmpty()) {
528 // not a macro
529 Symbol s = symbols.symbol();
530 s.lineNum = lineNum;
531 *into += s;
532 } else {
533 SafeSymbols sf;
534 sf.symbols = newSyms;
535 sf.index = 0;
536 sf.expandedMacro = macro;
537 symbols.push(std::move(sf));
538 }
539 if (!symbols.hasNext() || (one && symbols.size() == 1))
540 break;
541 symbols.next();
542 }
543
544 if (symbols.size())
545 index = symbols.top().index;
546 else
547 index = toExpand.size();
548}
549
550
// Attempts to expand the symbol currently selected on `symbols` as a macro
// stored in `that->macros`, and returns the symbols that replace it.
//
//  - Returns an empty list and leaves *macroName untouched when the symbol is
//    not a PP_IDENTIFIER, is not a known macro, or symbols.dontReplaceSymbol()
//    rejects it.
//  - For a non-function macro, returns a copy of the macro's symbols and sets
//    *macroName to the identifier.
//  - For a function macro that is not followed by PP_LPAREN, resets
//    *macroName to an empty QByteArray and returns the identifier itself
//    (preceded by one PP_WHITESPACE symbol if whitespace was consumed).
//  - Otherwise consumes the call arguments from `symbols` and returns the
//    macro's symbols with parameters substituted, applying '#' and '##'.
//
// `lineNum` is the line number given to symbols created here. Problems are
// reported through that->error().
551Symbols Preprocessor::macroExpandIdentifier(Preprocessor *that, SymbolStack &symbols, int lineNum, QByteArray *macroName)
552{
553 Symbol s = symbols.symbol();
554
555 // not a macro
556 if (s.token != PP_IDENTIFIER || !that->macros.contains(s) || symbols.dontReplaceSymbol(s.lexem())) {
557 return Symbols();
558 }
559
560 const Macro &macro = that->macros.value(s);
561 *macroName = s.lexem();
562
563 Symbols expansion;
564 if (!macro.isFunction) {
565 expansion = macro.symbols;
566 } else {
// consume whitespace between the macro name and a possible '(' and
// remember whether there was any
567 bool haveSpace = false;
568 while (symbols.test(PP_WHITESPACE)) { haveSpace = true; }
569 if (!symbols.test(PP_LPAREN)) {
// function macro name used without an argument list: no expansion.
// Clearing *macroName tells the caller that nothing was expanded.
570 *macroName = QByteArray();
571 Symbols syms;
572 if (haveSpace)
573 syms += Symbol(lineNum, PP_WHITESPACE);
574 syms += s;
575 syms.last().lineNum = lineNum;
576 return syms;
577 }
// collect the call arguments, one Symbols list per argument
578 QVarLengthArray<Symbols, 5> arguments;
579 while (symbols.hasNext()) {
580 Symbols argument;
581 // strip leading space
582 while (symbols.test(PP_WHITESPACE)) {}
// nesting counts unbalanced '(' inside the argument; it drops below
// zero on the ')' that closes the macro call
583 int nesting = 0;
// once the last declared parameter of a variadic macro is reached,
// top-level commas no longer end the argument
584 bool vararg = macro.isVariadic && (arguments.size() == macro.arguments.size() - 1);
585 while (symbols.hasNext()) {
586 Token t = symbols.next();
587 if (t == PP_LPAREN) {
588 ++nesting;
589 } else if (t == PP_RPAREN) {
590 --nesting;
591 if (nesting < 0)
592 break;
593 } else if (t == PP_COMMA && nesting == 0) {
594 if (!vararg)
595 break;
596 }
597 argument += symbols.symbol();
598 }
599 arguments += argument;
600
601 if (nesting < 0)
602 break;
603 else if (!symbols.hasNext())
604 that->error("missing ')' in macro usage");
605 }
606
607 // empty VA_ARGS
608 if (macro.isVariadic && arguments.size() == macro.arguments.size() - 1)
609 arguments += Symbols();
610
611 // now replace the macro arguments with the expanded arguments
// `mode` records whether the previous symbol of the macro body was '#'
// (Hash) or '##' (HashHash); it is reset to Normal after each operand
612 enum Mode {
613 Normal,
614 Hash,
615 HashHash
616 } mode = Normal;
617
618 const auto end = macro.symbols.cend();
619 auto it = macro.symbols.cbegin();
// iterator to the last symbol of the macro body (equals cend() when the
// body is empty), so std::next(it) is only dereferenced when valid
620 const auto lastSym = std::prev(macro.symbols.cend(), !macro.symbols.isEmpty() ? 1 : 0);
621 for (; it != end; ++it) {
622 const Symbol &s = *it;
623 if (s.token == HASH || s.token == PP_HASHHASH) {
624 mode = (s.token == HASH ? Hash : HashHash);
625 continue;
626 }
// position of `s` in the parameter list, or negative if it is not a parameter
627 const qsizetype index = macro.arguments.indexOf(s);
628 if (mode == Normal) {
629 if (index >= 0 && index < arguments.size()) {
630 // each argument undergoes macro expansion if it's not used as part of a # or ##
631 if (it == lastSym || std::next(it)->token != PP_HASHHASH) {
632 Symbols arg = arguments.at(index);
633 qsizetype idx = 1;
634 macroExpand(&expansion, that, arg, idx, lineNum, false, symbols.excludeSymbols());
635 } else {
// left operand of '##': inserted without expansion
636 expansion += arguments.at(index);
637 }
638 } else {
// not a parameter: copy the body symbol as is
639 expansion += s;
640 }
641 } else if (mode == Hash) {
642 if (index < 0) {
643 that->error("'#' is not followed by a macro parameter");
644 continue;
645 } else if (index >= arguments.size()) {
646 that->error("Macro invoked with too few parameters for a use of '#'");
647 continue;
648 }
649
// stringify: concatenate the argument's lexems, inserting one space
// wherever there is a gap between the `from`/`len` ranges of two
// neighbouring symbols, then escape '"' and wrap in quotes
650 const Symbols &arg = arguments.at(index);
651 QByteArray stringified;
652 if (!arg.empty()) {
653 stringified = arg.front().lexem();
654 for (auto it = arg.cbegin(); std::next(it) != arg.cend(); ++it) {
655 const auto next = std::next(it);
656 if (next->from - (it->from + it->len) > 0)
657 stringified += ' ' + next->lexem();
658 else
659 stringified += next->lexem();
660 }
661
662 stringified.replace('"', "\\\"");
663 stringified.prepend('"');
664 stringified.append('"');
665 }
666
// note: for an empty argument `stringified` stays empty (no quotes)
667 expansion += Symbol(lineNum, STRING_LITERAL, stringified);
668 } else if (mode == HashHash){
// whitespace right after '##' is skipped without leaving HashHash mode
669 if (s.token == WHITESPACE)
670 continue;
671
// drop whitespace at the end of what has been produced so far
672 while (expansion.size() && expansion.constLast().token == PP_WHITESPACE)
673 expansion.pop_back();
674
// `next` is the symbol to paste: the body symbol itself, or the
// first symbol of the argument when `s` names a parameter
675 Symbol next = s;
676 if (index >= 0 && index < arguments.size()) {
677 const Symbols &arg = arguments.at(index);
678 if (arg.size() == 0) {
// empty argument: nothing to paste
679 mode = Normal;
680 continue;
681 }
682 next = arg.at(0);
683 }
684
// paste by concatenating lexems when the last produced symbol has the
// same token as `s` and is not a string literal; otherwise just append
685 if (!expansion.isEmpty() && expansion.constLast().token == s.token
686 && expansion.constLast().token != STRING_LITERAL) {
687 Symbol last = expansion.takeLast();
688
689 QByteArray lexem = last.lexem() + next.lexem();
690 expansion += Symbol(lineNum, last.token, lexem);
691 } else {
692 expansion += next;
693 }
694
// the remaining symbols of the argument follow unchanged
695 if (index >= 0 && index < arguments.size()) {
696 const Symbols &arg = arguments.at(index);
697 if (!arg.isEmpty())
698 expansion.append(arg.cbegin() + 1, arg.cend());
699 }
700 }
701 mode = Normal;
702 }
// a '#' or '##' that was never followed by an operand
703 if (mode != Normal)
704 that->error("'#' or '##' found at the end of a macro argument");
705
706 }
707
708 return expansion;
709}
710
712{
713 while (hasNext()) {
714 Token token = next();
715 if (token == PP_IDENTIFIER) {
716 macroExpand(&substituted, this, symbols, index, symbol().lineNum, true);
717 } else if (token == PP_DEFINED) {
718 bool braces = test(PP_LPAREN);
719 if (test(PP_HAS_INCLUDE)) {
720 // __has_include is always supported
721 Symbol definedOrNotDefined = symbol();
722 definedOrNotDefined.token = PP_MOC_TRUE;
723 substituted += definedOrNotDefined;
724 } else {
725 next(PP_IDENTIFIER);
726 Symbol definedOrNotDefined = symbol();
727 definedOrNotDefined.token = macros.contains(definedOrNotDefined)? PP_MOC_TRUE : PP_MOC_FALSE;
728 substituted += definedOrNotDefined;
729 }
730 if (braces)
731 test(PP_RPAREN);
732 continue;
733 } else if (token == PP_NEWLINE) {
734 substituted += symbol();
735 break;
736 } else if (token == PP_HAS_INCLUDE) {
737 next(LPAREN);
738 Token tok = next(); // quote or LANGLE
739 bool usesAngleInclude = false;
740 QByteArray includeAsString;
741 Symbols innerSymbols;
742 if (tok == PP_LANGLE) {
743 usesAngleInclude = true;
744 next();
745 do {
746 Symbol currentSymbol = symbol();
747 includeAsString += currentSymbol.lexem();
748 if (currentSymbol.token == PP_IDENTIFIER)
749 macroExpand(&innerSymbols, this, symbols, index, symbol().lineNum, true);
750 else
751 innerSymbols.append(currentSymbol);
752 } while (next() != PP_RANGLE);
753 } else {
754 includeAsString = unquotedLexem();
755 }
756 next(RPAREN);
757 const QByteArray &relative = usesAngleInclude ? QByteArray() : currentFilenames.top();
758 bool result = !resolveInclude(includeAsString, relative).isNull();
759 if (usesAngleInclude && !result) {
760 // try with expansion
761 includeAsString = {};
762 for (const auto &innerSymbol: innerSymbols)
763 includeAsString.append(innerSymbol.lexem());
764 result = !resolveInclude(includeAsString, relative).isNull();
765 }
766 Symbol definedOrNotDefined = symbol();
767 definedOrNotDefined.token = result ? PP_MOC_TRUE : PP_MOC_FALSE;
768 substituted += definedOrNotDefined;
769 } else {
770 substituted += symbol();
771 }
772 }
773}
774
775
797
799{
800 int value = logical_OR_expression();
801 if (test(PP_QUESTION)) {
802 int alt1 = conditional_expression();
803 int alt2 = test(PP_COLON) ? conditional_expression() : 0;
804 return value ? alt1 : alt2;
805 }
806 return value;
807}
808
810{
811 int value = logical_AND_expression();
812 if (test(PP_OROR))
813 return logical_OR_expression() || value;
814 return value;
815}
816
818{
819 int value = inclusive_OR_expression();
820 if (test(PP_ANDAND))
821 return logical_AND_expression() && value;
822 return value;
823}
824
826{
827 int value = exclusive_OR_expression();
828 if (test(PP_OR))
829 return value | inclusive_OR_expression();
830 return value;
831}
832
834{
835 int value = AND_expression();
836 if (test(PP_HAT))
837 return value ^ exclusive_OR_expression();
838 return value;
839}
840
842{
843 int value = equality_expression();
844 if (test(PP_AND))
845 return value & AND_expression();
846 return value;
847}
848
850{
851 int value = relational_expression();
852 switch (next()) {
853 case PP_EQEQ:
854 return value == equality_expression();
855 case PP_NE:
856 return value != equality_expression();
857 default:
858 prev();
859 return value;
860 }
861}
862
864{
865 int value = shift_expression();
866 switch (next()) {
867 case PP_LANGLE:
868 return value < relational_expression();
869 case PP_RANGLE:
870 return value > relational_expression();
871 case PP_LE:
872 return value <= relational_expression();
873 case PP_GE:
874 return value >= relational_expression();
875 default:
876 prev();
877 return value;
878 }
879}
880
882{
883 int value = additive_expression();
884 switch (next()) {
885 case PP_LTLT:
886 return value << shift_expression();
887 case PP_GTGT:
888 return value >> shift_expression();
889 default:
890 prev();
891 return value;
892 }
893}
894
896{
897 int value = multiplicative_expression();
898 switch (next()) {
899 case PP_PLUS:
900 return value + additive_expression();
901 case PP_MINUS:
902 return value - additive_expression();
903 default:
904 prev();
905 return value;
906 }
907}
908
910{
911 int value = unary_expression();
912 switch (next()) {
913 case PP_STAR:
914 {
915 // get well behaved overflow behavior by converting to long
916 // and then back to int
917 // NOTE: A conformant preprocessor would need to work intmax_t/
918 // uintmax_t according to [cpp.cond], 19.1 §10
919 // But we're not compliant anyway
920 qint64 result = qint64(value) * qint64(multiplicative_expression());
921 return int(result);
922 }
923 case PP_PERCENT:
924 {
925 int remainder = multiplicative_expression();
926 return remainder ? value % remainder : 0;
927 }
928 case PP_SLASH:
929 {
931 return div ? value / div : 0;
932 }
933 default:
934 prev();
935 return value;
936 };
937}
938
940{
941 switch (next()) {
942 case PP_PLUS:
943 return unary_expression();
944 case PP_MINUS:
945 return -unary_expression();
946 case PP_NOT:
947 return !unary_expression();
948 case PP_TILDE:
949 return ~unary_expression();
950 case PP_MOC_TRUE:
951 return 1;
952 case PP_MOC_FALSE:
953 return 0;
954 default:
955 prev();
957 }
958}
959
961{
962 Token t = lookup();
964 || t == PP_PLUS
965 || t == PP_MINUS
966 || t == PP_NOT
967 || t == PP_TILDE
968 || t == PP_DEFINED);
969}
970
972{
973 int value;
974 if (test(PP_LPAREN)) {
976 test(PP_RPAREN);
977 } else {
978 next();
979 auto lexView = lexemView();
980 if (lexView.endsWith('L'))
981 lexView.chop(1);
982 value = lexView.toInt(nullptr, 0);
983 }
984 return value;
985}
986
988{
989 Token t = lookup();
990 return (t == PP_IDENTIFIER
991 || t == PP_INTEGER_LITERAL
992 || t == PP_FLOATING_LITERAL
993 || t == PP_MOC_TRUE
994 || t == PP_MOC_FALSE
995 || t == PP_LPAREN);
996}
997
999{
1000 PP_Expression expression;
1001 expression.currentFilenames = currentFilenames;
1002
1003 substituteUntilNewline(expression.symbols);
1004
1005 return expression.value();
1006}
1007
1008static QByteArray readOrMapFile(QFile *file)
1009{
1010 const qint64 size = file->size();
1011 char *rawInput = reinterpret_cast<char*>(file->map(0, size));
1012 return rawInput ? QByteArray::fromRawData(rawInput, size) : file->readAll();
1013}
1014
1016{
1017 Q_ASSERT(len >= 2); // at least `""`
1018 Q_ASSERT(from + len <= lex.size());
1019 Q_ASSERT(next.len >= 2); // at least `""`
1020 Q_ASSERT(next.from + next.len <= next.lex.size());
1021
1022 if (len != lex.size()) {
1023 // "rubbish" around lexem() in `lex`: clean up (`lex` may be the whole file)
1024 QByteArray l = lexemView().chopped(1) % next.lexemView().sliced(1);
1025 lex = std::move(l); // lexemView() aliases `lex`; only clobber it now
1026 from = 0;
1027 } else {
1028 // like QByteArray::append(), but dealing with the "" around each lexem:
1029 const auto unquoted = next.unquotedLexemView();
1030 lex.insert(from + len - 1, // before closing `"`
1031 unquoted);
1032 }
1033 len = lex.size();
1034}
1035
1036static void mergeStringLiterals(Symbols &symbols)
1037{
1038 // like std::unique, but merges instead of skips adjacent STRING_LITERALs:
1039
1040 const auto mergeable = [](const Symbol &lhs, const Symbol &rhs) {
1041 return lhs.token == STRING_LITERAL && rhs.token == STRING_LITERAL;
1042 };
1043
1044 auto end = symbols.end();
1045 auto it = std::adjacent_find(symbols.begin(), symbols.end(), mergeable);
1046 if (it == end) // none found
1047 return;
1048
1049 // we know `it`, `it + 1` are both STRING_LITERAL (adjacent_find post-condition)
1050 // in particular: it + 1 < end
1051
1052 auto dst = it;
1053 auto lit = dst;
1054 ++it;
1055 lit->mergeStringLiteral(*it);
1056
1057 while (++it != end) {
1058 // Loop Invariants:
1059 // - [begin(), dst] is already processed
1060 // - `lit` is the last string literal
1061 // - we can merge if lit == dst
1062 // - [it, end[ still to be checked
1063 if (it->token == STRING_LITERAL) {
1064 if (lit == dst) { // can merge
1065 lit->mergeStringLiteral(*it);
1066 } else { // can't merge: not adjacent to previous STRING_LITERAL
1067 *++dst = std::move(*it);
1068 lit = dst; // remember that this was a literal
1069 }
1070 } else {
1071 *++dst = std::move(*it);
1072 }
1073 }
1074
1075 ++dst;
1076
1077 symbols.erase(dst, end);
1078}
1079
1080static QByteArray searchIncludePaths(const QList<Parser::IncludePath> &includepaths,
1081 const QByteArray &include,
1082 const bool debugIncludes)
1083{
1084 QFileInfo fi;
1085
1086 if (Q_UNLIKELY(debugIncludes)) {
1087 fprintf(stderr, "debug-includes: searching for '%s'\n", include.constData());
1088 }
1089
1090 for (const Parser::IncludePath &p : includepaths) {
1091 if (fi.exists())
1092 break;
1093
1094 if (p.isFrameworkPath) {
1095 const qsizetype slashPos = include.indexOf('/');
1096 if (slashPos == -1)
1097 continue;
1098 fi.setFile(QString::fromLocal8Bit(p.path + '/' + include.left(slashPos) + ".framework/Headers/"),
1099 QString::fromLocal8Bit(include.mid(slashPos + 1)));
1100 } else {
1101 fi.setFile(QString::fromLocal8Bit(p.path), QString::fromLocal8Bit(include));
1102 }
1103
1104 if (Q_UNLIKELY(debugIncludes)) {
1105 const auto candidate = fi.filePath().toLocal8Bit();
1106 fprintf(stderr, "debug-includes: considering '%s'\n", candidate.constData());
1107 }
1108
1109 // try again, maybe there's a file later in the include paths with the same name
1110 // (186067)
1111 if (fi.isDir()) {
1112 fi = QFileInfo();
1113 continue;
1114 }
1115 }
1116
1117 if (!fi.exists() || fi.isDir()) {
1118 if (Q_UNLIKELY(debugIncludes)) {
1119 fprintf(stderr, "debug-includes: can't find '%s'\n", include.constData());
1120 }
1121 return QByteArray();
1122 }
1123
1124 const auto result = fi.canonicalFilePath().toLocal8Bit();
1125
1126 if (Q_UNLIKELY(debugIncludes)) {
1127 fprintf(stderr, "debug-includes: found '%s'\n", result.constData());
1128 }
1129
1130 return result;
1131}
1132
1133QByteArray Preprocessor::resolveInclude(const QByteArray &include, const QByteArray &relativeTo)
1134{
1135 if (!relativeTo.isEmpty()) {
1136 QFileInfo fi;
1137 fi.setFile(QFileInfo(QString::fromLocal8Bit(relativeTo)).dir(), QString::fromLocal8Bit(include));
1138 if (fi.exists() && !fi.isDir())
1139 return fi.canonicalFilePath().toLocal8Bit();
1140 }
1141
1142 auto it = nonlocalIncludePathResolutionCache.find(include);
1143 if (it == nonlocalIncludePathResolutionCache.end())
1144 it = nonlocalIncludePathResolutionCache.insert(include,
1145 searchIncludePaths(
1146 includes,
1147 include,
1148 debugIncludes));
1149 return it.value();
1150}
1151
1152void Preprocessor::preprocess(const QByteArray &filename, Symbols &preprocessed)
1153{
1154 currentFilenames.push(filename);
1155 preprocessed.reserve(preprocessed.size() + symbols.size());
1156 while (hasNext()) {
1157 Token token = next();
1158
1159 switch (token) {
1160 case PP_INCLUDE:
1161 {
1162 int lineNum = symbol().lineNum;
1163 QByteArray include;
1164 bool local = false;
1165 if (test(PP_STRING_LITERAL)) {
1166 local = lexemView().startsWith('\"');
1167 include = unquotedLexem();
1168 } else
1169 continue;
1170 until(PP_NEWLINE);
1171
1172 include = resolveInclude(include, local ? filename : QByteArray());
1173 if (include.isNull())
1174 continue;
1175
1176 if (Preprocessor::preprocessedIncludes.contains(include))
1177 continue;
1178 Preprocessor::preprocessedIncludes.insert(include);
1179
1180 QFile file(QString::fromLocal8Bit(include.constData()));
1181 if (!file.open(QFile::ReadOnly))
1182 continue;
1183
1184 QByteArray input = readOrMapFile(&file);
1185
1186 file.close();
1187 if (input.isEmpty())
1188 continue;
1189
1190 Symbols saveSymbols = symbols;
1191 qsizetype saveIndex = index;
1192
1193 // phase 1: get rid of backslash-newlines
1194 input = cleaned(input);
1195
1196 // phase 2: tokenize for the preprocessor
1197 symbols = tokenize(input);
1198 input.clear();
1199
1200 index = 0;
1201
1202 // phase 3: preprocess conditions and substitute macros
1203 preprocessed += Symbol(0, MOC_INCLUDE_BEGIN, include);
1204 preprocess(include, preprocessed);
1205 preprocessed += Symbol(lineNum, MOC_INCLUDE_END, include);
1206
1207 symbols = saveSymbols;
1208 index = saveIndex;
1209 continue;
1210 }
1211 case PP_DEFINE:
1212 {
1213 next();
1214 QByteArray name = lexem();
1215 if (name.isEmpty() || !is_ident_start(name[0]))
1216 error();
1217 Macro macro;
1218 macro.isVariadic = false;
1219 if (test(LPAREN)) {
1220 // we have a function macro
1221 macro.isFunction = true;
1223 } else {
1224 macro.isFunction = false;
1225 }
1226 qsizetype start = index;
1227 until(PP_NEWLINE);
1228 macro.symbols.reserve(index - start - 1);
1229
1230 // remove whitespace where there shouldn't be any:
1231 // Before and after the macro, after a # and around ##
1232 Token lastToken = HASH; // skip shitespace at the beginning
1233 for (qsizetype i = start; i < index - 1; ++i) {
1234 Token token = symbols.at(i).token;
1235 if (token == WHITESPACE) {
1236 if (lastToken == PP_HASH || lastToken == HASH ||
1237 lastToken == PP_HASHHASH ||
1238 lastToken == WHITESPACE)
1239 continue;
1240 } else if (token == PP_HASHHASH) {
1241 if (!macro.symbols.isEmpty() &&
1242 lastToken == WHITESPACE)
1243 macro.symbols.pop_back();
1244 }
1245 macro.symbols.append(symbols.at(i));
1246 lastToken = token;
1247 }
1248 // remove trailing whitespace
1249 while (!macro.symbols.isEmpty() &&
1250 (macro.symbols.constLast().token == PP_WHITESPACE || macro.symbols.constLast().token == WHITESPACE))
1251 macro.symbols.pop_back();
1252
1253 if (!macro.symbols.isEmpty()) {
1254 if (macro.symbols.constFirst().token == PP_HASHHASH ||
1255 macro.symbols.constLast().token == PP_HASHHASH) {
1256 error("'##' cannot appear at either end of a macro expansion");
1257 }
1258 }
1259 macros.insert(name, macro);
1260 continue;
1261 }
1262 case PP_UNDEF: {
1263 next();
1264 QByteArray name = lexem();
1265 until(PP_NEWLINE);
1266 macros.remove(name);
1267 continue;
1268 }
1269 case PP_IDENTIFIER: {
1270 // substitute macros
1271 macroExpand(&preprocessed, this, symbols, index, symbol().lineNum, true);
1272 continue;
1273 }
1274 case PP_HASH:
1275 until(PP_NEWLINE);
1276 continue; // skip unknown preprocessor statement
1277 case PP_IFDEF:
1278 case PP_IFNDEF:
1279 case PP_IF:
1280 while (!evaluateCondition()) {
1281 if (!skipBranch())
1282 break;
1283 if (test(PP_ELIF)) {
1284 } else {
1285 until(PP_NEWLINE);
1286 break;
1287 }
1288 }
1289 continue;
1290 case PP_ELIF:
1291 case PP_ELSE:
1293 Q_FALLTHROUGH();
1294 case PP_ENDIF:
1295 until(PP_NEWLINE);
1296 continue;
1297 case PP_NEWLINE:
1298 continue;
1299 case SIGNALS:
1300 case SLOTS: {
1301 Symbol sym = symbol();
1302 if (macros.contains("QT_NO_KEYWORDS"))
1303 sym.token = IDENTIFIER;
1304 else
1305 sym.token = (token == SIGNALS ? Q_SIGNALS_TOKEN : Q_SLOTS_TOKEN);
1306 preprocessed += sym;
1307 } continue;
1308 default:
1309 break;
1310 }
1311 preprocessed += symbol();
1312 }
1313
1314 currentFilenames.pop();
1315}
1316
1317Symbols Preprocessor::preprocessed(const QByteArray &filename, QFile *file)
1318{
1319 QByteArray input = readOrMapFile(file);
1320
1321 if (input.isEmpty())
1322 return symbols;
1323
1324 // phase 1: get rid of backslash-newlines
1325 input = cleaned(input);
1326
1327 // phase 2: tokenize for the preprocessor
1328 index = 0;
1329 symbols = tokenize(input);
1330
1331#if 0
1332 for (int j = 0; j < symbols.size(); ++j)
1333 fprintf(stderr, "line %d: %s(%s)\n",
1334 symbols[j].lineNum,
1335 symbols[j].lexem().constData(),
1336 tokenTypeName(symbols[j].token));
1337#endif
1338
1339 // phase 3: preprocess conditions and substitute macros
1340 Symbols result;
1341 // Preallocate some space to speed up the code below.
1342 // The magic value was found by logging the final size
1343 // and calculating an average when running moc over FOSS projects.
1344 result.reserve(file->size() / 300000);
1345 preprocess(filename, result);
1346 mergeStringLiterals(result);
1347
1348#if 0
1349 for (int j = 0; j < result.size(); ++j)
1350 fprintf(stderr, "line %d: %s(%s)\n",
1351 result[j].lineNum,
1352 result[j].lexem().constData(),
1353 tokenTypeName(result[j].token));
1354#endif
1355
1356 return result;
1357}
1358
1360{
1361 Symbols arguments;
1362 while (hasNext()) {
1363 while (test(PP_WHITESPACE)) {}
1364 Token t = next();
1365 if (t == PP_RPAREN)
1366 break;
1367 if (t != PP_IDENTIFIER) {
1368 QByteArrayView l = lexemView();
1369 if (l == "...") {
1370 m->isVariadic = true;
1371 arguments += Symbol(symbol().lineNum, PP_IDENTIFIER, "__VA_ARGS__");
1372 while (test(PP_WHITESPACE)) {}
1373 if (!test(PP_RPAREN))
1374 error("missing ')' in macro argument list");
1375 break;
1376 } else if (!is_identifier(l.constData(), l.size())) {
1377 error("Unexpected character in macro argument list.");
1378 }
1379 }
1380
1381 Symbol arg = symbol();
1382 if (arguments.contains(arg))
1383 error("Duplicate macro parameter.");
1384 arguments += symbol();
1385
1386 while (test(PP_WHITESPACE)) {}
1387 t = next();
1388 if (t == PP_RPAREN)
1389 break;
1390 if (t == PP_COMMA)
1391 continue;
1392 if (lexemView() == "...") {
1393 //GCC extension: #define FOO(x, y...) x(y)
1394 // The last argument was already parsed. Just mark the macro as variadic.
1395 m->isVariadic = true;
1396 while (test(PP_WHITESPACE)) {}
1397 if (!test(PP_RPAREN))
1398 error("missing ')' in macro argument list");
1399 break;
1400 }
1401 error("Unexpected character in macro argument list.");
1402 }
1403 m->arguments = arguments;
1404 while (test(PP_WHITESPACE)) {}
1405}
1406
1407void Preprocessor::until(Token t)
1408{
1409 while(hasNext() && next() != t)
1410 ;
1411}
1412
1414{
1415 debugIncludes = value;
1416}
1417
1418
1419QT_END_NAMESPACE
int relational_expression()
int exclusive_OR_expression()
bool unary_expression_lookup()
int logical_OR_expression()
int equality_expression()
int logical_AND_expression()
int additive_expression()
int multiplicative_expression()
int conditional_expression()
bool primary_expression_lookup()
int inclusive_OR_expression()
int evaluateCondition()
void setDebugIncludes(bool value)
void parseDefineArguments(Macro *m)
void skipUntilEndif()
Symbols preprocessed(const QByteArray &filename, QFile *device)
void substituteUntilNewline(Symbols &substituted)
static bool preprocessOnly
QByteArray resolveInclude(const QByteArray &filename, const QByteArray &relativeTo)
@ PreparePreprocessorStatement
@ TokenizePreprocessorStatement
Definition qlist.h:81
const Symbol & symbol() const
Definition symbols.h:102
bool hasNext()
Definition symbols.h:89
Token next()
Definition symbols.h:94
bool test(Token)
Definition symbols.h:111
short defnext
Definition keywords.cpp:455
static const short keyword_trans[][128]
Definition keywords.cpp:7
Token token
Definition keywords.cpp:452
Token ident
Definition keywords.cpp:456
short next
Definition keywords.cpp:453
char defchar
Definition keywords.cpp:454
Combined button and popup list for selecting options.
short next
static const short pp_keyword_trans[][128]
Definition ppkeywords.cpp:7
PP_Token ident
short defnext
PP_Token token
char defchar
static QByteArray readOrMapFile(QFile *file)
static QByteArray searchIncludePaths(const QList< Parser::IncludePath > &includepaths, const QByteArray &include, const bool debugIncludes)
static QByteArray cleaned(const QByteArray &input)
static void mergeStringLiterals(Symbols &symbols)
bool is_ident_char(char s)
Definition utils.h:30
const char * skipQuote(const char *data)
Definition utils.h:42
bool is_space(char s)
Definition utils.h:19
Simple structure used by the Doc and DocParser classes.
bool isVariadic
bool isFunction
Symbol(int lineNum, Token token)
Definition symbols.h:48
Token token
Definition symbols.h:58
void mergeStringLiteral(const Symbol &next)
int lineNum
Definition symbols.h:57
Symbol()=default
QList< Symbol > Symbols
Definition symbols.h:75