Qt
Internal/Contributor docs for the Qt SDK. Note: These are NOT official API docs; those are found at https://doc.qt.io/
Loading...
Searching...
No Matches
python.cpp
Go to the documentation of this file.
1// Copyright (C) 2002-2007 Detlev Offenbach <detlev@die-offenbachs.de>
2// Copyright (C) 2021 The Qt Company Ltd.
3// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
4
5#include <translator.h>
6#include "lupdate.h"
7#include "metastrings.h"
8
9#include <QtCore/qhash.h>
10#include <QtCore/qlist.h>
11#include <QtCore/qstring.h>
12#include <QtCore/qtextstream.h>
13#include <QtCore/qstack.h>
14
15#include <cctype>
16#include <cerrno>
17#include <cstdio>
18#include <cstring>
19
20using namespace Qt::StringLiterals;
21
22QT_BEGIN_NAMESPACE
23
24class PythonParser
25{
26
27 enum Token {
28 Tok_Eof,
29 Tok_class,
30 Tok_def,
31 Tok_return,
32 Tok_tr,
33 Tok_trUtf8,
34 Tok_translate,
35 Tok_Ident,
36 Tok_Dot,
37 Tok_String,
38 Tok_LeftParen,
39 Tok_RightParen,
40 Tok_Comma,
41 Tok_None,
42 Tok_Integer
43 };
44
45 enum class StringType { NoString, String, FormatString, RawString };
46
47public:
48 PythonParser(Translator &translator, const QString &fileName, bool &error, ConversionData &cd)
49 : tor(translator), m_cd(cd)
50 {
51#ifdef Q_CC_MSVC
52 const auto *fileNameC = reinterpret_cast<const wchar_t *>(fileName.utf16());
53 error = _wfopen_s(&yyInFile, fileNameC, L"r") != 0;
54#else
55 const QByteArray fileNameC = QFile::encodeName(fileName);
56 yyInFile = std::fopen(fileNameC.constData(), "r");
57 error = yyInFile == nullptr;
58#endif
59 if (!error)
60 startTokenizer(fileName);
61 }
62
63 /*
64 Accomplishes a very easy task: It finds all strings inside a tr() or translate()
65 call, and possibly finds out the context of the call. It supports
66 three cases:
67 (1) the context is specified, as in FunnyDialog.tr("Hello") or
68 translate("FunnyDialog", "Hello");
69 (2) the call appears within an inlined function;
70 (3) the call appears within a function defined outside the class definition.
71 */
72 void parse(const QByteArray &initialContext = {}, const QByteArray &defaultContext = {})
73 {
74 QByteArray context;
75 QByteArray text;
76 QByteArray comment;
77 QByteArray prefix;
78 bool utf8 = false;
79
80 yyTok = getToken();
81 while (yyTok != Tok_Eof) {
82
83 switch (yyTok) {
84 case Tok_class: {
85 if (yyIndentationSize < 0 && yyContinuousSpaceCount > 0)
86 yyIndentationSize = yyContinuousSpaceCount; // First indented "class"
87 const int indent =
88 yyIndentationSize > 0 ? yyContinuousSpaceCount / yyIndentationSize : 0;
89 while (!yyContextStack.isEmpty() && yyContextStack.top().second >= indent)
90 yyContextStack.pop();
91 yyTok = getToken();
92 yyContextStack.push({ yyIdent, indent });
93 yyTok = getToken();
94 } break;
95 case Tok_def:
96 if (yyIndentationSize < 0 && yyContinuousSpaceCount > 0)
97 yyIndentationSize = yyContinuousSpaceCount; // First indented "def"
98 if (!yyContextStack.isEmpty()) {
99 // Pop classes if the function is further outdented than the class on the top
100 // (end of a nested class).
101 const int classIndent = yyIndentationSize > 0
102 ? yyContinuousSpaceCount / yyIndentationSize - 1
103 : 0;
104 while (!yyContextStack.isEmpty() && yyContextStack.top().second > classIndent)
105 yyContextStack.pop();
106 }
107 yyTok = getToken();
108 break;
109 case Tok_tr:
110 case Tok_trUtf8: {
111 utf8 = true;
112 yyTok = getToken();
113 const int lineNo = yyCurLineNo;
114 if (match(Tok_LeftParen) && matchString(&text)) {
115 comment.clear();
116 bool plural = false;
117
118 MetaStrings metaBackup = std::move(metaStrings);
119
120 if (match(Tok_RightParen)) {
121 // There is no comment or plural arguments.
122 } else if (match(Tok_Comma) && matchStringOrNone(&comment)) {
123 // There is a comment argument.
124 if (match(Tok_RightParen)) {
125 // There is no plural argument.
126 } else if (match(Tok_Comma)) {
127 // There is a plural argument.
128 plural = true;
129 }
130 }
131
132 if (prefix.isEmpty())
133 context = defaultContext;
134 else if (prefix == "self")
135 context = yyContextStack.isEmpty() ? initialContext
136 : yyContextStack.top().first;
137 else
138 context = prefix;
139
140 prefix.clear();
141 TranslatorMessage message(QString::fromUtf8(context), QString::fromUtf8(text),
142 QString::fromUtf8(comment), {}, yyFileName, lineNo,
143 {}, TranslatorMessage::Unfinished, plural);
144 setMessageParameters(&message, metaBackup);
145 tor.extend(message, m_cd);
146 }
147 } break;
148 case Tok_translate: {
149 bool plural{};
150 const int lineNo = yyCurLineNo;
151 MetaStrings metaBackup = std::move(metaStrings);
152 if (parseTranslate(&text, &context, &comment, &utf8, &plural)) {
153 TranslatorMessage message(QString::fromUtf8(context), QString::fromUtf8(text),
154 QString::fromUtf8(comment), {}, yyFileName, lineNo,
155 {}, TranslatorMessage::Unfinished, plural);
156 setMessageParameters(&message, metaBackup);
157 tor.extend(message, m_cd);
158 } else {
159 metaStrings = std::move(metaBackup);
160 }
161 } break;
162 case Tok_Ident:
163 if (!prefix.isEmpty())
164 prefix += '.';
165 prefix += yyIdent;
166 yyTok = getToken();
167 if (yyTok != Tok_Dot)
168 prefix.clear();
169 break;
170 default:
171 yyTok = getToken();
172 }
173 }
174
175 if (yyParenDepth != 0) {
176 qWarning("%s: Unbalanced parentheses in Python code", qPrintable(yyFileName));
177 }
178 }
179
180 ~PythonParser() { std::fclose(yyInFile); }
181
182private:
183 QHash<QByteArray, Token> fillTokens()
184 {
185 QHash<QByteArray, Token> tokens = { { "None", Tok_None }, { "class", Tok_class },
186 { "def", Tok_def }, { "return", Tok_return },
187 { "__tr", Tok_tr }, // Legacy?
188 { "__trUtf8", Tok_trUtf8 } };
189
190 const auto &nameMap = trFunctionAliasManager.nameToTrFunctionMap();
191 for (auto it = nameMap.cbegin(), end = nameMap.cend(); it != end; ++it) {
192 switch (it.value()) {
193 case TrFunctionAliasManager::Function_tr:
194 case TrFunctionAliasManager::Function_QT_TR_NOOP:
195 tokens.insert(it.key().toUtf8(), Tok_tr);
196 break;
197 case TrFunctionAliasManager::Function_trUtf8:
198 tokens.insert(it.key().toUtf8(), Tok_trUtf8);
199 break;
200 case TrFunctionAliasManager::Function_translate:
201 case TrFunctionAliasManager::Function_QT_TRANSLATE_NOOP:
202 // QTranslator::findMessage() has the same parameters as QApplication::translate().
203 case TrFunctionAliasManager::Function_findMessage:
204 tokens.insert(it.key().toUtf8(), Tok_translate);
205 break;
206 default:
207 break;
208 }
209 }
210 return tokens;
211 }
212
213 QHash<QByteArray, Token> &getTokens()
214 {
215 static QHash<QByteArray, Token> tokens = fillTokens();
216 return tokens;
217 }
218
219 int getChar()
220 {
221 int c;
222
223 if (buf < 0) {
224 c = getc(yyInFile);
225 } else {
226 c = buf;
227 buf = -1;
228 }
229 if (c == '\n') {
230 yyCurLineNo++;
231 yyCountingIndentation = true;
232 yyContinuousSpaceCount = 0;
233 } else if (yyCountingIndentation && (c == 32 || c == 9)) {
234 yyContinuousSpaceCount++;
235 } else {
236 yyCountingIndentation = false;
237 }
238 return c;
239 }
240
241 int peekChar()
242 {
243 int c = getc(yyInFile);
244 buf = c;
245 return c;
246 }
247
248 void startTokenizer(const QString &fileName)
249 {
250 yyInPos = 0;
251 buf = -1;
252
253 yyFileName = fileName;
254 yyCh = getChar();
255 yyParenDepth = 0;
256 yyCurLineNo = 1;
257
258 yyIndentationSize = -1;
259 yyContinuousSpaceCount = 0;
260 yyContextStack.clear();
261 }
262
263 bool parseStringEscape(int quoteChar, StringType stringType)
264 {
265 static const char tab[] = "abfnrtv";
266 static const char backTab[] = "\a\b\f\n\r\t\v";
267
268 yyCh = getChar();
269 if (yyCh == EOF)
270 return false;
271
272 if (stringType == StringType::RawString) {
273 if (yyCh != quoteChar) // Only quotes can be escaped in raw strings
274 yyString[yyStringLen++] = '\\';
275 yyString[yyStringLen++] = yyCh;
276 yyCh = getChar();
277 return true;
278 }
279
280 if (yyCh == 'x' || yyCh == 'u' || yyCh == 'U') {
281 qsizetype maxSize = 2; // \x
282 if (yyCh == 'u')
283 maxSize = 4;
284 else if (yyCh == 'U')
285 maxSize = 8;
286
287 QByteArray hex;
288 yyCh = getChar();
289 if (yyCh == EOF)
290 return false;
291
292 while (maxSize-- && std::isxdigit(yyCh)) {
293 hex += char(yyCh);
294 yyCh = getChar();
295 if (yyCh == EOF)
296 return false;
297 }
298 uint n;
299#ifdef Q_CC_MSVC
300 sscanf_s(hex, "%x", &n);
301#else
302 std::sscanf(hex, "%x", &n);
303#endif
304
305 QByteArray hexChar = QString(QChar(n)).toUtf8();
306 if (yyStringLen < sizeof(yyString) - hexChar.size())
307 for (char c : std::as_const(hexChar))
308 yyString[yyStringLen++] = c;
309 return true;
310 }
311
312 if (yyCh >= '0' && yyCh < '8') {
313 QByteArray oct;
314 int n = 0;
315 do {
316 oct += char(yyCh);
317 ++n;
318 yyCh = getChar();
319 if (yyCh == EOF)
320 return false;
321 } while (yyCh >= '0' && yyCh < '8' && n < 3);
322#ifdef Q_CC_MSVC
323 sscanf_s(oct, "%o", &n);
324#else
325 std::sscanf(oct, "%o", &n);
326#endif
327 if (yyStringLen < sizeof(yyString) - 1)
328 yyString[yyStringLen++] = char(n);
329 return true;
330 }
331
332 const char *p = std::strchr(tab, yyCh);
333 if (yyStringLen < sizeof(yyString) - 1) {
334 yyString[yyStringLen++] = p == nullptr ? char(yyCh) : backTab[p - tab];
335 }
336 yyCh = getChar();
337 return true;
338 }
339
340 Token parseString(StringType stringType = StringType::NoString)
341 {
342 int quoteChar = yyCh;
343 bool tripleQuote = false;
344 bool singleQuote = true;
345 bool in = false;
346
347 yyCh = getChar();
348
349 while (yyCh != EOF) {
350 if (singleQuote && (yyCh == '\n' || (in && yyCh == quoteChar)))
351 break;
352
353 if (yyCh == quoteChar) {
354 if (peekChar() == quoteChar) {
355 yyCh = getChar();
356 if (!tripleQuote) {
357 tripleQuote = true;
358 singleQuote = false;
359 in = true;
360 yyCh = getChar();
361 } else {
362 yyCh = getChar();
363 if (yyCh == quoteChar) {
364 tripleQuote = false;
365 break;
366 }
367 }
368 } else if (tripleQuote) {
369 if (yyStringLen < sizeof(yyString) - 1)
370 yyString[yyStringLen++] = char(yyCh);
371 yyCh = getChar();
372 continue;
373 } else {
374 break;
375 }
376 } else {
377 in = true;
378 }
379
380 if (yyCh == '\\') {
381 if (!parseStringEscape(quoteChar, stringType))
382 return Tok_Eof;
383 } else {
384 char *yStart = yyString + yyStringLen;
385 char *yp = yStart;
386 while (yyCh != EOF && (tripleQuote || yyCh != '\n') && yyCh != quoteChar
387 && yyCh != '\\') {
388 *yp++ = char(yyCh);
389 yyCh = getChar();
390 }
391 yyStringLen += yp - yStart;
392 }
393 }
394 yyString[yyStringLen] = '\0';
395
396 if (yyCh != quoteChar) {
397 printf("%c\n", yyCh);
398
399 qWarning("%s:%d: Unterminated string", qPrintable(yyFileName), yyLineNo);
400 }
401
402 if (yyCh == EOF)
403 return Tok_Eof;
404 yyCh = getChar();
405 return Tok_String;
406 }
407
408 QByteArray readLine()
409 {
410 QByteArray result;
411 while (true) {
412 yyCh = getChar();
413 if (yyCh == EOF || yyCh == '\n')
414 break;
415 result.append(char(yyCh));
416 }
417 return result;
418 }
419
420 Token getToken(StringType stringType = StringType::NoString)
421 {
422 yyIdent.clear();
423 yyStringLen = 0;
424 while (yyCh != EOF) {
425 yyLineNo = yyCurLineNo;
426
427 if (std::isalpha(yyCh) || yyCh == '_') {
428 do {
429 yyIdent.append(char(yyCh));
430 yyCh = getChar();
431 } while (std::isalnum(yyCh) || yyCh == '_');
432
433 return getTokens().value(yyIdent, Tok_Ident);
434 }
435 switch (yyCh) {
436 case '#': {
437 auto comment = QString::fromUtf8(readLine());
438 if (!metaStrings.parse(comment)) {
439 qWarning() << qPrintable(yyFileName) << ':' << yyLineNo << ": "
440 << metaStrings.popError().toStdString();
441 break;
442 }
443 if (metaStrings.magicComment()) {
444 auto [context, comment] = *metaStrings.magicComment();
445 TranslatorMessage msg(ParserTool::transcode(context), QString(),
446 ParserTool::transcode(comment), QString(), yyFileName,
447 yyCurLineNo, QStringList(), TranslatorMessage::Finished,
448 false);
449 msg.setExtraComment(
450 ParserTool::transcode(metaStrings.extracomment().simplified()));
451 tor.append(msg);
452 tor.setExtras(metaStrings.extra());
453 metaStrings.clear();
454 }
455 break;
456 }
457 case '"':
458 case '\'':
459 return parseString(stringType);
460 case '(':
461 yyParenDepth++;
462 yyCh = getChar();
463 return Tok_LeftParen;
464 case ')':
465 yyParenDepth--;
466 yyCh = getChar();
467 return Tok_RightParen;
468 case ',':
469 yyCh = getChar();
470 return Tok_Comma;
471 case '.':
472 yyCh = getChar();
473 return Tok_Dot;
474 case '0':
475 case '1':
476 case '2':
477 case '3':
478 case '4':
479 case '5':
480 case '6':
481 case '7':
482 case '8':
483 case '9': {
484 QByteArray ba;
485 ba += char(yyCh);
486 yyCh = getChar();
487 const bool hex = yyCh == 'x';
488 if (hex) {
489 ba += char(yyCh);
490 yyCh = getChar();
491 }
492 while ((hex ? std::isxdigit(yyCh) : std::isdigit(yyCh))) {
493 ba += char(yyCh);
494 yyCh = getChar();
495 }
496 bool ok;
497 auto v = ba.toLongLong(&ok);
498 Q_UNUSED(v);
499 if (ok)
500 return Tok_Integer;
501 break;
502 }
503 default:
504 yyCh = getChar();
505 }
506 }
507 return Tok_Eof;
508 }
509
510 bool match(Token t)
511 {
512 const bool matches = (yyTok == t);
513 if (matches)
514 yyTok = getToken();
515 return matches;
516 }
517
518 bool matchStringStart()
519 {
520 if (yyTok == Tok_String)
521 return true;
522 // Check for f"bla{var}" and raw strings r"bla".
523 if (yyTok == Tok_Ident && yyIdent.size() == 1) {
524 switch (yyIdent.at(0)) {
525 case 'r':
526 yyTok = getToken(StringType::RawString);
527 return yyTok == Tok_String;
528 case 'f':
529 yyTok = getToken(StringType::FormatString);
530 return yyTok == Tok_String;
531 }
532 }
533 return false;
534 }
535
536 bool matchString(QByteArray *s)
537 {
538 s->clear();
539 bool ok = false;
540 while (matchStringStart()) {
541 *s += yyString;
542 yyTok = getToken();
543 ok = true;
544 }
545 return ok;
546 }
547
548 bool matchEncoding(bool *utf8)
549 {
550 // Remove any leading module paths.
551 if (yyTok == Tok_Ident && std::strcmp(yyIdent, "PySide6") == 0) {
552 yyTok = getToken();
553
554 if (yyTok != Tok_Dot)
555 return false;
556
557 yyTok = getToken();
558 }
559
560 if (yyTok == Tok_Ident
561 && (std::strcmp(yyIdent, "QtGui") == 0 || std::strcmp(yyIdent, "QtCore") == 0)) {
562 yyTok = getToken();
563
564 if (yyTok != Tok_Dot)
565 return false;
566
567 yyTok = getToken();
568 }
569
570 if (yyTok == Tok_Ident) {
571 if (std::strcmp(yyIdent, "QApplication") == 0
572 || std::strcmp(yyIdent, "QGuiApplication") == 0
573 || std::strcmp(yyIdent, "QCoreApplication") == 0) {
574 yyTok = getToken();
575
576 if (yyTok == Tok_Dot)
577 yyTok = getToken();
578 }
579
580 *utf8 = QByteArray(yyIdent).endsWith("UTF8");
581 yyTok = getToken();
582 return true;
583 }
584 return false;
585 }
586
587 bool matchStringOrNone(QByteArray *s)
588 {
589 bool matches = matchString(s);
590
591 if (!matches)
592 matches = match(Tok_None);
593
594 return matches;
595 }
596
597 /*
598 * match any expression that can return a number, which can be
599 * 1. Literal number (e.g. '11')
600 * 2. simple identifier (e.g. 'm_count')
601 * 3. simple function call (e.g. 'size()')
602 * 4. function call on an object (e.g. 'list.size()')
603 * * Other cases:
604 * size(2,4)
605 * list().size()
606 * list(a,b).size(2,4)
607 * etc...
608 */
609 bool matchExpression()
610 {
611 if (match(Tok_Integer))
612 return true;
613
614 int parenlevel = 0;
615 while (match(Tok_Ident) || parenlevel > 0) {
616 if (yyTok == Tok_RightParen) {
617 if (parenlevel == 0)
618 break;
619 --parenlevel;
620 yyTok = getToken();
621 } else if (yyTok == Tok_LeftParen) {
622 yyTok = getToken();
623 if (yyTok == Tok_RightParen) {
624 yyTok = getToken();
625 } else {
626 ++parenlevel;
627 }
628 } else if (yyTok == Tok_Ident) {
629 continue;
630 } else if (parenlevel == 0) {
631 return false;
632 }
633 }
634 return true;
635 }
636
637 bool parseTranslate(QByteArray *text, QByteArray *context, QByteArray *comment, bool *utf8,
638 bool *plural)
639 {
640 text->clear();
641 context->clear();
642 comment->clear();
643 *utf8 = false;
644 *plural = false;
645
646 yyTok = getToken();
647 if (!match(Tok_LeftParen) || !matchString(context) || !match(Tok_Comma)
648 || !matchString(text)) {
649 return false;
650 }
651
652 if (match(Tok_RightParen))
653 return true;
654
655 // not a comma or a right paren, illegal syntax
656 if (!match(Tok_Comma))
657 return false;
658
659 // python accepts trailing commas within parenthesis, so allow a comma with nothing after
660 if (match(Tok_RightParen))
661 return true;
662
663 // check for comment
664 if (!matchStringOrNone(comment))
665 return false; // not a comment, or a trailing comma... something is wrong
666
667 if (match(Tok_RightParen))
668 return true;
669
670 // not a comma or a right paren, illegal syntax
671 if (!match(Tok_Comma))
672 return false;
673
674 // python accepts trailing commas within parenthesis, so allow a comma with nothing after
675 if (match(Tok_RightParen))
676 return true;
677
678 // look for optional encoding information
679 if (matchEncoding(utf8)) {
680 if (match(Tok_RightParen))
681 return true;
682
683 // not a comma or a right paren, illegal syntax
684 if (!match(Tok_Comma))
685 return false;
686
687 // python accepts trailing commas within parenthesis, so allow a comma with nothing
688 // after
689 if (match(Tok_RightParen))
690 return true;
691 }
692
693 // Must be a plural expression
694 if (!matchExpression())
695 return false;
696
697 *plural = true;
698
699 // Ignore any trailing comma here
700 match(Tok_Comma);
701
702 // This must be the end, or there are too many parameters
703 if (match(Tok_RightParen))
704 return true;
705
706 return false;
707 }
708
709 void setMessageParameters(TranslatorMessage *message, const MetaStrings &meta)
710 {
711 // PYSIDE-2863: parseTranslate() can read past the message
712 // and capture extraComments intended for the next message.
713 // Use only extraComments for the current message.
714
715 message->setExtraComment(ParserTool::transcode(meta.extracomment().simplified()));
716 message->setId(meta.msgid());
717 message->setExtras(meta.extra());
718 if (!meta.label().isEmpty() && meta.msgid().isEmpty())
719 m_cd.appendError("%1:%2: labels cannot be used with text-based translation. "
720 "Ignoring\n"_L1.arg(yyFileName)
721 .arg(yyLineNo));
722 else
723 message->setLabel(meta.label());
724 }
725
726 QString yyFileName;
727 Token yyTok{};
728 int yyCh{};
729 QByteArray yyIdent;
730 char yyString[65536];
731 size_t yyStringLen{};
732 int yyParenDepth{};
733 int yyLineNo = 1;
734 int yyCurLineNo{};
735 // the file to read from (if reading from a file)
736 FILE *yyInFile;
737 // the string to read from and current position in the string (otherwise)
738 int yyInPos{};
739 int buf{};
740 int yyIndentationSize{};
741 int yyContinuousSpaceCount{};
742 bool yyCountingIndentation = false;
743 // (Context, indentation level) pair.
744 using ContextPair = QPair<QByteArray, int>;
745 // Stack of (Context, indentation level) pairs.
746 using ContextStack = QStack<ContextPair>;
747 ContextStack yyContextStack;
748 MetaStrings metaStrings;
749 Translator &tor;
750 ConversionData &m_cd;
751};
752
753bool loadPython(Translator &translator, const QString &fileName, ConversionData &cd)
754{
755
756 bool error = false;
757 PythonParser parser(translator, fileName, error, cd);
758 if (error) {
759 cd.appendError(QStringLiteral("Cannot open %1").arg(fileName));
760 return false;
761 }
762
763 parser.parse();
764 return true;
765}
766
767QT_END_NAMESPACE
bool loadPython(Translator &translator, const QString &fileName, ConversionData &cd)
Definition python.cpp:753