Qt
Internal/Contributor docs for the Qt SDK. Note: These are NOT official API docs; those are found at https://doc.qt.io/
Loading...
Searching...
No Matches
python.cpp
Go to the documentation of this file.
1// Copyright (C) 2002-2007 Detlev Offenbach <detlev@die-offenbachs.de>
2// Copyright (C) 2021 The Qt Company Ltd.
3// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
4
5#include <translator.h>
6#include "lupdate.h"
7
8#include <QtCore/qhash.h>
9#include <QtCore/qlist.h>
10#include <QtCore/qstring.h>
11#include <QtCore/qtextstream.h>
12#include <QtCore/qstack.h>
13
14#include <cctype>
15#include <cerrno>
16#include <cstdio>
17#include <cstring>
18
20
21static const char PythonMagicComment[] = "TRANSLATOR ";
22
23/*
24 The first part of this source file is the Python tokenizer. We skip
25 most of Python; the only tokens that interest us are defined here.
26*/
27
33
// Kind of string literal the tokenizer is asked to read.
// NOTE(review): the embedded listing numbers skip 36, 38 and 39, so enumerators are
// missing from this dump (StringType::NoString and StringType::RawString are referenced
// elsewhere in the file) — confirm the full list against the original source.
34enum class StringType
35{
37 String,
40};
41
42/*
43 The tokenizer maintains the following global variables. The names
44 should be self-explanatory.
45*/
47static int yyCh;
49static char yyComment[65536];
51static char yyString[65536];
53static int yyParenDepth;
54static int yyLineNo;
55static int yyCurLineNo;
56
62
64
66
68 {"None", Tok_None},
69 {"class", Tok_class},
70 {"def", Tok_def},
71 {"return", Tok_return},
72 {"__tr", Tok_tr}, // Legacy?
73 {"__trUtf8", Tok_trUtf8}
74};
75
76// the file to read from (if reading from a file)
77static FILE *yyInFile;
78
79// the string to read from and current position in the string (otherwise)
80static int yyInPos;
81static int buf;
82
83static int (*getChar)();
84static int (*peekChar)();
85
89
90// (Context, indentation level) pair.
91using ContextPair = QPair<QByteArray, int>;
92// Stack of (Context, indentation level) pairs.
93using ContextStack = QStack<ContextPair>;
95
// Returns the next character of yyInFile and consumes it.
// A character parked in buf (buf >= 0) is handed out first and buf is reset to -1.
// Increments yyCurLineNo on every '\n' and clears yyCountingIndentation on any
// character that is neither a newline nor leading whitespace.
// NOTE(review): the embedded listing numbers skip 108-109 and 111; the statements on
// those lines are missing from this dump (the whitespace branch is empty as shown) —
// confirm against the original file.
96static int getCharFromFile()
97{
98 int c;
99
// Prefer the pushed-back character, if any.
100 if (buf < 0) {
101 c = getc(yyInFile);
102 } else {
103 c = buf;
104 buf = -1;
105 }
106 if (c == '\n') {
107 yyCurLineNo++;
// 32 is ASCII space, 9 is ASCII tab.
110 } else if (yyCountingIndentation && (c == 32 || c == 9)) {
112 } else {
113 yyCountingIndentation = false;
114 }
115 return c;
116}
117
119{
120 int c = getc(yyInFile);
121 buf = c;
122 return c;
123}
124
// Resets the tokenizer state for a new input and reads the first character into yyCh.
//   fileName     - stored in yyFileName (used in diagnostics).
//   getCharFunc  - installed as getChar, the consuming character reader.
//   peekCharFunc - installed as peekChar, the non-consuming character reader.
// NOTE(review): the embedded listing numbers skip 138-139; statements are missing from
// this dump — confirm against the original file.
125static void startTokenizer(const QString &fileName, int (*getCharFunc)(),
126 int (*peekCharFunc)())
127{
128 yyInPos = 0;
// -1 marks the one-character pushback buffer as empty.
129 buf = -1;
130 getChar = getCharFunc;
131 peekChar = peekCharFunc;
132
133 yyFileName = fileName;
134 yyCh = getChar();
135 yyParenDepth = 0;
// NOTE(review): the first character is read above, before this reset; if it is a
// newline, the line increment done by getCharFromFile() is discarded here — confirm
// this is intended.
136 yyCurLineNo = 1;
137
140 yyCountingIndentation = false;
141 yyContextStack.clear();
142}
143
144static bool parseStringEscape(int quoteChar, StringType stringType)
145{
146 static const char tab[] = "abfnrtv";
147 static const char backTab[] = "\a\b\f\n\r\t\v";
148
149 yyCh = getChar();
150 if (yyCh == EOF)
151 return false;
152
153 if (stringType == StringType::RawString) {
154 if (yyCh != quoteChar) // Only quotes can be escaped in raw strings
155 yyString[yyStringLen++] = '\\';
156 yyString[yyStringLen++] = yyCh;
157 yyCh = getChar();
158 return true;
159 }
160
161 if (yyCh == 'x') {
162 QByteArray hex = "0";
163 yyCh = getChar();
164 if (yyCh == EOF)
165 return false;
166 while (std::isxdigit(yyCh)) {
167 hex += char(yyCh);
168 yyCh = getChar();
169 if (yyCh == EOF)
170 return false;
171 }
172 uint n;
173#ifdef Q_CC_MSVC
174 sscanf_s(hex, "%x", &n);
175#else
176 std::sscanf(hex, "%x", &n);
177#endif
178 if (yyStringLen < sizeof(yyString) - 1)
179 yyString[yyStringLen++] = char(n);
180 return true;
181 }
182
183 if (yyCh >= '0' && yyCh < '8') {
184 QByteArray oct;
185 int n = 0;
186 do {
187 oct += char(yyCh);
188 ++n;
189 yyCh = getChar();
190 if (yyCh == EOF)
191 return false;
192 } while (yyCh >= '0' && yyCh < '8' && n < 3);
193#ifdef Q_CC_MSVC
194 sscanf_s(oct, "%o", &n);
195#else
196 std::sscanf(oct, "%o", &n);
197#endif
198 if (yyStringLen < sizeof(yyString) - 1)
199 yyString[yyStringLen++] = char(n);
200 return true;
201 }
202
203 const char *p = std::strchr(tab, yyCh);
204 if (yyStringLen < sizeof(yyString) - 1) {
205 yyString[yyStringLen++] = p == nullptr
206 ? char(yyCh) : backTab[p - tab];
207 }
208 yyCh = getChar();
209 return true;
210}
211
213{
214 int quoteChar = yyCh;
215 bool tripleQuote = false;
216 bool singleQuote = true;
217 bool in = false;
218
219 yyCh = getChar();
220
221 while (yyCh != EOF) {
222 if (singleQuote && (yyCh == '\n' || (in && yyCh == quoteChar)))
223 break;
224
225 if (yyCh == quoteChar) {
226 if (peekChar() == quoteChar) {
227 yyCh = getChar();
228 if (!tripleQuote) {
229 tripleQuote = true;
230 singleQuote = false;
231 in = true;
232 yyCh = getChar();
233 } else {
234 yyCh = getChar();
235 if (yyCh == quoteChar) {
236 tripleQuote = false;
237 break;
238 }
239 }
240 } else if (tripleQuote) {
241 if (yyStringLen < sizeof(yyString) - 1)
242 yyString[yyStringLen++] = char(yyCh);
243 yyCh = getChar();
244 continue;
245 } else {
246 break;
247 }
248 } else {
249 in = true;
250 }
251
252 if (yyCh == '\\') {
253 if (!parseStringEscape(quoteChar, stringType))
254 return Tok_Eof;
255 } else {
256 char *yStart = yyString + yyStringLen;
257 char *yp = yStart;
258 while (yyCh != EOF && (tripleQuote || yyCh != '\n') && yyCh != quoteChar
259 && yyCh != '\\') {
260 *yp++ = char(yyCh);
261 yyCh = getChar();
262 }
263 yyStringLen += yp - yStart;
264 }
265 }
266 yyString[yyStringLen] = '\0';
267
268 if (yyCh != quoteChar) {
269 printf("%c\n", yyCh);
270
271 qWarning("%s:%d: Unterminated string",
272 qPrintable(yyFileName), yyLineNo);
273 }
274
275 if (yyCh == EOF)
276 return Tok_Eof;
277 yyCh = getChar();
278 return Tok_String;
279}
280
282{
283 QByteArray result;
284 while (true) {
285 yyCh = getChar();
286 if (yyCh == EOF || yyCh == '\n')
287 break;
288 result.append(char(yyCh));
289 }
290 return result;
291}
292
294{
// Tokenizer entry point: skips uninteresting input and returns the next token,
// starting from the character currently held in yyCh. Identifier text is left in
// yyIdent, string contents in yyString. Returns Tok_Eof at the end of the input.
// NOTE(review): the function header (listing 293) and listing line 299 are missing
// from this dump — confirm against the original file.
295 yyIdent.clear();
296 yyCommentLen = 0;
297 yyStringLen = 0;
298 while (yyCh != EOF) {
300
// Identifiers and keywords: looked up in the tokens table, plain identifiers
// fall back to Tok_Ident.
301 if (std::isalpha(yyCh) || yyCh == '_') {
302 do {
303 yyIdent.append(char(yyCh));
304 yyCh = getChar();
305 } while (std::isalnum(yyCh) || yyCh == '_');
306
307 return tokens.value(yyIdent, Tok_Ident);
308 }
309 switch (yyCh) {
310 case '#':
// Comment. "#:" lines are collected in extraComments and "#=" lines in ids, each
// with the current line number; any other comment is skipped up to the end of line.
// NOTE(review): the character returned by getChar() below is not stored in yyCh;
// in the '\n' case yyCh therefore still holds '#' when the loop continues, which
// looks like it would make the following line be treated as a comment — confirm.
311 switch (getChar()) {
312 case ':':
313 extraComments.append({readLine().trimmed(), yyCurLineNo});
314 break;
315 case '=':
316 ids.append({readLine().trimmed(), yyCurLineNo});
317 break;
318 case EOF:
319 return Tok_Eof;
320 case '\n':
321 break;
322 default:
323 do {
324 yyCh = getChar();
325 } while (yyCh != EOF && yyCh != '\n');
326 break;
327 }
328 break;
329 case '"':
330 case '\'':
331 return parseString(stringType);
332 case '(':
333 yyParenDepth++;
334 yyCh = getChar();
335 return Tok_LeftParen;
336 case ')':
337 yyParenDepth--;
338 yyCh = getChar();
339 return Tok_RightParen;
340 case ',':
341 yyCh = getChar();
342 return Tok_Comma;
343 case '.':
344 yyCh = getChar();
345 return Tok_Dot;
346 case '0':
347 case '1':
348 case '2':
349 case '3':
350 case '4':
351 case '5':
352 case '6':
353 case '7':
354 case '8':
355 case '9': {
// Numeric literal: collect the digits (hex digits after an 'x' in second position)
// and report Tok_Integer only if the collected text converts successfully.
356 QByteArray ba;
357 ba += char(yyCh);
358 yyCh = getChar();
359 const bool hex = yyCh == 'x';
360 if (hex) {
361 ba += char(yyCh);
362 yyCh = getChar();
363 }
364 while ((hex ? std::isxdigit(yyCh) : std::isdigit(yyCh))) {
365 ba += char(yyCh);
366 yyCh = getChar();
367 }
368 bool ok;
369 auto v = ba.toLongLong(&ok);
370 Q_UNUSED(v);
371 if (ok)
372 return Tok_Integer;
373 break;
374 }
375 default:
// Anything else is of no interest: skip the character.
376 yyCh = getChar();
377 }
378 }
379 return Tok_Eof;
380}
381
382/*
383 The second part of this source file is the parser. It accomplishes
384 a very easy task: It finds all strings inside a tr() or translate()
385 call, and possibly finds out the context of the call. It supports
386 three cases:
387 (1) the context is specified, as in FunnyDialog.tr("Hello") or
388 translate("FunnyDialog", "Hello");
389 (2) the call appears within an inlined function;
390 (3) the call appears within a function defined outside the class definition.
391*/
392
394
// Reports whether the current token yyTok equals t.
// NOTE(review): listing line 399 (the statement controlled by the `if`) is missing from
// this dump; as shown, the `if` would guard the `return` — confirm against the original
// file what happens on a match.
395static bool match(Token t)
396{
397 const bool matches = (yyTok == t);
398 if (matches)
400 return matches;
401}
402
// Reports whether a string literal starts at the current token: either yyTok already is
// Tok_String, or it is a one-letter identifier 'r' or 'f' (string prefix) and yyTok is
// Tok_String afterwards.
// NOTE(review): listing lines 411 and 414 (the first statement of each prefix case) are
// missing from this dump — confirm against the original file.
403static bool matchStringStart()
404{
405 if (yyTok == Tok_String)
406 return true;
407 // Check for f"bla{var}" and raw strings r"bla".
408 if (yyTok == Tok_Ident && yyIdent.size() == 1) {
409 switch (yyIdent.at(0)) {
410 case 'r':
412 return yyTok == Tok_String;
413 case 'f':
415 return yyTok == Tok_String;
416 }
417 }
418 return false;
419}
420
// Collects consecutive string literals into *s by appending yyString for each one.
// *s is cleared first. Returns true if at least one literal was matched.
// NOTE(review): listing line 427 is missing from the loop body in this dump — confirm
// against the original file.
421static bool matchString(QByteArray *s)
422{
423 s->clear();
424 bool ok = false;
425 while (matchStringStart()) {
426 *s += yyString;
428 ok = true;
429 }
430 return ok;
431}
432
// Matches an optional encoding argument, possibly qualified by "PySide6", "QtGui" /
// "QtCore" and an application class name. On success sets *utf8 to whether the final
// identifier ends with "UTF8" and returns true; returns false if the tokens do not
// form such an expression.
// NOTE(review): listing lines 437, 442, 447, 452, 459, 462 and 466 are missing from
// this dump — confirm against the original file.
433static bool matchEncoding(bool *utf8)
434{
435 // Remove any leading module paths.
436 if (yyTok == Tok_Ident && std::strcmp(yyIdent, "PySide6") == 0) {
438
439 if (yyTok != Tok_Dot)
440 return false;
441
443 }
444
445 if (yyTok == Tok_Ident && (std::strcmp(yyIdent, "QtGui") == 0
446 || std::strcmp(yyIdent, "QtCore") == 0)) {
448
449 if (yyTok != Tok_Dot)
450 return false;
451
453 }
454
455 if (yyTok == Tok_Ident) {
456 if (std::strcmp(yyIdent, "QApplication") == 0
457 || std::strcmp(yyIdent, "QGuiApplication") == 0
458 || std::strcmp(yyIdent, "QCoreApplication") == 0) {
460
461 if (yyTok == Tok_Dot)
463 }
464
465 *utf8 = QByteArray(yyIdent).endsWith("UTF8");
467 return true;
468 }
469 return false;
470}
471
472static bool matchStringOrNone(QByteArray *s)
473{
474 bool matches = matchString(s);
475
476 if (!matches)
477 matches = match(Tok_None);
478
479 return matches;
480}
481
482/*
483 * match any expression that can return a number, which can be
484 * 1. Literal number (e.g. '11')
485 * 2. simple identifier (e.g. 'm_count')
486 * 3. simple function call (e.g. 'size()')
487 * 4. function call on an object (e.g. 'list.size()')
488 *
489 * Other cases:
490 * size(2,4)
491 * list().size()
492 * list(a,b).size(2,4)
493 * etc...
494 */
// Matches a simple expression made of identifiers and parenthesised groups, tracking
// the nesting depth in parenlevel. Returns false when a token other than an
// identifier or parenthesis is met outside of any parentheses.
// NOTE(review): listing lines 497, 506, 508 and 510 are missing from this dump (as
// shown, the first `return true;` is unconditional) — confirm against the original file.
495static bool matchExpression()
496{
498 return true;
499
500 int parenlevel = 0;
501 while (match(Tok_Ident) || parenlevel > 0) {
502 if (yyTok == Tok_RightParen) {
// A closing parenthesis at depth 0 belongs to the enclosing call: stop here.
503 if (parenlevel == 0)
504 break;
505 --parenlevel;
507 } else if (yyTok == Tok_LeftParen) {
509 if (yyTok == Tok_RightParen) {
511 } else {
512 ++parenlevel;
513 }
514 } else if (yyTok == Tok_Ident) {
515 continue;
516 } else if (parenlevel == 0) {
517 return false;
518 }
519 }
520 return true;
521}
522
// Parses the argument list of a translate()-style call:
// (context, text [, comment-or-None [, encoding] [, plural expression]]).
// All output parameters are reset first; *context, *text and *comment receive the
// matched strings, *utf8 the result of matchEncoding(), and *plural is set to true
// when a plural argument is present. Returns false on a syntax the parser does not
// accept.
// NOTE(review): listing lines 532, 538, 546, 553, 561, 566, 574, 579, 585 and 588 are
// missing from this dump, so several `return` statements appear unconditional here —
// confirm the conditions against the original file.
523static bool parseTranslate(QByteArray *text, QByteArray *context, QByteArray *comment,
524 bool *utf8, bool *plural)
525{
526 text->clear();
527 context->clear();
528 comment->clear();
529 *utf8 = false;
530 *plural = false;
531
533 if (!match(Tok_LeftParen) || !matchString(context) || !match(Tok_Comma)
534 || !matchString(text)) {
535 return false;
536 }
537
539 return true;
540
541 // not a comma or a right paren, illegal syntax
542 if (!match(Tok_Comma))
543 return false;
544
545 // python accepts trailing commas within parenthesis, so allow a comma with nothing after
547 return true;
548
549 // check for comment
550 if (!matchStringOrNone(comment))
551 return false; // not a comment, or a trailing comma... something is wrong
552
554 return true;
555
556 // not a comma or a right paren, illegal syntax
557 if (!match(Tok_Comma))
558 return false;
559
560 // python accepts trailing commas within parenthesis, so allow a comma with nothing after
562 return true;
563
564 // look for optional encoding information
565 if (matchEncoding(utf8)) {
567 return true;
568
569 // not a comma or a right paren, illegal syntax
570 if (!match(Tok_Comma))
571 return false;
572
573 // python accepts trailing commas within parenthesis, so allow a comma with nothing after
575 return true;
576 }
577
578 // Must be a plural expression
580 return false;
581
582 *plural = true;
583
584 // Ignore any trailing comma here
586
587 // This must be the end, or there are too many parameters
589 return true;
590
591 return false;
592}
593
595 int lineNo)
596{
597 // PYSIDE-2863: parseTranslate() can read past the message
598 // and capture extraComments intended for the next message.
599 // Use only extraComments for the current message.
600 QByteArray extraComment;
601 while (!extraComments.isEmpty() && extraComments.constFirst().lineNo <= lineNo) {
602 if (!extraComment.isEmpty())
603 extraComment += ' ';
604 extraComment += extraComments.takeFirst().extraComment;
605 }
606
607 if (!extraComment.isEmpty())
608 message->setExtraComment(QString::fromUtf8(extraComment));
609
610 while (!ids.isEmpty() && ids.constFirst().lineNo <= lineNo)
611 message->setId(QString::fromUtf8(ids.takeFirst().extraComment));
612}
613
// Parser main loop: walks the token stream until Tok_Eof and adds a TranslatorMessage
// to tor (via tor.extend(message, cd)) for every tr()-like and translate()-like call
// that yields a non-empty source text.
//   initialContext - context used for "self."-prefixed calls while yyContextStack is empty.
//   defaultContext - context used for calls without any prefix.
// Warns if yyParenDepth is not zero at the end of the input.
// NOTE(review): listing lines 624, 629, 632, 635, 637, 641, 647, 651, 656, 662, 666,
// 715, 736 and 739 are missing from this dump, so parts of the control flow shown here
// are incomplete — confirm against the original file.
614static void parse(Translator &tor, ConversionData &cd,
615 const QByteArray &initialContext = {},
616 const QByteArray &defaultContext = {})
617{
618 QByteArray context;
619 QByteArray text;
620 QByteArray comment;
// Dotted identifier chain seen directly before a call, e.g. "self" or a class name.
621 QByteArray prefix;
622 bool utf8 = false;
623
625 while (yyTok != Tok_Eof) {
626
627 switch (yyTok) {
628 case Tok_class: {
630 yyIndentationSize = yyContinuousSpaceCount; // First indented "class"
631 const int indent = yyIndentationSize > 0
// Drop every class that is indented at least as deep as the new one.
633 while (!yyContextStack.isEmpty() && yyContextStack.top().second >= indent)
634 yyContextStack.pop();
636 yyContextStack.push({yyIdent, indent});
638 }
639 break;
640 case Tok_def:
642 yyIndentationSize = yyContinuousSpaceCount; // First indented "def"
643 if (!yyContextStack.isEmpty()) {
644 // Pop classes if the function is further outdented than the class on the top
645 // (end of a nested class).
646 const int classIndent = yyIndentationSize > 0
648 while (!yyContextStack.isEmpty() && yyContextStack.top().second > classIndent)
649 yyContextStack.pop();
650 }
652 break;
653 case Tok_tr:
654 case Tok_trUtf8: {
655 utf8 = true;
// Remember the line of the call so that only "#:" / "#=" entries up to it are used.
657 const int lineNo = yyCurLineNo;
658 if (match(Tok_LeftParen) && matchString(&text)) {
659 comment.clear();
660 bool plural = false;
661
663 // There is no comment or plural arguments.
664 } else if (match(Tok_Comma) && matchStringOrNone(&comment)) {
665 // There is a comment argument.
667 // There is no plural argument.
668 } else if (match(Tok_Comma)) {
669 // There is a plural argument.
670 plural = true;
671 }
672 }
673
// Choose the context: no prefix -> defaultContext; "self" -> innermost class on the
// stack (or initialContext when the stack is empty); anything else -> the prefix itself.
674 if (prefix.isEmpty())
675 context = defaultContext;
676 else if (prefix == "self")
677 context = yyContextStack.isEmpty()
678 ? initialContext : yyContextStack.top().first;
679 else
680 context = prefix;
681
682 prefix.clear();
683
684 if (!text.isEmpty()) {
685 TranslatorMessage message(QString::fromUtf8(context),
686 QString::fromUtf8(text),
687 QString::fromUtf8(comment),
688 {}, yyFileName, yyLineNo,
689 {}, TranslatorMessage::Unfinished, plural);
690 setMessageParameters(&message, lineNo);
691 tor.extend(message, cd);
692 }
693 }
694 }
695 break;
696 case Tok_translate: {
697 bool plural{};
698 const int lineNo = yyCurLineNo;
699 if (parseTranslate(&text, &context, &comment, &utf8, &plural)
700 && !text.isEmpty()) {
701 TranslatorMessage message(QString::fromUtf8(context),
702 QString::fromUtf8(text),
703 QString::fromUtf8(comment),
704 {}, yyFileName, yyLineNo,
705 {}, TranslatorMessage::Unfinished, plural);
706 setMessageParameters(&message, lineNo);
707 tor.extend(message, cd);
708 }
709 }
710 break;
711 case Tok_Ident:
// Build up the dotted prefix; it is discarded unless a '.' follows.
712 if (!prefix.isEmpty())
713 prefix += '.';
714 prefix += yyIdent;
716 if (yyTok != Tok_Dot)
717 prefix.clear();
718 break;
719 case Tok_Comment:
// A comment starting with PythonMagicComment ("TRANSLATOR ") names a context; when
// more text follows the context name, it is recorded as a message with empty source.
720 comment = yyComment;
721 comment = comment.simplified();
722 if (comment.left(sizeof(PythonMagicComment) - 1) == PythonMagicComment) {
723 comment.remove(0, sizeof(PythonMagicComment) - 1);
724 int k = comment.indexOf(' ');
725 if (k == -1) {
726 context = comment;
727 } else {
728 context = comment.left(k);
729 comment.remove( 0, k + 1);
730 TranslatorMessage message(QString::fromUtf8(context),
731 {}, QString::fromUtf8(comment), {},
732 yyFileName, yyLineNo, {});
733 tor.extend(message, cd);
734 }
735 }
737 break;
738 default:
740 }
741 }
742
743 if (yyParenDepth != 0) {
744 qWarning("%s: Unbalanced parentheses in Python code",
745 qPrintable(yyFileName));
746 }
747}
748
749bool loadPython(Translator &translator, const QString &fileName, ConversionData &cd)
750{
751 // Match the function aliases to our tokens
752 static bool firstTime = true;
753 if (firstTime) {
754 firstTime = false;
755 const auto &nameMap = trFunctionAliasManager.nameToTrFunctionMap();
756 for (auto it = nameMap.cbegin(), end = nameMap.cend(); it != end; ++it) {
757 switch (it.value()) {
758 case TrFunctionAliasManager::Function_tr:
759 case TrFunctionAliasManager::Function_QT_TR_NOOP:
760 tokens.insert(it.key().toUtf8(), Tok_tr);
761 break;
762 case TrFunctionAliasManager::Function_trUtf8:
763 tokens.insert(it.key().toUtf8(), Tok_trUtf8);
764 break;
765 case TrFunctionAliasManager::Function_translate:
766 case TrFunctionAliasManager::Function_QT_TRANSLATE_NOOP:
767 // QTranslator::findMessage() has the same parameters as QApplication::translate().
768 case TrFunctionAliasManager::Function_findMessage:
769 tokens.insert(it.key().toUtf8(), Tok_translate);
770 break;
771 default:
772 break;
773 }
774 }
775 }
776
777#ifdef Q_CC_MSVC
778 const auto *fileNameC = reinterpret_cast<const wchar_t *>(fileName.utf16());
779 const bool ok = _wfopen_s(&yyInFile, fileNameC, L"r") == 0;
780#else
781 const QByteArray fileNameC = QFile::encodeName(fileName);
782 yyInFile = std::fopen( fileNameC.constData(), "r");
783 const bool ok = yyInFile != nullptr;
784#endif
785 if (!ok) {
786 cd.appendError(QStringLiteral("Cannot open %1").arg(fileName));
787 return false;
788 }
789
790 startTokenizer(fileName, getCharFromFile, peekCharFromFile);
791 parse(translator, cd);
792 std::fclose(yyInFile);
793 return true;
794}
795
796QT_END_NAMESPACE
void extend(const TranslatorMessage &msg, ConversionData &cd)
bool loadPython(Translator &translator, const QString &fileName, ConversionData &cd)
Definition python.cpp:749
static bool match(Token t)
Definition python.cpp:395
static Token parseString(StringType stringType=StringType::NoString)
Definition python.cpp:212
static int yyLineNo
Definition python.cpp:54
static int getCharFromFile()
Definition python.cpp:96
static QByteArray readLine()
Definition python.cpp:281
static int yyInPos
Definition python.cpp:80
static int yyCh
Definition python.cpp:47
static FILE * yyInFile
Definition python.cpp:77
static size_t yyStringLen
Definition python.cpp:52
static bool matchEncoding(bool *utf8)
Definition python.cpp:433
static size_t yyCommentLen
Definition python.cpp:50
static bool parseTranslate(QByteArray *text, QByteArray *context, QByteArray *comment, bool *utf8, bool *plural)
Definition python.cpp:523
static int yyCurLineNo
Definition python.cpp:55
static void parse(Translator &tor, ConversionData &cd, const QByteArray &initialContext={}, const QByteArray &defaultContext={})
Definition python.cpp:614
static bool matchExpression()
Definition python.cpp:495
static Token getToken(StringType stringType=StringType::NoString)
Definition python.cpp:293
Token
Definition python.cpp:28
@ Tok_RightParen
Definition python.cpp:31
@ Tok_tr
Definition python.cpp:28
@ Tok_class
Definition python.cpp:28
@ Tok_String
Definition python.cpp:30
@ Tok_return
Definition python.cpp:28
@ Tok_Comma
Definition python.cpp:32
@ Tok_Integer
Definition python.cpp:32
@ Tok_None
Definition python.cpp:32
@ Tok_Ident
Definition python.cpp:29
@ Tok_trUtf8
Definition python.cpp:29
@ Tok_def
Definition python.cpp:28
@ Tok_Dot
Definition python.cpp:30
@ Tok_translate
Definition python.cpp:29
@ Tok_LeftParen
Definition python.cpp:31
@ Tok_Eof
Definition python.cpp:28
@ Tok_Comment
Definition python.cpp:30
static char yyComment[65536]
Definition python.cpp:49
static char yyString[65536]
Definition python.cpp:51
static bool yyCountingIndentation
Definition python.cpp:88
static int buf
Definition python.cpp:81
static int yyContinuousSpaceCount
Definition python.cpp:87
static int(* peekChar)()
Definition python.cpp:84
static QString yyFileName
Definition python.cpp:46
static QList< ExtraComment > extraComments
Definition python.cpp:63
static QList< ExtraComment > ids
Definition python.cpp:65
static bool matchStringStart()
Definition python.cpp:403
static QByteArray yyIdent
Definition python.cpp:48
static int yyIndentationSize
Definition python.cpp:86
static bool matchString(QByteArray *s)
Definition python.cpp:421
static Token yyTok
Definition python.cpp:393
static ContextStack yyContextStack
Definition python.cpp:94
StringType
Definition python.cpp:35
static bool matchStringOrNone(QByteArray *s)
Definition python.cpp:472
static bool parseStringEscape(int quoteChar, StringType stringType)
Definition python.cpp:144
static int yyParenDepth
Definition python.cpp:53
QHash< QByteArray, Token > tokens
Definition python.cpp:67
static void setMessageParameters(TranslatorMessage *message, int lineNo)
Definition python.cpp:594
static void startTokenizer(const QString &fileName, int(*getCharFunc)(), int(*peekCharFunc)())
Definition python.cpp:125
static int peekCharFromFile()
Definition python.cpp:118
static QT_BEGIN_NAMESPACE const char PythonMagicComment[]
Definition python.cpp:21
static int(* getChar)()
Definition python.cpp:83
QByteArray extraComment
Definition python.cpp:59