Qt
Internal/Contributor docs for the Qt SDK. Note: These are NOT official API docs; those are found at https://doc.qt.io/
Loading...
Searching...
No Matches
python.cpp
Go to the documentation of this file.
1// Copyright (C) 2002-2007 Detlev Offenbach <detlev@die-offenbachs.de>
2// Copyright (C) 2021 The Qt Company Ltd.
3// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
4
5#include <translator.h>
6#include "trparser.h"
7#include "metastrings.h"
8
9#include <QtCore/qhash.h>
10#include <QtCore/qlist.h>
11#include <QtCore/qstring.h>
12#include <QtCore/qtextstream.h>
13#include <QtCore/qstack.h>
14
15#include <cctype>
16#include <cerrno>
17#include <cstdio>
18#include <cstring>
19
20using namespace Qt::StringLiterals;
21
22QT_BEGIN_NAMESPACE
23
24class PythonParser
25{
26
27 enum Token {
28 Tok_Eof,
29 Tok_class,
30 Tok_def,
31 Tok_return,
32 Tok_tr,
33 Tok_trUtf8,
34 Tok_translate,
35 Tok_Ident,
36 Tok_Dot,
37 Tok_String,
38 Tok_LeftParen,
39 Tok_RightParen,
40 Tok_Comma,
41 Tok_None,
42 Tok_Integer
43 };
44
45 enum class StringType { NoString, String, FormatString, RawString };
46
47public:
48 PythonParser(Translator &translator, const QString &fileName, bool &error, ConversionData &cd)
49 : tor(translator), m_cd(cd)
50 {
51#ifdef Q_CC_MSVC
52 const auto *fileNameC = reinterpret_cast<const wchar_t *>(fileName.utf16());
53 error = _wfopen_s(&yyInFile, fileNameC, L"r") != 0;
54#else
55 const QByteArray fileNameC = QFile::encodeName(fileName);
56 yyInFile = std::fopen(fileNameC.constData(), "r");
57 error = yyInFile == nullptr;
58#endif
59 if (!error)
60 startTokenizer(fileName);
61 }
62
63 /*
64 Accomplishes a very easy task: It finds all strings inside a tr() or translate()
65 call, and possibly finds out the context of the call. It supports
66 three cases:
67 (1) the context is specified, as in FunnyDialog.tr("Hello") or
68 translate("FunnyDialog", "Hello");
69 (2) the call appears within an inlined function;
70 (3) the call appears within a function defined outside the class definition.
71 */
72 void parse(const QByteArray &initialContext = {}, const QByteArray &defaultContext = {})
73 {
74 QByteArray context;
75 QByteArray text;
76 QByteArray comment;
77 QByteArray prefix;
78 bool utf8 = false;
79
80 yyTok = getToken();
81 while (yyTok != Tok_Eof) {
82
83 switch (yyTok) {
84 case Tok_class: {
85 if (yyIndentationSize < 0 && yyContinuousSpaceCount > 0)
86 yyIndentationSize = yyContinuousSpaceCount; // First indented "class"
87 const int indent =
88 yyIndentationSize > 0 ? yyContinuousSpaceCount / yyIndentationSize : 0;
89 while (!yyContextStack.isEmpty() && yyContextStack.top().second >= indent)
90 yyContextStack.pop();
91 yyTok = getToken();
92 yyContextStack.push({ yyIdent, indent });
93 yyTok = getToken();
94 } break;
95 case Tok_def:
96 if (yyIndentationSize < 0 && yyContinuousSpaceCount > 0)
97 yyIndentationSize = yyContinuousSpaceCount; // First indented "def"
98 if (!yyContextStack.isEmpty()) {
99 // Pop classes if the function is further outdented than the class on the top
100 // (end of a nested class).
101 const int classIndent = yyIndentationSize > 0
102 ? yyContinuousSpaceCount / yyIndentationSize - 1
103 : 0;
104 while (!yyContextStack.isEmpty() && yyContextStack.top().second > classIndent)
105 yyContextStack.pop();
106 }
107 yyTok = getToken();
108 break;
109 case Tok_tr:
110 case Tok_trUtf8: {
111 utf8 = true;
112 yyTok = getToken();
113 const int lineNo = yyCurLineNo;
114 if (match(Tok_LeftParen) && matchString(&text)) {
115 comment.clear();
116 bool plural = false;
117
118 MetaStrings metaBackup = std::move(metaStrings);
119
120 if (match(Tok_RightParen)) {
121 // There is no comment or plural arguments.
122 } else if (match(Tok_Comma) && matchStringOrNone(&comment)) {
123 // There is a comment argument.
124 if (match(Tok_RightParen)) {
125 // There is no plural argument.
126 } else if (match(Tok_Comma)) {
127 // There is a plural argument.
128 plural = true;
129 }
130 }
131
132 if (prefix.isEmpty())
133 context = defaultContext;
134 else if (prefix == "self")
135 context = yyContextStack.isEmpty() ? initialContext
136 : yyContextStack.top().first;
137 else
138 context = prefix;
139
140 prefix.clear();
141 TranslatorMessage message(QString::fromUtf8(context), QString::fromUtf8(text),
142 QString::fromUtf8(comment), {}, yyFileName, lineNo,
143 {}, TranslatorMessage::Unfinished, plural);
144 setMessageParameters(&message, metaBackup);
145 tor.extend(message, m_cd);
146 }
147 } break;
148 case Tok_translate: {
149 bool plural{};
150 const int lineNo = yyCurLineNo;
151 MetaStrings metaBackup = std::move(metaStrings);
152 if (parseTranslate(&text, &context, &comment, &utf8, &plural)) {
153 TranslatorMessage message(QString::fromUtf8(context), QString::fromUtf8(text),
154 QString::fromUtf8(comment), {}, yyFileName, lineNo,
155 {}, TranslatorMessage::Unfinished, plural);
156 setMessageParameters(&message, metaBackup);
157 tor.extend(message, m_cd);
158 } else {
159 metaStrings = std::move(metaBackup);
160 }
161 } break;
162 case Tok_Ident:
163 if (!prefix.isEmpty())
164 prefix += '.';
165 prefix += yyIdent;
166 yyTok = getToken();
167 if (yyTok != Tok_Dot)
168 prefix.clear();
169 break;
170 default:
171 yyTok = getToken();
172 }
173 }
174
175 if (yyParenDepth != 0) {
176 qWarning("%s: Unbalanced parentheses in Python code", qPrintable(yyFileName));
177 }
178 }
179
180 ~PythonParser() { std::fclose(yyInFile); }
181
182private:
183 QHash<QByteArray, Token> fillTokens()
184 {
185 QHash<QByteArray, Token> tokens = { { "None", Tok_None }, { "class", Tok_class },
186 { "def", Tok_def }, { "return", Tok_return },
187 { "__tr", Tok_tr }, // Legacy?
188 { "__trUtf8", Tok_trUtf8 } };
189
190 const auto &nameMap = trFunctionAliasManager.nameToTrFunctionMap();
191 for (auto it = nameMap.cbegin(), end = nameMap.cend(); it != end; ++it) {
192 switch (it.value()) {
193 case TrFunctionAliasManager::Function_tr:
194 case TrFunctionAliasManager::Function_QT_TR_NOOP:
195 tokens.insert(it.key().toUtf8(), Tok_tr);
196 break;
197 case TrFunctionAliasManager::Function_trUtf8:
198 tokens.insert(it.key().toUtf8(), Tok_trUtf8);
199 break;
200 case TrFunctionAliasManager::Function_translate:
201 case TrFunctionAliasManager::Function_QT_TRANSLATE_NOOP:
202 // QTranslator::findMessage() has the same parameters as QApplication::translate().
203 case TrFunctionAliasManager::Function_findMessage:
204 tokens.insert(it.key().toUtf8(), Tok_translate);
205 break;
206 default:
207 break;
208 }
209 }
210 return tokens;
211 }
212
213 QHash<QByteArray, Token> &getTokens()
214 {
215 static QHash<QByteArray, Token> tokens = fillTokens();
216 return tokens;
217 }
218
219 int getChar()
220 {
221 int c;
222
223 if (buf < 0) {
224 c = getc(yyInFile);
225 } else {
226 c = buf;
227 buf = -1;
228 }
229 if (c == '\n') {
230 yyCurLineNo++;
231 yyCountingIndentation = true;
232 yyContinuousSpaceCount = 0;
233 } else if (yyCountingIndentation && (c == 32 || c == 9)) {
234 yyContinuousSpaceCount++;
235 } else {
236 yyCountingIndentation = false;
237 }
238 return c;
239 }
240
241 int peekChar()
242 {
243 int c = getc(yyInFile);
244 buf = c;
245 return c;
246 }
247
248 void startTokenizer(const QString &fileName)
249 {
250 yyInPos = 0;
251 buf = -1;
252
253 yyFileName = fileName;
254 yyCh = getChar();
255 yyParenDepth = 0;
256 yyCurLineNo = 1;
257
258 yyIndentationSize = -1;
259 yyContinuousSpaceCount = 0;
260 yyContextStack.clear();
261 }
262
263 bool parseStringEscape(int quoteChar, StringType stringType)
264 {
265 static const char tab[] = "abfnrtv";
266 static const char backTab[] = "\a\b\f\n\r\t\v";
267
268 yyCh = getChar();
269 if (yyCh == EOF)
270 return false;
271
272 if (stringType == StringType::RawString) {
273 if (yyCh != quoteChar) // Only quotes can be escaped in raw strings
274 yyString[yyStringLen++] = '\\';
275 yyString[yyStringLen++] = yyCh;
276 yyCh = getChar();
277 return true;
278 }
279
280 if (yyCh == 'x' || yyCh == 'u' || yyCh == 'U') {
281 qsizetype maxSize = 2; // \x
282 if (yyCh == 'u')
283 maxSize = 4;
284 else if (yyCh == 'U')
285 maxSize = 8;
286
287 QByteArray hex;
288 yyCh = getChar();
289 if (yyCh == EOF)
290 return false;
291
292 while (maxSize-- && std::isxdigit(yyCh)) {
293 hex += char(yyCh);
294 yyCh = getChar();
295 if (yyCh == EOF)
296 return false;
297 }
298 uint n;
299#ifdef Q_CC_MSVC
300 sscanf_s(hex, "%x", &n);
301#else
302 std::sscanf(hex, "%x", &n);
303#endif
304
305 QByteArray hexChar = QString(QChar(n)).toUtf8();
306 if (yyStringLen < sizeof(yyString) - hexChar.size())
307 for (char c : std::as_const(hexChar))
308 yyString[yyStringLen++] = c;
309 return true;
310 }
311
312 if (yyCh >= '0' && yyCh < '8') {
313 QByteArray oct;
314 int n = 0;
315 do {
316 oct += char(yyCh);
317 ++n;
318 yyCh = getChar();
319 if (yyCh == EOF)
320 return false;
321 } while (yyCh >= '0' && yyCh < '8' && n < 3);
322#ifdef Q_CC_MSVC
323 sscanf_s(oct, "%o", &n);
324#else
325 std::sscanf(oct, "%o", &n);
326#endif
327 if (yyStringLen < sizeof(yyString) - 1)
328 yyString[yyStringLen++] = char(n);
329 return true;
330 }
331
332 const char *p = std::strchr(tab, yyCh);
333 if (yyStringLen < sizeof(yyString) - 1) {
334 yyString[yyStringLen++] = p == nullptr ? char(yyCh) : backTab[p - tab];
335 }
336 yyCh = getChar();
337 return true;
338 }
339
340 Token parseString(StringType stringType = StringType::NoString)
341 {
342 int quoteChar = yyCh;
343 bool tripleQuote = false;
344 bool singleQuote = true;
345 bool in = false;
346
347 yyCh = getChar();
348
349 while (yyCh != EOF) {
350 if (singleQuote && (yyCh == '\n' || (in && yyCh == quoteChar)))
351 break;
352
353 if (yyCh == quoteChar) {
354 if (peekChar() == quoteChar) {
355 yyCh = getChar();
356 if (!tripleQuote) {
357 tripleQuote = true;
358 singleQuote = false;
359 in = true;
360 yyCh = getChar();
361 } else {
362 yyCh = getChar();
363 if (yyCh == quoteChar) {
364 tripleQuote = false;
365 break;
366 }
367 }
368 } else if (tripleQuote) {
369 if (yyStringLen < sizeof(yyString) - 1)
370 yyString[yyStringLen++] = char(yyCh);
371 yyCh = getChar();
372 continue;
373 } else {
374 break;
375 }
376 } else {
377 in = true;
378 }
379
380 if (yyCh == '\\') {
381 if (!parseStringEscape(quoteChar, stringType))
382 return Tok_Eof;
383 } else {
384 char *yStart = yyString + yyStringLen;
385 char *yp = yStart;
386 while (yyCh != EOF && (tripleQuote || yyCh != '\n') && yyCh != quoteChar
387 && yyCh != '\\') {
388 *yp++ = char(yyCh);
389 yyCh = getChar();
390 }
391 yyStringLen += yp - yStart;
392 }
393 }
394 yyString[yyStringLen] = '\0';
395
396 if (yyCh != quoteChar) {
397 printf("%c\n", yyCh);
398
399 qWarning("%s:%d: Unterminated string", qPrintable(yyFileName), yyLineNo);
400 }
401
402 if (yyCh == EOF)
403 return Tok_Eof;
404 yyCh = getChar();
405 return Tok_String;
406 }
407
408 QByteArray readLine()
409 {
410 QByteArray result;
411 while (true) {
412 yyCh = getChar();
413 if (yyCh == EOF || yyCh == '\n')
414 break;
415 result.append(char(yyCh));
416 }
417 return result;
418 }
419
420 Token getToken(StringType stringType = StringType::NoString)
421 {
422 yyIdent.clear();
423 yyStringLen = 0;
424 while (yyCh != EOF) {
425 yyLineNo = yyCurLineNo;
426
427 if (std::isalpha(yyCh) || yyCh == '_') {
428 do {
429 yyIdent.append(char(yyCh));
430 yyCh = getChar();
431 } while (std::isalnum(yyCh) || yyCh == '_');
432
433 return getTokens().value(yyIdent, Tok_Ident);
434 }
435 switch (yyCh) {
436 case '#': {
437 auto comment = QString::fromUtf8(readLine());
438 if (!metaStrings.parse(comment)) {
439 qWarning() << qPrintable(yyFileName) << ':' << yyLineNo << ": "
440 << metaStrings.popError().toStdString();
441 break;
442 }
443 if (metaStrings.magicComment()) {
444 auto [context, comment] = *metaStrings.magicComment();
445 TranslatorMessage msg(transcode(context), QString(), transcode(comment),
446 QString(), yyFileName, yyCurLineNo, QStringList(),
447 TranslatorMessage::Finished, false);
448 msg.setExtraComment(transcode(metaStrings.extracomment().simplified()));
449 tor.append(msg);
450 tor.setExtras(metaStrings.extra());
451 metaStrings.clear();
452 }
453 break;
454 }
455 case '"':
456 case '\'':
457 return parseString(stringType);
458 case '(':
459 yyParenDepth++;
460 yyCh = getChar();
461 return Tok_LeftParen;
462 case ')':
463 yyParenDepth--;
464 yyCh = getChar();
465 return Tok_RightParen;
466 case ',':
467 yyCh = getChar();
468 return Tok_Comma;
469 case '.':
470 yyCh = getChar();
471 return Tok_Dot;
472 case '0':
473 case '1':
474 case '2':
475 case '3':
476 case '4':
477 case '5':
478 case '6':
479 case '7':
480 case '8':
481 case '9': {
482 QByteArray ba;
483 ba += char(yyCh);
484 yyCh = getChar();
485 const bool hex = yyCh == 'x';
486 if (hex) {
487 ba += char(yyCh);
488 yyCh = getChar();
489 }
490 while ((hex ? std::isxdigit(yyCh) : std::isdigit(yyCh))) {
491 ba += char(yyCh);
492 yyCh = getChar();
493 }
494 bool ok;
495 auto v = ba.toLongLong(&ok);
496 Q_UNUSED(v);
497 if (ok)
498 return Tok_Integer;
499 break;
500 }
501 default:
502 yyCh = getChar();
503 }
504 }
505 return Tok_Eof;
506 }
507
508 bool match(Token t)
509 {
510 const bool matches = (yyTok == t);
511 if (matches)
512 yyTok = getToken();
513 return matches;
514 }
515
516 bool matchStringStart()
517 {
518 if (yyTok == Tok_String)
519 return true;
520 // Check for f"bla{var}" and raw strings r"bla".
521 if (yyTok == Tok_Ident && yyIdent.size() == 1) {
522 switch (yyIdent.at(0)) {
523 case 'r':
524 yyTok = getToken(StringType::RawString);
525 return yyTok == Tok_String;
526 case 'f':
527 yyTok = getToken(StringType::FormatString);
528 return yyTok == Tok_String;
529 }
530 }
531 return false;
532 }
533
534 bool matchString(QByteArray *s)
535 {
536 s->clear();
537 bool ok = false;
538 while (matchStringStart()) {
539 *s += yyString;
540 yyTok = getToken();
541 ok = true;
542 }
543 return ok;
544 }
545
546 bool matchEncoding(bool *utf8)
547 {
548 // Remove any leading module paths.
549 if (yyTok == Tok_Ident && std::strcmp(yyIdent, "PySide6") == 0) {
550 yyTok = getToken();
551
552 if (yyTok != Tok_Dot)
553 return false;
554
555 yyTok = getToken();
556 }
557
558 if (yyTok == Tok_Ident
559 && (std::strcmp(yyIdent, "QtGui") == 0 || std::strcmp(yyIdent, "QtCore") == 0)) {
560 yyTok = getToken();
561
562 if (yyTok != Tok_Dot)
563 return false;
564
565 yyTok = getToken();
566 }
567
568 if (yyTok == Tok_Ident) {
569 if (std::strcmp(yyIdent, "QApplication") == 0
570 || std::strcmp(yyIdent, "QGuiApplication") == 0
571 || std::strcmp(yyIdent, "QCoreApplication") == 0) {
572 yyTok = getToken();
573
574 if (yyTok == Tok_Dot)
575 yyTok = getToken();
576 }
577
578 *utf8 = QByteArray(yyIdent).endsWith("UTF8");
579 yyTok = getToken();
580 return true;
581 }
582 return false;
583 }
584
585 bool matchStringOrNone(QByteArray *s)
586 {
587 bool matches = matchString(s);
588
589 if (!matches)
590 matches = match(Tok_None);
591
592 return matches;
593 }
594
595 /*
596 * match any expression that can return a number, which can be
597 * 1. Literal number (e.g. '11')
598 * 2. simple identifier (e.g. 'm_count')
599 * 3. simple function call (e.g. 'size()')
600 * 4. function call on an object (e.g. 'list.size()')
601 * * Other cases:
602 * size(2,4)
603 * list().size()
604 * list(a,b).size(2,4)
605 * etc...
606 */
607 bool matchExpression()
608 {
609 if (match(Tok_Integer))
610 return true;
611
612 int parenlevel = 0;
613 while (match(Tok_Ident) || parenlevel > 0) {
614 if (yyTok == Tok_RightParen) {
615 if (parenlevel == 0)
616 break;
617 --parenlevel;
618 yyTok = getToken();
619 } else if (yyTok == Tok_LeftParen) {
620 yyTok = getToken();
621 if (yyTok == Tok_RightParen) {
622 yyTok = getToken();
623 } else {
624 ++parenlevel;
625 }
626 } else if (yyTok == Tok_Ident) {
627 continue;
628 } else if (parenlevel == 0) {
629 return false;
630 }
631 }
632 return true;
633 }
634
635 bool parseTranslate(QByteArray *text, QByteArray *context, QByteArray *comment, bool *utf8,
636 bool *plural)
637 {
638 text->clear();
639 context->clear();
640 comment->clear();
641 *utf8 = false;
642 *plural = false;
643
644 yyTok = getToken();
645 if (!match(Tok_LeftParen) || !matchString(context) || !match(Tok_Comma)
646 || !matchString(text)) {
647 return false;
648 }
649
650 if (match(Tok_RightParen))
651 return true;
652
653 // not a comma or a right paren, illegal syntax
654 if (!match(Tok_Comma))
655 return false;
656
657 // python accepts trailing commas within parenthesis, so allow a comma with nothing after
658 if (match(Tok_RightParen))
659 return true;
660
661 // check for comment
662 if (!matchStringOrNone(comment))
663 return false; // not a comment, or a trailing comma... something is wrong
664
665 if (match(Tok_RightParen))
666 return true;
667
668 // not a comma or a right paren, illegal syntax
669 if (!match(Tok_Comma))
670 return false;
671
672 // python accepts trailing commas within parenthesis, so allow a comma with nothing after
673 if (match(Tok_RightParen))
674 return true;
675
676 // look for optional encoding information
677 if (matchEncoding(utf8)) {
678 if (match(Tok_RightParen))
679 return true;
680
681 // not a comma or a right paren, illegal syntax
682 if (!match(Tok_Comma))
683 return false;
684
685 // python accepts trailing commas within parenthesis, so allow a comma with nothing
686 // after
687 if (match(Tok_RightParen))
688 return true;
689 }
690
691 // Must be a plural expression
692 if (!matchExpression())
693 return false;
694
695 *plural = true;
696
697 // Ignore any trailing comma here
698 match(Tok_Comma);
699
700 // This must be the end, or there are too many parameters
701 if (match(Tok_RightParen))
702 return true;
703
704 return false;
705 }
706
707 void setMessageParameters(TranslatorMessage *message, const MetaStrings &meta)
708 {
709 // PYSIDE-2863: parseTranslate() can read past the message
710 // and capture extraComments intended for the next message.
711 // Use only extraComments for the current message.
712
713 message->setExtraComment(transcode(meta.extracomment().simplified()));
714 message->setId(meta.msgid());
715 message->setExtras(meta.extra());
716 if (!meta.label().isEmpty())
717 m_cd.appendError("%1:%2: labels cannot be used with text-based translation. "
718 "Ignoring\n"_L1.arg(yyFileName)
719 .arg(yyLineNo));
720 }
721
722 QString yyFileName;
723 Token yyTok{};
724 int yyCh{};
725 QByteArray yyIdent;
726 char yyString[65536];
727 size_t yyStringLen{};
728 int yyParenDepth{};
729 int yyLineNo = 1;
730 int yyCurLineNo{};
731 // the file to read from (if reading from a file)
732 FILE *yyInFile;
733 // the string to read from and current position in the string (otherwise)
734 int yyInPos{};
735 int buf{};
736 int yyIndentationSize{};
737 int yyContinuousSpaceCount{};
738 bool yyCountingIndentation = false;
739 // (Context, indentation level) pair.
740 using ContextPair = QPair<QByteArray, int>;
741 // Stack of (Context, indentation level) pairs.
742 using ContextStack = QStack<ContextPair>;
743 ContextStack yyContextStack;
744 MetaStrings metaStrings;
745 Translator &tor;
746 ConversionData &m_cd;
747};
748
749bool loadPython(Translator &translator, const QString &fileName, ConversionData &cd)
750{
751
752 bool error = false;
753 PythonParser parser(translator, fileName, error, cd);
754 if (error) {
755 cd.appendError(QStringLiteral("Cannot open %1").arg(fileName));
756 return false;
757 }
758
759 parser.parse();
760 return true;
761}
762
763QT_END_NAMESPACE
bool loadPython(Translator &translator, const QString &fileName, ConversionData &cd)
Definition python.cpp:749