Qt
Internal/Contributor docs for the Qt SDK. Note: These are NOT official API docs; those are found at https://doc.qt.io/
Loading...
Searching...
No Matches
python.cpp
Go to the documentation of this file.
1// Copyright (C) 2002-2007 Detlev Offenbach <detlev@die-offenbachs.de>
2// Copyright (C) 2021 The Qt Company Ltd.
3// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
4
5#include <translator.h>
6#include "trparser.h"
7#include "metastrings.h"
8
9#include <QtCore/qhash.h>
10#include <QtCore/qlist.h>
11#include <QtCore/qstring.h>
12#include <QtCore/qtextstream.h>
13#include <QtCore/qstack.h>
14
15#include <cctype>
16#include <cerrno>
17#include <cstdio>
18#include <cstring>
19
20using namespace Qt::StringLiterals;
21
22QT_BEGIN_NAMESPACE
23
24class PythonParser
25{
26
27 enum Token {
28 Tok_Eof,
29 Tok_class,
30 Tok_def,
31 Tok_return,
32 Tok_tr,
33 Tok_trUtf8,
34 Tok_translate,
35 Tok_Ident,
36 Tok_Dot,
37 Tok_String,
38 Tok_LeftParen,
39 Tok_RightParen,
40 Tok_Comma,
41 Tok_None,
42 Tok_Integer
43 };
44
45 enum class StringType { NoString, String, FormatString, RawString };
46
47public:
48 PythonParser(Translator &translator, const QString &fileName, bool &error, ConversionData &cd)
49 : tor(translator), m_cd(cd)
50 {
51#ifdef Q_CC_MSVC
52 const auto *fileNameC = reinterpret_cast<const wchar_t *>(fileName.utf16());
53 error = _wfopen_s(&yyInFile, fileNameC, L"r") != 0;
54#else
55 const QByteArray fileNameC = QFile::encodeName(fileName);
56 yyInFile = std::fopen(fileNameC.constData(), "r");
57 error = yyInFile == nullptr;
58#endif
59 if (!error)
60 startTokenizer(fileName);
61 }
62
63 /*
64 Accomplishes a very easy task: It finds all strings inside a tr() or translate()
65 call, and possibly finds out the context of the call. It supports
66 three cases:
67 (1) the context is specified, as in FunnyDialog.tr("Hello") or
68 translate("FunnyDialog", "Hello");
69 (2) the call appears within an inlined function;
70 (3) the call appears within a function defined outside the class definition.
71 */
72 void parse(const QByteArray &initialContext = {}, const QByteArray &defaultContext = {})
73 {
74 QByteArray context;
75 QByteArray text;
76 QByteArray comment;
77 QByteArray prefix;
78 bool utf8 = false;
79
80 yyTok = getToken();
81 while (yyTok != Tok_Eof) {
82
83 switch (yyTok) {
84 case Tok_class: {
85 if (yyIndentationSize < 0 && yyContinuousSpaceCount > 0)
86 yyIndentationSize = yyContinuousSpaceCount; // First indented "class"
87 const int indent =
88 yyIndentationSize > 0 ? yyContinuousSpaceCount / yyIndentationSize : 0;
89 while (!yyContextStack.isEmpty() && yyContextStack.top().second >= indent)
90 yyContextStack.pop();
91 yyTok = getToken();
92 yyContextStack.push({ yyIdent, indent });
93 yyTok = getToken();
94 } break;
95 case Tok_def:
96 if (yyIndentationSize < 0 && yyContinuousSpaceCount > 0)
97 yyIndentationSize = yyContinuousSpaceCount; // First indented "def"
98 if (!yyContextStack.isEmpty()) {
99 // Pop classes if the function is further outdented than the class on the top
100 // (end of a nested class).
101 const int classIndent = yyIndentationSize > 0
102 ? yyContinuousSpaceCount / yyIndentationSize - 1
103 : 0;
104 while (!yyContextStack.isEmpty() && yyContextStack.top().second > classIndent)
105 yyContextStack.pop();
106 }
107 yyTok = getToken();
108 break;
109 case Tok_tr:
110 case Tok_trUtf8: {
111 utf8 = true;
112 yyTok = getToken();
113 const int lineNo = yyCurLineNo;
114 if (match(Tok_LeftParen) && matchString(&text)) {
115 comment.clear();
116 bool plural = false;
117
118 MetaStrings metaBackup = std::move(metaStrings);
119
120 if (match(Tok_RightParen)) {
121 // There is no comment or plural arguments.
122 } else if (match(Tok_Comma) && matchStringOrNone(&comment)) {
123 // There is a comment argument.
124 if (match(Tok_RightParen)) {
125 // There is no plural argument.
126 } else if (match(Tok_Comma)) {
127 // There is a plural argument.
128 plural = true;
129 }
130 }
131
132 if (prefix.isEmpty())
133 context = defaultContext;
134 else if (prefix == "self")
135 context = yyContextStack.isEmpty() ? initialContext
136 : yyContextStack.top().first;
137 else
138 context = prefix;
139
140 prefix.clear();
141 TranslatorMessage message(QString::fromUtf8(context), QString::fromUtf8(text),
142 QString::fromUtf8(comment), {}, yyFileName, lineNo,
143 {}, TranslatorMessage::Unfinished, plural);
144 setMessageParameters(&message, metaBackup);
145 tor.extend(message, m_cd);
146 }
147 } break;
148 case Tok_translate: {
149 bool plural{};
150 const int lineNo = yyCurLineNo;
151 MetaStrings metaBackup = std::move(metaStrings);
152 if (parseTranslate(&text, &context, &comment, &utf8, &plural)) {
153 TranslatorMessage message(QString::fromUtf8(context), QString::fromUtf8(text),
154 QString::fromUtf8(comment), {}, yyFileName, lineNo,
155 {}, TranslatorMessage::Unfinished, plural);
156 setMessageParameters(&message, metaBackup);
157 tor.extend(message, m_cd);
158 } else {
159 metaStrings = std::move(metaBackup);
160 }
161 } break;
162 case Tok_Ident:
163 if (!prefix.isEmpty())
164 prefix += '.';
165 prefix += yyIdent;
166 yyTok = getToken();
167 if (yyTok != Tok_Dot)
168 prefix.clear();
169 break;
170 default:
171 yyTok = getToken();
172 }
173 }
174
175 if (yyParenDepth != 0) {
176 qWarning("%s: Unbalanced parentheses in Python code", qPrintable(yyFileName));
177 }
178 }
179
180 ~PythonParser() { std::fclose(yyInFile); }
181
182private:
183 QHash<QByteArray, Token> fillTokens()
184 {
185 QHash<QByteArray, Token> tokens = { { "None", Tok_None }, { "class", Tok_class },
186 { "def", Tok_def }, { "return", Tok_return },
187 { "__tr", Tok_tr }, // Legacy?
188 { "__trUtf8", Tok_trUtf8 } };
189
190 const auto &nameMap = trFunctionAliasManager.nameToTrFunctionMap();
191 for (auto it = nameMap.cbegin(), end = nameMap.cend(); it != end; ++it) {
192 switch (it.value()) {
193 case TrFunctionAliasManager::Function_tr:
194 case TrFunctionAliasManager::Function_QT_TR_NOOP:
195 tokens.insert(it.key().toUtf8(), Tok_tr);
196 break;
197 case TrFunctionAliasManager::Function_trUtf8:
198 tokens.insert(it.key().toUtf8(), Tok_trUtf8);
199 break;
200 case TrFunctionAliasManager::Function_translate:
201 case TrFunctionAliasManager::Function_QT_TRANSLATE_NOOP:
202 // QTranslator::findMessage() has the same parameters as QApplication::translate().
203 case TrFunctionAliasManager::Function_findMessage:
204 tokens.insert(it.key().toUtf8(), Tok_translate);
205 break;
206 default:
207 break;
208 }
209 }
210 return tokens;
211 }
212
213 QHash<QByteArray, Token> &getTokens()
214 {
215 static QHash<QByteArray, Token> tokens = fillTokens();
216 return tokens;
217 }
218
219 int getChar()
220 {
221 int c;
222
223 if (buf < 0) {
224 c = getc(yyInFile);
225 } else {
226 c = buf;
227 buf = -1;
228 }
229 if (c == '\n') {
230 yyCurLineNo++;
231 yyCountingIndentation = true;
232 yyContinuousSpaceCount = 0;
233 } else if (yyCountingIndentation && (c == 32 || c == 9)) {
234 yyContinuousSpaceCount++;
235 } else {
236 yyCountingIndentation = false;
237 }
238 return c;
239 }
240
241 int peekChar()
242 {
243 int c = getc(yyInFile);
244 buf = c;
245 return c;
246 }
247
248 void startTokenizer(const QString &fileName)
249 {
250 yyInPos = 0;
251 buf = -1;
252
253 yyFileName = fileName;
254 yyCh = getChar();
255 yyParenDepth = 0;
256 yyCurLineNo = 1;
257
258 yyIndentationSize = -1;
259 yyContinuousSpaceCount = 0;
260 yyContextStack.clear();
261 }
262
263 bool parseStringEscape(int quoteChar, StringType stringType)
264 {
265 static const char tab[] = "abfnrtv";
266 static const char backTab[] = "\a\b\f\n\r\t\v";
267
268 yyCh = getChar();
269 if (yyCh == EOF)
270 return false;
271
272 if (stringType == StringType::RawString) {
273 if (yyCh != quoteChar) // Only quotes can be escaped in raw strings
274 yyString[yyStringLen++] = '\\';
275 yyString[yyStringLen++] = yyCh;
276 yyCh = getChar();
277 return true;
278 }
279
280 if (yyCh == 'x' || yyCh == 'u' || yyCh == 'U') {
281 qsizetype maxSize = 2; // \x
282 if (yyCh == 'u')
283 maxSize = 4;
284 else if (yyCh == 'U')
285 maxSize = 8;
286
287 QByteArray hex;
288 yyCh = getChar();
289 if (yyCh == EOF)
290 return false;
291
292 while (maxSize-- && std::isxdigit(yyCh)) {
293 hex += char(yyCh);
294 yyCh = getChar();
295 if (yyCh == EOF)
296 return false;
297 }
298 uint n;
299#ifdef Q_CC_MSVC
300 sscanf_s(hex.constData(), "%x", &n);
301#else
302 std::sscanf(hex.constData(), "%x", &n);
303#endif
304
305 QByteArray hexChar = QString(QChar(n)).toUtf8();
306 if (yyStringLen < sizeof(yyString) - hexChar.size())
307 for (char c : std::as_const(hexChar))
308 yyString[yyStringLen++] = c;
309 return true;
310 }
311
312 if (yyCh >= '0' && yyCh < '8') {
313 QByteArray oct;
314 int n = 0;
315 do {
316 oct += char(yyCh);
317 ++n;
318 yyCh = getChar();
319 if (yyCh == EOF)
320 return false;
321 } while (yyCh >= '0' && yyCh < '8' && n < 3);
322#ifdef Q_CC_MSVC
323 sscanf_s(oct.constData(), "%o", &n);
324#else
325 std::sscanf(oct.constData(), "%o", &n);
326#endif
327 if (yyStringLen < sizeof(yyString) - 1)
328 yyString[yyStringLen++] = char(n);
329 return true;
330 }
331
332 const char *p = std::strchr(tab, yyCh);
333 if (yyStringLen < sizeof(yyString) - 1) {
334 yyString[yyStringLen++] = p == nullptr ? char(yyCh) : backTab[p - tab];
335 }
336 yyCh = getChar();
337 return true;
338 }
339
340 Token parseString(StringType stringType = StringType::NoString)
341 {
342 int quoteChar = yyCh;
343 bool tripleQuote = false;
344 bool singleQuote = true;
345 bool in = false;
346
347 yyCh = getChar();
348
349 while (yyCh != EOF) {
350 if (singleQuote && (yyCh == '\n' || (in && yyCh == quoteChar)))
351 break;
352
353 if (yyCh == quoteChar) {
354 if (peekChar() == quoteChar) {
355 yyCh = getChar();
356 if (!tripleQuote) {
357 tripleQuote = true;
358 singleQuote = false;
359 in = true;
360 yyCh = getChar();
361 } else {
362 yyCh = getChar();
363 if (yyCh == quoteChar) {
364 tripleQuote = false;
365 break;
366 }
367 }
368 } else if (tripleQuote) {
369 if (yyStringLen < sizeof(yyString) - 1)
370 yyString[yyStringLen++] = char(yyCh);
371 yyCh = getChar();
372 continue;
373 } else {
374 break;
375 }
376 } else {
377 in = true;
378 }
379
380 if (yyCh == '\\') {
381 if (!parseStringEscape(quoteChar, stringType))
382 return Tok_Eof;
383 } else {
384 char *yStart = yyString + yyStringLen;
385 char *yp = yStart;
386 while (yyCh != EOF && (tripleQuote || yyCh != '\n') && yyCh != quoteChar
387 && yyCh != '\\') {
388 *yp++ = char(yyCh);
389 yyCh = getChar();
390 }
391 yyStringLen += yp - yStart;
392 }
393 }
394 yyString[yyStringLen] = '\0';
395
396 if (yyCh != quoteChar) {
397 printf("%c\n", yyCh);
398
399 qWarning("%s:%d: Unterminated string", qPrintable(yyFileName), yyLineNo);
400 }
401
402 if (yyCh == EOF)
403 return Tok_Eof;
404 yyCh = getChar();
405 return Tok_String;
406 }
407
408 QByteArray readLine()
409 {
410 QByteArray result;
411 while (true) {
412 yyCh = getChar();
413 if (yyCh == EOF || yyCh == '\n')
414 break;
415 result.append(char(yyCh));
416 }
417 return result;
418 }
419
420 Token getToken(StringType stringType = StringType::NoString)
421 {
422 yyIdent.clear();
423 yyStringLen = 0;
424 while (yyCh != EOF) {
425 yyLineNo = yyCurLineNo;
426
427 if (std::isalpha(yyCh) || yyCh == '_') {
428 do {
429 yyIdent.append(char(yyCh));
430 yyCh = getChar();
431 } while (std::isalnum(yyCh) || yyCh == '_');
432
433 return getTokens().value(yyIdent, Tok_Ident);
434 }
435 switch (yyCh) {
436 case '#': {
437 auto comment = QString::fromUtf8(readLine());
438 if (!metaStrings.parse(comment)) {
439 qWarning() << qPrintable(yyFileName) << ':' << yyLineNo << ": "
440 << metaStrings.popError().toStdString();
441 break;
442 }
443 if (metaStrings.magicComment()) {
444 auto [context, comment] = *metaStrings.magicComment();
445 TranslatorMessage msg(transcode(context), QString(), transcode(comment),
446 QString(), yyFileName, yyCurLineNo, QStringList(),
447 TranslatorMessage::Finished, false);
448 msg.setExtraComment(transcode(metaStrings.extracomment().simplified()));
449 tor.append(msg);
450 tor.setExtras(metaStrings.extra());
451 metaStrings.clear();
452 }
453 break;
454 }
455 case '"':
456 case '\'':
457 return parseString(stringType);
458 case '(':
459 yyParenDepth++;
460 yyCh = getChar();
461 return Tok_LeftParen;
462 case ')':
463 yyParenDepth--;
464 yyCh = getChar();
465 return Tok_RightParen;
466 case ',':
467 yyCh = getChar();
468 return Tok_Comma;
469 case '.':
470 yyCh = getChar();
471 return Tok_Dot;
472 case '0':
473 case '1':
474 case '2':
475 case '3':
476 case '4':
477 case '5':
478 case '6':
479 case '7':
480 case '8':
481 case '9': {
482 QByteArray ba;
483 ba += char(yyCh);
484 yyCh = getChar();
485 const bool hex = yyCh == 'x';
486 if (hex) {
487 ba += char(yyCh);
488 yyCh = getChar();
489 }
490 while ((hex ? std::isxdigit(yyCh) : std::isdigit(yyCh))) {
491 ba += char(yyCh);
492 yyCh = getChar();
493 }
494 bool ok;
495 auto v = ba.toLongLong(&ok);
496 Q_UNUSED(v);
497 if (ok)
498 return Tok_Integer;
499 break;
500 }
501 default:
502 yyCh = getChar();
503 }
504 }
505 return Tok_Eof;
506 }
507
508 bool match(Token t)
509 {
510 const bool matches = (yyTok == t);
511 if (matches)
512 yyTok = getToken();
513 return matches;
514 }
515
516 bool matchStringStart()
517 {
518 if (yyTok == Tok_String)
519 return true;
520 // Check for f"bla{var}" and raw strings r"bla".
521 if (yyTok == Tok_Ident && yyIdent.size() == 1) {
522 switch (yyIdent.at(0)) {
523 case 'r':
524 yyTok = getToken(StringType::RawString);
525 return yyTok == Tok_String;
526 case 'f':
527 yyTok = getToken(StringType::FormatString);
528 return yyTok == Tok_String;
529 }
530 }
531 return false;
532 }
533
534 bool matchString(QByteArray *s)
535 {
536 s->clear();
537 bool ok = false;
538 while (matchStringStart()) {
539 *s += yyString;
540 yyTok = getToken();
541 ok = true;
542 }
543 return ok;
544 }
545
546 bool matchEncoding(bool *utf8)
547 {
548 // Remove any leading module paths.
549 if (yyTok == Tok_Ident && yyIdent == "PySide6") {
550 yyTok = getToken();
551
552 if (yyTok != Tok_Dot)
553 return false;
554
555 yyTok = getToken();
556 }
557
558 if (yyTok == Tok_Ident && (yyIdent == "QtGui" || yyIdent == "QtCore")) {
559 yyTok = getToken();
560
561 if (yyTok != Tok_Dot)
562 return false;
563
564 yyTok = getToken();
565 }
566
567 if (yyTok == Tok_Ident) {
568 if (yyIdent == "QApplication" || yyIdent == "QGuiApplication"
569 || yyIdent == "QCoreApplication") {
570 yyTok = getToken();
571
572 if (yyTok == Tok_Dot)
573 yyTok = getToken();
574 }
575
576 *utf8 = QByteArray(yyIdent).endsWith("UTF8");
577 yyTok = getToken();
578 return true;
579 }
580 return false;
581 }
582
583 bool matchStringOrNone(QByteArray *s)
584 {
585 bool matches = matchString(s);
586
587 if (!matches)
588 matches = match(Tok_None);
589
590 return matches;
591 }
592
593 /*
594 * match any expression that can return a number, which can be
595 * 1. Literal number (e.g. '11')
596 * 2. simple identifier (e.g. 'm_count')
597 * 3. simple function call (e.g. 'size()')
598 * 4. function call on an object (e.g. 'list.size()')
599 * * Other cases:
600 * size(2,4)
601 * list().size()
602 * list(a,b).size(2,4)
603 * etc...
604 */
605 bool matchExpression()
606 {
607 if (match(Tok_Integer))
608 return true;
609
610 int parenlevel = 0;
611 while (match(Tok_Ident) || parenlevel > 0) {
612 if (yyTok == Tok_RightParen) {
613 if (parenlevel == 0)
614 break;
615 --parenlevel;
616 yyTok = getToken();
617 } else if (yyTok == Tok_LeftParen) {
618 yyTok = getToken();
619 if (yyTok == Tok_RightParen) {
620 yyTok = getToken();
621 } else {
622 ++parenlevel;
623 }
624 } else if (yyTok == Tok_Ident) {
625 continue;
626 } else if (parenlevel == 0) {
627 return false;
628 }
629 }
630 return true;
631 }
632
633 bool parseTranslate(QByteArray *text, QByteArray *context, QByteArray *comment, bool *utf8,
634 bool *plural)
635 {
636 text->clear();
637 context->clear();
638 comment->clear();
639 *utf8 = false;
640 *plural = false;
641
642 yyTok = getToken();
643 if (!match(Tok_LeftParen) || !matchString(context) || !match(Tok_Comma)
644 || !matchString(text)) {
645 return false;
646 }
647
648 if (match(Tok_RightParen))
649 return true;
650
651 // not a comma or a right paren, illegal syntax
652 if (!match(Tok_Comma))
653 return false;
654
655 // python accepts trailing commas within parenthesis, so allow a comma with nothing after
656 if (match(Tok_RightParen))
657 return true;
658
659 // check for comment
660 if (!matchStringOrNone(comment))
661 return false; // not a comment, or a trailing comma... something is wrong
662
663 if (match(Tok_RightParen))
664 return true;
665
666 // not a comma or a right paren, illegal syntax
667 if (!match(Tok_Comma))
668 return false;
669
670 // python accepts trailing commas within parenthesis, so allow a comma with nothing after
671 if (match(Tok_RightParen))
672 return true;
673
674 // look for optional encoding information
675 if (matchEncoding(utf8)) {
676 if (match(Tok_RightParen))
677 return true;
678
679 // not a comma or a right paren, illegal syntax
680 if (!match(Tok_Comma))
681 return false;
682
683 // python accepts trailing commas within parenthesis, so allow a comma with nothing
684 // after
685 if (match(Tok_RightParen))
686 return true;
687 }
688
689 // Must be a plural expression
690 if (!matchExpression())
691 return false;
692
693 *plural = true;
694
695 // Ignore any trailing comma here
696 match(Tok_Comma);
697
698 // This must be the end, or there are too many parameters
699 if (match(Tok_RightParen))
700 return true;
701
702 return false;
703 }
704
705 void setMessageParameters(TranslatorMessage *message, const MetaStrings &meta)
706 {
707 // PYSIDE-2863: parseTranslate() can read past the message
708 // and capture extraComments intended for the next message.
709 // Use only extraComments for the current message.
710
711 message->setExtraComment(transcode(meta.extracomment().simplified()));
712 message->setId(meta.msgid());
713 message->setExtras(meta.extra());
714 if (!meta.label().isEmpty())
715 m_cd.appendError("%1:%2: labels cannot be used with text-based translation. "
716 "Ignoring\n"_L1.arg(yyFileName)
717 .arg(yyLineNo));
718 }
719
720 QString yyFileName;
721 Token yyTok{};
722 int yyCh{};
723 QByteArray yyIdent;
724 char yyString[65536];
725 size_t yyStringLen{};
726 int yyParenDepth{};
727 int yyLineNo = 1;
728 int yyCurLineNo{};
729 // the file to read from (if reading from a file)
730 FILE *yyInFile;
731 // the string to read from and current position in the string (otherwise)
732 int yyInPos{};
733 int buf{};
734 int yyIndentationSize{};
735 int yyContinuousSpaceCount{};
736 bool yyCountingIndentation = false;
737 // (Context, indentation level) pair.
738 using ContextPair = QPair<QByteArray, int>;
739 // Stack of (Context, indentation level) pairs.
740 using ContextStack = QStack<ContextPair>;
741 ContextStack yyContextStack;
742 MetaStrings metaStrings;
743 Translator &tor;
744 ConversionData &m_cd;
745};
746
747bool loadPython(Translator &translator, const QString &fileName, ConversionData &cd)
748{
749
750 bool error = false;
751 PythonParser parser(translator, fileName, error, cd);
752 if (error) {
753 cd.appendError(QStringLiteral("Cannot open %1").arg(fileName));
754 return false;
755 }
756
757 parser.parse();
758 return true;
759}
760
761QT_END_NAMESPACE
bool loadPython(Translator &translator, const QString &fileName, ConversionData &cd)
Definition python.cpp:747