Qt
Internal/Contributor docs for the Qt SDK. Note: These are NOT official API docs; those are found at https://doc.qt.io/
Loading...
Searching...
No Matches
qtexthtmlparser.cpp
Go to the documentation of this file.
1// Copyright (C) 2016 The Qt Company Ltd.
2// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
3// Qt-Security score:critical reason:data-parser
4
6
7#include <qbytearray.h>
8#include <qstack.h>
9#include <qdebug.h>
10#include <qthread.h>
11#include <qguiapplication.h>
12
13#include "qtextdocument.h"
14#include "qtextformat_p.h"
16#include "qtextcursor.h"
17#include "qfont_p.h"
18
19#include <algorithm>
20
21#ifndef QT_NO_TEXTHTMLPARSER
22
23QT_BEGIN_NAMESPACE
24
25using namespace Qt::StringLiterals;
26
27// see also tst_qtextdocumentfragment.cpp
28static constexpr struct QTextHtmlEntity { QLatin1StringView name; char16_t code; } entities[]= {
29 { "AElig"_L1, 0x00c6 },
30 { "AMP"_L1, 38 },
31 { "Aacute"_L1, 0x00c1 },
32 { "Acirc"_L1, 0x00c2 },
33 { "Agrave"_L1, 0x00c0 },
34 { "Alpha"_L1, 0x0391 },
35 { "Aring"_L1, 0x00c5 },
36 { "Atilde"_L1, 0x00c3 },
37 { "Auml"_L1, 0x00c4 },
38 { "Beta"_L1, 0x0392 },
39 { "Ccedil"_L1, 0x00c7 },
40 { "Chi"_L1, 0x03a7 },
41 { "Dagger"_L1, 0x2021 },
42 { "Delta"_L1, 0x0394 },
43 { "ETH"_L1, 0x00d0 },
44 { "Eacute"_L1, 0x00c9 },
45 { "Ecirc"_L1, 0x00ca },
46 { "Egrave"_L1, 0x00c8 },
47 { "Epsilon"_L1, 0x0395 },
48 { "Eta"_L1, 0x0397 },
49 { "Euml"_L1, 0x00cb },
50 { "GT"_L1, 62 },
51 { "Gamma"_L1, 0x0393 },
52 { "Iacute"_L1, 0x00cd },
53 { "Icirc"_L1, 0x00ce },
54 { "Igrave"_L1, 0x00cc },
55 { "Iota"_L1, 0x0399 },
56 { "Iuml"_L1, 0x00cf },
57 { "Kappa"_L1, 0x039a },
58 { "LT"_L1, 60 },
59 { "Lambda"_L1, 0x039b },
60 { "Mu"_L1, 0x039c },
61 { "Ntilde"_L1, 0x00d1 },
62 { "Nu"_L1, 0x039d },
63 { "OElig"_L1, 0x0152 },
64 { "Oacute"_L1, 0x00d3 },
65 { "Ocirc"_L1, 0x00d4 },
66 { "Ograve"_L1, 0x00d2 },
67 { "Omega"_L1, 0x03a9 },
68 { "Omicron"_L1, 0x039f },
69 { "Oslash"_L1, 0x00d8 },
70 { "Otilde"_L1, 0x00d5 },
71 { "Ouml"_L1, 0x00d6 },
72 { "Phi"_L1, 0x03a6 },
73 { "Pi"_L1, 0x03a0 },
74 { "Prime"_L1, 0x2033 },
75 { "Psi"_L1, 0x03a8 },
76 { "QUOT"_L1, 34 },
77 { "Rho"_L1, 0x03a1 },
78 { "Scaron"_L1, 0x0160 },
79 { "Sigma"_L1, 0x03a3 },
80 { "THORN"_L1, 0x00de },
81 { "Tau"_L1, 0x03a4 },
82 { "Theta"_L1, 0x0398 },
83 { "Uacute"_L1, 0x00da },
84 { "Ucirc"_L1, 0x00db },
85 { "Ugrave"_L1, 0x00d9 },
86 { "Upsilon"_L1, 0x03a5 },
87 { "Uuml"_L1, 0x00dc },
88 { "Xi"_L1, 0x039e },
89 { "Yacute"_L1, 0x00dd },
90 { "Yuml"_L1, 0x0178 },
91 { "Zeta"_L1, 0x0396 },
92 { "aacute"_L1, 0x00e1 },
93 { "acirc"_L1, 0x00e2 },
94 { "acute"_L1, 0x00b4 },
95 { "aelig"_L1, 0x00e6 },
96 { "agrave"_L1, 0x00e0 },
97 { "alefsym"_L1, 0x2135 },
98 { "alpha"_L1, 0x03b1 },
99 { "amp"_L1, 38 },
100 { "and"_L1, 0x22a5 },
101 { "ang"_L1, 0x2220 },
102 { "apos"_L1, 0x0027 },
103 { "aring"_L1, 0x00e5 },
104 { "asymp"_L1, 0x2248 },
105 { "atilde"_L1, 0x00e3 },
106 { "auml"_L1, 0x00e4 },
107 { "bdquo"_L1, 0x201e },
108 { "beta"_L1, 0x03b2 },
109 { "brvbar"_L1, 0x00a6 },
110 { "bull"_L1, 0x2022 },
111 { "cap"_L1, 0x2229 },
112 { "ccedil"_L1, 0x00e7 },
113 { "cedil"_L1, 0x00b8 },
114 { "cent"_L1, 0x00a2 },
115 { "chi"_L1, 0x03c7 },
116 { "circ"_L1, 0x02c6 },
117 { "clubs"_L1, 0x2663 },
118 { "cong"_L1, 0x2245 },
119 { "copy"_L1, 0x00a9 },
120 { "crarr"_L1, 0x21b5 },
121 { "cup"_L1, 0x222a },
122 { "curren"_L1, 0x00a4 },
123 { "dArr"_L1, 0x21d3 },
124 { "dagger"_L1, 0x2020 },
125 { "darr"_L1, 0x2193 },
126 { "deg"_L1, 0x00b0 },
127 { "delta"_L1, 0x03b4 },
128 { "diams"_L1, 0x2666 },
129 { "divide"_L1, 0x00f7 },
130 { "eacute"_L1, 0x00e9 },
131 { "ecirc"_L1, 0x00ea },
132 { "egrave"_L1, 0x00e8 },
133 { "empty"_L1, 0x2205 },
134 { "emsp"_L1, 0x2003 },
135 { "ensp"_L1, 0x2002 },
136 { "epsilon"_L1, 0x03b5 },
137 { "equiv"_L1, 0x2261 },
138 { "eta"_L1, 0x03b7 },
139 { "eth"_L1, 0x00f0 },
140 { "euml"_L1, 0x00eb },
141 { "euro"_L1, 0x20ac },
142 { "exist"_L1, 0x2203 },
143 { "fnof"_L1, 0x0192 },
144 { "forall"_L1, 0x2200 },
145 { "frac12"_L1, 0x00bd },
146 { "frac14"_L1, 0x00bc },
147 { "frac34"_L1, 0x00be },
148 { "frasl"_L1, 0x2044 },
149 { "gamma"_L1, 0x03b3 },
150 { "ge"_L1, 0x2265 },
151 { "gt"_L1, 62 },
152 { "hArr"_L1, 0x21d4 },
153 { "harr"_L1, 0x2194 },
154 { "hearts"_L1, 0x2665 },
155 { "hellip"_L1, 0x2026 },
156 { "iacute"_L1, 0x00ed },
157 { "icirc"_L1, 0x00ee },
158 { "iexcl"_L1, 0x00a1 },
159 { "igrave"_L1, 0x00ec },
160 { "image"_L1, 0x2111 },
161 { "infin"_L1, 0x221e },
162 { "int"_L1, 0x222b },
163 { "iota"_L1, 0x03b9 },
164 { "iquest"_L1, 0x00bf },
165 { "isin"_L1, 0x2208 },
166 { "iuml"_L1, 0x00ef },
167 { "kappa"_L1, 0x03ba },
168 { "lArr"_L1, 0x21d0 },
169 { "lambda"_L1, 0x03bb },
170 { "lang"_L1, 0x2329 },
171 { "laquo"_L1, 0x00ab },
172 { "larr"_L1, 0x2190 },
173 { "lceil"_L1, 0x2308 },
174 { "ldquo"_L1, 0x201c },
175 { "le"_L1, 0x2264 },
176 { "lfloor"_L1, 0x230a },
177 { "lowast"_L1, 0x2217 },
178 { "loz"_L1, 0x25ca },
179 { "lrm"_L1, 0x200e },
180 { "lsaquo"_L1, 0x2039 },
181 { "lsquo"_L1, 0x2018 },
182 { "lt"_L1, 60 },
183 { "macr"_L1, 0x00af },
184 { "mdash"_L1, 0x2014 },
185 { "micro"_L1, 0x00b5 },
186 { "middot"_L1, 0x00b7 },
187 { "minus"_L1, 0x2212 },
188 { "mu"_L1, 0x03bc },
189 { "nabla"_L1, 0x2207 },
190 { "nbsp"_L1, 0x00a0 },
191 { "ndash"_L1, 0x2013 },
192 { "ne"_L1, 0x2260 },
193 { "ni"_L1, 0x220b },
194 { "not"_L1, 0x00ac },
195 { "notin"_L1, 0x2209 },
196 { "nsub"_L1, 0x2284 },
197 { "ntilde"_L1, 0x00f1 },
198 { "nu"_L1, 0x03bd },
199 { "oacute"_L1, 0x00f3 },
200 { "ocirc"_L1, 0x00f4 },
201 { "oelig"_L1, 0x0153 },
202 { "ograve"_L1, 0x00f2 },
203 { "oline"_L1, 0x203e },
204 { "omega"_L1, 0x03c9 },
205 { "omicron"_L1, 0x03bf },
206 { "oplus"_L1, 0x2295 },
207 { "or"_L1, 0x22a6 },
208 { "ordf"_L1, 0x00aa },
209 { "ordm"_L1, 0x00ba },
210 { "oslash"_L1, 0x00f8 },
211 { "otilde"_L1, 0x00f5 },
212 { "otimes"_L1, 0x2297 },
213 { "ouml"_L1, 0x00f6 },
214 { "para"_L1, 0x00b6 },
215 { "part"_L1, 0x2202 },
216 { "percnt"_L1, 0x0025 },
217 { "permil"_L1, 0x2030 },
218 { "perp"_L1, 0x22a5 },
219 { "phi"_L1, 0x03c6 },
220 { "pi"_L1, 0x03c0 },
221 { "piv"_L1, 0x03d6 },
222 { "plusmn"_L1, 0x00b1 },
223 { "pound"_L1, 0x00a3 },
224 { "prime"_L1, 0x2032 },
225 { "prod"_L1, 0x220f },
226 { "prop"_L1, 0x221d },
227 { "psi"_L1, 0x03c8 },
228 { "quot"_L1, 34 },
229 { "rArr"_L1, 0x21d2 },
230 { "radic"_L1, 0x221a },
231 { "rang"_L1, 0x232a },
232 { "raquo"_L1, 0x00bb },
233 { "rarr"_L1, 0x2192 },
234 { "rceil"_L1, 0x2309 },
235 { "rdquo"_L1, 0x201d },
236 { "real"_L1, 0x211c },
237 { "reg"_L1, 0x00ae },
238 { "rfloor"_L1, 0x230b },
239 { "rho"_L1, 0x03c1 },
240 { "rlm"_L1, 0x200f },
241 { "rsaquo"_L1, 0x203a },
242 { "rsquo"_L1, 0x2019 },
243 { "sbquo"_L1, 0x201a },
244 { "scaron"_L1, 0x0161 },
245 { "sdot"_L1, 0x22c5 },
246 { "sect"_L1, 0x00a7 },
247 { "shy"_L1, 0x00ad },
248 { "sigma"_L1, 0x03c3 },
249 { "sigmaf"_L1, 0x03c2 },
250 { "sim"_L1, 0x223c },
251 { "spades"_L1, 0x2660 },
252 { "sub"_L1, 0x2282 },
253 { "sube"_L1, 0x2286 },
254 { "sum"_L1, 0x2211 },
255 { "sup"_L1, 0x2283 },
256 { "sup1"_L1, 0x00b9 },
257 { "sup2"_L1, 0x00b2 },
258 { "sup3"_L1, 0x00b3 },
259 { "supe"_L1, 0x2287 },
260 { "szlig"_L1, 0x00df },
261 { "tau"_L1, 0x03c4 },
262 { "there4"_L1, 0x2234 },
263 { "theta"_L1, 0x03b8 },
264 { "thetasym"_L1, 0x03d1 },
265 { "thinsp"_L1, 0x2009 },
266 { "thorn"_L1, 0x00fe },
267 { "tilde"_L1, 0x02dc },
268 { "times"_L1, 0x00d7 },
269 { "trade"_L1, 0x2122 },
270 { "uArr"_L1, 0x21d1 },
271 { "uacute"_L1, 0x00fa },
272 { "uarr"_L1, 0x2191 },
273 { "ucirc"_L1, 0x00fb },
274 { "ugrave"_L1, 0x00f9 },
275 { "uml"_L1, 0x00a8 },
276 { "upsih"_L1, 0x03d2 },
277 { "upsilon"_L1, 0x03c5 },
278 { "uuml"_L1, 0x00fc },
279 { "weierp"_L1, 0x2118 },
280 { "xi"_L1, 0x03be },
281 { "yacute"_L1, 0x00fd },
282 { "yen"_L1, 0x00a5 },
283 { "yuml"_L1, 0x00ff },
284 { "zeta"_L1, 0x03b6 },
285 { "zwj"_L1, 0x200d },
286 { "zwnj"_L1, 0x200c }
288
289static bool operator<(QStringView entityStr, const QTextHtmlEntity &entity)
290{
291 return entityStr < entity.name;
292}
293
294static bool operator<(const QTextHtmlEntity &entity, QStringView entityStr)
295{
296 return entity.name < entityStr;
297}
298
299static QChar resolveEntity(QStringView entity)
300{
301 const QTextHtmlEntity *end = std::end(entities);
302 const QTextHtmlEntity *e = std::lower_bound(std::begin(entities), end, entity);
303 if (e == end || (entity < *e))
304 return QChar();
305 return e->code;
306}
307
// Replacement code points for numeric character references in the range
// 0x80..0x9F, indexed by (value - 0x80). Five slots (0x81, 0x8D, 0x8F, 0x90
// and 0x9D) map directly to themselves.
static const ushort windowsLatin1ExtendedCharacters[0xA0 - 0x80] = {
    // 0x80 .. 0x87
    0x20ac, 0x0081, 0x201a, 0x0192, 0x201e, 0x2026, 0x2020, 0x2021,
    // 0x88 .. 0x8F
    0x02c6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008d, 0x017d, 0x008f,
    // 0x90 .. 0x97
    0x0090, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014,
    // 0x98 .. 0x9F
    0x02dc, 0x2122, 0x0161, 0x203a, 0x0153, 0x009d, 0x017e, 0x0178
};
342
343// The displayMode value reflects what is treated as a block in the piece table,
344// not what the W3C defines.
345static constexpr QTextHtmlElement elements[]= {
384 { "qt", Html_body /*deliberate mapping*/, QTextHtmlElement::DisplayBlock },
406};
407
408template <typename T>
409static bool operator<(T str, QTextHtmlElement e)
410{
411 return str.compare(QLatin1StringView(e.name), Qt::CaseInsensitive) < 0;
412}
413
414template <typename T>
415static bool operator<(QTextHtmlElement e, T str)
416{
417 return QLatin1StringView(e.name).compare(str, Qt::CaseInsensitive) < 0;
418}
419
420static const QTextHtmlElement *lookupElementHelper(QAnyStringView element)
421{
422 return element.visit([begin = std::begin(elements), end = std::end(elements)]
423 (auto element) -> const QTextHtmlElement * {
424 const auto e = std::lower_bound(begin, end, element);
425 if ((e == end) || (element < *e))
426 return nullptr;
427 return e;
428 });
429}
430
431int QTextHtmlParser::lookupElement(QAnyStringView element)
432{
433 const QTextHtmlElement *e = lookupElementHelper(element);
434 if (!e)
435 return -1;
436 return e->id;
437}
438
439// quotes newlines as "\\n"
440static QString quoteNewline(const QString &s)
441{
442 QString n = s;
443 n.replace(u'\n', "\\n"_L1);
444 return n;
445}
446
448 : parent(0), id(Html_unknown),
450 hasCssListIndent(false), isEmptyParagraph(false), isTextFrame(false), isRootFrame(false),
455 borderCollapse(false),
457{
462
463 for (int i = 0; i < 4; ++i) {
464 tableCellBorderStyle[i] = QTextFrameFormat::BorderStyle_None;
465 tableCellBorder[i] = 0;
466 tableCellBorderBrush[i] = Qt::NoBrush;
467 }
468}
469
471{
472 for (int i = 0; i < count(); ++i) {
473 qDebug().nospace() << qPrintable(QString(depth(i) * 4, u' '))
474 << qPrintable(at(i).tag) << ':'
475 << quoteNewline(at(i).text);
476 }
477}
478
480{
481 QTextHtmlParserNode *lastNode = nodes.last();
482 QTextHtmlParserNode *newNode = nullptr;
483
484 bool reuseLastNode = true;
485
486 if (nodes.size() == 1) {
487 reuseLastNode = false;
488 } else if (lastNode->tag.isEmpty()) {
489
490 if (lastNode->text.isEmpty()) {
491 reuseLastNode = true;
492 } else { // last node is a text node (empty tag) with some text
493
494 if (lastNode->text.size() == 1 && lastNode->text.at(0).isSpace()) {
495
496 int lastSibling = count() - 2;
497 while (lastSibling
498 && at(lastSibling).parent != lastNode->parent
499 && at(lastSibling).displayMode == QTextHtmlElement::DisplayInline) {
500 lastSibling = at(lastSibling).parent;
501 }
502
503 if (at(lastSibling).displayMode == QTextHtmlElement::DisplayInline) {
504 reuseLastNode = false;
505 } else {
506 reuseLastNode = true;
507 }
508 } else {
509 // text node with real (non-whitespace) text -> nothing to re-use
510 reuseLastNode = false;
511 }
512
513 }
514
515 } else {
516 // last node had a proper tag -> nothing to re-use
517 reuseLastNode = false;
518 }
519
520 if (reuseLastNode) {
521 newNode = lastNode;
522 newNode->tag.clear();
523 newNode->text.clear();
524 newNode->id = Html_unknown;
525 } else {
526 nodes.append(new QTextHtmlParserNode);
527 newNode = nodes.last();
528 }
529
530 newNode->parent = parent;
531 return newNode;
532}
533
534void QTextHtmlParser::parse(const QString &text, const QTextDocument *_resourceProvider)
535{
536 qDeleteAll(nodes);
537 nodes.clear();
538 nodes.append(new QTextHtmlParserNode);
539 txt = text;
540 pos = 0;
541 len = txt.size();
542 textEditMode = false;
543 resourceProvider = _resourceProvider;
544 parse();
545 //dumpHtml();
546}
547
548int QTextHtmlParser::depth(int i) const
549{
550 int depth = 0;
551 while (i) {
552 i = at(i).parent;
553 ++depth;
554 }
555 return depth;
556}
557
558int QTextHtmlParser::margin(int i, int mar) const {
559 int m = 0;
560 const QTextHtmlParserNode *node;
561 if (mar == MarginLeft
562 || mar == MarginRight) {
563 while (i) {
564 node = &at(i);
565 if (!node->isBlock() && node->id != Html_table)
566 break;
567 if (node->isTableCell())
568 break;
569 m += node->margin[mar];
570 i = node->parent;
571 }
572 }
573 return m;
574}
575
576int QTextHtmlParser::topMargin(int i) const
577{
578 if (!i)
579 return 0;
580 return at(i).margin[MarginTop];
581}
582
584{
585 if (!i)
586 return 0;
587 return at(i).margin[MarginBottom];
588}
589
591{
592 while (pos < len && txt.at(pos).isSpace() && txt.at(pos) != QChar::ParagraphSeparator)
593 pos++;
594}
595
597{
598 while (pos < len) {
599 QChar c = txt.at(pos++);
600 if (c == u'<') {
602 } else if (c == u'&') {
603 nodes.last()->text += parseEntity();
604 } else {
605 nodes.last()->text += c;
606 }
607 }
608}
609
610// parses a tag after "<"
612{
614
615 // handle comments and other exclamation mark declarations
616 if (hasPrefix(u'!')) {
618 if (nodes.last()->wsm != QTextHtmlParserNode::WhiteSpacePre
619 && nodes.last()->wsm != QTextHtmlParserNode::WhiteSpacePreWrap
620 && !textEditMode)
622 return;
623 }
624
625 // if close tag just close
626 if (hasPrefix(u'/')) {
627 if (nodes.last()->id == Html_style) {
628#ifndef QT_NO_CSSPARSER
629 QCss::Parser parser(nodes.constLast()->text);
630 QCss::StyleSheet sheet;
631 sheet.origin = QCss::StyleSheetOrigin_Author;
632 parser.parse(&sheet, Qt::CaseInsensitive);
633 inlineStyleSheets.append(sheet);
634 resolveStyleSheetImports(sheet);
635#endif
636 }
638 return;
639 }
640
641 int p = last();
642 while (p && at(p).tag.size() == 0)
643 p = at(p).parent;
644
646
647 // parse tag name
648 node->tag = parseWord().toLower();
649
650 const QTextHtmlElement *elem = lookupElementHelper(node->tag);
651 if (elem) {
652 node->id = elem->id;
653 node->displayMode = elem->displayMode;
654 } else {
655 node->id = Html_unknown;
656 }
657
658 node->attributes.clear();
659 // _need_ at least one space after the tag name, otherwise there can't be attributes
660 if (pos < len && txt.at(pos).isSpace())
661 node->attributes = parseAttributes();
662
663 // resolveParent() may have to change the order in the tree and
664 // insert intermediate nodes for buggy HTML, so re-initialize the 'node'
665 // pointer through the return value
666 node = resolveParent();
668
669#ifndef QT_NO_CSSPARSER
670 const int nodeIndex = nodes.size() - 1; // this new node is always the last
671 node->applyCssDeclarations(declarationsForNode(nodeIndex), resourceProvider);
672#endif
673 applyAttributes(node->attributes);
674
675 // finish tag
676 bool tagClosed = false;
677 while (pos < len && txt.at(pos) != u'>') {
678 if (txt.at(pos) == u'/')
679 tagClosed = true;
680
681 pos++;
682 }
683 pos++;
684
685 // in a white-space preserving environment strip off a initial newline
686 // since the element itself already generates a newline
690 && node->isBlock()) {
691 if (pos < len - 1 && txt.at(pos) == u'\n')
692 ++pos;
693 }
694
695 if (node->mayNotHaveChildren() || tagClosed) {
698 }
699}
700
701// parses a tag beginning with "/"
703{
704 ++pos;
705 QString tag = parseWord().toLower().trimmed();
706 while (pos < len) {
707 QChar c = txt.at(pos++);
708 if (c == u'>')
709 break;
710 }
711
712 // find corresponding open node
713 int p = last();
714 if (p > 0
715 && at(p - 1).tag == tag
717 p--;
718
719 while (p && at(p).tag != tag)
720 p = at(p).parent;
721
722 // simply ignore the tag if we can't find
723 // a corresponding open node, for broken
724 // html such as <font>blah</font></font>
725 if (!p)
726 return;
727
728 // in a white-space preserving environment strip off a trailing newline
729 // since the closing of the opening block element will automatically result
730 // in a new block for elements following the <pre>
731 // ...foo\n</pre><p>blah -> foo</pre><p>blah
735 && at(p).isBlock()) {
736 if (at(last()).text.endsWith(u'\n'))
737 nodes[last()]->text.chop(1);
738 }
739
742}
743
744// parses a tag beginning with "!"
746{
747 ++pos;
748 if (hasPrefix(u'-') && hasPrefix(u'-', 1)) {
749 pos += 2;
750 // eat comments
751 int end = txt.indexOf("-->"_L1, pos);
752 pos = (end >= 0 ? end + 3 : len);
753 } else {
754 // eat internal tags
755 while (pos < len) {
756 QChar c = txt.at(pos++);
757 if (c == u'>')
758 break;
759 }
760 }
761}
762
763QString QTextHtmlParser::parseEntity(QStringView entity)
764{
765 QChar resolved = resolveEntity(entity);
766 if (!resolved.isNull())
767 return QString(resolved);
768
769 if (entity.size() > 1 && entity.at(0) == u'#') {
770 entity = entity.mid(1); // removing leading #
771
772 int base = 10;
773 bool ok = false;
774
775 if (entity.at(0).toLower() == u'x') { // hex entity?
776 entity = entity.mid(1);
777 base = 16;
778 }
779
780 uint uc = entity.toUInt(&ok, base);
781 if (ok) {
782 if (uc >= 0x80 && uc < 0x80 + (sizeof(windowsLatin1ExtendedCharacters)/sizeof(windowsLatin1ExtendedCharacters[0])))
783 uc = windowsLatin1ExtendedCharacters[uc - 0x80];
784 return QStringView{QChar::fromUcs4(uc)}.toString();
785 }
786 }
787 return {};
788}
789
790// parses an entity after "&", and returns it
792{
793 const int recover = pos;
794 int entityLen = 0;
795 while (pos < len) {
796 QChar c = txt.at(pos++);
797 if (c.isSpace() || pos - recover > 9) {
798 goto error;
799 }
800 if (c == u';')
801 break;
802 ++entityLen;
803 }
804 if (entityLen) {
805 const QStringView entity = QStringView(txt).mid(recover, entityLen);
806 QString parsedEntity = parseEntity(entity);
807 if (!parsedEntity.isNull()) {
808 return parsedEntity;
809 }
810 }
811error:
812 pos = recover;
813 return "&"_L1;
814}
815
816// parses one word, possibly quoted, and returns it
818{
819 QString word;
820 if (hasPrefix(u'\"')) { // double quotes
821 ++pos;
822 while (pos < len) {
823 QChar c = txt.at(pos++);
824 if (c == u'\"')
825 break;
826 else if (c == u'&')
827 word += parseEntity();
828 else
829 word += c;
830 }
831 } else if (hasPrefix(u'\'')) { // single quotes
832 ++pos;
833 while (pos < len) {
834 QChar c = txt.at(pos++);
835 // Allow for escaped single quotes as they may be part of the string
836 if (c == u'\'' && (txt.size() > 1 && txt.at(pos - 2) != u'\\'))
837 break;
838 else
839 word += c;
840 }
841 } else { // normal text
842 while (pos < len) {
843 QChar c = txt.at(pos++);
844 if (c == u'>' || (c == u'/' && hasPrefix(u'>'))
845 || c == u'<' || c == u'=' || c.isSpace()) {
846 --pos;
847 break;
848 }
849 if (c == u'&')
850 word += parseEntity();
851 else
852 word += c;
853 }
854 }
855 return word;
856}
857
858// gives the new node the right parent
860{
861 QTextHtmlParserNode *node = nodes.last();
862
863 int p = node->parent;
864
865 // Excel gives us buggy HTML with just tr without surrounding table tags
866 // or with just td tags
867
868 if (node->id == Html_td) {
869 int n = p;
870 while (n && at(n).id != Html_tr)
871 n = at(n).parent;
872
873 if (!n) {
874 nodes.insert(nodes.size() - 1, new QTextHtmlParserNode);
875 nodes.insert(nodes.size() - 1, new QTextHtmlParserNode);
876
877 QTextHtmlParserNode *table = nodes[nodes.size() - 3];
878 table->parent = p;
879 table->id = Html_table;
880 table->tag = "table"_L1;
881 table->children.append(nodes.size() - 2); // add row as child
882
883 QTextHtmlParserNode *row = nodes[nodes.size() - 2];
884 row->parent = nodes.size() - 3; // table as parent
885 row->id = Html_tr;
886 row->tag = "tr"_L1;
887
888 p = nodes.size() - 2;
889 node = nodes.last(); // re-initialize pointer
890 }
891 }
892
893 if (node->id == Html_tr) {
894 int n = p;
895 while (n && at(n).id != Html_table)
896 n = at(n).parent;
897
898 if (!n) {
899 nodes.insert(nodes.size() - 1, new QTextHtmlParserNode);
900 QTextHtmlParserNode *table = nodes[nodes.size() - 2];
901 table->parent = p;
902 table->id = Html_table;
903 table->tag = "table"_L1;
904 p = nodes.size() - 2;
905 node = nodes.last(); // re-initialize pointer
906 }
907 }
908
909 // permit invalid html by letting block elements be children
910 // of inline elements with the exception of paragraphs:
911 //
912 // a new paragraph closes parent inline elements (while loop),
913 // unless they themselves are children of a non-paragraph block
914 // element (if statement)
915 //
916 // For example:
917 //
918 // <body><p><b>Foo<p>Bar <-- second <p> implicitly closes <b> that
919 // belongs to the first <p>. The self-nesting
920 // check further down prevents the second <p>
921 // from nesting into the first one then.
922 // so Bar is not bold.
923 //
924 // <body><b><p>Foo <-- Foo should be bold.
925 //
926 // <body><b><p>Foo<p>Bar <-- Foo and Bar should be bold.
927 //
928 if (node->id == Html_p) {
929 while (p && !at(p).isBlock())
930 p = at(p).parent;
931
932 if (!p || at(p).id != Html_p)
933 p = node->parent;
934 }
935
936 // some elements are not self nesting
937 if (node->id == at(p).id
939 p = at(p).parent;
940
941 // some elements are not allowed in certain contexts
942 while ((p && !node->allowedInContext(at(p).id))
943 // ### make new styles aware of empty tags
945 ) {
946 p = at(p).parent;
947 }
948
949 node->parent = p;
950
951 // makes it easier to traverse the tree, later
952 nodes[p]->children.append(nodes.size() - 1);
953 return node;
954}
955
956// sets all properties on the new node
958{
959 QTextHtmlParserNode *node = nodes.last();
960 const QTextHtmlParserNode *parent = nodes.at(node->parent);
961 node->initializeProperties(parent, this);
962}
963
965{
966 if (!isListStart())
967 return false;
968
969 int p = parent;
970 while (p) {
971 if (parser->at(p).isListStart())
972 return true;
973 p = parser->at(p).parent;
974 }
975 return false;
976}
977
979{
980 // inherit properties from parent element
981 charFormat = parent->charFormat;
982
983 if (id == Html_html)
984 blockFormat.setLayoutDirection(Qt::LeftToRight); // HTML default
985 else if (parent->blockFormat.hasProperty(QTextFormat::LayoutDirection))
986 blockFormat.setLayoutDirection(parent->blockFormat.layoutDirection());
987
988 if (parent->displayMode == QTextHtmlElement::DisplayNone)
989 displayMode = QTextHtmlElement::DisplayNone;
990
991 if (parent->id != Html_table || id == Html_caption) {
992 if (parent->blockFormat.hasProperty(QTextFormat::BlockAlignment))
993 blockFormat.setAlignment(parent->blockFormat.alignment());
994 else
995 blockFormat.clearProperty(QTextFormat::BlockAlignment);
996 }
997 // we don't paint per-row background colors, yet. so as an
998 // exception inherit the background color here
999 // we also inherit the background between inline elements
1000 // we also inherit from non-body block elements since we merge them together
1001 if ((parent->id != Html_tr || !isTableCell())
1002 && (displayMode != QTextHtmlElement::DisplayInline || parent->displayMode != QTextHtmlElement::DisplayInline)
1003 && (parent->id == Html_body || displayMode != QTextHtmlElement::DisplayBlock || parent->displayMode != QTextHtmlElement::DisplayBlock)
1004 ) {
1005 charFormat.clearProperty(QTextFormat::BackgroundBrush);
1006 }
1007
1008 listStyle = parent->listStyle;
1009 // makes no sense to inherit that property, a named anchor is a single point
1010 // in the document, which is set by the DocumentFragment
1011 charFormat.clearProperty(QTextFormat::AnchorName);
1012 wsm = parent->wsm;
1013
1014 // initialize remaining properties
1019 cssFloat = QTextFrameFormat::InFlow;
1020
1021 for (int i = 0; i < 4; ++i)
1022 padding[i] = -1;
1023
1024 // set element specific attributes
1025 switch (id) {
1026 case Html_a:
1027 for (int i = 0; i < attributes.size(); i += 2) {
1028 const QString key = attributes.at(i);
1029 if (key.compare("href"_L1, Qt::CaseInsensitive) == 0
1030 && !attributes.at(i + 1).isEmpty()) {
1031 hasHref = true;
1032 }
1033 }
1034 charFormat.setAnchor(true);
1035 break;
1036 case Html_big:
1037 charFormat.setProperty(QTextFormat::FontSizeAdjustment, int(1));
1038 break;
1039 case Html_small:
1040 charFormat.setProperty(QTextFormat::FontSizeAdjustment, int(-1));
1041 break;
1042 case Html_h1:
1043 charFormat.setProperty(QTextFormat::FontSizeAdjustment, int(3));
1046 break;
1047 case Html_h2:
1048 charFormat.setProperty(QTextFormat::FontSizeAdjustment, int(2));
1051 break;
1052 case Html_h3:
1053 charFormat.setProperty(QTextFormat::FontSizeAdjustment, int(1));
1056 break;
1057 case Html_h4:
1058 charFormat.setProperty(QTextFormat::FontSizeAdjustment, int(0));
1061 break;
1062 case Html_h5:
1063 charFormat.setProperty(QTextFormat::FontSizeAdjustment, int(-1));
1066 break;
1067 case Html_p:
1070 break;
1071 case Html_ul:
1072 // nested lists don't have margins, except for the toplevel one
1073 if (!isNestedList(parser)) {
1076 }
1077 // no left margin as we use indenting instead
1078 break;
1079 case Html_ol:
1080 // nested lists don't have margins, except for the toplevel one
1081 if (!isNestedList(parser)) {
1084 }
1085 // no left margin as we use indenting instead
1086 break;
1087 case Html_br:
1088 text = QChar(QChar::LineSeparator);
1089 break;
1090 case Html_pre:
1093 break;
1094 case Html_blockquote:
1099 blockFormat.setProperty(QTextFormat::BlockQuoteLevel, 1);
1100 break;
1101 case Html_dl:
1104 break;
1105 case Html_dd:
1107 break;
1108 default: break;
1109 }
1110}
1111
1112#ifndef QT_NO_CSSPARSER
1113void QTextHtmlParserNode::setListStyle(const QList<QCss::Value> &cssValues)
1114{
1115 for (int i = 0; i < cssValues.size(); ++i) {
1116 if (cssValues.at(i).type == QCss::Value::KnownIdentifier) {
1117 switch (static_cast<QCss::KnownValue>(cssValues.at(i).variant.toInt())) {
1118 case QCss::Value_None: hasOwnListStyle = true; listStyle = QTextListFormat::ListStyleUndefined; break;
1119 case QCss::Value_Disc: hasOwnListStyle = true; listStyle = QTextListFormat::ListDisc; break;
1120 case QCss::Value_Square: hasOwnListStyle = true; listStyle = QTextListFormat::ListSquare; break;
1121 case QCss::Value_Circle: hasOwnListStyle = true; listStyle = QTextListFormat::ListCircle; break;
1122 case QCss::Value_Decimal: hasOwnListStyle = true; listStyle = QTextListFormat::ListDecimal; break;
1123 case QCss::Value_LowerAlpha: hasOwnListStyle = true; listStyle = QTextListFormat::ListLowerAlpha; break;
1124 case QCss::Value_UpperAlpha: hasOwnListStyle = true; listStyle = QTextListFormat::ListUpperAlpha; break;
1125 case QCss::Value_LowerRoman: hasOwnListStyle = true; listStyle = QTextListFormat::ListLowerRoman; break;
1126 case QCss::Value_UpperRoman: hasOwnListStyle = true; listStyle = QTextListFormat::ListUpperRoman; break;
1127 default: break;
1128 }
1129 }
1130 }
1131 // allow individual list items to override the style
1132 if (id == Html_li && hasOwnListStyle)
1133 blockFormat.setProperty(QTextFormat::ListStyle, listStyle);
1134}
1135
1136static QTextFrameFormat::BorderStyle toQTextFrameFormat(QCss::BorderStyle cssStyle)
1137{
1138 switch (cssStyle) {
1139 case QCss::BorderStyle::BorderStyle_Dotted:
1140 return QTextFrameFormat::BorderStyle::BorderStyle_Dotted;
1141 case QCss::BorderStyle::BorderStyle_Dashed:
1142 return QTextFrameFormat::BorderStyle::BorderStyle_Dashed;
1143 case QCss::BorderStyle::BorderStyle_Solid:
1144 return QTextFrameFormat::BorderStyle::BorderStyle_Solid;
1145 case QCss::BorderStyle::BorderStyle_Double:
1146 return QTextFrameFormat::BorderStyle::BorderStyle_Double;
1147 case QCss::BorderStyle::BorderStyle_DotDash:
1148 return QTextFrameFormat::BorderStyle::BorderStyle_DotDash;
1149 case QCss::BorderStyle::BorderStyle_DotDotDash:
1150 return QTextFrameFormat::BorderStyle::BorderStyle_DotDotDash;
1151 case QCss::BorderStyle::BorderStyle_Groove:
1152 return QTextFrameFormat::BorderStyle::BorderStyle_Groove;
1153 case QCss::BorderStyle::BorderStyle_Ridge:
1154 return QTextFrameFormat::BorderStyle::BorderStyle_Ridge;
1155 case QCss::BorderStyle::BorderStyle_Inset:
1156 return QTextFrameFormat::BorderStyle::BorderStyle_Inset;
1157 case QCss::BorderStyle::BorderStyle_Outset:
1158 return QTextFrameFormat::BorderStyle::BorderStyle_Outset;
1159 case QCss::BorderStyle::BorderStyle_Unknown:
1160 case QCss::BorderStyle::BorderStyle_None:
1161 case QCss::BorderStyle::BorderStyle_Native:
1162 return QTextFrameFormat::BorderStyle::BorderStyle_None;
1163 case QCss::BorderStyle::NumKnownBorderStyles:
1164 break;
1165 // Intentionally no "default" to allow a compiler warning when extending the enum
1166 // without updating this here. clang gives such a warning.
1167 }
1168 // Must not happen, intentionally trigger undefined behavior which sanitizers will detect.
1169 // Having all cases covered in switch is not sufficient:
1170 // MSVC would warn when there is no "default".
1171 return static_cast<QTextFrameFormat::BorderStyle>(-1);
1172}
1173
1174void QTextHtmlParserNode::applyCssDeclarations(const QList<QCss::Declaration> &declarations, const QTextDocument *resourceProvider)
1175{
1176 QCss::ValueExtractor extractor(declarations);
1177 extractor.extractBox(margin, padding);
1178
1179 auto getBorderValues = [&extractor](qreal *borderWidth, QBrush *borderBrush, QTextFrameFormat::BorderStyle *borderStyles) {
1180 QCss::BorderStyle cssStyles[4];
1181 int cssBorder[4];
1182 QSize cssRadii[4]; // unused
1183 for (int i = 0; i < 4; ++i) {
1184 cssStyles[i] = QCss::BorderStyle_None;
1185 cssBorder[i] = 0;
1186 }
1187 // this will parse (and cache) "border-width" as a list so the
1188 // QCss::BorderWidth parsing below which expects a single value
1189 // will not work as expected - which in this case does not matter
1190 // because tableBorder is not relevant for cells.
1191 bool hit = extractor.extractBorder(cssBorder, borderBrush, cssStyles, cssRadii);
1192 for (int i = 0; i < 4; ++i) {
1193 borderStyles[i] = toQTextFrameFormat(cssStyles[i]);
1194 borderWidth[i] = static_cast<qreal>(cssBorder[i]);
1195 }
1196 return hit;
1197 };
1198
1199 if (id == Html_td || id == Html_th)
1200 getBorderValues(tableCellBorder, tableCellBorderBrush, tableCellBorderStyle);
1201
1202 for (int i = 0; i < declarations.size(); ++i) {
1203 const QCss::Declaration &decl = declarations.at(i);
1204 if (decl.d->values.isEmpty()) continue;
1205
1206 QCss::KnownValue identifier = QCss::UnknownValue;
1207 if (decl.d->values.constFirst().type == QCss::Value::KnownIdentifier)
1208 identifier = static_cast<QCss::KnownValue>(decl.d->values.constFirst().variant.toInt());
1209
1210 switch (decl.d->propertyId) {
1211 case QCss::BorderColor: {
1212 QBrush bordersBrush[4];
1213 decl.brushValues(bordersBrush);
1214 if (bordersBrush[0].color().isValid())
1215 borderBrush = bordersBrush[0];
1216 break;
1217 }
1218 case QCss::BorderStyles:
1219 if (decl.styleValue() != QCss::BorderStyle_Unknown && decl.styleValue() != QCss::BorderStyle_Native)
1220 borderStyle = static_cast<QTextFrameFormat::BorderStyle>(decl.styleValue() - 1);
1221 break;
1222 case QCss::BorderWidth: {
1223 int borders[4];
1224 extractor.lengthValues(decl, borders);
1225 tableBorder = borders[0];
1226 }
1227 break;
1228 case QCss::Border: {
1229 qreal tblBorder[4];
1230 QBrush tblBorderBrush[4];
1231 QTextFrameFormat::BorderStyle tblBorderStyle[4];
1232 if (getBorderValues(tblBorder, tblBorderBrush, tblBorderStyle)) {
1233 tableBorder = tblBorder[0];
1234 if (tblBorderBrush[0].color().isValid())
1235 borderBrush = tblBorderBrush[0];
1236 if (tblBorderStyle[0] != static_cast<QTextFrameFormat::BorderStyle>(-1))
1237 borderStyle = tblBorderStyle[0];
1238 }
1239 }
1240 break;
1241 case QCss::BorderCollapse:
1242 borderCollapse = decl.borderCollapseValue();
1243 break;
1244 case QCss::Color: charFormat.setForeground(decl.colorValue()); break;
1245 case QCss::Float:
1246 cssFloat = QTextFrameFormat::InFlow;
1247 switch (identifier) {
1248 case QCss::Value_Left: cssFloat = QTextFrameFormat::FloatLeft; break;
1249 case QCss::Value_Right: cssFloat = QTextFrameFormat::FloatRight; break;
1250 default: break;
1251 }
1252 break;
1253 case QCss::QtBlockIndent:
1254 blockFormat.setIndent(decl.d->values.constFirst().variant.toInt());
1255 break;
1256 case QCss::QtLineHeightType: {
1257 QString lineHeightTypeName = decl.d->values.constFirst().variant.toString();
1258 QTextBlockFormat::LineHeightTypes lineHeightType;
1259 if (lineHeightTypeName.compare("proportional"_L1, Qt::CaseInsensitive) == 0)
1260 lineHeightType = QTextBlockFormat::ProportionalHeight;
1261 else if (lineHeightTypeName.compare("fixed"_L1, Qt::CaseInsensitive) == 0)
1262 lineHeightType = QTextBlockFormat::FixedHeight;
1263 else if (lineHeightTypeName.compare("minimum"_L1, Qt::CaseInsensitive) == 0)
1264 lineHeightType = QTextBlockFormat::MinimumHeight;
1265 else if (lineHeightTypeName.compare("line-distance"_L1, Qt::CaseInsensitive) == 0)
1266 lineHeightType = QTextBlockFormat::LineDistanceHeight;
1267 else
1268 lineHeightType = QTextBlockFormat::SingleHeight;
1269
1270 if (hasLineHeightMultiplier) {
1271 qreal lineHeight = blockFormat.lineHeight() / 100.0;
1272 blockFormat.setProperty(QTextBlockFormat::LineHeight, lineHeight);
1273 }
1274
1275 blockFormat.setProperty(QTextBlockFormat::LineHeightType, lineHeightType);
1276 hasOwnLineHeightType = true;
1277 }
1278 break;
1279 case QCss::LineHeight: {
1280 qreal lineHeight;
1281 QTextBlockFormat::LineHeightTypes lineHeightType;
1282 if (decl.realValue(&lineHeight, "px")) {
1283 lineHeightType = QTextBlockFormat::MinimumHeight;
1284 } else {
1285 bool ok;
1286 QCss::Value cssValue = decl.d->values.constFirst();
1287 QString value = cssValue.toString();
1288 lineHeight = value.toDouble(&ok);
1289 if (ok) {
1290 if (!hasOwnLineHeightType && cssValue.type == QCss::Value::Number) {
1291 lineHeight *= 100.0;
1292 hasLineHeightMultiplier = true;
1293 }
1294 lineHeightType = QTextBlockFormat::ProportionalHeight;
1295 } else {
1296 lineHeight = 0.0;
1297 lineHeightType = QTextBlockFormat::SingleHeight;
1298 }
1299 }
1300
1301 // Only override line height type if specified in same node
1302 if (hasOwnLineHeightType)
1303 lineHeightType = QTextBlockFormat::LineHeightTypes(blockFormat.lineHeightType());
1304
1305 blockFormat.setLineHeight(lineHeight, lineHeightType);
1306 break;
1307 }
1308 case QCss::TextIndent: {
1309 qreal indent = 0;
1310 if (decl.realValue(&indent, "px"))
1311 blockFormat.setTextIndent(indent);
1312 break; }
1313 case QCss::QtListIndent:
1314 if (decl.intValue(&cssListIndent))
1315 hasCssListIndent = true;
1316 break;
1317 case QCss::QtParagraphType:
1318 if (decl.d->values.constFirst().variant.toString().compare("empty"_L1, Qt::CaseInsensitive) == 0)
1319 isEmptyParagraph = true;
1320 break;
1321 case QCss::QtTableType:
1322 if (decl.d->values.constFirst().variant.toString().compare("frame"_L1, Qt::CaseInsensitive) == 0)
1323 isTextFrame = true;
1324 else if (decl.d->values.constFirst().variant.toString().compare("root"_L1, Qt::CaseInsensitive) == 0) {
1325 isTextFrame = true;
1326 isRootFrame = true;
1327 }
1328 break;
1329 case QCss::QtUserState:
1330 userState = decl.d->values.constFirst().variant.toInt();
1331 break;
1332 case QCss::Whitespace:
1333 switch (identifier) {
1334 case QCss::Value_Normal: wsm = QTextHtmlParserNode::WhiteSpaceNormal; break;
1335 case QCss::Value_Pre: wsm = QTextHtmlParserNode::WhiteSpacePre; break;
1336 case QCss::Value_NoWrap: wsm = QTextHtmlParserNode::WhiteSpaceNoWrap; break;
1337 case QCss::Value_PreWrap: wsm = QTextHtmlParserNode::WhiteSpacePreWrap; break;
1338 case QCss::Value_PreLine: wsm = QTextHtmlParserNode::WhiteSpacePreLine; break;
1339 default: break;
1340 }
1341 break;
1342 case QCss::VerticalAlignment:
1343 switch (identifier) {
1344 case QCss::Value_Sub: charFormat.setVerticalAlignment(QTextCharFormat::AlignSubScript); break;
1345 case QCss::Value_Super: charFormat.setVerticalAlignment(QTextCharFormat::AlignSuperScript); break;
1346 case QCss::Value_Middle: charFormat.setVerticalAlignment(QTextCharFormat::AlignMiddle); break;
1347 case QCss::Value_Top: charFormat.setVerticalAlignment(QTextCharFormat::AlignTop); break;
1348 case QCss::Value_Bottom: charFormat.setVerticalAlignment(QTextCharFormat::AlignBottom); break;
1349 default: charFormat.setVerticalAlignment(QTextCharFormat::AlignNormal); break;
1350 }
1351 break;
1352 case QCss::PageBreakBefore:
1353 switch (identifier) {
1354 case QCss::Value_Always: blockFormat.setPageBreakPolicy(blockFormat.pageBreakPolicy() | QTextFormat::PageBreak_AlwaysBefore); break;
1355 case QCss::Value_Auto: blockFormat.setPageBreakPolicy(blockFormat.pageBreakPolicy() & ~QTextFormat::PageBreak_AlwaysBefore); break;
1356 default: break;
1357 }
1358 break;
1359 case QCss::PageBreakAfter:
1360 switch (identifier) {
1361 case QCss::Value_Always: blockFormat.setPageBreakPolicy(blockFormat.pageBreakPolicy() | QTextFormat::PageBreak_AlwaysAfter); break;
1362 case QCss::Value_Auto: blockFormat.setPageBreakPolicy(blockFormat.pageBreakPolicy() & ~QTextFormat::PageBreak_AlwaysAfter); break;
1363 default: break;
1364 }
1365 break;
1366 case QCss::TextUnderlineStyle:
1367 switch (identifier) {
1368 case QCss::Value_None: charFormat.setUnderlineStyle(QTextCharFormat::NoUnderline); break;
1369 case QCss::Value_Solid: charFormat.setUnderlineStyle(QTextCharFormat::SingleUnderline); break;
1370 case QCss::Value_Dashed: charFormat.setUnderlineStyle(QTextCharFormat::DashUnderline); break;
1371 case QCss::Value_Dotted: charFormat.setUnderlineStyle(QTextCharFormat::DotLine); break;
1372 case QCss::Value_DotDash: charFormat.setUnderlineStyle(QTextCharFormat::DashDotLine); break;
1373 case QCss::Value_DotDotDash: charFormat.setUnderlineStyle(QTextCharFormat::DashDotDotLine); break;
1374 case QCss::Value_Wave: charFormat.setUnderlineStyle(QTextCharFormat::WaveUnderline); break;
1375 default: break;
1376 }
1377 break;
1378 case QCss::TextDecorationColor: charFormat.setUnderlineColor(decl.colorValue()); break;
1379 case QCss::ListStyleType:
1380 case QCss::ListStyle:
1381 setListStyle(decl.d->values);
1382 break;
1383 case QCss::QtListNumberPrefix:
1384 textListNumberPrefix = decl.d->values.constFirst().variant.toString();
1385 break;
1386 case QCss::QtListNumberSuffix:
1387 textListNumberSuffix = decl.d->values.constFirst().variant.toString();
1388 break;
1389 case QCss::TextAlignment:
1390 switch (identifier) {
1391 case QCss::Value_Left: blockFormat.setAlignment(Qt::AlignLeft); break;
1392 case QCss::Value_Center: blockFormat.setAlignment(Qt::AlignCenter); break;
1393 case QCss::Value_Right: blockFormat.setAlignment(Qt::AlignRight); break;
1394 default: break;
1395 }
1396 break;
1397
1398 case QCss::QtForegroundTextureCacheKey:
1399 {
1400 if (resourceProvider != nullptr && QTextDocumentPrivate::get(resourceProvider) != nullptr) {
1401 bool ok;
1402 qint64 searchKey = decl.d->values.constFirst().variant.toLongLong(&ok);
1403 if (ok)
1404 applyForegroundImage(searchKey, resourceProvider);
1405 }
1406 break;
1407 }
1408 case QCss::QtStrokeColor:
1409 {
1410 QPen pen = charFormat.textOutline();
1411 pen.setStyle(Qt::SolidLine);
1412 pen.setColor(decl.colorValue());
1413 charFormat.setTextOutline(pen);
1414 break;
1415 }
1416 case QCss::QtStrokeWidth:
1417 {
1418 qreal width;
1419 if (decl.realValue(&width, "px")) {
1420 QPen pen = charFormat.textOutline();
1421 pen.setWidthF(width);
1422 charFormat.setTextOutline(pen);
1423 }
1424 break;
1425 }
1426 case QCss::QtStrokeLineCap:
1427 {
1428 QPen pen = charFormat.textOutline();
1429 switch (identifier) {
1430 case QCss::Value_SquareCap: pen.setCapStyle(Qt::SquareCap); break;
1431 case QCss::Value_FlatCap: pen.setCapStyle(Qt::FlatCap); break;
1432 case QCss::Value_RoundCap: pen.setCapStyle(Qt::RoundCap); break;
1433 default: break;
1434 }
1435 charFormat.setTextOutline(pen);
1436 break;
1437 }
1438 case QCss::QtStrokeLineJoin:
1439 {
1440 QPen pen = charFormat.textOutline();
1441 switch (identifier) {
1442 case QCss::Value_MiterJoin: pen.setJoinStyle(Qt::MiterJoin); break;
1443 case QCss::Value_BevelJoin: pen.setJoinStyle(Qt::BevelJoin); break;
1444 case QCss::Value_RoundJoin: pen.setJoinStyle(Qt::RoundJoin); break;
1445 case QCss::Value_SvgMiterJoin: pen.setJoinStyle(Qt::SvgMiterJoin); break;
1446 default: break;
1447 }
1448 charFormat.setTextOutline(pen);
1449 break;
1450 }
1451 case QCss::QtStrokeMiterLimit:
1452 {
1453 qreal miterLimit;
1454 if (decl.realValue(&miterLimit)) {
1455 QPen pen = charFormat.textOutline();
1456 pen.setMiterLimit(miterLimit);
1457 charFormat.setTextOutline(pen);
1458 }
1459 break;
1460 }
1461 case QCss::QtStrokeDashArray:
1462 {
1463 QList<qreal> dashes = decl.dashArray();
1464 if (!dashes.empty()) {
1465 QPen pen = charFormat.textOutline();
1466 pen.setDashPattern(dashes);
1467 charFormat.setTextOutline(pen);
1468 }
1469 break;
1470 }
1471 case QCss::QtStrokeDashOffset:
1472 {
1473 qreal dashOffset;
1474 if (decl.realValue(&dashOffset)) {
1475 QPen pen = charFormat.textOutline();
1476 pen.setDashOffset(dashOffset);
1477 charFormat.setTextOutline(pen);
1478 }
1479 break;
1480 }
1481 case QCss::QtForeground:
1482 {
1483 QBrush brush = decl.brushValue();
1484 charFormat.setForeground(brush);
1485 break;
1486 }
1487 case QCss::MaximumWidth:
1488 if (id == Html_img) {
1489 auto imageFormat = charFormat.toImageFormat();
1490 imageFormat.setMaximumWidth(extractor.textLength(decl));
1491 charFormat = imageFormat;
1492 }
1493 break;
1494 default: break;
1495 }
1496 }
1497
1498 QFont f;
1499 int adjustment = -255;
1500 extractor.extractFont(&f, &adjustment);
1501 if (f.pixelSize() > INT32_MAX / 2)
1502 f.setPixelSize(INT32_MAX / 2); // avoid even more extreme values
1503 charFormat.setFont(f, QTextCharFormat::FontPropertiesSpecifiedOnly);
1504
1505 if (adjustment >= -1)
1506 charFormat.setProperty(QTextFormat::FontSizeAdjustment, adjustment);
1507
1508 {
1509 Qt::Alignment ignoredAlignment;
1510 QCss::Repeat ignoredRepeat;
1511 QString bgImage;
1512 QBrush bgBrush;
1513 QCss::Origin ignoredOrigin, ignoredClip;
1514 QCss::Attachment ignoredAttachment;
1515 extractor.extractBackground(&bgBrush, &bgImage, &ignoredRepeat, &ignoredAlignment,
1516 &ignoredOrigin, &ignoredAttachment, &ignoredClip);
1517
1518 if (!bgImage.isEmpty() && resourceProvider) {
1519 applyBackgroundImage(bgImage, resourceProvider);
1520 } else if (bgBrush.style() != Qt::NoBrush) {
1521 charFormat.setBackground(bgBrush);
1522 if (id == Html_hr)
1523 blockFormat.setProperty(QTextFormat::BackgroundBrush, bgBrush);
1524 }
1525 }
1526}
1527
1528#endif // QT_NO_CSSPARSER
1529
1530void QTextHtmlParserNode::applyForegroundImage(qint64 searchKey, const QTextDocument *resourceProvider)
1531{
1532 const QTextDocumentPrivate *priv = QTextDocumentPrivate::get(resourceProvider);
1533 for (int i = 0; i < priv->formats.numFormats(); ++i) {
1534 QTextCharFormat format = priv->formats.charFormat(i);
1535 if (format.isValid()) {
1536 QBrush brush = format.foreground();
1537 if (brush.style() == Qt::TexturePattern) {
1538 const bool isPixmap = qHasPixmapTexture(brush);
1539
1540 if (isPixmap && QCoreApplication::instance()->thread() != QThread::currentThread()) {
1541 qWarning("Can't apply QPixmap outside of GUI thread");
1542 return;
1543 }
1544
1545 const qint64 cacheKey = isPixmap ? brush.texture().cacheKey() : brush.textureImage().cacheKey();
1546 if (cacheKey == searchKey) {
1547 QBrush b;
1548 if (isPixmap)
1549 b.setTexture(brush.texture());
1550 else
1551 b.setTextureImage(brush.textureImage());
1552 b.setStyle(Qt::TexturePattern);
1553 charFormat.setForeground(b);
1554 }
1555 }
1556 }
1557 }
1558
1559}
1560
1561void QTextHtmlParserNode::applyBackgroundImage(const QString &url, const QTextDocument *resourceProvider)
1562{
1563 if (!url.isEmpty() && resourceProvider) {
1564 QVariant val = resourceProvider->resource(QTextDocument::ImageResource, QUrl{url});
1565
1566 if (QCoreApplication::instance()->thread() != QThread::currentThread()) {
1567 // must use images in non-GUI threads
1568 if (val.userType() == QMetaType::QImage) {
1569 QImage image = qvariant_cast<QImage>(val);
1570 charFormat.setBackground(image);
1571 } else if (val.userType() == QMetaType::QByteArray) {
1572 QImage image;
1573 if (image.loadFromData(val.toByteArray())) {
1574 charFormat.setBackground(image);
1575 }
1576 }
1577 } else {
1578 if (val.userType() == QMetaType::QImage || val.userType() == QMetaType::QPixmap) {
1579 charFormat.setBackground(qvariant_cast<QPixmap>(val));
1580 } else if (val.userType() == QMetaType::QByteArray) {
1581 QPixmap pm;
1582 if (pm.loadFromData(val.toByteArray())) {
1583 charFormat.setBackground(pm);
1584 }
1585 }
1586 }
1587 }
1588 if (!url.isEmpty())
1589 charFormat.setProperty(QTextFormat::BackgroundImageUrl, url);
1590}
1591
1593{
1594 for (int i = 0; i < text.size(); ++i)
1595 if (!text.at(i).isSpace() || text.at(i) == QChar::LineSeparator)
1596 return false;
1597 return true;
1598}
1599
1600static bool setIntAttribute(int *destination, const QString &value)
1601{
1602 bool ok = false;
1603 int val = value.toInt(&ok);
1604 if (ok)
1605 *destination = val;
1606
1607 return ok;
1608}
1609
1610static bool setFloatAttribute(qreal *destination, const QString &value)
1611{
1612 bool ok = false;
1613 qreal val = value.toDouble(&ok);
1614 if (ok)
1615 *destination = val;
1616
1617 return ok;
1618}
1619
1620static void setWidthAttribute(QTextLength *width, const QString &valueStr)
1621{
1622 bool ok = false;
1623 qreal realVal = valueStr.toDouble(&ok);
1624 if (ok) {
1625 *width = QTextLength(QTextLength::FixedLength, realVal);
1626 } else {
1627 auto value = QStringView(valueStr).trimmed();
1628 if (!value.isEmpty() && value.endsWith(u'%')) {
1629 value.truncate(value.size() - 1);
1630 realVal = value.toDouble(&ok);
1631 if (ok)
1632 *width = QTextLength(QTextLength::PercentageLength, realVal);
1633 }
1634 }
1635}
1636
1637#ifndef QT_NO_CSSPARSER
1638void QTextHtmlParserNode::parseStyleAttribute(const QString &value, const QTextDocument *resourceProvider)
1639{
1640 const QString css = "* {"_L1 + value + u'}';
1641 QCss::Parser parser(css);
1642 QCss::StyleSheet sheet;
1643 parser.parse(&sheet, Qt::CaseInsensitive);
1644 if (sheet.styleRules.size() != 1) return;
1645 applyCssDeclarations(sheet.styleRules.at(0).declarations, resourceProvider);
1646}
1647#endif
1648
1650{
1651 QStringList attrs;
1652
1653 while (pos < len) {
1654 eatSpace();
1655 if (hasPrefix(u'>') || hasPrefix(u'/'))
1656 break;
1657 QString key = parseWord().toLower();
1658 QString value = "1"_L1;
1659 if (key.size() == 0)
1660 break;
1661 eatSpace();
1662 if (hasPrefix(u'=')){
1663 pos++;
1664 eatSpace();
1665 value = parseWord();
1666 }
1667 if (value.size() == 0)
1668 continue;
1669 attrs << key << value;
1670 }
1671
1672 return attrs;
1673}
1674
1675void QTextHtmlParser::applyAttributes(const QStringList &attributes)
1676{
1677 // local state variable for qt3 textedit mode
1678 bool seenQt3Richtext = false;
1679 QString linkHref;
1680 QString linkType;
1681
1682 if (attributes.size() % 2 == 1)
1683 return;
1684
1685 QTextHtmlParserNode *node = nodes.last();
1686
1687 for (int i = 0; i < attributes.size(); i += 2) {
1688 QString key = attributes.at(i);
1689 QString value = attributes.at(i + 1);
1690
1691 switch (node->id) {
1692 case Html_font:
1693 // the infamous font tag
1694 if (key == "size"_L1 && value.size()) {
1695 int n = value.toInt();
1696 if (value.at(0) != u'+' && value.at(0) != u'-')
1697 n -= 3;
1698 node->charFormat.setProperty(QTextFormat::FontSizeAdjustment, n);
1699 } else if (key == "face"_L1) {
1700 if (value.contains(u',')) {
1701 QStringList families;
1702 for (auto family : value.tokenize(u','))
1703 families << family.trimmed().toString();
1704 node->charFormat.setFontFamilies(families);
1705 } else {
1706 node->charFormat.setFontFamilies(QStringList(value));
1707 }
1708 } else if (key == "color"_L1) {
1709 QColor c = QColor::fromString(value);
1710 if (!c.isValid())
1711 qWarning("QTextHtmlParser::applyAttributes: Unknown color name '%s'",value.toLatin1().constData());
1712 node->charFormat.setForeground(c);
1713 }
1714 break;
1715 case Html_ol:
1716 case Html_ul:
1717 if (key == "type"_L1) {
1718 node->hasOwnListStyle = true;
1719 if (value == "1"_L1) {
1720 node->listStyle = QTextListFormat::ListDecimal;
1721 } else if (value == "a"_L1) {
1722 node->listStyle = QTextListFormat::ListLowerAlpha;
1723 } else if (value == "A"_L1) {
1724 node->listStyle = QTextListFormat::ListUpperAlpha;
1725 } else if (value == "i"_L1) {
1726 node->listStyle = QTextListFormat::ListLowerRoman;
1727 } else if (value == "I"_L1) {
1728 node->listStyle = QTextListFormat::ListUpperRoman;
1729 } else {
1730 value = std::move(value).toLower();
1731 if (value == "square"_L1)
1732 node->listStyle = QTextListFormat::ListSquare;
1733 else if (value == "disc"_L1)
1734 node->listStyle = QTextListFormat::ListDisc;
1735 else if (value == "circle"_L1)
1736 node->listStyle = QTextListFormat::ListCircle;
1737 else if (value == "none"_L1)
1738 node->listStyle = QTextListFormat::ListStyleUndefined;
1739 }
1740 } else if (key == "start"_L1) {
1741 setIntAttribute(&node->listStart, value);
1742 }
1743 break;
1744 case Html_li:
1745 if (key == "class"_L1) {
1746 if (value == "unchecked"_L1)
1747 node->blockFormat.setMarker(QTextBlockFormat::MarkerType::Unchecked);
1748 else if (value == "checked"_L1)
1749 node->blockFormat.setMarker(QTextBlockFormat::MarkerType::Checked);
1750 }
1751 break;
1752 case Html_a:
1753 if (key == "href"_L1)
1754 node->charFormat.setAnchorHref(value);
1755 else if (key == "name"_L1)
1756 node->charFormat.setAnchorNames({value});
1757 break;
1758 case Html_img:
1759 if (key == "src"_L1 || key == "source"_L1) {
1760 node->imageName = value;
1761 } else if (key == "width"_L1) {
1762 node->imageWidth = -2; // register that there is a value for it.
1763 setFloatAttribute(&node->imageWidth, value);
1764 } else if (key == "height"_L1) {
1765 node->imageHeight = -2; // register that there is a value for it.
1766 setFloatAttribute(&node->imageHeight, value);
1767 } else if (key == "alt"_L1) {
1768 node->imageAlt = value;
1769 } else if (key == "title"_L1) {
1770 node->text = value;
1771 }
1772 break;
1773 case Html_tr:
1774 case Html_body:
1775 if (key == "bgcolor"_L1) {
1776 QColor c = QColor::fromString(value);
1777 if (!c.isValid())
1778 qWarning("QTextHtmlParser::applyAttributes: Unknown color name '%s'",value.toLatin1().constData());
1779 node->charFormat.setBackground(c);
1780 } else if (key == "background"_L1) {
1781 node->applyBackgroundImage(value, resourceProvider);
1782 }
1783 break;
1784 case Html_th:
1785 case Html_td:
1786 if (key == "width"_L1) {
1787 setWidthAttribute(&node->width, value);
1788 } else if (key == "bgcolor"_L1) {
1789 QColor c = QColor::fromString(value);
1790 if (!c.isValid())
1791 qWarning("QTextHtmlParser::applyAttributes: Unknown color name '%s'",value.toLatin1().constData());
1792 node->charFormat.setBackground(c);
1793 } else if (key == "background"_L1) {
1794 node->applyBackgroundImage(value, resourceProvider);
1795 } else if (key == "rowspan"_L1) {
1796 if (setIntAttribute(&node->tableCellRowSpan, value))
1797 node->tableCellRowSpan = qMax(1, node->tableCellRowSpan);
1798 } else if (key == "colspan"_L1) {
1799 if (setIntAttribute(&node->tableCellColSpan, value))
1800 node->tableCellColSpan = qBound(1, node->tableCellColSpan, 20480);
1801 }
1802 break;
1803 case Html_table:
1804 // If table border already set through css style, prefer that one otherwise consider this value
1805 if (key == "border"_L1 && !node->tableBorder) {
1806 setFloatAttribute(&node->tableBorder, value);
1807 } else if (key == "bgcolor"_L1) {
1808 QColor c = QColor::fromString(value);
1809 if (!c.isValid())
1810 qWarning("QTextHtmlParser::applyAttributes: Unknown color name '%s'",value.toLatin1().constData());
1811 node->charFormat.setBackground(c);
1812 } else if (key == "bordercolor"_L1) {
1813 QColor c = QColor::fromString(value);
1814 if (!c.isValid())
1815 qWarning("QTextHtmlParser::applyAttributes: Unknown color name '%s'",value.toLatin1().constData());
1816 node->borderBrush = c;
1817 } else if (key == "background"_L1) {
1818 node->applyBackgroundImage(value, resourceProvider);
1819 } else if (key == "cellspacing"_L1) {
1820 setFloatAttribute(&node->tableCellSpacing, value);
1821 } else if (key == "cellpadding"_L1) {
1822 setFloatAttribute(&node->tableCellPadding, value);
1823 } else if (key == "width"_L1) {
1824 setWidthAttribute(&node->width, value);
1825 } else if (key == "height"_L1) {
1826 setWidthAttribute(&node->height, value);
1827 }
1828 break;
1829 case Html_meta:
1830 if (key == "name"_L1 && value == "qrichtext"_L1)
1831 seenQt3Richtext = true;
1832
1833 if (key == "content"_L1 && value == "1"_L1 && seenQt3Richtext)
1834 textEditMode = true;
1835 break;
1836 case Html_hr:
1837 if (key == "width"_L1)
1838 setWidthAttribute(&node->width, value);
1839 break;
1840 case Html_link:
1841 if (key == "href"_L1)
1842 linkHref = value;
1843 else if (key == "type"_L1)
1844 linkType = value;
1845 break;
1846 case Html_pre:
1847 if (key == "class"_L1 && value.startsWith("language-"_L1))
1848 node->blockFormat.setProperty(QTextFormat::BlockCodeLanguage, value.mid(9));
1849 break;
1850 default:
1851 break;
1852 }
1853
1854 if (key == "style"_L1) {
1855#ifndef QT_NO_CSSPARSER
1856 node->parseStyleAttribute(value, resourceProvider);
1857#endif
1858 } else if (key == "align"_L1) {
1859 value = std::move(value).toLower();
1860 bool alignmentSet = true;
1861
1862 if (value == "left"_L1)
1863 node->blockFormat.setAlignment(Qt::AlignLeft|Qt::AlignAbsolute);
1864 else if (value == "right"_L1)
1865 node->blockFormat.setAlignment(Qt::AlignRight|Qt::AlignAbsolute);
1866 else if (value == "center"_L1)
1867 node->blockFormat.setAlignment(Qt::AlignHCenter);
1868 else if (value == "justify"_L1)
1869 node->blockFormat.setAlignment(Qt::AlignJustify);
1870 else
1871 alignmentSet = false;
1872
1873 if (node->id == Html_img) {
1874 // HTML4 compat
1875 if (alignmentSet) {
1876 if (node->blockFormat.alignment() & Qt::AlignLeft)
1877 node->cssFloat = QTextFrameFormat::FloatLeft;
1878 else if (node->blockFormat.alignment() & Qt::AlignRight)
1879 node->cssFloat = QTextFrameFormat::FloatRight;
1880 } else if (value == "middle"_L1) {
1881 node->charFormat.setVerticalAlignment(QTextCharFormat::AlignMiddle);
1882 } else if (value == "top"_L1) {
1883 node->charFormat.setVerticalAlignment(QTextCharFormat::AlignTop);
1884 }
1885 }
1886 } else if (key == "valign"_L1) {
1887 value = std::move(value).toLower();
1888 if (value == "top"_L1)
1889 node->charFormat.setVerticalAlignment(QTextCharFormat::AlignTop);
1890 else if (value == "middle"_L1)
1891 node->charFormat.setVerticalAlignment(QTextCharFormat::AlignMiddle);
1892 else if (value == "bottom"_L1)
1893 node->charFormat.setVerticalAlignment(QTextCharFormat::AlignBottom);
1894 } else if (key == "dir"_L1) {
1895 value = std::move(value).toLower();
1896 if (value == "ltr"_L1)
1897 node->blockFormat.setLayoutDirection(Qt::LeftToRight);
1898 else if (value == "rtl"_L1)
1899 node->blockFormat.setLayoutDirection(Qt::RightToLeft);
1900 } else if (key == "title"_L1) {
1901 node->charFormat.setToolTip(value);
1902 } else if (key == "id"_L1) {
1903 node->charFormat.setAnchor(true);
1904 node->charFormat.setAnchorNames({value});
1905 }
1906 }
1907
1908#ifndef QT_NO_CSSPARSER
1909 if (resourceProvider && !linkHref.isEmpty() && linkType == "text/css"_L1)
1910 importStyleSheet(linkHref);
1911#endif
1912}
1913
1914#ifndef QT_NO_CSSPARSER
1916{
1917public:
1919 : parser(parser) { nameCaseSensitivity = Qt::CaseInsensitive; }
1920
1921 bool nodeNameEquals(NodePtr node, const QString& nodeName) const override;
1922 QStringList nodeNames(NodePtr node) const override;
1923 QString attributeValue(NodePtr node, const QCss::AttributeSelector &aSelector) const override;
1924 bool hasAttributes(NodePtr node) const override;
1925 bool isNullNode(NodePtr node) const override;
1926 NodePtr parentNode(NodePtr node) const override;
1927 NodePtr previousSiblingNode(NodePtr node) const override;
1928 NodePtr duplicateNode(NodePtr node) const override;
1929 void freeNode(NodePtr node) const override;
1930
1931private:
1932 const QTextHtmlParser *parser;
1933};
1934
1935bool QTextHtmlStyleSelector::nodeNameEquals(NodePtr node, const QString& nodeName) const
1936{
1937 return parser->at(node.id).tag.compare(nodeName, nameCaseSensitivity) == 0;
1938}
1939
1941{
1942 return QStringList(parser->at(node.id).tag.toLower());
1943}
1944
1945#endif // QT_NO_CSSPARSER
1946
1947#ifndef QT_NO_CSSPARSER
1948
1949static inline int findAttribute(const QStringList &attributes, const QString &name)
1950{
1951 int idx = -1;
1952 do {
1953 idx = attributes.indexOf(name, idx + 1);
1954 } while (idx != -1 && (idx % 2 == 1));
1955 return idx;
1956}
1957
1958QString QTextHtmlStyleSelector::attributeValue(NodePtr node, const QCss::AttributeSelector &aSelector) const
1959{
1960 const QStringList &attributes = parser->at(node.id).attributes;
1961 const int idx = findAttribute(attributes, aSelector.name);
1962 if (idx == -1)
1963 return QString();
1964 return attributes.at(idx + 1);
1965}
1966
1967bool QTextHtmlStyleSelector::hasAttributes(NodePtr node) const
1968{
1969 const QStringList &attributes = parser->at(node.id).attributes;
1970 return !attributes.isEmpty();
1971}
1972
1973bool QTextHtmlStyleSelector::isNullNode(NodePtr node) const
1974{
1975 return node.id == 0;
1976}
1977
1979{
1980 NodePtr parent;
1981 parent.id = 0;
1982 if (node.id) {
1983 parent.id = parser->at(node.id).parent;
1984 }
1985 return parent;
1986}
1987
1989{
1990 return node;
1991}
1992
1994{
1995 NodePtr sibling;
1996 sibling.id = 0;
1997 if (!node.id)
1998 return sibling;
1999 int parent = parser->at(node.id).parent;
2000 if (!parent)
2001 return sibling;
2002 const int childIdx = parser->at(parent).children.indexOf(node.id);
2003 if (childIdx <= 0)
2004 return sibling;
2005 sibling.id = parser->at(parent).children.at(childIdx - 1);
2006 return sibling;
2007}
2008
2010{
2011}
2012
2013void QTextHtmlParser::resolveStyleSheetImports(const QCss::StyleSheet &sheet)
2014{
2015 for (int i = 0; i < sheet.importRules.size(); ++i) {
2016 const QCss::ImportRule &rule = sheet.importRules.at(i);
2017 if (rule.media.isEmpty() || rule.media.contains("screen"_L1, Qt::CaseInsensitive))
2018 importStyleSheet(rule.href);
2019 }
2020}
2021
2022void QTextHtmlParser::importStyleSheet(const QString &href)
2023{
2024 if (!resourceProvider)
2025 return;
2026 for (int i = 0; i < externalStyleSheets.size(); ++i)
2027 if (externalStyleSheets.at(i).url == href)
2028 return;
2029
2030 QVariant res = resourceProvider->resource(QTextDocument::StyleSheetResource, QUrl{href});
2031 QString css;
2032 if (res.userType() == QMetaType::QString) {
2033 css = res.toString();
2034 } else if (res.userType() == QMetaType::QByteArray) {
2035 // #### detect @charset
2036 css = QString::fromUtf8(res.toByteArray());
2037 }
2038 if (!css.isEmpty()) {
2039 QCss::Parser parser(css);
2040 QCss::StyleSheet sheet;
2041 parser.parse(&sheet, Qt::CaseInsensitive);
2042 externalStyleSheets.append(ExternalStyleSheet(href, sheet));
2043 resolveStyleSheetImports(sheet);
2044 }
2045}
2046
2048{
2049 QList<QCss::Declaration> decls;
2050 QCss::Declaration decl;
2051 QCss::Value val;
2052 switch (node.id) {
2053 case Html_a:
2054 case Html_u: {
2055 bool needsUnderline = (node.id == Html_u) ? true : false;
2056 if (node.id == Html_a) {
2057 for (int i = 0; i < node.attributes.size(); i += 2) {
2058 const QString key = node.attributes.at(i);
2059 if (key.compare("href"_L1, Qt::CaseInsensitive) == 0
2060 && !node.attributes.at(i + 1).isEmpty()) {
2061 needsUnderline = true;
2062 decl.d->property = "color"_L1;
2063 decl.d->propertyId = QCss::Color;
2064 val.type = QCss::Value::Function;
2065 val.variant = QStringList() << "palette"_L1 << "link"_L1;
2066 decl.d->values = QList<QCss::Value> { val };
2067 decl.d->inheritable = true;
2068 decls << decl;
2069 break;
2070 }
2071 }
2072 }
2073 if (needsUnderline) {
2074 decl = QCss::Declaration();
2075 decl.d->property = "text-decoration"_L1;
2076 decl.d->propertyId = QCss::TextDecoration;
2077 val.type = QCss::Value::KnownIdentifier;
2078 val.variant = QVariant(QCss::Value_Underline);
2079 decl.d->values = QList<QCss::Value> { val };
2080 decl.d->inheritable = true;
2081 decls << decl;
2082 }
2083 break;
2084 }
2085 case Html_b:
2086 case Html_strong:
2087 case Html_h1:
2088 case Html_h2:
2089 case Html_h3:
2090 case Html_h4:
2091 case Html_h5:
2092 case Html_th:
2093 decl = QCss::Declaration();
2094 decl.d->property = "font-weight"_L1;
2095 decl.d->propertyId = QCss::FontWeight;
2096 val.type = QCss::Value::KnownIdentifier;
2097 val.variant = QVariant(QCss::Value_Bold);
2098 decl.d->values = QList<QCss::Value> { val };
2099 decl.d->inheritable = true;
2100 decls << decl;
2101 if (node.id == Html_b || node.id == Html_strong)
2102 break;
2103 Q_FALLTHROUGH();
2104 case Html_big:
2105 case Html_small:
2106 if (node.id != Html_th) {
2107 decl = QCss::Declaration();
2108 decl.d->property = "font-size"_L1;
2109 decl.d->propertyId = QCss::FontSize;
2110 decl.d->inheritable = false;
2111 val.type = QCss::Value::KnownIdentifier;
2112 switch (node.id) {
2113 case Html_h1: val.variant = QVariant(QCss::Value_XXLarge); break;
2114 case Html_h2: val.variant = QVariant(QCss::Value_XLarge); break;
2115 case Html_h3: case Html_big: val.variant = QVariant(QCss::Value_Large); break;
2116 case Html_h4: val.variant = QVariant(QCss::Value_Medium); break;
2117 case Html_h5: case Html_small: val.variant = QVariant(QCss::Value_Small); break;
2118 default: break;
2119 }
2120 decl.d->values = QList<QCss::Value> { val };
2121 decls << decl;
2122 break;
2123 }
2124 Q_FALLTHROUGH();
2125 case Html_center:
2126 case Html_td:
2127 decl = QCss::Declaration();
2128 decl.d->property = "text-align"_L1;
2129 decl.d->propertyId = QCss::TextAlignment;
2130 val.type = QCss::Value::KnownIdentifier;
2131 val.variant = (node.id == Html_td) ? QVariant(QCss::Value_Left) : QVariant(QCss::Value_Center);
2132 decl.d->values = QList<QCss::Value> { val };
2133 decl.d->inheritable = true;
2134 decls << decl;
2135 break;
2136 case Html_del:
2137 case Html_s:
2138 decl = QCss::Declaration();
2139 decl.d->property = "text-decoration"_L1;
2140 decl.d->propertyId = QCss::TextDecoration;
2141 val.type = QCss::Value::KnownIdentifier;
2142 val.variant = QVariant(QCss::Value_LineThrough);
2143 decl.d->values = QList<QCss::Value> { val };
2144 decl.d->inheritable = true;
2145 decls << decl;
2146 break;
2147 case Html_em:
2148 case Html_i:
2149 case Html_cite:
2150 case Html_address:
2151 case Html_var:
2152 case Html_dfn:
2153 decl = QCss::Declaration();
2154 decl.d->property = "font-style"_L1;
2155 decl.d->propertyId = QCss::FontStyle;
2156 val.type = QCss::Value::KnownIdentifier;
2157 val.variant = QVariant(QCss::Value_Italic);
2158 decl.d->values = QList<QCss::Value> { val };
2159 decl.d->inheritable = true;
2160 decls << decl;
2161 break;
2162 case Html_sub:
2163 case Html_sup:
2164 decl = QCss::Declaration();
2165 decl.d->property = "vertical-align"_L1;
2166 decl.d->propertyId = QCss::VerticalAlignment;
2167 val.type = QCss::Value::KnownIdentifier;
2168 val.variant = (node.id == Html_sub) ? QVariant(QCss::Value_Sub) : QVariant(QCss::Value_Super);
2169 decl.d->values = QList<QCss::Value> { val };
2170 decl.d->inheritable = true;
2171 decls << decl;
2172 break;
2173 case Html_ul:
2174 case Html_ol:
2175 decl = QCss::Declaration();
2176 decl.d->property = "list-style"_L1;
2177 decl.d->propertyId = QCss::ListStyle;
2178 val.type = QCss::Value::KnownIdentifier;
2179 val.variant = (node.id == Html_ul) ? QVariant(QCss::Value_Disc) : QVariant(QCss::Value_Decimal);
2180 decl.d->values = QList<QCss::Value> { val };
2181 decl.d->inheritable = true;
2182 decls << decl;
2183 break;
2184 case Html_code:
2185 case Html_tt:
2186 case Html_kbd:
2187 case Html_samp:
2188 case Html_pre: {
2189 decl = QCss::Declaration();
2190 decl.d->property = "font-family"_L1;
2191 decl.d->propertyId = QCss::FontFamily;
2192 QList<QCss::Value> values;
2193 val.type = QCss::Value::String;
2194 val.variant = QFontDatabase::systemFont(QFontDatabase::FixedFont).family();
2195 values << val;
2196 decl.d->values = values;
2197 decl.d->inheritable = true;
2198 decls << decl;
2199 }
2200 if (node.id != Html_pre)
2201 break;
2202 Q_FALLTHROUGH();
2203 case Html_br:
2204 case Html_nobr:
2205 decl = QCss::Declaration();
2206 decl.d->property = "whitespace"_L1;
2207 decl.d->propertyId = QCss::Whitespace;
2208 val.type = QCss::Value::KnownIdentifier;
2209 switch (node.id) {
2210 case Html_br: val.variant = QVariant(QCss::Value_PreWrap); break;
2211 case Html_nobr: val.variant = QVariant(QCss::Value_NoWrap); break;
2212 case Html_pre: val.variant = QVariant(QCss::Value_Pre); break;
2213 default: break;
2214 }
2215 decl.d->values = QList<QCss::Value> { val };
2216 decl.d->inheritable = true;
2217 decls << decl;
2218 break;
2219 default:
2220 break;
2221 }
2222 return decls;
2223}
2224
2225QList<QCss::Declaration> QTextHtmlParser::declarationsForNode(int node) const
2226{
2227 QList<QCss::Declaration> decls;
2228
2229 QTextHtmlStyleSelector selector(this);
2230
2231 int idx = 0;
2232 selector.styleSheets.resize((resourceProvider ? 1 : 0)
2233 + externalStyleSheets.size()
2234 + inlineStyleSheets.size());
2235 if (resourceProvider)
2236 selector.styleSheets[idx++] = QTextDocumentPrivate::get(resourceProvider)->parsedDefaultStyleSheet;
2237
2238 for (int i = 0; i < externalStyleSheets.size(); ++i, ++idx)
2239 selector.styleSheets[idx] = externalStyleSheets.at(i).sheet;
2240
2241 for (int i = 0; i < inlineStyleSheets.size(); ++i, ++idx)
2242 selector.styleSheets[idx] = inlineStyleSheets.at(i);
2243
2244 selector.medium = resourceProvider ? resourceProvider->metaInformation(QTextDocument::CssMedia) : "screen"_L1;
2245
2246 QCss::StyleSelector::NodePtr n;
2247 n.id = node;
2248
2249 const char *extraPseudo = nullptr;
2250 if (nodes.at(node)->id == Html_a && nodes.at(node)->hasHref)
2251 extraPseudo = "link";
2252 // Ensure that our own style is taken into consideration
2253 decls = standardDeclarationForNode(*nodes.at(node));
2254 decls += selector.declarationsForNode(n, extraPseudo);
2255 n = selector.parentNode(n);
2256 while (!selector.isNullNode(n)) {
2257 QList<QCss::Declaration> inheritedDecls;
2258 inheritedDecls = selector.declarationsForNode(n, extraPseudo);
2259 for (int i = 0; i < inheritedDecls.size(); ++i) {
2260 const QCss::Declaration &decl = inheritedDecls.at(i);
2261 if (decl.d->inheritable)
2262 decls.prepend(decl);
2263 }
2264 n = selector.parentNode(n);
2265 }
2266 return decls;
2267}
2268
2270{
2271 while (i) {
2272 if (at(i).id == id)
2273 return true;
2274 i = at(i).parent;
2275 }
2276 return false;
2277}
2278
2279#endif // QT_NO_CSSPARSER
2280
2281QT_END_NAMESPACE
2282
2283#endif // QT_NO_TEXTHTMLPARSER
bool nodeIsChildOf(int i, QTextHTMLElements id) const
QTextHtmlParserNode * resolveParent()
void applyAttributes(const QStringList &attributes)
int margin(int i, int mar) const
int topMargin(int i) const
const QTextHtmlParserNode & at(int i) const
int depth(int i) const
QStringList parseAttributes()
int bottomMargin(int i) const
QTextHtmlParserNode * newNode(int parent)
NodePtr previousSiblingNode(NodePtr node) const override
bool isNullNode(NodePtr node) const override
bool nodeNameEquals(NodePtr node, const QString &nodeName) const override
QString attributeValue(NodePtr node, const QCss::AttributeSelector &aSelector) const override
bool hasAttributes(NodePtr node) const override
NodePtr duplicateNode(NodePtr node) const override
QTextHtmlStyleSelector(const QTextHtmlParser *parser)
void freeNode(NodePtr node) const override
NodePtr parentNode(NodePtr node) const override
QStringList nodeNames(NodePtr node) const override
static int findAttribute(const QStringList &attributes, const QString &name)
static bool operator<(QStringView entityStr, const QTextHtmlEntity &entity)
static bool operator<(const QTextHtmlEntity &entity, QStringView entityStr)
static const QTextHtmlElement * lookupElementHelper(QAnyStringView element)
static bool operator<(QTextHtmlElement e, T str)
static bool operator<(T str, QTextHtmlElement e)
static constexpr QTextHtmlElement elements[]
QList< QCss::Declaration > standardDeclarationForNode(const QTextHtmlParserNode &node)
static bool setFloatAttribute(qreal *destination, const QString &value)
static QTextFrameFormat::BorderStyle toQTextFrameFormat(QCss::BorderStyle cssStyle)
static void setWidthAttribute(QTextLength *width, const QString &valueStr)
static const ushort windowsLatin1ExtendedCharacters[0xA0 - 0x80]
static bool setIntAttribute(int *destination, const QString &value)
static QChar resolveEntity(QStringView entity)
static QString quoteNewline(const QString &s)
QTextHTMLElements
@ Html_h2
@ Html_dl
@ Html_meta
@ Html_samp
@ Html_em
@ Html_code
@ Html_th
@ Html_dd
@ Html_tr
@ Html_tbody
@ Html_nobr
@ Html_tfoot
@ Html_b
@ Html_h4
@ Html_a
@ Html_caption
@ Html_h5
@ Html_big
@ Html_title
@ Html_table
@ Html_address
@ Html_div
@ Html_var
@ Html_i
@ Html_u
@ Html_tt
@ Html_font
@ Html_p
@ Html_ol
@ Html_blockquote
@ Html_head
@ Html_ul
@ Html_span
@ Html_br
@ Html_script
@ Html_thead
@ Html_kbd
@ Html_pre
@ Html_body
@ Html_cite
@ Html_link
@ Html_s
@ Html_unknown
@ Html_dfn
@ Html_sub
@ Html_td
@ Html_hr
@ Html_li
@ Html_img
@ Html_html
@ Html_h6
@ Html_h1
@ Html_small
@ Html_sup
@ Html_del
@ Html_h3
@ Html_dt
@ Html_center
@ Html_strong
@ Html_style
QTextHTMLElements id
QLatin1StringView name
bool isNotSelfNesting() const
void initializeProperties(const QTextHtmlParserNode *parent, const QTextHtmlParser *parser)
QTextHTMLElements id
bool isNestedList(const QTextHtmlParser *parser) const
bool allowedInContext(int parentId) const
void applyForegroundImage(qint64 cacheKey, const QTextDocument *resourceProvider)
bool mayNotHaveChildren() const