Qt
Internal/Contributor docs for the Qt SDK. Note: These are NOT official API docs; those are found at https://doc.qt.io/
Loading...
Searching...
No Matches
qdochtmlparser.cpp
Go to the documentation of this file.
1// Copyright (C) 2024 The Qt Company Ltd.
2// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
3// Qt-Security score:significant reason:trusted-sources
4
5#include <qdochtmlparser_p.h>
6#include <QtCore/qregularexpression.h>
7
9
10using namespace Qt::StringLiterals;
11
12// An emprical value to avoid too much content
13static constexpr qsizetype firstIndexOfParagraphTag = 400;
14
15// A paragraph can start with <p><i>, or <p><tt>
16// We need smallest value to use QString::indexOf
17static constexpr auto lengthOfStartParagraphTag = qsizetype(std::char_traits<char>::length("<p>"));
18static constexpr auto lengthOfEndParagraphTag = qsizetype(std::char_traits<char>::length("</p>"));
19
20static QString getContentsByMarks(const QString &html, QString startMark, QString endMark)
21{
22 startMark.prepend("$$$"_L1);
23 endMark.prepend("<!-- @@@"_L1);
24
25 QString contents;
26 qsizetype start = html.indexOf(startMark);
27 if (start != -1) {
28 start = html.indexOf("-->"_L1, start);
29 if (start != -1) {
30 qsizetype end = html.indexOf(endMark, start);
31 if (end != -1) {
32 start += qsizetype(std::char_traits<char>::length("-->"));
33 contents = html.mid(start, end - start);
34 }
35 }
36 }
37 return contents;
38}
39
40
41static void stripAllHtml(QString *html)
42{
43 Q_ASSERT(html);
44 html->remove(QRegularExpression("<.*?>"_L1));
45}
46
47/*! \internal
48 \brief Process the string obtained from start mark to end mark.
49 This is duplicated from QtC's Utils::HtmlExtractor, modified on top of it.
50*/
51static void processOutput(QString *html)
52{
53 Q_ASSERT(html);
54 if (html->isEmpty())
55 return;
56
57 // Do not write the first paragraph in case it has extra tags below.
58 // <p><i>This is only used on the Maemo platform.</i></p>
59 // or: <p><tt>This is used on Windows only.</tt></p>
60 // or: <p>[Conditional]</p>
61 const auto skipFirstParagraphIfNeeded = [html](qsizetype &index){
62 const bool shouldSkipFirstParagraph = html->indexOf(QLatin1String("<p><i>")) == index ||
63 html->indexOf(QLatin1String("<p><tt>")) == index ||
64 html->indexOf(QLatin1String("<p><b>")) == index ||
65 html->indexOf(QLatin1String("<p>[Conditional]</p>")) == index;
66
67 if (shouldSkipFirstParagraph)
68 index = html->indexOf(QLatin1String("</p>"), index) + lengthOfEndParagraphTag;
69 };
70
71 // Try to get the entire first paragraph, but if one is not found or if its opening
72 // tag is not in the very beginning (using an empirical value as the limit)
73 // the html is cleared out to avoid too much content.
74 qsizetype index = html->indexOf(QLatin1String("<p>"));
75 if (index != -1 && index < firstIndexOfParagraphTag) {
76 skipFirstParagraphIfNeeded(index);
77 qsizetype endIndex = html->indexOf(QLatin1String("</p>"), index + lengthOfStartParagraphTag);
78 if (endIndex != -1) {
79 *html = html->mid(index, endIndex - index);
80 } else {
81 html->clear();
82 }
83 } else {
84 html->clear();
85 }
86}
87
88class ExtractQmlType : public HtmlExtractor
89{
90public:
91 QString extract(const QString &code, const QString &keyword, ExtractionMode mode) override;
92};
93
94class ExtractQmlProperty : public HtmlExtractor
95{
96public:
97 QString extract(const QString &code, const QString &keyword, ExtractionMode mode) override;
98};
99
100class ExtractQmlMethodOrSignal : public HtmlExtractor
101{
102public:
103 QString extract(const QString &code, const QString &keyword, ExtractionMode mode) override;
104};
105
106QString ExtractQmlType::extract(const QString &code, const QString &element, ExtractionMode mode)
107{
108 QString result;
109 // Get brief description
110 if (mode == ExtractionMode::Simplified) {
111 result = getContentsByMarks(code, element + "-brief"_L1 , element);
112 // Remove More...
113 if (!result.isEmpty()) {
114 const auto tailToRemove = "More..."_L1;
115 const auto lastIndex = result.lastIndexOf(tailToRemove);
116 if (lastIndex != -1)
117 result.remove(lastIndex, tailToRemove.length());
118 }
119 } else {
120 result = getContentsByMarks(code, element + "-description"_L1, element);
121 // Remove header
122 if (!result.isEmpty()) {
123 const auto headerToRemove = "Detailed Description"_L1;
124 const auto firstIndex = result.indexOf(headerToRemove);
125 if (firstIndex != -1)
126 result.remove(firstIndex, headerToRemove.length());
127 }
128 }
129
130 stripAllHtml(&result);
131 return result.trimmed();
132}
133
134QString ExtractQmlProperty::extract(const QString &code, const QString &keyword, ExtractionMode mode)
135{
136 QString result;
137 // Qt 5.15 way of finding properties in doc
138 QString startMark = QString::fromLatin1("<a name=\"%1-prop\">").arg(keyword);
139 qsizetype startIndex = code.indexOf(startMark);
140 if (startIndex == -1) {
141 // if not found, try Qt6
142 startMark = QString::fromLatin1(
143 "<td class=\"tblQmlPropNode\"><p>\n<span class=\"name\">%1</span>")
144 .arg(keyword);
145 startIndex = code.indexOf(startMark);
146 }
147
148 if (startIndex != -1) {
149 result = code.mid(startIndex + startMark.size());
150 startIndex = result.indexOf(QLatin1String("<div class=\"qmldoc\"><p>"));
151 } else {
152 result = getContentsByMarks(code, keyword + "-prop"_L1, keyword );
153 startIndex = result.indexOf(QLatin1String("<p>"));
154 }
155
156 if (startIndex == -1)
157 return {};
158 result = result.mid(startIndex);
159 if (mode == ExtractionMode::Simplified)
160 processOutput(&result);
161 stripAllHtml(&result);
162 return result.trimmed();
163}
164
165QString ExtractQmlMethodOrSignal::extract(const QString &code, const QString &keyword, ExtractionMode mode)
166{
167 // the case with <!-- $$$childAt[overload1]$$$childAtrealreal -->
168 QString mark = QString::fromLatin1("$$$%1[overload1]$$$%1").arg(keyword);
169 qsizetype startIndex = code.indexOf(mark);
170 if (startIndex != -1) {
171 startIndex = code.indexOf("-->"_L1, startIndex + mark.length());
172 if (startIndex == -1)
173 return {};
174 } else {
175 // it could be part of the method list
176 mark = QString::fromLatin1("<span class=\"name\">%1</span>")
177 .arg(keyword);
178 startIndex = code.indexOf(mark);
179 if (startIndex != -1)
180 startIndex += mark.length();
181 else
182 return {};
183 }
184
185 startIndex = code.indexOf(QLatin1String("<div class=\"qmldoc\"><p>"), startIndex);
186 if (startIndex == -1)
187 return {};
188
189 QString endMark = QString::fromLatin1("<!-- @@@");
190 qsizetype endIndex = code.indexOf(endMark, startIndex);
191 QString contents = code.mid(startIndex, endIndex);
192 if (mode == ExtractionMode::Simplified)
193 processOutput(&contents);
194 stripAllHtml(&contents);
195 return contents.trimmed();
196}
197
198ExtractDocumentation::ExtractDocumentation(QQmlJS::Dom::DomType domType)
199{
200 using namespace QQmlJS::Dom;
201 switch (domType) {
202 case DomType::QmlObject:
203 m_extractor = std::make_unique<ExtractQmlType>();
204 break;
205 case DomType::Binding:
206 case DomType::PropertyDefinition:
207 m_extractor = std::make_unique<ExtractQmlProperty>();
208 break;
209 case DomType::MethodInfo:
210 m_extractor = std::make_unique<ExtractQmlMethodOrSignal>();
211 break;
212 default:
213 break;
214 }
215}
216
217QString ExtractDocumentation::execute(const QString &code, const QString &keyword, HtmlExtractor::ExtractionMode mode)
218{
219 Q_ASSERT(m_extractor);
220 return m_extractor->extract(code, keyword, mode);
221}
222
223QT_END_NAMESPACE
ExtractDocumentation(QQmlJS::Dom::DomType domType)
QString execute(const QString &code, const QString &keyword, HtmlExtractor::ExtractionMode mode)
QString extract(const QString &code, const QString &keyword, ExtractionMode mode) override
QString extract(const QString &code, const QString &keyword, ExtractionMode mode) override
QString extract(const QString &code, const QString &keyword, ExtractionMode mode) override
static constexpr auto lengthOfEndParagraphTag
static constexpr auto lengthOfStartParagraphTag
static void processOutput(QString *html)
Process the string obtained from start mark to end mark. This is duplicated from QtC's Utils::HtmlExt...
static constexpr qsizetype firstIndexOfParagraphTag
static void stripAllHtml(QString *html)
static QString getContentsByMarks(const QString &html, QString startMark, QString endMark)