Qt
Internal/Contributor docs for the Qt SDK. Note: These are NOT official API docs; those are found at https://doc.qt.io/
Loading...
Searching...
No Matches
cfx_xmlparser_unittest.cpp
Go to the documentation of this file.
1// Copyright 2016 The PDFium Authors
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "core/fxcrt/xml/cfx_xmlparser.h"
6
7#include <memory>
8
9#include "core/fxcrt/cfx_read_only_span_stream.h"
10#include "core/fxcrt/fx_codepage.h"
11#include "core/fxcrt/xml/cfx_xmldocument.h"
12#include "core/fxcrt/xml/cfx_xmlelement.h"
13#include "core/fxcrt/xml/cfx_xmlinstruction.h"
14#include "testing/gtest/include/gtest/gtest.h"
15
17 public:
// Test helper: wraps the bytes of `input` in a CFX_ReadOnlySpanStream, feeds
// that stream to a CFX_XMLParser, and returns whatever parser.Parse() returns.
// The tests in this file treat a null result as "parsing failed".
18 std::unique_ptr<CFX_XMLDocument> Parse(pdfium::span<const char> input) {
19 CFX_XMLParser parser(
20 pdfium::MakeRetain<CFX_ReadOnlySpanStream>(pdfium::as_bytes(input)));
21 return parser.Parse();
22 }
23};
24
// An attribute value written without quotes (display=1) is expected to make
// Parse() return null.
26 static const char input[] =
27 "<script display=1>\n"
28 "</script>";
29 ASSERT_TRUE(Parse(input) == nullptr);
30}
31
// Properly quoted attributes: the document must parse, the root must have a
// child named "script", and both attribute values must read back unchanged.
33 static const char input[] =
34 "<script contentType=\"application/x-javascript\" display=\"1\">\n"
35 "</script>";
36
37 std::unique_ptr<CFX_XMLDocument> doc = Parse(input);
38 ASSERT_TRUE(doc != nullptr);
39
40 CFX_XMLElement* script = doc->GetRoot()->GetFirstChildNamed(L"script");
41 ASSERT_TRUE(script != nullptr);
42
43 EXPECT_EQ(L"application/x-javascript", script->GetAttribute(L"contentType"));
44 EXPECT_EQ(L"1", script->GetAttribute(L"display"));
45}
46
// A CDATA section inside <script>: the payload (including the raw '<' and the
// quoted string) is expected to appear verbatim in the element's text data,
// together with the whitespace surrounding the section.
48 static const char input[] =
49 "<script>\n"
50 " <![CDATA[\n"
51 " if (a[1] < 3)\n"
52 " app.alert(\"Tclams\");\n"
53 " ]]>\n"
54 "</script>";
55
// Expected text data: the input above with the CDATA delimiters removed.
56 static const wchar_t cdata[] =
57 L"\n \n"
58 L" if (a[1] < 3)\n"
59 L" app.alert(\"Tclams\");\n"
60 L" \n";
61
62 std::unique_ptr<CFX_XMLDocument> doc = Parse(input);
63 ASSERT_TRUE(doc != nullptr);
64
65 CFX_XMLElement* script = doc->GetRoot()->GetFirstChildNamed(L"script");
66 ASSERT_TRUE(script != nullptr);
67 EXPECT_EQ(cdata, script->GetTextData());
68}
69
// A CDATA section whose payload contains the literal text "</script>": that
// text must be kept as character data and must not close the element.
71 static const char input[] =
72 "<script>\n"
73 " <![CDATA[\n"
74 " if (a[1] < 3)\n"
75 " app.alert(\"Tclams\");\n"
76 " </script>\n"
77 " ]]>\n"
78 "</script>";
79
// Expected text data still contains the inner "</script>" line.
80 static const wchar_t cdata[] =
81 L"\n \n"
82 L" if (a[1] < 3)\n"
83 L" app.alert(\"Tclams\");\n"
84 L" </script>\n"
85 L" \n";
86
87 std::unique_ptr<CFX_XMLDocument> doc = Parse(input);
88 ASSERT_TRUE(doc != nullptr);
89
90 CFX_XMLElement* script = doc->GetRoot()->GetFirstChildNamed(L"script");
91 ASSERT_TRUE(script != nullptr);
92 EXPECT_EQ(cdata, script->GetTextData());
93}
94
// A bare "<!>" inside the element: parsing must succeed and the construct is
// expected to contribute nothing to the text data (only the surrounding
// whitespace remains).
96 static const char input[] =
97 "<script>\n"
98 " <!>\n"
99 "</script>";
100
101 std::unique_ptr<CFX_XMLDocument> doc = Parse(input);
102 ASSERT_TRUE(doc != nullptr);
103
104 CFX_XMLElement* script = doc->GetRoot()->GetFirstChildNamed(L"script");
105 ASSERT_TRUE(script != nullptr);
106 EXPECT_EQ(L"\n \n", script->GetTextData());
107}
108
// A truncated "<![>" inside the element: parsing must succeed and only the
// text that precedes the construct is expected in the text data.
110 static const char input[] =
111 "<script>\n"
112 " <![>\n"
113 "</script>";
114
115 std::unique_ptr<CFX_XMLDocument> doc = Parse(input);
116 ASSERT_TRUE(doc != nullptr);
117
118 CFX_XMLElement* script = doc->GetRoot()->GetFirstChildNamed(L"script");
119 ASSERT_TRUE(script != nullptr);
120 EXPECT_EQ(L"\n ", script->GetTextData());
121}
122
// "<![CDATA>" (the opening '[' after CDATA is missing): parsing must succeed
// and only the text that precedes the construct is expected in the text data.
124 static const char input[] =
125 "<script>\n"
126 " <![CDATA>\n"
127 "</script>";
128
129 std::unique_ptr<CFX_XMLDocument> doc = Parse(input);
130 ASSERT_TRUE(doc != nullptr);
131
132 CFX_XMLElement* script = doc->GetRoot()->GetFirstChildNamed(L"script");
133 ASSERT_TRUE(script != nullptr);
134 EXPECT_EQ(L"\n ", script->GetTextData());
135}
136
// A CDATA section that is opened but never terminated with "]]>": parsing
// must still succeed and only the text before the section is expected.
138 static const char input[] =
139 "<script>\n"
140 " <![CDATA[\n"
141 "</script>";
142
143 std::unique_ptr<CFX_XMLDocument> doc = Parse(input);
144 ASSERT_TRUE(doc != nullptr);
145
146 CFX_XMLElement* script = doc->GetRoot()->GetFirstChildNamed(L"script");
147 ASSERT_TRUE(script != nullptr);
148 EXPECT_EQ(L"\n ", script->GetTextData());
149}
150
// An empty CDATA section "<![CDATA[]]>": it is expected to add nothing to the
// text data, leaving only the whitespace on either side of it.
152 static const char input[] =
153 "<script>\n"
154 " <![CDATA[]]>\n"
155 "</script>";
156
157 std::unique_ptr<CFX_XMLDocument> doc = Parse(input);
158 ASSERT_TRUE(doc != nullptr);
159
160 CFX_XMLElement* script = doc->GetRoot()->GetFirstChildNamed(L"script");
161 ASSERT_TRUE(script != nullptr);
162 EXPECT_EQ(L"\n \n", script->GetTextData());
163}
164
// A well-formed comment "<!-- A Comment -->": the comment text must not show
// up in the element's text data; only the surrounding whitespace is expected.
166 static const char input[] =
167 "<script>\n"
168 " <!-- A Comment -->\n"
169 "</script>";
170
171 std::unique_ptr<CFX_XMLDocument> doc = Parse(input);
172 ASSERT_TRUE(doc != nullptr);
173
174 CFX_XMLElement* script = doc->GetRoot()->GetFirstChildNamed(L"script");
175 ASSERT_TRUE(script != nullptr);
176 EXPECT_EQ(L"\n \n", script->GetTextData());
177}
178
// A comment whose opener has only one dash ("<!- A Comment -->"): parsing must
// succeed and the expected text data is the same as for a well-formed comment,
// i.e. just the surrounding whitespace.
180 static const char input[] =
181 "<script>\n"
182 " <!- A Comment -->\n"
183 "</script>";
184
185 std::unique_ptr<CFX_XMLDocument> doc = Parse(input);
186 ASSERT_TRUE(doc != nullptr);
187
188 CFX_XMLElement* script = doc->GetRoot()->GetFirstChildNamed(L"script");
189 ASSERT_TRUE(script != nullptr);
190 EXPECT_EQ(L"\n \n", script->GetTextData());
191}
192
// An empty comment "<!---->": it is expected to add nothing to the text data,
// leaving only the whitespace on either side of it.
194 static const char input[] =
195 "<script>\n"
196 " <!---->\n"
197 "</script>";
198
199 std::unique_ptr<CFX_XMLDocument> doc = Parse(input);
200 ASSERT_TRUE(doc != nullptr);
201
202 CFX_XMLElement* script = doc->GetRoot()->GetFirstChildNamed(L"script");
203 ASSERT_TRUE(script != nullptr);
204 EXPECT_EQ(L"\n \n", script->GetTextData());
205}
206
// "<!--->" (three dashes, so no complete "-->" terminator after the opener):
// parsing must succeed and only the text before the construct is expected.
208 static const char input[] =
209 "<script>\n"
210 " <!--->\n"
211 "</script>";
212
213 std::unique_ptr<CFX_XMLDocument> doc = Parse(input);
214 ASSERT_TRUE(doc != nullptr);
215
216 CFX_XMLElement* script = doc->GetRoot()->GetFirstChildNamed(L"script");
217 ASSERT_TRUE(script != nullptr);
218 EXPECT_EQ(L"\n ", script->GetTextData());
219}
220
222 static const char input[] =
223 "<script>\n"
224 " <!-->\n"
225 "</script>";
226
227 std::unique_ptr<CFX_XMLDocument> doc = Parse(input);
228 ASSERT_TRUE(doc != nullptr);
229
230 CFX_XMLElement* script = doc->GetRoot()->GetFirstChildNamed(L"script");
231 EXPECT_EQ(L"\n ", script->GetTextData());
232}
233
// Character and entity references in element text. Decimal and hexadecimal
// numeric references (including ones with long runs of leading zeros) and the
// five named entities amp/lt/gt/apos/quot are expected to be decoded. The
// all-zero numeric reference and the unknown "&something_else;" have no
// counterpart in the expected string below.
235 static const char input[] =
236 "<script>"
237 "&#66;" // B
238 "&#x54;" // T
239 "&#x6a;" // j
240 "&#x00000000000000000048;" // H
241 "&#x0000000000000000AB48;" // \xab48
242 "&#x0000000000000000000;"
243 "&amp;"
244 "&lt;"
245 "&gt;"
246 "&apos;"
247 "&quot;"
248 "&something_else;"
249 "</script>";
250
251 std::unique_ptr<CFX_XMLDocument> doc = Parse(input);
252 ASSERT_TRUE(doc != nullptr);
253
254 CFX_XMLElement* script = doc->GetRoot()->GetFirstChildNamed(L"script");
255 ASSERT_TRUE(script != nullptr);
256 EXPECT_EQ(L"BTjH\xab48&<>'\"", script->GetTextData());
257}
258
// Hexadecimal character references whose values are far too large to be a
// code point. Parsing must still succeed; the expected text data is the
// whitespace-only literal in the EXPECT_EQ below.
// NOTE(review): presumably each out-of-range reference is replaced by a
// space - confirm against CFX_XMLParser.
260 static const char input[] =
261 "<script>"
262 "&#xaDBDFFFFF;"
263 "&#xafffffffffffffffffffffffffffffffff;"
264 "</script>";
265
266 std::unique_ptr<CFX_XMLDocument> doc = Parse(input);
267 ASSERT_TRUE(doc != nullptr);
268
269 CFX_XMLElement* script = doc->GetRoot()->GetFirstChildNamed(L"script");
270 ASSERT_TRUE(script != nullptr);
271 EXPECT_EQ(L" ", script->GetTextData());
272}
273
// Decimal character references whose values are far too large to be a code
// point. Parsing must still succeed; the expected text data is the
// whitespace-only literal in the EXPECT_EQ below.
// NOTE(review): presumably each out-of-range reference is replaced by a
// space - confirm against CFX_XMLParser.
275 static const char input[] =
276 "<script>"
277 "&#2914910205;"
278 "&#29149102052342342134521341234512351234213452315;"
279 "</script>";
280
281 std::unique_ptr<CFX_XMLDocument> doc = Parse(input);
282 ASSERT_TRUE(doc != nullptr);
283
284 CFX_XMLElement* script = doc->GetRoot()->GetFirstChildNamed(L"script");
285 ASSERT_TRUE(script != nullptr);
286 EXPECT_EQ(L" ", script->GetTextData());
287}
288
// Direct checks of the static CFX_XMLParser::IsXMLNameChar(ch, bFirstChar).
// '-', '.' and '0' are expected to be rejected as the first character of a
// name but accepted in later positions.
290 EXPECT_FALSE(CFX_XMLParser::IsXMLNameChar(L'-', true));
291 EXPECT_TRUE(CFX_XMLParser::IsXMLNameChar(L'-', false));
292
293 EXPECT_FALSE(CFX_XMLParser::IsXMLNameChar(L'.', true));
294 EXPECT_TRUE(CFX_XMLParser::IsXMLNameChar(L'.', false));
295
296 EXPECT_FALSE(CFX_XMLParser::IsXMLNameChar(L'0', true));
297 EXPECT_TRUE(CFX_XMLParser::IsXMLNameChar(L'0', false));
298
// ASCII letters are expected to be accepted in either position.
299 EXPECT_TRUE(CFX_XMLParser::IsXMLNameChar(L'a', true));
300 EXPECT_TRUE(CFX_XMLParser::IsXMLNameChar(L'a', false));
301
302 EXPECT_TRUE(CFX_XMLParser::IsXMLNameChar(L'A', true));
303 EXPECT_TRUE(CFX_XMLParser::IsXMLNameChar(L'A', false));
304
// Parentheses and square brackets are expected to be rejected in either
// position.
305 EXPECT_FALSE(CFX_XMLParser::IsXMLNameChar(L'(', false));
306 EXPECT_FALSE(CFX_XMLParser::IsXMLNameChar(L'(', true));
307 EXPECT_FALSE(CFX_XMLParser::IsXMLNameChar(L')', false));
308 EXPECT_FALSE(CFX_XMLParser::IsXMLNameChar(L')', true));
309 EXPECT_FALSE(CFX_XMLParser::IsXMLNameChar(L'[', false));
310 EXPECT_FALSE(CFX_XMLParser::IsXMLNameChar(L'[', true));
311 EXPECT_FALSE(CFX_XMLParser::IsXMLNameChar(L']', false));
312 EXPECT_FALSE(CFX_XMLParser::IsXMLNameChar(L']', true));
313
// Boundary probes for first-character acceptance: one value below, values
// inside, and one value above the range 0x2070..0x218F.
314 EXPECT_FALSE(CFX_XMLParser::IsXMLNameChar(0x2069, true));
315 EXPECT_TRUE(CFX_XMLParser::IsXMLNameChar(0x2070, true));
316 EXPECT_TRUE(CFX_XMLParser::IsXMLNameChar(0x2073, true));
317 EXPECT_TRUE(CFX_XMLParser::IsXMLNameChar(0x218F, true));
318 EXPECT_FALSE(CFX_XMLParser::IsXMLNameChar(0x2190, true));
319
// Same boundary probes for the range 0xFDF0..0xFFFD.
320 EXPECT_FALSE(CFX_XMLParser::IsXMLNameChar(0xFDEF, true));
321 EXPECT_TRUE(CFX_XMLParser::IsXMLNameChar(0xFDF0, true));
322 EXPECT_TRUE(CFX_XMLParser::IsXMLNameChar(0xFDF1, true));
323 EXPECT_TRUE(CFX_XMLParser::IsXMLNameChar(0xFFFD, true));
324 EXPECT_FALSE(CFX_XMLParser::IsXMLNameChar(0xFFFE, true));
325}
326
// A document consisting only of a closing tag, with no matching opening tag,
// is expected to make Parse() return null.
328 ASSERT_TRUE(Parse("</endtag>") == nullptr);
329}
330
// A second "</p>" after the element has already been closed is expected to
// make Parse() return null.
332 ASSERT_TRUE(Parse("<p></p></p>") == nullptr);
333}
334
// A processing instruction ahead of the first element must survive parsing:
// the root's first child is expected to be an instruction node that reports
// itself as the originalXFAVersion instruction.
336 static const char input[] =
337 "<?originalXFAVersion http://www.xfa.org/schema/xfa-template/3.3/ ?>"
338 "<form></form>";
339
340 std::unique_ptr<CFX_XMLDocument> doc = Parse(input);
341 ASSERT_TRUE(doc != nullptr);
342
343 CFX_XMLElement* root = doc->GetRoot();
344 ASSERT_TRUE(root->GetFirstChild() != nullptr);
// The node type is asserted before the downcast below so a wrong node kind
// stops the test instead of reaching the instruction-specific call.
345 ASSERT_EQ(CFX_XMLNode::Type::kInstruction, root->GetFirstChild()->GetType());
346
347 CFX_XMLInstruction* instruction = ToXMLInstruction(root->GetFirstChild());
348 EXPECT_TRUE(instruction->IsOriginalXFAVersion());
349}
350
// Element text in which an '&' is followed by a tag ("&<p>;") rather than an
// entity name: Parse() is expected to return null for this input.
352 static const char input[] =
353 "<script>"
354 "Test &<p>; thing"
355 "</script>";
356 ASSERT_TRUE(Parse(input) == nullptr);
357}
TEST_F(CFX_XMLParserTest, AttributesMustBeQuoted)
WideString GetTextData() const
WideString GetAttribute(const WideString &name) const
bool IsOriginalXFAVersion() const
std::unique_ptr< CFX_XMLDocument > Parse(pdfium::span< const char > input)
static bool IsXMLNameChar(wchar_t ch, bool bFirstChar)