Qt
Internal/Contributor docs for the Qt SDK. Note: These are NOT official API docs; those are found at https://doc.qt.io/
Loading...
Searching...
No Matches
cxfa_document_builder.cpp
Go to the documentation of this file.
1// Copyright 2016 The PDFium Authors
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7#include "xfa/fxfa/parser/cxfa_document_builder.h"
8
9#include <utility>
10#include <vector>
11
12#include "core/fxcrt/autorestorer.h"
13#include "core/fxcrt/fx_codepage.h"
14#include "core/fxcrt/fx_extension.h"
15#include "core/fxcrt/xml/cfx_xmlchardata.h"
16#include "core/fxcrt/xml/cfx_xmldocument.h"
17#include "core/fxcrt/xml/cfx_xmlelement.h"
18#include "core/fxcrt/xml/cfx_xmlinstruction.h"
19#include "core/fxcrt/xml/cfx_xmlnode.h"
20#include "core/fxcrt/xml/cfx_xmltext.h"
21#include "fxjs/xfa/cjx_object.h"
22#include "third_party/abseil-cpp/absl/types/optional.h"
23#include "third_party/base/check.h"
24#include "third_party/base/notreached.h"
25#include "xfa/fxfa/parser/cxfa_document.h"
26#include "xfa/fxfa/parser/cxfa_node.h"
27#include "xfa/fxfa/parser/cxfa_subform.h"
28#include "xfa/fxfa/parser/cxfa_template.h"
29#include "xfa/fxfa/parser/xfa_basic_data.h"
30#include "xfa/fxfa/parser/xfa_utils.h"
31
32namespace {
33
34CFX_XMLNode* GetDocumentNode(CFX_XMLNode* pRootNode) {
35 for (CFX_XMLNode* pXMLNode = pRootNode->GetFirstChild(); pXMLNode;
36 pXMLNode = pXMLNode->GetNextSibling()) {
38 return pXMLNode;
39 }
40 return nullptr;
41}
42
43bool MatchNodeName(CFX_XMLNode* pNode,
44 ByteStringView bsLocalTagName,
45 ByteStringView bsNamespaceURIPrefix,
46 XFA_PacketMatch eMatch) {
47 CFX_XMLElement* pElement = ToXMLElement(pNode);
48 if (!pElement)
49 return false;
50
51 if (!pElement->GetLocalTagName().EqualsASCII(bsLocalTagName))
52 return false;
53
54 if (eMatch == XFA_PacketMatch::kNoMatch)
55 return true;
56
57 WideString wsNodeStr = pElement->GetNamespaceURI();
58 if (eMatch == XFA_PacketMatch::kPrefixMatch) {
59 return wsNodeStr.AsStringView()
60 .First(bsNamespaceURIPrefix.GetLength())
61 .EqualsASCII(bsNamespaceURIPrefix);
62 }
63 return wsNodeStr.EqualsASCII(bsNamespaceURIPrefix);
64}
65
66bool GetAttributeLocalName(WideStringView wsAttributeName,
67 WideString& wsLocalAttrName) {
68 WideString wsAttrName(wsAttributeName);
69 auto pos = wsAttrName.Find(L':', 0);
70 if (!pos.has_value()) {
71 wsLocalAttrName = std::move(wsAttrName);
72 return false;
73 }
74 wsLocalAttrName = wsAttrName.Last(wsAttrName.GetLength() - pos.value() - 1);
75 return true;
76}
77
78bool ResolveAttribute(CFX_XMLElement* pElement,
79 const WideString& wsAttrName,
80 WideString& wsLocalAttrName,
81 WideString& wsNamespaceURI) {
82 WideString wsNSPrefix;
83 if (GetAttributeLocalName(wsAttrName.AsStringView(), wsLocalAttrName)) {
84 wsNSPrefix = wsAttrName.First(wsAttrName.GetLength() -
85 wsLocalAttrName.GetLength() - 1);
86 }
87 if (wsLocalAttrName.EqualsASCII("xmlns") || wsNSPrefix.EqualsASCII("xmlns") ||
88 wsNSPrefix.EqualsASCII("xml")) {
89 return false;
90 }
92 &wsNamespaceURI)) {
93 wsNamespaceURI.clear();
94 return false;
95 }
96 return true;
97}
98
99absl::optional<WideString> FindAttributeWithNS(
100 CFX_XMLElement* pElement,
101 WideStringView wsLocalAttributeName,
102 WideStringView wsNamespaceURIPrefix) {
103 WideString wsAttrNS;
104 for (auto it : pElement->GetAttributes()) {
105 auto pos = it.first.Find(L':', 0);
106 WideString wsNSPrefix;
107 if (!pos.has_value()) {
108 if (wsLocalAttributeName != it.first)
109 continue;
110 } else {
111 if (wsLocalAttributeName !=
112 it.first.Last(it.first.GetLength() - pos.value() - 1)) {
113 continue;
114 }
115 wsNSPrefix = it.first.First(pos.value());
116 }
117 if (!XFA_FDEExtension_ResolveNamespaceQualifier(pElement, wsNSPrefix,
118 &wsAttrNS) ||
119 wsAttrNS != wsNamespaceURIPrefix) {
120 continue;
121 }
122 return it.second;
123 }
124 return absl::nullopt;
125}
126
127CFX_XMLNode* GetDataSetsFromXDP(CFX_XMLNode* pXMLDocumentNode) {
128 XFA_PACKETINFO datasets_packet =
130 if (MatchNodeName(pXMLDocumentNode, datasets_packet.name, datasets_packet.uri,
131 datasets_packet.match)) {
132 return pXMLDocumentNode;
133 }
135 if (!MatchNodeName(pXMLDocumentNode, xdp_packet.name, xdp_packet.uri,
136 xdp_packet.match)) {
137 return nullptr;
138 }
139 for (CFX_XMLNode* pDatasetsNode = pXMLDocumentNode->GetFirstChild();
140 pDatasetsNode; pDatasetsNode = pDatasetsNode->GetNextSibling()) {
141 if (MatchNodeName(pDatasetsNode, datasets_packet.name, datasets_packet.uri,
142 datasets_packet.match)) {
143 return pDatasetsNode;
144 }
145 }
146 return nullptr;
147}
148
149bool IsStringAllWhitespace(WideString wsText) {
150 wsText.TrimRight(L"\x20\x9\xD\xA");
151 return wsText.IsEmpty();
152}
153
154void ConvertXMLToPlainText(CFX_XMLElement* pRootXMLNode, WideString& wsOutput) {
155 for (CFX_XMLNode* pXMLChild = pRootXMLNode->GetFirstChild(); pXMLChild;
156 pXMLChild = pXMLChild->GetNextSibling()) {
157 switch (pXMLChild->GetType()) {
159 WideString wsTextData = ToXMLElement(pXMLChild)->GetTextData();
160 wsTextData += L"\n";
161 wsOutput += wsTextData;
162 break;
163 }
166 WideString wsText = ToXMLText(pXMLChild)->GetText();
167 if (IsStringAllWhitespace(wsText))
168 continue;
169 wsOutput = std::move(wsText);
170 break;
171 }
172 default:
173 NOTREACHED_NORETURN();
174 }
175 }
176}
177
178WideString GetPlainTextFromRichText(CFX_XMLNode* pXMLNode) {
179 if (!pXMLNode)
180 return WideString();
181
182 WideString wsPlainText;
183 switch (pXMLNode->GetType()) {
185 CFX_XMLElement* pXMLElement = static_cast<CFX_XMLElement*>(pXMLNode);
186 WideString wsTag = pXMLElement->GetLocalTagName();
187 uint32_t uTag = FX_HashCode_GetLoweredW(wsTag.AsStringView());
188 if (uTag == 0x0001f714) {
189 wsPlainText += L"\n";
190 } else if (uTag == 0x00000070) {
191 if (!wsPlainText.IsEmpty()) {
192 wsPlainText += L"\n";
193 }
194 } else if (uTag == 0xa48ac63) {
195 if (!wsPlainText.IsEmpty() && wsPlainText.Back() != '\n') {
196 wsPlainText += L"\n";
197 }
198 }
199 break;
200 }
203 WideString wsContent = ToXMLText(pXMLNode)->GetText();
204 wsPlainText += wsContent;
205 break;
206 }
207 default:
208 break;
209 }
210 for (CFX_XMLNode* pChildXML = pXMLNode->GetFirstChild(); pChildXML;
211 pChildXML = pChildXML->GetNextSibling()) {
212 wsPlainText += GetPlainTextFromRichText(pChildXML);
213 }
214
215 return wsPlainText;
216}
217
218} // namespace
219
220bool XFA_RecognizeRichText(CFX_XMLElement* pRichTextXMLNode) {
221 return pRichTextXMLNode && pRichTextXMLNode->GetNamespaceURI().EqualsASCII(
222 "http://www.w3.org/1999/xhtml");
223}
224
227
229
231 XFA_PacketType ePacketID) {
232 DCHECK(pXML);
233
234 CFX_XMLNode* root = Build(pXML);
235 if (!root)
236 return false;
237
238 root_node_ = ParseAsXDPPacket(root, ePacketID);
239 return !!root_node_;
240}
241
243 if (!pXML)
244 return nullptr;
245
246 xml_doc_ = pXML;
247 xml_doc_->GetRoot()->InsertChildNode(
248 xml_doc_->CreateNode<CFX_XMLInstruction>(L"xml"), 0);
249
250 return GetDocumentNode(xml_doc_->GetRoot());
251}
252
254 CFX_XMLNode* pXMLNode) {
255 XFA_PacketType ePacketID = pXFANode->GetPacketType();
256 if (ePacketID == XFA_PacketType::Datasets) {
257 if (pXFANode->GetElementType() == XFA_Element::DataValue) {
258 for (CFX_XMLNode* pXMLChild = pXMLNode->GetFirstChild(); pXMLChild;
259 pXMLChild = pXMLChild->GetNextSibling()) {
260 CFX_XMLNode::Type eNodeType = pXMLChild->GetType();
261 if (eNodeType == CFX_XMLNode::Type::kInstruction)
262 continue;
263
264 if (eNodeType == CFX_XMLNode::Type::kElement) {
265 CXFA_Node* pXFAChild = node_factory_->CreateNode(
266 XFA_PacketType::Datasets, XFA_Element::DataValue);
267 if (!pXFAChild)
268 return;
269
270 CFX_XMLElement* child = static_cast<CFX_XMLElement*>(pXMLChild);
271 WideString wsNodeStr = child->GetLocalTagName();
272 pXFAChild->JSObject()->SetCData(XFA_Attribute::Name, wsNodeStr);
273 WideString wsChildValue = GetPlainTextFromRichText(child);
274 if (!wsChildValue.IsEmpty())
275 pXFAChild->JSObject()->SetCData(XFA_Attribute::Value, wsChildValue);
276
277 pXFANode->InsertChildAndNotify(pXFAChild, nullptr);
278 pXFAChild->SetXMLMappingNode(pXMLChild);
280 break;
281 }
282 }
283 root_node_ = pXFANode;
284 } else {
285 root_node_ = DataLoader(pXFANode, pXMLNode);
286 }
287 } else if (pXFANode->IsContentNode()) {
288 ParseContentNode(pXFANode, pXMLNode, ePacketID);
289 root_node_ = pXFANode;
290 } else {
291 root_node_ = NormalLoader(pXFANode, pXMLNode, ePacketID, true);
292 }
293}
294
296 return root_node_;
297}
298
299CXFA_Node* CXFA_DocumentBuilder::ParseAsXDPPacket(CFX_XMLNode* pXMLDocumentNode,
300 XFA_PacketType ePacketID) {
301 switch (ePacketID) {
302 case XFA_PacketType::Xdp:
303 return ParseAsXDPPacket_XDP(pXMLDocumentNode);
304 case XFA_PacketType::Config:
305 return ParseAsXDPPacket_Config(pXMLDocumentNode);
306 case XFA_PacketType::Template:
307 return ParseAsXDPPacket_Template(pXMLDocumentNode);
308 case XFA_PacketType::Form:
309 return ParseAsXDPPacket_Form(pXMLDocumentNode);
310 case XFA_PacketType::Datasets:
311 return ParseAsXDPPacket_Data(pXMLDocumentNode);
312 case XFA_PacketType::Xdc:
313 return ParseAsXDPPacket_Xdc(pXMLDocumentNode);
314 case XFA_PacketType::LocaleSet:
315 return ParseAsXDPPacket_LocaleConnectionSourceSet(
316 pXMLDocumentNode, XFA_PacketType::LocaleSet, XFA_Element::LocaleSet);
317 case XFA_PacketType::ConnectionSet:
318 return ParseAsXDPPacket_LocaleConnectionSourceSet(
319 pXMLDocumentNode, XFA_PacketType::ConnectionSet,
320 XFA_Element::ConnectionSet);
321 case XFA_PacketType::SourceSet:
322 return ParseAsXDPPacket_LocaleConnectionSourceSet(
323 pXMLDocumentNode, XFA_PacketType::SourceSet, XFA_Element::SourceSet);
324 default:
325 return ParseAsXDPPacket_User(pXMLDocumentNode);
326 }
327}
328
329CXFA_Node* CXFA_DocumentBuilder::ParseAsXDPPacket_XDP(
330 CFX_XMLNode* pXMLDocumentNode) {
332 if (!MatchNodeName(pXMLDocumentNode, packet.name, packet.uri, packet.match))
333 return nullptr;
334
335 CXFA_Node* pXFARootNode =
336 node_factory_->CreateNode(XFA_PacketType::Xdp, XFA_Element::Xfa);
337 if (!pXFARootNode)
338 return nullptr;
339
340 root_node_ = pXFARootNode;
341 pXFARootNode->JSObject()->SetCData(XFA_Attribute::Name, L"xfa");
342
343 for (auto it : ToXMLElement(pXMLDocumentNode)->GetAttributes()) {
344 if (it.first.EqualsASCII("uuid"))
345 pXFARootNode->JSObject()->SetCData(XFA_Attribute::Uuid, it.second);
346 else if (it.first.EqualsASCII("timeStamp"))
347 pXFARootNode->JSObject()->SetCData(XFA_Attribute::TimeStamp, it.second);
348 }
349
350 CFX_XMLNode* pXMLConfigDOMRoot = nullptr;
351 CXFA_Node* pXFAConfigDOMRoot = nullptr;
353 for (CFX_XMLNode* pChildItem = pXMLDocumentNode->GetFirstChild(); pChildItem;
354 pChildItem = pChildItem->GetNextSibling()) {
355 if (!MatchNodeName(pChildItem, config_packet.name, config_packet.uri,
356 config_packet.match)) {
357 continue;
358 }
359 // TODO(tsepez): make GetFirstChildByName() take a name.
360 uint32_t hash = FX_HashCode_GetAsIfW(config_packet.name);
361 if (pXFARootNode->GetFirstChildByName(hash))
362 return nullptr;
363
364 pXMLConfigDOMRoot = pChildItem;
365 pXFAConfigDOMRoot = ParseAsXDPPacket_Config(pXMLConfigDOMRoot);
366 if (pXFAConfigDOMRoot)
367 pXFARootNode->InsertChildAndNotify(pXFAConfigDOMRoot, nullptr);
368 }
369
370 CFX_XMLNode* pXMLDatasetsDOMRoot = nullptr;
371 CFX_XMLNode* pXMLFormDOMRoot = nullptr;
372 CFX_XMLNode* pXMLTemplateDOMRoot = nullptr;
373 for (CFX_XMLNode* pChildItem = pXMLDocumentNode->GetFirstChild(); pChildItem;
374 pChildItem = pChildItem->GetNextSibling()) {
375 CFX_XMLElement* pElement = ToXMLElement(pChildItem);
376 if (!pElement || pElement == pXMLConfigDOMRoot)
377 continue;
378
379 WideString wsPacketName = pElement->GetLocalTagName();
380 absl::optional<XFA_PACKETINFO> packet_info =
381 XFA_GetPacketByName(wsPacketName.AsStringView());
382 if (packet_info.has_value() && packet_info.value().uri &&
383 !MatchNodeName(pElement, packet_info.value().name,
384 packet_info.value().uri, packet_info.value().match)) {
385 packet_info = {};
386 }
387 XFA_PacketType ePacket = XFA_PacketType::User;
388 if (packet_info.has_value())
389 ePacket = packet_info.value().packet_type;
390 if (ePacket == XFA_PacketType::Xdp)
391 continue;
392 if (ePacket == XFA_PacketType::Datasets) {
393 if (pXMLDatasetsDOMRoot)
394 return nullptr;
395
396 pXMLDatasetsDOMRoot = pElement;
397 } else if (ePacket == XFA_PacketType::Form) {
398 if (pXMLFormDOMRoot)
399 return nullptr;
400
401 pXMLFormDOMRoot = pElement;
402 } else if (ePacket == XFA_PacketType::Template) {
403 // Found a duplicate template packet.
404 if (pXMLTemplateDOMRoot)
405 return nullptr;
406
407 CXFA_Node* pPacketNode = ParseAsXDPPacket_Template(pElement);
408 if (pPacketNode) {
409 pXMLTemplateDOMRoot = pElement;
410 pXFARootNode->InsertChildAndNotify(pPacketNode, nullptr);
411 }
412 } else {
413 CXFA_Node* pPacketNode = ParseAsXDPPacket(pElement, ePacket);
414 if (pPacketNode) {
415 if (packet_info.has_value() &&
416 (packet_info.value().support == XFA_PacketSupport::kSupportOne) &&
417 pXFARootNode->GetFirstChildByName(
418 FX_HashCode_GetAsIfW(packet_info.value().name))) {
419 return nullptr;
420 }
421 pXFARootNode->InsertChildAndNotify(pPacketNode, nullptr);
422 }
423 }
424 }
425
426 // No template is found.
427 if (!pXMLTemplateDOMRoot)
428 return nullptr;
429
430 if (pXMLDatasetsDOMRoot) {
431 CXFA_Node* pPacketNode =
432 ParseAsXDPPacket(pXMLDatasetsDOMRoot, XFA_PacketType::Datasets);
433 if (pPacketNode)
434 pXFARootNode->InsertChildAndNotify(pPacketNode, nullptr);
435 }
436 if (pXMLFormDOMRoot) {
437 CXFA_Node* pPacketNode =
438 ParseAsXDPPacket(pXMLFormDOMRoot, XFA_PacketType::Form);
439 if (pPacketNode)
440 pXFARootNode->InsertChildAndNotify(pPacketNode, nullptr);
441 }
442
443 pXFARootNode->SetXMLMappingNode(pXMLDocumentNode);
444 return pXFARootNode;
445}
446
447CXFA_Node* CXFA_DocumentBuilder::ParseAsXDPPacket_Config(
448 CFX_XMLNode* pXMLDocumentNode) {
450 if (!MatchNodeName(pXMLDocumentNode, packet.name, packet.uri, packet.match))
451 return nullptr;
452
453 CXFA_Node* pNode =
454 node_factory_->CreateNode(XFA_PacketType::Config, XFA_Element::Config);
455 if (!pNode)
456 return nullptr;
457
458 pNode->JSObject()->SetCData(XFA_Attribute::Name,
459 WideString::FromASCII(packet.name));
460 if (!NormalLoader(pNode, pXMLDocumentNode, XFA_PacketType::Config, true))
461 return nullptr;
462
463 pNode->SetXMLMappingNode(pXMLDocumentNode);
464 return pNode;
465}
466
467CXFA_Node* CXFA_DocumentBuilder::ParseAsXDPPacket_Template(
468 CFX_XMLNode* pXMLDocumentNode) {
470 if (!MatchNodeName(pXMLDocumentNode, packet.name, packet.uri, packet.match))
471 return nullptr;
472
473 CXFA_Node* pNode = node_factory_->CreateNode(XFA_PacketType::Template,
474 XFA_Element::Template);
475 if (!pNode)
476 return nullptr;
477
478 pNode->JSObject()->SetCData(XFA_Attribute::Name,
479 WideString::FromASCII(packet.name));
480
481 CFX_XMLElement* pXMLDocumentElement = ToXMLElement(pXMLDocumentNode);
482 WideString wsNamespaceURI = pXMLDocumentElement->GetNamespaceURI();
483 if (wsNamespaceURI.IsEmpty())
484 wsNamespaceURI = pXMLDocumentElement->GetAttribute(L"xmlns:xfa");
485
486 pNode->GetDocument()->RecognizeXFAVersionNumber(wsNamespaceURI);
487
488 if (!NormalLoader(pNode, pXMLDocumentNode, XFA_PacketType::Template, true))
489 return nullptr;
490
491 pNode->SetXMLMappingNode(pXMLDocumentNode);
492 return pNode;
493}
494
495CXFA_Node* CXFA_DocumentBuilder::ParseAsXDPPacket_Form(
496 CFX_XMLNode* pXMLDocumentNode) {
498 if (!MatchNodeName(pXMLDocumentNode, packet.name, packet.uri, packet.match))
499 return nullptr;
500
501 CXFA_Node* pNode =
502 node_factory_->CreateNode(XFA_PacketType::Form, XFA_Element::Form);
503 if (!pNode)
504 return nullptr;
505
506 pNode->JSObject()->SetCData(XFA_Attribute::Name,
507 WideString::FromASCII(packet.name));
508 CXFA_Template* pTemplateRoot =
509 root_node_->GetFirstChildByClass<CXFA_Template>(XFA_Element::Template);
510 CXFA_Subform* pTemplateChosen =
511 pTemplateRoot ? pTemplateRoot->GetFirstChildByClass<CXFA_Subform>(
512 XFA_Element::Subform)
513 : nullptr;
514 bool bUseAttribute = true;
515 if (pTemplateChosen &&
516 pTemplateChosen->JSObject()->GetEnum(XFA_Attribute::RestoreState) !=
517 XFA_AttributeValue::Auto) {
518 bUseAttribute = false;
519 }
520 if (!NormalLoader(pNode, pXMLDocumentNode, XFA_PacketType::Form,
521 bUseAttribute))
522 return nullptr;
523
524 pNode->SetXMLMappingNode(pXMLDocumentNode);
525 return pNode;
526}
527
528CXFA_Node* CXFA_DocumentBuilder::ParseAsXDPPacket_Data(
529 CFX_XMLNode* pXMLDocumentNode) {
531 CFX_XMLNode* pDatasetsXMLNode = GetDataSetsFromXDP(pXMLDocumentNode);
532 if (pDatasetsXMLNode) {
533 CXFA_Node* pNode = node_factory_->CreateNode(XFA_PacketType::Datasets,
534 XFA_Element::DataModel);
535 if (!pNode)
536 return nullptr;
537
538 pNode->JSObject()->SetCData(XFA_Attribute::Name,
539 WideString::FromASCII(packet.name));
540 if (!DataLoader(pNode, pDatasetsXMLNode))
541 return nullptr;
542
543 pNode->SetXMLMappingNode(pDatasetsXMLNode);
544 return pNode;
545 }
546
547 CFX_XMLNode* pDataXMLNode = nullptr;
548 if (MatchNodeName(pXMLDocumentNode, "data", packet.uri, packet.match)) {
549 ToXMLElement(pXMLDocumentNode)->RemoveAttribute(L"xmlns:xfa");
550 pDataXMLNode = pXMLDocumentNode;
551 } else {
552 auto* pDataElement = xml_doc_->CreateNode<CFX_XMLElement>(L"xfa:data");
553 pXMLDocumentNode->RemoveSelfIfParented();
554
555 CFX_XMLElement* pElement = ToXMLElement(pXMLDocumentNode);
556 pElement->RemoveAttribute(L"xmlns:xfa");
557
558 // The node was either removed from the parent above, or already has no
559 // parent so we can take ownership.
560 pDataElement->AppendLastChild(pXMLDocumentNode);
561 pDataXMLNode = pDataElement;
562 }
563 if (!pDataXMLNode)
564 return nullptr;
565
566 CXFA_Node* pNode = node_factory_->CreateNode(XFA_PacketType::Datasets,
567 XFA_Element::DataGroup);
568 if (!pNode)
569 return nullptr;
570
571 WideString wsLocalName = ToXMLElement(pDataXMLNode)->GetLocalTagName();
572 pNode->JSObject()->SetCData(XFA_Attribute::Name, wsLocalName);
573 if (!DataLoader(pNode, pDataXMLNode))
574 return nullptr;
575
576 pNode->SetXMLMappingNode(pDataXMLNode);
577 return pNode;
578}
579
580CXFA_Node* CXFA_DocumentBuilder::ParseAsXDPPacket_LocaleConnectionSourceSet(
581 CFX_XMLNode* pXMLDocumentNode,
582 XFA_PacketType packet_type,
583 XFA_Element element) {
584 XFA_PACKETINFO packet = XFA_GetPacketByIndex(packet_type);
585 if (!MatchNodeName(pXMLDocumentNode, packet.name, packet.uri, packet.match))
586 return nullptr;
587
588 CXFA_Node* pNode = node_factory_->CreateNode(packet_type, element);
589 if (!pNode)
590 return nullptr;
591
592 pNode->JSObject()->SetCData(XFA_Attribute::Name,
593 WideString::FromASCII(packet.name));
594 if (!NormalLoader(pNode, pXMLDocumentNode, packet_type, true))
595 return nullptr;
596
597 pNode->SetXMLMappingNode(pXMLDocumentNode);
598 return pNode;
599}
600
601CXFA_Node* CXFA_DocumentBuilder::ParseAsXDPPacket_Xdc(
602 CFX_XMLNode* pXMLDocumentNode) {
604 if (!MatchNodeName(pXMLDocumentNode, packet.name, packet.uri, packet.match))
605 return nullptr;
606
607 CXFA_Node* pNode =
608 node_factory_->CreateNode(XFA_PacketType::Xdc, XFA_Element::Xdc);
609 if (!pNode)
610 return nullptr;
611
612 pNode->JSObject()->SetCData(XFA_Attribute::Name,
613 WideString::FromASCII(packet.name));
614 pNode->SetXMLMappingNode(pXMLDocumentNode);
615 return pNode;
616}
617
618CXFA_Node* CXFA_DocumentBuilder::ParseAsXDPPacket_User(
619 CFX_XMLNode* pXMLDocumentNode) {
620 CXFA_Node* pNode =
621 node_factory_->CreateNode(XFA_PacketType::Xdp, XFA_Element::Packet);
622 if (!pNode)
623 return nullptr;
624
625 WideString wsName = ToXMLElement(pXMLDocumentNode)->GetLocalTagName();
626 pNode->JSObject()->SetCData(XFA_Attribute::Name, wsName);
627 pNode->SetXMLMappingNode(pXMLDocumentNode);
628 return pNode;
629}
630
631CXFA_Node* CXFA_DocumentBuilder::DataLoader(CXFA_Node* pXFANode,
632 CFX_XMLNode* pXMLDoc) {
633 ParseDataGroup(pXFANode, pXMLDoc, XFA_PacketType::Datasets);
634 return pXFANode;
635}
636
637CXFA_Node* CXFA_DocumentBuilder::NormalLoader(CXFA_Node* pXFANode,
638 CFX_XMLNode* pXMLDoc,
639 XFA_PacketType ePacketID,
640 bool bUseAttribute) {
641 constexpr size_t kMaxExecuteRecursion = 1000;
642 if (execute_recursion_depth_ > kMaxExecuteRecursion)
643 return nullptr;
644 AutoRestorer<size_t> restorer(&execute_recursion_depth_);
645 ++execute_recursion_depth_;
646
647 bool bOneOfPropertyFound = false;
648 for (CFX_XMLNode* pXMLChild = pXMLDoc->GetFirstChild(); pXMLChild;
649 pXMLChild = pXMLChild->GetNextSibling()) {
650 switch (pXMLChild->GetType()) {
652 CFX_XMLElement* pXMLElement = static_cast<CFX_XMLElement*>(pXMLChild);
653 WideString wsTagName = pXMLElement->GetLocalTagName();
654 XFA_Element eType = XFA_GetElementByName(wsTagName.AsStringView());
655 if (eType == XFA_Element::Unknown)
656 continue;
657
660 if (bOneOfPropertyFound)
661 break;
662 bOneOfPropertyFound = true;
663 }
664
665 CXFA_Node* pXFAChild = node_factory_->CreateNode(ePacketID, eType);
666 if (!pXFAChild)
667 return nullptr;
668 if (ePacketID == XFA_PacketType::Config) {
669 pXFAChild->JSObject()->SetAttributeByEnum(XFA_Attribute::Name,
670 wsTagName, false);
671 }
672
673 bool IsNeedValue = true;
674 for (auto it : pXMLElement->GetAttributes()) {
675 WideString wsAttrName;
676 GetAttributeLocalName(it.first.AsStringView(), wsAttrName);
677 if (wsAttrName.EqualsASCII("nil") && it.second.EqualsASCII("true"))
678 IsNeedValue = false;
679
680 absl::optional<XFA_ATTRIBUTEINFO> attr =
681 XFA_GetAttributeByName(wsAttrName.AsStringView());
682 if (!attr.has_value())
683 continue;
684
685 if (!bUseAttribute && attr.value().attribute != XFA_Attribute::Name &&
686 attr.value().attribute != XFA_Attribute::Save) {
687 continue;
688 }
689 pXFAChild->JSObject()->SetAttributeByEnum(attr.value().attribute,
690 it.second, false);
691 }
692 pXFANode->InsertChildAndNotify(pXFAChild, nullptr);
693 if (eType == XFA_Element::Validate || eType == XFA_Element::Locale) {
694 if (ePacketID == XFA_PacketType::Config)
695 ParseContentNode(pXFAChild, pXMLElement, ePacketID);
696 else
697 NormalLoader(pXFAChild, pXMLElement, ePacketID, bUseAttribute);
698
699 break;
700 }
701 switch (pXFAChild->GetObjectType()) {
706 if (IsNeedValue)
707 ParseContentNode(pXFAChild, pXMLElement, ePacketID);
708 break;
709 default:
710 NormalLoader(pXFAChild, pXMLElement, ePacketID, bUseAttribute);
711 break;
712 }
713 } break;
715 ParseInstruction(pXFANode, ToXMLInstruction(pXMLChild), ePacketID);
716 break;
717 default:
718 break;
719 }
720 }
721 return pXFANode;
722}
723
724void CXFA_DocumentBuilder::ParseContentNode(CXFA_Node* pXFANode,
725 CFX_XMLNode* pXMLNode,
726 XFA_PacketType ePacketID) {
727 XFA_Element element = XFA_Element::Sharptext;
728 if (pXFANode->GetElementType() == XFA_Element::ExData) {
729 WideString wsContentType =
730 pXFANode->JSObject()->GetCData(XFA_Attribute::ContentType);
731 if (wsContentType.EqualsASCII("text/html"))
732 element = XFA_Element::SharpxHTML;
733 else if (wsContentType.EqualsASCII("text/xml"))
734 element = XFA_Element::Sharpxml;
735 }
736 if (element == XFA_Element::SharpxHTML)
737 pXFANode->SetXMLMappingNode(pXMLNode);
738
739 WideString wsValue;
740 for (CFX_XMLNode* pXMLChild = pXMLNode->GetFirstChild(); pXMLChild;
741 pXMLChild = pXMLChild->GetNextSibling()) {
742 CFX_XMLNode::Type eNodeType = pXMLChild->GetType();
743 if (eNodeType == CFX_XMLNode::Type::kInstruction)
744 continue;
745
746 CFX_XMLElement* pElement = ToXMLElement(pXMLChild);
747 if (element == XFA_Element::SharpxHTML) {
748 if (!pElement)
749 break;
750 if (XFA_RecognizeRichText(pElement))
751 wsValue += GetPlainTextFromRichText(pElement);
752 } else if (element == XFA_Element::Sharpxml) {
753 if (!pElement)
754 break;
755 ConvertXMLToPlainText(pElement, wsValue);
756 } else {
757 if (pElement)
758 break;
759 CFX_XMLText* pText = ToXMLText(pXMLChild);
760 if (pText)
761 wsValue = pText->GetText();
762 }
763 break;
764 }
765 if (!wsValue.IsEmpty()) {
766 if (pXFANode->IsContentNode()) {
767 CXFA_Node* pContentRawDataNode =
768 node_factory_->CreateNode(ePacketID, element);
769 DCHECK(pContentRawDataNode);
770 pContentRawDataNode->JSObject()->SetCData(XFA_Attribute::Value, wsValue);
771 pXFANode->InsertChildAndNotify(pContentRawDataNode, nullptr);
772 } else {
773 pXFANode->JSObject()->SetCData(XFA_Attribute::Value, wsValue);
774 }
775 }
776}
777
778void CXFA_DocumentBuilder::ParseDataGroup(CXFA_Node* pXFANode,
779 CFX_XMLNode* pXMLNode,
780 XFA_PacketType ePacketID) {
781 for (CFX_XMLNode* pXMLChild = pXMLNode->GetFirstChild(); pXMLChild;
782 pXMLChild = pXMLChild->GetNextSibling()) {
783 switch (pXMLChild->GetType()) {
785 CFX_XMLElement* pXMLElement = static_cast<CFX_XMLElement*>(pXMLChild);
786 WideString wsNamespaceURI = pXMLElement->GetNamespaceURI();
787 if (wsNamespaceURI.EqualsASCII(
788 "http://www.xfa.com/schema/xfa-package/") ||
789 wsNamespaceURI.EqualsASCII(
790 "http://www.xfa.org/schema/xfa-package/") ||
791 wsNamespaceURI.EqualsASCII(
792 "http://www.w3.org/2001/XMLSchema-instance")) {
793 continue;
794 }
795
796 XFA_Element eNodeType = XFA_Element::DataModel;
797 if (eNodeType == XFA_Element::DataModel) {
798 absl::optional<WideString> wsDataNodeAttr =
799 FindAttributeWithNS(pXMLElement, L"dataNode",
800 L"http://www.xfa.org/schema/xfa-data/1.0/");
801 if (wsDataNodeAttr.has_value()) {
802 if (wsDataNodeAttr.value().EqualsASCII("dataGroup"))
803 eNodeType = XFA_Element::DataGroup;
804 else if (wsDataNodeAttr.value().EqualsASCII("dataValue"))
805 eNodeType = XFA_Element::DataValue;
806 }
807 }
808 if (eNodeType == XFA_Element::DataModel) {
809 absl::optional<WideString> wsContentType =
810 FindAttributeWithNS(pXMLElement, L"contentType",
811 L"http://www.xfa.org/schema/xfa-data/1.0/");
812 if (wsContentType.has_value() && !wsContentType.value().IsEmpty())
813 eNodeType = XFA_Element::DataValue;
814 }
815 if (eNodeType == XFA_Element::DataModel) {
816 for (CFX_XMLNode* pXMLDataChild = pXMLElement->GetFirstChild();
817 pXMLDataChild; pXMLDataChild = pXMLDataChild->GetNextSibling()) {
818 CFX_XMLElement* pElement = ToXMLElement(pXMLDataChild);
819 if (pElement && !XFA_RecognizeRichText(pElement)) {
820 eNodeType = XFA_Element::DataGroup;
821 break;
822 }
823 }
824 }
825 if (eNodeType == XFA_Element::DataModel)
826 eNodeType = XFA_Element::DataValue;
827
828 CXFA_Node* pXFAChild =
829 node_factory_->CreateNode(XFA_PacketType::Datasets, eNodeType);
830 if (!pXFAChild)
831 return;
832
833 pXFAChild->JSObject()->SetCData(XFA_Attribute::Name,
834 pXMLElement->GetLocalTagName());
835 bool bNeedValue = true;
836
837 for (auto it : pXMLElement->GetAttributes()) {
838 WideString wsName;
839 WideString wsNS;
840 if (!ResolveAttribute(pXMLElement, it.first, wsName, wsNS)) {
841 continue;
842 }
843 if (wsName.EqualsASCII("nil") && it.second.EqualsASCII("true")) {
844 bNeedValue = false;
845 continue;
846 }
847 if (wsNS.EqualsASCII("http://www.xfa.com/schema/xfa-package/") ||
848 wsNS.EqualsASCII("http://www.xfa.org/schema/xfa-package/") ||
849 wsNS.EqualsASCII("http://www.w3.org/2001/XMLSchema-instance") ||
850 wsNS.EqualsASCII("http://www.xfa.org/schema/xfa-data/1.0/")) {
851 continue;
852 }
853 CXFA_Node* pXFAMetaData = node_factory_->CreateNode(
854 XFA_PacketType::Datasets, XFA_Element::DataValue);
855 if (!pXFAMetaData)
856 return;
857
858 pXFAMetaData->JSObject()->SetCData(XFA_Attribute::Name, wsName);
859 pXFAMetaData->JSObject()->SetCData(XFA_Attribute::QualifiedName,
860 it.first);
861 pXFAMetaData->JSObject()->SetCData(XFA_Attribute::Value, it.second);
862 pXFAMetaData->JSObject()->SetEnum(
863 XFA_Attribute::Contains, XFA_AttributeValue::MetaData, false);
864 pXFAChild->InsertChildAndNotify(pXFAMetaData, nullptr);
865 pXFAMetaData->SetXMLMappingNode(pXMLElement);
866 pXFAMetaData->SetFlag(XFA_NodeFlag::kInitialized);
867 }
868
869 if (!bNeedValue)
870 pXMLElement->RemoveAttribute(L"xsi:nil");
871
872 pXFANode->InsertChildAndNotify(pXFAChild, nullptr);
873 if (eNodeType == XFA_Element::DataGroup)
874 ParseDataGroup(pXFAChild, pXMLElement, ePacketID);
875 else if (bNeedValue)
876 ParseDataValue(pXFAChild, pXMLChild, XFA_PacketType::Datasets);
877
878 pXFAChild->SetXMLMappingNode(pXMLElement);
880 continue;
881 }
884 CFX_XMLText* pXMLText = ToXMLText(pXMLChild);
885 WideString wsText = pXMLText->GetText();
886 if (IsStringAllWhitespace(wsText))
887 continue;
888
889 CXFA_Node* pXFAChild = node_factory_->CreateNode(
890 XFA_PacketType::Datasets, XFA_Element::DataValue);
891 if (!pXFAChild)
892 return;
893
894 pXFAChild->JSObject()->SetCData(XFA_Attribute::Value, wsText);
895 pXFANode->InsertChildAndNotify(pXFAChild, nullptr);
896 pXFAChild->SetXMLMappingNode(pXMLText);
898 continue;
899 }
900 default:
901 continue;
902 }
903 }
904}
905
906void CXFA_DocumentBuilder::ParseDataValue(CXFA_Node* pXFANode,
907 CFX_XMLNode* pXMLNode,
908 XFA_PacketType ePacketID) {
909 WideString wsValue;
910 WideString wsCurValue;
911 bool bMarkAsCompound = false;
912 CFX_XMLNode* pXMLCurValueNode = nullptr;
913 for (CFX_XMLNode* pXMLChild = pXMLNode->GetFirstChild(); pXMLChild;
914 pXMLChild = pXMLChild->GetNextSibling()) {
915 CFX_XMLNode::Type eNodeType = pXMLChild->GetType();
916 if (eNodeType == CFX_XMLNode::Type::kInstruction)
917 continue;
918
919 CFX_XMLText* pText = ToXMLText(pXMLChild);
920 if (pText) {
921 WideString wsText = pText->GetText();
922 if (!pXMLCurValueNode)
923 pXMLCurValueNode = pXMLChild;
924 wsCurValue += wsText;
925 continue;
926 }
928 WideString wsText = GetPlainTextFromRichText(ToXMLElement(pXMLChild));
929 if (!pXMLCurValueNode)
930 pXMLCurValueNode = pXMLChild;
931 wsCurValue += wsText;
932 continue;
933 }
934 bMarkAsCompound = true;
935 if (pXMLCurValueNode) {
936 if (!wsCurValue.IsEmpty()) {
937 CXFA_Node* pXFAChild =
938 node_factory_->CreateNode(ePacketID, XFA_Element::DataValue);
939 if (!pXFAChild)
940 return;
941
942 pXFAChild->JSObject()->SetCData(XFA_Attribute::Name, WideString());
943 pXFAChild->JSObject()->SetCData(XFA_Attribute::Value, wsCurValue);
944 pXFANode->InsertChildAndNotify(pXFAChild, nullptr);
945 pXFAChild->SetXMLMappingNode(pXMLCurValueNode);
947 wsValue += wsCurValue;
948 wsCurValue.clear();
949 }
950 pXMLCurValueNode = nullptr;
951 }
952 CXFA_Node* pXFAChild =
953 node_factory_->CreateNode(ePacketID, XFA_Element::DataValue);
954 if (!pXFAChild)
955 return;
956
957 WideString wsNodeStr = ToXMLElement(pXMLChild)->GetLocalTagName();
958 pXFAChild->JSObject()->SetCData(XFA_Attribute::Name, wsNodeStr);
959 ParseDataValue(pXFAChild, pXMLChild, ePacketID);
960 pXFANode->InsertChildAndNotify(pXFAChild, nullptr);
961 pXFAChild->SetXMLMappingNode(pXMLChild);
963 wsValue += pXFAChild->JSObject()->GetCData(XFA_Attribute::Value);
964 }
965
966 if (pXMLCurValueNode) {
967 if (!wsCurValue.IsEmpty()) {
968 if (bMarkAsCompound) {
969 CXFA_Node* pXFAChild =
970 node_factory_->CreateNode(ePacketID, XFA_Element::DataValue);
971 if (!pXFAChild)
972 return;
973
974 pXFAChild->JSObject()->SetCData(XFA_Attribute::Name, WideString());
975 pXFAChild->JSObject()->SetCData(XFA_Attribute::Value, wsCurValue);
976 pXFANode->InsertChildAndNotify(pXFAChild, nullptr);
977 pXFAChild->SetXMLMappingNode(pXMLCurValueNode);
979 }
980 wsValue += wsCurValue;
981 wsCurValue.clear();
982 }
983 pXMLCurValueNode = nullptr;
984 }
985 pXFANode->JSObject()->SetCData(XFA_Attribute::Value, wsValue);
986}
987
988void CXFA_DocumentBuilder::ParseInstruction(CXFA_Node* pXFANode,
989 CFX_XMLInstruction* pXMLInstruction,
990 XFA_PacketType ePacketID) {
991 const std::vector<WideString>& target_data = pXMLInstruction->GetTargetData();
992 if (pXMLInstruction->IsOriginalXFAVersion()) {
993 if (target_data.size() > 1 &&
994 (pXFANode->GetDocument()->RecognizeXFAVersionNumber(target_data[0]) !=
996 target_data[1].EqualsASCII("v2.7-scripting:1")) {
997 pXFANode->GetDocument()->set_is_scripting();
998 }
999 return;
1000 }
1001 if (pXMLInstruction->IsAcrobat()) {
1002 if (target_data.size() > 1 && target_data[0].EqualsASCII("JavaScript") &&
1003 target_data[1].EqualsASCII("strictScoping")) {
1004 pXFANode->GetDocument()->set_is_strict_scoping();
1005 }
1006 }
1007}
CFX_XMLElement * ToXMLElement(CFX_XMLNode *pNode)
CFX_XMLInstruction * ToXMLInstruction(CFX_XMLNode *pNode)
CFX_XMLText * ToXMLText(CFX_XMLNode *pNode)
Definition cfx_xmltext.h:38
WideString GetTextData() const
WideString GetLocalTagName() const
WideString GetAttribute(const WideString &name) const
WideString GetNamespaceURI() const
void RemoveAttribute(const WideString &name)
bool IsOriginalXFAVersion() const
virtual Type GetType() const =0
const WideString & GetText() const
Definition cfx_xmltext.h:25
bool BuildDocument(CFX_XMLDocument *pXML, XFA_PacketType ePacketID)
void ConstructXFANode(CXFA_Node *pXFANode, CFX_XMLNode *pXMLNode)
CXFA_DocumentBuilder(CXFA_Document *pNodeFactory)
CFX_XMLNode * Build(CFX_XMLDocument *pXML)
CXFA_Node * GetRootNode() const
CXFA_Node * GetFirstChildByName(uint32_t dwNodeNameHash) const
void InsertChildAndNotify(CXFA_Node *pNode, CXFA_Node *pBeforeNode)
void SetFlag(XFA_NodeFlag dwFlag)
void SetXMLMappingNode(CFX_XMLNode *node)
Definition cxfa_node.h:183
bool HasPropertyFlag(XFA_Element property, XFA_PropertyFlag flag) const
XFA_PacketType GetPacketType() const
Definition cxfa_node.h:146
XFA_Element GetElementType() const
Definition cxfa_object.h:91
bool IsContentNode() const
Definition cxfa_object.h:66
CXFA_Document * GetDocument() const
Definition cxfa_object.h:48
XFA_ObjectType GetObjectType() const
Definition cxfa_object.h:49
WideString & operator+=(const WideString &str)
WideString & operator=(WideString &&that) noexcept
WideString & operator+=(const wchar_t *str)
bool IsEmpty() const
Definition widestring.h:118
WideString & operator=(const WideString &that)
static WideString FromASCII(ByteStringView str)
CharType Back() const
Definition widestring.h:152
bool EqualsASCII(ByteStringView that) const
Definition widestring.h:216
@ XFA_VERSION_UNKNOWN
XFA_PropertyFlag
Definition cxfa_node.h:88
XFA_NodeFlag
Definition cxfa_node.h:77
XFA_ObjectType
Definition cxfa_object.h:21
XFA_Attribute
Definition fxfa_basic.h:67
XFA_Element
Definition fxfa_basic.h:75
XFA_AttributeValue
Definition fxfa_basic.h:60
XFA_PacketType
Definition fxfa_basic.h:44
XFA_PacketMatch match
const char * uri
const char * name
XFA_PacketMatch
XFA_PACKETINFO XFA_GetPacketByIndex(XFA_PacketType ePacket)
bool XFA_FDEExtension_ResolveNamespaceQualifier(CFX_XMLElement *pNode, const WideString &wsQualifier, WideString *wsNamespaceURI)
bool XFA_RecognizeRichText(CFX_XMLElement *pRichTextXMLNode)