7#include "xfa/fxfa/parser/cxfa_document_builder.h"
12#include "core/fxcrt/autorestorer.h"
13#include "core/fxcrt/fx_codepage.h"
14#include "core/fxcrt/fx_extension.h"
15#include "core/fxcrt/xml/cfx_xmlchardata.h"
16#include "core/fxcrt/xml/cfx_xmldocument.h"
17#include "core/fxcrt/xml/cfx_xmlelement.h"
18#include "core/fxcrt/xml/cfx_xmlinstruction.h"
19#include "core/fxcrt/xml/cfx_xmlnode.h"
20#include "core/fxcrt/xml/cfx_xmltext.h"
21#include "fxjs/xfa/cjx_object.h"
22#include "third_party/abseil-cpp/absl/types/optional.h"
23#include "third_party/base/check.h"
24#include "third_party/base/notreached.h"
25#include "xfa/fxfa/parser/cxfa_document.h"
26#include "xfa/fxfa/parser/cxfa_node.h"
27#include "xfa/fxfa/parser/cxfa_subform.h"
28#include "xfa/fxfa/parser/cxfa_template.h"
29#include "xfa/fxfa/parser/xfa_basic_data.h"
30#include "xfa/fxfa/parser/xfa_utils.h"
35 for (
CFX_XMLNode* pXMLNode = pRootNode->GetFirstChild(); pXMLNode;
36 pXMLNode = pXMLNode->GetNextSibling()) {
44 ByteStringView bsLocalTagName,
45 ByteStringView bsNamespaceURIPrefix,
59 return wsNodeStr.AsStringView()
60 .First(bsNamespaceURIPrefix.GetLength())
61 .EqualsASCII(bsNamespaceURIPrefix);
66bool GetAttributeLocalName(WideStringView wsAttributeName,
67 WideString& wsLocalAttrName) {
68 WideString wsAttrName(wsAttributeName);
69 auto pos = wsAttrName.Find(L':', 0);
70 if (!pos.has_value()) {
71 wsLocalAttrName =
std::move(wsAttrName);
74 wsLocalAttrName = wsAttrName.Last(wsAttrName.GetLength() - pos.value() - 1);
78bool ResolveAttribute(CFX_XMLElement* pElement,
79 const WideString& wsAttrName,
80 WideString& wsLocalAttrName,
81 WideString& wsNamespaceURI) {
82 WideString wsNSPrefix;
83 if (GetAttributeLocalName(wsAttrName.AsStringView(), wsLocalAttrName)) {
84 wsNSPrefix = wsAttrName.First(wsAttrName.GetLength() -
85 wsLocalAttrName.GetLength() - 1);
99absl::optional<WideString> FindAttributeWithNS(
100 CFX_XMLElement* pElement,
101 WideStringView wsLocalAttributeName,
102 WideStringView wsNamespaceURIPrefix) {
104 for (
auto it : pElement->GetAttributes()) {
105 auto pos = it.first.Find(L':', 0);
106 WideString wsNSPrefix;
107 if (!pos.has_value()) {
108 if (wsLocalAttributeName != it.first)
111 if (wsLocalAttributeName !=
112 it.first.Last(it.first.GetLength() - pos.value() - 1)) {
115 wsNSPrefix = it.first.First(pos.value());
117 if (!XFA_FDEExtension_ResolveNamespaceQualifier(pElement, wsNSPrefix,
119 wsAttrNS != wsNamespaceURIPrefix) {
124 return absl::nullopt;
130 if (MatchNodeName(pXMLDocumentNode, datasets_packet
.name, datasets_packet
.uri,
132 return pXMLDocumentNode;
135 if (!MatchNodeName(pXMLDocumentNode, xdp_packet
.name, xdp_packet
.uri,
139 for (
CFX_XMLNode* pDatasetsNode = pXMLDocumentNode->GetFirstChild();
140 pDatasetsNode; pDatasetsNode = pDatasetsNode->GetNextSibling()) {
141 if (MatchNodeName(pDatasetsNode, datasets_packet
.name, datasets_packet
.uri,
143 return pDatasetsNode;
149bool IsStringAllWhitespace(WideString wsText) {
150 wsText.TrimRight(L"\x20\x9\xD\xA");
154void ConvertXMLToPlainText(CFX_XMLElement* pRootXMLNode, WideString& wsOutput) {
155 for (
CFX_XMLNode* pXMLChild = pRootXMLNode->GetFirstChild(); pXMLChild;
156 pXMLChild = pXMLChild->GetNextSibling()) {
161 wsOutput
+= wsTextData;
167 if (IsStringAllWhitespace(wsText))
169 wsOutput =
std::move(wsText);
173 NOTREACHED_NORETURN();
178WideString GetPlainTextFromRichText(
CFX_XMLNode* pXMLNode) {
182 WideString wsPlainText;
185 CFX_XMLElement* pXMLElement =
static_cast<CFX_XMLElement*>(pXMLNode);
187 uint32_t uTag = FX_HashCode_GetLoweredW(wsTag.AsStringView());
188 if (uTag == 0x0001f714) {
189 wsPlainText
+= L"\n";
190 }
else if (uTag == 0x00000070) {
192 wsPlainText
+= L"\n";
194 }
else if (uTag == 0xa48ac63) {
196 wsPlainText
+= L"\n";
204 wsPlainText
+= wsContent;
210 for (
CFX_XMLNode* pChildXML = pXMLNode->GetFirstChild(); pChildXML;
211 pChildXML = pChildXML->GetNextSibling()) {
212 wsPlainText
+= GetPlainTextFromRichText(pChildXML);
222 "http://www.w3.org/1999/xhtml");
238 root_node_ = ParseAsXDPPacket(root, ePacketID);
247 xml_doc_->GetRoot()->InsertChildNode(
248 xml_doc_->CreateNode<CFX_XMLInstruction>(L"xml"), 0);
250 return GetDocumentNode(xml_doc_->GetRoot());
258 for (
CFX_XMLNode* pXMLChild = pXMLNode->GetFirstChild(); pXMLChild;
259 pXMLChild = pXMLChild->GetNextSibling()) {
265 CXFA_Node* pXFAChild = node_factory_->CreateNode(
266 XFA_PacketType::Datasets, XFA_Element::DataValue);
270 CFX_XMLElement* child =
static_cast<CFX_XMLElement*>(pXMLChild);
272 pXFAChild->JSObject()->SetCData(
XFA_Attribute::Name, wsNodeStr);
273 WideString wsChildValue = GetPlainTextFromRichText(child);
275 pXFAChild->JSObject()->SetCData(
XFA_Attribute::Value, wsChildValue);
283 root_node_ = pXFANode;
285 root_node_ = DataLoader(pXFANode, pXMLNode);
288 ParseContentNode(pXFANode, pXMLNode, ePacketID);
289 root_node_ = pXFANode;
291 root_node_ = NormalLoader(pXFANode, pXMLNode, ePacketID,
true);
303 return ParseAsXDPPacket_XDP(pXMLDocumentNode);
305 return ParseAsXDPPacket_Config(pXMLDocumentNode);
307 return ParseAsXDPPacket_Template(pXMLDocumentNode);
309 return ParseAsXDPPacket_Form(pXMLDocumentNode);
311 return ParseAsXDPPacket_Data(pXMLDocumentNode);
313 return ParseAsXDPPacket_Xdc(pXMLDocumentNode);
315 return ParseAsXDPPacket_LocaleConnectionSourceSet(
318 return ParseAsXDPPacket_LocaleConnectionSourceSet(
322 return ParseAsXDPPacket_LocaleConnectionSourceSet(
325 return ParseAsXDPPacket_User(pXMLDocumentNode);
332 if (!MatchNodeName(pXMLDocumentNode, packet
.name, packet
.uri, packet
.match))
336 node_factory_->CreateNode(XFA_PacketType::Xdp, XFA_Element::Xfa);
340 root_node_ = pXFARootNode;
341 pXFARootNode->JSObject()->SetCData(
XFA_Attribute::Name, L"xfa");
343 for (
auto it : ToXMLElement(pXMLDocumentNode)->GetAttributes()) {
344 if (it.first.EqualsASCII(
"uuid"))
345 pXFARootNode->JSObject()->SetCData(XFA_Attribute::Uuid, it.second);
346 else if (it.first.EqualsASCII(
"timeStamp"))
347 pXFARootNode->JSObject()->SetCData(XFA_Attribute::TimeStamp, it.second);
353 for (
CFX_XMLNode* pChildItem = pXMLDocumentNode->GetFirstChild(); pChildItem;
354 pChildItem = pChildItem->GetNextSibling()) {
355 if (!MatchNodeName(pChildItem, config_packet
.name, config_packet
.uri,
360 uint32_t hash = FX_HashCode_GetAsIfW(config_packet
.name);
364 pXMLConfigDOMRoot = pChildItem;
365 pXFAConfigDOMRoot = ParseAsXDPPacket_Config(pXMLConfigDOMRoot);
366 if (pXFAConfigDOMRoot)
373 for (
CFX_XMLNode* pChildItem = pXMLDocumentNode->GetFirstChild(); pChildItem;
374 pChildItem = pChildItem->GetNextSibling()) {
376 if (!pElement || pElement == pXMLConfigDOMRoot)
380 absl::optional<XFA_PACKETINFO> packet_info =
381 XFA_GetPacketByName(wsPacketName.AsStringView());
382 if (packet_info.has_value() && packet_info.value().uri &&
383 !MatchNodeName(pElement, packet_info.value().name,
384 packet_info.value().uri, packet_info.value().match)) {
388 if (packet_info.has_value())
389 ePacket = packet_info.value().packet_type;
393 if (pXMLDatasetsDOMRoot)
396 pXMLDatasetsDOMRoot = pElement;
401 pXMLFormDOMRoot = pElement;
404 if (pXMLTemplateDOMRoot)
407 CXFA_Node* pPacketNode = ParseAsXDPPacket_Template(pElement);
409 pXMLTemplateDOMRoot = pElement;
413 CXFA_Node* pPacketNode = ParseAsXDPPacket(pElement, ePacket);
415 if (packet_info.has_value() &&
416 (packet_info.value().support == XFA_PacketSupport::kSupportOne) &&
417 pXFARootNode->GetFirstChildByName(
418 FX_HashCode_GetAsIfW(packet_info.value().name))) {
427 if (!pXMLTemplateDOMRoot)
430 if (pXMLDatasetsDOMRoot) {
436 if (pXMLFormDOMRoot) {
450 if (!MatchNodeName(pXMLDocumentNode, packet
.name, packet
.uri, packet
.match))
454 node_factory_->CreateNode(XFA_PacketType::Config, XFA_Element::Config);
460 if (!NormalLoader(pNode, pXMLDocumentNode,
XFA_PacketType::Config,
true))
470 if (!MatchNodeName(pXMLDocumentNode, packet
.name, packet
.uri, packet
.match))
473 CXFA_Node* pNode = node_factory_->CreateNode(XFA_PacketType::Template,
474 XFA_Element::Template);
481 CFX_XMLElement* pXMLDocumentElement =
ToXMLElement(pXMLDocumentNode
);
486 pNode
->GetDocument()->RecognizeXFAVersionNumber(wsNamespaceURI);
488 if (!NormalLoader(pNode, pXMLDocumentNode,
XFA_PacketType::Template,
true))
498 if (!MatchNodeName(pXMLDocumentNode, packet
.name, packet
.uri, packet
.match))
502 node_factory_->CreateNode(XFA_PacketType::Form, XFA_Element::Form);
508 CXFA_Template* pTemplateRoot =
509 root_node_->GetFirstChildByClass<CXFA_Template>(XFA_Element::Template);
510 CXFA_Subform* pTemplateChosen =
511 pTemplateRoot ? pTemplateRoot->GetFirstChildByClass<CXFA_Subform>(
514 bool bUseAttribute =
true;
515 if (pTemplateChosen &&
516 pTemplateChosen->JSObject()->GetEnum(
XFA_Attribute::RestoreState) !=
518 bUseAttribute =
false;
531 CFX_XMLNode* pDatasetsXMLNode = GetDataSetsFromXDP(pXMLDocumentNode);
532 if (pDatasetsXMLNode) {
533 CXFA_Node* pNode = node_factory_->CreateNode(XFA_PacketType::Datasets,
534 XFA_Element::DataModel);
540 if (!DataLoader(pNode, pDatasetsXMLNode))
548 if (MatchNodeName(pXMLDocumentNode,
"data", packet
.uri, packet
.match)) {
550 pDataXMLNode = pXMLDocumentNode;
552 auto* pDataElement = xml_doc_->CreateNode<CFX_XMLElement>(L"xfa:data");
553 pXMLDocumentNode->RemoveSelfIfParented();
560 pDataElement->AppendLastChild(pXMLDocumentNode);
561 pDataXMLNode = pDataElement;
566 CXFA_Node* pNode = node_factory_->CreateNode(XFA_PacketType::Datasets,
567 XFA_Element::DataGroup);
572 pNode->JSObject()->SetCData(
XFA_Attribute::Name, wsLocalName);
573 if (!DataLoader(pNode, pDataXMLNode))
585 if (!MatchNodeName(pXMLDocumentNode, packet
.name, packet
.uri, packet
.match))
588 CXFA_Node* pNode = node_factory_->CreateNode(packet_type, element);
594 if (!NormalLoader(pNode, pXMLDocumentNode, packet_type,
true))
604 if (!MatchNodeName(pXMLDocumentNode, packet
.name, packet
.uri, packet
.match))
608 node_factory_->CreateNode(XFA_PacketType::Xdc, XFA_Element::Xdc);
621 node_factory_->CreateNode(XFA_PacketType::Xdp, XFA_Element::Packet);
640 bool bUseAttribute) {
641 constexpr size_t kMaxExecuteRecursion = 1000;
642 if (execute_recursion_depth_ > kMaxExecuteRecursion)
644 AutoRestorer<size_t> restorer(&execute_recursion_depth_);
645 ++execute_recursion_depth_;
647 bool bOneOfPropertyFound =
false;
648 for (
CFX_XMLNode* pXMLChild = pXMLDoc->GetFirstChild(); pXMLChild;
649 pXMLChild = pXMLChild->GetNextSibling()) {
652 CFX_XMLElement* pXMLElement =
static_cast<CFX_XMLElement*>(pXMLChild);
654 XFA_Element eType = XFA_GetElementByName(wsTagName.AsStringView());
660 if (bOneOfPropertyFound)
662 bOneOfPropertyFound =
true;
665 CXFA_Node* pXFAChild = node_factory_->CreateNode(ePacketID, eType);
669 pXFAChild->JSObject()->SetAttributeByEnum(
XFA_Attribute::Name,
673 bool IsNeedValue =
true;
674 for (
auto it : pXMLElement->GetAttributes()) {
675 WideString wsAttrName;
676 GetAttributeLocalName(it.first.AsStringView(), wsAttrName);
677 if (wsAttrName.EqualsASCII(
"nil") && it.second.EqualsASCII(
"true"))
680 absl::optional<XFA_ATTRIBUTEINFO> attr =
681 XFA_GetAttributeByName(wsAttrName.AsStringView());
682 if (!attr.has_value())
685 if (!bUseAttribute && attr.value().attribute != XFA_Attribute::Name &&
686 attr.value().attribute != XFA_Attribute::Save) {
689 pXFAChild->JSObject()->SetAttributeByEnum(attr.value().attribute,
695 ParseContentNode(pXFAChild, pXMLElement, ePacketID);
697 NormalLoader(pXFAChild, pXMLElement, ePacketID, bUseAttribute);
707 ParseContentNode(pXFAChild, pXMLElement, ePacketID);
710 NormalLoader(pXFAChild, pXMLElement, ePacketID, bUseAttribute);
729 WideString wsContentType =
740 for (
CFX_XMLNode* pXMLChild = pXMLNode->GetFirstChild(); pXMLChild;
741 pXMLChild = pXMLChild->GetNextSibling()) {
751 wsValue
+= GetPlainTextFromRichText(pElement);
755 ConvertXMLToPlainText(pElement, wsValue);
768 node_factory_->CreateNode(ePacketID, element);
769 DCHECK(pContentRawDataNode);
770 pContentRawDataNode->JSObject()->SetCData(
XFA_Attribute::Value, wsValue);
773 pXFANode->JSObject()->SetCData(
XFA_Attribute::Value, wsValue);
781 for (
CFX_XMLNode* pXMLChild = pXMLNode->GetFirstChild(); pXMLChild;
782 pXMLChild = pXMLChild->GetNextSibling()) {
785 CFX_XMLElement* pXMLElement =
static_cast<CFX_XMLElement*>(pXMLChild);
788 "http://www.xfa.com/schema/xfa-package/") ||
790 "http://www.xfa.org/schema/xfa-package/") ||
792 "http://www.w3.org/2001/XMLSchema-instance")) {
798 absl::optional<WideString> wsDataNodeAttr =
799 FindAttributeWithNS(pXMLElement, L"dataNode",
800 L"http://www.xfa.org/schema/xfa-data/1.0/");
801 if (wsDataNodeAttr.has_value()) {
802 if (wsDataNodeAttr.value().EqualsASCII(
"dataGroup"))
804 else if (wsDataNodeAttr.value().EqualsASCII(
"dataValue"))
809 absl::optional<WideString> wsContentType =
810 FindAttributeWithNS(pXMLElement, L"contentType",
811 L"http://www.xfa.org/schema/xfa-data/1.0/");
812 if (wsContentType.has_value() && !wsContentType.value().IsEmpty())
816 for (
CFX_XMLNode* pXMLDataChild = pXMLElement->GetFirstChild();
817 pXMLDataChild; pXMLDataChild = pXMLDataChild->GetNextSibling()) {
829 node_factory_->CreateNode(XFA_PacketType::Datasets, eNodeType);
835 bool bNeedValue =
true;
837 for (
auto it : pXMLElement->GetAttributes()) {
840 if (!ResolveAttribute(pXMLElement, it.first, wsName, wsNS)) {
843 if (wsName.EqualsASCII(
"nil") && it.second.EqualsASCII(
"true")) {
847 if (wsNS.EqualsASCII(
"http://www.xfa.com/schema/xfa-package/") ||
848 wsNS.EqualsASCII(
"http://www.xfa.org/schema/xfa-package/") ||
849 wsNS.EqualsASCII(
"http://www.w3.org/2001/XMLSchema-instance") ||
850 wsNS.EqualsASCII(
"http://www.xfa.org/schema/xfa-data/1.0/")) {
853 CXFA_Node* pXFAMetaData = node_factory_->CreateNode(
854 XFA_PacketType::Datasets, XFA_Element::DataValue);
858 pXFAMetaData->JSObject()->SetCData(XFA_Attribute::Name, wsName);
859 pXFAMetaData->JSObject()->SetCData(XFA_Attribute::QualifiedName,
861 pXFAMetaData->JSObject()->SetCData(XFA_Attribute::Value, it.second);
862 pXFAMetaData->JSObject()->SetEnum(
863 XFA_Attribute::Contains, XFA_AttributeValue::MetaData,
false);
864 pXFAChild->InsertChildAndNotify(pXFAMetaData,
nullptr);
865 pXFAMetaData->SetXMLMappingNode(pXMLElement);
866 pXFAMetaData->SetFlag(XFA_NodeFlag::kInitialized);
874 ParseDataGroup(pXFAChild, pXMLElement, ePacketID);
886 if (IsStringAllWhitespace(wsText))
889 CXFA_Node* pXFAChild = node_factory_->CreateNode(
890 XFA_PacketType::Datasets, XFA_Element::DataValue);
894 pXFAChild->JSObject()->SetCData(
XFA_Attribute::Value, wsText);
910 WideString wsCurValue;
911 bool bMarkAsCompound =
false;
913 for (
CFX_XMLNode* pXMLChild = pXMLNode->GetFirstChild(); pXMLChild;
914 pXMLChild = pXMLChild->GetNextSibling()) {
922 if (!pXMLCurValueNode)
923 pXMLCurValueNode = pXMLChild;
924 wsCurValue
+= wsText;
928 WideString wsText = GetPlainTextFromRichText(
ToXMLElement(pXMLChild
));
929 if (!pXMLCurValueNode)
930 pXMLCurValueNode = pXMLChild;
931 wsCurValue
+= wsText;
934 bMarkAsCompound =
true;
935 if (pXMLCurValueNode) {
938 node_factory_->CreateNode(ePacketID, XFA_Element::DataValue);
943 pXFAChild->JSObject()->SetCData(
XFA_Attribute::Value, wsCurValue);
947 wsValue
+= wsCurValue;
950 pXMLCurValueNode =
nullptr;
953 node_factory_->CreateNode(ePacketID, XFA_Element::DataValue);
958 pXFAChild->JSObject()->SetCData(
XFA_Attribute::Name, wsNodeStr);
959 ParseDataValue(pXFAChild, pXMLChild, ePacketID);
963 wsValue += pXFAChild->JSObject()->GetCData(
XFA_Attribute::Value);
966 if (pXMLCurValueNode) {
968 if (bMarkAsCompound) {
970 node_factory_->CreateNode(ePacketID, XFA_Element::DataValue);
975 pXFAChild->JSObject()->SetCData(
XFA_Attribute::Value, wsCurValue);
980 wsValue
+= wsCurValue;
983 pXMLCurValueNode =
nullptr;
985 pXFANode->JSObject()->SetCData(
XFA_Attribute::Value, wsValue);
989 CFX_XMLInstruction* pXMLInstruction,
991 const std::vector<WideString>& target_data = pXMLInstruction->GetTargetData();
993 if (target_data.size() > 1 &&
994 (pXFANode
->GetDocument()->RecognizeXFAVersionNumber(target_data[0]) !=
996 target_data[1].EqualsASCII(
"v2.7-scripting:1")) {
1002 if (target_data.size() > 1 && target_data[0].EqualsASCII(
"JavaScript") &&
1003 target_data[1].EqualsASCII(
"strictScoping")) {
CFX_XMLElement * ToXMLElement(CFX_XMLNode *pNode)
CFX_XMLInstruction * ToXMLInstruction(CFX_XMLNode *pNode)
CFX_XMLText * ToXMLText(CFX_XMLNode *pNode)
WideString GetTextData() const
WideString GetLocalTagName() const
WideString GetAttribute(const WideString &name) const
WideString GetNamespaceURI() const
void RemoveAttribute(const WideString &name)
bool IsOriginalXFAVersion() const
virtual Type GetType() const =0
const WideString & GetText() const
bool BuildDocument(CFX_XMLDocument *pXML, XFA_PacketType ePacketID)
void ConstructXFANode(CXFA_Node *pXFANode, CFX_XMLNode *pXMLNode)
CXFA_DocumentBuilder(CXFA_Document *pNodeFactory)
CFX_XMLNode * Build(CFX_XMLDocument *pXML)
CXFA_Node * GetRootNode() const
CXFA_Node * GetFirstChildByName(uint32_t dwNodeNameHash) const
void InsertChildAndNotify(CXFA_Node *pNode, CXFA_Node *pBeforeNode)
void SetFlag(XFA_NodeFlag dwFlag)
void SetXMLMappingNode(CFX_XMLNode *node)
bool HasPropertyFlag(XFA_Element property, XFA_PropertyFlag flag) const
XFA_PacketType GetPacketType() const
XFA_Element GetElementType() const
bool IsContentNode() const
CXFA_Document * GetDocument() const
XFA_ObjectType GetObjectType() const
WideString & operator+=(const WideString &str)
WideString & operator=(WideString &&that) noexcept
WideString & operator+=(const wchar_t *str)
WideString & operator=(const WideString &that)
static WideString FromASCII(ByteStringView str)
bool EqualsASCII(ByteStringView that) const
XFA_PACKETINFO XFA_GetPacketByIndex(XFA_PacketType ePacket)
bool XFA_FDEExtension_ResolveNamespaceQualifier(CFX_XMLElement *pNode, const WideString &wsQualifier, WideString *wsNamespaceURI)
bool XFA_RecognizeRichText(CFX_XMLElement *pRichTextXMLNode)