// Copyright 2016 PDFium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com #include "xfa/fxfa/parser/cxfa_document_parser.h" #include #include #include "core/fxcrt/autorestorer.h" #include "core/fxcrt/cfx_readonlymemorystream.h" #include "core/fxcrt/cfx_widetextbuf.h" #include "core/fxcrt/fx_codepage.h" #include "core/fxcrt/fx_extension.h" #include "core/fxcrt/xml/cfx_xmlchardata.h" #include "core/fxcrt/xml/cfx_xmldocument.h" #include "core/fxcrt/xml/cfx_xmlelement.h" #include "core/fxcrt/xml/cfx_xmlinstruction.h" #include "core/fxcrt/xml/cfx_xmlnode.h" #include "core/fxcrt/xml/cfx_xmlparser.h" #include "core/fxcrt/xml/cfx_xmltext.h" #include "fxjs/xfa/cjx_object.h" #include "third_party/base/logging.h" #include "third_party/base/optional.h" #include "xfa/fxfa/fxfa.h" #include "xfa/fxfa/parser/cxfa_document.h" #include "xfa/fxfa/parser/cxfa_node.h" #include "xfa/fxfa/parser/cxfa_subform.h" #include "xfa/fxfa/parser/cxfa_template.h" #include "xfa/fxfa/parser/xfa_basic_data.h" #include "xfa/fxfa/parser/xfa_utils.h" namespace { CFX_XMLNode* GetDocumentNode(CFX_XMLNode* pRootNode) { for (CFX_XMLNode* pXMLNode = pRootNode->GetFirstChild(); pXMLNode; pXMLNode = pXMLNode->GetNextSibling()) { if (pXMLNode->GetType() == CFX_XMLNode::Type::kElement) return pXMLNode; } return nullptr; } bool MatchNodeName(CFX_XMLNode* pNode, WideStringView wsLocalTagName, WideStringView wsNamespaceURIPrefix, uint32_t eMatchFlags = XFA_XDPPACKET_FLAGS_NOMATCH) { CFX_XMLElement* pElement = ToXMLElement(pNode); if (!pElement) return false; WideString wsNodeStr = pElement->GetLocalTagName(); if (wsNodeStr != wsLocalTagName) return false; wsNodeStr = pElement->GetNamespaceURI(); if (eMatchFlags & XFA_XDPPACKET_FLAGS_NOMATCH) return true; if (eMatchFlags & XFA_XDPPACKET_FLAGS_PREFIXMATCH) { return wsNodeStr.First(wsNamespaceURIPrefix.GetLength()) == wsNamespaceURIPrefix; } return wsNodeStr == wsNamespaceURIPrefix; } bool GetAttributeLocalName(WideStringView wsAttributeName, WideString& wsLocalAttrName) { WideString wsAttrName(wsAttributeName); auto pos = wsAttrName.Find(L':', 0); if (!pos.has_value()) { wsLocalAttrName = std::move(wsAttrName); return false; } wsLocalAttrName = wsAttrName.Last(wsAttrName.GetLength() - pos.value() - 1); return true; } bool ResolveAttribute(CFX_XMLElement* pElement, const WideString& wsAttrName, WideString& wsLocalAttrName, WideString& wsNamespaceURI) { WideString wsNSPrefix; if (GetAttributeLocalName(wsAttrName.AsStringView(), wsLocalAttrName)) { wsNSPrefix = wsAttrName.First(wsAttrName.GetLength() - wsLocalAttrName.GetLength() - 1); } if (wsLocalAttrName.EqualsASCII("xmlns") || wsNSPrefix.EqualsASCII("xmlns") || wsNSPrefix.EqualsASCII("xml")) { return false; } if (!XFA_FDEExtension_ResolveNamespaceQualifier(pElement, wsNSPrefix, &wsNamespaceURI)) { wsNamespaceURI.clear(); return false; } return true; } Optional FindAttributeWithNS(CFX_XMLElement* pElement, WideStringView wsLocalAttributeName, WideStringView wsNamespaceURIPrefix) { WideString wsAttrNS; for (auto it : pElement->GetAttributes()) { auto pos = it.first.Find(L':', 0); WideString wsNSPrefix; if (!pos.has_value()) { if (wsLocalAttributeName != it.first) continue; } else { if (wsLocalAttributeName != it.first.Last(it.first.GetLength() - pos.value() - 1)) { continue; } wsNSPrefix = it.first.First(pos.value()); } if (!XFA_FDEExtension_ResolveNamespaceQualifier(pElement, wsNSPrefix, &wsAttrNS) || wsAttrNS != wsNamespaceURIPrefix) { continue; } return it.second; } return {}; } CFX_XMLNode* GetDataSetsFromXDP(CFX_XMLNode* pXMLDocumentNode) { XFA_PACKETINFO datasets_packet = XFA_GetPacketByIndex(XFA_PacketType::Datasets); if (MatchNodeName(pXMLDocumentNode, datasets_packet.name, datasets_packet.uri, datasets_packet.flags)) { return pXMLDocumentNode; } XFA_PACKETINFO xdp_packet = XFA_GetPacketByIndex(XFA_PacketType::Xdp); if (!MatchNodeName(pXMLDocumentNode, xdp_packet.name, xdp_packet.uri, xdp_packet.flags)) { return nullptr; } for (CFX_XMLNode* pDatasetsNode = pXMLDocumentNode->GetFirstChild(); pDatasetsNode; pDatasetsNode = pDatasetsNode->GetNextSibling()) { if (MatchNodeName(pDatasetsNode, datasets_packet.name, datasets_packet.uri, datasets_packet.flags)) { return pDatasetsNode; } } return nullptr; } bool IsStringAllWhitespace(WideString wsText) { wsText.TrimRight(L"\x20\x9\xD\xA"); return wsText.IsEmpty(); } void ConvertXMLToPlainText(CFX_XMLElement* pRootXMLNode, WideString& wsOutput) { for (CFX_XMLNode* pXMLChild = pRootXMLNode->GetFirstChild(); pXMLChild; pXMLChild = pXMLChild->GetNextSibling()) { switch (pXMLChild->GetType()) { case CFX_XMLNode::Type::kElement: { WideString wsTextData = ToXMLElement(pXMLChild)->GetTextData(); wsTextData += L"\n"; wsOutput += wsTextData; break; } case CFX_XMLNode::Type::kText: case CFX_XMLNode::Type::kCharData: { WideString wsText = ToXMLText(pXMLChild)->GetText(); if (IsStringAllWhitespace(wsText)) continue; wsOutput = std::move(wsText); break; } default: NOTREACHED(); break; } } } WideString GetPlainTextFromRichText(CFX_XMLNode* pXMLNode) { if (!pXMLNode) return WideString(); WideString wsPlainText; switch (pXMLNode->GetType()) { case CFX_XMLNode::Type::kElement: { CFX_XMLElement* pXMLElement = static_cast(pXMLNode); WideString wsTag = pXMLElement->GetLocalTagName(); uint32_t uTag = FX_HashCode_GetW(wsTag.AsStringView(), true); if (uTag == 0x0001f714) { wsPlainText += L"\n"; } else if (uTag == 0x00000070) { if (!wsPlainText.IsEmpty()) { wsPlainText += L"\n"; } } else if (uTag == 0xa48ac63) { if (!wsPlainText.IsEmpty() && wsPlainText.Back() != '\n') { wsPlainText += L"\n"; } } break; } case CFX_XMLNode::Type::kText: case CFX_XMLNode::Type::kCharData: { WideString wsContent = ToXMLText(pXMLNode)->GetText(); wsPlainText += wsContent; break; } default: break; } for (CFX_XMLNode* pChildXML = pXMLNode->GetFirstChild(); pChildXML; pChildXML = pChildXML->GetNextSibling()) { wsPlainText += GetPlainTextFromRichText(pChildXML); } return wsPlainText; } } // namespace bool XFA_RecognizeRichText(CFX_XMLElement* pRichTextXMLNode) { return pRichTextXMLNode && pRichTextXMLNode->GetNamespaceURI().EqualsASCII( "http://www.w3.org/1999/xhtml"); } CXFA_DocumentParser::CXFA_DocumentParser(CXFA_Document* pFactory) : m_pFactory(pFactory) {} CXFA_DocumentParser::~CXFA_DocumentParser() = default; bool CXFA_DocumentParser::Parse( const RetainPtr& pStream, XFA_PacketType ePacketID) { xml_doc_ = LoadXML(pStream); if (!xml_doc_) return false; CFX_XMLNode* root = GetDocumentNode(xml_doc_->GetRoot()); if (!root) return false; m_pRootNode = ParseAsXDPPacket(root, ePacketID); return !!m_pRootNode; } CFX_XMLNode* CXFA_DocumentParser::ParseXMLData(const ByteString& wsXML) { auto pStream = pdfium::MakeRetain(wsXML.raw_span()); xml_doc_ = LoadXML(pStream); if (!xml_doc_) return nullptr; return GetDocumentNode(xml_doc_->GetRoot()); } std::unique_ptr CXFA_DocumentParser::LoadXML( const RetainPtr& pStream) { ASSERT(pStream); CFX_XMLParser parser(pStream); std::unique_ptr doc = parser.Parse(); if (doc) { doc->GetRoot()->InsertChildNode(doc->CreateNode(L"xml"), 0); } return doc; } void CXFA_DocumentParser::ConstructXFANode(CXFA_Node* pXFANode, CFX_XMLNode* pXMLNode) { XFA_PacketType ePacketID = pXFANode->GetPacketType(); if (ePacketID == XFA_PacketType::Datasets) { if (pXFANode->GetElementType() == XFA_Element::DataValue) { for (CFX_XMLNode* pXMLChild = pXMLNode->GetFirstChild(); pXMLChild; pXMLChild = pXMLChild->GetNextSibling()) { CFX_XMLNode::Type eNodeType = pXMLChild->GetType(); if (eNodeType == CFX_XMLNode::Type::kInstruction) continue; if (eNodeType == CFX_XMLNode::Type::kElement) { CXFA_Node* pXFAChild = m_pFactory->CreateNode( XFA_PacketType::Datasets, XFA_Element::DataValue); if (!pXFAChild) return; CFX_XMLElement* child = static_cast(pXMLChild); WideString wsNodeStr = child->GetLocalTagName(); pXFAChild->JSObject()->SetCData(XFA_Attribute::Name, wsNodeStr, false, false); WideString wsChildValue = GetPlainTextFromRichText(child); if (!wsChildValue.IsEmpty()) pXFAChild->JSObject()->SetCData(XFA_Attribute::Value, wsChildValue, false, false); pXFANode->InsertChildAndNotify(pXFAChild, nullptr); pXFAChild->SetXMLMappingNode(pXMLChild); pXFAChild->SetFlag(XFA_NodeFlag_Initialized); break; } } m_pRootNode = pXFANode; } else { m_pRootNode = DataLoader(pXFANode, pXMLNode, true); } } else if (pXFANode->IsContentNode()) { ParseContentNode(pXFANode, pXMLNode, ePacketID); m_pRootNode = pXFANode; } else { m_pRootNode = NormalLoader(pXFANode, pXMLNode, ePacketID, true); } } CXFA_Node* CXFA_DocumentParser::GetRootNode() const { return m_pRootNode; } CXFA_Node* CXFA_DocumentParser::ParseAsXDPPacket(CFX_XMLNode* pXMLDocumentNode, XFA_PacketType ePacketID) { switch (ePacketID) { case XFA_PacketType::Xdp: return ParseAsXDPPacket_XDP(pXMLDocumentNode); case XFA_PacketType::Config: return ParseAsXDPPacket_Config(pXMLDocumentNode); case XFA_PacketType::Template: return ParseAsXDPPacket_Template(pXMLDocumentNode); case XFA_PacketType::Form: return ParseAsXDPPacket_Form(pXMLDocumentNode); case XFA_PacketType::Datasets: return ParseAsXDPPacket_Data(pXMLDocumentNode); case XFA_PacketType::Xdc: return ParseAsXDPPacket_Xdc(pXMLDocumentNode); case XFA_PacketType::LocaleSet: return ParseAsXDPPacket_LocaleConnectionSourceSet( pXMLDocumentNode, XFA_PacketType::LocaleSet, XFA_Element::LocaleSet); case XFA_PacketType::ConnectionSet: return ParseAsXDPPacket_LocaleConnectionSourceSet( pXMLDocumentNode, XFA_PacketType::ConnectionSet, XFA_Element::ConnectionSet); case XFA_PacketType::SourceSet: return ParseAsXDPPacket_LocaleConnectionSourceSet( pXMLDocumentNode, XFA_PacketType::SourceSet, XFA_Element::SourceSet); default: return ParseAsXDPPacket_User(pXMLDocumentNode); } } CXFA_Node* CXFA_DocumentParser::ParseAsXDPPacket_XDP( CFX_XMLNode* pXMLDocumentNode) { XFA_PACKETINFO packet = XFA_GetPacketByIndex(XFA_PacketType::Xdp); if (!MatchNodeName(pXMLDocumentNode, packet.name, packet.uri, packet.flags)) return nullptr; CXFA_Node* pXFARootNode = m_pFactory->CreateNode(XFA_PacketType::Xdp, XFA_Element::Xfa); if (!pXFARootNode) return nullptr; m_pRootNode = pXFARootNode; pXFARootNode->JSObject()->SetCData(XFA_Attribute::Name, L"xfa", false, false); for (auto it : ToXMLElement(pXMLDocumentNode)->GetAttributes()) { if (it.first.EqualsASCII("uuid")) { pXFARootNode->JSObject()->SetCData(XFA_Attribute::Uuid, it.second, false, false); } else if (it.first.EqualsASCII("timeStamp")) { pXFARootNode->JSObject()->SetCData(XFA_Attribute::TimeStamp, it.second, false, false); } } CFX_XMLNode* pXMLConfigDOMRoot = nullptr; CXFA_Node* pXFAConfigDOMRoot = nullptr; XFA_PACKETINFO config_packet = XFA_GetPacketByIndex(XFA_PacketType::Config); for (CFX_XMLNode* pChildItem = pXMLDocumentNode->GetFirstChild(); pChildItem; pChildItem = pChildItem->GetNextSibling()) { if (!MatchNodeName(pChildItem, config_packet.name, config_packet.uri, config_packet.flags)) { continue; } // TODO(tsepez): make GetFirstChildByName() take a name. uint32_t hash = FX_HashCode_GetW(config_packet.name, false); if (pXFARootNode->GetFirstChildByName(hash)) return nullptr; pXMLConfigDOMRoot = pChildItem; pXFAConfigDOMRoot = ParseAsXDPPacket_Config(pXMLConfigDOMRoot); if (pXFAConfigDOMRoot) pXFARootNode->InsertChildAndNotify(pXFAConfigDOMRoot, nullptr); } CFX_XMLNode* pXMLDatasetsDOMRoot = nullptr; CFX_XMLNode* pXMLFormDOMRoot = nullptr; CFX_XMLNode* pXMLTemplateDOMRoot = nullptr; for (CFX_XMLNode* pChildItem = pXMLDocumentNode->GetFirstChild(); pChildItem; pChildItem = pChildItem->GetNextSibling()) { CFX_XMLElement* pElement = ToXMLElement(pChildItem); if (!pElement || pElement == pXMLConfigDOMRoot) continue; WideString wsPacketName = pElement->GetLocalTagName(); Optional packet_info = XFA_GetPacketByName(wsPacketName.AsStringView()); if (packet_info.has_value() && packet_info.value().uri && !MatchNodeName(pElement, packet_info.value().name, packet_info.value().uri, packet_info.value().flags)) { packet_info = {}; } XFA_PacketType ePacket = XFA_PacketType::User; if (packet_info.has_value()) ePacket = packet_info.value().packet_type; if (ePacket == XFA_PacketType::Xdp) continue; if (ePacket == XFA_PacketType::Datasets) { if (pXMLDatasetsDOMRoot) return nullptr; pXMLDatasetsDOMRoot = pElement; } else if (ePacket == XFA_PacketType::Form) { if (pXMLFormDOMRoot) return nullptr; pXMLFormDOMRoot = pElement; } else if (ePacket == XFA_PacketType::Template) { // Found a duplicate template packet. if (pXMLTemplateDOMRoot) return nullptr; CXFA_Node* pPacketNode = ParseAsXDPPacket_Template(pElement); if (pPacketNode) { pXMLTemplateDOMRoot = pElement; pXFARootNode->InsertChildAndNotify(pPacketNode, nullptr); } } else { CXFA_Node* pPacketNode = ParseAsXDPPacket(pElement, ePacket); if (pPacketNode) { if (packet_info.has_value() && (packet_info.value().flags & XFA_XDPPACKET_FLAGS_SUPPORTONE) && pXFARootNode->GetFirstChildByName( FX_HashCode_GetW(packet_info.value().name, false))) { return nullptr; } pXFARootNode->InsertChildAndNotify(pPacketNode, nullptr); } } } // No template is found. if (!pXMLTemplateDOMRoot) return nullptr; if (pXMLDatasetsDOMRoot) { CXFA_Node* pPacketNode = ParseAsXDPPacket(pXMLDatasetsDOMRoot, XFA_PacketType::Datasets); if (pPacketNode) pXFARootNode->InsertChildAndNotify(pPacketNode, nullptr); } if (pXMLFormDOMRoot) { CXFA_Node* pPacketNode = ParseAsXDPPacket(pXMLFormDOMRoot, XFA_PacketType::Form); if (pPacketNode) pXFARootNode->InsertChildAndNotify(pPacketNode, nullptr); } pXFARootNode->SetXMLMappingNode(pXMLDocumentNode); return pXFARootNode; } CXFA_Node* CXFA_DocumentParser::ParseAsXDPPacket_Config( CFX_XMLNode* pXMLDocumentNode) { XFA_PACKETINFO packet = XFA_GetPacketByIndex(XFA_PacketType::Config); if (!MatchNodeName(pXMLDocumentNode, packet.name, packet.uri, packet.flags)) return nullptr; CXFA_Node* pNode = m_pFactory->CreateNode(XFA_PacketType::Config, XFA_Element::Config); if (!pNode) return nullptr; pNode->JSObject()->SetCData(XFA_Attribute::Name, packet.name, false, false); if (!NormalLoader(pNode, pXMLDocumentNode, XFA_PacketType::Config, true)) return nullptr; pNode->SetXMLMappingNode(pXMLDocumentNode); return pNode; } CXFA_Node* CXFA_DocumentParser::ParseAsXDPPacket_Template( CFX_XMLNode* pXMLDocumentNode) { XFA_PACKETINFO packet = XFA_GetPacketByIndex(XFA_PacketType::Template); if (!MatchNodeName(pXMLDocumentNode, packet.name, packet.uri, packet.flags)) return nullptr; CXFA_Node* pNode = m_pFactory->CreateNode(XFA_PacketType::Template, XFA_Element::Template); if (!pNode) return nullptr; pNode->JSObject()->SetCData(XFA_Attribute::Name, packet.name, false, false); CFX_XMLElement* pXMLDocumentElement = ToXMLElement(pXMLDocumentNode); WideString wsNamespaceURI = pXMLDocumentElement->GetNamespaceURI(); if (wsNamespaceURI.IsEmpty()) wsNamespaceURI = pXMLDocumentElement->GetAttribute(L"xmlns:xfa"); pNode->GetDocument()->RecognizeXFAVersionNumber(wsNamespaceURI); if (!NormalLoader(pNode, pXMLDocumentNode, XFA_PacketType::Template, true)) return nullptr; pNode->SetXMLMappingNode(pXMLDocumentNode); return pNode; } CXFA_Node* CXFA_DocumentParser::ParseAsXDPPacket_Form( CFX_XMLNode* pXMLDocumentNode) { XFA_PACKETINFO packet = XFA_GetPacketByIndex(XFA_PacketType::Form); if (!MatchNodeName(pXMLDocumentNode, packet.name, packet.uri, packet.flags)) return nullptr; CXFA_Node* pNode = m_pFactory->CreateNode(XFA_PacketType::Form, XFA_Element::Form); if (!pNode) return nullptr; pNode->JSObject()->SetCData(XFA_Attribute::Name, packet.name, false, false); CXFA_Template* pTemplateRoot = m_pRootNode->GetFirstChildByClass(XFA_Element::Template); CXFA_Subform* pTemplateChosen = pTemplateRoot ? pTemplateRoot->GetFirstChildByClass( XFA_Element::Subform) : nullptr; bool bUseAttribute = true; if (pTemplateChosen && pTemplateChosen->JSObject()->GetEnum(XFA_Attribute::RestoreState) != XFA_AttributeValue::Auto) { bUseAttribute = false; } if (!NormalLoader(pNode, pXMLDocumentNode, XFA_PacketType::Form, bUseAttribute)) return nullptr; pNode->SetXMLMappingNode(pXMLDocumentNode); return pNode; } CXFA_Node* CXFA_DocumentParser::ParseAsXDPPacket_Data( CFX_XMLNode* pXMLDocumentNode) { XFA_PACKETINFO packet = XFA_GetPacketByIndex(XFA_PacketType::Datasets); CFX_XMLNode* pDatasetsXMLNode = GetDataSetsFromXDP(pXMLDocumentNode); if (pDatasetsXMLNode) { CXFA_Node* pNode = m_pFactory->CreateNode(XFA_PacketType::Datasets, XFA_Element::DataModel); if (!pNode) return nullptr; pNode->JSObject()->SetCData(XFA_Attribute::Name, packet.name, false, false); if (!DataLoader(pNode, pDatasetsXMLNode, false)) return nullptr; pNode->SetXMLMappingNode(pDatasetsXMLNode); return pNode; } CFX_XMLNode* pDataXMLNode = nullptr; if (MatchNodeName(pXMLDocumentNode, L"data", packet.uri, packet.flags)) { ToXMLElement(pXMLDocumentNode)->RemoveAttribute(L"xmlns:xfa"); pDataXMLNode = pXMLDocumentNode; } else { auto* pDataElement = xml_doc_->CreateNode(L"xfa:data"); pXMLDocumentNode->RemoveSelfIfParented(); CFX_XMLElement* pElement = ToXMLElement(pXMLDocumentNode); pElement->RemoveAttribute(L"xmlns:xfa"); // The node was either removed from the parent above, or already has no // parent so we can take ownership. pDataElement->AppendLastChild(pXMLDocumentNode); pDataXMLNode = pDataElement; } if (!pDataXMLNode) return nullptr; CXFA_Node* pNode = m_pFactory->CreateNode(XFA_PacketType::Datasets, XFA_Element::DataGroup); if (!pNode) return nullptr; WideString wsLocalName = ToXMLElement(pDataXMLNode)->GetLocalTagName(); pNode->JSObject()->SetCData(XFA_Attribute::Name, wsLocalName, false, false); if (!DataLoader(pNode, pDataXMLNode, true)) return nullptr; pNode->SetXMLMappingNode(pDataXMLNode); return pNode; } CXFA_Node* CXFA_DocumentParser::ParseAsXDPPacket_LocaleConnectionSourceSet( CFX_XMLNode* pXMLDocumentNode, XFA_PacketType packet_type, XFA_Element element) { XFA_PACKETINFO packet = XFA_GetPacketByIndex(packet_type); if (!MatchNodeName(pXMLDocumentNode, packet.name, packet.uri, packet.flags)) return nullptr; CXFA_Node* pNode = m_pFactory->CreateNode(packet_type, element); if (!pNode) return nullptr; pNode->JSObject()->SetCData(XFA_Attribute::Name, packet.name, false, false); if (!NormalLoader(pNode, pXMLDocumentNode, packet_type, true)) return nullptr; pNode->SetXMLMappingNode(pXMLDocumentNode); return pNode; } CXFA_Node* CXFA_DocumentParser::ParseAsXDPPacket_Xdc( CFX_XMLNode* pXMLDocumentNode) { XFA_PACKETINFO packet = XFA_GetPacketByIndex(XFA_PacketType::Xdc); if (!MatchNodeName(pXMLDocumentNode, packet.name, packet.uri, packet.flags)) return nullptr; CXFA_Node* pNode = m_pFactory->CreateNode(XFA_PacketType::Xdc, XFA_Element::Xdc); if (!pNode) return nullptr; pNode->JSObject()->SetCData(XFA_Attribute::Name, packet.name, false, false); pNode->SetXMLMappingNode(pXMLDocumentNode); return pNode; } CXFA_Node* CXFA_DocumentParser::ParseAsXDPPacket_User( CFX_XMLNode* pXMLDocumentNode) { CXFA_Node* pNode = m_pFactory->CreateNode(XFA_PacketType::Xdp, XFA_Element::Packet); if (!pNode) return nullptr; WideString wsName = ToXMLElement(pXMLDocumentNode)->GetLocalTagName(); pNode->JSObject()->SetCData(XFA_Attribute::Name, wsName, false, false); pNode->SetXMLMappingNode(pXMLDocumentNode); return pNode; } CXFA_Node* CXFA_DocumentParser::DataLoader(CXFA_Node* pXFANode, CFX_XMLNode* pXMLDoc, bool bDoTransform) { ParseDataGroup(pXFANode, pXMLDoc, XFA_PacketType::Datasets); return pXFANode; } CXFA_Node* CXFA_DocumentParser::NormalLoader(CXFA_Node* pXFANode, CFX_XMLNode* pXMLDoc, XFA_PacketType ePacketID, bool bUseAttribute) { constexpr size_t kMaxExecuteRecursion = 1000; if (m_ExecuteRecursionDepth > kMaxExecuteRecursion) return nullptr; AutoRestorer restorer(&m_ExecuteRecursionDepth); ++m_ExecuteRecursionDepth; bool bOneOfPropertyFound = false; for (CFX_XMLNode* pXMLChild = pXMLDoc->GetFirstChild(); pXMLChild; pXMLChild = pXMLChild->GetNextSibling()) { switch (pXMLChild->GetType()) { case CFX_XMLNode::Type::kElement: { CFX_XMLElement* pXMLElement = static_cast(pXMLChild); WideString wsTagName = pXMLElement->GetLocalTagName(); XFA_Element eType = XFA_GetElementByName(wsTagName.AsStringView()); if (eType == XFA_Element::Unknown) continue; if (pXFANode->HasPropertyFlags( eType, XFA_PROPERTYFLAG_OneOf | XFA_PROPERTYFLAG_DefaultOneOf)) { if (bOneOfPropertyFound) break; bOneOfPropertyFound = true; } CXFA_Node* pXFAChild = m_pFactory->CreateNode(ePacketID, eType); if (!pXFAChild) return nullptr; if (ePacketID == XFA_PacketType::Config) { pXFAChild->JSObject()->SetAttribute(XFA_Attribute::Name, wsTagName.AsStringView(), false); } bool IsNeedValue = true; for (auto it : pXMLElement->GetAttributes()) { WideString wsAttrName; GetAttributeLocalName(it.first.AsStringView(), wsAttrName); if (wsAttrName.EqualsASCII("nil") && it.second.EqualsASCII("true")) IsNeedValue = false; Optional attr = XFA_GetAttributeByName(wsAttrName.AsStringView()); if (!attr.has_value()) continue; if (!bUseAttribute && attr.value().attribute != XFA_Attribute::Name && attr.value().attribute != XFA_Attribute::Save) { continue; } pXFAChild->JSObject()->SetAttribute(attr.value().attribute, it.second.AsStringView(), false); } pXFANode->InsertChildAndNotify(pXFAChild, nullptr); if (eType == XFA_Element::Validate || eType == XFA_Element::Locale) { if (ePacketID == XFA_PacketType::Config) ParseContentNode(pXFAChild, pXMLElement, ePacketID); else NormalLoader(pXFAChild, pXMLElement, ePacketID, bUseAttribute); break; } switch (pXFAChild->GetObjectType()) { case XFA_ObjectType::ContentNode: case XFA_ObjectType::TextNode: case XFA_ObjectType::NodeC: case XFA_ObjectType::NodeV: if (IsNeedValue) ParseContentNode(pXFAChild, pXMLElement, ePacketID); break; default: NormalLoader(pXFAChild, pXMLElement, ePacketID, bUseAttribute); break; } } break; case CFX_XMLNode::Type::kInstruction: ParseInstruction(pXFANode, ToXMLInstruction(pXMLChild), ePacketID); break; default: break; } } return pXFANode; } void CXFA_DocumentParser::ParseContentNode(CXFA_Node* pXFANode, CFX_XMLNode* pXMLNode, XFA_PacketType ePacketID) { XFA_Element element = XFA_Element::Sharptext; if (pXFANode->GetElementType() == XFA_Element::ExData) { WideString wsContentType = pXFANode->JSObject()->GetCData(XFA_Attribute::ContentType); if (wsContentType.EqualsASCII("text/html")) element = XFA_Element::SharpxHTML; else if (wsContentType.EqualsASCII("text/xml")) element = XFA_Element::Sharpxml; } if (element == XFA_Element::SharpxHTML) pXFANode->SetXMLMappingNode(pXMLNode); WideString wsValue; for (CFX_XMLNode* pXMLChild = pXMLNode->GetFirstChild(); pXMLChild; pXMLChild = pXMLChild->GetNextSibling()) { CFX_XMLNode::Type eNodeType = pXMLChild->GetType(); if (eNodeType == CFX_XMLNode::Type::kInstruction) continue; CFX_XMLElement* pElement = ToXMLElement(pXMLChild); if (element == XFA_Element::SharpxHTML) { if (!pElement) break; if (XFA_RecognizeRichText(pElement)) wsValue += GetPlainTextFromRichText(pElement); } else if (element == XFA_Element::Sharpxml) { if (!pElement) break; ConvertXMLToPlainText(pElement, wsValue); } else { if (pElement) break; CFX_XMLText* pText = ToXMLText(pXMLChild); if (pText) wsValue = pText->GetText(); } break; } if (!wsValue.IsEmpty()) { if (pXFANode->IsContentNode()) { CXFA_Node* pContentRawDataNode = m_pFactory->CreateNode(ePacketID, element); ASSERT(pContentRawDataNode); pContentRawDataNode->JSObject()->SetCData(XFA_Attribute::Value, wsValue, false, false); pXFANode->InsertChildAndNotify(pContentRawDataNode, nullptr); } else { pXFANode->JSObject()->SetCData(XFA_Attribute::Value, wsValue, false, false); } } } void CXFA_DocumentParser::ParseDataGroup(CXFA_Node* pXFANode, CFX_XMLNode* pXMLNode, XFA_PacketType ePacketID) { for (CFX_XMLNode* pXMLChild = pXMLNode->GetFirstChild(); pXMLChild; pXMLChild = pXMLChild->GetNextSibling()) { switch (pXMLChild->GetType()) { case CFX_XMLNode::Type::kElement: { CFX_XMLElement* pXMLElement = static_cast(pXMLChild); WideString wsNamespaceURI = pXMLElement->GetNamespaceURI(); if (wsNamespaceURI.EqualsASCII( "http://www.xfa.com/schema/xfa-package/") || wsNamespaceURI.EqualsASCII( "http://www.xfa.org/schema/xfa-package/") || wsNamespaceURI.EqualsASCII( "http://www.w3.org/2001/XMLSchema-instance")) { continue; } XFA_Element eNodeType = XFA_Element::DataModel; if (eNodeType == XFA_Element::DataModel) { Optional wsDataNodeAttr = FindAttributeWithNS(pXMLElement, L"dataNode", L"http://www.xfa.org/schema/xfa-data/1.0/"); if (wsDataNodeAttr.has_value()) { if (wsDataNodeAttr.value().EqualsASCII("dataGroup")) eNodeType = XFA_Element::DataGroup; else if (wsDataNodeAttr.value().EqualsASCII("dataValue")) eNodeType = XFA_Element::DataValue; } } if (eNodeType == XFA_Element::DataModel) { Optional wsContentType = FindAttributeWithNS(pXMLElement, L"contentType", L"http://www.xfa.org/schema/xfa-data/1.0/"); if (wsContentType.has_value() && !wsContentType.value().IsEmpty()) eNodeType = XFA_Element::DataValue; } if (eNodeType == XFA_Element::DataModel) { for (CFX_XMLNode* pXMLDataChild = pXMLElement->GetFirstChild(); pXMLDataChild; pXMLDataChild = pXMLDataChild->GetNextSibling()) { CFX_XMLElement* pElement = ToXMLElement(pXMLDataChild); if (pElement && !XFA_RecognizeRichText(pElement)) { eNodeType = XFA_Element::DataGroup; break; } } } if (eNodeType == XFA_Element::DataModel) eNodeType = XFA_Element::DataValue; CXFA_Node* pXFAChild = m_pFactory->CreateNode(XFA_PacketType::Datasets, eNodeType); if (!pXFAChild) return; pXFAChild->JSObject()->SetCData( XFA_Attribute::Name, pXMLElement->GetLocalTagName(), false, false); bool bNeedValue = true; for (auto it : pXMLElement->GetAttributes()) { WideString wsName; WideString wsNS; if (!ResolveAttribute(pXMLElement, it.first, wsName, wsNS)) { continue; } if (wsName.EqualsASCII("nil") && it.second.EqualsASCII("true")) { bNeedValue = false; continue; } if (wsNS.EqualsASCII("http://www.xfa.com/schema/xfa-package/") || wsNS.EqualsASCII("http://www.xfa.org/schema/xfa-package/") || wsNS.EqualsASCII("http://www.w3.org/2001/XMLSchema-instance") || wsNS.EqualsASCII("http://www.xfa.org/schema/xfa-data/1.0/")) { continue; } CXFA_Node* pXFAMetaData = m_pFactory->CreateNode( XFA_PacketType::Datasets, XFA_Element::DataValue); if (!pXFAMetaData) return; pXFAMetaData->JSObject()->SetCData(XFA_Attribute::Name, wsName, false, false); pXFAMetaData->JSObject()->SetCData(XFA_Attribute::QualifiedName, it.first, false, false); pXFAMetaData->JSObject()->SetCData(XFA_Attribute::Value, it.second, false, false); pXFAMetaData->JSObject()->SetEnum( XFA_Attribute::Contains, XFA_AttributeValue::MetaData, false); pXFAChild->InsertChildAndNotify(pXFAMetaData, nullptr); pXFAMetaData->SetXMLMappingNode(pXMLElement); pXFAMetaData->SetFlag(XFA_NodeFlag_Initialized); } if (!bNeedValue) pXMLElement->RemoveAttribute(L"xsi:nil"); pXFANode->InsertChildAndNotify(pXFAChild, nullptr); if (eNodeType == XFA_Element::DataGroup) ParseDataGroup(pXFAChild, pXMLElement, ePacketID); else if (bNeedValue) ParseDataValue(pXFAChild, pXMLChild, XFA_PacketType::Datasets); pXFAChild->SetXMLMappingNode(pXMLElement); pXFAChild->SetFlag(XFA_NodeFlag_Initialized); continue; } case CFX_XMLNode::Type::kCharData: case CFX_XMLNode::Type::kText: { CFX_XMLText* pXMLText = ToXMLText(pXMLChild); WideString wsText = pXMLText->GetText(); if (IsStringAllWhitespace(wsText)) continue; CXFA_Node* pXFAChild = m_pFactory->CreateNode(XFA_PacketType::Datasets, XFA_Element::DataValue); if (!pXFAChild) return; pXFAChild->JSObject()->SetCData(XFA_Attribute::Value, wsText, false, false); pXFANode->InsertChildAndNotify(pXFAChild, nullptr); pXFAChild->SetXMLMappingNode(pXMLText); pXFAChild->SetFlag(XFA_NodeFlag_Initialized); continue; } default: continue; } } } void CXFA_DocumentParser::ParseDataValue(CXFA_Node* pXFANode, CFX_XMLNode* pXMLNode, XFA_PacketType ePacketID) { CFX_WideTextBuf wsValueTextBuf; CFX_WideTextBuf wsCurValueTextBuf; bool bMarkAsCompound = false; CFX_XMLNode* pXMLCurValueNode = nullptr; for (CFX_XMLNode* pXMLChild = pXMLNode->GetFirstChild(); pXMLChild; pXMLChild = pXMLChild->GetNextSibling()) { CFX_XMLNode::Type eNodeType = pXMLChild->GetType(); if (eNodeType == CFX_XMLNode::Type::kInstruction) continue; CFX_XMLText* pText = ToXMLText(pXMLChild); if (pText) { WideString wsText = pText->GetText(); if (!pXMLCurValueNode) pXMLCurValueNode = pXMLChild; wsCurValueTextBuf << wsText; continue; } if (XFA_RecognizeRichText(ToXMLElement(pXMLChild))) { WideString wsText = GetPlainTextFromRichText(ToXMLElement(pXMLChild)); if (!pXMLCurValueNode) pXMLCurValueNode = pXMLChild; wsCurValueTextBuf << wsText; continue; } bMarkAsCompound = true; if (pXMLCurValueNode) { WideString wsCurValue = wsCurValueTextBuf.MakeString(); if (!wsCurValue.IsEmpty()) { CXFA_Node* pXFAChild = m_pFactory->CreateNode(ePacketID, XFA_Element::DataValue); if (!pXFAChild) return; pXFAChild->JSObject()->SetCData(XFA_Attribute::Name, WideString(), false, false); pXFAChild->JSObject()->SetCData(XFA_Attribute::Value, wsCurValue, false, false); pXFANode->InsertChildAndNotify(pXFAChild, nullptr); pXFAChild->SetXMLMappingNode(pXMLCurValueNode); pXFAChild->SetFlag(XFA_NodeFlag_Initialized); wsValueTextBuf << wsCurValue; wsCurValueTextBuf.Clear(); } pXMLCurValueNode = nullptr; } CXFA_Node* pXFAChild = m_pFactory->CreateNode(ePacketID, XFA_Element::DataValue); if (!pXFAChild) return; WideString wsNodeStr = ToXMLElement(pXMLChild)->GetLocalTagName(); pXFAChild->JSObject()->SetCData(XFA_Attribute::Name, wsNodeStr, false, false); ParseDataValue(pXFAChild, pXMLChild, ePacketID); pXFANode->InsertChildAndNotify(pXFAChild, nullptr); pXFAChild->SetXMLMappingNode(pXMLChild); pXFAChild->SetFlag(XFA_NodeFlag_Initialized); WideString wsCurValue = pXFAChild->JSObject()->GetCData(XFA_Attribute::Value); wsValueTextBuf << wsCurValue; } if (pXMLCurValueNode) { WideString wsCurValue = wsCurValueTextBuf.MakeString(); if (!wsCurValue.IsEmpty()) { if (bMarkAsCompound) { CXFA_Node* pXFAChild = m_pFactory->CreateNode(ePacketID, XFA_Element::DataValue); if (!pXFAChild) return; pXFAChild->JSObject()->SetCData(XFA_Attribute::Name, WideString(), false, false); pXFAChild->JSObject()->SetCData(XFA_Attribute::Value, wsCurValue, false, false); pXFANode->InsertChildAndNotify(pXFAChild, nullptr); pXFAChild->SetXMLMappingNode(pXMLCurValueNode); pXFAChild->SetFlag(XFA_NodeFlag_Initialized); } wsValueTextBuf << wsCurValue; wsCurValueTextBuf.Clear(); } pXMLCurValueNode = nullptr; } WideString wsNodeValue = wsValueTextBuf.MakeString(); pXFANode->JSObject()->SetCData(XFA_Attribute::Value, wsNodeValue, false, false); } void CXFA_DocumentParser::ParseInstruction(CXFA_Node* pXFANode, CFX_XMLInstruction* pXMLInstruction, XFA_PacketType ePacketID) { const std::vector& target_data = pXMLInstruction->GetTargetData(); if (pXMLInstruction->IsOriginalXFAVersion()) { if (target_data.size() > 1 && (pXFANode->GetDocument()->RecognizeXFAVersionNumber(target_data[0]) != XFA_VERSION_UNKNOWN) && target_data[1].EqualsASCII("v2.7-scripting:1")) { pXFANode->GetDocument()->set_is_scripting(); } return; } if (pXMLInstruction->IsAcrobat()) { if (target_data.size() > 1 && target_data[0].EqualsASCII("JavaScript") && target_data[1].EqualsASCII("strictScoping")) { pXFANode->GetDocument()->set_is_strict_scoping(); } } }