1 // Copyright 2016 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #include "xfa/fxfa/parser/cxfa_simple_parser.h"
8
9 #include <utility>
10 #include <vector>
11
12 #include "core/fxcrt/cfx_checksumcontext.h"
13 #include "core/fxcrt/cfx_seekablestreamproxy.h"
14 #include "core/fxcrt/cfx_widetextbuf.h"
15 #include "core/fxcrt/fx_codepage.h"
16 #include "core/fxcrt/fx_extension.h"
17 #include "core/fxcrt/xml/cfx_xmlchardata.h"
18 #include "core/fxcrt/xml/cfx_xmldoc.h"
19 #include "core/fxcrt/xml/cfx_xmlelement.h"
20 #include "core/fxcrt/xml/cfx_xmlinstruction.h"
21 #include "core/fxcrt/xml/cfx_xmlnode.h"
22 #include "core/fxcrt/xml/cfx_xmlparser.h"
23 #include "core/fxcrt/xml/cfx_xmltext.h"
24 #include "fxjs/xfa/cjx_object.h"
25 #include "third_party/base/logging.h"
26 #include "third_party/base/ptr_util.h"
27 #include "xfa/fxfa/fxfa.h"
28 #include "xfa/fxfa/parser/cxfa_document.h"
29 #include "xfa/fxfa/parser/cxfa_node.h"
30 #include "xfa/fxfa/parser/cxfa_subform.h"
31 #include "xfa/fxfa/parser/cxfa_template.h"
32 #include "xfa/fxfa/parser/xfa_basic_data.h"
33 #include "xfa/fxfa/parser/xfa_utils.h"
34
35 namespace {
36
// Static description of one XDP packet kind known to the parser.
struct PacketInfo {
  // Value that GetPacketByName() compares against the hash of a tag name.
  uint32_t hash;
  // Local tag name of the packet element (nullptr in the first table entry).
  const wchar_t* name;
  // Packet type this entry describes.
  XFA_PacketType packet_type;
  // Namespace URI (or URI prefix) handed to MatchNodeName(); may be nullptr.
  const wchar_t* uri;
  // Bitwise OR of XFA_XDPPACKET_FLAGS_* values.
  uint32_t flags;
};
// Table of the packets the parser recognizes.
//
// Two lookups depend on the order of the entries:
//  - GetPacketByName() runs std::lower_bound() over |hash|, so the entries
//    must stay sorted by ascending hash.
//  - GetPacketByIndex() uses the numeric value of an XFA_PacketType as the
//    array index, so presumably the entries must also follow the enum's
//    declaration order (enum not visible here - confirm before reordering).
const PacketInfo PacketData[] = {
    {0x0, nullptr, XFA_PacketType::User, nullptr,
     XFA_XDPPACKET_FLAGS_NOMATCH | XFA_XDPPACKET_FLAGS_SUPPORTMANY},
    {0x811929d, L"sourceSet", XFA_PacketType::SourceSet,
     L"http://www.xfa.org/schema/xfa-source-set/",
     XFA_XDPPACKET_FLAGS_NOMATCH | XFA_XDPPACKET_FLAGS_SUPPORTONE},
    {0xb843dba, L"pdf", XFA_PacketType::Pdf, L"http://ns.adobe.com/xdp/pdf/",
     XFA_XDPPACKET_FLAGS_COMPLETEMATCH | XFA_XDPPACKET_FLAGS_SUPPORTONE},
    {0xc56afbf, L"xdc", XFA_PacketType::Xdc, L"http://www.xfa.org/schema/xdc/",
     XFA_XDPPACKET_FLAGS_NOMATCH | XFA_XDPPACKET_FLAGS_SUPPORTONE},
    {0xc56afcc, L"xdp", XFA_PacketType::Xdp, L"http://ns.adobe.com/xdp/",
     XFA_XDPPACKET_FLAGS_COMPLETEMATCH | XFA_XDPPACKET_FLAGS_SUPPORTONE},
    {0x132a8fbc, L"xmpmeta", XFA_PacketType::Xmpmeta,
     L"http://ns.adobe.com/xmpmeta/",
     XFA_XDPPACKET_FLAGS_NOMATCH | XFA_XDPPACKET_FLAGS_SUPPORTMANY},
    {0x48d004a8, L"xfdf", XFA_PacketType::Xfdf, L"http://ns.adobe.com/xfdf/",
     XFA_XDPPACKET_FLAGS_NOMATCH | XFA_XDPPACKET_FLAGS_SUPPORTONE},
    {0x4e1e39b6, L"config", XFA_PacketType::Config,
     L"http://www.xfa.org/schema/xci/",
     XFA_XDPPACKET_FLAGS_NOMATCH | XFA_XDPPACKET_FLAGS_SUPPORTONE},
    {0x5473b6dc, L"localeSet", XFA_PacketType::LocaleSet,
     L"http://www.xfa.org/schema/xfa-locale-set/",
     XFA_XDPPACKET_FLAGS_NOMATCH | XFA_XDPPACKET_FLAGS_SUPPORTONE},
    {0x6038580a, L"stylesheet", XFA_PacketType::Stylesheet,
     L"http://www.w3.org/1999/XSL/Transform",
     XFA_XDPPACKET_FLAGS_NOMATCH | XFA_XDPPACKET_FLAGS_SUPPORTMANY},
    {0x803550fc, L"template", XFA_PacketType::Template,
     L"http://www.xfa.org/schema/xfa-template/",
     XFA_XDPPACKET_FLAGS_NOMATCH | XFA_XDPPACKET_FLAGS_SUPPORTONE},
    {0x8b036f32, L"signature", XFA_PacketType::Signature,
     L"http://www.w3.org/2000/09/xmldsig#",
     XFA_XDPPACKET_FLAGS_NOMATCH | XFA_XDPPACKET_FLAGS_SUPPORTONE},
    {0x99b95079, L"datasets", XFA_PacketType::Datasets,
     L"http://www.xfa.org/schema/xfa-data/",
     XFA_XDPPACKET_FLAGS_PREFIXMATCH | XFA_XDPPACKET_FLAGS_SUPPORTONE},
    {0xcd309ff4, L"form", XFA_PacketType::Form,
     L"http://www.xfa.org/schema/xfa-form/",
     XFA_XDPPACKET_FLAGS_NOMATCH | XFA_XDPPACKET_FLAGS_SUPPORTONE},
    {0xe14c801c, L"connectionSet", XFA_PacketType::ConnectionSet,
     L"http://www.xfa.org/schema/xfa-connection-set/",
     XFA_XDPPACKET_FLAGS_NOMATCH | XFA_XDPPACKET_FLAGS_SUPPORTONE},
};
86
GetPacketByIndex(XFA_PacketType ePacket)87 const PacketInfo* GetPacketByIndex(XFA_PacketType ePacket) {
88 return PacketData + static_cast<uint8_t>(ePacket);
89 }
90
GetPacketByName(const WideStringView & wsName)91 const PacketInfo* GetPacketByName(const WideStringView& wsName) {
92 if (wsName.IsEmpty())
93 return nullptr;
94
95 uint32_t hash = FX_HashCode_GetW(wsName, false);
96 auto* elem = std::lower_bound(
97 std::begin(PacketData), std::end(PacketData), hash,
98 [](const PacketInfo& a, uint32_t hash) { return a.hash < hash; });
99 if (elem != std::end(PacketData) && elem->hash == hash)
100 return elem;
101 return nullptr;
102 }
103
GetDocumentNode(CFX_XMLDoc * pXMLDoc,bool bVerifyWellFormness=false)104 CFX_XMLNode* GetDocumentNode(CFX_XMLDoc* pXMLDoc,
105 bool bVerifyWellFormness = false) {
106 if (!pXMLDoc)
107 return nullptr;
108
109 for (CFX_XMLNode* pXMLNode =
110 pXMLDoc->GetRoot()->GetNodeItem(CFX_XMLNode::FirstChild);
111 pXMLNode; pXMLNode = pXMLNode->GetNodeItem(CFX_XMLNode::NextSibling)) {
112 if (pXMLNode->GetType() != FX_XMLNODE_Element)
113 continue;
114
115 if (!bVerifyWellFormness)
116 return pXMLNode;
117
118 for (CFX_XMLNode* pNextNode =
119 pXMLNode->GetNodeItem(CFX_XMLNode::NextSibling);
120 pNextNode;
121 pNextNode = pNextNode->GetNodeItem(CFX_XMLNode::NextSibling)) {
122 if (pNextNode->GetType() == FX_XMLNODE_Element)
123 return nullptr;
124 }
125 return pXMLNode;
126 }
127 return nullptr;
128 }
129
GetElementTagNamespaceURI(CFX_XMLElement * pElement)130 WideString GetElementTagNamespaceURI(CFX_XMLElement* pElement) {
131 WideString wsNodeStr = pElement->GetNamespacePrefix();
132 WideString wsNamespaceURI;
133 if (!XFA_FDEExtension_ResolveNamespaceQualifier(pElement, wsNodeStr,
134 &wsNamespaceURI)) {
135 return WideString();
136 }
137 return wsNamespaceURI;
138 }
139
MatchNodeName(CFX_XMLNode * pNode,const WideStringView & wsLocalTagName,const WideStringView & wsNamespaceURIPrefix,uint32_t eMatchFlags=XFA_XDPPACKET_FLAGS_NOMATCH)140 bool MatchNodeName(CFX_XMLNode* pNode,
141 const WideStringView& wsLocalTagName,
142 const WideStringView& wsNamespaceURIPrefix,
143 uint32_t eMatchFlags = XFA_XDPPACKET_FLAGS_NOMATCH) {
144 if (!pNode || pNode->GetType() != FX_XMLNODE_Element)
145 return false;
146
147 CFX_XMLElement* pElement = reinterpret_cast<CFX_XMLElement*>(pNode);
148 WideString wsNodeStr = pElement->GetLocalTagName();
149 if (wsNodeStr != wsLocalTagName)
150 return false;
151
152 wsNodeStr = GetElementTagNamespaceURI(pElement);
153 if (eMatchFlags & XFA_XDPPACKET_FLAGS_NOMATCH)
154 return true;
155 if (eMatchFlags & XFA_XDPPACKET_FLAGS_PREFIXMATCH) {
156 return wsNodeStr.Left(wsNamespaceURIPrefix.GetLength()) ==
157 wsNamespaceURIPrefix;
158 }
159
160 return wsNodeStr == wsNamespaceURIPrefix;
161 }
162
GetAttributeLocalName(const WideStringView & wsAttributeName,WideString & wsLocalAttrName)163 bool GetAttributeLocalName(const WideStringView& wsAttributeName,
164 WideString& wsLocalAttrName) {
165 WideString wsAttrName(wsAttributeName);
166 auto pos = wsAttrName.Find(L':', 0);
167 if (!pos.has_value()) {
168 wsLocalAttrName = wsAttrName;
169 return false;
170 }
171 wsLocalAttrName = wsAttrName.Right(wsAttrName.GetLength() - pos.value() - 1);
172 return true;
173 }
174
ResolveAttribute(CFX_XMLElement * pElement,const WideString & wsAttrName,WideString & wsLocalAttrName,WideString & wsNamespaceURI)175 bool ResolveAttribute(CFX_XMLElement* pElement,
176 const WideString& wsAttrName,
177 WideString& wsLocalAttrName,
178 WideString& wsNamespaceURI) {
179 WideString wsNSPrefix;
180 if (GetAttributeLocalName(wsAttrName.AsStringView(), wsLocalAttrName)) {
181 wsNSPrefix = wsAttrName.Left(wsAttrName.GetLength() -
182 wsLocalAttrName.GetLength() - 1);
183 }
184 if (wsLocalAttrName == L"xmlns" || wsNSPrefix == L"xmlns" ||
185 wsNSPrefix == L"xml") {
186 return false;
187 }
188 if (!XFA_FDEExtension_ResolveNamespaceQualifier(pElement, wsNSPrefix,
189 &wsNamespaceURI)) {
190 wsNamespaceURI.clear();
191 return false;
192 }
193 return true;
194 }
195
FindAttributeWithNS(CFX_XMLElement * pElement,const WideStringView & wsLocalAttributeName,const WideStringView & wsNamespaceURIPrefix,WideString & wsValue,bool bMatchNSAsPrefix=false)196 bool FindAttributeWithNS(CFX_XMLElement* pElement,
197 const WideStringView& wsLocalAttributeName,
198 const WideStringView& wsNamespaceURIPrefix,
199 WideString& wsValue,
200 bool bMatchNSAsPrefix = false) {
201 if (!pElement)
202 return false;
203
204 WideString wsAttrNS;
205 for (auto it : pElement->GetAttributes()) {
206 auto pos = it.first.Find(L':', 0);
207 WideString wsNSPrefix;
208 if (!pos.has_value()) {
209 if (wsLocalAttributeName != it.first)
210 continue;
211 } else {
212 if (wsLocalAttributeName !=
213 it.first.Right(it.first.GetLength() - pos.value() - 1)) {
214 continue;
215 }
216 wsNSPrefix = it.first.Left(pos.value());
217 }
218
219 if (!XFA_FDEExtension_ResolveNamespaceQualifier(pElement, wsNSPrefix,
220 &wsAttrNS)) {
221 continue;
222 }
223 if (bMatchNSAsPrefix) {
224 if (wsAttrNS.Left(wsNamespaceURIPrefix.GetLength()) !=
225 wsNamespaceURIPrefix) {
226 continue;
227 }
228 } else {
229 if (wsAttrNS != wsNamespaceURIPrefix)
230 continue;
231 }
232 wsValue = it.second;
233 return true;
234 }
235 return false;
236 }
237
GetDataSetsFromXDP(CFX_XMLNode * pXMLDocumentNode)238 CFX_XMLNode* GetDataSetsFromXDP(CFX_XMLNode* pXMLDocumentNode) {
239 const PacketInfo* datasets_packet =
240 GetPacketByIndex(XFA_PacketType::Datasets);
241 if (MatchNodeName(pXMLDocumentNode, datasets_packet->name,
242 datasets_packet->uri, datasets_packet->flags)) {
243 return pXMLDocumentNode;
244 }
245
246 const PacketInfo* packet = GetPacketByIndex(XFA_PacketType::Xdp);
247 if (!MatchNodeName(pXMLDocumentNode, packet->name, packet->uri,
248 packet->flags)) {
249 return nullptr;
250 }
251
252 for (CFX_XMLNode* pDatasetsNode =
253 pXMLDocumentNode->GetNodeItem(CFX_XMLNode::FirstChild);
254 pDatasetsNode;
255 pDatasetsNode = pDatasetsNode->GetNodeItem(CFX_XMLNode::NextSibling)) {
256 if (MatchNodeName(pDatasetsNode, datasets_packet->name,
257 datasets_packet->uri, datasets_packet->flags)) {
258 return pDatasetsNode;
259 }
260 }
261 return nullptr;
262 }
263
IsStringAllWhitespace(WideString wsText)264 bool IsStringAllWhitespace(WideString wsText) {
265 wsText.TrimRight(L"\x20\x9\xD\xA");
266 return wsText.IsEmpty();
267 }
268
ConvertXMLToPlainText(CFX_XMLElement * pRootXMLNode,WideString & wsOutput)269 void ConvertXMLToPlainText(CFX_XMLElement* pRootXMLNode, WideString& wsOutput) {
270 for (CFX_XMLNode* pXMLChild =
271 pRootXMLNode->GetNodeItem(CFX_XMLNode::FirstChild);
272 pXMLChild;
273 pXMLChild = pXMLChild->GetNodeItem(CFX_XMLNode::NextSibling)) {
274 switch (pXMLChild->GetType()) {
275 case FX_XMLNODE_Element: {
276 WideString wsTextData =
277 static_cast<CFX_XMLElement*>(pXMLChild)->GetTextData();
278 wsTextData += L"\n";
279 wsOutput += wsTextData;
280 break;
281 }
282 case FX_XMLNODE_Text:
283 case FX_XMLNODE_CharData: {
284 WideString wsText = static_cast<CFX_XMLText*>(pXMLChild)->GetText();
285 if (IsStringAllWhitespace(wsText))
286 continue;
287
288 wsOutput = wsText;
289 break;
290 }
291 default:
292 NOTREACHED();
293 break;
294 }
295 }
296 }
297
GetPlainTextFromRichText(CFX_XMLNode * pXMLNode)298 WideString GetPlainTextFromRichText(CFX_XMLNode* pXMLNode) {
299 if (!pXMLNode)
300 return L"";
301
302 WideString wsPlainText;
303 switch (pXMLNode->GetType()) {
304 case FX_XMLNODE_Element: {
305 CFX_XMLElement* pXMLElement = static_cast<CFX_XMLElement*>(pXMLNode);
306 WideString wsTag = pXMLElement->GetLocalTagName();
307 uint32_t uTag = FX_HashCode_GetW(wsTag.AsStringView(), true);
308 if (uTag == 0x0001f714) {
309 wsPlainText += L"\n";
310 } else if (uTag == 0x00000070) {
311 if (!wsPlainText.IsEmpty()) {
312 wsPlainText += L"\n";
313 }
314 } else if (uTag == 0xa48ac63) {
315 if (!wsPlainText.IsEmpty() &&
316 wsPlainText[wsPlainText.GetLength() - 1] != '\n') {
317 wsPlainText += L"\n";
318 }
319 }
320 break;
321 }
322 case FX_XMLNODE_Text:
323 case FX_XMLNODE_CharData: {
324 WideString wsContent = static_cast<CFX_XMLText*>(pXMLNode)->GetText();
325 wsPlainText += wsContent;
326 break;
327 }
328 default:
329 break;
330 }
331 for (CFX_XMLNode* pChildXML = pXMLNode->GetNodeItem(CFX_XMLNode::FirstChild);
332 pChildXML;
333 pChildXML = pChildXML->GetNodeItem(CFX_XMLNode::NextSibling)) {
334 wsPlainText += GetPlainTextFromRichText(pChildXML);
335 }
336
337 return wsPlainText;
338 }
339
340 } // namespace
341
XFA_RecognizeRichText(CFX_XMLElement * pRichTextXMLNode)342 bool XFA_RecognizeRichText(CFX_XMLElement* pRichTextXMLNode) {
343 return pRichTextXMLNode && GetElementTagNamespaceURI(pRichTextXMLNode) ==
344 L"http://www.w3.org/1999/xhtml";
345 }
346
// Creates a parser flagged as a document parser. No factory is set here;
// SetFactory() (which asserts the document-parser flag) can supply one.
CXFA_SimpleParser::CXFA_SimpleParser() : m_bDocumentParser(true) {}
348
// Creates a parser that is not flagged as a document parser and that creates
// its nodes through |pFactory|.
CXFA_SimpleParser::CXFA_SimpleParser(CXFA_Document* pFactory)
    : m_pFactory(pFactory), m_bDocumentParser(false) {}
351
~CXFA_SimpleParser()352 CXFA_SimpleParser::~CXFA_SimpleParser() {}
353
// Sets the document used to create nodes. Only valid on a parser that was
// constructed as a document parser (asserted).
void CXFA_SimpleParser::SetFactory(CXFA_Document* pFactory) {
  ASSERT(m_bDocumentParser);
  m_pFactory = pFactory;
}
358
StartParse(const RetainPtr<IFX_SeekableStream> & pStream,XFA_PacketType ePacketID)359 int32_t CXFA_SimpleParser::StartParse(
360 const RetainPtr<IFX_SeekableStream>& pStream,
361 XFA_PacketType ePacketID) {
362 CloseParser();
363 m_pFileRead = pStream;
364 m_pStream = pdfium::MakeRetain<CFX_SeekableStreamProxy>(pStream, false);
365 uint16_t wCodePage = m_pStream->GetCodePage();
366 if (wCodePage != FX_CODEPAGE_UTF16LE && wCodePage != FX_CODEPAGE_UTF16BE &&
367 wCodePage != FX_CODEPAGE_UTF8) {
368 m_pStream->SetCodePage(FX_CODEPAGE_UTF8);
369 }
370 m_pXMLDoc = pdfium::MakeUnique<CFX_XMLDoc>();
371 auto pNewParser =
372 pdfium::MakeUnique<CFX_XMLParser>(m_pXMLDoc->GetRoot(), m_pStream);
373 m_pXMLParser = pNewParser.get();
374 if (!m_pXMLDoc->LoadXML(std::move(pNewParser)))
375 return XFA_PARSESTATUS_StatusErr;
376
377 m_bParseStarted = true;
378 m_ePacketID = ePacketID;
379 return XFA_PARSESTATUS_Ready;
380 }
381
DoParse()382 int32_t CXFA_SimpleParser::DoParse() {
383 if (!m_pXMLDoc || !m_bParseStarted)
384 return XFA_PARSESTATUS_StatusErr;
385
386 int32_t iRet = m_pXMLDoc->DoLoad();
387 if (iRet < 0)
388 return XFA_PARSESTATUS_SyntaxErr;
389 if (iRet < 100)
390 return iRet / 2;
391
392 m_pRootNode = ParseAsXDPPacket(GetDocumentNode(m_pXMLDoc.get()), m_ePacketID);
393 m_pXMLParser.Release();
394 m_pXMLDoc->CloseXML();
395 m_pStream.Reset();
396
397 if (!m_pRootNode)
398 return XFA_PARSESTATUS_StatusErr;
399
400 return XFA_PARSESTATUS_Done;
401 }
402
ParseXMLData(const ByteString & wsXML)403 CFX_XMLNode* CXFA_SimpleParser::ParseXMLData(const ByteString& wsXML) {
404 CloseParser();
405 m_pXMLDoc = pdfium::MakeUnique<CFX_XMLDoc>();
406
407 auto pStream = pdfium::MakeRetain<CFX_SeekableStreamProxy>(
408 const_cast<uint8_t*>(wsXML.raw_str()), wsXML.GetLength());
409 auto pParser =
410 pdfium::MakeUnique<CFX_XMLParser>(m_pXMLDoc->GetRoot(), pStream);
411 pParser->m_dwCheckStatus = 0x03;
412 if (!m_pXMLDoc->LoadXML(std::move(pParser)))
413 return nullptr;
414
415 int32_t iRet = m_pXMLDoc->DoLoad();
416 if (iRet < 0 || iRet >= 100)
417 m_pXMLDoc->CloseXML();
418 return iRet < 100 ? nullptr : GetDocumentNode(m_pXMLDoc.get());
419 }
420
ConstructXFANode(CXFA_Node * pXFANode,CFX_XMLNode * pXMLNode)421 void CXFA_SimpleParser::ConstructXFANode(CXFA_Node* pXFANode,
422 CFX_XMLNode* pXMLNode) {
423 XFA_PacketType ePacketID = pXFANode->GetPacketType();
424 if (ePacketID == XFA_PacketType::Datasets) {
425 if (pXFANode->GetElementType() == XFA_Element::DataValue) {
426 for (CFX_XMLNode* pXMLChild =
427 pXMLNode->GetNodeItem(CFX_XMLNode::FirstChild);
428 pXMLChild;
429 pXMLChild = pXMLChild->GetNodeItem(CFX_XMLNode::NextSibling)) {
430 FX_XMLNODETYPE eNodeType = pXMLChild->GetType();
431 if (eNodeType == FX_XMLNODE_Instruction)
432 continue;
433
434 if (eNodeType == FX_XMLNODE_Element) {
435 CXFA_Node* pXFAChild = m_pFactory->CreateNode(
436 XFA_PacketType::Datasets, XFA_Element::DataValue);
437 if (!pXFAChild)
438 return;
439
440 CFX_XMLElement* child = static_cast<CFX_XMLElement*>(pXMLChild);
441 WideString wsNodeStr = child->GetLocalTagName();
442 pXFAChild->JSObject()->SetCData(XFA_Attribute::Name, wsNodeStr, false,
443 false);
444 WideString wsChildValue = GetPlainTextFromRichText(child);
445 if (!wsChildValue.IsEmpty())
446 pXFAChild->JSObject()->SetCData(XFA_Attribute::Value, wsChildValue,
447 false, false);
448
449 pXFANode->InsertChild(pXFAChild, nullptr);
450 pXFAChild->SetXMLMappingNode(pXMLChild);
451 pXFAChild->SetFlag(XFA_NodeFlag_Initialized, false);
452 break;
453 }
454 }
455 m_pRootNode = pXFANode;
456 } else {
457 m_pRootNode = DataLoader(pXFANode, pXMLNode, true);
458 }
459 } else if (pXFANode->IsContentNode()) {
460 ParseContentNode(pXFANode, pXMLNode, ePacketID);
461 m_pRootNode = pXFANode;
462 } else {
463 m_pRootNode = NormalLoader(pXFANode, pXMLNode, ePacketID, true);
464 }
465 }
466
// Returns the root node produced by the most recent parse/construct call, as
// stored in m_pRootNode.
CXFA_Node* CXFA_SimpleParser::GetRootNode() const {
  return m_pRootNode;
}
470
// Returns the XML document currently held by the parser; ownership stays
// with the parser.
CFX_XMLDoc* CXFA_SimpleParser::GetXMLDoc() const {
  return m_pXMLDoc.get();
}
474
ParseAsXDPPacket(CFX_XMLNode * pXMLDocumentNode,XFA_PacketType ePacketID)475 CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket(CFX_XMLNode* pXMLDocumentNode,
476 XFA_PacketType ePacketID) {
477 switch (ePacketID) {
478 case XFA_PacketType::Xdp:
479 return ParseAsXDPPacket_XDP(pXMLDocumentNode);
480 case XFA_PacketType::Config:
481 return ParseAsXDPPacket_Config(pXMLDocumentNode);
482 case XFA_PacketType::Template:
483 return ParseAsXDPPacket_Template(pXMLDocumentNode);
484 case XFA_PacketType::Form:
485 return ParseAsXDPPacket_Form(pXMLDocumentNode);
486 case XFA_PacketType::Datasets:
487 return ParseAsXDPPacket_Data(pXMLDocumentNode);
488 case XFA_PacketType::Xdc:
489 return ParseAsXDPPacket_Xdc(pXMLDocumentNode);
490 case XFA_PacketType::LocaleSet:
491 return ParseAsXDPPacket_LocaleConnectionSourceSet(
492 pXMLDocumentNode, XFA_PacketType::LocaleSet, XFA_Element::LocaleSet);
493 case XFA_PacketType::ConnectionSet:
494 return ParseAsXDPPacket_LocaleConnectionSourceSet(
495 pXMLDocumentNode, XFA_PacketType::ConnectionSet,
496 XFA_Element::ConnectionSet);
497 case XFA_PacketType::SourceSet:
498 return ParseAsXDPPacket_LocaleConnectionSourceSet(
499 pXMLDocumentNode, XFA_PacketType::SourceSet, XFA_Element::SourceSet);
500 default:
501 return ParseAsXDPPacket_User(pXMLDocumentNode);
502 }
503 }
504
ParseAsXDPPacket_XDP(CFX_XMLNode * pXMLDocumentNode)505 CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_XDP(
506 CFX_XMLNode* pXMLDocumentNode) {
507 const PacketInfo* packet = GetPacketByIndex(XFA_PacketType::Xdp);
508 if (!MatchNodeName(pXMLDocumentNode, packet->name, packet->uri,
509 packet->flags)) {
510 return nullptr;
511 }
512
513 CXFA_Node* pXFARootNode =
514 m_pFactory->CreateNode(XFA_PacketType::Xdp, XFA_Element::Xfa);
515 if (!pXFARootNode)
516 return nullptr;
517
518 m_pRootNode = pXFARootNode;
519 pXFARootNode->JSObject()->SetCData(XFA_Attribute::Name, L"xfa", false, false);
520
521 CFX_XMLElement* pElement = static_cast<CFX_XMLElement*>(pXMLDocumentNode);
522 for (auto it : pElement->GetAttributes()) {
523 if (it.first == L"uuid")
524 pXFARootNode->JSObject()->SetCData(XFA_Attribute::Uuid, it.second, false,
525 false);
526 else if (it.first == L"timeStamp")
527 pXFARootNode->JSObject()->SetCData(XFA_Attribute::TimeStamp, it.second,
528 false, false);
529 }
530
531 CFX_XMLNode* pXMLConfigDOMRoot = nullptr;
532 CXFA_Node* pXFAConfigDOMRoot = nullptr;
533 for (CFX_XMLNode* pChildItem =
534 pXMLDocumentNode->GetNodeItem(CFX_XMLNode::FirstChild);
535 pChildItem;
536 pChildItem = pChildItem->GetNodeItem(CFX_XMLNode::NextSibling)) {
537 const PacketInfo* pPacketInfo = GetPacketByIndex(XFA_PacketType::Config);
538 if (!MatchNodeName(pChildItem, pPacketInfo->name, pPacketInfo->uri,
539 pPacketInfo->flags)) {
540 continue;
541 }
542 if (pXFARootNode->GetFirstChildByName(pPacketInfo->hash))
543 return nullptr;
544
545 pXMLConfigDOMRoot = pChildItem;
546 pXFAConfigDOMRoot = ParseAsXDPPacket_Config(pXMLConfigDOMRoot);
547 if (pXFAConfigDOMRoot)
548 pXFARootNode->InsertChild(pXFAConfigDOMRoot, nullptr);
549 }
550
551 CFX_XMLNode* pXMLDatasetsDOMRoot = nullptr;
552 CFX_XMLNode* pXMLFormDOMRoot = nullptr;
553 CFX_XMLNode* pXMLTemplateDOMRoot = nullptr;
554 for (CFX_XMLNode* pChildItem =
555 pXMLDocumentNode->GetNodeItem(CFX_XMLNode::FirstChild);
556 pChildItem;
557 pChildItem = pChildItem->GetNodeItem(CFX_XMLNode::NextSibling)) {
558 if (!pChildItem || pChildItem->GetType() != FX_XMLNODE_Element)
559 continue;
560 if (pChildItem == pXMLConfigDOMRoot)
561 continue;
562
563 CFX_XMLElement* pElement = reinterpret_cast<CFX_XMLElement*>(pChildItem);
564 WideString wsPacketName = pElement->GetLocalTagName();
565 const PacketInfo* pPacketInfo =
566 GetPacketByName(wsPacketName.AsStringView());
567 if (pPacketInfo && pPacketInfo->uri) {
568 if (!MatchNodeName(pElement, pPacketInfo->name, pPacketInfo->uri,
569 pPacketInfo->flags)) {
570 pPacketInfo = nullptr;
571 }
572 }
573 XFA_PacketType ePacket =
574 pPacketInfo ? pPacketInfo->packet_type : XFA_PacketType::User;
575 if (ePacket == XFA_PacketType::Xdp)
576 continue;
577 if (ePacket == XFA_PacketType::Datasets) {
578 if (pXMLDatasetsDOMRoot)
579 return nullptr;
580
581 pXMLDatasetsDOMRoot = pElement;
582 } else if (ePacket == XFA_PacketType::Form) {
583 if (pXMLFormDOMRoot)
584 return nullptr;
585
586 pXMLFormDOMRoot = pElement;
587 } else if (ePacket == XFA_PacketType::Template) {
588 // Found a duplicate template packet.
589 if (pXMLTemplateDOMRoot)
590 return nullptr;
591
592 CXFA_Node* pPacketNode = ParseAsXDPPacket(pElement, ePacket);
593 if (pPacketNode) {
594 pXMLTemplateDOMRoot = pElement;
595 pXFARootNode->InsertChild(pPacketNode, nullptr);
596 }
597 } else {
598 CXFA_Node* pPacketNode = ParseAsXDPPacket(pElement, ePacket);
599 if (pPacketNode) {
600 if (pPacketInfo &&
601 (pPacketInfo->flags & XFA_XDPPACKET_FLAGS_SUPPORTONE) &&
602 pXFARootNode->GetFirstChildByName(pPacketInfo->hash)) {
603 return nullptr;
604 }
605 pXFARootNode->InsertChild(pPacketNode, nullptr);
606 }
607 }
608 }
609
610 // No template is found.
611 if (!pXMLTemplateDOMRoot)
612 return nullptr;
613
614 if (pXMLDatasetsDOMRoot) {
615 CXFA_Node* pPacketNode =
616 ParseAsXDPPacket(pXMLDatasetsDOMRoot, XFA_PacketType::Datasets);
617 if (pPacketNode)
618 pXFARootNode->InsertChild(pPacketNode, nullptr);
619 }
620 if (pXMLFormDOMRoot) {
621 CXFA_Node* pPacketNode =
622 ParseAsXDPPacket(pXMLFormDOMRoot, XFA_PacketType::Form);
623 if (pPacketNode)
624 pXFARootNode->InsertChild(pPacketNode, nullptr);
625 }
626
627 pXFARootNode->SetXMLMappingNode(pXMLDocumentNode);
628 return pXFARootNode;
629 }
630
ParseAsXDPPacket_Config(CFX_XMLNode * pXMLDocumentNode)631 CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_Config(
632 CFX_XMLNode* pXMLDocumentNode) {
633 const PacketInfo* packet = GetPacketByIndex(XFA_PacketType::Config);
634 if (!MatchNodeName(pXMLDocumentNode, packet->name, packet->uri,
635 packet->flags)) {
636 return nullptr;
637 }
638 CXFA_Node* pNode =
639 m_pFactory->CreateNode(XFA_PacketType::Config, XFA_Element::Config);
640 if (!pNode)
641 return nullptr;
642
643 pNode->JSObject()->SetCData(XFA_Attribute::Name, packet->name, false, false);
644 if (!NormalLoader(pNode, pXMLDocumentNode, XFA_PacketType::Config, true))
645 return nullptr;
646
647 pNode->SetXMLMappingNode(pXMLDocumentNode);
648 return pNode;
649 }
650
ParseAsXDPPacket_Template(CFX_XMLNode * pXMLDocumentNode)651 CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_Template(
652 CFX_XMLNode* pXMLDocumentNode) {
653 const PacketInfo* packet = GetPacketByIndex(XFA_PacketType::Template);
654 if (!MatchNodeName(pXMLDocumentNode, packet->name, packet->uri,
655 packet->flags)) {
656 return nullptr;
657 }
658
659 CXFA_Node* pNode =
660 m_pFactory->CreateNode(XFA_PacketType::Template, XFA_Element::Template);
661 if (!pNode)
662 return nullptr;
663
664 pNode->JSObject()->SetCData(XFA_Attribute::Name, packet->name, false, false);
665 if (m_bDocumentParser) {
666 CFX_XMLElement* pXMLDocumentElement =
667 static_cast<CFX_XMLElement*>(pXMLDocumentNode);
668 WideString wsNamespaceURI = pXMLDocumentElement->GetNamespaceURI();
669 if (wsNamespaceURI.IsEmpty())
670 wsNamespaceURI = pXMLDocumentElement->GetString(L"xmlns:xfa");
671
672 pNode->GetDocument()->RecognizeXFAVersionNumber(wsNamespaceURI);
673 }
674 if (!NormalLoader(pNode, pXMLDocumentNode, XFA_PacketType::Template, true))
675 return nullptr;
676
677 pNode->SetXMLMappingNode(pXMLDocumentNode);
678 return pNode;
679 }
680
ParseAsXDPPacket_Form(CFX_XMLNode * pXMLDocumentNode)681 CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_Form(
682 CFX_XMLNode* pXMLDocumentNode) {
683 const PacketInfo* packet = GetPacketByIndex(XFA_PacketType::Form);
684 if (!MatchNodeName(pXMLDocumentNode, packet->name, packet->uri,
685 packet->flags)) {
686 return nullptr;
687 }
688
689 CFX_XMLElement* pXMLDocumentElement =
690 static_cast<CFX_XMLElement*>(pXMLDocumentNode);
691 WideString wsChecksum = pXMLDocumentElement->GetString(L"checksum");
692 if (wsChecksum.GetLength() != 28 || m_pXMLParser->m_dwCheckStatus != 0x03) {
693 return nullptr;
694 }
695
696 auto pChecksum = pdfium::MakeUnique<CFX_ChecksumContext>();
697 pChecksum->StartChecksum();
698 pChecksum->UpdateChecksum(m_pFileRead, m_pXMLParser->m_nStart[0],
699 m_pXMLParser->m_nSize[0]);
700 pChecksum->UpdateChecksum(m_pFileRead, m_pXMLParser->m_nStart[1],
701 m_pXMLParser->m_nSize[1]);
702 pChecksum->FinishChecksum();
703 ByteString bsCheck = pChecksum->GetChecksum();
704 if (bsCheck != wsChecksum.UTF8Encode())
705 return nullptr;
706
707 CXFA_Node* pNode =
708 m_pFactory->CreateNode(XFA_PacketType::Form, XFA_Element::Form);
709 if (!pNode)
710 return nullptr;
711
712 pNode->JSObject()->SetCData(XFA_Attribute::Name, packet->name, false, false);
713 pNode->JSObject()->SetAttribute(XFA_Attribute::Checksum,
714 wsChecksum.AsStringView(), false);
715 CXFA_Template* pTemplateRoot =
716 m_pRootNode->GetFirstChildByClass<CXFA_Template>(XFA_Element::Template);
717 CXFA_Subform* pTemplateChosen =
718 pTemplateRoot ? pTemplateRoot->GetFirstChildByClass<CXFA_Subform>(
719 XFA_Element::Subform)
720 : nullptr;
721 bool bUseAttribute = true;
722 if (pTemplateChosen &&
723 pTemplateChosen->JSObject()->GetEnum(XFA_Attribute::RestoreState) !=
724 XFA_AttributeEnum::Auto) {
725 bUseAttribute = false;
726 }
727 if (!NormalLoader(pNode, pXMLDocumentNode, XFA_PacketType::Form,
728 bUseAttribute))
729 return nullptr;
730
731 pNode->SetXMLMappingNode(pXMLDocumentNode);
732 return pNode;
733 }
734
ParseAsXDPPacket_Data(CFX_XMLNode * pXMLDocumentNode)735 CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_Data(
736 CFX_XMLNode* pXMLDocumentNode) {
737 CFX_XMLNode* pDatasetsXMLNode = GetDataSetsFromXDP(pXMLDocumentNode);
738 const PacketInfo* packet = GetPacketByIndex(XFA_PacketType::Datasets);
739 if (pDatasetsXMLNode) {
740 CXFA_Node* pNode = m_pFactory->CreateNode(XFA_PacketType::Datasets,
741 XFA_Element::DataModel);
742 if (!pNode)
743 return nullptr;
744
745 pNode->JSObject()->SetCData(XFA_Attribute::Name, packet->name, false,
746 false);
747 if (!DataLoader(pNode, pDatasetsXMLNode, false))
748 return nullptr;
749
750 pNode->SetXMLMappingNode(pDatasetsXMLNode);
751 return pNode;
752 }
753
754 CFX_XMLNode* pDataXMLNode = nullptr;
755 if (MatchNodeName(pXMLDocumentNode, L"data", packet->uri, packet->flags)) {
756 static_cast<CFX_XMLElement*>(pXMLDocumentNode)
757 ->RemoveAttribute(L"xmlns:xfa");
758 pDataXMLNode = pXMLDocumentNode;
759 } else {
760 CFX_XMLElement* pDataElement = new CFX_XMLElement(L"xfa:data");
761 CFX_XMLNode* pParentXMLNode =
762 pXMLDocumentNode->GetNodeItem(CFX_XMLNode::Parent);
763 if (pParentXMLNode)
764 pParentXMLNode->RemoveChildNode(pXMLDocumentNode);
765
766 ASSERT(pXMLDocumentNode->GetType() == FX_XMLNODE_Element);
767 if (pXMLDocumentNode->GetType() == FX_XMLNODE_Element) {
768 static_cast<CFX_XMLElement*>(pXMLDocumentNode)
769 ->RemoveAttribute(L"xmlns:xfa");
770 }
771 pDataElement->InsertChildNode(pXMLDocumentNode);
772 pDataXMLNode = pDataElement;
773 }
774
775 if (pDataXMLNode) {
776 CXFA_Node* pNode = m_pFactory->CreateNode(XFA_PacketType::Datasets,
777 XFA_Element::DataGroup);
778 if (!pNode) {
779 if (pDataXMLNode != pXMLDocumentNode)
780 delete pDataXMLNode;
781 return nullptr;
782 }
783 WideString wsLocalName =
784 static_cast<CFX_XMLElement*>(pDataXMLNode)->GetLocalTagName();
785 pNode->JSObject()->SetCData(XFA_Attribute::Name, wsLocalName, false, false);
786 if (!DataLoader(pNode, pDataXMLNode, true))
787 return nullptr;
788
789 pNode->SetXMLMappingNode(pDataXMLNode);
790 if (pDataXMLNode != pXMLDocumentNode)
791 pNode->SetFlag(XFA_NodeFlag_OwnXMLNode, false);
792 return pNode;
793 }
794 return nullptr;
795 }
796
ParseAsXDPPacket_LocaleConnectionSourceSet(CFX_XMLNode * pXMLDocumentNode,XFA_PacketType packet_type,XFA_Element element)797 CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_LocaleConnectionSourceSet(
798 CFX_XMLNode* pXMLDocumentNode,
799 XFA_PacketType packet_type,
800 XFA_Element element) {
801 const PacketInfo* packet = GetPacketByIndex(packet_type);
802 if (!MatchNodeName(pXMLDocumentNode, packet->name, packet->uri,
803 packet->flags)) {
804 return nullptr;
805 }
806
807 CXFA_Node* pNode = m_pFactory->CreateNode(packet_type, element);
808 if (!pNode)
809 return nullptr;
810
811 pNode->JSObject()->SetCData(XFA_Attribute::Name, packet->name, false, false);
812 if (!NormalLoader(pNode, pXMLDocumentNode, packet_type, true))
813 return nullptr;
814
815 pNode->SetXMLMappingNode(pXMLDocumentNode);
816 return pNode;
817 }
818
ParseAsXDPPacket_Xdc(CFX_XMLNode * pXMLDocumentNode)819 CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_Xdc(
820 CFX_XMLNode* pXMLDocumentNode) {
821 const PacketInfo* packet = GetPacketByIndex(XFA_PacketType::Xdc);
822 if (!MatchNodeName(pXMLDocumentNode, packet->name, packet->uri,
823 packet->flags))
824 return nullptr;
825
826 CXFA_Node* pNode =
827 m_pFactory->CreateNode(XFA_PacketType::Xdc, XFA_Element::Xdc);
828 if (!pNode)
829 return nullptr;
830
831 pNode->JSObject()->SetCData(XFA_Attribute::Name, packet->name, false, false);
832 pNode->SetXMLMappingNode(pXMLDocumentNode);
833 return pNode;
834 }
835
ParseAsXDPPacket_User(CFX_XMLNode * pXMLDocumentNode)836 CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_User(
837 CFX_XMLNode* pXMLDocumentNode) {
838 CXFA_Node* pNode =
839 m_pFactory->CreateNode(XFA_PacketType::Xdp, XFA_Element::Packet);
840 if (!pNode)
841 return nullptr;
842
843 WideString wsName =
844 static_cast<CFX_XMLElement*>(pXMLDocumentNode)->GetLocalTagName();
845 pNode->JSObject()->SetCData(XFA_Attribute::Name, wsName, false, false);
846 if (!UserPacketLoader(pNode, pXMLDocumentNode))
847 return nullptr;
848
849 pNode->SetXMLMappingNode(pXMLDocumentNode);
850 return pNode;
851 }
852
// Loader for user-defined packets. Nothing is read from |pXMLDoc|; the
// function returns |pXFANode| unchanged.
CXFA_Node* CXFA_SimpleParser::UserPacketLoader(CXFA_Node* pXFANode,
                                               CFX_XMLNode* pXMLDoc) {
  return pXFANode;
}
857
// Loads the data under |pXMLDoc| into |pXFANode| by parsing it as a data
// group of the datasets packet. |bDoTransform| is not used by this
// implementation. Always returns |pXFANode|.
CXFA_Node* CXFA_SimpleParser::DataLoader(CXFA_Node* pXFANode,
                                         CFX_XMLNode* pXMLDoc,
                                         bool bDoTransform) {
  ParseDataGroup(pXFANode, pXMLDoc, XFA_PacketType::Datasets);
  return pXFANode;
}
864
NormalLoader(CXFA_Node * pXFANode,CFX_XMLNode * pXMLDoc,XFA_PacketType ePacketID,bool bUseAttribute)865 CXFA_Node* CXFA_SimpleParser::NormalLoader(CXFA_Node* pXFANode,
866 CFX_XMLNode* pXMLDoc,
867 XFA_PacketType ePacketID,
868 bool bUseAttribute) {
869 bool bOneOfPropertyFound = false;
870 for (CFX_XMLNode* pXMLChild = pXMLDoc->GetNodeItem(CFX_XMLNode::FirstChild);
871 pXMLChild;
872 pXMLChild = pXMLChild->GetNodeItem(CFX_XMLNode::NextSibling)) {
873 switch (pXMLChild->GetType()) {
874 case FX_XMLNODE_Element: {
875 CFX_XMLElement* pXMLElement = static_cast<CFX_XMLElement*>(pXMLChild);
876 WideString wsTagName = pXMLElement->GetLocalTagName();
877 XFA_Element eType = CXFA_Node::NameToElement(wsTagName);
878 if (eType == XFA_Element::Unknown)
879 continue;
880
881 if (pXFANode->HasPropertyFlags(
882 eType,
883 XFA_PROPERTYFLAG_OneOf | XFA_PROPERTYFLAG_DefaultOneOf)) {
884 if (bOneOfPropertyFound)
885 break;
886 bOneOfPropertyFound = true;
887 }
888
889 CXFA_Node* pXFAChild = m_pFactory->CreateNode(ePacketID, eType);
890 if (!pXFAChild)
891 return nullptr;
892 if (ePacketID == XFA_PacketType::Config) {
893 pXFAChild->JSObject()->SetAttribute(XFA_Attribute::Name,
894 wsTagName.AsStringView(), false);
895 }
896
897 bool IsNeedValue = true;
898 for (auto it : pXMLElement->GetAttributes()) {
899 WideString wsAttrName;
900 GetAttributeLocalName(it.first.AsStringView(), wsAttrName);
901 if (wsAttrName == L"nil" && it.second == L"true")
902 IsNeedValue = false;
903
904 XFA_Attribute attr =
905 CXFA_Node::NameToAttribute(wsAttrName.AsStringView());
906 if (attr == XFA_Attribute::Unknown)
907 continue;
908
909 if (!bUseAttribute && attr != XFA_Attribute::Name &&
910 attr != XFA_Attribute::Save) {
911 continue;
912 }
913 pXFAChild->JSObject()->SetAttribute(attr, it.second.AsStringView(),
914 false);
915 }
916 pXFANode->InsertChild(pXFAChild, nullptr);
917 if (eType == XFA_Element::Validate || eType == XFA_Element::Locale) {
918 if (ePacketID == XFA_PacketType::Config)
919 ParseContentNode(pXFAChild, pXMLElement, ePacketID);
920 else
921 NormalLoader(pXFAChild, pXMLElement, ePacketID, bUseAttribute);
922
923 break;
924 }
925 switch (pXFAChild->GetObjectType()) {
926 case XFA_ObjectType::ContentNode:
927 case XFA_ObjectType::TextNode:
928 case XFA_ObjectType::NodeC:
929 case XFA_ObjectType::NodeV:
930 if (IsNeedValue)
931 ParseContentNode(pXFAChild, pXMLElement, ePacketID);
932 break;
933 default:
934 NormalLoader(pXFAChild, pXMLElement, ePacketID, bUseAttribute);
935 break;
936 }
937 } break;
938 case FX_XMLNODE_Instruction:
939 ParseInstruction(pXFANode, static_cast<CFX_XMLInstruction*>(pXMLChild),
940 ePacketID);
941 break;
942 default:
943 break;
944 }
945 }
946 return pXFANode;
947 }
948
ParseContentNode(CXFA_Node * pXFANode,CFX_XMLNode * pXMLNode,XFA_PacketType ePacketID)949 void CXFA_SimpleParser::ParseContentNode(CXFA_Node* pXFANode,
950 CFX_XMLNode* pXMLNode,
951 XFA_PacketType ePacketID) {
952 XFA_Element element = XFA_Element::Sharptext;
953 if (pXFANode->GetElementType() == XFA_Element::ExData) {
954 WideString wsContentType =
955 pXFANode->JSObject()->GetCData(XFA_Attribute::ContentType);
956 if (wsContentType == L"text/html")
957 element = XFA_Element::SharpxHTML;
958 else if (wsContentType == L"text/xml")
959 element = XFA_Element::Sharpxml;
960 }
961 if (element == XFA_Element::SharpxHTML)
962 pXFANode->SetXMLMappingNode(pXMLNode);
963
964 WideString wsValue;
965 for (CFX_XMLNode* pXMLChild = pXMLNode->GetNodeItem(CFX_XMLNode::FirstChild);
966 pXMLChild;
967 pXMLChild = pXMLChild->GetNodeItem(CFX_XMLNode::NextSibling)) {
968 FX_XMLNODETYPE eNodeType = pXMLChild->GetType();
969 if (eNodeType == FX_XMLNODE_Instruction)
970 continue;
971
972 if (element == XFA_Element::SharpxHTML) {
973 if (eNodeType != FX_XMLNODE_Element)
974 break;
975
976 if (XFA_RecognizeRichText(static_cast<CFX_XMLElement*>(pXMLChild)))
977 wsValue +=
978 GetPlainTextFromRichText(static_cast<CFX_XMLElement*>(pXMLChild));
979 } else if (element == XFA_Element::Sharpxml) {
980 if (eNodeType != FX_XMLNODE_Element)
981 break;
982
983 ConvertXMLToPlainText(static_cast<CFX_XMLElement*>(pXMLChild), wsValue);
984 } else {
985 if (eNodeType == FX_XMLNODE_Element)
986 break;
987 if (eNodeType == FX_XMLNODE_Text || eNodeType == FX_XMLNODE_CharData)
988 wsValue = static_cast<CFX_XMLText*>(pXMLChild)->GetText();
989 }
990 break;
991 }
992 if (!wsValue.IsEmpty()) {
993 if (pXFANode->IsContentNode()) {
994 CXFA_Node* pContentRawDataNode =
995 m_pFactory->CreateNode(ePacketID, element);
996 ASSERT(pContentRawDataNode);
997 pContentRawDataNode->JSObject()->SetCData(XFA_Attribute::Value, wsValue,
998 false, false);
999 pXFANode->InsertChild(pContentRawDataNode, nullptr);
1000 } else {
1001 pXFANode->JSObject()->SetCData(XFA_Attribute::Value, wsValue, false,
1002 false);
1003 }
1004 }
1005 }
1006
ParseDataGroup(CXFA_Node * pXFANode,CFX_XMLNode * pXMLNode,XFA_PacketType ePacketID)1007 void CXFA_SimpleParser::ParseDataGroup(CXFA_Node* pXFANode,
1008 CFX_XMLNode* pXMLNode,
1009 XFA_PacketType ePacketID) {
1010 for (CFX_XMLNode* pXMLChild = pXMLNode->GetNodeItem(CFX_XMLNode::FirstChild);
1011 pXMLChild;
1012 pXMLChild = pXMLChild->GetNodeItem(CFX_XMLNode::NextSibling)) {
1013 switch (pXMLChild->GetType()) {
1014 case FX_XMLNODE_Element: {
1015 CFX_XMLElement* pXMLElement = static_cast<CFX_XMLElement*>(pXMLChild);
1016 {
1017 WideString wsNamespaceURI = GetElementTagNamespaceURI(pXMLElement);
1018 if (wsNamespaceURI == L"http://www.xfa.com/schema/xfa-package/" ||
1019 wsNamespaceURI == L"http://www.xfa.org/schema/xfa-package/" ||
1020 wsNamespaceURI == L"http://www.w3.org/2001/XMLSchema-instance") {
1021 continue;
1022 }
1023 }
1024
1025 XFA_Element eNodeType = XFA_Element::DataModel;
1026 if (eNodeType == XFA_Element::DataModel) {
1027 WideString wsDataNodeAttr;
1028 if (FindAttributeWithNS(pXMLElement, L"dataNode",
1029 L"http://www.xfa.org/schema/xfa-data/1.0/",
1030 wsDataNodeAttr)) {
1031 if (wsDataNodeAttr == L"dataGroup")
1032 eNodeType = XFA_Element::DataGroup;
1033 else if (wsDataNodeAttr == L"dataValue")
1034 eNodeType = XFA_Element::DataValue;
1035 }
1036 }
1037 WideString wsContentType;
1038 if (eNodeType == XFA_Element::DataModel) {
1039 if (FindAttributeWithNS(pXMLElement, L"contentType",
1040 L"http://www.xfa.org/schema/xfa-data/1.0/",
1041 wsContentType)) {
1042 if (!wsContentType.IsEmpty())
1043 eNodeType = XFA_Element::DataValue;
1044 }
1045 }
1046 if (eNodeType == XFA_Element::DataModel) {
1047 for (CFX_XMLNode* pXMLDataChild =
1048 pXMLElement->GetNodeItem(CFX_XMLNode::FirstChild);
1049 pXMLDataChild; pXMLDataChild = pXMLDataChild->GetNodeItem(
1050 CFX_XMLNode::NextSibling)) {
1051 if (pXMLDataChild->GetType() == FX_XMLNODE_Element) {
1052 if (!XFA_RecognizeRichText(
1053 static_cast<CFX_XMLElement*>(pXMLDataChild))) {
1054 eNodeType = XFA_Element::DataGroup;
1055 break;
1056 }
1057 }
1058 }
1059 }
1060 if (eNodeType == XFA_Element::DataModel)
1061 eNodeType = XFA_Element::DataValue;
1062
1063 CXFA_Node* pXFAChild =
1064 m_pFactory->CreateNode(XFA_PacketType::Datasets, eNodeType);
1065 if (!pXFAChild)
1066 return;
1067
1068 pXFAChild->JSObject()->SetCData(
1069 XFA_Attribute::Name, pXMLElement->GetLocalTagName(), false, false);
1070 bool bNeedValue = true;
1071
1072 for (auto it : pXMLElement->GetAttributes()) {
1073 WideString wsName;
1074 WideString wsNS;
1075 if (!ResolveAttribute(pXMLElement, it.first, wsName, wsNS)) {
1076 continue;
1077 }
1078 if (wsName == L"nil" && it.second == L"true") {
1079 bNeedValue = false;
1080 continue;
1081 }
1082 if (wsNS == L"http://www.xfa.com/schema/xfa-package/" ||
1083 wsNS == L"http://www.xfa.org/schema/xfa-package/" ||
1084 wsNS == L"http://www.w3.org/2001/XMLSchema-instance" ||
1085 wsNS == L"http://www.xfa.org/schema/xfa-data/1.0/") {
1086 continue;
1087 }
1088 CXFA_Node* pXFAMetaData = m_pFactory->CreateNode(
1089 XFA_PacketType::Datasets, XFA_Element::DataValue);
1090 if (!pXFAMetaData)
1091 return;
1092
1093 pXFAMetaData->JSObject()->SetCData(XFA_Attribute::Name, wsName, false,
1094 false);
1095 pXFAMetaData->JSObject()->SetCData(XFA_Attribute::QualifiedName,
1096 it.first, false, false);
1097 pXFAMetaData->JSObject()->SetCData(XFA_Attribute::Value, it.second,
1098 false, false);
1099 pXFAMetaData->JSObject()->SetEnum(XFA_Attribute::Contains,
1100 XFA_AttributeEnum::MetaData, false);
1101 pXFAChild->InsertChild(pXFAMetaData, nullptr);
1102 pXFAMetaData->SetXMLMappingNode(pXMLElement);
1103 pXFAMetaData->SetFlag(XFA_NodeFlag_Initialized, false);
1104 }
1105
1106 if (!bNeedValue) {
1107 WideString wsNilName(L"xsi:nil");
1108 pXMLElement->RemoveAttribute(wsNilName.c_str());
1109 }
1110 pXFANode->InsertChild(pXFAChild, nullptr);
1111 if (eNodeType == XFA_Element::DataGroup)
1112 ParseDataGroup(pXFAChild, pXMLElement, ePacketID);
1113 else if (bNeedValue)
1114 ParseDataValue(pXFAChild, pXMLChild, XFA_PacketType::Datasets);
1115
1116 pXFAChild->SetXMLMappingNode(pXMLElement);
1117 pXFAChild->SetFlag(XFA_NodeFlag_Initialized, false);
1118 continue;
1119 }
1120 case FX_XMLNODE_CharData:
1121 case FX_XMLNODE_Text: {
1122 CFX_XMLText* pXMLText = static_cast<CFX_XMLText*>(pXMLChild);
1123 WideString wsText = pXMLText->GetText();
1124 if (IsStringAllWhitespace(wsText))
1125 continue;
1126
1127 CXFA_Node* pXFAChild = m_pFactory->CreateNode(XFA_PacketType::Datasets,
1128 XFA_Element::DataValue);
1129 if (!pXFAChild)
1130 return;
1131
1132 pXFAChild->JSObject()->SetCData(XFA_Attribute::Value, wsText, false,
1133 false);
1134 pXFANode->InsertChild(pXFAChild, nullptr);
1135 pXFAChild->SetXMLMappingNode(pXMLText);
1136 pXFAChild->SetFlag(XFA_NodeFlag_Initialized, false);
1137 continue;
1138 }
1139 default:
1140 continue;
1141 }
1142 }
1143 }
1144
ParseDataValue(CXFA_Node * pXFANode,CFX_XMLNode * pXMLNode,XFA_PacketType ePacketID)1145 void CXFA_SimpleParser::ParseDataValue(CXFA_Node* pXFANode,
1146 CFX_XMLNode* pXMLNode,
1147 XFA_PacketType ePacketID) {
1148 CFX_WideTextBuf wsValueTextBuf;
1149 CFX_WideTextBuf wsCurValueTextBuf;
1150 bool bMarkAsCompound = false;
1151 CFX_XMLNode* pXMLCurValueNode = nullptr;
1152 for (CFX_XMLNode* pXMLChild = pXMLNode->GetNodeItem(CFX_XMLNode::FirstChild);
1153 pXMLChild;
1154 pXMLChild = pXMLChild->GetNodeItem(CFX_XMLNode::NextSibling)) {
1155 FX_XMLNODETYPE eNodeType = pXMLChild->GetType();
1156 if (eNodeType == FX_XMLNODE_Instruction)
1157 continue;
1158
1159 if (eNodeType == FX_XMLNODE_Text || eNodeType == FX_XMLNODE_CharData) {
1160 WideString wsText = static_cast<CFX_XMLText*>(pXMLChild)->GetText();
1161 if (!pXMLCurValueNode)
1162 pXMLCurValueNode = pXMLChild;
1163
1164 wsCurValueTextBuf << wsText;
1165 } else if (XFA_RecognizeRichText(static_cast<CFX_XMLElement*>(pXMLChild))) {
1166 WideString wsText =
1167 GetPlainTextFromRichText(static_cast<CFX_XMLElement*>(pXMLChild));
1168 if (!pXMLCurValueNode)
1169 pXMLCurValueNode = pXMLChild;
1170
1171 wsCurValueTextBuf << wsText;
1172 } else {
1173 bMarkAsCompound = true;
1174 if (pXMLCurValueNode) {
1175 WideString wsCurValue = wsCurValueTextBuf.MakeString();
1176 if (!wsCurValue.IsEmpty()) {
1177 CXFA_Node* pXFAChild =
1178 m_pFactory->CreateNode(ePacketID, XFA_Element::DataValue);
1179 if (!pXFAChild)
1180 return;
1181
1182 pXFAChild->JSObject()->SetCData(XFA_Attribute::Name, L"", false,
1183 false);
1184 pXFAChild->JSObject()->SetCData(XFA_Attribute::Value, wsCurValue,
1185 false, false);
1186 pXFANode->InsertChild(pXFAChild, nullptr);
1187 pXFAChild->SetXMLMappingNode(pXMLCurValueNode);
1188 pXFAChild->SetFlag(XFA_NodeFlag_Initialized, false);
1189 wsValueTextBuf << wsCurValue;
1190 wsCurValueTextBuf.Clear();
1191 }
1192 pXMLCurValueNode = nullptr;
1193 }
1194 CXFA_Node* pXFAChild =
1195 m_pFactory->CreateNode(ePacketID, XFA_Element::DataValue);
1196 if (!pXFAChild)
1197 return;
1198
1199 WideString wsNodeStr =
1200 static_cast<CFX_XMLElement*>(pXMLChild)->GetLocalTagName();
1201 pXFAChild->JSObject()->SetCData(XFA_Attribute::Name, wsNodeStr, false,
1202 false);
1203 ParseDataValue(pXFAChild, pXMLChild, ePacketID);
1204 pXFANode->InsertChild(pXFAChild, nullptr);
1205 pXFAChild->SetXMLMappingNode(pXMLChild);
1206 pXFAChild->SetFlag(XFA_NodeFlag_Initialized, false);
1207 WideString wsCurValue =
1208 pXFAChild->JSObject()->GetCData(XFA_Attribute::Value);
1209 wsValueTextBuf << wsCurValue;
1210 }
1211 }
1212 if (pXMLCurValueNode) {
1213 WideString wsCurValue = wsCurValueTextBuf.MakeString();
1214 if (!wsCurValue.IsEmpty()) {
1215 if (bMarkAsCompound) {
1216 CXFA_Node* pXFAChild =
1217 m_pFactory->CreateNode(ePacketID, XFA_Element::DataValue);
1218 if (!pXFAChild)
1219 return;
1220
1221 pXFAChild->JSObject()->SetCData(XFA_Attribute::Name, L"", false, false);
1222 pXFAChild->JSObject()->SetCData(XFA_Attribute::Value, wsCurValue, false,
1223 false);
1224 pXFANode->InsertChild(pXFAChild, nullptr);
1225 pXFAChild->SetXMLMappingNode(pXMLCurValueNode);
1226 pXFAChild->SetFlag(XFA_NodeFlag_Initialized, false);
1227 }
1228 wsValueTextBuf << wsCurValue;
1229 wsCurValueTextBuf.Clear();
1230 }
1231 pXMLCurValueNode = nullptr;
1232 }
1233 WideString wsNodeValue = wsValueTextBuf.MakeString();
1234 pXFANode->JSObject()->SetCData(XFA_Attribute::Value, wsNodeValue, false,
1235 false);
1236 }
1237
ParseInstruction(CXFA_Node * pXFANode,CFX_XMLInstruction * pXMLInstruction,XFA_PacketType ePacketID)1238 void CXFA_SimpleParser::ParseInstruction(CXFA_Node* pXFANode,
1239 CFX_XMLInstruction* pXMLInstruction,
1240 XFA_PacketType ePacketID) {
1241 if (!m_bDocumentParser)
1242 return;
1243
1244 WideString wsTargetName = pXMLInstruction->GetName();
1245 const std::vector<WideString>& target_data = pXMLInstruction->GetTargetData();
1246 if (wsTargetName == L"originalXFAVersion") {
1247 if (target_data.size() > 1 &&
1248 (pXFANode->GetDocument()->RecognizeXFAVersionNumber(target_data[0]) !=
1249 XFA_VERSION_UNKNOWN) &&
1250 target_data[1] == L"v2.7-scripting:1") {
1251 pXFANode->GetDocument()->SetFlag(XFA_DOCFLAG_Scripting, true);
1252 }
1253 } else if (wsTargetName == L"acrobat") {
1254 if (target_data.size() > 1 && target_data[0] == L"JavaScript" &&
1255 target_data[1] == L"strictScoping") {
1256 pXFANode->GetDocument()->SetFlag(XFA_DOCFLAG_StrictScoping, true);
1257 }
1258 }
1259 }
1260
CloseParser()1261 void CXFA_SimpleParser::CloseParser() {
1262 m_pXMLDoc.reset();
1263 m_pStream.Reset();
1264 }
1265