1 // Copyright 2016 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #include "xfa/fxfa/parser/cxfa_simple_parser.h"
8
9 #include <utility>
10
11 #include "core/fxcrt/fx_ext.h"
12 #include "third_party/base/ptr_util.h"
13 #include "xfa/fgas/crt/fgas_codepage.h"
14 #include "xfa/fxfa/fxfa.h"
15 #include "xfa/fxfa/parser/cxfa_document.h"
16 #include "xfa/fxfa/parser/cxfa_widetextread.h"
17 #include "xfa/fxfa/parser/cxfa_xml_parser.h"
18 #include "xfa/fxfa/parser/xfa_basic_data.h"
19 #include "xfa/fxfa/parser/xfa_utils.h"
20 #include "xfa/fxfa/xfa_checksum.h"
21
22 namespace {
23
GetDocumentNode(CFDE_XMLDoc * pXMLDoc,bool bVerifyWellFormness=false)24 CFDE_XMLNode* GetDocumentNode(CFDE_XMLDoc* pXMLDoc,
25 bool bVerifyWellFormness = false) {
26 if (!pXMLDoc)
27 return nullptr;
28
29 for (CFDE_XMLNode* pXMLNode =
30 pXMLDoc->GetRoot()->GetNodeItem(CFDE_XMLNode::FirstChild);
31 pXMLNode; pXMLNode = pXMLNode->GetNodeItem(CFDE_XMLNode::NextSibling)) {
32 if (pXMLNode->GetType() != FDE_XMLNODE_Element)
33 continue;
34
35 if (!bVerifyWellFormness)
36 return pXMLNode;
37
38 for (CFDE_XMLNode* pNextNode =
39 pXMLNode->GetNodeItem(CFDE_XMLNode::NextSibling);
40 pNextNode;
41 pNextNode = pNextNode->GetNodeItem(CFDE_XMLNode::NextSibling)) {
42 if (pNextNode->GetType() == FDE_XMLNODE_Element)
43 return nullptr;
44 }
45 return pXMLNode;
46 }
47 return nullptr;
48 }
49
GetElementTagNamespaceURI(CFDE_XMLElement * pElement,CFX_WideString & wsNamespaceURI)50 void GetElementTagNamespaceURI(CFDE_XMLElement* pElement,
51 CFX_WideString& wsNamespaceURI) {
52 CFX_WideString wsNodeStr;
53 pElement->GetNamespacePrefix(wsNodeStr);
54 if (!XFA_FDEExtension_ResolveNamespaceQualifier(
55 pElement, wsNodeStr.AsStringC(), wsNamespaceURI)) {
56 wsNamespaceURI.clear();
57 }
58 }
59
MatchNodeName(CFDE_XMLNode * pNode,const CFX_WideStringC & wsLocalTagName,const CFX_WideStringC & wsNamespaceURIPrefix,uint32_t eMatchFlags=XFA_XDPPACKET_FLAGS_NOMATCH)60 bool MatchNodeName(CFDE_XMLNode* pNode,
61 const CFX_WideStringC& wsLocalTagName,
62 const CFX_WideStringC& wsNamespaceURIPrefix,
63 uint32_t eMatchFlags = XFA_XDPPACKET_FLAGS_NOMATCH) {
64 if (!pNode || pNode->GetType() != FDE_XMLNODE_Element)
65 return false;
66
67 CFDE_XMLElement* pElement = reinterpret_cast<CFDE_XMLElement*>(pNode);
68 CFX_WideString wsNodeStr;
69 pElement->GetLocalTagName(wsNodeStr);
70 if (wsNodeStr != wsLocalTagName)
71 return false;
72
73 GetElementTagNamespaceURI(pElement, wsNodeStr);
74 if (eMatchFlags & XFA_XDPPACKET_FLAGS_NOMATCH)
75 return true;
76 if (eMatchFlags & XFA_XDPPACKET_FLAGS_PREFIXMATCH) {
77 return wsNodeStr.Left(wsNamespaceURIPrefix.GetLength()) ==
78 wsNamespaceURIPrefix;
79 }
80 return wsNodeStr == wsNamespaceURIPrefix;
81 }
82
GetAttributeLocalName(const CFX_WideStringC & wsAttributeName,CFX_WideString & wsLocalAttrName)83 bool GetAttributeLocalName(const CFX_WideStringC& wsAttributeName,
84 CFX_WideString& wsLocalAttrName) {
85 CFX_WideString wsAttrName(wsAttributeName);
86 FX_STRSIZE iFind = wsAttrName.Find(L':', 0);
87 if (iFind < 0) {
88 wsLocalAttrName = wsAttrName;
89 return false;
90 }
91 wsLocalAttrName = wsAttrName.Right(wsAttrName.GetLength() - iFind - 1);
92 return true;
93 }
94
ResolveAttribute(CFDE_XMLElement * pElement,const CFX_WideStringC & wsAttributeName,CFX_WideString & wsLocalAttrName,CFX_WideString & wsNamespaceURI)95 bool ResolveAttribute(CFDE_XMLElement* pElement,
96 const CFX_WideStringC& wsAttributeName,
97 CFX_WideString& wsLocalAttrName,
98 CFX_WideString& wsNamespaceURI) {
99 CFX_WideString wsAttrName(wsAttributeName);
100 CFX_WideString wsNSPrefix;
101 if (GetAttributeLocalName(wsAttributeName, wsLocalAttrName)) {
102 wsNSPrefix = wsAttrName.Left(wsAttributeName.GetLength() -
103 wsLocalAttrName.GetLength() - 1);
104 }
105 if (wsLocalAttrName == L"xmlns" || wsNSPrefix == L"xmlns" ||
106 wsNSPrefix == L"xml") {
107 return false;
108 }
109 if (!XFA_FDEExtension_ResolveNamespaceQualifier(
110 pElement, wsNSPrefix.AsStringC(), wsNamespaceURI)) {
111 wsNamespaceURI.clear();
112 return false;
113 }
114 return true;
115 }
116
FindAttributeWithNS(CFDE_XMLElement * pElement,const CFX_WideStringC & wsLocalAttributeName,const CFX_WideStringC & wsNamespaceURIPrefix,CFX_WideString & wsValue,bool bMatchNSAsPrefix=false)117 bool FindAttributeWithNS(CFDE_XMLElement* pElement,
118 const CFX_WideStringC& wsLocalAttributeName,
119 const CFX_WideStringC& wsNamespaceURIPrefix,
120 CFX_WideString& wsValue,
121 bool bMatchNSAsPrefix = false) {
122 if (!pElement)
123 return false;
124
125 CFX_WideString wsAttrName;
126 CFX_WideString wsAttrValue;
127 CFX_WideString wsAttrNS;
128 for (int32_t iAttrCount = pElement->CountAttributes(), i = 0; i < iAttrCount;
129 i++) {
130 pElement->GetAttribute(i, wsAttrName, wsAttrValue);
131 FX_STRSIZE iFind = wsAttrName.Find(L':', 0);
132 CFX_WideString wsNSPrefix;
133 if (iFind < 0) {
134 if (wsLocalAttributeName != wsAttrName)
135 continue;
136 } else {
137 if (wsLocalAttributeName !=
138 wsAttrName.Right(wsAttrName.GetLength() - iFind - 1)) {
139 continue;
140 }
141 wsNSPrefix = wsAttrName.Left(iFind);
142 }
143 if (!XFA_FDEExtension_ResolveNamespaceQualifier(
144 pElement, wsNSPrefix.AsStringC(), wsAttrNS)) {
145 continue;
146 }
147 if (bMatchNSAsPrefix) {
148 if (wsAttrNS.Left(wsNamespaceURIPrefix.GetLength()) !=
149 wsNamespaceURIPrefix) {
150 continue;
151 }
152 } else {
153 if (wsAttrNS != wsNamespaceURIPrefix)
154 continue;
155 }
156 wsValue = wsAttrValue;
157 return true;
158 }
159 return false;
160 }
161
GetDataSetsFromXDP(CFDE_XMLNode * pXMLDocumentNode)162 CFDE_XMLNode* GetDataSetsFromXDP(CFDE_XMLNode* pXMLDocumentNode) {
163 if (MatchNodeName(pXMLDocumentNode,
164 XFA_GetPacketByIndex(XFA_PACKET_Datasets)->pName,
165 XFA_GetPacketByIndex(XFA_PACKET_Datasets)->pURI,
166 XFA_GetPacketByIndex(XFA_PACKET_Datasets)->eFlags)) {
167 return pXMLDocumentNode;
168 }
169 if (!MatchNodeName(pXMLDocumentNode,
170 XFA_GetPacketByIndex(XFA_PACKET_XDP)->pName,
171 XFA_GetPacketByIndex(XFA_PACKET_XDP)->pURI,
172 XFA_GetPacketByIndex(XFA_PACKET_XDP)->eFlags)) {
173 return nullptr;
174 }
175 for (CFDE_XMLNode* pDatasetsNode =
176 pXMLDocumentNode->GetNodeItem(CFDE_XMLNode::FirstChild);
177 pDatasetsNode;
178 pDatasetsNode = pDatasetsNode->GetNodeItem(CFDE_XMLNode::NextSibling)) {
179 if (!MatchNodeName(pDatasetsNode,
180 XFA_GetPacketByIndex(XFA_PACKET_Datasets)->pName,
181 XFA_GetPacketByIndex(XFA_PACKET_Datasets)->pURI,
182 XFA_GetPacketByIndex(XFA_PACKET_Datasets)->eFlags)) {
183 continue;
184 }
185 return pDatasetsNode;
186 }
187 return nullptr;
188 }
189
IsStringAllWhitespace(CFX_WideString wsText)190 bool IsStringAllWhitespace(CFX_WideString wsText) {
191 wsText.TrimRight(L"\x20\x9\xD\xA");
192 return wsText.IsEmpty();
193 }
194
ConvertXMLToPlainText(CFDE_XMLElement * pRootXMLNode,CFX_WideString & wsOutput)195 void ConvertXMLToPlainText(CFDE_XMLElement* pRootXMLNode,
196 CFX_WideString& wsOutput) {
197 for (CFDE_XMLNode* pXMLChild =
198 pRootXMLNode->GetNodeItem(CFDE_XMLNode::FirstChild);
199 pXMLChild;
200 pXMLChild = pXMLChild->GetNodeItem(CFDE_XMLNode::NextSibling)) {
201 switch (pXMLChild->GetType()) {
202 case FDE_XMLNODE_Element: {
203 CFX_WideString wsTextData;
204 static_cast<CFDE_XMLElement*>(pXMLChild)->GetTextData(wsTextData);
205 wsTextData += L"\n";
206 wsOutput += wsTextData;
207 break;
208 }
209 case FDE_XMLNODE_Text: {
210 CFX_WideString wsText;
211 static_cast<CFDE_XMLText*>(pXMLChild)->GetText(wsText);
212 if (IsStringAllWhitespace(wsText))
213 continue;
214
215 wsOutput = wsText;
216 break;
217 }
218 case FDE_XMLNODE_CharData: {
219 CFX_WideString wsCharData;
220 static_cast<CFDE_XMLCharData*>(pXMLChild)->GetCharData(wsCharData);
221 if (IsStringAllWhitespace(wsCharData))
222 continue;
223
224 wsOutput = wsCharData;
225 break;
226 }
227 default:
228 ASSERT(false);
229 break;
230 }
231 }
232 }
233
GetPacketByName(const CFX_WideStringC & wsName)234 const XFA_PACKETINFO* GetPacketByName(const CFX_WideStringC& wsName) {
235 if (wsName.IsEmpty())
236 return nullptr;
237
238 uint32_t uHash = FX_HashCode_GetW(wsName, false);
239 int32_t iStart = 0;
240 int32_t iEnd = g_iXFAPacketCount - 1;
241 do {
242 int32_t iMid = (iStart + iEnd) / 2;
243 const XFA_PACKETINFO* pInfo = g_XFAPacketData + iMid;
244 if (uHash == pInfo->uHash)
245 return pInfo;
246 if (uHash < pInfo->uHash)
247 iEnd = iMid - 1;
248 else
249 iStart = iMid + 1;
250 } while (iStart <= iEnd);
251 return nullptr;
252 }
253
254 } // namespace
255
XFA_RecognizeRichText(CFDE_XMLElement * pRichTextXMLNode)256 bool XFA_RecognizeRichText(CFDE_XMLElement* pRichTextXMLNode) {
257 if (pRichTextXMLNode) {
258 CFX_WideString wsNamespaceURI;
259 GetElementTagNamespaceURI(pRichTextXMLNode, wsNamespaceURI);
260 if (wsNamespaceURI == L"http://www.w3.org/1999/xhtml")
261 return true;
262 }
263 return false;
264 }
265
CXFA_SimpleParser(CXFA_Document * pFactory,bool bDocumentParser)266 CXFA_SimpleParser::CXFA_SimpleParser(CXFA_Document* pFactory,
267 bool bDocumentParser)
268 : m_pXMLParser(nullptr),
269 m_pXMLDoc(nullptr),
270 m_pStream(nullptr),
271 m_pFileRead(nullptr),
272 m_pFactory(pFactory),
273 m_pRootNode(nullptr),
274 m_ePacketID(XFA_XDPPACKET_UNKNOWN),
275 m_bDocumentParser(bDocumentParser) {}
276
~CXFA_SimpleParser()277 CXFA_SimpleParser::~CXFA_SimpleParser() {}
278
SetFactory(CXFA_Document * pFactory)279 void CXFA_SimpleParser::SetFactory(CXFA_Document* pFactory) {
280 m_pFactory = pFactory;
281 }
282
StartParse(const CFX_RetainPtr<IFX_SeekableReadStream> & pStream,XFA_XDPPACKET ePacketID)283 int32_t CXFA_SimpleParser::StartParse(
284 const CFX_RetainPtr<IFX_SeekableReadStream>& pStream,
285 XFA_XDPPACKET ePacketID) {
286 CloseParser();
287 m_pFileRead = pStream;
288 m_pStream = IFGAS_Stream::CreateStream(
289 pStream, FX_STREAMACCESS_Read | FX_STREAMACCESS_Text);
290 if (!m_pStream)
291 return XFA_PARSESTATUS_StreamErr;
292
293 uint16_t wCodePage = m_pStream->GetCodePage();
294 if (wCodePage != FX_CODEPAGE_UTF16LE && wCodePage != FX_CODEPAGE_UTF16BE &&
295 wCodePage != FX_CODEPAGE_UTF8) {
296 m_pStream->SetCodePage(FX_CODEPAGE_UTF8);
297 }
298 m_pXMLDoc = pdfium::MakeUnique<CFDE_XMLDoc>();
299 auto pNewParser =
300 pdfium::MakeUnique<CXFA_XMLParser>(m_pXMLDoc->GetRoot(), m_pStream);
301 m_pXMLParser = pNewParser.get();
302 if (!m_pXMLDoc->LoadXML(std::move(pNewParser)))
303 return XFA_PARSESTATUS_StatusErr;
304
305 m_ePacketID = ePacketID;
306 return XFA_PARSESTATUS_Ready;
307 }
308
DoParse(IFX_Pause * pPause)309 int32_t CXFA_SimpleParser::DoParse(IFX_Pause* pPause) {
310 if (!m_pXMLDoc || m_ePacketID == XFA_XDPPACKET_UNKNOWN)
311 return XFA_PARSESTATUS_StatusErr;
312
313 int32_t iRet = m_pXMLDoc->DoLoad(pPause);
314 if (iRet < 0)
315 return XFA_PARSESTATUS_SyntaxErr;
316 if (iRet < 100)
317 return iRet / 2;
318
319 m_pRootNode = ParseAsXDPPacket(GetDocumentNode(m_pXMLDoc.get()), m_ePacketID);
320 m_pXMLDoc->CloseXML();
321 m_pStream.Reset();
322 if (!m_pRootNode)
323 return XFA_PARSESTATUS_StatusErr;
324
325 return XFA_PARSESTATUS_Done;
326 }
327
ParseXMLData(const CFX_WideString & wsXML,CFDE_XMLNode * & pXMLNode,IFX_Pause * pPause)328 int32_t CXFA_SimpleParser::ParseXMLData(const CFX_WideString& wsXML,
329 CFDE_XMLNode*& pXMLNode,
330 IFX_Pause* pPause) {
331 CloseParser();
332 pXMLNode = nullptr;
333 m_pXMLDoc = pdfium::MakeUnique<CFDE_XMLDoc>();
334 auto pStream = pdfium::MakeRetain<CXFA_WideTextRead>(wsXML);
335 auto pParser =
336 pdfium::MakeUnique<CXFA_XMLParser>(m_pXMLDoc->GetRoot(), pStream);
337 pParser->m_dwCheckStatus = 0x03;
338 if (!m_pXMLDoc->LoadXML(std::move(pParser)))
339 return XFA_PARSESTATUS_StatusErr;
340
341 int32_t iRet = m_pXMLDoc->DoLoad(pPause);
342 if (iRet < 0 || iRet >= 100)
343 m_pXMLDoc->CloseXML();
344 if (iRet < 0)
345 return XFA_PARSESTATUS_SyntaxErr;
346 if (iRet < 100)
347 return iRet / 2;
348
349 pXMLNode = GetDocumentNode(m_pXMLDoc.get());
350 return XFA_PARSESTATUS_Done;
351 }
352
ConstructXFANode(CXFA_Node * pXFANode,CFDE_XMLNode * pXMLNode)353 void CXFA_SimpleParser::ConstructXFANode(CXFA_Node* pXFANode,
354 CFDE_XMLNode* pXMLNode) {
355 XFA_XDPPACKET ePacketID = (XFA_XDPPACKET)pXFANode->GetPacketID();
356 if (ePacketID == XFA_XDPPACKET_Datasets) {
357 if (pXFANode->GetElementType() == XFA_Element::DataValue) {
358 for (CFDE_XMLNode* pXMLChild =
359 pXMLNode->GetNodeItem(CFDE_XMLNode::FirstChild);
360 pXMLChild;
361 pXMLChild = pXMLChild->GetNodeItem(CFDE_XMLNode::NextSibling)) {
362 FDE_XMLNODETYPE eNodeType = pXMLChild->GetType();
363 if (eNodeType == FDE_XMLNODE_Instruction)
364 continue;
365
366 if (eNodeType == FDE_XMLNODE_Element) {
367 CXFA_Node* pXFAChild = m_pFactory->CreateNode(XFA_XDPPACKET_Datasets,
368 XFA_Element::DataValue);
369 if (!pXFAChild)
370 return;
371
372 CFX_WideString wsNodeStr;
373 CFDE_XMLElement* child = static_cast<CFDE_XMLElement*>(pXMLChild);
374 child->GetLocalTagName(wsNodeStr);
375 pXFAChild->SetCData(XFA_ATTRIBUTE_Name, wsNodeStr);
376 CFX_WideString wsChildValue;
377 XFA_GetPlainTextFromRichText(child, wsChildValue);
378 if (!wsChildValue.IsEmpty())
379 pXFAChild->SetCData(XFA_ATTRIBUTE_Value, wsChildValue);
380
381 pXFANode->InsertChild(pXFAChild);
382 pXFAChild->SetXMLMappingNode(pXMLChild);
383 pXFAChild->SetFlag(XFA_NodeFlag_Initialized, false);
384 break;
385 }
386 }
387 m_pRootNode = pXFANode;
388 } else {
389 m_pRootNode = DataLoader(pXFANode, pXMLNode, true);
390 }
391 } else if (pXFANode->IsContentNode()) {
392 ParseContentNode(pXFANode, pXMLNode, ePacketID);
393 m_pRootNode = pXFANode;
394 } else {
395 m_pRootNode = NormalLoader(pXFANode, pXMLNode, ePacketID, true);
396 }
397 }
398
GetRootNode() const399 CXFA_Node* CXFA_SimpleParser::GetRootNode() const {
400 return m_pRootNode;
401 }
402
GetXMLDoc() const403 CFDE_XMLDoc* CXFA_SimpleParser::GetXMLDoc() const {
404 return m_pXMLDoc.get();
405 }
406
XFA_FDEExtension_ResolveNamespaceQualifier(CFDE_XMLElement * pNode,const CFX_WideStringC & wsQualifier,CFX_WideString & wsNamespaceURI)407 bool XFA_FDEExtension_ResolveNamespaceQualifier(
408 CFDE_XMLElement* pNode,
409 const CFX_WideStringC& wsQualifier,
410 CFX_WideString& wsNamespaceURI) {
411 if (!pNode)
412 return false;
413
414 CFDE_XMLNode* pFakeRoot = pNode->GetNodeItem(CFDE_XMLNode::Root);
415 CFX_WideString wsNSAttribute;
416 bool bRet = false;
417 if (wsQualifier.IsEmpty()) {
418 wsNSAttribute = L"xmlns";
419 bRet = true;
420 } else {
421 wsNSAttribute = L"xmlns:" + wsQualifier;
422 }
423 for (; pNode != pFakeRoot; pNode = static_cast<CFDE_XMLElement*>(
424 pNode->GetNodeItem(CFDE_XMLNode::Parent))) {
425 if (pNode->GetType() != FDE_XMLNODE_Element)
426 continue;
427
428 if (pNode->HasAttribute(wsNSAttribute.c_str())) {
429 pNode->GetString(wsNSAttribute.c_str(), wsNamespaceURI);
430 return true;
431 }
432 }
433 wsNamespaceURI.clear();
434 return bRet;
435 }
436
ParseAsXDPPacket(CFDE_XMLNode * pXMLDocumentNode,XFA_XDPPACKET ePacketID)437 CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket(CFDE_XMLNode* pXMLDocumentNode,
438 XFA_XDPPACKET ePacketID) {
439 switch (ePacketID) {
440 case XFA_XDPPACKET_UNKNOWN:
441 return nullptr;
442 case XFA_XDPPACKET_XDP:
443 return ParseAsXDPPacket_XDP(pXMLDocumentNode, ePacketID);
444 case XFA_XDPPACKET_Config:
445 return ParseAsXDPPacket_Config(pXMLDocumentNode, ePacketID);
446 case XFA_XDPPACKET_Template:
447 case XFA_XDPPACKET_Form:
448 return ParseAsXDPPacket_TemplateForm(pXMLDocumentNode, ePacketID);
449 case XFA_XDPPACKET_Datasets:
450 return ParseAsXDPPacket_Data(pXMLDocumentNode, ePacketID);
451 case XFA_XDPPACKET_Xdc:
452 return ParseAsXDPPacket_Xdc(pXMLDocumentNode, ePacketID);
453 case XFA_XDPPACKET_LocaleSet:
454 case XFA_XDPPACKET_ConnectionSet:
455 case XFA_XDPPACKET_SourceSet:
456 return ParseAsXDPPacket_LocaleConnectionSourceSet(pXMLDocumentNode,
457 ePacketID);
458 default:
459 return ParseAsXDPPacket_User(pXMLDocumentNode, ePacketID);
460 }
461 }
462
ParseAsXDPPacket_XDP(CFDE_XMLNode * pXMLDocumentNode,XFA_XDPPACKET ePacketID)463 CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_XDP(
464 CFDE_XMLNode* pXMLDocumentNode,
465 XFA_XDPPACKET ePacketID) {
466 if (!MatchNodeName(pXMLDocumentNode,
467 XFA_GetPacketByIndex(XFA_PACKET_XDP)->pName,
468 XFA_GetPacketByIndex(XFA_PACKET_XDP)->pURI,
469 XFA_GetPacketByIndex(XFA_PACKET_XDP)->eFlags)) {
470 return nullptr;
471 }
472 CXFA_Node* pXFARootNode =
473 m_pFactory->CreateNode(XFA_XDPPACKET_XDP, XFA_Element::Xfa);
474 if (!pXFARootNode)
475 return nullptr;
476
477 m_pRootNode = pXFARootNode;
478 pXFARootNode->SetCData(XFA_ATTRIBUTE_Name, L"xfa");
479 {
480 CFDE_XMLElement* pElement = static_cast<CFDE_XMLElement*>(pXMLDocumentNode);
481 int32_t iAttributeCount = pElement->CountAttributes();
482 for (int32_t i = 0; i < iAttributeCount; i++) {
483 CFX_WideString wsAttriName, wsAttriValue;
484 pElement->GetAttribute(i, wsAttriName, wsAttriValue);
485 if (wsAttriName == L"uuid")
486 pXFARootNode->SetCData(XFA_ATTRIBUTE_Uuid, wsAttriValue);
487 else if (wsAttriName == L"timeStamp")
488 pXFARootNode->SetCData(XFA_ATTRIBUTE_TimeStamp, wsAttriValue);
489 }
490 }
491
492 CFDE_XMLNode* pXMLConfigDOMRoot = nullptr;
493 CXFA_Node* pXFAConfigDOMRoot = nullptr;
494 {
495 for (CFDE_XMLNode* pChildItem =
496 pXMLDocumentNode->GetNodeItem(CFDE_XMLNode::FirstChild);
497 pChildItem;
498 pChildItem = pChildItem->GetNodeItem(CFDE_XMLNode::NextSibling)) {
499 const XFA_PACKETINFO* pPacketInfo =
500 XFA_GetPacketByIndex(XFA_PACKET_Config);
501 if (!MatchNodeName(pChildItem, pPacketInfo->pName, pPacketInfo->pURI,
502 pPacketInfo->eFlags)) {
503 continue;
504 }
505 if (pXFARootNode->GetFirstChildByName(pPacketInfo->uHash)) {
506 return nullptr;
507 }
508 pXMLConfigDOMRoot = pChildItem;
509 pXFAConfigDOMRoot =
510 ParseAsXDPPacket_Config(pXMLConfigDOMRoot, XFA_XDPPACKET_Config);
511 pXFARootNode->InsertChild(pXFAConfigDOMRoot, nullptr);
512 }
513 }
514
515 CFDE_XMLNode* pXMLDatasetsDOMRoot = nullptr;
516 CFDE_XMLNode* pXMLFormDOMRoot = nullptr;
517 CFDE_XMLNode* pXMLTemplateDOMRoot = nullptr;
518 {
519 for (CFDE_XMLNode* pChildItem =
520 pXMLDocumentNode->GetNodeItem(CFDE_XMLNode::FirstChild);
521 pChildItem;
522 pChildItem = pChildItem->GetNodeItem(CFDE_XMLNode::NextSibling)) {
523 if (!pChildItem || pChildItem->GetType() != FDE_XMLNODE_Element)
524 continue;
525 if (pChildItem == pXMLConfigDOMRoot)
526 continue;
527
528 CFDE_XMLElement* pElement =
529 reinterpret_cast<CFDE_XMLElement*>(pChildItem);
530 CFX_WideString wsPacketName;
531 pElement->GetLocalTagName(wsPacketName);
532 const XFA_PACKETINFO* pPacketInfo =
533 GetPacketByName(wsPacketName.AsStringC());
534 if (pPacketInfo && pPacketInfo->pURI) {
535 if (!MatchNodeName(pElement, pPacketInfo->pName, pPacketInfo->pURI,
536 pPacketInfo->eFlags)) {
537 pPacketInfo = nullptr;
538 }
539 }
540 XFA_XDPPACKET ePacket =
541 pPacketInfo ? pPacketInfo->eName : XFA_XDPPACKET_USER;
542 if (ePacket == XFA_XDPPACKET_XDP)
543 continue;
544 if (ePacket == XFA_XDPPACKET_Datasets) {
545 if (pXMLDatasetsDOMRoot)
546 return nullptr;
547
548 pXMLDatasetsDOMRoot = pElement;
549 } else if (ePacket == XFA_XDPPACKET_Form) {
550 if (pXMLFormDOMRoot)
551 return nullptr;
552
553 pXMLFormDOMRoot = pElement;
554 } else if (ePacket == XFA_XDPPACKET_Template) {
555 if (pXMLTemplateDOMRoot) {
556 // Found a duplicate template packet.
557 return nullptr;
558 }
559 CXFA_Node* pPacketNode = ParseAsXDPPacket(pElement, ePacket);
560 if (pPacketNode) {
561 pXMLTemplateDOMRoot = pElement;
562 pXFARootNode->InsertChild(pPacketNode);
563 }
564 } else {
565 CXFA_Node* pPacketNode = ParseAsXDPPacket(pElement, ePacket);
566 if (pPacketNode) {
567 if (pPacketInfo &&
568 (pPacketInfo->eFlags & XFA_XDPPACKET_FLAGS_SUPPORTONE) &&
569 pXFARootNode->GetFirstChildByName(pPacketInfo->uHash)) {
570 return nullptr;
571 }
572 pXFARootNode->InsertChild(pPacketNode);
573 }
574 }
575 }
576 }
577
578 if (!pXMLTemplateDOMRoot) {
579 // No template is found.
580 return nullptr;
581 }
582 if (pXMLDatasetsDOMRoot) {
583 CXFA_Node* pPacketNode =
584 ParseAsXDPPacket(pXMLDatasetsDOMRoot, XFA_XDPPACKET_Datasets);
585 if (pPacketNode)
586 pXFARootNode->InsertChild(pPacketNode);
587 }
588 if (pXMLFormDOMRoot) {
589 CXFA_Node* pPacketNode =
590 ParseAsXDPPacket(pXMLFormDOMRoot, XFA_XDPPACKET_Form);
591 if (pPacketNode)
592 pXFARootNode->InsertChild(pPacketNode);
593 }
594 pXFARootNode->SetXMLMappingNode(pXMLDocumentNode);
595 return pXFARootNode;
596 }
597
ParseAsXDPPacket_Config(CFDE_XMLNode * pXMLDocumentNode,XFA_XDPPACKET ePacketID)598 CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_Config(
599 CFDE_XMLNode* pXMLDocumentNode,
600 XFA_XDPPACKET ePacketID) {
601 if (!MatchNodeName(pXMLDocumentNode,
602 XFA_GetPacketByIndex(XFA_PACKET_Config)->pName,
603 XFA_GetPacketByIndex(XFA_PACKET_Config)->pURI,
604 XFA_GetPacketByIndex(XFA_PACKET_Config)->eFlags)) {
605 return nullptr;
606 }
607 CXFA_Node* pNode =
608 m_pFactory->CreateNode(XFA_XDPPACKET_Config, XFA_Element::Config);
609 if (!pNode)
610 return nullptr;
611
612 pNode->SetCData(XFA_ATTRIBUTE_Name,
613 XFA_GetPacketByIndex(XFA_PACKET_Config)->pName);
614 if (!NormalLoader(pNode, pXMLDocumentNode, ePacketID, true))
615 return nullptr;
616
617 pNode->SetXMLMappingNode(pXMLDocumentNode);
618 return pNode;
619 }
620
ParseAsXDPPacket_TemplateForm(CFDE_XMLNode * pXMLDocumentNode,XFA_XDPPACKET ePacketID)621 CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_TemplateForm(
622 CFDE_XMLNode* pXMLDocumentNode,
623 XFA_XDPPACKET ePacketID) {
624 CXFA_Node* pNode = nullptr;
625 if (ePacketID == XFA_XDPPACKET_Template) {
626 if (MatchNodeName(pXMLDocumentNode,
627 XFA_GetPacketByIndex(XFA_PACKET_Template)->pName,
628 XFA_GetPacketByIndex(XFA_PACKET_Template)->pURI,
629 XFA_GetPacketByIndex(XFA_PACKET_Template)->eFlags)) {
630 pNode =
631 m_pFactory->CreateNode(XFA_XDPPACKET_Template, XFA_Element::Template);
632 if (!pNode)
633 return nullptr;
634
635 pNode->SetCData(XFA_ATTRIBUTE_Name,
636 XFA_GetPacketByIndex(XFA_PACKET_Template)->pName);
637 if (m_bDocumentParser) {
638 CFX_WideString wsNamespaceURI;
639 CFDE_XMLElement* pXMLDocumentElement =
640 static_cast<CFDE_XMLElement*>(pXMLDocumentNode);
641 pXMLDocumentElement->GetNamespaceURI(wsNamespaceURI);
642 if (wsNamespaceURI.IsEmpty())
643 pXMLDocumentElement->GetString(L"xmlns:xfa", wsNamespaceURI);
644
645 pNode->GetDocument()->RecognizeXFAVersionNumber(wsNamespaceURI);
646 }
647 if (!NormalLoader(pNode, pXMLDocumentNode, ePacketID, true))
648 return nullptr;
649 }
650 } else if (ePacketID == XFA_XDPPACKET_Form) {
651 if (MatchNodeName(pXMLDocumentNode,
652 XFA_GetPacketByIndex(XFA_PACKET_Form)->pName,
653 XFA_GetPacketByIndex(XFA_PACKET_Form)->pURI,
654 XFA_GetPacketByIndex(XFA_PACKET_Form)->eFlags)) {
655 CFDE_XMLElement* pXMLDocumentElement =
656 static_cast<CFDE_XMLElement*>(pXMLDocumentNode);
657 CFX_WideString wsChecksum;
658 pXMLDocumentElement->GetString(L"checksum", wsChecksum);
659 if (wsChecksum.GetLength() != 28 ||
660 m_pXMLParser->m_dwCheckStatus != 0x03) {
661 return nullptr;
662 }
663 std::unique_ptr<CXFA_ChecksumContext> pChecksum(new CXFA_ChecksumContext);
664 pChecksum->StartChecksum();
665 pChecksum->UpdateChecksum(m_pFileRead, m_pXMLParser->m_nStart[0],
666 m_pXMLParser->m_nSize[0]);
667 pChecksum->UpdateChecksum(m_pFileRead, m_pXMLParser->m_nStart[1],
668 m_pXMLParser->m_nSize[1]);
669 pChecksum->FinishChecksum();
670 CFX_ByteString bsCheck = pChecksum->GetChecksum();
671 if (bsCheck != wsChecksum.UTF8Encode())
672 return nullptr;
673
674 pNode = m_pFactory->CreateNode(XFA_XDPPACKET_Form, XFA_Element::Form);
675 if (!pNode)
676 return nullptr;
677
678 pNode->SetCData(XFA_ATTRIBUTE_Name,
679 XFA_GetPacketByIndex(XFA_PACKET_Form)->pName);
680 pNode->SetAttribute(XFA_ATTRIBUTE_Checksum, wsChecksum.AsStringC());
681 CXFA_Node* pTemplateRoot =
682 m_pRootNode->GetFirstChildByClass(XFA_Element::Template);
683 CXFA_Node* pTemplateChosen =
684 pTemplateRoot
685 ? pTemplateRoot->GetFirstChildByClass(XFA_Element::Subform)
686 : nullptr;
687 bool bUseAttribute = true;
688 if (pTemplateChosen &&
689 pTemplateChosen->GetEnum(XFA_ATTRIBUTE_RestoreState) !=
690 XFA_ATTRIBUTEENUM_Auto) {
691 bUseAttribute = false;
692 }
693 if (!NormalLoader(pNode, pXMLDocumentNode, ePacketID, bUseAttribute))
694 return nullptr;
695 }
696 }
697 if (pNode)
698 pNode->SetXMLMappingNode(pXMLDocumentNode);
699
700 return pNode;
701 }
702
ParseAsXDPPacket_Data(CFDE_XMLNode * pXMLDocumentNode,XFA_XDPPACKET ePacketID)703 CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_Data(
704 CFDE_XMLNode* pXMLDocumentNode,
705 XFA_XDPPACKET ePacketID) {
706 CFDE_XMLNode* pDatasetsXMLNode = GetDataSetsFromXDP(pXMLDocumentNode);
707 if (pDatasetsXMLNode) {
708 CXFA_Node* pNode =
709 m_pFactory->CreateNode(XFA_XDPPACKET_Datasets, XFA_Element::DataModel);
710 if (!pNode)
711 return nullptr;
712
713 pNode->SetCData(XFA_ATTRIBUTE_Name,
714 XFA_GetPacketByIndex(XFA_PACKET_Datasets)->pName);
715 if (!DataLoader(pNode, pDatasetsXMLNode, false))
716 return nullptr;
717
718 pNode->SetXMLMappingNode(pDatasetsXMLNode);
719 return pNode;
720 }
721
722 CFDE_XMLNode* pDataXMLNode = nullptr;
723 if (MatchNodeName(pXMLDocumentNode, L"data",
724 XFA_GetPacketByIndex(XFA_PACKET_Datasets)->pURI,
725 XFA_GetPacketByIndex(XFA_PACKET_Datasets)->eFlags)) {
726 static_cast<CFDE_XMLElement*>(pXMLDocumentNode)
727 ->RemoveAttribute(L"xmlns:xfa");
728 pDataXMLNode = pXMLDocumentNode;
729 } else {
730 CFDE_XMLElement* pDataElement = new CFDE_XMLElement(L"xfa:data");
731 CFDE_XMLNode* pParentXMLNode =
732 pXMLDocumentNode->GetNodeItem(CFDE_XMLNode::Parent);
733 if (pParentXMLNode)
734 pParentXMLNode->RemoveChildNode(pXMLDocumentNode);
735
736 ASSERT(pXMLDocumentNode->GetType() == FDE_XMLNODE_Element);
737 if (pXMLDocumentNode->GetType() == FDE_XMLNODE_Element) {
738 static_cast<CFDE_XMLElement*>(pXMLDocumentNode)
739 ->RemoveAttribute(L"xmlns:xfa");
740 }
741 pDataElement->InsertChildNode(pXMLDocumentNode);
742 pDataXMLNode = pDataElement;
743 }
744
745 if (pDataXMLNode) {
746 CXFA_Node* pNode =
747 m_pFactory->CreateNode(XFA_XDPPACKET_Datasets, XFA_Element::DataGroup);
748 if (!pNode) {
749 if (pDataXMLNode != pXMLDocumentNode)
750 delete pDataXMLNode;
751 return nullptr;
752 }
753 CFX_WideString wsLocalName;
754 static_cast<CFDE_XMLElement*>(pDataXMLNode)->GetLocalTagName(wsLocalName);
755 pNode->SetCData(XFA_ATTRIBUTE_Name, wsLocalName);
756 if (!DataLoader(pNode, pDataXMLNode, true))
757 return nullptr;
758
759 pNode->SetXMLMappingNode(pDataXMLNode);
760 if (pDataXMLNode != pXMLDocumentNode)
761 pNode->SetFlag(XFA_NodeFlag_OwnXMLNode, false);
762 return pNode;
763 }
764 return nullptr;
765 }
766
ParseAsXDPPacket_LocaleConnectionSourceSet(CFDE_XMLNode * pXMLDocumentNode,XFA_XDPPACKET ePacketID)767 CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_LocaleConnectionSourceSet(
768 CFDE_XMLNode* pXMLDocumentNode,
769 XFA_XDPPACKET ePacketID) {
770 CXFA_Node* pNode = nullptr;
771 if (ePacketID == XFA_XDPPACKET_LocaleSet) {
772 if (MatchNodeName(pXMLDocumentNode,
773 XFA_GetPacketByIndex(XFA_PACKET_LocaleSet)->pName,
774 XFA_GetPacketByIndex(XFA_PACKET_LocaleSet)->pURI,
775 XFA_GetPacketByIndex(XFA_PACKET_LocaleSet)->eFlags)) {
776 pNode = m_pFactory->CreateNode(XFA_XDPPACKET_LocaleSet,
777 XFA_Element::LocaleSet);
778 if (!pNode)
779 return nullptr;
780
781 pNode->SetCData(XFA_ATTRIBUTE_Name,
782 XFA_GetPacketByIndex(XFA_PACKET_LocaleSet)->pName);
783 if (!NormalLoader(pNode, pXMLDocumentNode, ePacketID, true))
784 return nullptr;
785 }
786 } else if (ePacketID == XFA_XDPPACKET_ConnectionSet) {
787 if (MatchNodeName(pXMLDocumentNode,
788 XFA_GetPacketByIndex(XFA_PACKET_ConnectionSet)->pName,
789 XFA_GetPacketByIndex(XFA_PACKET_ConnectionSet)->pURI,
790 XFA_GetPacketByIndex(XFA_PACKET_ConnectionSet)->eFlags)) {
791 pNode = m_pFactory->CreateNode(XFA_XDPPACKET_ConnectionSet,
792 XFA_Element::ConnectionSet);
793 if (!pNode)
794 return nullptr;
795
796 pNode->SetCData(XFA_ATTRIBUTE_Name,
797 XFA_GetPacketByIndex(XFA_PACKET_ConnectionSet)->pName);
798 if (!NormalLoader(pNode, pXMLDocumentNode, ePacketID, true))
799 return nullptr;
800 }
801 } else if (ePacketID == XFA_XDPPACKET_SourceSet) {
802 if (MatchNodeName(pXMLDocumentNode,
803 XFA_GetPacketByIndex(XFA_PACKET_SourceSet)->pName,
804 XFA_GetPacketByIndex(XFA_PACKET_SourceSet)->pURI,
805 XFA_GetPacketByIndex(XFA_PACKET_SourceSet)->eFlags)) {
806 pNode = m_pFactory->CreateNode(XFA_XDPPACKET_SourceSet,
807 XFA_Element::SourceSet);
808 if (!pNode)
809 return nullptr;
810
811 pNode->SetCData(XFA_ATTRIBUTE_Name,
812 XFA_GetPacketByIndex(XFA_PACKET_SourceSet)->pName);
813 if (!NormalLoader(pNode, pXMLDocumentNode, ePacketID, true))
814 return nullptr;
815 }
816 }
817 if (pNode)
818 pNode->SetXMLMappingNode(pXMLDocumentNode);
819 return pNode;
820 }
821
ParseAsXDPPacket_Xdc(CFDE_XMLNode * pXMLDocumentNode,XFA_XDPPACKET ePacketID)822 CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_Xdc(
823 CFDE_XMLNode* pXMLDocumentNode,
824 XFA_XDPPACKET ePacketID) {
825 if (!MatchNodeName(pXMLDocumentNode,
826 XFA_GetPacketByIndex(XFA_PACKET_Xdc)->pName,
827 XFA_GetPacketByIndex(XFA_PACKET_Xdc)->pURI,
828 XFA_GetPacketByIndex(XFA_PACKET_Xdc)->eFlags))
829 return nullptr;
830
831 CXFA_Node* pNode =
832 m_pFactory->CreateNode(XFA_XDPPACKET_Xdc, XFA_Element::Xdc);
833 if (!pNode)
834 return nullptr;
835
836 pNode->SetCData(XFA_ATTRIBUTE_Name,
837 XFA_GetPacketByIndex(XFA_PACKET_Xdc)->pName);
838 pNode->SetXMLMappingNode(pXMLDocumentNode);
839 return pNode;
840 }
841
ParseAsXDPPacket_User(CFDE_XMLNode * pXMLDocumentNode,XFA_XDPPACKET ePacketID)842 CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_User(
843 CFDE_XMLNode* pXMLDocumentNode,
844 XFA_XDPPACKET ePacketID) {
845 CXFA_Node* pNode =
846 m_pFactory->CreateNode(XFA_XDPPACKET_XDP, XFA_Element::Packet);
847 if (!pNode)
848 return nullptr;
849
850 CFX_WideString wsName;
851 static_cast<CFDE_XMLElement*>(pXMLDocumentNode)->GetLocalTagName(wsName);
852 pNode->SetCData(XFA_ATTRIBUTE_Name, wsName);
853 if (!UserPacketLoader(pNode, pXMLDocumentNode))
854 return nullptr;
855
856 pNode->SetXMLMappingNode(pXMLDocumentNode);
857 return pNode;
858 }
859
UserPacketLoader(CXFA_Node * pXFANode,CFDE_XMLNode * pXMLDoc)860 CXFA_Node* CXFA_SimpleParser::UserPacketLoader(CXFA_Node* pXFANode,
861 CFDE_XMLNode* pXMLDoc) {
862 return pXFANode;
863 }
864
DataLoader(CXFA_Node * pXFANode,CFDE_XMLNode * pXMLDoc,bool bDoTransform)865 CXFA_Node* CXFA_SimpleParser::DataLoader(CXFA_Node* pXFANode,
866 CFDE_XMLNode* pXMLDoc,
867 bool bDoTransform) {
868 ParseDataGroup(pXFANode, pXMLDoc, XFA_XDPPACKET_Datasets);
869 return pXFANode;
870 }
871
NormalLoader(CXFA_Node * pXFANode,CFDE_XMLNode * pXMLDoc,XFA_XDPPACKET ePacketID,bool bUseAttribute)872 CXFA_Node* CXFA_SimpleParser::NormalLoader(CXFA_Node* pXFANode,
873 CFDE_XMLNode* pXMLDoc,
874 XFA_XDPPACKET ePacketID,
875 bool bUseAttribute) {
876 bool bOneOfPropertyFound = false;
877 for (CFDE_XMLNode* pXMLChild = pXMLDoc->GetNodeItem(CFDE_XMLNode::FirstChild);
878 pXMLChild;
879 pXMLChild = pXMLChild->GetNodeItem(CFDE_XMLNode::NextSibling)) {
880 switch (pXMLChild->GetType()) {
881 case FDE_XMLNODE_Element: {
882 CFDE_XMLElement* pXMLElement = static_cast<CFDE_XMLElement*>(pXMLChild);
883 CFX_WideString wsTagName;
884 pXMLElement->GetLocalTagName(wsTagName);
885 XFA_Element eType = XFA_GetElementTypeForName(wsTagName.AsStringC());
886 if (eType == XFA_Element::Unknown)
887 continue;
888
889 const XFA_PROPERTY* pPropertyInfo = XFA_GetPropertyOfElement(
890 pXFANode->GetElementType(), eType, ePacketID);
891 if (pPropertyInfo &&
892 ((pPropertyInfo->uFlags &
893 (XFA_PROPERTYFLAG_OneOf | XFA_PROPERTYFLAG_DefaultOneOf)) != 0)) {
894 if (bOneOfPropertyFound)
895 break;
896
897 bOneOfPropertyFound = true;
898 }
899 CXFA_Node* pXFAChild = m_pFactory->CreateNode(ePacketID, eType);
900 if (!pXFAChild)
901 return nullptr;
902 if (ePacketID == XFA_XDPPACKET_Config)
903 pXFAChild->SetAttribute(XFA_ATTRIBUTE_Name, wsTagName.AsStringC());
904
905 bool IsNeedValue = true;
906 for (int32_t i = 0, count = pXMLElement->CountAttributes(); i < count;
907 i++) {
908 CFX_WideString wsAttrQualifiedName;
909 CFX_WideString wsAttrName;
910 CFX_WideString wsAttrValue;
911 pXMLElement->GetAttribute(i, wsAttrQualifiedName, wsAttrValue);
912 GetAttributeLocalName(wsAttrQualifiedName.AsStringC(), wsAttrName);
913 if (wsAttrName == L"nil" && wsAttrValue == L"true") {
914 IsNeedValue = false;
915 }
916 const XFA_ATTRIBUTEINFO* lpAttrInfo =
917 XFA_GetAttributeByName(wsAttrName.AsStringC());
918 if (!lpAttrInfo)
919 continue;
920
921 if (!bUseAttribute && lpAttrInfo->eName != XFA_ATTRIBUTE_Name &&
922 lpAttrInfo->eName != XFA_ATTRIBUTE_Save) {
923 continue;
924 }
925 pXFAChild->SetAttribute(lpAttrInfo->eName, wsAttrValue.AsStringC());
926 }
927 pXFANode->InsertChild(pXFAChild);
928 if (eType == XFA_Element::Validate || eType == XFA_Element::Locale) {
929 if (ePacketID == XFA_XDPPACKET_Config)
930 ParseContentNode(pXFAChild, pXMLElement, ePacketID);
931 else
932 NormalLoader(pXFAChild, pXMLElement, ePacketID, bUseAttribute);
933
934 break;
935 }
936 switch (pXFAChild->GetObjectType()) {
937 case XFA_ObjectType::ContentNode:
938 case XFA_ObjectType::TextNode:
939 case XFA_ObjectType::NodeC:
940 case XFA_ObjectType::NodeV:
941 if (IsNeedValue)
942 ParseContentNode(pXFAChild, pXMLElement, ePacketID);
943 break;
944 default:
945 NormalLoader(pXFAChild, pXMLElement, ePacketID, bUseAttribute);
946 break;
947 }
948 } break;
949 case FDE_XMLNODE_Instruction:
950 ParseInstruction(pXFANode, static_cast<CFDE_XMLInstruction*>(pXMLChild),
951 ePacketID);
952 break;
953 default:
954 break;
955 }
956 }
957 return pXFANode;
958 }
959
ParseContentNode(CXFA_Node * pXFANode,CFDE_XMLNode * pXMLNode,XFA_XDPPACKET ePacketID)960 void CXFA_SimpleParser::ParseContentNode(CXFA_Node* pXFANode,
961 CFDE_XMLNode* pXMLNode,
962 XFA_XDPPACKET ePacketID) {
963 XFA_Element element = XFA_Element::Sharptext;
964 if (pXFANode->GetElementType() == XFA_Element::ExData) {
965 CFX_WideStringC wsContentType =
966 pXFANode->GetCData(XFA_ATTRIBUTE_ContentType);
967 if (wsContentType == L"text/html")
968 element = XFA_Element::SharpxHTML;
969 else if (wsContentType == L"text/xml")
970 element = XFA_Element::Sharpxml;
971 }
972 if (element == XFA_Element::SharpxHTML)
973 pXFANode->SetXMLMappingNode(pXMLNode);
974
975 CFX_WideString wsValue;
976 for (CFDE_XMLNode* pXMLChild =
977 pXMLNode->GetNodeItem(CFDE_XMLNode::FirstChild);
978 pXMLChild;
979 pXMLChild = pXMLChild->GetNodeItem(CFDE_XMLNode::NextSibling)) {
980 FDE_XMLNODETYPE eNodeType = pXMLChild->GetType();
981 if (eNodeType == FDE_XMLNODE_Instruction)
982 continue;
983
984 if (element == XFA_Element::SharpxHTML) {
985 if (eNodeType != FDE_XMLNODE_Element)
986 break;
987
988 if (XFA_RecognizeRichText(static_cast<CFDE_XMLElement*>(pXMLChild)))
989 XFA_GetPlainTextFromRichText(static_cast<CFDE_XMLElement*>(pXMLChild),
990 wsValue);
991 } else if (element == XFA_Element::Sharpxml) {
992 if (eNodeType != FDE_XMLNODE_Element)
993 break;
994
995 ConvertXMLToPlainText(static_cast<CFDE_XMLElement*>(pXMLChild), wsValue);
996 } else {
997 if (eNodeType == FDE_XMLNODE_Element)
998 break;
999 if (eNodeType == FDE_XMLNODE_Text)
1000 static_cast<CFDE_XMLText*>(pXMLChild)->GetText(wsValue);
1001 else if (eNodeType == FDE_XMLNODE_CharData)
1002 static_cast<CFDE_XMLCharData*>(pXMLChild)->GetCharData(wsValue);
1003 }
1004 break;
1005 }
1006 if (!wsValue.IsEmpty()) {
1007 if (pXFANode->IsContentNode()) {
1008 CXFA_Node* pContentRawDataNode =
1009 m_pFactory->CreateNode(ePacketID, element);
1010 ASSERT(pContentRawDataNode);
1011 pContentRawDataNode->SetCData(XFA_ATTRIBUTE_Value, wsValue);
1012 pXFANode->InsertChild(pContentRawDataNode);
1013 } else {
1014 pXFANode->SetCData(XFA_ATTRIBUTE_Value, wsValue);
1015 }
1016 }
1017 }
1018
ParseDataGroup(CXFA_Node * pXFANode,CFDE_XMLNode * pXMLNode,XFA_XDPPACKET ePacketID)1019 void CXFA_SimpleParser::ParseDataGroup(CXFA_Node* pXFANode,
1020 CFDE_XMLNode* pXMLNode,
1021 XFA_XDPPACKET ePacketID) {
1022 for (CFDE_XMLNode* pXMLChild =
1023 pXMLNode->GetNodeItem(CFDE_XMLNode::FirstChild);
1024 pXMLChild;
1025 pXMLChild = pXMLChild->GetNodeItem(CFDE_XMLNode::NextSibling)) {
1026 switch (pXMLChild->GetType()) {
1027 case FDE_XMLNODE_Element: {
1028 CFDE_XMLElement* pXMLElement = static_cast<CFDE_XMLElement*>(pXMLChild);
1029 {
1030 CFX_WideString wsNamespaceURI;
1031 GetElementTagNamespaceURI(pXMLElement, wsNamespaceURI);
1032 if (wsNamespaceURI == L"http://www.xfa.com/schema/xfa-package/" ||
1033 wsNamespaceURI == L"http://www.xfa.org/schema/xfa-package/" ||
1034 wsNamespaceURI == L"http://www.w3.org/2001/XMLSchema-instance") {
1035 continue;
1036 }
1037 }
1038
1039 XFA_Element eNodeType = XFA_Element::DataModel;
1040 if (eNodeType == XFA_Element::DataModel) {
1041 CFX_WideString wsDataNodeAttr;
1042 if (FindAttributeWithNS(pXMLElement, L"dataNode",
1043 L"http://www.xfa.org/schema/xfa-data/1.0/",
1044 wsDataNodeAttr)) {
1045 if (wsDataNodeAttr == L"dataGroup")
1046 eNodeType = XFA_Element::DataGroup;
1047 else if (wsDataNodeAttr == L"dataValue")
1048 eNodeType = XFA_Element::DataValue;
1049 }
1050 }
1051 CFX_WideString wsContentType;
1052 if (eNodeType == XFA_Element::DataModel) {
1053 if (FindAttributeWithNS(pXMLElement, L"contentType",
1054 L"http://www.xfa.org/schema/xfa-data/1.0/",
1055 wsContentType)) {
1056 if (!wsContentType.IsEmpty())
1057 eNodeType = XFA_Element::DataValue;
1058 }
1059 }
1060 if (eNodeType == XFA_Element::DataModel) {
1061 for (CFDE_XMLNode* pXMLDataChild =
1062 pXMLElement->GetNodeItem(CFDE_XMLNode::FirstChild);
1063 pXMLDataChild; pXMLDataChild = pXMLDataChild->GetNodeItem(
1064 CFDE_XMLNode::NextSibling)) {
1065 if (pXMLDataChild->GetType() == FDE_XMLNODE_Element) {
1066 if (!XFA_RecognizeRichText(
1067 static_cast<CFDE_XMLElement*>(pXMLDataChild))) {
1068 eNodeType = XFA_Element::DataGroup;
1069 break;
1070 }
1071 }
1072 }
1073 }
1074 if (eNodeType == XFA_Element::DataModel)
1075 eNodeType = XFA_Element::DataValue;
1076
1077 CXFA_Node* pXFAChild =
1078 m_pFactory->CreateNode(XFA_XDPPACKET_Datasets, eNodeType);
1079 if (!pXFAChild)
1080 return;
1081
1082 CFX_WideString wsNodeName;
1083 pXMLElement->GetLocalTagName(wsNodeName);
1084 pXFAChild->SetCData(XFA_ATTRIBUTE_Name, wsNodeName);
1085 bool bNeedValue = true;
1086 for (int32_t i = 0; i < pXMLElement->CountAttributes(); ++i) {
1087 CFX_WideString wsQualifiedName;
1088 CFX_WideString wsValue;
1089 CFX_WideString wsName;
1090 CFX_WideString wsNS;
1091 pXMLElement->GetAttribute(i, wsQualifiedName, wsValue);
1092 if (!ResolveAttribute(pXMLElement, wsQualifiedName.AsStringC(),
1093 wsName, wsNS)) {
1094 continue;
1095 }
1096 if (wsName == L"nil" && wsValue == L"true") {
1097 bNeedValue = false;
1098 continue;
1099 }
1100 if (wsNS == L"http://www.xfa.com/schema/xfa-package/" ||
1101 wsNS == L"http://www.xfa.org/schema/xfa-package/" ||
1102 wsNS == L"http://www.w3.org/2001/XMLSchema-instance" ||
1103 wsNS == L"http://www.xfa.org/schema/xfa-data/1.0/") {
1104 continue;
1105 }
1106 CXFA_Node* pXFAMetaData = m_pFactory->CreateNode(
1107 XFA_XDPPACKET_Datasets, XFA_Element::DataValue);
1108 if (!pXFAMetaData)
1109 return;
1110
1111 pXFAMetaData->SetCData(XFA_ATTRIBUTE_Name, wsName);
1112 pXFAMetaData->SetCData(XFA_ATTRIBUTE_QualifiedName, wsQualifiedName);
1113 pXFAMetaData->SetCData(XFA_ATTRIBUTE_Value, wsValue);
1114 pXFAMetaData->SetEnum(XFA_ATTRIBUTE_Contains,
1115 XFA_ATTRIBUTEENUM_MetaData);
1116 pXFAChild->InsertChild(pXFAMetaData);
1117 pXFAMetaData->SetXMLMappingNode(pXMLElement);
1118 pXFAMetaData->SetFlag(XFA_NodeFlag_Initialized, false);
1119 }
1120
1121 if (!bNeedValue) {
1122 CFX_WideString wsNilName(L"xsi:nil");
1123 pXMLElement->RemoveAttribute(wsNilName.c_str());
1124 }
1125 pXFANode->InsertChild(pXFAChild);
1126 if (eNodeType == XFA_Element::DataGroup)
1127 ParseDataGroup(pXFAChild, pXMLElement, ePacketID);
1128 else if (bNeedValue)
1129 ParseDataValue(pXFAChild, pXMLChild, XFA_XDPPACKET_Datasets);
1130
1131 pXFAChild->SetXMLMappingNode(pXMLElement);
1132 pXFAChild->SetFlag(XFA_NodeFlag_Initialized, false);
1133 continue;
1134 }
1135 case FDE_XMLNODE_CharData: {
1136 CFDE_XMLCharData* pXMLCharData =
1137 static_cast<CFDE_XMLCharData*>(pXMLChild);
1138 CFX_WideString wsCharData;
1139 pXMLCharData->GetCharData(wsCharData);
1140 if (IsStringAllWhitespace(wsCharData))
1141 continue;
1142
1143 CXFA_Node* pXFAChild = m_pFactory->CreateNode(XFA_XDPPACKET_Datasets,
1144 XFA_Element::DataValue);
1145 if (!pXFAChild)
1146 return;
1147
1148 pXFAChild->SetCData(XFA_ATTRIBUTE_Value, wsCharData);
1149 pXFANode->InsertChild(pXFAChild);
1150 pXFAChild->SetXMLMappingNode(pXMLCharData);
1151 pXFAChild->SetFlag(XFA_NodeFlag_Initialized, false);
1152 continue;
1153 }
1154 case FDE_XMLNODE_Text: {
1155 CFDE_XMLText* pXMLText = static_cast<CFDE_XMLText*>(pXMLChild);
1156 CFX_WideString wsText;
1157 pXMLText->GetText(wsText);
1158 if (IsStringAllWhitespace(wsText))
1159 continue;
1160
1161 CXFA_Node* pXFAChild = m_pFactory->CreateNode(XFA_XDPPACKET_Datasets,
1162 XFA_Element::DataValue);
1163 if (!pXFAChild)
1164 return;
1165
1166 pXFAChild->SetCData(XFA_ATTRIBUTE_Value, wsText);
1167 pXFANode->InsertChild(pXFAChild);
1168 pXFAChild->SetXMLMappingNode(pXMLText);
1169 pXFAChild->SetFlag(XFA_NodeFlag_Initialized, false);
1170 continue;
1171 }
1172 default:
1173 continue;
1174 }
1175 }
1176 }
1177
ParseDataValue(CXFA_Node * pXFANode,CFDE_XMLNode * pXMLNode,XFA_XDPPACKET ePacketID)1178 void CXFA_SimpleParser::ParseDataValue(CXFA_Node* pXFANode,
1179 CFDE_XMLNode* pXMLNode,
1180 XFA_XDPPACKET ePacketID) {
1181 CFX_WideTextBuf wsValueTextBuf;
1182 CFX_WideTextBuf wsCurValueTextBuf;
1183 bool bMarkAsCompound = false;
1184 CFDE_XMLNode* pXMLCurValueNode = nullptr;
1185 for (CFDE_XMLNode* pXMLChild =
1186 pXMLNode->GetNodeItem(CFDE_XMLNode::FirstChild);
1187 pXMLChild;
1188 pXMLChild = pXMLChild->GetNodeItem(CFDE_XMLNode::NextSibling)) {
1189 FDE_XMLNODETYPE eNodeType = pXMLChild->GetType();
1190 if (eNodeType == FDE_XMLNODE_Instruction)
1191 continue;
1192
1193 CFX_WideString wsText;
1194 if (eNodeType == FDE_XMLNODE_Text) {
1195 static_cast<CFDE_XMLText*>(pXMLChild)->GetText(wsText);
1196 if (!pXMLCurValueNode)
1197 pXMLCurValueNode = pXMLChild;
1198
1199 wsCurValueTextBuf << wsText;
1200 } else if (eNodeType == FDE_XMLNODE_CharData) {
1201 static_cast<CFDE_XMLCharData*>(pXMLChild)->GetCharData(wsText);
1202 if (!pXMLCurValueNode)
1203 pXMLCurValueNode = pXMLChild;
1204
1205 wsCurValueTextBuf << wsText;
1206 } else if (XFA_RecognizeRichText(
1207 static_cast<CFDE_XMLElement*>(pXMLChild))) {
1208 XFA_GetPlainTextFromRichText(static_cast<CFDE_XMLElement*>(pXMLChild),
1209 wsText);
1210 if (!pXMLCurValueNode)
1211 pXMLCurValueNode = pXMLChild;
1212
1213 wsCurValueTextBuf << wsText;
1214 } else {
1215 bMarkAsCompound = true;
1216 if (pXMLCurValueNode) {
1217 CFX_WideString wsCurValue = wsCurValueTextBuf.MakeString();
1218 if (!wsCurValue.IsEmpty()) {
1219 CXFA_Node* pXFAChild =
1220 m_pFactory->CreateNode(ePacketID, XFA_Element::DataValue);
1221 if (!pXFAChild)
1222 return;
1223
1224 pXFAChild->SetCData(XFA_ATTRIBUTE_Name, L"");
1225 pXFAChild->SetCData(XFA_ATTRIBUTE_Value, wsCurValue);
1226 pXFANode->InsertChild(pXFAChild);
1227 pXFAChild->SetXMLMappingNode(pXMLCurValueNode);
1228 pXFAChild->SetFlag(XFA_NodeFlag_Initialized, false);
1229 wsValueTextBuf << wsCurValue;
1230 wsCurValueTextBuf.Clear();
1231 }
1232 pXMLCurValueNode = nullptr;
1233 }
1234 CXFA_Node* pXFAChild =
1235 m_pFactory->CreateNode(ePacketID, XFA_Element::DataValue);
1236 if (!pXFAChild)
1237 return;
1238
1239 CFX_WideString wsNodeStr;
1240 static_cast<CFDE_XMLElement*>(pXMLChild)->GetLocalTagName(wsNodeStr);
1241 pXFAChild->SetCData(XFA_ATTRIBUTE_Name, wsNodeStr);
1242 ParseDataValue(pXFAChild, pXMLChild, ePacketID);
1243 pXFANode->InsertChild(pXFAChild);
1244 pXFAChild->SetXMLMappingNode(pXMLChild);
1245 pXFAChild->SetFlag(XFA_NodeFlag_Initialized, false);
1246 CFX_WideStringC wsCurValue = pXFAChild->GetCData(XFA_ATTRIBUTE_Value);
1247 wsValueTextBuf << wsCurValue;
1248 }
1249 }
1250 if (pXMLCurValueNode) {
1251 CFX_WideString wsCurValue = wsCurValueTextBuf.MakeString();
1252 if (!wsCurValue.IsEmpty()) {
1253 if (bMarkAsCompound) {
1254 CXFA_Node* pXFAChild =
1255 m_pFactory->CreateNode(ePacketID, XFA_Element::DataValue);
1256 if (!pXFAChild)
1257 return;
1258
1259 pXFAChild->SetCData(XFA_ATTRIBUTE_Name, L"");
1260 pXFAChild->SetCData(XFA_ATTRIBUTE_Value, wsCurValue);
1261 pXFANode->InsertChild(pXFAChild);
1262 pXFAChild->SetXMLMappingNode(pXMLCurValueNode);
1263 pXFAChild->SetFlag(XFA_NodeFlag_Initialized, false);
1264 }
1265 wsValueTextBuf << wsCurValue;
1266 wsCurValueTextBuf.Clear();
1267 }
1268 pXMLCurValueNode = nullptr;
1269 }
1270 CFX_WideString wsNodeValue = wsValueTextBuf.MakeString();
1271 pXFANode->SetCData(XFA_ATTRIBUTE_Value, wsNodeValue);
1272 }
1273
ParseInstruction(CXFA_Node * pXFANode,CFDE_XMLInstruction * pXMLInstruction,XFA_XDPPACKET ePacketID)1274 void CXFA_SimpleParser::ParseInstruction(CXFA_Node* pXFANode,
1275 CFDE_XMLInstruction* pXMLInstruction,
1276 XFA_XDPPACKET ePacketID) {
1277 if (!m_bDocumentParser)
1278 return;
1279
1280 CFX_WideString wsTargetName;
1281 pXMLInstruction->GetTargetName(wsTargetName);
1282 if (wsTargetName == L"originalXFAVersion") {
1283 CFX_WideString wsData;
1284 if (pXMLInstruction->GetData(0, wsData) &&
1285 (pXFANode->GetDocument()->RecognizeXFAVersionNumber(wsData) !=
1286 XFA_VERSION_UNKNOWN)) {
1287 wsData.clear();
1288 if (pXMLInstruction->GetData(1, wsData) &&
1289 wsData == L"v2.7-scripting:1") {
1290 pXFANode->GetDocument()->SetFlag(XFA_DOCFLAG_Scripting, true);
1291 }
1292 }
1293 } else if (wsTargetName == L"acrobat") {
1294 CFX_WideString wsData;
1295 if (pXMLInstruction->GetData(0, wsData) && wsData == L"JavaScript") {
1296 if (pXMLInstruction->GetData(1, wsData) && wsData == L"strictScoping") {
1297 pXFANode->GetDocument()->SetFlag(XFA_DOCFLAG_StrictScoping, true);
1298 }
1299 }
1300 }
1301 }
1302
CloseParser()1303 void CXFA_SimpleParser::CloseParser() {
1304 m_pXMLDoc.reset();
1305 m_pStream.Reset();
1306 }
1307