1 // Copyright 2016 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #include "xfa/fxfa/parser/cxfa_document_builder.h"
8
9 #include <utility>
10 #include <vector>
11
12 #include "core/fxcrt/autorestorer.h"
13 #include "core/fxcrt/fx_codepage.h"
14 #include "core/fxcrt/fx_extension.h"
15 #include "core/fxcrt/xml/cfx_xmlchardata.h"
16 #include "core/fxcrt/xml/cfx_xmldocument.h"
17 #include "core/fxcrt/xml/cfx_xmlelement.h"
18 #include "core/fxcrt/xml/cfx_xmlinstruction.h"
19 #include "core/fxcrt/xml/cfx_xmlnode.h"
20 #include "core/fxcrt/xml/cfx_xmltext.h"
21 #include "fxjs/xfa/cjx_object.h"
22 #include "third_party/abseil-cpp/absl/types/optional.h"
23 #include "third_party/base/check.h"
24 #include "third_party/base/notreached.h"
25 #include "xfa/fxfa/parser/cxfa_document.h"
26 #include "xfa/fxfa/parser/cxfa_node.h"
27 #include "xfa/fxfa/parser/cxfa_subform.h"
28 #include "xfa/fxfa/parser/cxfa_template.h"
29 #include "xfa/fxfa/parser/xfa_basic_data.h"
30 #include "xfa/fxfa/parser/xfa_utils.h"
31
32 namespace {
33
GetDocumentNode(CFX_XMLNode * pRootNode)34 CFX_XMLNode* GetDocumentNode(CFX_XMLNode* pRootNode) {
35 for (CFX_XMLNode* pXMLNode = pRootNode->GetFirstChild(); pXMLNode;
36 pXMLNode = pXMLNode->GetNextSibling()) {
37 if (pXMLNode->GetType() == CFX_XMLNode::Type::kElement)
38 return pXMLNode;
39 }
40 return nullptr;
41 }
42
MatchNodeName(CFX_XMLNode * pNode,ByteStringView bsLocalTagName,ByteStringView bsNamespaceURIPrefix,XFA_PacketMatch eMatch)43 bool MatchNodeName(CFX_XMLNode* pNode,
44 ByteStringView bsLocalTagName,
45 ByteStringView bsNamespaceURIPrefix,
46 XFA_PacketMatch eMatch) {
47 CFX_XMLElement* pElement = ToXMLElement(pNode);
48 if (!pElement)
49 return false;
50
51 if (!pElement->GetLocalTagName().EqualsASCII(bsLocalTagName))
52 return false;
53
54 if (eMatch == XFA_PacketMatch::kNoMatch)
55 return true;
56
57 WideString wsNodeStr = pElement->GetNamespaceURI();
58 if (eMatch == XFA_PacketMatch::kPrefixMatch) {
59 return wsNodeStr.AsStringView()
60 .First(bsNamespaceURIPrefix.GetLength())
61 .EqualsASCII(bsNamespaceURIPrefix);
62 }
63 return wsNodeStr.EqualsASCII(bsNamespaceURIPrefix);
64 }
65
GetAttributeLocalName(WideStringView wsAttributeName,WideString & wsLocalAttrName)66 bool GetAttributeLocalName(WideStringView wsAttributeName,
67 WideString& wsLocalAttrName) {
68 WideString wsAttrName(wsAttributeName);
69 auto pos = wsAttrName.Find(L':', 0);
70 if (!pos.has_value()) {
71 wsLocalAttrName = std::move(wsAttrName);
72 return false;
73 }
74 wsLocalAttrName = wsAttrName.Last(wsAttrName.GetLength() - pos.value() - 1);
75 return true;
76 }
77
ResolveAttribute(CFX_XMLElement * pElement,const WideString & wsAttrName,WideString & wsLocalAttrName,WideString & wsNamespaceURI)78 bool ResolveAttribute(CFX_XMLElement* pElement,
79 const WideString& wsAttrName,
80 WideString& wsLocalAttrName,
81 WideString& wsNamespaceURI) {
82 WideString wsNSPrefix;
83 if (GetAttributeLocalName(wsAttrName.AsStringView(), wsLocalAttrName)) {
84 wsNSPrefix = wsAttrName.First(wsAttrName.GetLength() -
85 wsLocalAttrName.GetLength() - 1);
86 }
87 if (wsLocalAttrName.EqualsASCII("xmlns") || wsNSPrefix.EqualsASCII("xmlns") ||
88 wsNSPrefix.EqualsASCII("xml")) {
89 return false;
90 }
91 if (!XFA_FDEExtension_ResolveNamespaceQualifier(pElement, wsNSPrefix,
92 &wsNamespaceURI)) {
93 wsNamespaceURI.clear();
94 return false;
95 }
96 return true;
97 }
98
FindAttributeWithNS(CFX_XMLElement * pElement,WideStringView wsLocalAttributeName,WideStringView wsNamespaceURIPrefix)99 absl::optional<WideString> FindAttributeWithNS(
100 CFX_XMLElement* pElement,
101 WideStringView wsLocalAttributeName,
102 WideStringView wsNamespaceURIPrefix) {
103 WideString wsAttrNS;
104 for (auto it : pElement->GetAttributes()) {
105 auto pos = it.first.Find(L':', 0);
106 WideString wsNSPrefix;
107 if (!pos.has_value()) {
108 if (wsLocalAttributeName != it.first)
109 continue;
110 } else {
111 if (wsLocalAttributeName !=
112 it.first.Last(it.first.GetLength() - pos.value() - 1)) {
113 continue;
114 }
115 wsNSPrefix = it.first.First(pos.value());
116 }
117 if (!XFA_FDEExtension_ResolveNamespaceQualifier(pElement, wsNSPrefix,
118 &wsAttrNS) ||
119 wsAttrNS != wsNamespaceURIPrefix) {
120 continue;
121 }
122 return it.second;
123 }
124 return absl::nullopt;
125 }
126
GetDataSetsFromXDP(CFX_XMLNode * pXMLDocumentNode)127 CFX_XMLNode* GetDataSetsFromXDP(CFX_XMLNode* pXMLDocumentNode) {
128 XFA_PACKETINFO datasets_packet =
129 XFA_GetPacketByIndex(XFA_PacketType::Datasets);
130 if (MatchNodeName(pXMLDocumentNode, datasets_packet.name, datasets_packet.uri,
131 datasets_packet.match)) {
132 return pXMLDocumentNode;
133 }
134 XFA_PACKETINFO xdp_packet = XFA_GetPacketByIndex(XFA_PacketType::Xdp);
135 if (!MatchNodeName(pXMLDocumentNode, xdp_packet.name, xdp_packet.uri,
136 xdp_packet.match)) {
137 return nullptr;
138 }
139 for (CFX_XMLNode* pDatasetsNode = pXMLDocumentNode->GetFirstChild();
140 pDatasetsNode; pDatasetsNode = pDatasetsNode->GetNextSibling()) {
141 if (MatchNodeName(pDatasetsNode, datasets_packet.name, datasets_packet.uri,
142 datasets_packet.match)) {
143 return pDatasetsNode;
144 }
145 }
146 return nullptr;
147 }
148
IsStringAllWhitespace(WideString wsText)149 bool IsStringAllWhitespace(WideString wsText) {
150 wsText.TrimRight(L"\x20\x9\xD\xA");
151 return wsText.IsEmpty();
152 }
153
ConvertXMLToPlainText(CFX_XMLElement * pRootXMLNode,WideString & wsOutput)154 void ConvertXMLToPlainText(CFX_XMLElement* pRootXMLNode, WideString& wsOutput) {
155 for (CFX_XMLNode* pXMLChild = pRootXMLNode->GetFirstChild(); pXMLChild;
156 pXMLChild = pXMLChild->GetNextSibling()) {
157 switch (pXMLChild->GetType()) {
158 case CFX_XMLNode::Type::kElement: {
159 WideString wsTextData = ToXMLElement(pXMLChild)->GetTextData();
160 wsTextData += L"\n";
161 wsOutput += wsTextData;
162 break;
163 }
164 case CFX_XMLNode::Type::kText:
165 case CFX_XMLNode::Type::kCharData: {
166 WideString wsText = ToXMLText(pXMLChild)->GetText();
167 if (IsStringAllWhitespace(wsText))
168 continue;
169 wsOutput = std::move(wsText);
170 break;
171 }
172 default:
173 NOTREACHED();
174 break;
175 }
176 }
177 }
178
GetPlainTextFromRichText(CFX_XMLNode * pXMLNode)179 WideString GetPlainTextFromRichText(CFX_XMLNode* pXMLNode) {
180 if (!pXMLNode)
181 return WideString();
182
183 WideString wsPlainText;
184 switch (pXMLNode->GetType()) {
185 case CFX_XMLNode::Type::kElement: {
186 CFX_XMLElement* pXMLElement = static_cast<CFX_XMLElement*>(pXMLNode);
187 WideString wsTag = pXMLElement->GetLocalTagName();
188 uint32_t uTag = FX_HashCode_GetLoweredW(wsTag.AsStringView());
189 if (uTag == 0x0001f714) {
190 wsPlainText += L"\n";
191 } else if (uTag == 0x00000070) {
192 if (!wsPlainText.IsEmpty()) {
193 wsPlainText += L"\n";
194 }
195 } else if (uTag == 0xa48ac63) {
196 if (!wsPlainText.IsEmpty() && wsPlainText.Back() != '\n') {
197 wsPlainText += L"\n";
198 }
199 }
200 break;
201 }
202 case CFX_XMLNode::Type::kText:
203 case CFX_XMLNode::Type::kCharData: {
204 WideString wsContent = ToXMLText(pXMLNode)->GetText();
205 wsPlainText += wsContent;
206 break;
207 }
208 default:
209 break;
210 }
211 for (CFX_XMLNode* pChildXML = pXMLNode->GetFirstChild(); pChildXML;
212 pChildXML = pChildXML->GetNextSibling()) {
213 wsPlainText += GetPlainTextFromRichText(pChildXML);
214 }
215
216 return wsPlainText;
217 }
218
219 } // namespace
220
XFA_RecognizeRichText(CFX_XMLElement * pRichTextXMLNode)221 bool XFA_RecognizeRichText(CFX_XMLElement* pRichTextXMLNode) {
222 return pRichTextXMLNode && pRichTextXMLNode->GetNamespaceURI().EqualsASCII(
223 "http://www.w3.org/1999/xhtml");
224 }
225
// Constructs a builder that creates all of its XFA nodes through
// |pNodeFactory|.
CXFA_DocumentBuilder::CXFA_DocumentBuilder(CXFA_Document* pNodeFactory)
    : node_factory_(pNodeFactory) {}

// No explicit cleanup is performed here; member destructors do the work.
CXFA_DocumentBuilder::~CXFA_DocumentBuilder() = default;
230
BuildDocument(CFX_XMLDocument * pXML,XFA_PacketType ePacketID)231 bool CXFA_DocumentBuilder::BuildDocument(CFX_XMLDocument* pXML,
232 XFA_PacketType ePacketID) {
233 DCHECK(pXML);
234
235 CFX_XMLNode* root = Build(pXML);
236 if (!root)
237 return false;
238
239 root_node_ = ParseAsXDPPacket(root, ePacketID);
240 return !!root_node_;
241 }
242
Build(CFX_XMLDocument * pXML)243 CFX_XMLNode* CXFA_DocumentBuilder::Build(CFX_XMLDocument* pXML) {
244 if (!pXML)
245 return nullptr;
246
247 xml_doc_ = pXML;
248 xml_doc_->GetRoot()->InsertChildNode(
249 xml_doc_->CreateNode<CFX_XMLInstruction>(L"xml"), 0);
250
251 return GetDocumentNode(xml_doc_->GetRoot());
252 }
253
ConstructXFANode(CXFA_Node * pXFANode,CFX_XMLNode * pXMLNode)254 void CXFA_DocumentBuilder::ConstructXFANode(CXFA_Node* pXFANode,
255 CFX_XMLNode* pXMLNode) {
256 XFA_PacketType ePacketID = pXFANode->GetPacketType();
257 if (ePacketID == XFA_PacketType::Datasets) {
258 if (pXFANode->GetElementType() == XFA_Element::DataValue) {
259 for (CFX_XMLNode* pXMLChild = pXMLNode->GetFirstChild(); pXMLChild;
260 pXMLChild = pXMLChild->GetNextSibling()) {
261 CFX_XMLNode::Type eNodeType = pXMLChild->GetType();
262 if (eNodeType == CFX_XMLNode::Type::kInstruction)
263 continue;
264
265 if (eNodeType == CFX_XMLNode::Type::kElement) {
266 CXFA_Node* pXFAChild = node_factory_->CreateNode(
267 XFA_PacketType::Datasets, XFA_Element::DataValue);
268 if (!pXFAChild)
269 return;
270
271 CFX_XMLElement* child = static_cast<CFX_XMLElement*>(pXMLChild);
272 WideString wsNodeStr = child->GetLocalTagName();
273 pXFAChild->JSObject()->SetCData(XFA_Attribute::Name, wsNodeStr);
274 WideString wsChildValue = GetPlainTextFromRichText(child);
275 if (!wsChildValue.IsEmpty())
276 pXFAChild->JSObject()->SetCData(XFA_Attribute::Value, wsChildValue);
277
278 pXFANode->InsertChildAndNotify(pXFAChild, nullptr);
279 pXFAChild->SetXMLMappingNode(pXMLChild);
280 pXFAChild->SetFlag(XFA_NodeFlag::kInitialized);
281 break;
282 }
283 }
284 root_node_ = pXFANode;
285 } else {
286 root_node_ = DataLoader(pXFANode, pXMLNode);
287 }
288 } else if (pXFANode->IsContentNode()) {
289 ParseContentNode(pXFANode, pXMLNode, ePacketID);
290 root_node_ = pXFANode;
291 } else {
292 root_node_ = NormalLoader(pXFANode, pXMLNode, ePacketID, true);
293 }
294 }
295
GetRootNode() const296 CXFA_Node* CXFA_DocumentBuilder::GetRootNode() const {
297 return root_node_;
298 }
299
ParseAsXDPPacket(CFX_XMLNode * pXMLDocumentNode,XFA_PacketType ePacketID)300 CXFA_Node* CXFA_DocumentBuilder::ParseAsXDPPacket(CFX_XMLNode* pXMLDocumentNode,
301 XFA_PacketType ePacketID) {
302 switch (ePacketID) {
303 case XFA_PacketType::Xdp:
304 return ParseAsXDPPacket_XDP(pXMLDocumentNode);
305 case XFA_PacketType::Config:
306 return ParseAsXDPPacket_Config(pXMLDocumentNode);
307 case XFA_PacketType::Template:
308 return ParseAsXDPPacket_Template(pXMLDocumentNode);
309 case XFA_PacketType::Form:
310 return ParseAsXDPPacket_Form(pXMLDocumentNode);
311 case XFA_PacketType::Datasets:
312 return ParseAsXDPPacket_Data(pXMLDocumentNode);
313 case XFA_PacketType::Xdc:
314 return ParseAsXDPPacket_Xdc(pXMLDocumentNode);
315 case XFA_PacketType::LocaleSet:
316 return ParseAsXDPPacket_LocaleConnectionSourceSet(
317 pXMLDocumentNode, XFA_PacketType::LocaleSet, XFA_Element::LocaleSet);
318 case XFA_PacketType::ConnectionSet:
319 return ParseAsXDPPacket_LocaleConnectionSourceSet(
320 pXMLDocumentNode, XFA_PacketType::ConnectionSet,
321 XFA_Element::ConnectionSet);
322 case XFA_PacketType::SourceSet:
323 return ParseAsXDPPacket_LocaleConnectionSourceSet(
324 pXMLDocumentNode, XFA_PacketType::SourceSet, XFA_Element::SourceSet);
325 default:
326 return ParseAsXDPPacket_User(pXMLDocumentNode);
327 }
328 }
329
// Parses |pXMLDocumentNode| as a complete XDP document and returns the "xfa"
// root node with one child per successfully parsed packet.
// Returns nullptr when the node is not an XDP element, when the root node
// cannot be created, when a config, datasets, form or template packet (or a
// packet flagged kSupportOne) appears more than once, or when no template
// packet was parsed successfully.
// Note that |root_node_| is assigned as soon as the root is created, before
// any of the failure checks on the child packets run.
CXFA_Node* CXFA_DocumentBuilder::ParseAsXDPPacket_XDP(
    CFX_XMLNode* pXMLDocumentNode) {
  XFA_PACKETINFO packet = XFA_GetPacketByIndex(XFA_PacketType::Xdp);
  if (!MatchNodeName(pXMLDocumentNode, packet.name, packet.uri, packet.match))
    return nullptr;

  CXFA_Node* pXFARootNode =
      node_factory_->CreateNode(XFA_PacketType::Xdp, XFA_Element::Xfa);
  if (!pXFARootNode)
    return nullptr;

  // Published early so packet parsers invoked below can consult the root
  // (ParseAsXDPPacket_Form() reads |root_node_|).
  root_node_ = pXFARootNode;
  pXFARootNode->JSObject()->SetCData(XFA_Attribute::Name, L"xfa");

  // Copy the "uuid" and "timeStamp" attributes onto the root; other
  // attributes are ignored.
  for (auto it : ToXMLElement(pXMLDocumentNode)->GetAttributes()) {
    if (it.first.EqualsASCII("uuid"))
      pXFARootNode->JSObject()->SetCData(XFA_Attribute::Uuid, it.second);
    else if (it.first.EqualsASCII("timeStamp"))
      pXFARootNode->JSObject()->SetCData(XFA_Attribute::TimeStamp, it.second);
  }

  // First pass: handle the config packet before any other packet.
  CFX_XMLNode* pXMLConfigDOMRoot = nullptr;
  CXFA_Node* pXFAConfigDOMRoot = nullptr;
  XFA_PACKETINFO config_packet = XFA_GetPacketByIndex(XFA_PacketType::Config);
  for (CFX_XMLNode* pChildItem = pXMLDocumentNode->GetFirstChild(); pChildItem;
       pChildItem = pChildItem->GetNextSibling()) {
    if (!MatchNodeName(pChildItem, config_packet.name, config_packet.uri,
                       config_packet.match)) {
      continue;
    }
    // TODO(tsepez): make GetFirstChildByName() take a name.
    uint32_t hash = FX_HashCode_GetAsIfW(config_packet.name);
    // A config child already attached to the root means this is a duplicate.
    if (pXFARootNode->GetFirstChildByName(hash))
      return nullptr;

    pXMLConfigDOMRoot = pChildItem;
    pXFAConfigDOMRoot = ParseAsXDPPacket_Config(pXMLConfigDOMRoot);
    if (pXFAConfigDOMRoot)
      pXFARootNode->InsertChildAndNotify(pXFAConfigDOMRoot, nullptr);
  }

  // Second pass: every remaining element child. Template and other packets
  // are parsed immediately; datasets and form are only remembered here and
  // parsed after the loop.
  CFX_XMLNode* pXMLDatasetsDOMRoot = nullptr;
  CFX_XMLNode* pXMLFormDOMRoot = nullptr;
  CFX_XMLNode* pXMLTemplateDOMRoot = nullptr;
  for (CFX_XMLNode* pChildItem = pXMLDocumentNode->GetFirstChild(); pChildItem;
       pChildItem = pChildItem->GetNextSibling()) {
    CFX_XMLElement* pElement = ToXMLElement(pChildItem);
    // Skip non-elements and the config packet handled in the first pass.
    if (!pElement || pElement == pXMLConfigDOMRoot)
      continue;

    WideString wsPacketName = pElement->GetLocalTagName();
    absl::optional<XFA_PACKETINFO> packet_info =
        XFA_GetPacketByName(wsPacketName.AsStringView());
    // A known packet name whose namespace does not match is demoted to a
    // user packet by discarding the packet info.
    if (packet_info.has_value() && packet_info.value().uri &&
        !MatchNodeName(pElement, packet_info.value().name,
                       packet_info.value().uri, packet_info.value().match)) {
      packet_info = {};
    }
    XFA_PacketType ePacket = XFA_PacketType::User;
    if (packet_info.has_value())
      ePacket = packet_info.value().packet_type;
    // Nested XDP packets are ignored.
    if (ePacket == XFA_PacketType::Xdp)
      continue;
    if (ePacket == XFA_PacketType::Datasets) {
      // Found a duplicate datasets packet.
      if (pXMLDatasetsDOMRoot)
        return nullptr;

      pXMLDatasetsDOMRoot = pElement;
    } else if (ePacket == XFA_PacketType::Form) {
      // Found a duplicate form packet.
      if (pXMLFormDOMRoot)
        return nullptr;

      pXMLFormDOMRoot = pElement;
    } else if (ePacket == XFA_PacketType::Template) {
      // Found a duplicate template packet.
      if (pXMLTemplateDOMRoot)
        return nullptr;

      CXFA_Node* pPacketNode = ParseAsXDPPacket_Template(pElement);
      // The template is only recorded as found when it parsed successfully.
      if (pPacketNode) {
        pXMLTemplateDOMRoot = pElement;
        pXFARootNode->InsertChildAndNotify(pPacketNode, nullptr);
      }
    } else {
      CXFA_Node* pPacketNode = ParseAsXDPPacket(pElement, ePacket);
      if (pPacketNode) {
        // Packets flagged kSupportOne may appear at most once under the root.
        if (packet_info.has_value() &&
            (packet_info.value().support == XFA_PacketSupport::kSupportOne) &&
            pXFARootNode->GetFirstChildByName(
                FX_HashCode_GetAsIfW(packet_info.value().name))) {
          return nullptr;
        }
        pXFARootNode->InsertChildAndNotify(pPacketNode, nullptr);
      }
    }
  }

  // No template is found.
  if (!pXMLTemplateDOMRoot)
    return nullptr;

  // Datasets and form are parsed last, once the template is attached to the
  // root.
  if (pXMLDatasetsDOMRoot) {
    CXFA_Node* pPacketNode =
        ParseAsXDPPacket(pXMLDatasetsDOMRoot, XFA_PacketType::Datasets);
    if (pPacketNode)
      pXFARootNode->InsertChildAndNotify(pPacketNode, nullptr);
  }
  if (pXMLFormDOMRoot) {
    CXFA_Node* pPacketNode =
        ParseAsXDPPacket(pXMLFormDOMRoot, XFA_PacketType::Form);
    if (pPacketNode)
      pXFARootNode->InsertChildAndNotify(pPacketNode, nullptr);
  }

  pXFARootNode->SetXMLMappingNode(pXMLDocumentNode);
  return pXFARootNode;
}
447
ParseAsXDPPacket_Config(CFX_XMLNode * pXMLDocumentNode)448 CXFA_Node* CXFA_DocumentBuilder::ParseAsXDPPacket_Config(
449 CFX_XMLNode* pXMLDocumentNode) {
450 XFA_PACKETINFO packet = XFA_GetPacketByIndex(XFA_PacketType::Config);
451 if (!MatchNodeName(pXMLDocumentNode, packet.name, packet.uri, packet.match))
452 return nullptr;
453
454 CXFA_Node* pNode =
455 node_factory_->CreateNode(XFA_PacketType::Config, XFA_Element::Config);
456 if (!pNode)
457 return nullptr;
458
459 pNode->JSObject()->SetCData(XFA_Attribute::Name,
460 WideString::FromASCII(packet.name));
461 if (!NormalLoader(pNode, pXMLDocumentNode, XFA_PacketType::Config, true))
462 return nullptr;
463
464 pNode->SetXMLMappingNode(pXMLDocumentNode);
465 return pNode;
466 }
467
ParseAsXDPPacket_Template(CFX_XMLNode * pXMLDocumentNode)468 CXFA_Node* CXFA_DocumentBuilder::ParseAsXDPPacket_Template(
469 CFX_XMLNode* pXMLDocumentNode) {
470 XFA_PACKETINFO packet = XFA_GetPacketByIndex(XFA_PacketType::Template);
471 if (!MatchNodeName(pXMLDocumentNode, packet.name, packet.uri, packet.match))
472 return nullptr;
473
474 CXFA_Node* pNode = node_factory_->CreateNode(XFA_PacketType::Template,
475 XFA_Element::Template);
476 if (!pNode)
477 return nullptr;
478
479 pNode->JSObject()->SetCData(XFA_Attribute::Name,
480 WideString::FromASCII(packet.name));
481
482 CFX_XMLElement* pXMLDocumentElement = ToXMLElement(pXMLDocumentNode);
483 WideString wsNamespaceURI = pXMLDocumentElement->GetNamespaceURI();
484 if (wsNamespaceURI.IsEmpty())
485 wsNamespaceURI = pXMLDocumentElement->GetAttribute(L"xmlns:xfa");
486
487 pNode->GetDocument()->RecognizeXFAVersionNumber(wsNamespaceURI);
488
489 if (!NormalLoader(pNode, pXMLDocumentNode, XFA_PacketType::Template, true))
490 return nullptr;
491
492 pNode->SetXMLMappingNode(pXMLDocumentNode);
493 return pNode;
494 }
495
ParseAsXDPPacket_Form(CFX_XMLNode * pXMLDocumentNode)496 CXFA_Node* CXFA_DocumentBuilder::ParseAsXDPPacket_Form(
497 CFX_XMLNode* pXMLDocumentNode) {
498 XFA_PACKETINFO packet = XFA_GetPacketByIndex(XFA_PacketType::Form);
499 if (!MatchNodeName(pXMLDocumentNode, packet.name, packet.uri, packet.match))
500 return nullptr;
501
502 CXFA_Node* pNode =
503 node_factory_->CreateNode(XFA_PacketType::Form, XFA_Element::Form);
504 if (!pNode)
505 return nullptr;
506
507 pNode->JSObject()->SetCData(XFA_Attribute::Name,
508 WideString::FromASCII(packet.name));
509 CXFA_Template* pTemplateRoot =
510 root_node_->GetFirstChildByClass<CXFA_Template>(XFA_Element::Template);
511 CXFA_Subform* pTemplateChosen =
512 pTemplateRoot ? pTemplateRoot->GetFirstChildByClass<CXFA_Subform>(
513 XFA_Element::Subform)
514 : nullptr;
515 bool bUseAttribute = true;
516 if (pTemplateChosen &&
517 pTemplateChosen->JSObject()->GetEnum(XFA_Attribute::RestoreState) !=
518 XFA_AttributeValue::Auto) {
519 bUseAttribute = false;
520 }
521 if (!NormalLoader(pNode, pXMLDocumentNode, XFA_PacketType::Form,
522 bUseAttribute))
523 return nullptr;
524
525 pNode->SetXMLMappingNode(pXMLDocumentNode);
526 return pNode;
527 }
528
ParseAsXDPPacket_Data(CFX_XMLNode * pXMLDocumentNode)529 CXFA_Node* CXFA_DocumentBuilder::ParseAsXDPPacket_Data(
530 CFX_XMLNode* pXMLDocumentNode) {
531 XFA_PACKETINFO packet = XFA_GetPacketByIndex(XFA_PacketType::Datasets);
532 CFX_XMLNode* pDatasetsXMLNode = GetDataSetsFromXDP(pXMLDocumentNode);
533 if (pDatasetsXMLNode) {
534 CXFA_Node* pNode = node_factory_->CreateNode(XFA_PacketType::Datasets,
535 XFA_Element::DataModel);
536 if (!pNode)
537 return nullptr;
538
539 pNode->JSObject()->SetCData(XFA_Attribute::Name,
540 WideString::FromASCII(packet.name));
541 if (!DataLoader(pNode, pDatasetsXMLNode))
542 return nullptr;
543
544 pNode->SetXMLMappingNode(pDatasetsXMLNode);
545 return pNode;
546 }
547
548 CFX_XMLNode* pDataXMLNode = nullptr;
549 if (MatchNodeName(pXMLDocumentNode, "data", packet.uri, packet.match)) {
550 ToXMLElement(pXMLDocumentNode)->RemoveAttribute(L"xmlns:xfa");
551 pDataXMLNode = pXMLDocumentNode;
552 } else {
553 auto* pDataElement = xml_doc_->CreateNode<CFX_XMLElement>(L"xfa:data");
554 pXMLDocumentNode->RemoveSelfIfParented();
555
556 CFX_XMLElement* pElement = ToXMLElement(pXMLDocumentNode);
557 pElement->RemoveAttribute(L"xmlns:xfa");
558
559 // The node was either removed from the parent above, or already has no
560 // parent so we can take ownership.
561 pDataElement->AppendLastChild(pXMLDocumentNode);
562 pDataXMLNode = pDataElement;
563 }
564 if (!pDataXMLNode)
565 return nullptr;
566
567 CXFA_Node* pNode = node_factory_->CreateNode(XFA_PacketType::Datasets,
568 XFA_Element::DataGroup);
569 if (!pNode)
570 return nullptr;
571
572 WideString wsLocalName = ToXMLElement(pDataXMLNode)->GetLocalTagName();
573 pNode->JSObject()->SetCData(XFA_Attribute::Name, wsLocalName);
574 if (!DataLoader(pNode, pDataXMLNode))
575 return nullptr;
576
577 pNode->SetXMLMappingNode(pDataXMLNode);
578 return pNode;
579 }
580
ParseAsXDPPacket_LocaleConnectionSourceSet(CFX_XMLNode * pXMLDocumentNode,XFA_PacketType packet_type,XFA_Element element)581 CXFA_Node* CXFA_DocumentBuilder::ParseAsXDPPacket_LocaleConnectionSourceSet(
582 CFX_XMLNode* pXMLDocumentNode,
583 XFA_PacketType packet_type,
584 XFA_Element element) {
585 XFA_PACKETINFO packet = XFA_GetPacketByIndex(packet_type);
586 if (!MatchNodeName(pXMLDocumentNode, packet.name, packet.uri, packet.match))
587 return nullptr;
588
589 CXFA_Node* pNode = node_factory_->CreateNode(packet_type, element);
590 if (!pNode)
591 return nullptr;
592
593 pNode->JSObject()->SetCData(XFA_Attribute::Name,
594 WideString::FromASCII(packet.name));
595 if (!NormalLoader(pNode, pXMLDocumentNode, packet_type, true))
596 return nullptr;
597
598 pNode->SetXMLMappingNode(pXMLDocumentNode);
599 return pNode;
600 }
601
ParseAsXDPPacket_Xdc(CFX_XMLNode * pXMLDocumentNode)602 CXFA_Node* CXFA_DocumentBuilder::ParseAsXDPPacket_Xdc(
603 CFX_XMLNode* pXMLDocumentNode) {
604 XFA_PACKETINFO packet = XFA_GetPacketByIndex(XFA_PacketType::Xdc);
605 if (!MatchNodeName(pXMLDocumentNode, packet.name, packet.uri, packet.match))
606 return nullptr;
607
608 CXFA_Node* pNode =
609 node_factory_->CreateNode(XFA_PacketType::Xdc, XFA_Element::Xdc);
610 if (!pNode)
611 return nullptr;
612
613 pNode->JSObject()->SetCData(XFA_Attribute::Name,
614 WideString::FromASCII(packet.name));
615 pNode->SetXMLMappingNode(pXMLDocumentNode);
616 return pNode;
617 }
618
ParseAsXDPPacket_User(CFX_XMLNode * pXMLDocumentNode)619 CXFA_Node* CXFA_DocumentBuilder::ParseAsXDPPacket_User(
620 CFX_XMLNode* pXMLDocumentNode) {
621 CXFA_Node* pNode =
622 node_factory_->CreateNode(XFA_PacketType::Xdp, XFA_Element::Packet);
623 if (!pNode)
624 return nullptr;
625
626 WideString wsName = ToXMLElement(pXMLDocumentNode)->GetLocalTagName();
627 pNode->JSObject()->SetCData(XFA_Attribute::Name, wsName);
628 pNode->SetXMLMappingNode(pXMLDocumentNode);
629 return pNode;
630 }
631
DataLoader(CXFA_Node * pXFANode,CFX_XMLNode * pXMLDoc)632 CXFA_Node* CXFA_DocumentBuilder::DataLoader(CXFA_Node* pXFANode,
633 CFX_XMLNode* pXMLDoc) {
634 ParseDataGroup(pXFANode, pXMLDoc, XFA_PacketType::Datasets);
635 return pXFANode;
636 }
637
// Recursively converts the children of |pXMLDoc| into XFA children of
// |pXFANode| for packet |ePacketID|.
// Element children with a recognised XFA element name become new nodes;
// processing instructions are forwarded to ParseInstruction(); everything
// else is ignored. When |bUseAttribute| is false only the Name and Save
// attributes are copied from XML onto the new nodes.
// Returns |pXFANode| on success, or nullptr when the recursion limit is
// exceeded or a child node cannot be created. The result of the nested
// recursive calls is not checked.
CXFA_Node* CXFA_DocumentBuilder::NormalLoader(CXFA_Node* pXFANode,
                                              CFX_XMLNode* pXMLDoc,
                                              XFA_PacketType ePacketID,
                                              bool bUseAttribute) {
  // Bound the recursion depth; the counter is restored automatically when
  // this frame exits.
  constexpr size_t kMaxExecuteRecursion = 1000;
  if (execute_recursion_depth_ > kMaxExecuteRecursion)
    return nullptr;
  AutoRestorer<size_t> restorer(&execute_recursion_depth_);
  ++execute_recursion_depth_;

  bool bOneOfPropertyFound = false;
  for (CFX_XMLNode* pXMLChild = pXMLDoc->GetFirstChild(); pXMLChild;
       pXMLChild = pXMLChild->GetNextSibling()) {
    switch (pXMLChild->GetType()) {
      case CFX_XMLNode::Type::kElement: {
        CFX_XMLElement* pXMLElement = static_cast<CFX_XMLElement*>(pXMLChild);
        WideString wsTagName = pXMLElement->GetLocalTagName();
        XFA_Element eType = XFA_GetElementByName(wsTagName.AsStringView());
        // Elements that are not part of the XFA vocabulary are skipped.
        if (eType == XFA_Element::Unknown)
          continue;

        // Only the first "one-of" property child is loaded; later ones are
        // dropped (the break leaves the switch, moving on to the next child).
        if (pXFANode->HasPropertyFlag(eType, XFA_PropertyFlag::kOneOf) ||
            pXFANode->HasPropertyFlag(eType, XFA_PropertyFlag::kDefaultOneOf)) {
          if (bOneOfPropertyFound)
            break;
          bOneOfPropertyFound = true;
        }

        CXFA_Node* pXFAChild = node_factory_->CreateNode(ePacketID, eType);
        if (!pXFAChild)
          return nullptr;
        // Config nodes are named after their tag.
        if (ePacketID == XFA_PacketType::Config) {
          pXFAChild->JSObject()->SetAttributeByEnum(XFA_Attribute::Name,
                                                    wsTagName, false);
        }

        bool IsNeedValue = true;
        for (auto it : pXMLElement->GetAttributes()) {
          WideString wsAttrName;
          GetAttributeLocalName(it.first.AsStringView(), wsAttrName);
          // A nil="true" attribute (any prefix) suppresses content parsing
          // below.
          if (wsAttrName.EqualsASCII("nil") && it.second.EqualsASCII("true"))
            IsNeedValue = false;

          absl::optional<XFA_ATTRIBUTEINFO> attr =
              XFA_GetAttributeByName(wsAttrName.AsStringView());
          if (!attr.has_value())
            continue;

          // With attributes disabled, only Name and Save are still applied.
          if (!bUseAttribute && attr.value().attribute != XFA_Attribute::Name &&
              attr.value().attribute != XFA_Attribute::Save) {
            continue;
          }
          pXFAChild->JSObject()->SetAttributeByEnum(attr.value().attribute,
                                                    it.second, false);
        }
        pXFANode->InsertChildAndNotify(pXFAChild, nullptr);
        // Validate and Locale are loaded as content in the config packet and
        // as ordinary containers elsewhere.
        if (eType == XFA_Element::Validate || eType == XFA_Element::Locale) {
          if (ePacketID == XFA_PacketType::Config)
            ParseContentNode(pXFAChild, pXMLElement, ePacketID);
          else
            NormalLoader(pXFAChild, pXMLElement, ePacketID, bUseAttribute);

          break;
        }
        // Content-like object types get their value parsed (unless nil);
        // every other object type is descended into recursively.
        switch (pXFAChild->GetObjectType()) {
          case XFA_ObjectType::ContentNode:
          case XFA_ObjectType::TextNode:
          case XFA_ObjectType::NodeC:
          case XFA_ObjectType::NodeV:
            if (IsNeedValue)
              ParseContentNode(pXFAChild, pXMLElement, ePacketID);
            break;
          default:
            NormalLoader(pXFAChild, pXMLElement, ePacketID, bUseAttribute);
            break;
        }
      } break;
      case CFX_XMLNode::Type::kInstruction:
        ParseInstruction(pXFANode, ToXMLInstruction(pXMLChild), ePacketID);
        break;
      default:
        break;
    }
  }
  return pXFANode;
}
724
ParseContentNode(CXFA_Node * pXFANode,CFX_XMLNode * pXMLNode,XFA_PacketType ePacketID)725 void CXFA_DocumentBuilder::ParseContentNode(CXFA_Node* pXFANode,
726 CFX_XMLNode* pXMLNode,
727 XFA_PacketType ePacketID) {
728 XFA_Element element = XFA_Element::Sharptext;
729 if (pXFANode->GetElementType() == XFA_Element::ExData) {
730 WideString wsContentType =
731 pXFANode->JSObject()->GetCData(XFA_Attribute::ContentType);
732 if (wsContentType.EqualsASCII("text/html"))
733 element = XFA_Element::SharpxHTML;
734 else if (wsContentType.EqualsASCII("text/xml"))
735 element = XFA_Element::Sharpxml;
736 }
737 if (element == XFA_Element::SharpxHTML)
738 pXFANode->SetXMLMappingNode(pXMLNode);
739
740 WideString wsValue;
741 for (CFX_XMLNode* pXMLChild = pXMLNode->GetFirstChild(); pXMLChild;
742 pXMLChild = pXMLChild->GetNextSibling()) {
743 CFX_XMLNode::Type eNodeType = pXMLChild->GetType();
744 if (eNodeType == CFX_XMLNode::Type::kInstruction)
745 continue;
746
747 CFX_XMLElement* pElement = ToXMLElement(pXMLChild);
748 if (element == XFA_Element::SharpxHTML) {
749 if (!pElement)
750 break;
751 if (XFA_RecognizeRichText(pElement))
752 wsValue += GetPlainTextFromRichText(pElement);
753 } else if (element == XFA_Element::Sharpxml) {
754 if (!pElement)
755 break;
756 ConvertXMLToPlainText(pElement, wsValue);
757 } else {
758 if (pElement)
759 break;
760 CFX_XMLText* pText = ToXMLText(pXMLChild);
761 if (pText)
762 wsValue = pText->GetText();
763 }
764 break;
765 }
766 if (!wsValue.IsEmpty()) {
767 if (pXFANode->IsContentNode()) {
768 CXFA_Node* pContentRawDataNode =
769 node_factory_->CreateNode(ePacketID, element);
770 DCHECK(pContentRawDataNode);
771 pContentRawDataNode->JSObject()->SetCData(XFA_Attribute::Value, wsValue);
772 pXFANode->InsertChildAndNotify(pContentRawDataNode, nullptr);
773 } else {
774 pXFANode->JSObject()->SetCData(XFA_Attribute::Value, wsValue);
775 }
776 }
777 }
778
// Recursively converts the children of |pXMLNode| into data nodes under
// |pXFANode|.
// Element children become DataGroup or DataValue nodes (see the
// classification steps below), their XML attributes become metadata DataValue
// children, and non-whitespace text children become unnamed DataValue nodes.
// Returns early, leaving the tree partially built, if any node cannot be
// created. |ePacketID| is only forwarded to the recursive call; new nodes are
// always created in the Datasets packet.
void CXFA_DocumentBuilder::ParseDataGroup(CXFA_Node* pXFANode,
                                          CFX_XMLNode* pXMLNode,
                                          XFA_PacketType ePacketID) {
  for (CFX_XMLNode* pXMLChild = pXMLNode->GetFirstChild(); pXMLChild;
       pXMLChild = pXMLChild->GetNextSibling()) {
    switch (pXMLChild->GetType()) {
      case CFX_XMLNode::Type::kElement: {
        CFX_XMLElement* pXMLElement = static_cast<CFX_XMLElement*>(pXMLChild);
        WideString wsNamespaceURI = pXMLElement->GetNamespaceURI();
        // Elements in the XFA package or XML Schema instance namespaces are
        // not data and are skipped.
        if (wsNamespaceURI.EqualsASCII(
                "http://www.xfa.com/schema/xfa-package/") ||
            wsNamespaceURI.EqualsASCII(
                "http://www.xfa.org/schema/xfa-package/") ||
            wsNamespaceURI.EqualsASCII(
                "http://www.w3.org/2001/XMLSchema-instance")) {
          continue;
        }

        // DataModel serves as the "not yet decided" marker while the node is
        // classified. Step 1: an explicit xfa-data "dataNode" attribute.
        XFA_Element eNodeType = XFA_Element::DataModel;
        if (eNodeType == XFA_Element::DataModel) {
          absl::optional<WideString> wsDataNodeAttr =
              FindAttributeWithNS(pXMLElement, L"dataNode",
                                  L"http://www.xfa.org/schema/xfa-data/1.0/");
          if (wsDataNodeAttr.has_value()) {
            if (wsDataNodeAttr.value().EqualsASCII("dataGroup"))
              eNodeType = XFA_Element::DataGroup;
            else if (wsDataNodeAttr.value().EqualsASCII("dataValue"))
              eNodeType = XFA_Element::DataValue;
          }
        }
        // Step 2: a non-empty xfa-data "contentType" attribute means a value.
        if (eNodeType == XFA_Element::DataModel) {
          absl::optional<WideString> wsContentType =
              FindAttributeWithNS(pXMLElement, L"contentType",
                                  L"http://www.xfa.org/schema/xfa-data/1.0/");
          if (wsContentType.has_value() && !wsContentType.value().IsEmpty())
            eNodeType = XFA_Element::DataValue;
        }
        // Step 3: any child element that is not rich text makes it a group.
        if (eNodeType == XFA_Element::DataModel) {
          for (CFX_XMLNode* pXMLDataChild = pXMLElement->GetFirstChild();
               pXMLDataChild; pXMLDataChild = pXMLDataChild->GetNextSibling()) {
            CFX_XMLElement* pElement = ToXMLElement(pXMLDataChild);
            if (pElement && !XFA_RecognizeRichText(pElement)) {
              eNodeType = XFA_Element::DataGroup;
              break;
            }
          }
        }
        // Step 4: default to a value.
        if (eNodeType == XFA_Element::DataModel)
          eNodeType = XFA_Element::DataValue;

        CXFA_Node* pXFAChild =
            node_factory_->CreateNode(XFA_PacketType::Datasets, eNodeType);
        if (!pXFAChild)
          return;

        pXFAChild->JSObject()->SetCData(XFA_Attribute::Name,
                                        pXMLElement->GetLocalTagName());
        bool bNeedValue = true;

        // Turn each remaining XML attribute into a metadata DataValue child.
        for (auto it : pXMLElement->GetAttributes()) {
          WideString wsName;
          WideString wsNS;
          // Skips xmlns/xml attributes and those whose prefix cannot be
          // resolved.
          if (!ResolveAttribute(pXMLElement, it.first, wsName, wsNS)) {
            continue;
          }
          // nil="true" suppresses value parsing for this element.
          if (wsName.EqualsASCII("nil") && it.second.EqualsASCII("true")) {
            bNeedValue = false;
            continue;
          }
          // Attributes in XFA or schema-instance namespaces are not metadata.
          if (wsNS.EqualsASCII("http://www.xfa.com/schema/xfa-package/") ||
              wsNS.EqualsASCII("http://www.xfa.org/schema/xfa-package/") ||
              wsNS.EqualsASCII("http://www.w3.org/2001/XMLSchema-instance") ||
              wsNS.EqualsASCII("http://www.xfa.org/schema/xfa-data/1.0/")) {
            continue;
          }
          CXFA_Node* pXFAMetaData = node_factory_->CreateNode(
              XFA_PacketType::Datasets, XFA_Element::DataValue);
          if (!pXFAMetaData)
            return;

          pXFAMetaData->JSObject()->SetCData(XFA_Attribute::Name, wsName);
          pXFAMetaData->JSObject()->SetCData(XFA_Attribute::QualifiedName,
                                             it.first);
          pXFAMetaData->JSObject()->SetCData(XFA_Attribute::Value, it.second);
          pXFAMetaData->JSObject()->SetEnum(
              XFA_Attribute::Contains, XFA_AttributeValue::MetaData, false);
          pXFAChild->InsertChildAndNotify(pXFAMetaData, nullptr);
          pXFAMetaData->SetXMLMappingNode(pXMLElement);
          pXFAMetaData->SetFlag(XFA_NodeFlag::kInitialized);
        }

        // The attribute is removed only after the loop, once iteration over
        // the attributes has finished.
        if (!bNeedValue)
          pXMLElement->RemoveAttribute(L"xsi:nil");

        pXFANode->InsertChildAndNotify(pXFAChild, nullptr);
        if (eNodeType == XFA_Element::DataGroup)
          ParseDataGroup(pXFAChild, pXMLElement, ePacketID);
        else if (bNeedValue)
          ParseDataValue(pXFAChild, pXMLChild, XFA_PacketType::Datasets);

        pXFAChild->SetXMLMappingNode(pXMLElement);
        pXFAChild->SetFlag(XFA_NodeFlag::kInitialized);
        continue;
      }
      case CFX_XMLNode::Type::kCharData:
      case CFX_XMLNode::Type::kText: {
        CFX_XMLText* pXMLText = ToXMLText(pXMLChild);
        WideString wsText = pXMLText->GetText();
        // Whitespace-only text between elements carries no data.
        if (IsStringAllWhitespace(wsText))
          continue;

        CXFA_Node* pXFAChild = node_factory_->CreateNode(
            XFA_PacketType::Datasets, XFA_Element::DataValue);
        if (!pXFAChild)
          return;

        pXFAChild->JSObject()->SetCData(XFA_Attribute::Value, wsText);
        pXFANode->InsertChildAndNotify(pXFAChild, nullptr);
        pXFAChild->SetXMLMappingNode(pXMLText);
        pXFAChild->SetFlag(XFA_NodeFlag::kInitialized);
        continue;
      }
      default:
        continue;
    }
  }
}
906
ParseDataValue(CXFA_Node * pXFANode,CFX_XMLNode * pXMLNode,XFA_PacketType ePacketID)907 void CXFA_DocumentBuilder::ParseDataValue(CXFA_Node* pXFANode,
908 CFX_XMLNode* pXMLNode,
909 XFA_PacketType ePacketID) {
910 WideString wsValue;
911 WideString wsCurValue;
912 bool bMarkAsCompound = false;
913 CFX_XMLNode* pXMLCurValueNode = nullptr;
914 for (CFX_XMLNode* pXMLChild = pXMLNode->GetFirstChild(); pXMLChild;
915 pXMLChild = pXMLChild->GetNextSibling()) {
916 CFX_XMLNode::Type eNodeType = pXMLChild->GetType();
917 if (eNodeType == CFX_XMLNode::Type::kInstruction)
918 continue;
919
920 CFX_XMLText* pText = ToXMLText(pXMLChild);
921 if (pText) {
922 WideString wsText = pText->GetText();
923 if (!pXMLCurValueNode)
924 pXMLCurValueNode = pXMLChild;
925 wsCurValue += wsText;
926 continue;
927 }
928 if (XFA_RecognizeRichText(ToXMLElement(pXMLChild))) {
929 WideString wsText = GetPlainTextFromRichText(ToXMLElement(pXMLChild));
930 if (!pXMLCurValueNode)
931 pXMLCurValueNode = pXMLChild;
932 wsCurValue += wsText;
933 continue;
934 }
935 bMarkAsCompound = true;
936 if (pXMLCurValueNode) {
937 if (!wsCurValue.IsEmpty()) {
938 CXFA_Node* pXFAChild =
939 node_factory_->CreateNode(ePacketID, XFA_Element::DataValue);
940 if (!pXFAChild)
941 return;
942
943 pXFAChild->JSObject()->SetCData(XFA_Attribute::Name, WideString());
944 pXFAChild->JSObject()->SetCData(XFA_Attribute::Value, wsCurValue);
945 pXFANode->InsertChildAndNotify(pXFAChild, nullptr);
946 pXFAChild->SetXMLMappingNode(pXMLCurValueNode);
947 pXFAChild->SetFlag(XFA_NodeFlag::kInitialized);
948 wsValue += wsCurValue;
949 wsCurValue.clear();
950 }
951 pXMLCurValueNode = nullptr;
952 }
953 CXFA_Node* pXFAChild =
954 node_factory_->CreateNode(ePacketID, XFA_Element::DataValue);
955 if (!pXFAChild)
956 return;
957
958 WideString wsNodeStr = ToXMLElement(pXMLChild)->GetLocalTagName();
959 pXFAChild->JSObject()->SetCData(XFA_Attribute::Name, wsNodeStr);
960 ParseDataValue(pXFAChild, pXMLChild, ePacketID);
961 pXFANode->InsertChildAndNotify(pXFAChild, nullptr);
962 pXFAChild->SetXMLMappingNode(pXMLChild);
963 pXFAChild->SetFlag(XFA_NodeFlag::kInitialized);
964 wsValue += pXFAChild->JSObject()->GetCData(XFA_Attribute::Value);
965 }
966
967 if (pXMLCurValueNode) {
968 if (!wsCurValue.IsEmpty()) {
969 if (bMarkAsCompound) {
970 CXFA_Node* pXFAChild =
971 node_factory_->CreateNode(ePacketID, XFA_Element::DataValue);
972 if (!pXFAChild)
973 return;
974
975 pXFAChild->JSObject()->SetCData(XFA_Attribute::Name, WideString());
976 pXFAChild->JSObject()->SetCData(XFA_Attribute::Value, wsCurValue);
977 pXFANode->InsertChildAndNotify(pXFAChild, nullptr);
978 pXFAChild->SetXMLMappingNode(pXMLCurValueNode);
979 pXFAChild->SetFlag(XFA_NodeFlag::kInitialized);
980 }
981 wsValue += wsCurValue;
982 wsCurValue.clear();
983 }
984 pXMLCurValueNode = nullptr;
985 }
986 pXFANode->JSObject()->SetCData(XFA_Attribute::Value, wsValue);
987 }
988
ParseInstruction(CXFA_Node * pXFANode,CFX_XMLInstruction * pXMLInstruction,XFA_PacketType ePacketID)989 void CXFA_DocumentBuilder::ParseInstruction(CXFA_Node* pXFANode,
990 CFX_XMLInstruction* pXMLInstruction,
991 XFA_PacketType ePacketID) {
992 const std::vector<WideString>& target_data = pXMLInstruction->GetTargetData();
993 if (pXMLInstruction->IsOriginalXFAVersion()) {
994 if (target_data.size() > 1 &&
995 (pXFANode->GetDocument()->RecognizeXFAVersionNumber(target_data[0]) !=
996 XFA_VERSION_UNKNOWN) &&
997 target_data[1].EqualsASCII("v2.7-scripting:1")) {
998 pXFANode->GetDocument()->set_is_scripting();
999 }
1000 return;
1001 }
1002 if (pXMLInstruction->IsAcrobat()) {
1003 if (target_data.size() > 1 && target_data[0].EqualsASCII("JavaScript") &&
1004 target_data[1].EqualsASCII("strictScoping")) {
1005 pXFANode->GetDocument()->set_is_strict_scoping();
1006 }
1007 }
1008 }
1009