1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #include "xfa/fde/xml/fde_xml_imp.h"
8
9 #include <algorithm>
10 #include <utility>
11
12 #include "core/fxcrt/fx_ext.h"
13 #include "core/fxcrt/fx_safe_types.h"
14 #include "third_party/base/stl_util.h"
15 #include "xfa/fgas/crt/fgas_codepage.h"
16
17 namespace {
18
// 0x10FFFF is the highest code point defined by Unicode.
const uint32_t kMaxCharRange = 0x10ffff;

// Inclusive {first, last} ranges of 16-bit code units accepted as XML
// characters. The rows are sorted in ascending order so that they can be
// binary-searched.
const uint16_t g_XMLValidCharRange[][2] = {
    {0x09, 0x09},      // Horizontal tab.
    {0x0A, 0x0A},      // Line feed.
    {0x0D, 0x0D},      // Carriage return.
    {0x20, 0xD7FF},    // Space up to the last code unit before surrogates.
    {0xE000, 0xFFFD},  // After the surrogates, excluding 0xFFFE and 0xFFFF.
};
26
FDE_IsXMLWhiteSpace(FX_WCHAR ch)27 bool FDE_IsXMLWhiteSpace(FX_WCHAR ch) {
28 return ch == L' ' || ch == 0x0A || ch == 0x0D || ch == 0x09;
29 }
30
// One inclusive range of characters that may appear in an XML name.
struct FDE_XMLNAMECHAR {
  uint16_t wStart;  // First character of the range.
  uint16_t wEnd;    // Last character of the range (inclusive).
  bool bStartChar;  // True if the range may also begin a name.
};
36
37 const FDE_XMLNAMECHAR g_XMLNameChars[] = {
38 {L'-', L'.', false}, {L'0', L'9', false}, {L':', L':', false},
39 {L'A', L'Z', true}, {L'_', L'_', true}, {L'a', L'z', true},
40 {0xB7, 0xB7, false}, {0xC0, 0xD6, true}, {0xD8, 0xF6, true},
41 {0xF8, 0x02FF, true}, {0x0300, 0x036F, false}, {0x0370, 0x037D, true},
42 {0x037F, 0x1FFF, true}, {0x200C, 0x200D, true}, {0x203F, 0x2040, false},
43 {0x2070, 0x218F, true}, {0x2C00, 0x2FEF, true}, {0x3001, 0xD7FF, true},
44 {0xF900, 0xFDCF, true}, {0xFDF0, 0xFFFD, true},
45 };
46
FDE_IsXMLNameChar(FX_WCHAR ch,bool bFirstChar)47 bool FDE_IsXMLNameChar(FX_WCHAR ch, bool bFirstChar) {
48 int32_t iStart = 0;
49 int32_t iEnd = FX_ArraySize(g_XMLNameChars) - 1;
50 while (iStart <= iEnd) {
51 int32_t iMid = (iStart + iEnd) / 2;
52 if (ch < g_XMLNameChars[iMid].wStart) {
53 iEnd = iMid - 1;
54 } else if (ch > g_XMLNameChars[iMid].wEnd) {
55 iStart = iMid + 1;
56 } else {
57 return bFirstChar ? g_XMLNameChars[iMid].bStartChar : true;
58 }
59 }
60 return false;
61 }
62
63 } // namespace
64
FDE_IsXMLValidChar(FX_WCHAR ch)65 bool FDE_IsXMLValidChar(FX_WCHAR ch) {
66 int32_t iStart = 0;
67 int32_t iEnd = FX_ArraySize(g_XMLValidCharRange) - 1;
68 while (iStart <= iEnd) {
69 int32_t iMid = (iStart + iEnd) / 2;
70 if (ch < g_XMLValidCharRange[iMid][0]) {
71 iEnd = iMid - 1;
72 } else if (ch > g_XMLValidCharRange[iMid][1]) {
73 iStart = iMid + 1;
74 } else {
75 return true;
76 }
77 }
78 return false;
79 }
80
CFDE_XMLNode()81 CFDE_XMLNode::CFDE_XMLNode()
82 : m_pParent(nullptr),
83 m_pChild(nullptr),
84 m_pPrior(nullptr),
85 m_pNext(nullptr) {}
86
GetType() const87 FDE_XMLNODETYPE CFDE_XMLNode::GetType() const {
88 return FDE_XMLNODE_Unknown;
89 }
90
~CFDE_XMLNode()91 CFDE_XMLNode::~CFDE_XMLNode() {
92 DeleteChildren();
93 }
94
DeleteChildren()95 void CFDE_XMLNode::DeleteChildren() {
96 CFDE_XMLNode* pChild = m_pChild;
97 while (pChild) {
98 CFDE_XMLNode* pNext = pChild->m_pNext;
99 delete pChild;
100 pChild = pNext;
101 }
102 m_pChild = nullptr;
103 }
104
CountChildNodes() const105 int32_t CFDE_XMLNode::CountChildNodes() const {
106 int32_t iCount = 0;
107 CFDE_XMLNode* pChild = m_pChild;
108 while (pChild) {
109 iCount++;
110 pChild = pChild->m_pNext;
111 }
112 return iCount;
113 }
114
GetChildNode(int32_t index) const115 CFDE_XMLNode* CFDE_XMLNode::GetChildNode(int32_t index) const {
116 CFDE_XMLNode* pChild = m_pChild;
117 while (pChild) {
118 if (index == 0) {
119 return pChild;
120 }
121 index--;
122 pChild = pChild->m_pNext;
123 }
124 return nullptr;
125 }
126
GetChildNodeIndex(CFDE_XMLNode * pNode) const127 int32_t CFDE_XMLNode::GetChildNodeIndex(CFDE_XMLNode* pNode) const {
128 int32_t index = 0;
129 CFDE_XMLNode* pChild = m_pChild;
130 while (pChild) {
131 if (pChild == pNode) {
132 return index;
133 }
134 index++;
135 pChild = pChild->m_pNext;
136 }
137 return -1;
138 }
139
GetPath(const FX_WCHAR * pPath,int32_t iLength,bool bQualifiedName) const140 CFDE_XMLNode* CFDE_XMLNode::GetPath(const FX_WCHAR* pPath,
141 int32_t iLength,
142 bool bQualifiedName) const {
143 ASSERT(pPath);
144 if (iLength < 0) {
145 iLength = FXSYS_wcslen(pPath);
146 }
147 if (iLength == 0) {
148 return nullptr;
149 }
150 CFX_WideString csPath;
151 const FX_WCHAR* pStart = pPath;
152 const FX_WCHAR* pEnd = pPath + iLength;
153 FX_WCHAR ch;
154 while (pStart < pEnd) {
155 ch = *pStart++;
156 if (ch == L'/') {
157 break;
158 } else {
159 csPath += ch;
160 }
161 }
162 iLength -= pStart - pPath;
163 CFDE_XMLNode* pFind = nullptr;
164 if (csPath.GetLength() < 1) {
165 pFind = GetNodeItem(CFDE_XMLNode::Root);
166 } else if (csPath.Compare(L"..") == 0) {
167 pFind = m_pParent;
168 } else if (csPath.Compare(L".") == 0) {
169 pFind = (CFDE_XMLNode*)this;
170 } else {
171 CFX_WideString wsTag;
172 CFDE_XMLNode* pNode = m_pChild;
173 while (pNode) {
174 if (pNode->GetType() == FDE_XMLNODE_Element) {
175 if (bQualifiedName) {
176 ((CFDE_XMLElement*)pNode)->GetTagName(wsTag);
177 } else {
178 ((CFDE_XMLElement*)pNode)->GetLocalTagName(wsTag);
179 }
180 if (wsTag.Compare(csPath) == 0) {
181 if (iLength < 1) {
182 pFind = pNode;
183 } else {
184 pFind = pNode->GetPath(pStart, iLength, bQualifiedName);
185 }
186 if (pFind)
187 return pFind;
188 }
189 }
190 pNode = pNode->m_pNext;
191 }
192 }
193 if (!pFind || iLength < 1)
194 return pFind;
195 return pFind->GetPath(pStart, iLength, bQualifiedName);
196 }
197
InsertChildNode(CFDE_XMLNode * pNode,int32_t index)198 int32_t CFDE_XMLNode::InsertChildNode(CFDE_XMLNode* pNode, int32_t index) {
199 pNode->m_pParent = this;
200 if (!m_pChild) {
201 m_pChild = pNode;
202 pNode->m_pPrior = nullptr;
203 pNode->m_pNext = nullptr;
204 return 0;
205 }
206 if (index == 0) {
207 pNode->m_pNext = m_pChild;
208 pNode->m_pPrior = nullptr;
209 m_pChild->m_pPrior = pNode;
210 m_pChild = pNode;
211 return 0;
212 }
213 int32_t iCount = 0;
214 CFDE_XMLNode* pFind = m_pChild;
215 while (++iCount != index && pFind->m_pNext) {
216 pFind = pFind->m_pNext;
217 }
218 pNode->m_pPrior = pFind;
219 pNode->m_pNext = pFind->m_pNext;
220 if (pFind->m_pNext)
221 pFind->m_pNext->m_pPrior = pNode;
222 pFind->m_pNext = pNode;
223 return iCount;
224 }
225
RemoveChildNode(CFDE_XMLNode * pNode)226 void CFDE_XMLNode::RemoveChildNode(CFDE_XMLNode* pNode) {
227 ASSERT(m_pChild && pNode);
228 if (m_pChild == pNode) {
229 m_pChild = pNode->m_pNext;
230 } else {
231 pNode->m_pPrior->m_pNext = pNode->m_pNext;
232 }
233 if (pNode->m_pNext)
234 pNode->m_pNext->m_pPrior = pNode->m_pPrior;
235 pNode->m_pParent = nullptr;
236 pNode->m_pNext = nullptr;
237 pNode->m_pPrior = nullptr;
238 }
239
GetNodeItem(CFDE_XMLNode::NodeItem eItem) const240 CFDE_XMLNode* CFDE_XMLNode::GetNodeItem(CFDE_XMLNode::NodeItem eItem) const {
241 switch (eItem) {
242 case CFDE_XMLNode::Root: {
243 CFDE_XMLNode* pParent = (CFDE_XMLNode*)this;
244 while (pParent->m_pParent) {
245 pParent = pParent->m_pParent;
246 }
247 return pParent;
248 }
249 case CFDE_XMLNode::Parent:
250 return m_pParent;
251 case CFDE_XMLNode::FirstSibling: {
252 CFDE_XMLNode* pItem = (CFDE_XMLNode*)this;
253 while (pItem->m_pPrior) {
254 pItem = pItem->m_pPrior;
255 }
256 return pItem == (CFDE_XMLNode*)this ? nullptr : pItem;
257 }
258 case CFDE_XMLNode::PriorSibling:
259 return m_pPrior;
260 case CFDE_XMLNode::NextSibling:
261 return m_pNext;
262 case CFDE_XMLNode::LastSibling: {
263 CFDE_XMLNode* pItem = (CFDE_XMLNode*)this;
264 while (pItem->m_pNext)
265 pItem = pItem->m_pNext;
266 return pItem == (CFDE_XMLNode*)this ? nullptr : pItem;
267 }
268 case CFDE_XMLNode::FirstNeighbor: {
269 CFDE_XMLNode* pParent = (CFDE_XMLNode*)this;
270 while (pParent->m_pParent)
271 pParent = pParent->m_pParent;
272 return pParent == (CFDE_XMLNode*)this ? nullptr : pParent;
273 }
274 case CFDE_XMLNode::PriorNeighbor: {
275 if (!m_pPrior)
276 return m_pParent;
277
278 CFDE_XMLNode* pItem = m_pPrior;
279 while (pItem->m_pChild) {
280 pItem = pItem->m_pChild;
281 while (pItem->m_pNext)
282 pItem = pItem->m_pNext;
283 }
284 return pItem;
285 }
286 case CFDE_XMLNode::NextNeighbor: {
287 if (m_pChild)
288 return m_pChild;
289 if (m_pNext)
290 return m_pNext;
291 CFDE_XMLNode* pItem = m_pParent;
292 while (pItem) {
293 if (pItem->m_pNext)
294 return pItem->m_pNext;
295 pItem = pItem->m_pParent;
296 }
297 return nullptr;
298 }
299 case CFDE_XMLNode::LastNeighbor: {
300 CFDE_XMLNode* pItem = (CFDE_XMLNode*)this;
301 while (pItem->m_pParent) {
302 pItem = pItem->m_pParent;
303 }
304 while (true) {
305 while (pItem->m_pNext)
306 pItem = pItem->m_pNext;
307 if (!pItem->m_pChild)
308 break;
309 pItem = pItem->m_pChild;
310 }
311 return pItem == (CFDE_XMLNode*)this ? nullptr : pItem;
312 }
313 case CFDE_XMLNode::FirstChild:
314 return m_pChild;
315 case CFDE_XMLNode::LastChild: {
316 if (!m_pChild)
317 return nullptr;
318
319 CFDE_XMLNode* pChild = m_pChild;
320 while (pChild->m_pNext)
321 pChild = pChild->m_pNext;
322 return pChild;
323 }
324 default:
325 break;
326 }
327 return nullptr;
328 }
329
GetNodeLevel() const330 int32_t CFDE_XMLNode::GetNodeLevel() const {
331 int32_t iLevel = 0;
332 const CFDE_XMLNode* pItem = m_pParent;
333 while (pItem) {
334 iLevel++;
335 pItem = pItem->m_pParent;
336 }
337 return iLevel;
338 }
339
InsertNodeItem(CFDE_XMLNode::NodeItem eItem,CFDE_XMLNode * pNode)340 bool CFDE_XMLNode::InsertNodeItem(CFDE_XMLNode::NodeItem eItem,
341 CFDE_XMLNode* pNode) {
342 switch (eItem) {
343 case CFDE_XMLNode::NextSibling: {
344 pNode->m_pParent = m_pParent;
345 pNode->m_pNext = m_pNext;
346 pNode->m_pPrior = this;
347 if (m_pNext) {
348 m_pNext->m_pPrior = pNode;
349 }
350 m_pNext = pNode;
351 return true;
352 }
353 case CFDE_XMLNode::PriorSibling: {
354 pNode->m_pParent = m_pParent;
355 pNode->m_pNext = this;
356 pNode->m_pPrior = m_pPrior;
357 if (m_pPrior) {
358 m_pPrior->m_pNext = pNode;
359 } else if (m_pParent) {
360 m_pParent->m_pChild = pNode;
361 }
362 m_pPrior = pNode;
363 return true;
364 }
365 default:
366 return false;
367 }
368 }
369
RemoveNodeItem(CFDE_XMLNode::NodeItem eItem)370 CFDE_XMLNode* CFDE_XMLNode::RemoveNodeItem(CFDE_XMLNode::NodeItem eItem) {
371 CFDE_XMLNode* pNode = nullptr;
372 switch (eItem) {
373 case CFDE_XMLNode::NextSibling:
374 if (m_pNext) {
375 pNode = m_pNext;
376 m_pNext = pNode->m_pNext;
377 if (m_pNext) {
378 m_pNext->m_pPrior = this;
379 }
380 pNode->m_pParent = nullptr;
381 pNode->m_pNext = nullptr;
382 pNode->m_pPrior = nullptr;
383 }
384 break;
385 default:
386 break;
387 }
388 return pNode;
389 }
390
Clone(bool bRecursive)391 CFDE_XMLNode* CFDE_XMLNode::Clone(bool bRecursive) {
392 return nullptr;
393 }
394
SaveXMLNode(const CFX_RetainPtr<IFGAS_Stream> & pXMLStream)395 void CFDE_XMLNode::SaveXMLNode(const CFX_RetainPtr<IFGAS_Stream>& pXMLStream) {
396 CFDE_XMLNode* pNode = (CFDE_XMLNode*)this;
397 switch (pNode->GetType()) {
398 case FDE_XMLNODE_Instruction: {
399 CFX_WideString ws;
400 CFDE_XMLInstruction* pInstruction = (CFDE_XMLInstruction*)pNode;
401 if (pInstruction->m_wsTarget.CompareNoCase(L"xml") == 0) {
402 ws = L"<?xml version=\"1.0\" encoding=\"";
403 uint16_t wCodePage = pXMLStream->GetCodePage();
404 if (wCodePage == FX_CODEPAGE_UTF16LE) {
405 ws += L"UTF-16";
406 } else if (wCodePage == FX_CODEPAGE_UTF16BE) {
407 ws += L"UTF-16be";
408 } else {
409 ws += L"UTF-8";
410 }
411 ws += L"\"?>";
412 pXMLStream->WriteString(ws.c_str(), ws.GetLength());
413 } else {
414 ws.Format(L"<?%s", pInstruction->m_wsTarget.c_str());
415 pXMLStream->WriteString(ws.c_str(), ws.GetLength());
416 std::vector<CFX_WideString>& attributes = pInstruction->m_Attributes;
417 int32_t i;
418 int32_t iCount = pdfium::CollectionSize<int32_t>(attributes);
419 CFX_WideString wsValue;
420 for (i = 0; i < iCount; i += 2) {
421 ws = L" ";
422 ws += attributes[i];
423 ws += L"=\"";
424 wsValue = attributes[i + 1];
425 wsValue.Replace(L"&", L"&");
426 wsValue.Replace(L"<", L"<");
427 wsValue.Replace(L">", L">");
428 wsValue.Replace(L"\'", L"'");
429 wsValue.Replace(L"\"", L""");
430 ws += wsValue;
431 ws += L"\"";
432 pXMLStream->WriteString(ws.c_str(), ws.GetLength());
433 }
434 std::vector<CFX_WideString>& targetdata = pInstruction->m_TargetData;
435 iCount = pdfium::CollectionSize<int32_t>(targetdata);
436 for (i = 0; i < iCount; i++) {
437 ws = L" \"";
438 ws += targetdata[i];
439 ws += L"\"";
440 pXMLStream->WriteString(ws.c_str(), ws.GetLength());
441 }
442 ws = L"?>";
443 pXMLStream->WriteString(ws.c_str(), ws.GetLength());
444 }
445 } break;
446 case FDE_XMLNODE_Element: {
447 CFX_WideString ws;
448 ws = L"<";
449 ws += ((CFDE_XMLElement*)pNode)->m_wsTag;
450 pXMLStream->WriteString(ws.c_str(), ws.GetLength());
451 std::vector<CFX_WideString>& attributes =
452 static_cast<CFDE_XMLElement*>(pNode)->m_Attributes;
453 int32_t iCount = pdfium::CollectionSize<int32_t>(attributes);
454 CFX_WideString wsValue;
455 for (int32_t i = 0; i < iCount; i += 2) {
456 ws = L" ";
457 ws += attributes[i];
458 ws += L"=\"";
459 wsValue = attributes[i + 1];
460 wsValue.Replace(L"&", L"&");
461 wsValue.Replace(L"<", L"<");
462 wsValue.Replace(L">", L">");
463 wsValue.Replace(L"\'", L"'");
464 wsValue.Replace(L"\"", L""");
465 ws += wsValue;
466 ws += L"\"";
467 pXMLStream->WriteString(ws.c_str(), ws.GetLength());
468 }
469 if (pNode->m_pChild) {
470 ws = L"\n>";
471 pXMLStream->WriteString(ws.c_str(), ws.GetLength());
472 CFDE_XMLNode* pChild = pNode->m_pChild;
473 while (pChild) {
474 pChild->SaveXMLNode(pXMLStream);
475 pChild = pChild->m_pNext;
476 }
477 ws = L"</";
478 ws += ((CFDE_XMLElement*)pNode)->m_wsTag;
479 ws += L"\n>";
480 } else {
481 ws = L"\n/>";
482 }
483 pXMLStream->WriteString(ws.c_str(), ws.GetLength());
484 } break;
485 case FDE_XMLNODE_Text: {
486 CFX_WideString ws = ((CFDE_XMLText*)pNode)->m_wsText;
487 ws.Replace(L"&", L"&");
488 ws.Replace(L"<", L"<");
489 ws.Replace(L">", L">");
490 ws.Replace(L"\'", L"'");
491 ws.Replace(L"\"", L""");
492 pXMLStream->WriteString(ws.c_str(), ws.GetLength());
493 } break;
494 case FDE_XMLNODE_CharData: {
495 CFX_WideString ws = L"<![CDATA[";
496 ws += ((CFDE_XMLCharData*)pNode)->m_wsCharData;
497 ws += L"]]>";
498 pXMLStream->WriteString(ws.c_str(), ws.GetLength());
499 } break;
500 case FDE_XMLNODE_Unknown:
501 break;
502 default:
503 break;
504 }
505 }
506
CloneChildren(CFDE_XMLNode * pClone)507 void CFDE_XMLNode::CloneChildren(CFDE_XMLNode* pClone) {
508 if (!m_pChild) {
509 return;
510 }
511 CFDE_XMLNode* pNext = m_pChild;
512 CFDE_XMLNode* pCloneNext = pNext->Clone(true);
513 pClone->InsertChildNode(pCloneNext);
514 pNext = pNext->m_pNext;
515 while (pNext) {
516 CFDE_XMLNode* pChild = pNext->Clone(true);
517 pCloneNext->InsertNodeItem(CFDE_XMLNode::NextSibling, pChild);
518 pCloneNext = pChild;
519 pNext = pNext->m_pNext;
520 }
521 }
522
CFDE_XMLInstruction(const CFX_WideString & wsTarget)523 CFDE_XMLInstruction::CFDE_XMLInstruction(const CFX_WideString& wsTarget)
524 : m_wsTarget(wsTarget) {
525 ASSERT(m_wsTarget.GetLength() > 0);
526 }
527
GetType() const528 FDE_XMLNODETYPE CFDE_XMLInstruction::GetType() const {
529 return FDE_XMLNODE_Instruction;
530 }
531
Clone(bool bRecursive)532 CFDE_XMLNode* CFDE_XMLInstruction::Clone(bool bRecursive) {
533 CFDE_XMLInstruction* pClone = new CFDE_XMLInstruction(m_wsTarget);
534 if (!pClone)
535 return nullptr;
536
537 pClone->m_Attributes = m_Attributes;
538 pClone->m_TargetData = m_TargetData;
539 if (bRecursive)
540 CloneChildren(pClone);
541
542 return pClone;
543 }
544
CountAttributes() const545 int32_t CFDE_XMLInstruction::CountAttributes() const {
546 return pdfium::CollectionSize<int32_t>(m_Attributes) / 2;
547 }
548
GetAttribute(int32_t index,CFX_WideString & wsAttriName,CFX_WideString & wsAttriValue) const549 bool CFDE_XMLInstruction::GetAttribute(int32_t index,
550 CFX_WideString& wsAttriName,
551 CFX_WideString& wsAttriValue) const {
552 int32_t iCount = pdfium::CollectionSize<int32_t>(m_Attributes);
553 ASSERT(index > -1 && index < iCount / 2);
554 for (int32_t i = 0; i < iCount; i += 2) {
555 if (index == 0) {
556 wsAttriName = m_Attributes[i];
557 wsAttriValue = m_Attributes[i + 1];
558 return true;
559 }
560 index--;
561 }
562 return false;
563 }
564
HasAttribute(const FX_WCHAR * pwsAttriName) const565 bool CFDE_XMLInstruction::HasAttribute(const FX_WCHAR* pwsAttriName) const {
566 int32_t iCount = pdfium::CollectionSize<int32_t>(m_Attributes);
567 for (int32_t i = 0; i < iCount; i += 2) {
568 if (m_Attributes[i].Compare(pwsAttriName) == 0) {
569 return true;
570 }
571 }
572 return false;
573 }
574
GetString(const FX_WCHAR * pwsAttriName,CFX_WideString & wsAttriValue,const FX_WCHAR * pwsDefValue) const575 void CFDE_XMLInstruction::GetString(const FX_WCHAR* pwsAttriName,
576 CFX_WideString& wsAttriValue,
577 const FX_WCHAR* pwsDefValue) const {
578 int32_t iCount = pdfium::CollectionSize<int32_t>(m_Attributes);
579 for (int32_t i = 0; i < iCount; i += 2) {
580 if (m_Attributes[i].Compare(pwsAttriName) == 0) {
581 wsAttriValue = m_Attributes[i + 1];
582 return;
583 }
584 }
585 wsAttriValue = pwsDefValue;
586 }
587
SetString(const CFX_WideString & wsAttriName,const CFX_WideString & wsAttriValue)588 void CFDE_XMLInstruction::SetString(const CFX_WideString& wsAttriName,
589 const CFX_WideString& wsAttriValue) {
590 ASSERT(wsAttriName.GetLength() > 0);
591 int32_t iCount = pdfium::CollectionSize<int32_t>(m_Attributes);
592 for (int32_t i = 0; i < iCount; i += 2) {
593 if (m_Attributes[i].Compare(wsAttriName) == 0) {
594 m_Attributes[i] = wsAttriName;
595 m_Attributes[i + 1] = wsAttriValue;
596 return;
597 }
598 }
599 m_Attributes.push_back(wsAttriName);
600 m_Attributes.push_back(wsAttriValue);
601 }
602
GetInteger(const FX_WCHAR * pwsAttriName,int32_t iDefValue) const603 int32_t CFDE_XMLInstruction::GetInteger(const FX_WCHAR* pwsAttriName,
604 int32_t iDefValue) const {
605 int32_t iCount = pdfium::CollectionSize<int32_t>(m_Attributes);
606 for (int32_t i = 0; i < iCount; i += 2) {
607 if (m_Attributes[i].Compare(pwsAttriName) == 0) {
608 return FXSYS_wtoi(m_Attributes[i + 1].c_str());
609 }
610 }
611 return iDefValue;
612 }
613
SetInteger(const FX_WCHAR * pwsAttriName,int32_t iAttriValue)614 void CFDE_XMLInstruction::SetInteger(const FX_WCHAR* pwsAttriName,
615 int32_t iAttriValue) {
616 CFX_WideString wsValue;
617 wsValue.Format(L"%d", iAttriValue);
618 SetString(pwsAttriName, wsValue);
619 }
620
GetFloat(const FX_WCHAR * pwsAttriName,FX_FLOAT fDefValue) const621 FX_FLOAT CFDE_XMLInstruction::GetFloat(const FX_WCHAR* pwsAttriName,
622 FX_FLOAT fDefValue) const {
623 int32_t iCount = pdfium::CollectionSize<int32_t>(m_Attributes);
624 for (int32_t i = 0; i < iCount; i += 2) {
625 if (m_Attributes[i].Compare(pwsAttriName) == 0) {
626 return FXSYS_wcstof(m_Attributes[i + 1].c_str(), -1, nullptr);
627 }
628 }
629 return fDefValue;
630 }
631
SetFloat(const FX_WCHAR * pwsAttriName,FX_FLOAT fAttriValue)632 void CFDE_XMLInstruction::SetFloat(const FX_WCHAR* pwsAttriName,
633 FX_FLOAT fAttriValue) {
634 CFX_WideString wsValue;
635 wsValue.Format(L"%f", fAttriValue);
636 SetString(pwsAttriName, wsValue);
637 }
638
RemoveAttribute(const FX_WCHAR * pwsAttriName)639 void CFDE_XMLInstruction::RemoveAttribute(const FX_WCHAR* pwsAttriName) {
640 int32_t iCount = pdfium::CollectionSize<int32_t>(m_Attributes);
641 for (int32_t i = 0; i < iCount; i += 2) {
642 if (m_Attributes[i].Compare(pwsAttriName) == 0) {
643 m_Attributes.erase(m_Attributes.begin() + i,
644 m_Attributes.begin() + i + 2);
645 return;
646 }
647 }
648 }
649
CountData() const650 int32_t CFDE_XMLInstruction::CountData() const {
651 return pdfium::CollectionSize<int32_t>(m_TargetData);
652 }
653
GetData(int32_t index,CFX_WideString & wsData) const654 bool CFDE_XMLInstruction::GetData(int32_t index, CFX_WideString& wsData) const {
655 if (index < 0 || index >= pdfium::CollectionSize<int32_t>(m_TargetData))
656 return false;
657
658 wsData = m_TargetData[index];
659 return true;
660 }
661
AppendData(const CFX_WideString & wsData)662 void CFDE_XMLInstruction::AppendData(const CFX_WideString& wsData) {
663 m_TargetData.push_back(wsData);
664 }
665
RemoveData(int32_t index)666 void CFDE_XMLInstruction::RemoveData(int32_t index) {
667 if (index < 0 || index >= pdfium::CollectionSize<int32_t>(m_TargetData))
668 return;
669
670 m_TargetData.erase(m_TargetData.begin() + index);
671 }
672
~CFDE_XMLInstruction()673 CFDE_XMLInstruction::~CFDE_XMLInstruction() {}
674
CFDE_XMLElement(const CFX_WideString & wsTag)675 CFDE_XMLElement::CFDE_XMLElement(const CFX_WideString& wsTag)
676 : CFDE_XMLNode(), m_wsTag(wsTag), m_Attributes() {
677 ASSERT(m_wsTag.GetLength() > 0);
678 }
679
~CFDE_XMLElement()680 CFDE_XMLElement::~CFDE_XMLElement() {}
681
GetType() const682 FDE_XMLNODETYPE CFDE_XMLElement::GetType() const {
683 return FDE_XMLNODE_Element;
684 }
685
Clone(bool bRecursive)686 CFDE_XMLNode* CFDE_XMLElement::Clone(bool bRecursive) {
687 CFDE_XMLElement* pClone = new CFDE_XMLElement(m_wsTag);
688 if (!pClone)
689 return nullptr;
690
691 pClone->m_Attributes = m_Attributes;
692 if (bRecursive) {
693 CloneChildren(pClone);
694 } else {
695 CFX_WideString wsText;
696 CFDE_XMLNode* pChild = m_pChild;
697 while (pChild) {
698 switch (pChild->GetType()) {
699 case FDE_XMLNODE_Text:
700 wsText += ((CFDE_XMLText*)pChild)->m_wsText;
701 break;
702 default:
703 break;
704 }
705 pChild = pChild->m_pNext;
706 }
707 pClone->SetTextData(wsText);
708 }
709 return pClone;
710 }
711
GetTagName(CFX_WideString & wsTag) const712 void CFDE_XMLElement::GetTagName(CFX_WideString& wsTag) const {
713 wsTag = m_wsTag;
714 }
715
GetLocalTagName(CFX_WideString & wsTag) const716 void CFDE_XMLElement::GetLocalTagName(CFX_WideString& wsTag) const {
717 FX_STRSIZE iFind = m_wsTag.Find(L':', 0);
718 if (iFind < 0) {
719 wsTag = m_wsTag;
720 } else {
721 wsTag = m_wsTag.Right(m_wsTag.GetLength() - iFind - 1);
722 }
723 }
724
GetNamespacePrefix(CFX_WideString & wsPrefix) const725 void CFDE_XMLElement::GetNamespacePrefix(CFX_WideString& wsPrefix) const {
726 FX_STRSIZE iFind = m_wsTag.Find(L':', 0);
727 if (iFind < 0) {
728 wsPrefix.clear();
729 } else {
730 wsPrefix = m_wsTag.Left(iFind);
731 }
732 }
733
GetNamespaceURI(CFX_WideString & wsNamespace) const734 void CFDE_XMLElement::GetNamespaceURI(CFX_WideString& wsNamespace) const {
735 CFX_WideString wsAttri(L"xmlns"), wsPrefix;
736 GetNamespacePrefix(wsPrefix);
737 if (wsPrefix.GetLength() > 0) {
738 wsAttri += L":";
739 wsAttri += wsPrefix;
740 }
741 wsNamespace.clear();
742 CFDE_XMLNode* pNode = (CFDE_XMLNode*)this;
743 while (pNode) {
744 if (pNode->GetType() != FDE_XMLNODE_Element) {
745 break;
746 }
747 CFDE_XMLElement* pElement = (CFDE_XMLElement*)pNode;
748 if (!pElement->HasAttribute(wsAttri.c_str())) {
749 pNode = pNode->GetNodeItem(CFDE_XMLNode::Parent);
750 continue;
751 }
752 pElement->GetString(wsAttri.c_str(), wsNamespace);
753 break;
754 }
755 }
756
CountAttributes() const757 int32_t CFDE_XMLElement::CountAttributes() const {
758 return pdfium::CollectionSize<int32_t>(m_Attributes) / 2;
759 }
760
GetAttribute(int32_t index,CFX_WideString & wsAttriName,CFX_WideString & wsAttriValue) const761 bool CFDE_XMLElement::GetAttribute(int32_t index,
762 CFX_WideString& wsAttriName,
763 CFX_WideString& wsAttriValue) const {
764 int32_t iCount = pdfium::CollectionSize<int32_t>(m_Attributes);
765 ASSERT(index > -1 && index < iCount / 2);
766 for (int32_t i = 0; i < iCount; i += 2) {
767 if (index == 0) {
768 wsAttriName = m_Attributes[i];
769 wsAttriValue = m_Attributes[i + 1];
770 return true;
771 }
772 index--;
773 }
774 return false;
775 }
776
HasAttribute(const FX_WCHAR * pwsAttriName) const777 bool CFDE_XMLElement::HasAttribute(const FX_WCHAR* pwsAttriName) const {
778 int32_t iCount = pdfium::CollectionSize<int32_t>(m_Attributes);
779 for (int32_t i = 0; i < iCount; i += 2) {
780 if (m_Attributes[i].Compare(pwsAttriName) == 0)
781 return true;
782 }
783 return false;
784 }
785
GetString(const FX_WCHAR * pwsAttriName,CFX_WideString & wsAttriValue,const FX_WCHAR * pwsDefValue) const786 void CFDE_XMLElement::GetString(const FX_WCHAR* pwsAttriName,
787 CFX_WideString& wsAttriValue,
788 const FX_WCHAR* pwsDefValue) const {
789 int32_t iCount = pdfium::CollectionSize<int32_t>(m_Attributes);
790 for (int32_t i = 0; i < iCount; i += 2) {
791 if (m_Attributes[i].Compare(pwsAttriName) == 0) {
792 wsAttriValue = m_Attributes[i + 1];
793 return;
794 }
795 }
796 wsAttriValue = pwsDefValue;
797 }
798
SetString(const CFX_WideString & wsAttriName,const CFX_WideString & wsAttriValue)799 void CFDE_XMLElement::SetString(const CFX_WideString& wsAttriName,
800 const CFX_WideString& wsAttriValue) {
801 ASSERT(wsAttriName.GetLength() > 0);
802 int32_t iCount = pdfium::CollectionSize<int32_t>(m_Attributes);
803 for (int32_t i = 0; i < iCount; i += 2) {
804 if (m_Attributes[i].Compare(wsAttriName) == 0) {
805 m_Attributes[i] = wsAttriName;
806 m_Attributes[i + 1] = wsAttriValue;
807 return;
808 }
809 }
810 m_Attributes.push_back(wsAttriName);
811 m_Attributes.push_back(wsAttriValue);
812 }
813
GetInteger(const FX_WCHAR * pwsAttriName,int32_t iDefValue) const814 int32_t CFDE_XMLElement::GetInteger(const FX_WCHAR* pwsAttriName,
815 int32_t iDefValue) const {
816 int32_t iCount = pdfium::CollectionSize<int32_t>(m_Attributes);
817 for (int32_t i = 0; i < iCount; i += 2) {
818 if (m_Attributes[i].Compare(pwsAttriName) == 0) {
819 return FXSYS_wtoi(m_Attributes[i + 1].c_str());
820 }
821 }
822 return iDefValue;
823 }
824
SetInteger(const FX_WCHAR * pwsAttriName,int32_t iAttriValue)825 void CFDE_XMLElement::SetInteger(const FX_WCHAR* pwsAttriName,
826 int32_t iAttriValue) {
827 CFX_WideString wsValue;
828 wsValue.Format(L"%d", iAttriValue);
829 SetString(pwsAttriName, wsValue);
830 }
831
GetFloat(const FX_WCHAR * pwsAttriName,FX_FLOAT fDefValue) const832 FX_FLOAT CFDE_XMLElement::GetFloat(const FX_WCHAR* pwsAttriName,
833 FX_FLOAT fDefValue) const {
834 int32_t iCount = pdfium::CollectionSize<int32_t>(m_Attributes);
835 for (int32_t i = 0; i < iCount; i += 2) {
836 if (m_Attributes[i].Compare(pwsAttriName) == 0) {
837 return FXSYS_wcstof(m_Attributes[i + 1].c_str(), -1, nullptr);
838 }
839 }
840 return fDefValue;
841 }
842
SetFloat(const FX_WCHAR * pwsAttriName,FX_FLOAT fAttriValue)843 void CFDE_XMLElement::SetFloat(const FX_WCHAR* pwsAttriName,
844 FX_FLOAT fAttriValue) {
845 CFX_WideString wsValue;
846 wsValue.Format(L"%f", fAttriValue);
847 SetString(pwsAttriName, wsValue);
848 }
849
RemoveAttribute(const FX_WCHAR * pwsAttriName)850 void CFDE_XMLElement::RemoveAttribute(const FX_WCHAR* pwsAttriName) {
851 int32_t iCount = pdfium::CollectionSize<int32_t>(m_Attributes);
852 for (int32_t i = 0; i < iCount; i += 2) {
853 if (m_Attributes[i].Compare(pwsAttriName) == 0) {
854 m_Attributes.erase(m_Attributes.begin() + i,
855 m_Attributes.begin() + i + 2);
856 return;
857 }
858 }
859 }
860
GetTextData(CFX_WideString & wsText) const861 void CFDE_XMLElement::GetTextData(CFX_WideString& wsText) const {
862 CFX_WideTextBuf buffer;
863 CFDE_XMLNode* pChild = m_pChild;
864 while (pChild) {
865 switch (pChild->GetType()) {
866 case FDE_XMLNODE_Text:
867 buffer << ((CFDE_XMLText*)pChild)->m_wsText;
868 break;
869 case FDE_XMLNODE_CharData:
870 buffer << ((CFDE_XMLCharData*)pChild)->m_wsCharData;
871 break;
872 default:
873 break;
874 }
875 pChild = pChild->m_pNext;
876 }
877 wsText = buffer.AsStringC();
878 }
879
SetTextData(const CFX_WideString & wsText)880 void CFDE_XMLElement::SetTextData(const CFX_WideString& wsText) {
881 if (wsText.GetLength() < 1) {
882 return;
883 }
884 InsertChildNode(new CFDE_XMLText(wsText));
885 }
886
CFDE_XMLText(const CFX_WideString & wsText)887 CFDE_XMLText::CFDE_XMLText(const CFX_WideString& wsText)
888 : CFDE_XMLNode(), m_wsText(wsText) {}
889
GetType() const890 FDE_XMLNODETYPE CFDE_XMLText::GetType() const {
891 return FDE_XMLNODE_Text;
892 }
893
Clone(bool bRecursive)894 CFDE_XMLNode* CFDE_XMLText::Clone(bool bRecursive) {
895 CFDE_XMLText* pClone = new CFDE_XMLText(m_wsText);
896 return pClone;
897 }
898
~CFDE_XMLText()899 CFDE_XMLText::~CFDE_XMLText() {}
900
CFDE_XMLCharData(const CFX_WideString & wsCData)901 CFDE_XMLCharData::CFDE_XMLCharData(const CFX_WideString& wsCData)
902 : CFDE_XMLDeclaration(), m_wsCharData(wsCData) {}
903
GetType() const904 FDE_XMLNODETYPE CFDE_XMLCharData::GetType() const {
905 return FDE_XMLNODE_CharData;
906 }
907
Clone(bool bRecursive)908 CFDE_XMLNode* CFDE_XMLCharData::Clone(bool bRecursive) {
909 CFDE_XMLCharData* pClone = new CFDE_XMLCharData(m_wsCharData);
910 return pClone;
911 }
912
~CFDE_XMLCharData()913 CFDE_XMLCharData::~CFDE_XMLCharData() {}
914
CFDE_XMLDoc()915 CFDE_XMLDoc::CFDE_XMLDoc() : m_pRoot(nullptr) {
916 Reset(true);
917 CFDE_XMLInstruction* pXML = new CFDE_XMLInstruction(L"xml");
918 m_pRoot->InsertChildNode(pXML);
919 }
920
~CFDE_XMLDoc()921 CFDE_XMLDoc::~CFDE_XMLDoc() {
922 Reset(false);
923 }
924
Reset(bool bInitRoot)925 void CFDE_XMLDoc::Reset(bool bInitRoot) {
926 m_iStatus = 0;
927 m_pStream = nullptr;
928 if (bInitRoot) {
929 if (m_pRoot)
930 m_pRoot->DeleteChildren();
931 else
932 m_pRoot = new CFDE_XMLNode;
933 } else {
934 delete m_pRoot;
935 m_pRoot = nullptr;
936 }
937 ReleaseParser();
938 }
939
ReleaseParser()940 void CFDE_XMLDoc::ReleaseParser() {
941 m_pXMLParser.reset();
942 }
943
LoadXML(std::unique_ptr<IFDE_XMLParser> pXMLParser)944 bool CFDE_XMLDoc::LoadXML(std::unique_ptr<IFDE_XMLParser> pXMLParser) {
945 if (!pXMLParser)
946 return false;
947
948 Reset(true);
949 m_pXMLParser = std::move(pXMLParser);
950 return true;
951 }
952
DoLoad(IFX_Pause * pPause)953 int32_t CFDE_XMLDoc::DoLoad(IFX_Pause* pPause) {
954 if (m_iStatus < 100)
955 m_iStatus = m_pXMLParser->DoParser(pPause);
956
957 return m_iStatus;
958 }
959
CloseXML()960 void CFDE_XMLDoc::CloseXML() {
961 ReleaseParser();
962 }
963
SaveXMLNode(const CFX_RetainPtr<IFGAS_Stream> & pXMLStream,CFDE_XMLNode * pINode)964 void CFDE_XMLDoc::SaveXMLNode(const CFX_RetainPtr<IFGAS_Stream>& pXMLStream,
965 CFDE_XMLNode* pINode) {
966 CFDE_XMLNode* pNode = (CFDE_XMLNode*)pINode;
967 switch (pNode->GetType()) {
968 case FDE_XMLNODE_Instruction: {
969 CFX_WideString ws;
970 CFDE_XMLInstruction* pInstruction = (CFDE_XMLInstruction*)pNode;
971 if (pInstruction->m_wsTarget.CompareNoCase(L"xml") == 0) {
972 ws = L"<?xml version=\"1.0\" encoding=\"";
973 uint16_t wCodePage = pXMLStream->GetCodePage();
974 if (wCodePage == FX_CODEPAGE_UTF16LE) {
975 ws += L"UTF-16";
976 } else if (wCodePage == FX_CODEPAGE_UTF16BE) {
977 ws += L"UTF-16be";
978 } else {
979 ws += L"UTF-8";
980 }
981 ws += L"\"?>";
982 pXMLStream->WriteString(ws.c_str(), ws.GetLength());
983 } else {
984 ws.Format(L"<?%s", pInstruction->m_wsTarget.c_str());
985 pXMLStream->WriteString(ws.c_str(), ws.GetLength());
986 std::vector<CFX_WideString>& attributes = pInstruction->m_Attributes;
987 int32_t i;
988 int32_t iCount = pdfium::CollectionSize<int32_t>(attributes);
989 CFX_WideString wsValue;
990 for (i = 0; i < iCount; i += 2) {
991 ws = L" ";
992 ws += attributes[i];
993 ws += L"=\"";
994 wsValue = attributes[i + 1];
995 wsValue.Replace(L"&", L"&");
996 wsValue.Replace(L"<", L"<");
997 wsValue.Replace(L">", L">");
998 wsValue.Replace(L"\'", L"'");
999 wsValue.Replace(L"\"", L""");
1000 ws += wsValue;
1001 ws += L"\"";
1002 pXMLStream->WriteString(ws.c_str(), ws.GetLength());
1003 }
1004 std::vector<CFX_WideString>& targetdata = pInstruction->m_TargetData;
1005 iCount = pdfium::CollectionSize<int32_t>(targetdata);
1006 for (i = 0; i < iCount; i++) {
1007 ws = L" \"";
1008 ws += targetdata[i];
1009 ws += L"\"";
1010 pXMLStream->WriteString(ws.c_str(), ws.GetLength());
1011 }
1012 ws = L"?>";
1013 pXMLStream->WriteString(ws.c_str(), ws.GetLength());
1014 }
1015 } break;
1016 case FDE_XMLNODE_Element: {
1017 CFX_WideString ws;
1018 ws = L"<";
1019 ws += ((CFDE_XMLElement*)pNode)->m_wsTag;
1020 pXMLStream->WriteString(ws.c_str(), ws.GetLength());
1021 std::vector<CFX_WideString>& attributes =
1022 static_cast<CFDE_XMLElement*>(pNode)->m_Attributes;
1023 int32_t iCount = pdfium::CollectionSize<int32_t>(attributes);
1024 CFX_WideString wsValue;
1025 for (int32_t i = 0; i < iCount; i += 2) {
1026 ws = L" ";
1027 ws += attributes[i];
1028 ws += L"=\"";
1029 wsValue = attributes[i + 1];
1030 wsValue.Replace(L"&", L"&");
1031 wsValue.Replace(L"<", L"<");
1032 wsValue.Replace(L">", L">");
1033 wsValue.Replace(L"\'", L"'");
1034 wsValue.Replace(L"\"", L""");
1035 ws += wsValue;
1036 ws += L"\"";
1037 pXMLStream->WriteString(ws.c_str(), ws.GetLength());
1038 }
1039 if (pNode->m_pChild) {
1040 ws = L"\n>";
1041 pXMLStream->WriteString(ws.c_str(), ws.GetLength());
1042 CFDE_XMLNode* pChild = pNode->m_pChild;
1043 while (pChild) {
1044 SaveXMLNode(pXMLStream, static_cast<CFDE_XMLNode*>(pChild));
1045 pChild = pChild->m_pNext;
1046 }
1047 ws = L"</";
1048 ws += ((CFDE_XMLElement*)pNode)->m_wsTag;
1049 ws += L"\n>";
1050 } else {
1051 ws = L"\n/>";
1052 }
1053 pXMLStream->WriteString(ws.c_str(), ws.GetLength());
1054 } break;
1055 case FDE_XMLNODE_Text: {
1056 CFX_WideString ws = ((CFDE_XMLText*)pNode)->m_wsText;
1057 ws.Replace(L"&", L"&");
1058 ws.Replace(L"<", L"<");
1059 ws.Replace(L">", L">");
1060 ws.Replace(L"\'", L"'");
1061 ws.Replace(L"\"", L""");
1062 pXMLStream->WriteString(ws.c_str(), ws.GetLength());
1063 } break;
1064 case FDE_XMLNODE_CharData: {
1065 CFX_WideString ws = L"<![CDATA[";
1066 ws += ((CFDE_XMLCharData*)pNode)->m_wsCharData;
1067 ws += L"]]>";
1068 pXMLStream->WriteString(ws.c_str(), ws.GetLength());
1069 } break;
1070 case FDE_XMLNODE_Unknown:
1071 break;
1072 default:
1073 break;
1074 }
1075 }
1076
SaveXML(CFX_RetainPtr<IFGAS_Stream> & pXMLStream,bool bSaveBOM)1077 void CFDE_XMLDoc::SaveXML(CFX_RetainPtr<IFGAS_Stream>& pXMLStream,
1078 bool bSaveBOM) {
1079 if (!pXMLStream || pXMLStream == m_pStream) {
1080 m_pStream->Seek(FX_STREAMSEEK_Begin, 0);
1081 pXMLStream = m_pStream;
1082 }
1083 ASSERT((pXMLStream->GetAccessModes() & FX_STREAMACCESS_Text) != 0);
1084 ASSERT((pXMLStream->GetAccessModes() & FX_STREAMACCESS_Write) != 0);
1085 uint16_t wCodePage = pXMLStream->GetCodePage();
1086 if (wCodePage != FX_CODEPAGE_UTF16LE && wCodePage != FX_CODEPAGE_UTF16BE &&
1087 wCodePage != FX_CODEPAGE_UTF8) {
1088 wCodePage = FX_CODEPAGE_UTF8;
1089 pXMLStream->SetCodePage(wCodePage);
1090 }
1091 if (bSaveBOM) {
1092 pXMLStream->WriteString(L"\xFEFF", 1);
1093 }
1094 CFDE_XMLNode* pNode = m_pRoot->m_pChild;
1095 while (pNode) {
1096 SaveXMLNode(pXMLStream, static_cast<CFDE_XMLNode*>(pNode));
1097 pNode = pNode->m_pNext;
1098 }
1099 if (pXMLStream == m_pStream) {
1100 int32_t iPos = pXMLStream->GetPosition();
1101 pXMLStream->SetLength(iPos);
1102 }
1103 }
1104
// Creates an empty buffer: no data, no allocated capacity and a start
// position of zero. |iAllocStep| is stored as the per-block allocation size
// (in FX_WCHAR units) used by the methods that allocate blocks. No block is
// allocated here.
CFDE_BlockBuffer::CFDE_BlockBuffer(int32_t iAllocStep)
    : m_iDataLength(0),
      m_iBufferSize(0),
      m_iAllocStep(iAllocStep),
      m_iStartPosition(0) {}
1110
// Releases every block still held by the buffer via ClearBuffer().
CFDE_BlockBuffer::~CFDE_BlockBuffer() {
  ClearBuffer();
}
1114
GetAvailableBlock(int32_t & iIndexInBlock)1115 FX_WCHAR* CFDE_BlockBuffer::GetAvailableBlock(int32_t& iIndexInBlock) {
1116 iIndexInBlock = 0;
1117 if (!m_BlockArray.GetSize()) {
1118 return nullptr;
1119 }
1120 int32_t iRealIndex = m_iStartPosition + m_iDataLength;
1121 if (iRealIndex == m_iBufferSize) {
1122 FX_WCHAR* pBlock = FX_Alloc(FX_WCHAR, m_iAllocStep);
1123 m_BlockArray.Add(pBlock);
1124 m_iBufferSize += m_iAllocStep;
1125 return pBlock;
1126 }
1127 iIndexInBlock = iRealIndex % m_iAllocStep;
1128 return m_BlockArray[iRealIndex / m_iAllocStep];
1129 }
1130
InitBuffer(int32_t iBufferSize)1131 bool CFDE_BlockBuffer::InitBuffer(int32_t iBufferSize) {
1132 ClearBuffer();
1133 int32_t iNumOfBlock = (iBufferSize - 1) / m_iAllocStep + 1;
1134 for (int32_t i = 0; i < iNumOfBlock; i++) {
1135 m_BlockArray.Add(FX_Alloc(FX_WCHAR, m_iAllocStep));
1136 }
1137 m_iBufferSize = iNumOfBlock * m_iAllocStep;
1138 return true;
1139 }
1140
SetTextChar(int32_t iIndex,FX_WCHAR ch)1141 void CFDE_BlockBuffer::SetTextChar(int32_t iIndex, FX_WCHAR ch) {
1142 if (iIndex < 0) {
1143 return;
1144 }
1145 int32_t iRealIndex = m_iStartPosition + iIndex;
1146 int32_t iBlockIndex = iRealIndex / m_iAllocStep;
1147 int32_t iInnerIndex = iRealIndex % m_iAllocStep;
1148 int32_t iBlockSize = m_BlockArray.GetSize();
1149 if (iBlockIndex >= iBlockSize) {
1150 int32_t iNewBlocks = iBlockIndex - iBlockSize + 1;
1151 do {
1152 FX_WCHAR* pBlock = FX_Alloc(FX_WCHAR, m_iAllocStep);
1153 m_BlockArray.Add(pBlock);
1154 m_iBufferSize += m_iAllocStep;
1155 } while (--iNewBlocks);
1156 }
1157 FX_WCHAR* pTextData = m_BlockArray[iBlockIndex];
1158 *(pTextData + iInnerIndex) = ch;
1159 if (m_iDataLength <= iIndex) {
1160 m_iDataLength = iIndex + 1;
1161 }
1162 }
1163
DeleteTextChars(int32_t iCount,bool bDirection)1164 int32_t CFDE_BlockBuffer::DeleteTextChars(int32_t iCount, bool bDirection) {
1165 if (iCount <= 0) {
1166 return m_iDataLength;
1167 }
1168 if (iCount >= m_iDataLength) {
1169 Reset(false);
1170 return 0;
1171 }
1172 if (bDirection) {
1173 m_iStartPosition += iCount;
1174 m_iDataLength -= iCount;
1175 } else {
1176 m_iDataLength -= iCount;
1177 }
1178 return m_iDataLength;
1179 }
1180
GetTextData(CFX_WideString & wsTextData,int32_t iStart,int32_t iLength) const1181 void CFDE_BlockBuffer::GetTextData(CFX_WideString& wsTextData,
1182 int32_t iStart,
1183 int32_t iLength) const {
1184 wsTextData.clear();
1185 int32_t iMaybeDataLength = m_iBufferSize - 1 - m_iStartPosition;
1186 if (iStart < 0 || iStart > iMaybeDataLength) {
1187 return;
1188 }
1189 if (iLength == -1 || iLength > iMaybeDataLength) {
1190 iLength = iMaybeDataLength;
1191 }
1192 if (iLength <= 0) {
1193 return;
1194 }
1195 FX_WCHAR* pBuf = wsTextData.GetBuffer(iLength);
1196 if (!pBuf) {
1197 return;
1198 }
1199 int32_t iStartBlockIndex = 0;
1200 int32_t iStartInnerIndex = 0;
1201 TextDataIndex2BufIndex(iStart, iStartBlockIndex, iStartInnerIndex);
1202 int32_t iEndBlockIndex = 0;
1203 int32_t iEndInnerIndex = 0;
1204 TextDataIndex2BufIndex(iStart + iLength, iEndBlockIndex, iEndInnerIndex);
1205 int32_t iPointer = 0;
1206 for (int32_t i = iStartBlockIndex; i <= iEndBlockIndex; i++) {
1207 int32_t iBufferPointer = 0;
1208 int32_t iCopyLength = m_iAllocStep;
1209 if (i == iStartBlockIndex) {
1210 iCopyLength -= iStartInnerIndex;
1211 iBufferPointer = iStartInnerIndex;
1212 }
1213 if (i == iEndBlockIndex) {
1214 iCopyLength -= ((m_iAllocStep - 1) - iEndInnerIndex);
1215 }
1216 FX_WCHAR* pBlockBuf = m_BlockArray[i];
1217 FXSYS_memcpy(pBuf + iPointer, pBlockBuf + iBufferPointer,
1218 iCopyLength * sizeof(FX_WCHAR));
1219 iPointer += iCopyLength;
1220 }
1221 wsTextData.ReleaseBuffer(iLength);
1222 }
1223
TextDataIndex2BufIndex(const int32_t iIndex,int32_t & iBlockIndex,int32_t & iInnerIndex) const1224 void CFDE_BlockBuffer::TextDataIndex2BufIndex(const int32_t iIndex,
1225 int32_t& iBlockIndex,
1226 int32_t& iInnerIndex) const {
1227 ASSERT(iIndex >= 0);
1228 int32_t iRealIndex = m_iStartPosition + iIndex;
1229 iBlockIndex = iRealIndex / m_iAllocStep;
1230 iInnerIndex = iRealIndex % m_iAllocStep;
1231 }
1232
ClearBuffer()1233 void CFDE_BlockBuffer::ClearBuffer() {
1234 m_iBufferSize = 0;
1235 int32_t iSize = m_BlockArray.GetSize();
1236 for (int32_t i = 0; i < iSize; i++) {
1237 FX_Free(m_BlockArray[i]);
1238 }
1239 m_BlockArray.RemoveAll();
1240 }
1241
// Constructs an idle parser: no stream, no read buffer, no parsed data, the
// result set to None and the state machine starting in the Text state. The
// node stack and the skip stack are both constructed with an argument of 16.
// Init() must be called before parsing.
CFDE_XMLSyntaxParser::CFDE_XMLSyntaxParser()
    : m_pStream(nullptr),
      m_iXMLPlaneSize(-1),
      m_iCurrentPos(0),
      m_iCurrentNodeNum(-1),
      m_iLastNodeNum(-1),
      m_iParsedChars(0),
      m_iParsedBytes(0),
      m_pBuffer(nullptr),
      m_iBufferChars(0),
      m_bEOS(false),
      m_pStart(nullptr),
      m_pEnd(nullptr),
      m_XMLNodeStack(16),
      // Both values are taken from m_BlockBuffer's accessors.
      // NOTE(review): GetDataLengthRef() suggests m_iDataLength is a reference
      // that tracks the block buffer's data length - confirm in the header.
      m_iAllocStep(m_BlockBuffer.GetAllocStep()),
      m_iDataLength(m_BlockBuffer.GetDataLengthRef()),
      m_pCurrentBlock(nullptr),
      m_iIndexInBlock(0),
      m_iTextDataLength(0),
      m_syntaxParserResult(FDE_XmlSyntaxResult::None),
      m_syntaxParserState(FDE_XmlSyntaxState::Text),
      m_wQuotationMark(0),
      // -1 means "not currently inside an entity reference".
      m_iEntityStart(-1),
      m_SkipStack(16) {
  // No node is open yet.
  m_CurNode.iNodeNum = -1;
  m_CurNode.eNodeType = FDE_XMLNODE_Unknown;
}
1269
Init(const CFX_RetainPtr<IFGAS_Stream> & pStream,int32_t iXMLPlaneSize,int32_t iTextDataSize)1270 void CFDE_XMLSyntaxParser::Init(const CFX_RetainPtr<IFGAS_Stream>& pStream,
1271 int32_t iXMLPlaneSize,
1272 int32_t iTextDataSize) {
1273 ASSERT(!m_pStream && !m_pBuffer);
1274 ASSERT(pStream && iXMLPlaneSize > 0);
1275 int32_t iStreamLength = pStream->GetLength();
1276 ASSERT(iStreamLength > 0);
1277 m_pStream = pStream;
1278 m_iXMLPlaneSize = std::min(iXMLPlaneSize, iStreamLength);
1279 uint8_t bom[4];
1280 m_iCurrentPos = m_pStream->GetBOM(bom);
1281 ASSERT(!m_pBuffer);
1282
1283 FX_SAFE_INT32 alloc_size_safe = m_iXMLPlaneSize;
1284 alloc_size_safe += 1; // For NUL.
1285 if (!alloc_size_safe.IsValid() || alloc_size_safe.ValueOrDie() <= 0) {
1286 m_syntaxParserResult = FDE_XmlSyntaxResult::Error;
1287 return;
1288 }
1289
1290 m_pBuffer = FX_Alloc(
1291 FX_WCHAR, pdfium::base::ValueOrDieForType<size_t>(alloc_size_safe));
1292 m_pStart = m_pEnd = m_pBuffer;
1293 ASSERT(!m_BlockBuffer.IsInitialized());
1294 m_BlockBuffer.InitBuffer();
1295 m_pCurrentBlock = m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
1296 m_iParsedBytes = m_iParsedChars = 0;
1297 m_iBufferChars = 0;
1298 }
1299
DoSyntaxParse()1300 FDE_XmlSyntaxResult CFDE_XMLSyntaxParser::DoSyntaxParse() {
1301 if (m_syntaxParserResult == FDE_XmlSyntaxResult::Error ||
1302 m_syntaxParserResult == FDE_XmlSyntaxResult::EndOfString) {
1303 return m_syntaxParserResult;
1304 }
1305 ASSERT(m_pStream && m_pBuffer && m_BlockBuffer.IsInitialized());
1306 int32_t iStreamLength = m_pStream->GetLength();
1307 int32_t iPos;
1308
1309 FDE_XmlSyntaxResult syntaxParserResult = FDE_XmlSyntaxResult::None;
1310 while (true) {
1311 if (m_pStart >= m_pEnd) {
1312 if (m_bEOS || m_iCurrentPos >= iStreamLength) {
1313 m_syntaxParserResult = FDE_XmlSyntaxResult::EndOfString;
1314 return m_syntaxParserResult;
1315 }
1316 m_iParsedChars += (m_pEnd - m_pBuffer);
1317 m_iParsedBytes = m_iCurrentPos;
1318 if (m_pStream->GetPosition() != m_iCurrentPos) {
1319 m_pStream->Seek(FX_STREAMSEEK_Begin, m_iCurrentPos);
1320 }
1321 m_iBufferChars =
1322 m_pStream->ReadString(m_pBuffer, m_iXMLPlaneSize, m_bEOS);
1323 iPos = m_pStream->GetPosition();
1324 if (m_iBufferChars < 1) {
1325 m_iCurrentPos = iStreamLength;
1326 m_syntaxParserResult = FDE_XmlSyntaxResult::EndOfString;
1327 return m_syntaxParserResult;
1328 }
1329 m_iCurrentPos = iPos;
1330 m_pStart = m_pBuffer;
1331 m_pEnd = m_pBuffer + m_iBufferChars;
1332 }
1333
1334 while (m_pStart < m_pEnd) {
1335 FX_WCHAR ch = *m_pStart;
1336 switch (m_syntaxParserState) {
1337 case FDE_XmlSyntaxState::Text:
1338 if (ch == L'<') {
1339 if (m_iDataLength > 0) {
1340 m_iTextDataLength = m_iDataLength;
1341 m_BlockBuffer.Reset();
1342 m_pCurrentBlock =
1343 m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
1344 m_iEntityStart = -1;
1345 syntaxParserResult = FDE_XmlSyntaxResult::Text;
1346 } else {
1347 m_pStart++;
1348 m_syntaxParserState = FDE_XmlSyntaxState::Node;
1349 }
1350 } else {
1351 ParseTextChar(ch);
1352 }
1353 break;
1354 case FDE_XmlSyntaxState::Node:
1355 if (ch == L'!') {
1356 m_pStart++;
1357 m_syntaxParserState = FDE_XmlSyntaxState::SkipCommentOrDecl;
1358 } else if (ch == L'/') {
1359 m_pStart++;
1360 m_syntaxParserState = FDE_XmlSyntaxState::CloseElement;
1361 } else if (ch == L'?') {
1362 m_iLastNodeNum++;
1363 m_iCurrentNodeNum = m_iLastNodeNum;
1364 m_CurNode.iNodeNum = m_iLastNodeNum;
1365 m_CurNode.eNodeType = FDE_XMLNODE_Instruction;
1366 m_XMLNodeStack.Push(m_CurNode);
1367 m_pStart++;
1368 m_syntaxParserState = FDE_XmlSyntaxState::Target;
1369 syntaxParserResult = FDE_XmlSyntaxResult::InstructionOpen;
1370 } else {
1371 m_iLastNodeNum++;
1372 m_iCurrentNodeNum = m_iLastNodeNum;
1373 m_CurNode.iNodeNum = m_iLastNodeNum;
1374 m_CurNode.eNodeType = FDE_XMLNODE_Element;
1375 m_XMLNodeStack.Push(m_CurNode);
1376 m_syntaxParserState = FDE_XmlSyntaxState::Tag;
1377 syntaxParserResult = FDE_XmlSyntaxResult::ElementOpen;
1378 }
1379 break;
1380 case FDE_XmlSyntaxState::Target:
1381 case FDE_XmlSyntaxState::Tag:
1382 if (!FDE_IsXMLNameChar(ch, m_iDataLength < 1)) {
1383 if (m_iDataLength < 1) {
1384 m_syntaxParserResult = FDE_XmlSyntaxResult::Error;
1385 return m_syntaxParserResult;
1386 } else {
1387 m_iTextDataLength = m_iDataLength;
1388 m_BlockBuffer.Reset();
1389 m_pCurrentBlock =
1390 m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
1391 if (m_syntaxParserState != FDE_XmlSyntaxState::Target) {
1392 syntaxParserResult = FDE_XmlSyntaxResult::TagName;
1393 } else {
1394 syntaxParserResult = FDE_XmlSyntaxResult::TargetName;
1395 }
1396 m_syntaxParserState = FDE_XmlSyntaxState::AttriName;
1397 }
1398 } else {
1399 if (m_iIndexInBlock == m_iAllocStep) {
1400 m_pCurrentBlock =
1401 m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
1402 if (!m_pCurrentBlock) {
1403 return FDE_XmlSyntaxResult::Error;
1404 }
1405 }
1406 m_pCurrentBlock[m_iIndexInBlock++] = ch;
1407 m_iDataLength++;
1408 m_pStart++;
1409 }
1410 break;
1411 case FDE_XmlSyntaxState::AttriName:
1412 if (m_iDataLength < 1 && FDE_IsXMLWhiteSpace(ch)) {
1413 m_pStart++;
1414 break;
1415 }
1416 if (!FDE_IsXMLNameChar(ch, m_iDataLength < 1)) {
1417 if (m_iDataLength < 1) {
1418 if (m_CurNode.eNodeType == FDE_XMLNODE_Element) {
1419 if (ch == L'>' || ch == L'/') {
1420 m_syntaxParserState = FDE_XmlSyntaxState::BreakElement;
1421 break;
1422 }
1423 } else if (m_CurNode.eNodeType == FDE_XMLNODE_Instruction) {
1424 if (ch == L'?') {
1425 m_syntaxParserState = FDE_XmlSyntaxState::CloseInstruction;
1426 m_pStart++;
1427 } else {
1428 m_syntaxParserState = FDE_XmlSyntaxState::TargetData;
1429 }
1430 break;
1431 }
1432 m_syntaxParserResult = FDE_XmlSyntaxResult::Error;
1433 return m_syntaxParserResult;
1434 } else {
1435 if (m_CurNode.eNodeType == FDE_XMLNODE_Instruction) {
1436 if (ch != '=' && !FDE_IsXMLWhiteSpace(ch)) {
1437 m_syntaxParserState = FDE_XmlSyntaxState::TargetData;
1438 break;
1439 }
1440 }
1441 m_iTextDataLength = m_iDataLength;
1442 m_BlockBuffer.Reset();
1443 m_pCurrentBlock =
1444 m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
1445 m_syntaxParserState = FDE_XmlSyntaxState::AttriEqualSign;
1446 syntaxParserResult = FDE_XmlSyntaxResult::AttriName;
1447 }
1448 } else {
1449 if (m_iIndexInBlock == m_iAllocStep) {
1450 m_pCurrentBlock =
1451 m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
1452 if (!m_pCurrentBlock) {
1453 return FDE_XmlSyntaxResult::Error;
1454 }
1455 }
1456 m_pCurrentBlock[m_iIndexInBlock++] = ch;
1457 m_iDataLength++;
1458 m_pStart++;
1459 }
1460 break;
1461 case FDE_XmlSyntaxState::AttriEqualSign:
1462 if (FDE_IsXMLWhiteSpace(ch)) {
1463 m_pStart++;
1464 break;
1465 }
1466 if (ch != L'=') {
1467 if (m_CurNode.eNodeType == FDE_XMLNODE_Instruction) {
1468 m_syntaxParserState = FDE_XmlSyntaxState::TargetData;
1469 break;
1470 }
1471 m_syntaxParserResult = FDE_XmlSyntaxResult::Error;
1472 return m_syntaxParserResult;
1473 } else {
1474 m_syntaxParserState = FDE_XmlSyntaxState::AttriQuotation;
1475 m_pStart++;
1476 }
1477 break;
1478 case FDE_XmlSyntaxState::AttriQuotation:
1479 if (FDE_IsXMLWhiteSpace(ch)) {
1480 m_pStart++;
1481 break;
1482 }
1483 if (ch != L'\"' && ch != L'\'') {
1484 m_syntaxParserResult = FDE_XmlSyntaxResult::Error;
1485 return m_syntaxParserResult;
1486 } else {
1487 m_wQuotationMark = ch;
1488 m_syntaxParserState = FDE_XmlSyntaxState::AttriValue;
1489 m_pStart++;
1490 }
1491 break;
1492 case FDE_XmlSyntaxState::AttriValue:
1493 if (ch == m_wQuotationMark) {
1494 if (m_iEntityStart > -1) {
1495 m_syntaxParserResult = FDE_XmlSyntaxResult::Error;
1496 return m_syntaxParserResult;
1497 }
1498 m_iTextDataLength = m_iDataLength;
1499 m_wQuotationMark = 0;
1500 m_BlockBuffer.Reset();
1501 m_pCurrentBlock = m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
1502 m_pStart++;
1503 m_syntaxParserState = FDE_XmlSyntaxState::AttriName;
1504 syntaxParserResult = FDE_XmlSyntaxResult::AttriValue;
1505 } else {
1506 ParseTextChar(ch);
1507 }
1508 break;
1509 case FDE_XmlSyntaxState::CloseInstruction:
1510 if (ch != L'>') {
1511 if (m_iIndexInBlock == m_iAllocStep) {
1512 m_pCurrentBlock =
1513 m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
1514 if (!m_pCurrentBlock) {
1515 return FDE_XmlSyntaxResult::Error;
1516 }
1517 }
1518 m_pCurrentBlock[m_iIndexInBlock++] = ch;
1519 m_iDataLength++;
1520 m_syntaxParserState = FDE_XmlSyntaxState::TargetData;
1521 } else if (m_iDataLength > 0) {
1522 m_iTextDataLength = m_iDataLength;
1523 m_BlockBuffer.Reset();
1524 m_pCurrentBlock = m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
1525 syntaxParserResult = FDE_XmlSyntaxResult::TargetData;
1526 } else {
1527 m_pStart++;
1528 FDE_XMLNODE* pXMLNode = m_XMLNodeStack.GetTopElement();
1529 if (!pXMLNode) {
1530 m_syntaxParserResult = FDE_XmlSyntaxResult::Error;
1531 return m_syntaxParserResult;
1532 }
1533 m_XMLNodeStack.Pop();
1534 pXMLNode = m_XMLNodeStack.GetTopElement();
1535 if (pXMLNode) {
1536 m_CurNode = *pXMLNode;
1537 } else {
1538 m_CurNode.iNodeNum = -1;
1539 m_CurNode.eNodeType = FDE_XMLNODE_Unknown;
1540 }
1541 m_iCurrentNodeNum = m_CurNode.iNodeNum;
1542 m_BlockBuffer.Reset();
1543 m_pCurrentBlock = m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
1544 m_syntaxParserState = FDE_XmlSyntaxState::Text;
1545 syntaxParserResult = FDE_XmlSyntaxResult::InstructionClose;
1546 }
1547 break;
1548 case FDE_XmlSyntaxState::BreakElement:
1549 if (ch == L'>') {
1550 m_syntaxParserState = FDE_XmlSyntaxState::Text;
1551 syntaxParserResult = FDE_XmlSyntaxResult::ElementBreak;
1552 } else if (ch == L'/') {
1553 m_syntaxParserState = FDE_XmlSyntaxState::CloseElement;
1554 } else {
1555 m_syntaxParserResult = FDE_XmlSyntaxResult::Error;
1556 return m_syntaxParserResult;
1557 }
1558 m_pStart++;
1559 break;
1560 case FDE_XmlSyntaxState::CloseElement:
1561 if (!FDE_IsXMLNameChar(ch, m_iDataLength < 1)) {
1562 if (ch == L'>') {
1563 FDE_XMLNODE* pXMLNode = m_XMLNodeStack.GetTopElement();
1564 if (!pXMLNode) {
1565 m_syntaxParserResult = FDE_XmlSyntaxResult::Error;
1566 return m_syntaxParserResult;
1567 }
1568 m_XMLNodeStack.Pop();
1569 pXMLNode = m_XMLNodeStack.GetTopElement();
1570 if (pXMLNode) {
1571 m_CurNode = *pXMLNode;
1572 } else {
1573 m_CurNode.iNodeNum = -1;
1574 m_CurNode.eNodeType = FDE_XMLNODE_Unknown;
1575 }
1576 m_iCurrentNodeNum = m_CurNode.iNodeNum;
1577 m_iTextDataLength = m_iDataLength;
1578 m_BlockBuffer.Reset();
1579 m_pCurrentBlock =
1580 m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
1581 m_syntaxParserState = FDE_XmlSyntaxState::Text;
1582 syntaxParserResult = FDE_XmlSyntaxResult::ElementClose;
1583 } else if (!FDE_IsXMLWhiteSpace(ch)) {
1584 m_syntaxParserResult = FDE_XmlSyntaxResult::Error;
1585 return m_syntaxParserResult;
1586 }
1587 } else {
1588 if (m_iIndexInBlock == m_iAllocStep) {
1589 m_pCurrentBlock =
1590 m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
1591 if (!m_pCurrentBlock) {
1592 return FDE_XmlSyntaxResult::Error;
1593 }
1594 }
1595 m_pCurrentBlock[m_iIndexInBlock++] = ch;
1596 m_iDataLength++;
1597 }
1598 m_pStart++;
1599 break;
1600 case FDE_XmlSyntaxState::SkipCommentOrDecl:
1601 if (FXSYS_wcsnicmp(m_pStart, L"--", 2) == 0) {
1602 m_pStart += 2;
1603 m_syntaxParserState = FDE_XmlSyntaxState::SkipComment;
1604 } else if (FXSYS_wcsnicmp(m_pStart, L"[CDATA[", 7) == 0) {
1605 m_pStart += 7;
1606 m_syntaxParserState = FDE_XmlSyntaxState::SkipCData;
1607 } else {
1608 m_syntaxParserState = FDE_XmlSyntaxState::SkipDeclNode;
1609 m_SkipChar = L'>';
1610 m_SkipStack.Push(L'>');
1611 }
1612 break;
1613 case FDE_XmlSyntaxState::SkipCData: {
1614 if (FXSYS_wcsnicmp(m_pStart, L"]]>", 3) == 0) {
1615 m_pStart += 3;
1616 syntaxParserResult = FDE_XmlSyntaxResult::CData;
1617 m_iTextDataLength = m_iDataLength;
1618 m_BlockBuffer.Reset();
1619 m_pCurrentBlock = m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
1620 m_syntaxParserState = FDE_XmlSyntaxState::Text;
1621 } else {
1622 if (m_iIndexInBlock == m_iAllocStep) {
1623 m_pCurrentBlock =
1624 m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
1625 if (!m_pCurrentBlock)
1626 return FDE_XmlSyntaxResult::Error;
1627 }
1628 m_pCurrentBlock[m_iIndexInBlock++] = ch;
1629 m_iDataLength++;
1630 m_pStart++;
1631 }
1632 break;
1633 }
1634 case FDE_XmlSyntaxState::SkipDeclNode:
1635 if (m_SkipChar == L'\'' || m_SkipChar == L'\"') {
1636 m_pStart++;
1637 if (ch != m_SkipChar)
1638 break;
1639
1640 m_SkipStack.Pop();
1641 uint32_t* pDWord = m_SkipStack.GetTopElement();
1642 if (!pDWord)
1643 m_syntaxParserState = FDE_XmlSyntaxState::Text;
1644 else
1645 m_SkipChar = (FX_WCHAR)*pDWord;
1646 } else {
1647 switch (ch) {
1648 case L'<':
1649 m_SkipChar = L'>';
1650 m_SkipStack.Push(L'>');
1651 break;
1652 case L'[':
1653 m_SkipChar = L']';
1654 m_SkipStack.Push(L']');
1655 break;
1656 case L'(':
1657 m_SkipChar = L')';
1658 m_SkipStack.Push(L')');
1659 break;
1660 case L'\'':
1661 m_SkipChar = L'\'';
1662 m_SkipStack.Push(L'\'');
1663 break;
1664 case L'\"':
1665 m_SkipChar = L'\"';
1666 m_SkipStack.Push(L'\"');
1667 break;
1668 default:
1669 if (ch == m_SkipChar) {
1670 m_SkipStack.Pop();
1671 uint32_t* pDWord = m_SkipStack.GetTopElement();
1672 if (!pDWord) {
1673 if (m_iDataLength >= 9) {
1674 CFX_WideString wsHeader;
1675 m_BlockBuffer.GetTextData(wsHeader, 0, 7);
1676 }
1677 m_iTextDataLength = m_iDataLength;
1678 m_BlockBuffer.Reset();
1679 m_pCurrentBlock =
1680 m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
1681 m_syntaxParserState = FDE_XmlSyntaxState::Text;
1682 } else {
1683 m_SkipChar = static_cast<FX_WCHAR>(*pDWord);
1684 }
1685 }
1686 break;
1687 }
1688 if (m_SkipStack.GetSize() > 0) {
1689 if (m_iIndexInBlock == m_iAllocStep) {
1690 m_pCurrentBlock =
1691 m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
1692 if (!m_pCurrentBlock) {
1693 return FDE_XmlSyntaxResult::Error;
1694 }
1695 }
1696 m_pCurrentBlock[m_iIndexInBlock++] = ch;
1697 m_iDataLength++;
1698 }
1699 m_pStart++;
1700 }
1701 break;
1702 case FDE_XmlSyntaxState::SkipComment:
1703 if (FXSYS_wcsnicmp(m_pStart, L"-->", 3) == 0) {
1704 m_pStart += 2;
1705 m_syntaxParserState = FDE_XmlSyntaxState::Text;
1706 }
1707
1708 m_pStart++;
1709 break;
1710 case FDE_XmlSyntaxState::TargetData:
1711 if (FDE_IsXMLWhiteSpace(ch)) {
1712 if (m_iDataLength < 1) {
1713 m_pStart++;
1714 break;
1715 } else if (m_wQuotationMark == 0) {
1716 m_iTextDataLength = m_iDataLength;
1717 m_wQuotationMark = 0;
1718 m_BlockBuffer.Reset();
1719 m_pCurrentBlock =
1720 m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
1721 m_pStart++;
1722 syntaxParserResult = FDE_XmlSyntaxResult::TargetData;
1723 break;
1724 }
1725 }
1726 if (ch == '?') {
1727 m_syntaxParserState = FDE_XmlSyntaxState::CloseInstruction;
1728 m_pStart++;
1729 } else if (ch == '\"') {
1730 if (m_wQuotationMark == 0) {
1731 m_wQuotationMark = ch;
1732 m_pStart++;
1733 } else if (ch == m_wQuotationMark) {
1734 m_iTextDataLength = m_iDataLength;
1735 m_wQuotationMark = 0;
1736 m_BlockBuffer.Reset();
1737 m_pCurrentBlock =
1738 m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
1739 m_pStart++;
1740 syntaxParserResult = FDE_XmlSyntaxResult::TargetData;
1741 } else {
1742 m_syntaxParserResult = FDE_XmlSyntaxResult::Error;
1743 return m_syntaxParserResult;
1744 }
1745 } else {
1746 if (m_iIndexInBlock == m_iAllocStep) {
1747 m_pCurrentBlock =
1748 m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
1749 if (!m_pCurrentBlock) {
1750 return FDE_XmlSyntaxResult::Error;
1751 }
1752 }
1753 m_pCurrentBlock[m_iIndexInBlock++] = ch;
1754 m_iDataLength++;
1755 m_pStart++;
1756 }
1757 break;
1758 default:
1759 break;
1760 }
1761 if (syntaxParserResult != FDE_XmlSyntaxResult::None)
1762 return syntaxParserResult;
1763 }
1764 }
1765 return FDE_XmlSyntaxResult::Text;
1766 }
1767
~CFDE_XMLSyntaxParser()1768 CFDE_XMLSyntaxParser::~CFDE_XMLSyntaxParser() {
1769 m_pCurrentBlock = nullptr;
1770 FX_Free(m_pBuffer);
1771 }
1772
GetStatus() const1773 int32_t CFDE_XMLSyntaxParser::GetStatus() const {
1774 if (!m_pStream)
1775 return -1;
1776
1777 int32_t iStreamLength = m_pStream->GetLength();
1778 if (iStreamLength < 1)
1779 return 100;
1780
1781 if (m_syntaxParserResult == FDE_XmlSyntaxResult::Error)
1782 return -1;
1783
1784 if (m_syntaxParserResult == FDE_XmlSyntaxResult::EndOfString)
1785 return 100;
1786 return m_iParsedBytes * 100 / iStreamLength;
1787 }
1788
// Returns the number of bytes needed to encode the first |iSrcLen| characters
// of |pSrc| as UTF-8, counting each character on its own: 1 byte below 0x80,
// 2 below 0x800, 3 below 0x10000, 4 below 0x200000, 5 below 0x4000000 and 6
// otherwise. A non-positive |iSrcLen| yields 0.
static int32_t FX_GetUTF8EncodeLength(const FX_WCHAR* pSrc, int32_t iSrcLen) {
  int32_t iByteCount = 0;
  for (int32_t i = 0; i < iSrcLen; ++i) {
    const uint32_t code = static_cast<uint32_t>(pSrc[i]);
    if (code < 0x80)
      iByteCount += 1;
    else if (code < 0x800)
      iByteCount += 2;
    else if (code < 0x10000)
      iByteCount += 3;
    else if (code < 0x200000)
      iByteCount += 4;
    else if (code < 0x4000000)
      iByteCount += 5;
    else
      iByteCount += 6;
  }
  return iByteCount;
}
1812
GetCurrentBinaryPos() const1813 FX_FILESIZE CFDE_XMLSyntaxParser::GetCurrentBinaryPos() const {
1814 if (!m_pStream)
1815 return 0;
1816
1817 int32_t nSrcLen = m_pStart - m_pBuffer;
1818 int32_t nDstLen = FX_GetUTF8EncodeLength(m_pBuffer, nSrcLen);
1819 return m_iParsedBytes + nDstLen;
1820 }
1821
ParseTextChar(FX_WCHAR character)1822 void CFDE_XMLSyntaxParser::ParseTextChar(FX_WCHAR character) {
1823 if (m_iIndexInBlock == m_iAllocStep) {
1824 m_pCurrentBlock = m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
1825 if (!m_pCurrentBlock) {
1826 return;
1827 }
1828 }
1829 m_pCurrentBlock[m_iIndexInBlock++] = character;
1830 m_iDataLength++;
1831 if (m_iEntityStart > -1 && character == L';') {
1832 CFX_WideString csEntity;
1833 m_BlockBuffer.GetTextData(csEntity, m_iEntityStart + 1,
1834 (m_iDataLength - 1) - m_iEntityStart - 1);
1835 int32_t iLen = csEntity.GetLength();
1836 if (iLen > 0) {
1837 if (csEntity[0] == L'#') {
1838 uint32_t ch = 0;
1839 FX_WCHAR w;
1840 if (iLen > 1 && csEntity[1] == L'x') {
1841 for (int32_t i = 2; i < iLen; i++) {
1842 w = csEntity[i];
1843 if (w >= L'0' && w <= L'9') {
1844 ch = (ch << 4) + w - L'0';
1845 } else if (w >= L'A' && w <= L'F') {
1846 ch = (ch << 4) + w - 55;
1847 } else if (w >= L'a' && w <= L'f') {
1848 ch = (ch << 4) + w - 87;
1849 } else {
1850 break;
1851 }
1852 }
1853 } else {
1854 for (int32_t i = 1; i < iLen; i++) {
1855 w = csEntity[i];
1856 if (w < L'0' || w > L'9')
1857 break;
1858 ch = ch * 10 + w - L'0';
1859 }
1860 }
1861 if (ch > kMaxCharRange)
1862 ch = ' ';
1863
1864 character = static_cast<FX_WCHAR>(ch);
1865 if (character != 0) {
1866 m_BlockBuffer.SetTextChar(m_iEntityStart, character);
1867 m_iEntityStart++;
1868 }
1869 } else {
1870 if (csEntity.Compare(L"amp") == 0) {
1871 m_BlockBuffer.SetTextChar(m_iEntityStart, L'&');
1872 m_iEntityStart++;
1873 } else if (csEntity.Compare(L"lt") == 0) {
1874 m_BlockBuffer.SetTextChar(m_iEntityStart, L'<');
1875 m_iEntityStart++;
1876 } else if (csEntity.Compare(L"gt") == 0) {
1877 m_BlockBuffer.SetTextChar(m_iEntityStart, L'>');
1878 m_iEntityStart++;
1879 } else if (csEntity.Compare(L"apos") == 0) {
1880 m_BlockBuffer.SetTextChar(m_iEntityStart, L'\'');
1881 m_iEntityStart++;
1882 } else if (csEntity.Compare(L"quot") == 0) {
1883 m_BlockBuffer.SetTextChar(m_iEntityStart, L'\"');
1884 m_iEntityStart++;
1885 }
1886 }
1887 }
1888 m_BlockBuffer.DeleteTextChars(m_iDataLength - m_iEntityStart, false);
1889 m_pCurrentBlock = m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
1890 m_iEntityStart = -1;
1891 } else {
1892 if (m_iEntityStart < 0 && character == L'&') {
1893 m_iEntityStart = m_iDataLength - 1;
1894 }
1895 }
1896 m_pStart++;
1897 }
1898