// Copyright 2017 PDFium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "core/fxcrt/xml/cfx_xmlsyntaxparser.h"
8 
9 #include <algorithm>
10 #include <cwctype>
11 #include <iterator>
12 
13 #include "core/fxcrt/fx_extension.h"
14 #include "core/fxcrt/fx_safe_types.h"
15 
16 namespace {
17 
// Upper bound for the value of a numeric character reference (U+10FFFF).
constexpr uint32_t kMaxCharRange = 0x10FFFF;
19 
// Returns true when |ch| is a space, line feed, carriage return or
// horizontal tab.
bool IsXMLWhiteSpace(wchar_t ch) {
  switch (ch) {
    case L' ':
    case 0x0A:
    case 0x0D:
    case 0x09:
      return true;
    default:
      return false;
  }
}
23 
// One inclusive range of characters that may appear in an XML name.
struct FX_XMLNAMECHAR {
  uint16_t wStart;  // First character of the range.
  uint16_t wEnd;    // Last character of the range.
  bool bStartChar;  // True when the range may also begin a name.
};

// Ranges accepted in XML names. The entries are listed in ascending,
// non-overlapping order, which the binary search over |wEnd| depends on.
const FX_XMLNAMECHAR g_XMLNameChars[] = {
    {L'-', L'.', false},
    {L'0', L'9', false},
    {L':', L':', false},
    {L'A', L'Z', true},
    {L'_', L'_', true},
    {L'a', L'z', true},
    {0xB7, 0xB7, false},
    {0xC0, 0xD6, true},
    {0xD8, 0xF6, true},
    {0xF8, 0x02FF, true},
    {0x0300, 0x036F, false},
    {0x0370, 0x037D, true},
    {0x037F, 0x1FFF, true},
    {0x200C, 0x200D, true},
    {0x203F, 0x2040, false},
    {0x2070, 0x218F, true},
    {0x2C00, 0x2FEF, true},
    {0x3001, 0xD7FF, true},
    {0xF900, 0xFDCF, true},
    {0xFDF0, 0xFFFD, true},
};
39 
40 
// Returns how many bytes the first |iSrcLen| characters of |src| occupy when
// each one is written as a UTF-8 style sequence of one to six bytes.
//
// A non-positive |iSrcLen| yields 0. A length larger than |src| is clamped to
// |src.size()| so the function never reads outside the vector.
int32_t GetUTF8EncodeLength(const std::vector<wchar_t>& src,
                            FX_FILESIZE iSrcLen) {
  if (iSrcLen <= 0)
    return 0;

  size_t count = src.size();
  if (static_cast<uint64_t>(iSrcLen) < count)
    count = static_cast<size_t>(iSrcLen);

  int32_t iDstNum = 0;
  for (size_t i = 0; i < count; ++i) {
    const uint32_t unicode = static_cast<uint32_t>(src[i]);
    if (unicode < 0x80)
      iDstNum += 1;
    else if (unicode < 0x800)
      iDstNum += 2;
    else if (unicode < 0x10000)
      iDstNum += 3;
    else if (unicode < 0x200000)
      iDstNum += 4;
    else if (unicode < 0x4000000)
      iDstNum += 5;
    else
      iDstNum += 6;
  }
  return iDstNum;
}
66 
67 }  // namespace
68 
69 // static
IsXMLNameChar(wchar_t ch,bool bFirstChar)70 bool CFX_XMLSyntaxParser::IsXMLNameChar(wchar_t ch, bool bFirstChar) {
71   auto* it = std::lower_bound(
72       std::begin(g_XMLNameChars), std::end(g_XMLNameChars), ch,
73       [](const FX_XMLNAMECHAR& arg, wchar_t ch) { return arg.wEnd < ch; });
74   return it != std::end(g_XMLNameChars) && ch >= it->wStart &&
75          (!bFirstChar || it->bStartChar);
76 }
77 
CFX_XMLSyntaxParser(const RetainPtr<CFX_SeekableStreamProxy> & pStream)78 CFX_XMLSyntaxParser::CFX_XMLSyntaxParser(
79     const RetainPtr<CFX_SeekableStreamProxy>& pStream)
80     : m_pStream(pStream),
81       m_iXMLPlaneSize(32 * 1024),
82       m_iCurrentPos(0),
83       m_iCurrentNodeNum(-1),
84       m_iLastNodeNum(-1),
85       m_iParsedBytes(0),
86       m_ParsedChars(0),
87       m_iBufferChars(0),
88       m_bEOS(false),
89       m_Start(0),
90       m_End(0),
91       m_iAllocStep(m_BlockBuffer.GetAllocStep()),
92       m_pCurrentBlock(nullptr),
93       m_iIndexInBlock(0),
94       m_iTextDataLength(0),
95       m_syntaxParserResult(FX_XmlSyntaxResult::None),
96       m_syntaxParserState(FDE_XmlSyntaxState::Text),
97       m_wQuotationMark(0),
98       m_iEntityStart(-1) {
99   ASSERT(pStream);
100 
101   m_CurNode.iNodeNum = -1;
102   m_CurNode.eNodeType = FX_XMLNODE_Unknown;
103 
104   m_iXMLPlaneSize =
105       std::min(m_iXMLPlaneSize,
106                pdfium::base::checked_cast<size_t>(m_pStream->GetLength()));
107   m_iCurrentPos = m_pStream->GetBOMLength();
108 
109   FX_SAFE_SIZE_T alloc_size_safe = m_iXMLPlaneSize;
110   alloc_size_safe += 1;  // For NUL.
111   if (!alloc_size_safe.IsValid() || alloc_size_safe.ValueOrDie() <= 0) {
112     m_syntaxParserResult = FX_XmlSyntaxResult::Error;
113     return;
114   }
115 
116   m_Buffer.resize(pdfium::base::ValueOrDieForType<size_t>(alloc_size_safe));
117 
118   m_BlockBuffer.InitBuffer();
119   std::tie(m_pCurrentBlock, m_iIndexInBlock) =
120       m_BlockBuffer.GetAvailableBlock();
121 }
122 
~CFX_XMLSyntaxParser()123 CFX_XMLSyntaxParser::~CFX_XMLSyntaxParser() {}
124 
DoSyntaxParse()125 FX_XmlSyntaxResult CFX_XMLSyntaxParser::DoSyntaxParse() {
126   if (m_syntaxParserResult == FX_XmlSyntaxResult::Error ||
127       m_syntaxParserResult == FX_XmlSyntaxResult::EndOfString) {
128     return m_syntaxParserResult;
129   }
130 
131   FX_FILESIZE iStreamLength = m_pStream->GetLength();
132   FX_FILESIZE iPos;
133 
134   FX_XmlSyntaxResult syntaxParserResult = FX_XmlSyntaxResult::None;
135   while (true) {
136     if (m_Start >= m_End) {
137       if (m_bEOS || m_iCurrentPos >= iStreamLength) {
138         m_syntaxParserResult = FX_XmlSyntaxResult::EndOfString;
139         return m_syntaxParserResult;
140       }
141       m_ParsedChars += m_End;
142       m_iParsedBytes = m_iCurrentPos;
143       if (m_pStream->GetPosition() != m_iCurrentPos)
144         m_pStream->Seek(CFX_SeekableStreamProxy::From::Begin, m_iCurrentPos);
145 
146       m_iBufferChars =
147           m_pStream->ReadString(m_Buffer.data(), m_iXMLPlaneSize, &m_bEOS);
148       iPos = m_pStream->GetPosition();
149       if (m_iBufferChars < 1) {
150         m_iCurrentPos = iStreamLength;
151         m_syntaxParserResult = FX_XmlSyntaxResult::EndOfString;
152         return m_syntaxParserResult;
153       }
154       m_iCurrentPos = iPos;
155       m_Start = 0;
156       m_End = m_iBufferChars;
157     }
158 
159     while (m_Start < m_End) {
160       wchar_t ch = m_Buffer[m_Start];
161       switch (m_syntaxParserState) {
162         case FDE_XmlSyntaxState::Text:
163           if (ch == L'<') {
164             if (!m_BlockBuffer.IsEmpty()) {
165               m_iTextDataLength = m_BlockBuffer.GetDataLength();
166               m_BlockBuffer.Reset(true);
167               std::tie(m_pCurrentBlock, m_iIndexInBlock) =
168                   m_BlockBuffer.GetAvailableBlock();
169               m_iEntityStart = -1;
170               syntaxParserResult = FX_XmlSyntaxResult::Text;
171             } else {
172               m_Start++;
173               m_syntaxParserState = FDE_XmlSyntaxState::Node;
174             }
175           } else {
176             ParseTextChar(ch);
177           }
178           break;
179         case FDE_XmlSyntaxState::Node:
180           if (ch == L'!') {
181             m_Start++;
182             m_syntaxParserState = FDE_XmlSyntaxState::SkipCommentOrDecl;
183           } else if (ch == L'/') {
184             m_Start++;
185             m_syntaxParserState = FDE_XmlSyntaxState::CloseElement;
186           } else if (ch == L'?') {
187             m_iLastNodeNum++;
188             m_iCurrentNodeNum = m_iLastNodeNum;
189             m_CurNode.iNodeNum = m_iLastNodeNum;
190             m_CurNode.eNodeType = FX_XMLNODE_Instruction;
191             m_XMLNodeStack.push(m_CurNode);
192             m_Start++;
193             m_syntaxParserState = FDE_XmlSyntaxState::Target;
194             syntaxParserResult = FX_XmlSyntaxResult::InstructionOpen;
195           } else {
196             m_iLastNodeNum++;
197             m_iCurrentNodeNum = m_iLastNodeNum;
198             m_CurNode.iNodeNum = m_iLastNodeNum;
199             m_CurNode.eNodeType = FX_XMLNODE_Element;
200             m_XMLNodeStack.push(m_CurNode);
201             m_syntaxParserState = FDE_XmlSyntaxState::Tag;
202             syntaxParserResult = FX_XmlSyntaxResult::ElementOpen;
203           }
204           break;
205         case FDE_XmlSyntaxState::Target:
206         case FDE_XmlSyntaxState::Tag:
207           if (!IsXMLNameChar(ch, m_BlockBuffer.IsEmpty())) {
208             if (m_BlockBuffer.IsEmpty()) {
209               m_syntaxParserResult = FX_XmlSyntaxResult::Error;
210               return m_syntaxParserResult;
211             }
212 
213             m_iTextDataLength = m_BlockBuffer.GetDataLength();
214             m_BlockBuffer.Reset(true);
215             std::tie(m_pCurrentBlock, m_iIndexInBlock) =
216                 m_BlockBuffer.GetAvailableBlock();
217             if (m_syntaxParserState != FDE_XmlSyntaxState::Target)
218               syntaxParserResult = FX_XmlSyntaxResult::TagName;
219             else
220               syntaxParserResult = FX_XmlSyntaxResult::TargetName;
221 
222             m_syntaxParserState = FDE_XmlSyntaxState::AttriName;
223           } else {
224             if (m_iIndexInBlock == m_iAllocStep) {
225               std::tie(m_pCurrentBlock, m_iIndexInBlock) =
226                   m_BlockBuffer.GetAvailableBlock();
227               if (!m_pCurrentBlock) {
228                 return FX_XmlSyntaxResult::Error;
229               }
230             }
231             m_pCurrentBlock[m_iIndexInBlock++] = ch;
232             m_BlockBuffer.IncrementDataLength();
233             m_Start++;
234           }
235           break;
236         case FDE_XmlSyntaxState::AttriName:
237           if (m_BlockBuffer.IsEmpty() && IsXMLWhiteSpace(ch)) {
238             m_Start++;
239             break;
240           }
241           if (!IsXMLNameChar(ch, m_BlockBuffer.IsEmpty())) {
242             if (m_BlockBuffer.IsEmpty()) {
243               if (m_CurNode.eNodeType == FX_XMLNODE_Element) {
244                 if (ch == L'>' || ch == L'/') {
245                   m_syntaxParserState = FDE_XmlSyntaxState::BreakElement;
246                   break;
247                 }
248               } else if (m_CurNode.eNodeType == FX_XMLNODE_Instruction) {
249                 if (ch == L'?') {
250                   m_syntaxParserState = FDE_XmlSyntaxState::CloseInstruction;
251                   m_Start++;
252                 } else {
253                   m_syntaxParserState = FDE_XmlSyntaxState::TargetData;
254                 }
255                 break;
256               }
257               m_syntaxParserResult = FX_XmlSyntaxResult::Error;
258               return m_syntaxParserResult;
259             } else {
260               if (m_CurNode.eNodeType == FX_XMLNODE_Instruction) {
261                 if (ch != '=' && !IsXMLWhiteSpace(ch)) {
262                   m_syntaxParserState = FDE_XmlSyntaxState::TargetData;
263                   break;
264                 }
265               }
266               m_iTextDataLength = m_BlockBuffer.GetDataLength();
267               m_BlockBuffer.Reset(true);
268               std::tie(m_pCurrentBlock, m_iIndexInBlock) =
269                   m_BlockBuffer.GetAvailableBlock();
270               m_syntaxParserState = FDE_XmlSyntaxState::AttriEqualSign;
271               syntaxParserResult = FX_XmlSyntaxResult::AttriName;
272             }
273           } else {
274             if (m_iIndexInBlock == m_iAllocStep) {
275               std::tie(m_pCurrentBlock, m_iIndexInBlock) =
276                   m_BlockBuffer.GetAvailableBlock();
277               if (!m_pCurrentBlock) {
278                 return FX_XmlSyntaxResult::Error;
279               }
280             }
281             m_pCurrentBlock[m_iIndexInBlock++] = ch;
282             m_BlockBuffer.IncrementDataLength();
283             m_Start++;
284           }
285           break;
286         case FDE_XmlSyntaxState::AttriEqualSign:
287           if (IsXMLWhiteSpace(ch)) {
288             m_Start++;
289             break;
290           }
291           if (ch != L'=') {
292             if (m_CurNode.eNodeType == FX_XMLNODE_Instruction) {
293               m_syntaxParserState = FDE_XmlSyntaxState::TargetData;
294               break;
295             }
296             m_syntaxParserResult = FX_XmlSyntaxResult::Error;
297             return m_syntaxParserResult;
298           } else {
299             m_syntaxParserState = FDE_XmlSyntaxState::AttriQuotation;
300             m_Start++;
301           }
302           break;
303         case FDE_XmlSyntaxState::AttriQuotation:
304           if (IsXMLWhiteSpace(ch)) {
305             m_Start++;
306             break;
307           }
308           if (ch != L'\"' && ch != L'\'') {
309             m_syntaxParserResult = FX_XmlSyntaxResult::Error;
310             return m_syntaxParserResult;
311           } else {
312             m_wQuotationMark = ch;
313             m_syntaxParserState = FDE_XmlSyntaxState::AttriValue;
314             m_Start++;
315           }
316           break;
317         case FDE_XmlSyntaxState::AttriValue:
318           if (ch == m_wQuotationMark) {
319             if (m_iEntityStart > -1) {
320               m_syntaxParserResult = FX_XmlSyntaxResult::Error;
321               return m_syntaxParserResult;
322             }
323             m_iTextDataLength = m_BlockBuffer.GetDataLength();
324             m_wQuotationMark = 0;
325             m_BlockBuffer.Reset(true);
326             std::tie(m_pCurrentBlock, m_iIndexInBlock) =
327                 m_BlockBuffer.GetAvailableBlock();
328             m_Start++;
329             m_syntaxParserState = FDE_XmlSyntaxState::AttriName;
330             syntaxParserResult = FX_XmlSyntaxResult::AttriValue;
331           } else {
332             ParseTextChar(ch);
333           }
334           break;
335         case FDE_XmlSyntaxState::CloseInstruction:
336           if (ch != L'>') {
337             if (m_iIndexInBlock == m_iAllocStep) {
338               std::tie(m_pCurrentBlock, m_iIndexInBlock) =
339                   m_BlockBuffer.GetAvailableBlock();
340               if (!m_pCurrentBlock) {
341                 return FX_XmlSyntaxResult::Error;
342               }
343             }
344             m_pCurrentBlock[m_iIndexInBlock++] = ch;
345             m_BlockBuffer.IncrementDataLength();
346             m_syntaxParserState = FDE_XmlSyntaxState::TargetData;
347           } else if (!m_BlockBuffer.IsEmpty()) {
348             m_iTextDataLength = m_BlockBuffer.GetDataLength();
349             m_BlockBuffer.Reset(true);
350             std::tie(m_pCurrentBlock, m_iIndexInBlock) =
351                 m_BlockBuffer.GetAvailableBlock();
352             syntaxParserResult = FX_XmlSyntaxResult::TargetData;
353           } else {
354             m_Start++;
355             if (m_XMLNodeStack.empty()) {
356               m_syntaxParserResult = FX_XmlSyntaxResult::Error;
357               return m_syntaxParserResult;
358             }
359             m_XMLNodeStack.pop();
360             if (!m_XMLNodeStack.empty()) {
361               m_CurNode = m_XMLNodeStack.top();
362             } else {
363               m_CurNode.iNodeNum = -1;
364               m_CurNode.eNodeType = FX_XMLNODE_Unknown;
365             }
366             m_iCurrentNodeNum = m_CurNode.iNodeNum;
367             m_BlockBuffer.Reset(true);
368             std::tie(m_pCurrentBlock, m_iIndexInBlock) =
369                 m_BlockBuffer.GetAvailableBlock();
370             m_syntaxParserState = FDE_XmlSyntaxState::Text;
371             syntaxParserResult = FX_XmlSyntaxResult::InstructionClose;
372           }
373           break;
374         case FDE_XmlSyntaxState::BreakElement:
375           if (ch == L'>') {
376             m_syntaxParserState = FDE_XmlSyntaxState::Text;
377             syntaxParserResult = FX_XmlSyntaxResult::ElementBreak;
378           } else if (ch == L'/') {
379             m_syntaxParserState = FDE_XmlSyntaxState::CloseElement;
380           } else {
381             m_syntaxParserResult = FX_XmlSyntaxResult::Error;
382             return m_syntaxParserResult;
383           }
384           m_Start++;
385           break;
386         case FDE_XmlSyntaxState::CloseElement:
387           if (!IsXMLNameChar(ch, m_BlockBuffer.IsEmpty())) {
388             if (ch == L'>') {
389               if (m_XMLNodeStack.empty()) {
390                 m_syntaxParserResult = FX_XmlSyntaxResult::Error;
391                 return m_syntaxParserResult;
392               }
393               m_XMLNodeStack.pop();
394               if (!m_XMLNodeStack.empty()) {
395                 m_CurNode = m_XMLNodeStack.top();
396               } else {
397                 m_CurNode.iNodeNum = -1;
398                 m_CurNode.eNodeType = FX_XMLNODE_Unknown;
399               }
400               m_iCurrentNodeNum = m_CurNode.iNodeNum;
401               m_iTextDataLength = m_BlockBuffer.GetDataLength();
402               m_BlockBuffer.Reset(true);
403               std::tie(m_pCurrentBlock, m_iIndexInBlock) =
404                   m_BlockBuffer.GetAvailableBlock();
405               m_syntaxParserState = FDE_XmlSyntaxState::Text;
406               syntaxParserResult = FX_XmlSyntaxResult::ElementClose;
407             } else if (!IsXMLWhiteSpace(ch)) {
408               m_syntaxParserResult = FX_XmlSyntaxResult::Error;
409               return m_syntaxParserResult;
410             }
411           } else {
412             if (m_iIndexInBlock == m_iAllocStep) {
413               std::tie(m_pCurrentBlock, m_iIndexInBlock) =
414                   m_BlockBuffer.GetAvailableBlock();
415               if (!m_pCurrentBlock) {
416                 return FX_XmlSyntaxResult::Error;
417               }
418             }
419             m_pCurrentBlock[m_iIndexInBlock++] = ch;
420             m_BlockBuffer.IncrementDataLength();
421           }
422           m_Start++;
423           break;
424         case FDE_XmlSyntaxState::SkipCommentOrDecl:
425           if (FXSYS_wcsnicmp(m_Buffer.data() + m_Start, L"--", 2) == 0) {
426             m_Start += 2;
427             m_syntaxParserState = FDE_XmlSyntaxState::SkipComment;
428           } else if (FXSYS_wcsnicmp(m_Buffer.data() + m_Start, L"[CDATA[", 7) ==
429                      0) {
430             m_Start += 7;
431             m_syntaxParserState = FDE_XmlSyntaxState::SkipCData;
432           } else {
433             m_syntaxParserState = FDE_XmlSyntaxState::SkipDeclNode;
434             m_SkipChar = L'>';
435             m_SkipStack.push(L'>');
436           }
437           break;
438         case FDE_XmlSyntaxState::SkipCData: {
439           if (FXSYS_wcsnicmp(m_Buffer.data() + m_Start, L"]]>", 3) == 0) {
440             m_Start += 3;
441             syntaxParserResult = FX_XmlSyntaxResult::CData;
442             m_iTextDataLength = m_BlockBuffer.GetDataLength();
443             m_BlockBuffer.Reset(true);
444             std::tie(m_pCurrentBlock, m_iIndexInBlock) =
445                 m_BlockBuffer.GetAvailableBlock();
446             m_syntaxParserState = FDE_XmlSyntaxState::Text;
447           } else {
448             if (m_iIndexInBlock == m_iAllocStep) {
449               std::tie(m_pCurrentBlock, m_iIndexInBlock) =
450                   m_BlockBuffer.GetAvailableBlock();
451               if (!m_pCurrentBlock)
452                 return FX_XmlSyntaxResult::Error;
453             }
454             m_pCurrentBlock[m_iIndexInBlock++] = ch;
455             m_BlockBuffer.IncrementDataLength();
456             m_Start++;
457           }
458           break;
459         }
460         case FDE_XmlSyntaxState::SkipDeclNode:
461           if (m_SkipChar == L'\'' || m_SkipChar == L'\"') {
462             m_Start++;
463             if (ch != m_SkipChar)
464               break;
465 
466             m_SkipStack.pop();
467             if (m_SkipStack.empty())
468               m_syntaxParserState = FDE_XmlSyntaxState::Text;
469             else
470               m_SkipChar = m_SkipStack.top();
471           } else {
472             switch (ch) {
473               case L'<':
474                 m_SkipChar = L'>';
475                 m_SkipStack.push(L'>');
476                 break;
477               case L'[':
478                 m_SkipChar = L']';
479                 m_SkipStack.push(L']');
480                 break;
481               case L'(':
482                 m_SkipChar = L')';
483                 m_SkipStack.push(L')');
484                 break;
485               case L'\'':
486                 m_SkipChar = L'\'';
487                 m_SkipStack.push(L'\'');
488                 break;
489               case L'\"':
490                 m_SkipChar = L'\"';
491                 m_SkipStack.push(L'\"');
492                 break;
493               default:
494                 if (ch == m_SkipChar) {
495                   m_SkipStack.pop();
496                   if (m_SkipStack.empty()) {
497                     if (m_BlockBuffer.GetDataLength() >= 9)
498                       (void)m_BlockBuffer.GetTextData(0, 7);
499 
500                     m_iTextDataLength = m_BlockBuffer.GetDataLength();
501                     m_BlockBuffer.Reset(true);
502                     std::tie(m_pCurrentBlock, m_iIndexInBlock) =
503                         m_BlockBuffer.GetAvailableBlock();
504                     m_syntaxParserState = FDE_XmlSyntaxState::Text;
505                   } else {
506                     m_SkipChar = m_SkipStack.top();
507                   }
508                 }
509                 break;
510             }
511             if (!m_SkipStack.empty()) {
512               if (m_iIndexInBlock == m_iAllocStep) {
513                 std::tie(m_pCurrentBlock, m_iIndexInBlock) =
514                     m_BlockBuffer.GetAvailableBlock();
515                 if (!m_pCurrentBlock) {
516                   return FX_XmlSyntaxResult::Error;
517                 }
518               }
519               m_pCurrentBlock[m_iIndexInBlock++] = ch;
520               m_BlockBuffer.IncrementDataLength();
521             }
522             m_Start++;
523           }
524           break;
525         case FDE_XmlSyntaxState::SkipComment:
526           if (FXSYS_wcsnicmp(m_Buffer.data() + m_Start, L"-->", 3) == 0) {
527             m_Start += 2;
528             m_syntaxParserState = FDE_XmlSyntaxState::Text;
529           }
530 
531           m_Start++;
532           break;
533         case FDE_XmlSyntaxState::TargetData:
534           if (IsXMLWhiteSpace(ch)) {
535             if (m_BlockBuffer.IsEmpty()) {
536               m_Start++;
537               break;
538             }
539             if (m_wQuotationMark == 0) {
540               m_iTextDataLength = m_BlockBuffer.GetDataLength();
541               m_wQuotationMark = 0;
542               m_BlockBuffer.Reset(true);
543               std::tie(m_pCurrentBlock, m_iIndexInBlock) =
544                   m_BlockBuffer.GetAvailableBlock();
545               m_Start++;
546               syntaxParserResult = FX_XmlSyntaxResult::TargetData;
547               break;
548             }
549           }
550           if (ch == '?') {
551             m_syntaxParserState = FDE_XmlSyntaxState::CloseInstruction;
552             m_Start++;
553           } else if (ch == '\"') {
554             if (m_wQuotationMark == 0) {
555               m_wQuotationMark = ch;
556               m_Start++;
557             } else if (ch == m_wQuotationMark) {
558               m_iTextDataLength = m_BlockBuffer.GetDataLength();
559               m_wQuotationMark = 0;
560               m_BlockBuffer.Reset(true);
561               std::tie(m_pCurrentBlock, m_iIndexInBlock) =
562                   m_BlockBuffer.GetAvailableBlock();
563               m_Start++;
564               syntaxParserResult = FX_XmlSyntaxResult::TargetData;
565             } else {
566               m_syntaxParserResult = FX_XmlSyntaxResult::Error;
567               return m_syntaxParserResult;
568             }
569           } else {
570             if (m_iIndexInBlock == m_iAllocStep) {
571               std::tie(m_pCurrentBlock, m_iIndexInBlock) =
572                   m_BlockBuffer.GetAvailableBlock();
573               if (!m_pCurrentBlock) {
574                 return FX_XmlSyntaxResult::Error;
575               }
576             }
577             m_pCurrentBlock[m_iIndexInBlock++] = ch;
578             m_BlockBuffer.IncrementDataLength();
579             m_Start++;
580           }
581           break;
582         default:
583           break;
584       }
585       if (syntaxParserResult != FX_XmlSyntaxResult::None)
586         return syntaxParserResult;
587     }
588   }
589   return FX_XmlSyntaxResult::Text;
590 }
591 
GetStatus() const592 int32_t CFX_XMLSyntaxParser::GetStatus() const {
593   if (!m_pStream)
594     return -1;
595 
596   int32_t iStreamLength = m_pStream->GetLength();
597   if (iStreamLength < 1)
598     return 100;
599 
600   if (m_syntaxParserResult == FX_XmlSyntaxResult::Error)
601     return -1;
602 
603   if (m_syntaxParserResult == FX_XmlSyntaxResult::EndOfString)
604     return 100;
605   return m_iParsedBytes * 100 / iStreamLength;
606 }
607 
GetCurrentBinaryPos() const608 FX_FILESIZE CFX_XMLSyntaxParser::GetCurrentBinaryPos() const {
609   if (!m_pStream)
610     return 0;
611 
612   int32_t nDstLen = GetUTF8EncodeLength(m_Buffer, m_Start);
613   return m_iParsedBytes + nDstLen;
614 }
615 
ParseTextChar(wchar_t character)616 void CFX_XMLSyntaxParser::ParseTextChar(wchar_t character) {
617   if (m_iIndexInBlock == m_iAllocStep) {
618     std::tie(m_pCurrentBlock, m_iIndexInBlock) =
619         m_BlockBuffer.GetAvailableBlock();
620     if (!m_pCurrentBlock)
621       return;
622   }
623 
624   m_pCurrentBlock[m_iIndexInBlock++] = character;
625   m_BlockBuffer.IncrementDataLength();
626   if (m_iEntityStart > -1 && character == L';') {
627     WideString csEntity = m_BlockBuffer.GetTextData(
628         m_iEntityStart + 1,
629         m_BlockBuffer.GetDataLength() - 1 - m_iEntityStart - 1);
630     int32_t iLen = csEntity.GetLength();
631     if (iLen > 0) {
632       if (csEntity[0] == L'#') {
633         uint32_t ch = 0;
634         wchar_t w;
635         if (iLen > 1 && csEntity[1] == L'x') {
636           for (int32_t i = 2; i < iLen; i++) {
637             w = csEntity[i];
638             if (std::iswdigit(w))
639               ch = (ch << 4) + w - L'0';
640             else if (w >= L'A' && w <= L'F')
641               ch = (ch << 4) + w - 55;
642             else if (w >= L'a' && w <= L'f')
643               ch = (ch << 4) + w - 87;
644             else
645               break;
646           }
647         } else {
648           for (int32_t i = 1; i < iLen; i++) {
649             w = csEntity[i];
650             if (!std::iswdigit(w))
651               break;
652             ch = ch * 10 + w - L'0';
653           }
654         }
655         if (ch > kMaxCharRange)
656           ch = ' ';
657 
658         character = static_cast<wchar_t>(ch);
659         if (character != 0) {
660           m_BlockBuffer.SetTextChar(m_iEntityStart, character);
661           m_iEntityStart++;
662         }
663       } else {
664         if (csEntity.Compare(L"amp") == 0) {
665           m_BlockBuffer.SetTextChar(m_iEntityStart, L'&');
666           m_iEntityStart++;
667         } else if (csEntity.Compare(L"lt") == 0) {
668           m_BlockBuffer.SetTextChar(m_iEntityStart, L'<');
669           m_iEntityStart++;
670         } else if (csEntity.Compare(L"gt") == 0) {
671           m_BlockBuffer.SetTextChar(m_iEntityStart, L'>');
672           m_iEntityStart++;
673         } else if (csEntity.Compare(L"apos") == 0) {
674           m_BlockBuffer.SetTextChar(m_iEntityStart, L'\'');
675           m_iEntityStart++;
676         } else if (csEntity.Compare(L"quot") == 0) {
677           m_BlockBuffer.SetTextChar(m_iEntityStart, L'\"');
678           m_iEntityStart++;
679         }
680       }
681     }
682     if (m_iEntityStart >= 0 &&
683         m_BlockBuffer.GetDataLength() > static_cast<size_t>(m_iEntityStart)) {
684       m_BlockBuffer.DeleteTextChars(m_BlockBuffer.GetDataLength() -
685                                     m_iEntityStart);
686     }
687     std::tie(m_pCurrentBlock, m_iIndexInBlock) =
688         m_BlockBuffer.GetAvailableBlock();
689     m_iEntityStart = -1;
690   } else if (m_iEntityStart < 0 && character == L'&') {
691     m_iEntityStart = m_BlockBuffer.GetDataLength() - 1;
692   }
693   m_Start++;
694 }
695