1 // Copyright 2017 PDFium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 6 7 #ifndef CORE_FXCRT_XML_CFX_XMLSYNTAXPARSER_H_ 8 #define CORE_FXCRT_XML_CFX_XMLSYNTAXPARSER_H_ 9 10 #include <stack> 11 #include <vector> 12 13 #include "core/fxcrt/cfx_blockbuffer.h" 14 #include "core/fxcrt/cfx_seekablestreamproxy.h" 15 #include "core/fxcrt/fx_string.h" 16 #include "core/fxcrt/retain_ptr.h" 17 #include "core/fxcrt/xml/cfx_xmlnode.h" 18 19 enum class FX_XmlSyntaxResult { 20 None, 21 InstructionOpen, 22 InstructionClose, 23 ElementOpen, 24 ElementBreak, 25 ElementClose, 26 TargetName, 27 TagName, 28 AttriName, 29 AttriValue, 30 Text, 31 CData, 32 TargetData, 33 Error, 34 EndOfString 35 }; 36 37 class CFX_XMLSyntaxParser { 38 public: 39 static bool IsXMLNameChar(wchar_t ch, bool bFirstChar); 40 41 explicit CFX_XMLSyntaxParser( 42 const RetainPtr<CFX_SeekableStreamProxy>& pStream); 43 ~CFX_XMLSyntaxParser(); 44 45 FX_XmlSyntaxResult DoSyntaxParse(); 46 47 int32_t GetStatus() const; GetCurrentPos()48 FX_FILESIZE GetCurrentPos() const { return m_ParsedChars + m_Start; } 49 FX_FILESIZE GetCurrentBinaryPos() const; GetCurrentNodeNumber()50 int32_t GetCurrentNodeNumber() const { return m_iCurrentNodeNum; } GetLastNodeNumber()51 int32_t GetLastNodeNumber() const { return m_iLastNodeNum; } 52 GetTargetName()53 WideString GetTargetName() const { 54 return m_BlockBuffer.GetTextData(0, m_iTextDataLength); 55 } 56 GetTagName()57 WideString GetTagName() const { 58 return m_BlockBuffer.GetTextData(0, m_iTextDataLength); 59 } 60 GetAttributeName()61 WideString GetAttributeName() const { 62 return m_BlockBuffer.GetTextData(0, m_iTextDataLength); 63 } 64 GetAttributeValue()65 WideString GetAttributeValue() const { 66 return m_BlockBuffer.GetTextData(0, m_iTextDataLength); 67 } 68 GetTextData()69 WideString GetTextData() const { 70 return m_BlockBuffer.GetTextData(0, m_iTextDataLength); 71 } 72 GetTargetData()73 WideString GetTargetData() const { 74 return m_BlockBuffer.GetTextData(0, m_iTextDataLength); 75 } 76 77 protected: 78 enum class FDE_XmlSyntaxState { 79 Text, 80 Node, 81 Target, 82 Tag, 83 AttriName, 84 AttriEqualSign, 85 AttriQuotation, 86 AttriValue, 87 Entity, 88 EntityDecimal, 89 EntityHex, 90 CloseInstruction, 91 BreakElement, 92 CloseElement, 93 SkipDeclNode, 94 DeclCharData, 95 SkipComment, 96 SkipCommentOrDecl, 97 SkipCData, 98 TargetData 99 }; 100 101 void ParseTextChar(wchar_t ch); 102 103 RetainPtr<CFX_SeekableStreamProxy> m_pStream; 104 size_t m_iXMLPlaneSize; 105 FX_FILESIZE m_iCurrentPos; 106 int32_t m_iCurrentNodeNum; 107 int32_t m_iLastNodeNum; 108 int32_t m_iParsedBytes; 109 FX_FILESIZE m_ParsedChars; 110 std::vector<wchar_t> m_Buffer; 111 size_t m_iBufferChars; 112 bool m_bEOS; 113 FX_FILESIZE m_Start; // Start position in m_Buffer 114 FX_FILESIZE m_End; // End position in m_Buffer 115 FX_XMLNODE m_CurNode; 116 std::stack<FX_XMLNODE> m_XMLNodeStack; 117 CFX_BlockBuffer m_BlockBuffer; 118 int32_t m_iAllocStep; 119 wchar_t* m_pCurrentBlock; // Pointer into CFX_BlockBuffer 120 int32_t m_iIndexInBlock; 121 int32_t m_iTextDataLength; 122 FX_XmlSyntaxResult m_syntaxParserResult; 123 FDE_XmlSyntaxState m_syntaxParserState; 124 wchar_t m_wQuotationMark; 125 int32_t m_iEntityStart; 126 std::stack<wchar_t> m_SkipStack; 127 wchar_t m_SkipChar; 128 }; 129 130 #endif // CORE_FXCRT_XML_CFX_XMLSYNTAXPARSER_H_ 131