1 // Copyright 2014 PDFium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 6 7 #ifndef XFA_FDE_XML_FDE_XML_IMP_H_ 8 #define XFA_FDE_XML_FDE_XML_IMP_H_ 9 10 #include <memory> 11 #include <vector> 12 13 #include "core/fxcrt/fx_system.h" 14 #include "xfa/fde/xml/fde_xml.h" 15 #include "xfa/fgas/crt/fgas_stream.h" 16 #include "xfa/fgas/crt/fgas_utils.h" 17 18 class CFDE_BlockBuffer; 19 class CFDE_XMLInstruction; 20 class CFDE_XMLElement; 21 class CFDE_XMLText; 22 class CFDE_XMLDoc; 23 class CFDE_XMLDOMParser; 24 class CFDE_XMLSyntaxParser; 25 class IFDE_XMLParser; 26 27 class CFDE_XMLNode { 28 public: 29 enum NodeItem { 30 Root = 0, 31 Parent, 32 FirstSibling, 33 PriorSibling, 34 NextSibling, 35 LastSibling, 36 FirstNeighbor, 37 PriorNeighbor, 38 NextNeighbor, 39 LastNeighbor, 40 FirstChild, 41 LastChild 42 }; 43 44 CFDE_XMLNode(); 45 virtual ~CFDE_XMLNode(); 46 47 virtual FDE_XMLNODETYPE GetType() const; 48 virtual CFDE_XMLNode* Clone(bool bRecursive); 49 50 int32_t CountChildNodes() const; 51 CFDE_XMLNode* GetChildNode(int32_t index) const; 52 int32_t GetChildNodeIndex(CFDE_XMLNode* pNode) const; 53 int32_t InsertChildNode(CFDE_XMLNode* pNode, int32_t index = -1); 54 void RemoveChildNode(CFDE_XMLNode* pNode); 55 void DeleteChildren(); 56 void CloneChildren(CFDE_XMLNode* pClone); 57 58 CFDE_XMLNode* GetPath(const FX_WCHAR* pPath, 59 int32_t iLength = -1, 60 bool bQualifiedName = true) const; 61 62 int32_t GetNodeLevel() const; 63 CFDE_XMLNode* GetNodeItem(CFDE_XMLNode::NodeItem eItem) const; 64 bool InsertNodeItem(CFDE_XMLNode::NodeItem eItem, CFDE_XMLNode* pNode); 65 CFDE_XMLNode* RemoveNodeItem(CFDE_XMLNode::NodeItem eItem); 66 67 void SaveXMLNode(const CFX_RetainPtr<IFGAS_Stream>& pXMLStream); 68 69 CFDE_XMLNode* m_pParent; 70 CFDE_XMLNode* m_pChild; 71 CFDE_XMLNode* m_pPrior; 72 CFDE_XMLNode* m_pNext; 73 }; 74 75 class CFDE_XMLInstruction : public CFDE_XMLNode { 76 public: 77 explicit CFDE_XMLInstruction(const CFX_WideString& wsTarget); 78 ~CFDE_XMLInstruction() override; 79 80 // CFDE_XMLNode 81 FDE_XMLNODETYPE GetType() const override; 82 CFDE_XMLNode* Clone(bool bRecursive) override; 83 GetTargetName(CFX_WideString & wsTarget)84 void GetTargetName(CFX_WideString& wsTarget) const { wsTarget = m_wsTarget; } 85 int32_t CountAttributes() const; 86 bool GetAttribute(int32_t index, 87 CFX_WideString& wsAttriName, 88 CFX_WideString& wsAttriValue) const; 89 bool HasAttribute(const FX_WCHAR* pwsAttriName) const; 90 void GetString(const FX_WCHAR* pwsAttriName, 91 CFX_WideString& wsAttriValue, 92 const FX_WCHAR* pwsDefValue = nullptr) const; 93 void SetString(const CFX_WideString& wsAttriName, 94 const CFX_WideString& wsAttriValue); 95 int32_t GetInteger(const FX_WCHAR* pwsAttriName, int32_t iDefValue = 0) const; 96 void SetInteger(const FX_WCHAR* pwsAttriName, int32_t iAttriValue); 97 FX_FLOAT GetFloat(const FX_WCHAR* pwsAttriName, FX_FLOAT fDefValue = 0) const; 98 void SetFloat(const FX_WCHAR* pwsAttriName, FX_FLOAT fAttriValue); 99 void RemoveAttribute(const FX_WCHAR* pwsAttriName); 100 int32_t CountData() const; 101 bool GetData(int32_t index, CFX_WideString& wsData) const; 102 void AppendData(const CFX_WideString& wsData); 103 void RemoveData(int32_t index); 104 105 CFX_WideString m_wsTarget; 106 std::vector<CFX_WideString> m_Attributes; 107 std::vector<CFX_WideString> m_TargetData; 108 }; 109 110 class CFDE_XMLElement : public CFDE_XMLNode { 111 public: 112 explicit CFDE_XMLElement(const CFX_WideString& wsTag); 113 ~CFDE_XMLElement() override; 114 115 // CFDE_XMLNode 116 FDE_XMLNODETYPE GetType() const override; 117 CFDE_XMLNode* Clone(bool bRecursive) override; 118 119 void GetTagName(CFX_WideString& wsTag) const; 120 void GetLocalTagName(CFX_WideString& wsTag) const; 121 122 void GetNamespacePrefix(CFX_WideString& wsPrefix) const; 123 void GetNamespaceURI(CFX_WideString& wsNamespace) const; 124 125 int32_t CountAttributes() const; 126 bool GetAttribute(int32_t index, 127 CFX_WideString& wsAttriName, 128 CFX_WideString& wsAttriValue) const; 129 bool HasAttribute(const FX_WCHAR* pwsAttriName) const; 130 void RemoveAttribute(const FX_WCHAR* pwsAttriName); 131 132 void GetString(const FX_WCHAR* pwsAttriName, 133 CFX_WideString& wsAttriValue, 134 const FX_WCHAR* pwsDefValue = nullptr) const; 135 void SetString(const CFX_WideString& wsAttriName, 136 const CFX_WideString& wsAttriValue); 137 138 int32_t GetInteger(const FX_WCHAR* pwsAttriName, int32_t iDefValue = 0) const; 139 void SetInteger(const FX_WCHAR* pwsAttriName, int32_t iAttriValue); 140 141 FX_FLOAT GetFloat(const FX_WCHAR* pwsAttriName, FX_FLOAT fDefValue = 0) const; 142 void SetFloat(const FX_WCHAR* pwsAttriName, FX_FLOAT fAttriValue); 143 144 void GetTextData(CFX_WideString& wsText) const; 145 void SetTextData(const CFX_WideString& wsText); 146 147 CFX_WideString m_wsTag; 148 std::vector<CFX_WideString> m_Attributes; 149 }; 150 151 class CFDE_XMLText : public CFDE_XMLNode { 152 public: 153 explicit CFDE_XMLText(const CFX_WideString& wsText); 154 ~CFDE_XMLText() override; 155 156 // CFDE_XMLNode 157 FDE_XMLNODETYPE GetType() const override; 158 CFDE_XMLNode* Clone(bool bRecursive) override; 159 GetText(CFX_WideString & wsText)160 void GetText(CFX_WideString& wsText) const { wsText = m_wsText; } SetText(const CFX_WideString & wsText)161 void SetText(const CFX_WideString& wsText) { m_wsText = wsText; } 162 163 CFX_WideString m_wsText; 164 }; 165 166 class CFDE_XMLDeclaration : public CFDE_XMLNode { 167 public: CFDE_XMLDeclaration()168 CFDE_XMLDeclaration() {} ~CFDE_XMLDeclaration()169 ~CFDE_XMLDeclaration() override {} 170 }; 171 172 class CFDE_XMLCharData : public CFDE_XMLDeclaration { 173 public: 174 explicit CFDE_XMLCharData(const CFX_WideString& wsCData); 175 ~CFDE_XMLCharData() override; 176 177 FDE_XMLNODETYPE GetType() const override; 178 CFDE_XMLNode* Clone(bool bRecursive) override; 179 GetCharData(CFX_WideString & wsCharData)180 void GetCharData(CFX_WideString& wsCharData) const { 181 wsCharData = m_wsCharData; 182 } SetCharData(const CFX_WideString & wsCData)183 void SetCharData(const CFX_WideString& wsCData) { m_wsCharData = wsCData; } 184 185 CFX_WideString m_wsCharData; 186 }; 187 188 class CFDE_XMLDoc { 189 public: 190 CFDE_XMLDoc(); 191 ~CFDE_XMLDoc(); 192 193 bool LoadXML(std::unique_ptr<IFDE_XMLParser> pXMLParser); 194 int32_t DoLoad(IFX_Pause* pPause = nullptr); 195 void CloseXML(); GetRoot()196 CFDE_XMLNode* GetRoot() const { return m_pRoot; } 197 void SaveXML(CFX_RetainPtr<IFGAS_Stream>& pXMLStream, bool bSaveBOM = true); 198 void SaveXMLNode(const CFX_RetainPtr<IFGAS_Stream>& pXMLStream, 199 CFDE_XMLNode* pNode); 200 201 protected: 202 void Reset(bool bInitRoot); 203 void ReleaseParser(); 204 205 CFX_RetainPtr<IFGAS_Stream> m_pStream; 206 int32_t m_iStatus; 207 CFDE_XMLNode* m_pRoot; 208 std::unique_ptr<IFDE_XMLParser> m_pXMLParser; 209 }; 210 211 class IFDE_XMLParser { 212 public: ~IFDE_XMLParser()213 virtual ~IFDE_XMLParser() {} 214 virtual int32_t DoParser(IFX_Pause* pPause) = 0; 215 }; 216 217 class CFDE_BlockBuffer { 218 public: 219 explicit CFDE_BlockBuffer(int32_t iAllocStep = 1024 * 1024); 220 ~CFDE_BlockBuffer(); 221 222 bool InitBuffer(int32_t iBufferSize = 1024 * 1024); IsInitialized()223 bool IsInitialized() { return m_iBufferSize / m_iAllocStep >= 1; } 224 FX_WCHAR* GetAvailableBlock(int32_t& iIndexInBlock); GetAllocStep()225 inline int32_t GetAllocStep() const { return m_iAllocStep; } GetDataLengthRef()226 inline int32_t& GetDataLengthRef() { return m_iDataLength; } 227 inline void Reset(bool bReserveData = true) { 228 if (!bReserveData) { 229 m_iStartPosition = 0; 230 } 231 m_iDataLength = 0; 232 } 233 void SetTextChar(int32_t iIndex, FX_WCHAR ch); 234 int32_t DeleteTextChars(int32_t iCount, bool bDirection = true); 235 void GetTextData(CFX_WideString& wsTextData, 236 int32_t iStart = 0, 237 int32_t iLength = -1) const; 238 239 protected: 240 inline void TextDataIndex2BufIndex(const int32_t iIndex, 241 int32_t& iBlockIndex, 242 int32_t& iInnerIndex) const; 243 void ClearBuffer(); 244 245 CFX_ArrayTemplate<FX_WCHAR*> m_BlockArray; 246 int32_t m_iDataLength; 247 int32_t m_iBufferSize; 248 int32_t m_iAllocStep; 249 int32_t m_iStartPosition; 250 }; 251 252 class CFDE_XMLSyntaxParser { 253 public: 254 CFDE_XMLSyntaxParser(); 255 ~CFDE_XMLSyntaxParser(); 256 257 void Init(const CFX_RetainPtr<IFGAS_Stream>& pStream, 258 int32_t iXMLPlaneSize, 259 int32_t iTextDataSize = 256); 260 261 FDE_XmlSyntaxResult DoSyntaxParse(); 262 263 int32_t GetStatus() const; GetCurrentPos()264 int32_t GetCurrentPos() const { 265 return m_iParsedChars + (m_pStart - m_pBuffer); 266 } 267 FX_FILESIZE GetCurrentBinaryPos() const; GetCurrentNodeNumber()268 int32_t GetCurrentNodeNumber() const { return m_iCurrentNodeNum; } GetLastNodeNumber()269 int32_t GetLastNodeNumber() const { return m_iLastNodeNum; } 270 GetTargetName(CFX_WideString & wsTarget)271 void GetTargetName(CFX_WideString& wsTarget) const { 272 m_BlockBuffer.GetTextData(wsTarget, 0, m_iTextDataLength); 273 } GetTagName(CFX_WideString & wsTag)274 void GetTagName(CFX_WideString& wsTag) const { 275 m_BlockBuffer.GetTextData(wsTag, 0, m_iTextDataLength); 276 } GetAttributeName(CFX_WideString & wsAttriName)277 void GetAttributeName(CFX_WideString& wsAttriName) const { 278 m_BlockBuffer.GetTextData(wsAttriName, 0, m_iTextDataLength); 279 } GetAttributeValue(CFX_WideString & wsAttriValue)280 void GetAttributeValue(CFX_WideString& wsAttriValue) const { 281 m_BlockBuffer.GetTextData(wsAttriValue, 0, m_iTextDataLength); 282 } GetTextData(CFX_WideString & wsText)283 void GetTextData(CFX_WideString& wsText) const { 284 m_BlockBuffer.GetTextData(wsText, 0, m_iTextDataLength); 285 } GetTargetData(CFX_WideString & wsData)286 void GetTargetData(CFX_WideString& wsData) const { 287 m_BlockBuffer.GetTextData(wsData, 0, m_iTextDataLength); 288 } 289 290 protected: 291 enum class FDE_XmlSyntaxState { 292 Text, 293 Node, 294 Target, 295 Tag, 296 AttriName, 297 AttriEqualSign, 298 AttriQuotation, 299 AttriValue, 300 Entity, 301 EntityDecimal, 302 EntityHex, 303 CloseInstruction, 304 BreakElement, 305 CloseElement, 306 SkipDeclNode, 307 DeclCharData, 308 SkipComment, 309 SkipCommentOrDecl, 310 SkipCData, 311 TargetData 312 }; 313 314 void ParseTextChar(FX_WCHAR ch); 315 316 CFX_RetainPtr<IFGAS_Stream> m_pStream; 317 int32_t m_iXMLPlaneSize; 318 int32_t m_iCurrentPos; 319 int32_t m_iCurrentNodeNum; 320 int32_t m_iLastNodeNum; 321 int32_t m_iParsedChars; 322 int32_t m_iParsedBytes; 323 FX_WCHAR* m_pBuffer; 324 int32_t m_iBufferChars; 325 bool m_bEOS; 326 FX_WCHAR* m_pStart; 327 FX_WCHAR* m_pEnd; 328 FDE_XMLNODE m_CurNode; 329 CFX_StackTemplate<FDE_XMLNODE> m_XMLNodeStack; 330 CFDE_BlockBuffer m_BlockBuffer; 331 int32_t m_iAllocStep; 332 int32_t& m_iDataLength; 333 FX_WCHAR* m_pCurrentBlock; 334 int32_t m_iIndexInBlock; 335 int32_t m_iTextDataLength; 336 FDE_XmlSyntaxResult m_syntaxParserResult; 337 FDE_XmlSyntaxState m_syntaxParserState; 338 FX_WCHAR m_wQuotationMark; 339 int32_t m_iEntityStart; 340 CFX_StackTemplate<uint32_t> m_SkipStack; 341 FX_WCHAR m_SkipChar; 342 }; 343 344 #endif // XFA_FDE_XML_FDE_XML_IMP_H_ 345