1 // Copyright 2014 PDFium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 6 7 #ifndef CORE_FXCRT_XML_CFX_SAXREADER_H_ 8 #define CORE_FXCRT_XML_CFX_SAXREADER_H_ 9 10 #include <memory> 11 #include <stack> 12 #include <vector> 13 14 #include "core/fxcrt/fx_string.h" 15 #include "core/fxcrt/retain_ptr.h" 16 17 class CFX_SAXCommentContext; 18 class CFX_SAXContext; 19 class IFX_SeekableReadStream; 20 enum class CFX_SaxMode; 21 22 class CFX_SAXItem { 23 public: 24 enum class Type { 25 Unknown = 0, 26 Instruction, 27 Declaration, 28 Comment, 29 Tag, 30 Text, 31 CharData, 32 }; 33 CFX_SAXItem(uint32_t id)34 explicit CFX_SAXItem(uint32_t id) 35 : m_pNode(nullptr), m_eNode(Type::Unknown), m_dwID(id), m_bSkip(false) {} 36 37 CFX_SAXContext* m_pNode; 38 Type m_eNode; 39 const uint32_t m_dwID; 40 bool m_bSkip; 41 }; 42 43 class CFX_SAXFile { 44 public: 45 CFX_SAXFile(); 46 ~CFX_SAXFile(); 47 48 bool StartFile(const RetainPtr<IFX_SeekableReadStream>& pFile, 49 uint32_t dwStart, 50 uint32_t dwLen); 51 bool ReadNextBlock(); 52 void Reset(); 53 54 RetainPtr<IFX_SeekableReadStream> m_pFile; 55 uint32_t m_dwStart; 56 uint32_t m_dwEnd; 57 uint32_t m_dwCur; 58 uint8_t* m_pBuf; 59 uint32_t m_dwBufSize; 60 uint32_t m_dwBufIndex; 61 }; 62 63 enum CFX_SaxParseMode { 64 CFX_SaxParseMode_NotConvert_amp = 1 << 0, 65 CFX_SaxParseMode_NotConvert_lt = 1 << 1, 66 CFX_SaxParseMode_NotConvert_gt = 1 << 2, 67 CFX_SaxParseMode_NotConvert_apos = 1 << 3, 68 CFX_SaxParseMode_NotConvert_quot = 1 << 4, 69 CFX_SaxParseMode_NotConvert_sharp = 1 << 5, 70 CFX_SaxParseMode_NotSkipSpace = 1 << 6 71 }; 72 73 class CFX_SAXReader { 74 public: 75 class HandlerIface { 76 public: ~HandlerIface()77 virtual ~HandlerIface() {} 78 virtual CFX_SAXContext* OnTagEnter(const ByteStringView& bsTagName, 79 CFX_SAXItem::Type eType, 80 uint32_t dwStartPos) = 0; 81 virtual void OnTagAttribute(CFX_SAXContext* pTag, 82 const ByteStringView& bsAttri, 83 const ByteStringView& bsValue) = 0; 84 virtual void OnTagBreak(CFX_SAXContext* pTag) = 0; 85 virtual void OnTagData(CFX_SAXContext* pTag, 86 CFX_SAXItem::Type eType, 87 const ByteStringView& bsData, 88 uint32_t dwStartPos) = 0; 89 virtual void OnTagClose(CFX_SAXContext* pTag, uint32_t dwEndPos) = 0; 90 virtual void OnTagEnd(CFX_SAXContext* pTag, 91 const ByteStringView& bsTagName, 92 uint32_t dwEndPos) = 0; 93 virtual void OnTargetData(CFX_SAXContext* pTag, 94 CFX_SAXItem::Type eType, 95 const ByteStringView& bsData, 96 uint32_t dwStartPos) = 0; 97 }; 98 99 CFX_SAXReader(); 100 ~CFX_SAXReader(); 101 102 int32_t StartParse(const RetainPtr<IFX_SeekableReadStream>& pFile, 103 uint32_t dwStart = 0, 104 uint32_t dwLen = -1, 105 uint32_t dwParseMode = 0); 106 int32_t ContinueParse(); SetHandler(HandlerIface * pHandler)107 void SetHandler(HandlerIface* pHandler) { m_pHandler = pHandler; } 108 109 private: 110 void ParseInternal(); 111 void SkipCurrentNode(); 112 void AppendData(uint8_t ch); 113 void AppendName(uint8_t ch); 114 void ParseText(); 115 void ParseNodeStart(); 116 void ParseInstruction(); 117 void ParseDeclOrComment(); 118 void ParseDeclNode(); 119 void ParseComment(); 120 void ParseCommentContent(); 121 void ParseTagName(); 122 void ParseTagAttributeName(); 123 void ParseTagAttributeEqual(); 124 void ParseTagAttributeValue(); 125 void ParseMaybeClose(); 126 void ParseTagClose(); 127 void ParseTagEnd(); 128 void ParseTargetData(); 129 void Reset(); 130 void ClearData(); 131 void ClearName(); 132 void AppendToData(uint8_t ch); 133 void AppendToName(uint8_t ch); 134 void BackUpAndReplaceDataAt(int32_t index, uint8_t ch); 135 bool IsEntityStart(uint8_t ch) const; 136 bool IsEntityEnd(uint8_t ch) const; 137 int32_t CurrentDataIndex() const; 138 void Push(); 139 void Pop(); 140 CFX_SAXItem* GetCurrentItem() const; 141 bool SkipSpace(uint8_t ch); 142 void SkipNode(); 143 void NotifyData(); 144 void NotifyEnter(); 145 void NotifyAttribute(); 146 void NotifyBreak(); 147 void NotifyClose(); 148 void NotifyEnd(); 149 void NotifyTargetData(); 150 void ReallocDataBuffer(); 151 void ReallocNameBuffer(); 152 void ParseChar(uint8_t ch); 153 154 CFX_SAXFile m_File; 155 HandlerIface* m_pHandler; 156 int32_t m_iState; 157 std::stack<std::unique_ptr<CFX_SAXItem>> m_Stack; 158 uint32_t m_dwItemID; 159 CFX_SaxMode m_eMode; 160 CFX_SaxMode m_ePrevMode; 161 bool m_bCharData; 162 uint8_t m_CurByte; 163 uint32_t m_dwDataOffset; 164 std::stack<char> m_SkipStack; 165 uint8_t m_SkipChar; 166 uint32_t m_dwNodePos; 167 std::vector<uint8_t> m_Data; 168 int32_t m_iEntityStart; // Index into m_Data. 169 std::vector<uint8_t> m_Name; 170 uint32_t m_dwParseMode; 171 std::unique_ptr<CFX_SAXCommentContext> m_pCommentContext; 172 }; 173 174 #endif // CORE_FXCRT_XML_CFX_SAXREADER_H_ 175