1 #ifndef _XEXMLPARSER_HPP
2 #define _XEXMLPARSER_HPP
3 /*-------------------------------------------------------------------------
4 * drawElements Quality Program Test Executor
5 * ------------------------------------------
6 *
7 * Copyright 2014 The Android Open Source Project
8 *
9 * Licensed under the Apache License, Version 2.0 (the "License");
10 * you may not use this file except in compliance with the License.
11 * You may obtain a copy of the License at
12 *
13 * http://www.apache.org/licenses/LICENSE-2.0
14 *
15 * Unless required by applicable law or agreed to in writing, software
16 * distributed under the License is distributed on an "AS IS" BASIS,
17 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 * See the License for the specific language governing permissions and
19 * limitations under the License.
20 *
21 *//*!
22 * \file
23 * \brief XML Parser.
24 *
25 * \todo [2012-06-07 pyry] Not supported / handled properly:
26 * - xml namespaces (<ns:Element>)
27 * - backslash escapes in strings
 *  - &quot; -style escapes
29 * - utf-8
30 *//*--------------------------------------------------------------------*/
31
32 #include "xeDefs.hpp"
33 #include "deRingBuffer.hpp"
34
35 #include <string>
36 #include <map>
37
38 namespace xe
39 {
40 namespace xml
41 {
42
//! Kinds of tokens reported by the tokenizer.
enum Token
{
	//! Not enough data to determine token.
	TOKEN_INCOMPLETE = 0,
	//! End of document string.
	TOKEN_END_OF_STRING,
	//! Block of data (anything outside tags).
	TOKEN_DATA,
	//! Comment: <!-- comment -->
	TOKEN_COMMENT,
	//! Identifier (in tags).
	TOKEN_IDENTIFIER,
	//! String (in tags).
	TOKEN_STRING,
	//! Tag start: <
	TOKEN_TAG_START,
	//! Tag end: >
	TOKEN_TAG_END,
	//! End tag start: </
	TOKEN_END_TAG_START,
	//! Empty element end: />
	TOKEN_EMPTY_ELEMENT_END,
	//! Processing instruction start: <?
	TOKEN_PROCESSING_INSTRUCTION_START,
	//! Processing instruction end: ?>
	TOKEN_PROCESSING_INSTRUCTION_END,
	//! Equals sign: =
	TOKEN_EQUAL,
	//! Entity reference, such as &amp;
	TOKEN_ENTITY,

	//! Last enumerator; its value equals the number of token kinds above.
	TOKEN_LAST
};
62
//! Kinds of elements reported by the parser.
enum Element
{
	//! Incomplete element.
	ELEMENT_INCOMPLETE = 0,
	//! Element start.
	ELEMENT_START,
	//! Element end.
	ELEMENT_END,
	//! Data element.
	ELEMENT_DATA,
	//! End of document string.
	ELEMENT_END_OF_STRING,

	//! Last enumerator; its value equals the number of element kinds above.
	ELEMENT_LAST
};
73
74 const char* getTokenName (Token token);
75
76 // \todo [2012-10-17 pyry] Add line number etc.
77 class ParseError : public xe::ParseError
78 {
79 public:
ParseError(const std::string & message)80 ParseError (const std::string& message) : xe::ParseError(message) {}
81 };
82
83 class Tokenizer
84 {
85 public:
86 Tokenizer (void);
87 ~Tokenizer (void);
88
89 void clear (void); //!< Resets tokenizer to initial state.
90
91 void feed (const deUint8* bytes, int numBytes);
92 void advance (void);
93
getToken(void) const94 Token getToken (void) const { return m_curToken; }
getTokenLen(void) const95 int getTokenLen (void) const { return m_curTokenLen; }
getTokenByte(int offset) const96 deUint8 getTokenByte (int offset) const { DE_ASSERT(m_curToken != TOKEN_INCOMPLETE && m_curToken != TOKEN_END_OF_STRING); return m_buf.peekBack(offset); }
97 void getTokenStr (std::string& dst) const;
98 void appendTokenStr (std::string& dst) const;
99
100 void getString (std::string& dst) const;
101
102 private:
103 Tokenizer (const Tokenizer& other);
104 Tokenizer& operator= (const Tokenizer& other);
105
106 int getChar (int offset) const;
107
108 void error (const std::string& what);
109
110 enum State
111 {
112 STATE_DATA = 0,
113 STATE_TAG,
114 STATE_IDENTIFIER,
115 STATE_VALUE,
116 STATE_COMMENT,
117 STATE_ENTITY,
118
119 STATE_LAST
120 };
121
122 enum
123 {
124 END_OF_STRING = 0, //!< End of string (0).
125 END_OF_BUFFER = 0xffffffff //!< End of current data buffer.
126 };
127
128 Token m_curToken; //!< Current token.
129 int m_curTokenLen; //!< Length of current token.
130
131 State m_state; //!< Tokenization state.
132
133 de::RingBuffer<deUint8> m_buf;
134 };
135
136 class Parser
137 {
138 public:
139 typedef std::map<std::string, std::string> AttributeMap;
140 typedef AttributeMap::const_iterator AttributeIter;
141
142 Parser (void);
143 ~Parser (void);
144
145 void clear (void); //!< Resets parser to initial state.
146
147 void feed (const deUint8* bytes, int numBytes);
148 void advance (void);
149
getElement(void) const150 Element getElement (void) const { return m_element; }
151
152 // For ELEMENT_START / ELEMENT_END.
getElementName(void) const153 const char* getElementName (void) const { return m_elementName.c_str(); }
154
155 // For ELEMENT_START.
hasAttribute(const char * name) const156 bool hasAttribute (const char* name) const { return m_attributes.find(name) != m_attributes.end(); }
getAttribute(const char * name) const157 const char* getAttribute (const char* name) const { return m_attributes.find(name)->second.c_str(); }
attributes(void) const158 const AttributeMap& attributes (void) const { return m_attributes; }
159
160 // For ELEMENT_DATA.
161 int getDataSize (void) const;
162 deUint8 getDataByte (int offset) const;
163 void getDataStr (std::string& dst) const;
164 void appendDataStr (std::string& dst) const;
165
166 private:
167 Parser (const Parser& other);
168 Parser& operator= (const Parser& other);
169
170 void parseEntityValue (void);
171
172 void error (const std::string& what);
173
174 enum State
175 {
176 STATE_DATA = 0, //!< Initial state - assuming data or tag open.
177 STATE_ENTITY, //!< Parsed entity is stored - overrides data.
178 STATE_IN_PROCESSING_INSTRUCTION, //!< In processing instruction.
179 STATE_START_TAG_OPEN, //!< Start tag open.
180 STATE_END_TAG_OPEN, //!< End tag open.
181 STATE_EXPECTING_END_TAG_CLOSE, //!< Expecting end tag close.
182 STATE_ATTRIBUTE_LIST, //!< Expecting attribute list.
183 STATE_EXPECTING_ATTRIBUTE_EQ, //!< Got attribute name, expecting =.
184 STATE_EXPECTING_ATTRIBUTE_VALUE, //!< Expecting attribute value.
185 STATE_YIELD_EMPTY_ELEMENT_END, //!< Empty element: start has been reported but not end.
186
187 STATE_LAST
188 };
189
190 Tokenizer m_tokenizer;
191
192 Element m_element;
193 std::string m_elementName;
194 AttributeMap m_attributes;
195
196 State m_state;
197 std::string m_attribName;
198 std::string m_entityValue; //!< Data override, such as entity value.
199 };
200
201 // Inline implementations
202
getTokenStr(std::string & dst) const203 inline void Tokenizer::getTokenStr (std::string& dst) const
204 {
205 DE_ASSERT(m_curToken != TOKEN_INCOMPLETE && m_curToken != TOKEN_END_OF_STRING);
206 dst.resize(m_curTokenLen);
207 for (int ndx = 0; ndx < m_curTokenLen; ndx++)
208 dst[ndx] = m_buf.peekBack(ndx);
209 }
210
appendTokenStr(std::string & dst) const211 inline void Tokenizer::appendTokenStr (std::string& dst) const
212 {
213 DE_ASSERT(m_curToken != TOKEN_INCOMPLETE && m_curToken != TOKEN_END_OF_STRING);
214
215 size_t oldLen = dst.size();
216 dst.resize(oldLen+m_curTokenLen);
217
218 for (int ndx = 0; ndx < m_curTokenLen; ndx++)
219 dst[oldLen+ndx] = m_buf.peekBack(ndx);
220 }
221
getDataSize(void) const222 inline int Parser::getDataSize (void) const
223 {
224 if (m_state != STATE_ENTITY)
225 return m_tokenizer.getTokenLen();
226 else
227 return (int)m_entityValue.size();
228 }
229
getDataByte(int offset) const230 inline deUint8 Parser::getDataByte (int offset) const
231 {
232 if (m_state != STATE_ENTITY)
233 return m_tokenizer.getTokenByte(offset);
234 else
235 return (deUint8)m_entityValue[offset];
236 }
237
getDataStr(std::string & dst) const238 inline void Parser::getDataStr (std::string& dst) const
239 {
240 if (m_state != STATE_ENTITY)
241 return m_tokenizer.getTokenStr(dst);
242 else
243 dst = m_entityValue;
244 }
245
appendDataStr(std::string & dst) const246 inline void Parser::appendDataStr (std::string& dst) const
247 {
248 if (m_state != STATE_ENTITY)
249 return m_tokenizer.appendTokenStr(dst);
250 else
251 dst += m_entityValue;
252 }
253
254 } // xml
255 } // xe
256
257 #endif // _XEXMLPARSER_HPP
258