1 #ifndef _XEXMLPARSER_HPP
2 #define _XEXMLPARSER_HPP
3 /*-------------------------------------------------------------------------
4  * drawElements Quality Program Test Executor
5  * ------------------------------------------
6  *
7  * Copyright 2014 The Android Open Source Project
8  *
9  * Licensed under the Apache License, Version 2.0 (the "License");
10  * you may not use this file except in compliance with the License.
11  * You may obtain a copy of the License at
12  *
13  *      http://www.apache.org/licenses/LICENSE-2.0
14  *
15  * Unless required by applicable law or agreed to in writing, software
16  * distributed under the License is distributed on an "AS IS" BASIS,
17  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18  * See the License for the specific language governing permissions and
19  * limitations under the License.
20  *
21  *//*!
22  * \file
23  * \brief XML Parser.
24  *
25  * \todo [2012-06-07 pyry] Not supported / handled properly:
26  *  - xml namespaces (<ns:Element>)
27  *  - backslash escapes in strings
 *  - &quot; -style escapes
29  *  - utf-8
30  *//*--------------------------------------------------------------------*/
31 
32 #include "xeDefs.hpp"
33 #include "deRingBuffer.hpp"
34 
35 #include <string>
36 #include <map>
37 
38 namespace xe
39 {
40 namespace xml
41 {
42 
//! Token kinds reported by the tokenizer.
enum Token
{
    //! Not enough data to determine token.
    TOKEN_INCOMPLETE = 0,
    //! End of document string.
    TOKEN_END_OF_STRING,
    //! Block of data (anything outside tags).
    TOKEN_DATA,
    //! Comment: <!-- comment -->
    TOKEN_COMMENT,
    //! Identifier (in tags).
    TOKEN_IDENTIFIER,
    //! String (in tags).
    TOKEN_STRING,
    //! Tag start: <
    TOKEN_TAG_START,
    //! Tag end: >
    TOKEN_TAG_END,
    //! End tag start: </
    TOKEN_END_TAG_START,
    //! Empty element end: />
    TOKEN_EMPTY_ELEMENT_END,
    //! Processing instruction start: <?
    TOKEN_PROCESSING_INSTRUCTION_START,
    //! Processing instruction end: ?>
    TOKEN_PROCESSING_INSTRUCTION_END,
    //! Equal sign: =
    TOKEN_EQUAL,
    //! Entity reference, such as &amp;
    TOKEN_ENTITY,

    //! Number of token kinds; not a token itself.
    TOKEN_LAST
};
62 
//! Element kinds reported by the parser.
enum Element
{
    //! Incomplete element.
    ELEMENT_INCOMPLETE = 0,
    //! Element start.
    ELEMENT_START,
    //! Element end.
    ELEMENT_END,
    //! Data element.
    ELEMENT_DATA,
    //! End of document string.
    ELEMENT_END_OF_STRING,

    //! Number of element kinds; not an element itself.
    ELEMENT_LAST
};
73 
74 const char *getTokenName(Token token);
75 
76 // \todo [2012-10-17 pyry] Add line number etc.
77 class ParseError : public xe::ParseError
78 {
79 public:
ParseError(const std::string & message)80     ParseError(const std::string &message) : xe::ParseError(message)
81     {
82     }
83 };
84 
85 class Tokenizer
86 {
87 public:
88     Tokenizer(void);
89     ~Tokenizer(void);
90 
91     void clear(void); //!< Resets tokenizer to initial state.
92 
93     void feed(const uint8_t *bytes, int numBytes);
94     void advance(void);
95 
getToken(void) const96     Token getToken(void) const
97     {
98         return m_curToken;
99     }
getTokenLen(void) const100     int getTokenLen(void) const
101     {
102         return m_curTokenLen;
103     }
getTokenByte(int offset) const104     uint8_t getTokenByte(int offset) const
105     {
106         DE_ASSERT(m_curToken != TOKEN_INCOMPLETE && m_curToken != TOKEN_END_OF_STRING);
107         return m_buf.peekBack(offset);
108     }
109     void getTokenStr(std::string &dst) const;
110     void appendTokenStr(std::string &dst) const;
111 
112     void getString(std::string &dst) const;
113 
114 private:
115     Tokenizer(const Tokenizer &other);
116     Tokenizer &operator=(const Tokenizer &other);
117 
118     int getChar(int offset) const;
119 
120     void error(const std::string &what);
121 
122     enum State
123     {
124         STATE_DATA = 0,
125         STATE_TAG,
126         STATE_IDENTIFIER,
127         STATE_VALUE,
128         STATE_COMMENT,
129         STATE_ENTITY,
130 
131         STATE_LAST
132     };
133 
134     enum
135     {
136         END_OF_STRING = 0,         //!< End of string (0).
137         END_OF_BUFFER = 0xffffffff //!< End of current data buffer.
138     };
139 
140     Token m_curToken;  //!< Current token.
141     int m_curTokenLen; //!< Length of current token.
142 
143     State m_state; //!< Tokenization state.
144 
145     de::RingBuffer<uint8_t> m_buf;
146 };
147 
148 class Parser
149 {
150 public:
151     typedef std::map<std::string, std::string> AttributeMap;
152     typedef AttributeMap::const_iterator AttributeIter;
153 
154     Parser(void);
155     ~Parser(void);
156 
157     void clear(void); //!< Resets parser to initial state.
158 
159     void feed(const uint8_t *bytes, int numBytes);
160     void advance(void);
161 
getElement(void) const162     Element getElement(void) const
163     {
164         return m_element;
165     }
166 
167     // For ELEMENT_START / ELEMENT_END.
getElementName(void) const168     const char *getElementName(void) const
169     {
170         return m_elementName.c_str();
171     }
172 
173     // For ELEMENT_START.
hasAttribute(const char * name) const174     bool hasAttribute(const char *name) const
175     {
176         return m_attributes.find(name) != m_attributes.end();
177     }
getAttribute(const char * name) const178     const char *getAttribute(const char *name) const
179     {
180         return m_attributes.find(name)->second.c_str();
181     }
attributes(void) const182     const AttributeMap &attributes(void) const
183     {
184         return m_attributes;
185     }
186 
187     // For ELEMENT_DATA.
188     int getDataSize(void) const;
189     uint8_t getDataByte(int offset) const;
190     void getDataStr(std::string &dst) const;
191     void appendDataStr(std::string &dst) const;
192 
193 private:
194     Parser(const Parser &other);
195     Parser &operator=(const Parser &other);
196 
197     void parseEntityValue(void);
198 
199     void error(const std::string &what);
200 
201     enum State
202     {
203         STATE_DATA = 0,                  //!< Initial state - assuming data or tag open.
204         STATE_ENTITY,                    //!< Parsed entity is stored - overrides data.
205         STATE_IN_PROCESSING_INSTRUCTION, //!< In processing instruction.
206         STATE_START_TAG_OPEN,            //!< Start tag open.
207         STATE_END_TAG_OPEN,              //!< End tag open.
208         STATE_EXPECTING_END_TAG_CLOSE,   //!< Expecting end tag close.
209         STATE_ATTRIBUTE_LIST,            //!< Expecting attribute list.
210         STATE_EXPECTING_ATTRIBUTE_EQ,    //!< Got attribute name, expecting =.
211         STATE_EXPECTING_ATTRIBUTE_VALUE, //!< Expecting attribute value.
212         STATE_YIELD_EMPTY_ELEMENT_END,   //!< Empty element: start has been reported but not end.
213 
214         STATE_LAST
215     };
216 
217     Tokenizer m_tokenizer;
218 
219     Element m_element;
220     std::string m_elementName;
221     AttributeMap m_attributes;
222 
223     State m_state;
224     std::string m_attribName;
225     std::string m_entityValue; //!< Data override, such as entity value.
226 };
227 
228 // Inline implementations
229 
getTokenStr(std::string & dst) const230 inline void Tokenizer::getTokenStr(std::string &dst) const
231 {
232     DE_ASSERT(m_curToken != TOKEN_INCOMPLETE && m_curToken != TOKEN_END_OF_STRING);
233     dst.resize(m_curTokenLen);
234     for (int ndx = 0; ndx < m_curTokenLen; ndx++)
235         dst[ndx] = m_buf.peekBack(ndx);
236 }
237 
appendTokenStr(std::string & dst) const238 inline void Tokenizer::appendTokenStr(std::string &dst) const
239 {
240     DE_ASSERT(m_curToken != TOKEN_INCOMPLETE && m_curToken != TOKEN_END_OF_STRING);
241 
242     size_t oldLen = dst.size();
243     dst.resize(oldLen + m_curTokenLen);
244 
245     for (int ndx = 0; ndx < m_curTokenLen; ndx++)
246         dst[oldLen + ndx] = m_buf.peekBack(ndx);
247 }
248 
getDataSize(void) const249 inline int Parser::getDataSize(void) const
250 {
251     if (m_state != STATE_ENTITY)
252         return m_tokenizer.getTokenLen();
253     else
254         return (int)m_entityValue.size();
255 }
256 
getDataByte(int offset) const257 inline uint8_t Parser::getDataByte(int offset) const
258 {
259     if (m_state != STATE_ENTITY)
260         return m_tokenizer.getTokenByte(offset);
261     else
262         return (uint8_t)m_entityValue[offset];
263 }
264 
getDataStr(std::string & dst) const265 inline void Parser::getDataStr(std::string &dst) const
266 {
267     if (m_state != STATE_ENTITY)
268         return m_tokenizer.getTokenStr(dst);
269     else
270         dst = m_entityValue;
271 }
272 
appendDataStr(std::string & dst) const273 inline void Parser::appendDataStr(std::string &dst) const
274 {
275     if (m_state != STATE_ENTITY)
276         return m_tokenizer.appendTokenStr(dst);
277     else
278         dst += m_entityValue;
279 }
280 
281 } // namespace xml
282 } // namespace xe
283 
284 #endif // _XEXMLPARSER_HPP
285