• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*=============================================================================
2 Copyright (c) 2017 Daniel James
3 
4 Use, modification and distribution is subject to the Boost Software
5 License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
6 http://www.boost.org/LICENSE_1_0.txt)
7 =============================================================================*/
8 
9 #include "xml_parse.hpp"
10 #include "simple_parse.hpp"
11 #include "stream.hpp"
12 #include "utils.hpp"
13 
14 namespace quickbook
15 {
16     namespace detail
17     {
18         // write_xml_tree
19 
20         void write_xml_tree_impl(
21             std::string& out, xml_element* node, unsigned int depth);
22 
write_xml_tree(xml_element * node)23         void write_xml_tree(xml_element* node)
24         {
25             std::string result;
26             write_xml_tree_impl(result, node, 0);
27             quickbook::detail::out() << result << std::flush;
28         }
29 
write_xml_tree_impl(std::string & out,xml_element * node,unsigned int depth)30         void write_xml_tree_impl(
31             std::string& out, xml_element* node, unsigned int depth)
32         {
33             if (!node) {
34                 return;
35             }
36 
37             for (unsigned i = 0; i < depth; ++i) {
38                 out += "  ";
39             }
40             switch (node->type_) {
41             case xml_element::element_node:
42                 out += "Node: ";
43                 out += node->name_;
44                 break;
45             case xml_element::element_text:
46                 out += "Text";
47                 break;
48             case xml_element::element_html:
49                 out += "HTML";
50                 break;
51             default:
52                 out += "Unknown node type";
53                 break;
54             }
55             out += "\n";
56             for (xml_element* it = node->children(); it; it = it->next()) {
57                 write_xml_tree_impl(out, it, depth + 1);
58             }
59         }
60 
61         // xml_parse
62 
63         void read_tag(
64             xml_tree_builder&,
65             string_iterator& it,
66             string_iterator start,
67             string_iterator end);
68         void read_close_tag(
69             xml_tree_builder&,
70             string_iterator& it,
71             string_iterator start,
72             string_iterator end);
73         void skip_question_mark_tag(
74             string_iterator& it, string_iterator start, string_iterator end);
75         void skip_exclamation_mark_tag(
76             string_iterator& it, string_iterator start, string_iterator end);
77         quickbook::string_view read_tag_name(
78             string_iterator& it, string_iterator start, string_iterator end);
79         quickbook::string_view read_attribute_value(
80             string_iterator& it, string_iterator start, string_iterator end);
81         quickbook::string_view read_string(
82             string_iterator& it, string_iterator end);
83 
xml_parse(quickbook::string_view source)84         xml_tree xml_parse(quickbook::string_view source)
85         {
86             typedef string_iterator iterator;
87             iterator it = source.begin(), end = source.end();
88 
89             xml_tree_builder builder;
90 
91             while (true) {
92                 iterator start = it;
93                 read_to(it, end, '<');
94                 if (start != it) {
95                     builder.add_element(xml_element::text_node(
96                         quickbook::string_view(start, it - start)));
97                 }
98 
99                 if (it == end) {
100                     break;
101                 }
102                 start = it++;
103                 if (it == end) {
104                     throw xml_parse_error("Invalid tag", start);
105                 }
106 
107                 switch (*it) {
108                 case '?':
109                     skip_question_mark_tag(it, start, end);
110                     break;
111                 case '!':
112                     skip_exclamation_mark_tag(it, start, end);
113                     break;
114                 case '/':
115                     read_close_tag(builder, it, start, end);
116                     break;
117                 default:
118                     read_tag(builder, it, start, end);
119                     break;
120                 }
121             }
122 
123             return builder.release();
124         }
125 
read_tag(xml_tree_builder & builder,string_iterator & it,string_iterator start,string_iterator end)126         void read_tag(
127             xml_tree_builder& builder,
128             string_iterator& it,
129             string_iterator start,
130             string_iterator end)
131         {
132             assert(it == start + 1 && it != end);
133             quickbook::string_view name = read_tag_name(it, start, end);
134             xml_element* node = xml_element::node(name);
135             builder.add_element(node);
136 
137             // Read attributes
138             while (true) {
139                 read_some_of(it, end, " \t\n\r");
140                 if (it == end) {
141                     throw xml_parse_error("Invalid tag", start);
142                 }
143                 if (*it == '>') {
144                     ++it;
145                     builder.start_children();
146                     break;
147                 }
148                 if (*it == '/') {
149                     ++it;
150                     read_some_of(it, end, " \t\n\r");
151                     if (it == end || *it != '>') {
152                         throw xml_parse_error("Invalid tag", start);
153                     }
154                     ++it;
155                     break;
156                 }
157                 quickbook::string_view attribute_name =
158                     read_tag_name(it, start, end);
159                 read_some_of(it, end, " \t\n\r");
160                 if (it == end) {
161                     throw xml_parse_error("Invalid tag", start);
162                 }
163                 quickbook::string_view attribute_value;
164                 if (*it == '=') {
165                     ++it;
166                     attribute_value = read_attribute_value(it, start, end);
167                 }
168                 node->set_attribute(
169                     attribute_name,
170                     quickbook::detail::decode_string(attribute_value));
171             }
172         }
173 
read_close_tag(xml_tree_builder & builder,string_iterator & it,string_iterator start,string_iterator end)174         void read_close_tag(
175             xml_tree_builder& builder,
176             string_iterator& it,
177             string_iterator start,
178             string_iterator end)
179         {
180             assert(it == start + 1 && it != end && *it == '/');
181             ++it;
182             quickbook::string_view name = read_tag_name(it, start, end);
183             read_some_of(it, end, " \t\n\r");
184             if (it == end || *it != '>') {
185                 throw xml_parse_error("Invalid close tag", start);
186             }
187             ++it;
188 
189             if (!builder.parent() || builder.parent()->name_ != name) {
190                 throw xml_parse_error("Close tag doesn't match", start);
191             }
192 
193             builder.end_children();
194         }
195 
skip_question_mark_tag(string_iterator & it,string_iterator start,string_iterator end)196         void skip_question_mark_tag(
197             string_iterator& it, string_iterator start, string_iterator end)
198         {
199             assert(it == start + 1 && it != end && *it == '?');
200             ++it;
201 
202             while (true) {
203                 read_to_one_of(it, end, "\"'?<>");
204                 if (it == end) {
205                     throw xml_parse_error("Invalid tag", start);
206                 }
207                 switch (*it) {
208                 case '"':
209                 case '\'':
210                     read_string(it, end);
211                     break;
212                 case '?':
213                     if (read(it, end, "?>")) {
214                         return;
215                     }
216                     else {
217                         ++it;
218                     }
219                     break;
220                 default:
221                     throw xml_parse_error("Invalid tag", start);
222                 }
223             }
224         }
225 
skip_exclamation_mark_tag(string_iterator & it,string_iterator start,string_iterator end)226         void skip_exclamation_mark_tag(
227             string_iterator& it, string_iterator start, string_iterator end)
228         {
229             assert(it == start + 1 && it != end && *it == '!');
230             ++it;
231 
232             if (read(it, end, "--")) {
233                 if (read_past(it, end, "-->")) {
234                     return;
235                 }
236                 else {
237                     throw xml_parse_error("Invalid comment", start);
238                 }
239             }
240 
241             while (true) {
242                 read_to_one_of(it, end, "\"'<>");
243                 if (it == end) {
244                     throw xml_parse_error("Invalid tag", start);
245                 }
246                 switch (*it) {
247                 case '"':
248                 case '\'':
249                     read_string(it, end);
250                     break;
251                 case '>':
252                     ++it;
253                     return;
254                 default:
255                     throw xml_parse_error("Invalid tag", start);
256                 }
257             }
258         }
259 
read_tag_name(string_iterator & it,string_iterator start,string_iterator end)260         quickbook::string_view read_tag_name(
261             string_iterator& it, string_iterator start, string_iterator end)
262         {
263             read_some_of(it, end, " \t\n\r");
264             string_iterator name_start = it;
265             read_some_of(
266                 it, end,
267                 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ:-");
268             if (name_start == it) {
269                 throw xml_parse_error("Invalid tag", start);
270             }
271             return quickbook::string_view(name_start, it - name_start);
272         }
273 
read_attribute_value(string_iterator & it,string_iterator start,string_iterator end)274         quickbook::string_view read_attribute_value(
275             string_iterator& it, string_iterator start, string_iterator end)
276         {
277             read_some_of(it, end, " \t\n\r");
278             if (*it == '"' || *it == '\'') {
279                 return read_string(it, end);
280             }
281             else {
282                 throw xml_parse_error("Invalid tag", start);
283             }
284         }
285 
read_string(string_iterator & it,string_iterator end)286         quickbook::string_view read_string(
287             string_iterator& it, string_iterator end)
288         {
289             assert(it != end && (*it == '"' || *it == '\''));
290 
291             string_iterator start = it;
292             char deliminator = *it;
293             ++it;
294             read_to(it, end, deliminator);
295             if (it == end) {
296                 throw xml_parse_error("Invalid string", start);
297             }
298             ++it;
299             return quickbook::string_view(start + 1, it - start - 2);
300         }
301     }
302 }
303