1 /*============================================================================= 2 Copyright (c) 2017 Daniel James 3 4 Use, modification and distribution is subject to the Boost Software 5 License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at 6 http://www.boost.org/LICENSE_1_0.txt) 7 =============================================================================*/ 8 9 #include "xml_parse.hpp" 10 #include "simple_parse.hpp" 11 #include "stream.hpp" 12 #include "utils.hpp" 13 14 namespace quickbook 15 { 16 namespace detail 17 { 18 // write_xml_tree 19 20 void write_xml_tree_impl( 21 std::string& out, xml_element* node, unsigned int depth); 22 write_xml_tree(xml_element * node)23 void write_xml_tree(xml_element* node) 24 { 25 std::string result; 26 write_xml_tree_impl(result, node, 0); 27 quickbook::detail::out() << result << std::flush; 28 } 29 write_xml_tree_impl(std::string & out,xml_element * node,unsigned int depth)30 void write_xml_tree_impl( 31 std::string& out, xml_element* node, unsigned int depth) 32 { 33 if (!node) { 34 return; 35 } 36 37 for (unsigned i = 0; i < depth; ++i) { 38 out += " "; 39 } 40 switch (node->type_) { 41 case xml_element::element_node: 42 out += "Node: "; 43 out += node->name_; 44 break; 45 case xml_element::element_text: 46 out += "Text"; 47 break; 48 case xml_element::element_html: 49 out += "HTML"; 50 break; 51 default: 52 out += "Unknown node type"; 53 break; 54 } 55 out += "\n"; 56 for (xml_element* it = node->children(); it; it = it->next()) { 57 write_xml_tree_impl(out, it, depth + 1); 58 } 59 } 60 61 // xml_parse 62 63 void read_tag( 64 xml_tree_builder&, 65 string_iterator& it, 66 string_iterator start, 67 string_iterator end); 68 void read_close_tag( 69 xml_tree_builder&, 70 string_iterator& it, 71 string_iterator start, 72 string_iterator end); 73 void skip_question_mark_tag( 74 string_iterator& it, string_iterator start, string_iterator end); 75 void skip_exclamation_mark_tag( 76 string_iterator& it, string_iterator start, string_iterator end); 77 quickbook::string_view read_tag_name( 78 string_iterator& it, string_iterator start, string_iterator end); 79 quickbook::string_view read_attribute_value( 80 string_iterator& it, string_iterator start, string_iterator end); 81 quickbook::string_view read_string( 82 string_iterator& it, string_iterator end); 83 xml_parse(quickbook::string_view source)84 xml_tree xml_parse(quickbook::string_view source) 85 { 86 typedef string_iterator iterator; 87 iterator it = source.begin(), end = source.end(); 88 89 xml_tree_builder builder; 90 91 while (true) { 92 iterator start = it; 93 read_to(it, end, '<'); 94 if (start != it) { 95 builder.add_element(xml_element::text_node( 96 quickbook::string_view(start, it - start))); 97 } 98 99 if (it == end) { 100 break; 101 } 102 start = it++; 103 if (it == end) { 104 throw xml_parse_error("Invalid tag", start); 105 } 106 107 switch (*it) { 108 case '?': 109 skip_question_mark_tag(it, start, end); 110 break; 111 case '!': 112 skip_exclamation_mark_tag(it, start, end); 113 break; 114 case '/': 115 read_close_tag(builder, it, start, end); 116 break; 117 default: 118 read_tag(builder, it, start, end); 119 break; 120 } 121 } 122 123 return builder.release(); 124 } 125 read_tag(xml_tree_builder & builder,string_iterator & it,string_iterator start,string_iterator end)126 void read_tag( 127 xml_tree_builder& builder, 128 string_iterator& it, 129 string_iterator start, 130 string_iterator end) 131 { 132 assert(it == start + 1 && it != end); 133 quickbook::string_view name = read_tag_name(it, start, end); 134 xml_element* node = xml_element::node(name); 135 builder.add_element(node); 136 137 // Read attributes 138 while (true) { 139 read_some_of(it, end, " \t\n\r"); 140 if (it == end) { 141 throw xml_parse_error("Invalid tag", start); 142 } 143 if (*it == '>') { 144 ++it; 145 builder.start_children(); 146 break; 147 } 148 if (*it == '/') { 149 ++it; 150 read_some_of(it, end, " \t\n\r"); 151 if (it == end || *it != '>') { 152 throw xml_parse_error("Invalid tag", start); 153 } 154 ++it; 155 break; 156 } 157 quickbook::string_view attribute_name = 158 read_tag_name(it, start, end); 159 read_some_of(it, end, " \t\n\r"); 160 if (it == end) { 161 throw xml_parse_error("Invalid tag", start); 162 } 163 quickbook::string_view attribute_value; 164 if (*it == '=') { 165 ++it; 166 attribute_value = read_attribute_value(it, start, end); 167 } 168 node->set_attribute( 169 attribute_name, 170 quickbook::detail::decode_string(attribute_value)); 171 } 172 } 173 read_close_tag(xml_tree_builder & builder,string_iterator & it,string_iterator start,string_iterator end)174 void read_close_tag( 175 xml_tree_builder& builder, 176 string_iterator& it, 177 string_iterator start, 178 string_iterator end) 179 { 180 assert(it == start + 1 && it != end && *it == '/'); 181 ++it; 182 quickbook::string_view name = read_tag_name(it, start, end); 183 read_some_of(it, end, " \t\n\r"); 184 if (it == end || *it != '>') { 185 throw xml_parse_error("Invalid close tag", start); 186 } 187 ++it; 188 189 if (!builder.parent() || builder.parent()->name_ != name) { 190 throw xml_parse_error("Close tag doesn't match", start); 191 } 192 193 builder.end_children(); 194 } 195 skip_question_mark_tag(string_iterator & it,string_iterator start,string_iterator end)196 void skip_question_mark_tag( 197 string_iterator& it, string_iterator start, string_iterator end) 198 { 199 assert(it == start + 1 && it != end && *it == '?'); 200 ++it; 201 202 while (true) { 203 read_to_one_of(it, end, "\"'?<>"); 204 if (it == end) { 205 throw xml_parse_error("Invalid tag", start); 206 } 207 switch (*it) { 208 case '"': 209 case '\'': 210 read_string(it, end); 211 break; 212 case '?': 213 if (read(it, end, "?>")) { 214 return; 215 } 216 else { 217 ++it; 218 } 219 break; 220 default: 221 throw xml_parse_error("Invalid tag", start); 222 } 223 } 224 } 225 skip_exclamation_mark_tag(string_iterator & it,string_iterator start,string_iterator end)226 void skip_exclamation_mark_tag( 227 string_iterator& it, string_iterator start, string_iterator end) 228 { 229 assert(it == start + 1 && it != end && *it == '!'); 230 ++it; 231 232 if (read(it, end, "--")) { 233 if (read_past(it, end, "-->")) { 234 return; 235 } 236 else { 237 throw xml_parse_error("Invalid comment", start); 238 } 239 } 240 241 while (true) { 242 read_to_one_of(it, end, "\"'<>"); 243 if (it == end) { 244 throw xml_parse_error("Invalid tag", start); 245 } 246 switch (*it) { 247 case '"': 248 case '\'': 249 read_string(it, end); 250 break; 251 case '>': 252 ++it; 253 return; 254 default: 255 throw xml_parse_error("Invalid tag", start); 256 } 257 } 258 } 259 read_tag_name(string_iterator & it,string_iterator start,string_iterator end)260 quickbook::string_view read_tag_name( 261 string_iterator& it, string_iterator start, string_iterator end) 262 { 263 read_some_of(it, end, " \t\n\r"); 264 string_iterator name_start = it; 265 read_some_of( 266 it, end, 267 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ:-"); 268 if (name_start == it) { 269 throw xml_parse_error("Invalid tag", start); 270 } 271 return quickbook::string_view(name_start, it - name_start); 272 } 273 read_attribute_value(string_iterator & it,string_iterator start,string_iterator end)274 quickbook::string_view read_attribute_value( 275 string_iterator& it, string_iterator start, string_iterator end) 276 { 277 read_some_of(it, end, " \t\n\r"); 278 if (*it == '"' || *it == '\'') { 279 return read_string(it, end); 280 } 281 else { 282 throw xml_parse_error("Invalid tag", start); 283 } 284 } 285 read_string(string_iterator & it,string_iterator end)286 quickbook::string_view read_string( 287 string_iterator& it, string_iterator end) 288 { 289 assert(it != end && (*it == '"' || *it == '\'')); 290 291 string_iterator start = it; 292 char deliminator = *it; 293 ++it; 294 read_to(it, end, deliminator); 295 if (it == end) { 296 throw xml_parse_error("Invalid string", start); 297 } 298 ++it; 299 return quickbook::string_view(start + 1, it - start - 2); 300 } 301 } 302 } 303