1 #ifndef BOOST_PROPERTY_TREE_DETAIL_JSON_PARSER_PARSER_HPP 2 #define BOOST_PROPERTY_TREE_DETAIL_JSON_PARSER_PARSER_HPP 3 4 #include <boost/property_tree/json_parser/error.hpp> 5 6 #include <boost/ref.hpp> 7 #include <boost/bind.hpp> 8 #include <boost/format.hpp> 9 10 #include <iterator> 11 #include <sstream> 12 #include <string> 13 14 namespace boost { namespace property_tree { 15 namespace json_parser { namespace detail 16 { 17 18 template <typename Encoding, typename Iterator, typename Sentinel> 19 class source 20 { 21 public: 22 typedef typename std::iterator_traits<Iterator>::value_type 23 code_unit; 24 typedef bool (Encoding::*encoding_predicate)(code_unit c) const; 25 source(Encoding & encoding)26 explicit source(Encoding& encoding) : encoding(encoding) {} 27 28 template <typename Range> set_input(const std::string & filename,const Range & r)29 void set_input(const std::string& filename, const Range& r) 30 { 31 this->filename = filename; 32 cur = r.begin(); 33 end = r.end(); 34 // Note that there is no backtracking, so if e.g. a UTF-8 file 35 // starts with something that initially looks like a BOM but isn't, 36 // there's trouble. 37 // However, no valid JSON file can start with a UTF-8 EF byte. 38 encoding.skip_introduction(cur, end); 39 line = 1; 40 offset = 0; 41 } 42 done() const43 bool done() const { return cur == end; } 44 parse_error(const char * msg)45 void parse_error(const char* msg) { 46 BOOST_PROPERTY_TREE_THROW( 47 json_parser_error(msg, filename, line)); 48 } 49 next()50 void next() { 51 if (encoding.is_nl(*cur)) { 52 ++line; 53 offset = 0; 54 } else { 55 ++offset; 56 } 57 ++cur; 58 } 59 60 template <typename Action> have(encoding_predicate p,Action & a)61 bool have(encoding_predicate p, Action& a) { 62 bool found = cur != end && (encoding.*p)(*cur); 63 if (found) { 64 a(*cur); 65 next(); 66 } 67 return found; 68 } 69 have(encoding_predicate p)70 bool have(encoding_predicate p) { 71 DoNothing n; 72 return have(p, n); 73 } 74 75 template <typename Action> expect(encoding_predicate p,const char * msg,Action & a)76 void expect(encoding_predicate p, const char* msg, Action& a) { 77 if (!have(p, a)) { 78 parse_error(msg); 79 } 80 } 81 expect(encoding_predicate p,const char * msg)82 void expect(encoding_predicate p, const char* msg) { 83 DoNothing n; 84 expect(p, msg, n); 85 } 86 need_cur(const char * msg)87 code_unit need_cur(const char* msg) { 88 if (cur == end) { 89 parse_error(msg); 90 } 91 return *cur; 92 } 93 raw_cur()94 Iterator& raw_cur() { return cur; } raw_end()95 Sentinel raw_end() { return end; } 96 97 private: 98 struct DoNothing { operator ()boost::property_tree::json_parser::detail::source::DoNothing99 void operator ()(code_unit) const {} 100 }; 101 102 Encoding& encoding; 103 Iterator cur; 104 Sentinel end; 105 std::string filename; 106 int line; 107 int offset; 108 }; 109 110 template <typename Callbacks, typename Encoding, typename Iterator, 111 typename = typename std::iterator_traits<Iterator> 112 ::iterator_category> 113 class number_callback_adapter 114 { 115 public: number_callback_adapter(Callbacks & callbacks,Encoding & encoding,Iterator & cur)116 number_callback_adapter(Callbacks& callbacks, Encoding& encoding, 117 Iterator& cur) 118 : callbacks(callbacks), encoding(encoding), first(cur), cur(cur) 119 {} 120 operator ()(typename Encoding::external_char)121 void operator ()(typename Encoding::external_char) {} 122 finish() const123 void finish() const { 124 callbacks.on_number(encoding.to_internal(first, cur)); 125 } 126 127 private: 128 number_callback_adapter(const number_callback_adapter&); 129 130 Callbacks& callbacks; 131 Encoding& encoding; 132 Iterator first; 133 Iterator& cur; 134 }; 135 136 template <typename Callbacks, typename Encoding, typename Iterator> 137 class number_callback_adapter<Callbacks, Encoding, Iterator, 138 std::input_iterator_tag> 139 { 140 public: number_callback_adapter(Callbacks & callbacks,Encoding & encoding,Iterator &)141 number_callback_adapter(Callbacks& callbacks, Encoding& encoding, 142 Iterator&) 143 : callbacks(callbacks), encoding(encoding), first(true) 144 {} 145 operator ()(typename Encoding::external_char c)146 void operator ()(typename Encoding::external_char c) { 147 if (first) { 148 callbacks.on_begin_number(); 149 first = false; 150 } 151 callbacks.on_digit(encoding.to_internal_trivial(c)); 152 } 153 finish() const154 void finish() const { 155 callbacks.on_end_number(); 156 } 157 private: 158 number_callback_adapter(const number_callback_adapter&); 159 160 Callbacks& callbacks; 161 Encoding& encoding; 162 bool first; 163 }; 164 165 template <typename Callbacks, typename Encoding, typename Iterator, 166 typename = typename std::iterator_traits<Iterator> 167 ::iterator_category> 168 class string_callback_adapter 169 { 170 public: string_callback_adapter(Callbacks & callbacks,Encoding & encoding,Iterator & cur)171 string_callback_adapter(Callbacks& callbacks, Encoding& encoding, 172 Iterator& cur) 173 : callbacks(callbacks), encoding(encoding), cur(cur), 174 run_begin(cur) 175 {} 176 start_run()177 void start_run() { 178 run_begin = cur; 179 } 180 finish_run()181 void finish_run() { 182 callbacks.on_code_units(encoding.to_internal(run_begin, cur)); 183 } 184 185 template <typename Sentinel, typename EncodingErrorFn> process_codepoint(Sentinel end,EncodingErrorFn error_fn)186 void process_codepoint(Sentinel end, EncodingErrorFn error_fn) { 187 encoding.skip_codepoint(cur, end, error_fn); 188 } 189 190 private: 191 string_callback_adapter(const string_callback_adapter&); 192 193 Callbacks& callbacks; 194 Encoding& encoding; 195 Iterator& cur; 196 Iterator run_begin; 197 }; 198 199 template <typename Callbacks, typename Encoding, typename Iterator> 200 class string_callback_adapter<Callbacks, Encoding, Iterator, 201 std::input_iterator_tag> 202 { 203 public: string_callback_adapter(Callbacks & callbacks,Encoding & encoding,Iterator & cur)204 string_callback_adapter(Callbacks& callbacks, Encoding& encoding, 205 Iterator& cur) 206 : callbacks(callbacks), encoding(encoding), cur(cur) 207 {} 208 start_run()209 void start_run() {} 210 finish_run()211 void finish_run() {} 212 213 template <typename Sentinel, typename EncodingErrorFn> process_codepoint(Sentinel end,EncodingErrorFn error_fn)214 void process_codepoint(Sentinel end, EncodingErrorFn error_fn) { 215 encoding.transcode_codepoint(cur, end, 216 boost::bind(&Callbacks::on_code_unit, 217 boost::ref(callbacks), _1), 218 error_fn); 219 } 220 221 private: 222 string_callback_adapter(const string_callback_adapter&); 223 224 Callbacks& callbacks; 225 Encoding& encoding; 226 Iterator& cur; 227 }; 228 229 template <typename Callbacks, typename Encoding, typename Iterator, 230 typename Sentinel> 231 class parser 232 { 233 typedef detail::number_callback_adapter<Callbacks, Encoding, Iterator> 234 number_adapter; 235 typedef detail::string_callback_adapter<Callbacks, Encoding, Iterator> 236 string_adapter; 237 typedef detail::source<Encoding, Iterator, Sentinel> source; 238 typedef typename source::code_unit code_unit; 239 240 public: parser(Callbacks & callbacks,Encoding & encoding)241 parser(Callbacks& callbacks, Encoding& encoding) 242 : callbacks(callbacks), encoding(encoding), src(encoding) 243 {} 244 245 template <typename Range> set_input(const std::string & filename,const Range & r)246 void set_input(const std::string& filename, const Range& r) { 247 src.set_input(filename, r); 248 } 249 finish()250 void finish() { 251 skip_ws(); 252 if (!src.done()) { 253 parse_error("garbage after data"); 254 } 255 } 256 parse_value()257 void parse_value() { 258 if (parse_object()) return; 259 if (parse_array()) return; 260 if (parse_string()) return; 261 if (parse_boolean()) return; 262 if (parse_null()) return; 263 if (parse_number()) return; 264 parse_error("expected value"); 265 } 266 parse_null()267 bool parse_null() { 268 skip_ws(); 269 if (!have(&Encoding::is_n)) { 270 return false; 271 } 272 expect(&Encoding::is_u, "expected 'null'"); 273 expect(&Encoding::is_l, "expected 'null'"); 274 expect(&Encoding::is_l, "expected 'null'"); 275 callbacks.on_null(); 276 return true; 277 } 278 parse_boolean()279 bool parse_boolean() { 280 skip_ws(); 281 if (have(&Encoding::is_t)) { 282 expect(&Encoding::is_r, "expected 'true'"); 283 expect(&Encoding::is_u, "expected 'true'"); 284 expect(&Encoding::is_e, "expected 'true'"); 285 callbacks.on_boolean(true); 286 return true; 287 } 288 if (have(&Encoding::is_f)) { 289 expect(&Encoding::is_a, "expected 'false'"); 290 expect(&Encoding::is_l, "expected 'false'"); 291 expect(&Encoding::is_s, "expected 'false'"); 292 expect(&Encoding::is_e, "expected 'false'"); 293 callbacks.on_boolean(false); 294 return true; 295 } 296 return false; 297 } 298 parse_number()299 bool parse_number() { 300 skip_ws(); 301 302 number_adapter adapter(callbacks, encoding, src.raw_cur()); 303 bool started = false; 304 if (have(&Encoding::is_minus, adapter)) { 305 started = true; 306 } 307 if (!have(&Encoding::is_0, adapter) && !parse_int_part(adapter)) { 308 if (started) { 309 parse_error("expected digits after -"); 310 } 311 return false; 312 } 313 parse_frac_part(adapter); 314 parse_exp_part(adapter); 315 adapter.finish(); 316 return true; 317 } 318 parse_string()319 bool parse_string() { 320 skip_ws(); 321 322 if (!have(&Encoding::is_quote)) { 323 return false; 324 } 325 326 callbacks.on_begin_string(); 327 string_adapter adapter(callbacks, encoding, src.raw_cur()); 328 while (!encoding.is_quote(need_cur("unterminated string"))) { 329 if (encoding.is_backslash(*src.raw_cur())) { 330 adapter.finish_run(); 331 next(); 332 parse_escape(); 333 adapter.start_run(); 334 } else { 335 adapter.process_codepoint(src.raw_end(), 336 boost::bind(&parser::parse_error, 337 this, "invalid code sequence")); 338 } 339 } 340 adapter.finish_run(); 341 callbacks.on_end_string(); 342 next(); 343 return true; 344 } 345 parse_array()346 bool parse_array() { 347 skip_ws(); 348 349 if (!have(&Encoding::is_open_bracket)) { 350 return false; 351 } 352 353 callbacks.on_begin_array(); 354 skip_ws(); 355 if (have(&Encoding::is_close_bracket)) { 356 callbacks.on_end_array(); 357 return true; 358 } 359 do { 360 parse_value(); 361 skip_ws(); 362 } while (have(&Encoding::is_comma)); 363 expect(&Encoding::is_close_bracket, "expected ']' or ','"); 364 callbacks.on_end_array(); 365 return true; 366 } 367 parse_object()368 bool parse_object() { 369 skip_ws(); 370 371 if (!have(&Encoding::is_open_brace)) { 372 return false; 373 } 374 375 callbacks.on_begin_object(); 376 skip_ws(); 377 if (have(&Encoding::is_close_brace)) { 378 callbacks.on_end_object(); 379 return true; 380 } 381 do { 382 if (!parse_string()) { 383 parse_error("expected key string"); 384 } 385 skip_ws(); 386 expect(&Encoding::is_colon, "expected ':'"); 387 parse_value(); 388 skip_ws(); 389 } while (have(&Encoding::is_comma)); 390 expect(&Encoding::is_close_brace, "expected '}' or ','"); 391 callbacks.on_end_object(); 392 return true; 393 } 394 395 private: 396 typedef typename source::encoding_predicate encoding_predicate; 397 parse_error(const char * msg)398 void parse_error(const char* msg) { src.parse_error(msg); } next()399 void next() { src.next(); } 400 template <typename Action> have(encoding_predicate p,Action & a)401 bool have(encoding_predicate p, Action& a) { return src.have(p, a); } have(encoding_predicate p)402 bool have(encoding_predicate p) { return src.have(p); } 403 template <typename Action> expect(encoding_predicate p,const char * msg,Action & a)404 void expect(encoding_predicate p, const char* msg, Action& a) { 405 src.expect(p, msg, a); 406 } expect(encoding_predicate p,const char * msg)407 void expect(encoding_predicate p, const char* msg) { 408 src.expect(p, msg); 409 } need_cur(const char * msg)410 code_unit need_cur(const char* msg) { return src.need_cur(msg); } 411 skip_ws()412 void skip_ws() { 413 while (have(&Encoding::is_ws)) { 414 } 415 } 416 parse_int_part(number_adapter & action)417 bool parse_int_part(number_adapter& action) { 418 if (!have(&Encoding::is_digit0, action)) { 419 return false; 420 } 421 parse_digits(action); 422 return true; 423 } 424 parse_frac_part(number_adapter & action)425 void parse_frac_part(number_adapter& action) { 426 if (!have(&Encoding::is_dot, action)) { 427 return; 428 } 429 expect(&Encoding::is_digit, "need at least one digit after '.'", 430 action); 431 parse_digits(action); 432 } 433 parse_exp_part(number_adapter & action)434 void parse_exp_part(number_adapter& action) { 435 if (!have(&Encoding::is_eE, action)) { 436 return; 437 } 438 have(&Encoding::is_plusminus, action); 439 expect(&Encoding::is_digit, "need at least one digit in exponent", 440 action); 441 parse_digits(action); 442 } 443 parse_digits(number_adapter & action)444 void parse_digits(number_adapter& action) { 445 while (have(&Encoding::is_digit, action)) { 446 } 447 } 448 parse_escape()449 void parse_escape() { 450 if (have(&Encoding::is_quote)) { 451 feed(0x22); 452 } else if (have(&Encoding::is_backslash)) { 453 feed(0x5c); 454 } else if (have(&Encoding::is_slash)) { 455 feed(0x2f); 456 } else if (have(&Encoding::is_b)) { 457 feed(0x08); // backspace 458 } else if (have(&Encoding::is_f)) { 459 feed(0x0c); // formfeed 460 } else if (have(&Encoding::is_n)) { 461 feed(0x0a); // line feed 462 } else if (have(&Encoding::is_r)) { 463 feed(0x0d); // carriage return 464 } else if (have(&Encoding::is_t)) { 465 feed(0x09); // horizontal tab 466 } else if (have(&Encoding::is_u)) { 467 parse_codepoint_ref(); 468 } else { 469 parse_error("invalid escape sequence"); 470 } 471 } 472 parse_hex_quad()473 unsigned parse_hex_quad() { 474 unsigned codepoint = 0; 475 for (int i = 0; i < 4; ++i) { 476 int value = encoding.decode_hexdigit( 477 need_cur("invalid escape sequence")); 478 if (value < 0) { 479 parse_error("invalid escape sequence"); 480 } 481 codepoint *= 16; 482 codepoint += value; 483 next(); 484 } 485 return codepoint; 486 } 487 is_surrogate_high(unsigned codepoint)488 static bool is_surrogate_high(unsigned codepoint) { 489 return (codepoint & 0xfc00) == 0xd800; 490 } is_surrogate_low(unsigned codepoint)491 static bool is_surrogate_low(unsigned codepoint) { 492 return (codepoint & 0xfc00) == 0xdc00; 493 } combine_surrogates(unsigned high,unsigned low)494 static unsigned combine_surrogates(unsigned high, unsigned low) { 495 return 0x010000 + (((high & 0x3ff) << 10) | (low & 0x3ff)); 496 } 497 parse_codepoint_ref()498 void parse_codepoint_ref() { 499 unsigned codepoint = parse_hex_quad(); 500 if (is_surrogate_low(codepoint)) { 501 parse_error("invalid codepoint, stray low surrogate"); 502 } 503 if (is_surrogate_high(codepoint)) { 504 expect(&Encoding::is_backslash, 505 "invalid codepoint, stray high surrogate"); 506 expect(&Encoding::is_u, 507 "expected codepoint reference after high surrogate"); 508 int low = parse_hex_quad(); 509 if (!is_surrogate_low(low)) { 510 parse_error("expected low surrogate after high surrogate"); 511 } 512 codepoint = combine_surrogates(codepoint, low); 513 } 514 feed(codepoint); 515 } 516 feed(unsigned codepoint)517 void feed(unsigned codepoint) { 518 encoding.feed_codepoint(codepoint, 519 boost::bind(&Callbacks::on_code_unit, 520 boost::ref(callbacks), _1)); 521 } 522 523 Callbacks& callbacks; 524 Encoding& encoding; 525 source src; 526 }; 527 528 }}}} 529 530 #endif 531