• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #ifndef BOOST_PROPERTY_TREE_DETAIL_JSON_PARSER_PARSER_HPP
2 #define BOOST_PROPERTY_TREE_DETAIL_JSON_PARSER_PARSER_HPP
3 
4 #include <boost/property_tree/json_parser/error.hpp>
5 
6 #include <boost/ref.hpp>
7 #include <boost/bind.hpp>
8 #include <boost/format.hpp>
9 
10 #include <iterator>
11 #include <sstream>
12 #include <string>
13 
14 namespace boost { namespace property_tree {
15     namespace json_parser { namespace detail
16 {
17 
18     template <typename Encoding, typename Iterator, typename Sentinel>
19     class source
20     {
21     public:
22         typedef typename std::iterator_traits<Iterator>::value_type
23             code_unit;
24         typedef bool (Encoding::*encoding_predicate)(code_unit c) const;
25 
source(Encoding & encoding)26         explicit source(Encoding& encoding) : encoding(encoding) {}
27 
28         template <typename Range>
set_input(const std::string & filename,const Range & r)29         void set_input(const std::string& filename, const Range& r)
30         {
31             this->filename = filename;
32             cur = r.begin();
33             end = r.end();
34             // Note that there is no backtracking, so if e.g. a UTF-8 file
35             // starts with something that initially looks like a BOM but isn't,
36             // there's trouble.
37             // However, no valid JSON file can start with a UTF-8 EF byte.
38             encoding.skip_introduction(cur, end);
39             line = 1;
40             offset = 0;
41         }
42 
done() const43         bool done() const { return cur == end; }
44 
parse_error(const char * msg)45         void parse_error(const char* msg) {
46             BOOST_PROPERTY_TREE_THROW(
47                 json_parser_error(msg, filename, line));
48         }
49 
next()50         void next() {
51             if (encoding.is_nl(*cur)) {
52                 ++line;
53                 offset = 0;
54             } else {
55                 ++offset;
56             }
57             ++cur;
58         }
59 
60         template <typename Action>
have(encoding_predicate p,Action & a)61         bool have(encoding_predicate p, Action& a) {
62             bool found = cur != end && (encoding.*p)(*cur);
63             if (found) {
64                 a(*cur);
65                 next();
66             }
67             return found;
68         }
69 
have(encoding_predicate p)70         bool have(encoding_predicate p) {
71             DoNothing n;
72             return have(p, n);
73         }
74 
75         template <typename Action>
expect(encoding_predicate p,const char * msg,Action & a)76         void expect(encoding_predicate p, const char* msg, Action& a) {
77             if (!have(p, a)) {
78                 parse_error(msg);
79             }
80         }
81 
expect(encoding_predicate p,const char * msg)82         void expect(encoding_predicate p, const char* msg) {
83             DoNothing n;
84             expect(p, msg, n);
85         }
86 
need_cur(const char * msg)87         code_unit need_cur(const char* msg) {
88             if (cur == end) {
89                 parse_error(msg);
90             }
91             return *cur;
92         }
93 
raw_cur()94         Iterator& raw_cur() { return cur; }
raw_end()95         Sentinel raw_end() { return end; }
96 
97     private:
98         struct DoNothing {
operator ()boost::property_tree::json_parser::detail::source::DoNothing99             void operator ()(code_unit) const {}
100         };
101 
102         Encoding& encoding;
103         Iterator cur;
104         Sentinel end;
105         std::string filename;
106         int line;
107         int offset;
108     };
109 
// Forwards a parsed number to the callbacks.
//
// This primary template is selected for every iterator category except
// std::input_iterator_tag (see the fourth, defaulted template parameter).
// It remembers where the number started and, on finish(), converts the
// whole range [first, cur) at once with Encoding::to_internal() and passes
// the result to Callbacks::on_number().
template <typename Callbacks, typename Encoding, typename Iterator,
    typename = typename std::iterator_traits<Iterator>
        ::iterator_category>
class number_callback_adapter
{
public:
    // cur is the parser's live position: a copy is kept as the start of the
    // number, and the reference is kept to read the end position later.
    number_callback_adapter(Callbacks& callbacks, Encoding& encoding,
                            Iterator& cur)
        : callbacks(callbacks), encoding(encoding), first(cur), cur(cur)
    {}

    // Per-character hook; nothing to do because the range is converted in
    // finish().
    void operator ()(typename Encoding::external_char) {}

    // Reports the number spanning from the start position up to the
    // current position.
    void finish() const {
        callbacks.on_number(encoding.to_internal(first, cur));
    }

private:
    // Declared but not defined: the adapter is not copyable.
    number_callback_adapter(const number_callback_adapter&);

    Callbacks& callbacks;
    Encoding& encoding;
    Iterator first;
    Iterator& cur;
};
135 
136     template <typename Callbacks, typename Encoding, typename Iterator>
137     class number_callback_adapter<Callbacks, Encoding, Iterator,
138                                   std::input_iterator_tag>
139     {
140     public:
number_callback_adapter(Callbacks & callbacks,Encoding & encoding,Iterator &)141         number_callback_adapter(Callbacks& callbacks, Encoding& encoding,
142                                 Iterator&)
143             : callbacks(callbacks), encoding(encoding), first(true)
144         {}
145 
operator ()(typename Encoding::external_char c)146         void operator ()(typename Encoding::external_char c) {
147             if (first) {
148                 callbacks.on_begin_number();
149                 first = false;
150             }
151             callbacks.on_digit(encoding.to_internal_trivial(c));
152         }
153 
finish() const154         void finish() const {
155             callbacks.on_end_number();
156         }
157     private:
158         number_callback_adapter(const number_callback_adapter&);
159 
160         Callbacks& callbacks;
161         Encoding& encoding;
162         bool first;
163     };
164 
// Forwards the unescaped parts of a string to the callbacks.
//
// This primary template is selected for every iterator category except
// std::input_iterator_tag (see the fourth, defaulted template parameter).
// Characters are not forwarded one by one: the adapter tracks a "run"
// starting at run_begin, and finish_run() converts [run_begin, cur) with
// Encoding::to_internal() and passes it to Callbacks::on_code_units().
template <typename Callbacks, typename Encoding, typename Iterator,
    typename = typename std::iterator_traits<Iterator>
        ::iterator_category>
class string_callback_adapter
{
public:
    // cur is the parser's live position; the first run starts there.
    string_callback_adapter(Callbacks& callbacks, Encoding& encoding,
                            Iterator& cur)
        : callbacks(callbacks), encoding(encoding), cur(cur),
          run_begin(cur)
    {}

    // Starts a new run at the current position.
    void start_run() {
        run_begin = cur;
    }

    // Reports everything from the start of the run up to the current
    // position.
    void finish_run() {
        callbacks.on_code_units(encoding.to_internal(run_begin, cur));
    }

    // Lets the encoding move the position past one codepoint; error_fn is
    // handed to the encoding unchanged.
    template <typename Sentinel, typename EncodingErrorFn>
    void process_codepoint(Sentinel end, EncodingErrorFn error_fn) {
        encoding.skip_codepoint(cur, end, error_fn);
    }

private:
    // Declared but not defined: the adapter is not copyable.
    string_callback_adapter(const string_callback_adapter&);

    Callbacks& callbacks;
    Encoding& encoding;
    Iterator& cur;
    Iterator run_begin;
};
198 
199     template <typename Callbacks, typename Encoding, typename Iterator>
200     class string_callback_adapter<Callbacks, Encoding, Iterator,
201                                   std::input_iterator_tag>
202     {
203     public:
string_callback_adapter(Callbacks & callbacks,Encoding & encoding,Iterator & cur)204         string_callback_adapter(Callbacks& callbacks, Encoding& encoding,
205                                 Iterator& cur)
206             : callbacks(callbacks), encoding(encoding), cur(cur)
207         {}
208 
start_run()209         void start_run() {}
210 
finish_run()211         void finish_run() {}
212 
213         template <typename Sentinel, typename EncodingErrorFn>
process_codepoint(Sentinel end,EncodingErrorFn error_fn)214         void process_codepoint(Sentinel end, EncodingErrorFn error_fn) {
215             encoding.transcode_codepoint(cur, end,
216                 boost::bind(&Callbacks::on_code_unit,
217                             boost::ref(callbacks), _1),
218                 error_fn);
219         }
220 
221     private:
222         string_callback_adapter(const string_callback_adapter&);
223 
224         Callbacks& callbacks;
225         Encoding& encoding;
226         Iterator& cur;
227     };
228 
229     template <typename Callbacks, typename Encoding, typename Iterator,
230               typename Sentinel>
231     class parser
232     {
233         typedef detail::number_callback_adapter<Callbacks, Encoding, Iterator>
234             number_adapter;
235         typedef detail::string_callback_adapter<Callbacks, Encoding, Iterator>
236             string_adapter;
237         typedef detail::source<Encoding, Iterator, Sentinel> source;
238         typedef typename source::code_unit code_unit;
239 
240     public:
parser(Callbacks & callbacks,Encoding & encoding)241         parser(Callbacks& callbacks, Encoding& encoding)
242             : callbacks(callbacks), encoding(encoding), src(encoding)
243         {}
244 
245         template <typename Range>
set_input(const std::string & filename,const Range & r)246         void set_input(const std::string& filename, const Range& r) {
247             src.set_input(filename, r);
248         }
249 
finish()250         void finish() {
251             skip_ws();
252             if (!src.done()) {
253                 parse_error("garbage after data");
254             }
255         }
256 
parse_value()257         void parse_value() {
258             if (parse_object()) return;
259             if (parse_array()) return;
260             if (parse_string()) return;
261             if (parse_boolean()) return;
262             if (parse_null()) return;
263             if (parse_number()) return;
264             parse_error("expected value");
265         }
266 
parse_null()267         bool parse_null() {
268             skip_ws();
269             if (!have(&Encoding::is_n)) {
270                 return false;
271             }
272             expect(&Encoding::is_u, "expected 'null'");
273             expect(&Encoding::is_l, "expected 'null'");
274             expect(&Encoding::is_l, "expected 'null'");
275             callbacks.on_null();
276             return true;
277         }
278 
parse_boolean()279         bool parse_boolean() {
280             skip_ws();
281             if (have(&Encoding::is_t)) {
282                 expect(&Encoding::is_r, "expected 'true'");
283                 expect(&Encoding::is_u, "expected 'true'");
284                 expect(&Encoding::is_e, "expected 'true'");
285                 callbacks.on_boolean(true);
286                 return true;
287             }
288             if (have(&Encoding::is_f)) {
289                 expect(&Encoding::is_a, "expected 'false'");
290                 expect(&Encoding::is_l, "expected 'false'");
291                 expect(&Encoding::is_s, "expected 'false'");
292                 expect(&Encoding::is_e, "expected 'false'");
293                 callbacks.on_boolean(false);
294                 return true;
295             }
296             return false;
297         }
298 
parse_number()299         bool parse_number() {
300             skip_ws();
301 
302             number_adapter adapter(callbacks, encoding, src.raw_cur());
303             bool started = false;
304             if (have(&Encoding::is_minus, adapter)) {
305                 started = true;
306             }
307             if (!have(&Encoding::is_0, adapter) && !parse_int_part(adapter)) {
308                 if (started) {
309                     parse_error("expected digits after -");
310                 }
311                 return false;
312             }
313             parse_frac_part(adapter);
314             parse_exp_part(adapter);
315             adapter.finish();
316             return true;
317         }
318 
parse_string()319         bool parse_string() {
320             skip_ws();
321 
322             if (!have(&Encoding::is_quote)) {
323                 return false;
324             }
325 
326             callbacks.on_begin_string();
327             string_adapter adapter(callbacks, encoding, src.raw_cur());
328             while (!encoding.is_quote(need_cur("unterminated string"))) {
329                 if (encoding.is_backslash(*src.raw_cur())) {
330                     adapter.finish_run();
331                     next();
332                     parse_escape();
333                     adapter.start_run();
334                 } else {
335                     adapter.process_codepoint(src.raw_end(),
336                         boost::bind(&parser::parse_error,
337                                     this, "invalid code sequence"));
338                 }
339             }
340             adapter.finish_run();
341             callbacks.on_end_string();
342             next();
343             return true;
344         }
345 
parse_array()346         bool parse_array() {
347             skip_ws();
348 
349             if (!have(&Encoding::is_open_bracket)) {
350                 return false;
351             }
352 
353             callbacks.on_begin_array();
354             skip_ws();
355             if (have(&Encoding::is_close_bracket)) {
356                 callbacks.on_end_array();
357                 return true;
358             }
359             do {
360                 parse_value();
361                 skip_ws();
362             } while (have(&Encoding::is_comma));
363             expect(&Encoding::is_close_bracket, "expected ']' or ','");
364             callbacks.on_end_array();
365             return true;
366         }
367 
parse_object()368         bool parse_object() {
369             skip_ws();
370 
371             if (!have(&Encoding::is_open_brace)) {
372                 return false;
373             }
374 
375             callbacks.on_begin_object();
376             skip_ws();
377             if (have(&Encoding::is_close_brace)) {
378                 callbacks.on_end_object();
379                 return true;
380             }
381             do {
382                 if (!parse_string()) {
383                     parse_error("expected key string");
384                 }
385                 skip_ws();
386                 expect(&Encoding::is_colon, "expected ':'");
387                 parse_value();
388                 skip_ws();
389             } while (have(&Encoding::is_comma));
390             expect(&Encoding::is_close_brace, "expected '}' or ','");
391             callbacks.on_end_object();
392             return true;
393         }
394 
395     private:
396         typedef typename source::encoding_predicate encoding_predicate;
397 
parse_error(const char * msg)398         void parse_error(const char* msg) { src.parse_error(msg); }
next()399         void next() { src.next(); }
400         template <typename Action>
have(encoding_predicate p,Action & a)401         bool have(encoding_predicate p, Action& a) { return src.have(p, a); }
have(encoding_predicate p)402         bool have(encoding_predicate p) { return src.have(p); }
403         template <typename Action>
expect(encoding_predicate p,const char * msg,Action & a)404         void expect(encoding_predicate p, const char* msg, Action& a) {
405             src.expect(p, msg, a);
406         }
expect(encoding_predicate p,const char * msg)407         void expect(encoding_predicate p, const char* msg) {
408             src.expect(p, msg);
409         }
need_cur(const char * msg)410         code_unit need_cur(const char* msg) { return src.need_cur(msg); }
411 
skip_ws()412         void skip_ws() {
413             while (have(&Encoding::is_ws)) {
414             }
415         }
416 
parse_int_part(number_adapter & action)417         bool parse_int_part(number_adapter& action) {
418             if (!have(&Encoding::is_digit0, action)) {
419                 return false;
420             }
421             parse_digits(action);
422             return true;
423         }
424 
parse_frac_part(number_adapter & action)425         void parse_frac_part(number_adapter& action) {
426             if (!have(&Encoding::is_dot, action)) {
427                 return;
428             }
429             expect(&Encoding::is_digit, "need at least one digit after '.'",
430                    action);
431             parse_digits(action);
432         }
433 
parse_exp_part(number_adapter & action)434         void parse_exp_part(number_adapter& action) {
435             if (!have(&Encoding::is_eE, action)) {
436                 return;
437             }
438             have(&Encoding::is_plusminus, action);
439             expect(&Encoding::is_digit, "need at least one digit in exponent",
440                    action);
441             parse_digits(action);
442         }
443 
parse_digits(number_adapter & action)444         void parse_digits(number_adapter& action) {
445             while (have(&Encoding::is_digit, action)) {
446             }
447         }
448 
parse_escape()449         void parse_escape() {
450             if (have(&Encoding::is_quote)) {
451                 feed(0x22);
452             } else if (have(&Encoding::is_backslash)) {
453                 feed(0x5c);
454             } else if (have(&Encoding::is_slash)) {
455                 feed(0x2f);
456             } else if (have(&Encoding::is_b)) {
457                 feed(0x08); // backspace
458             } else if (have(&Encoding::is_f)) {
459                 feed(0x0c); // formfeed
460             } else if (have(&Encoding::is_n)) {
461                 feed(0x0a); // line feed
462             } else if (have(&Encoding::is_r)) {
463                 feed(0x0d); // carriage return
464             } else if (have(&Encoding::is_t)) {
465                 feed(0x09); // horizontal tab
466             } else if (have(&Encoding::is_u)) {
467                 parse_codepoint_ref();
468             } else {
469                 parse_error("invalid escape sequence");
470             }
471         }
472 
parse_hex_quad()473         unsigned parse_hex_quad() {
474             unsigned codepoint = 0;
475             for (int i = 0; i < 4; ++i) {
476                 int value = encoding.decode_hexdigit(
477                     need_cur("invalid escape sequence"));
478                 if (value < 0) {
479                     parse_error("invalid escape sequence");
480                 }
481                 codepoint *= 16;
482                 codepoint += value;
483                 next();
484             }
485             return codepoint;
486         }
487 
is_surrogate_high(unsigned codepoint)488         static bool is_surrogate_high(unsigned codepoint) {
489             return (codepoint & 0xfc00) == 0xd800;
490         }
is_surrogate_low(unsigned codepoint)491         static bool is_surrogate_low(unsigned codepoint) {
492             return (codepoint & 0xfc00) == 0xdc00;
493         }
combine_surrogates(unsigned high,unsigned low)494         static unsigned combine_surrogates(unsigned high, unsigned low) {
495             return 0x010000 + (((high & 0x3ff) << 10) | (low & 0x3ff));
496         }
497 
parse_codepoint_ref()498         void parse_codepoint_ref() {
499             unsigned codepoint = parse_hex_quad();
500             if (is_surrogate_low(codepoint)) {
501                 parse_error("invalid codepoint, stray low surrogate");
502             }
503             if (is_surrogate_high(codepoint)) {
504                 expect(&Encoding::is_backslash,
505                     "invalid codepoint, stray high surrogate");
506                 expect(&Encoding::is_u,
507                     "expected codepoint reference after high surrogate");
508                 int low = parse_hex_quad();
509                 if (!is_surrogate_low(low)) {
510                     parse_error("expected low surrogate after high surrogate");
511                 }
512                 codepoint = combine_surrogates(codepoint, low);
513             }
514             feed(codepoint);
515         }
516 
feed(unsigned codepoint)517         void feed(unsigned codepoint) {
518             encoding.feed_codepoint(codepoint,
519                                     boost::bind(&Callbacks::on_code_unit,
520                                                 boost::ref(callbacks), _1));
521         }
522 
523         Callbacks& callbacks;
524         Encoding& encoding;
525         source src;
526     };
527 
528 }}}}
529 
530 #endif
531