• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //     __ _____ _____ _____
2 //  __|  |   __|     |   | |  JSON for Modern C++
3 // |  |  |__   |  |  | | | |  version 3.11.3
4 // |_____|_____|_____|_|___|  https://github.com/nlohmann/json
5 //
6 // SPDX-FileCopyrightText: 2013-2023 Niels Lohmann <https://nlohmann.me>
7 // SPDX-License-Identifier: MIT
8 
9 #pragma once
10 
11 #include <cmath> // isfinite
12 #include <cstdint> // uint8_t
13 #include <functional> // function
14 #include <string> // string
15 #include <utility> // move
16 #include <vector> // vector
17 
18 #include <nlohmann/detail/exceptions.hpp>
19 #include <nlohmann/detail/input/input_adapters.hpp>
20 #include <nlohmann/detail/input/json_sax.hpp>
21 #include <nlohmann/detail/input/lexer.hpp>
22 #include <nlohmann/detail/macro_scope.hpp>
23 #include <nlohmann/detail/meta/is_sax.hpp>
24 #include <nlohmann/detail/string_concat.hpp>
25 #include <nlohmann/detail/value_t.hpp>
26 
27 NLOHMANN_JSON_NAMESPACE_BEGIN
28 namespace detail
29 {
30 ////////////
31 // parser //
32 ////////////
33 
/// Events reported to a parser callback while a JSON text is being read.
/// The underlying type is a single byte (std::uint8_t).
enum class parse_event_t : std::uint8_t
{
    object_start, ///< `{` was read; processing of a JSON object begins
    object_end,   ///< `}` was read; processing of a JSON object is finished
    array_start,  ///< `[` was read; processing of a JSON array begins
    array_end,    ///< `]` was read; processing of a JSON array is finished
    key,          ///< the key of a value inside an object was read
    value         ///< a JSON value has been read completely
};
49 
50 template<typename BasicJsonType>
51 using parser_callback_t =
52     std::function<bool(int /*depth*/, parse_event_t /*event*/, BasicJsonType& /*parsed*/)>;
53 
54 /*!
55 @brief syntax analysis
56 
57 This class implements a recursive descent parser.
58 */
59 template<typename BasicJsonType, typename InputAdapterType>
60 class parser
61 {
62     using number_integer_t = typename BasicJsonType::number_integer_t;
63     using number_unsigned_t = typename BasicJsonType::number_unsigned_t;
64     using number_float_t = typename BasicJsonType::number_float_t;
65     using string_t = typename BasicJsonType::string_t;
66     using lexer_t = lexer<BasicJsonType, InputAdapterType>;
67     using token_type = typename lexer_t::token_type;
68 
69   public:
70     /// a parser reading from an input adapter
parser(InputAdapterType && adapter,const parser_callback_t<BasicJsonType> cb=nullptr,const bool allow_exceptions_=true,const bool skip_comments=false)71     explicit parser(InputAdapterType&& adapter,
72                     const parser_callback_t<BasicJsonType> cb = nullptr,
73                     const bool allow_exceptions_ = true,
74                     const bool skip_comments = false)
75         : callback(cb)
76         , m_lexer(std::move(adapter), skip_comments)
77         , allow_exceptions(allow_exceptions_)
78     {
79         // read first token
80         get_token();
81     }
82 
83     /*!
84     @brief public parser interface
85 
86     @param[in] strict      whether to expect the last token to be EOF
87     @param[in,out] result  parsed JSON value
88 
89     @throw parse_error.101 in case of an unexpected token
90     @throw parse_error.102 if to_unicode fails or surrogate error
91     @throw parse_error.103 if to_unicode fails
92     */
parse(const bool strict,BasicJsonType & result)93     void parse(const bool strict, BasicJsonType& result)
94     {
95         if (callback)
96         {
97             json_sax_dom_callback_parser<BasicJsonType> sdp(result, callback, allow_exceptions);
98             sax_parse_internal(&sdp);
99 
100             // in strict mode, input must be completely read
101             if (strict && (get_token() != token_type::end_of_input))
102             {
103                 sdp.parse_error(m_lexer.get_position(),
104                                 m_lexer.get_token_string(),
105                                 parse_error::create(101, m_lexer.get_position(),
106                                                     exception_message(token_type::end_of_input, "value"), nullptr));
107             }
108 
109             // in case of an error, return discarded value
110             if (sdp.is_errored())
111             {
112                 result = value_t::discarded;
113                 return;
114             }
115 
116             // set top-level value to null if it was discarded by the callback
117             // function
118             if (result.is_discarded())
119             {
120                 result = nullptr;
121             }
122         }
123         else
124         {
125             json_sax_dom_parser<BasicJsonType> sdp(result, allow_exceptions);
126             sax_parse_internal(&sdp);
127 
128             // in strict mode, input must be completely read
129             if (strict && (get_token() != token_type::end_of_input))
130             {
131                 sdp.parse_error(m_lexer.get_position(),
132                                 m_lexer.get_token_string(),
133                                 parse_error::create(101, m_lexer.get_position(), exception_message(token_type::end_of_input, "value"), nullptr));
134             }
135 
136             // in case of an error, return discarded value
137             if (sdp.is_errored())
138             {
139                 result = value_t::discarded;
140                 return;
141             }
142         }
143 
144         result.assert_invariant();
145     }
146 
147     /*!
148     @brief public accept interface
149 
150     @param[in] strict  whether to expect the last token to be EOF
151     @return whether the input is a proper JSON text
152     */
accept(const bool strict=true)153     bool accept(const bool strict = true)
154     {
155         json_sax_acceptor<BasicJsonType> sax_acceptor;
156         return sax_parse(&sax_acceptor, strict);
157     }
158 
159     template<typename SAX>
160     JSON_HEDLEY_NON_NULL(2)
sax_parse(SAX * sax,const bool strict=true)161     bool sax_parse(SAX* sax, const bool strict = true)
162     {
163         (void)detail::is_sax_static_asserts<SAX, BasicJsonType> {};
164         const bool result = sax_parse_internal(sax);
165 
166         // strict mode: next byte must be EOF
167         if (result && strict && (get_token() != token_type::end_of_input))
168         {
169             return sax->parse_error(m_lexer.get_position(),
170                                     m_lexer.get_token_string(),
171                                     parse_error::create(101, m_lexer.get_position(), exception_message(token_type::end_of_input, "value"), nullptr));
172         }
173 
174         return result;
175     }
176 
177   private:
178     template<typename SAX>
179     JSON_HEDLEY_NON_NULL(2)
sax_parse_internal(SAX * sax)180     bool sax_parse_internal(SAX* sax)
181     {
182         // stack to remember the hierarchy of structured values we are parsing
183         // true = array; false = object
184         std::vector<bool> states;
185         // value to avoid a goto (see comment where set to true)
186         bool skip_to_state_evaluation = false;
187 
188         while (true)
189         {
190             if (!skip_to_state_evaluation)
191             {
192                 // invariant: get_token() was called before each iteration
193                 switch (last_token)
194                 {
195                     case token_type::begin_object:
196                     {
197                         if (JSON_HEDLEY_UNLIKELY(!sax->start_object(static_cast<std::size_t>(-1))))
198                         {
199                             return false;
200                         }
201 
202                         // closing } -> we are done
203                         if (get_token() == token_type::end_object)
204                         {
205                             if (JSON_HEDLEY_UNLIKELY(!sax->end_object()))
206                             {
207                                 return false;
208                             }
209                             break;
210                         }
211 
212                         // parse key
213                         if (JSON_HEDLEY_UNLIKELY(last_token != token_type::value_string))
214                         {
215                             return sax->parse_error(m_lexer.get_position(),
216                                                     m_lexer.get_token_string(),
217                                                     parse_error::create(101, m_lexer.get_position(), exception_message(token_type::value_string, "object key"), nullptr));
218                         }
219                         if (JSON_HEDLEY_UNLIKELY(!sax->key(m_lexer.get_string())))
220                         {
221                             return false;
222                         }
223 
224                         // parse separator (:)
225                         if (JSON_HEDLEY_UNLIKELY(get_token() != token_type::name_separator))
226                         {
227                             return sax->parse_error(m_lexer.get_position(),
228                                                     m_lexer.get_token_string(),
229                                                     parse_error::create(101, m_lexer.get_position(), exception_message(token_type::name_separator, "object separator"), nullptr));
230                         }
231 
232                         // remember we are now inside an object
233                         states.push_back(false);
234 
235                         // parse values
236                         get_token();
237                         continue;
238                     }
239 
240                     case token_type::begin_array:
241                     {
242                         if (JSON_HEDLEY_UNLIKELY(!sax->start_array(static_cast<std::size_t>(-1))))
243                         {
244                             return false;
245                         }
246 
247                         // closing ] -> we are done
248                         if (get_token() == token_type::end_array)
249                         {
250                             if (JSON_HEDLEY_UNLIKELY(!sax->end_array()))
251                             {
252                                 return false;
253                             }
254                             break;
255                         }
256 
257                         // remember we are now inside an array
258                         states.push_back(true);
259 
260                         // parse values (no need to call get_token)
261                         continue;
262                     }
263 
264                     case token_type::value_float:
265                     {
266                         const auto res = m_lexer.get_number_float();
267 
268                         if (JSON_HEDLEY_UNLIKELY(!std::isfinite(res)))
269                         {
270                             return sax->parse_error(m_lexer.get_position(),
271                                                     m_lexer.get_token_string(),
272                                                     out_of_range::create(406, concat("number overflow parsing '", m_lexer.get_token_string(), '\''), nullptr));
273                         }
274 
275                         if (JSON_HEDLEY_UNLIKELY(!sax->number_float(res, m_lexer.get_string())))
276                         {
277                             return false;
278                         }
279 
280                         break;
281                     }
282 
283                     case token_type::literal_false:
284                     {
285                         if (JSON_HEDLEY_UNLIKELY(!sax->boolean(false)))
286                         {
287                             return false;
288                         }
289                         break;
290                     }
291 
292                     case token_type::literal_null:
293                     {
294                         if (JSON_HEDLEY_UNLIKELY(!sax->null()))
295                         {
296                             return false;
297                         }
298                         break;
299                     }
300 
301                     case token_type::literal_true:
302                     {
303                         if (JSON_HEDLEY_UNLIKELY(!sax->boolean(true)))
304                         {
305                             return false;
306                         }
307                         break;
308                     }
309 
310                     case token_type::value_integer:
311                     {
312                         if (JSON_HEDLEY_UNLIKELY(!sax->number_integer(m_lexer.get_number_integer())))
313                         {
314                             return false;
315                         }
316                         break;
317                     }
318 
319                     case token_type::value_string:
320                     {
321                         if (JSON_HEDLEY_UNLIKELY(!sax->string(m_lexer.get_string())))
322                         {
323                             return false;
324                         }
325                         break;
326                     }
327 
328                     case token_type::value_unsigned:
329                     {
330                         if (JSON_HEDLEY_UNLIKELY(!sax->number_unsigned(m_lexer.get_number_unsigned())))
331                         {
332                             return false;
333                         }
334                         break;
335                     }
336 
337                     case token_type::parse_error:
338                     {
339                         // using "uninitialized" to avoid "expected" message
340                         return sax->parse_error(m_lexer.get_position(),
341                                                 m_lexer.get_token_string(),
342                                                 parse_error::create(101, m_lexer.get_position(), exception_message(token_type::uninitialized, "value"), nullptr));
343                     }
344                     case token_type::end_of_input:
345                     {
346                         if (JSON_HEDLEY_UNLIKELY(m_lexer.get_position().chars_read_total == 1))
347                         {
348                             return sax->parse_error(m_lexer.get_position(),
349                                                     m_lexer.get_token_string(),
350                                                     parse_error::create(101, m_lexer.get_position(),
351                                                             "attempting to parse an empty input; check that your input string or stream contains the expected JSON", nullptr));
352                         }
353 
354                         return sax->parse_error(m_lexer.get_position(),
355                                                 m_lexer.get_token_string(),
356                                                 parse_error::create(101, m_lexer.get_position(), exception_message(token_type::literal_or_value, "value"), nullptr));
357                     }
358                     case token_type::uninitialized:
359                     case token_type::end_array:
360                     case token_type::end_object:
361                     case token_type::name_separator:
362                     case token_type::value_separator:
363                     case token_type::literal_or_value:
364                     default: // the last token was unexpected
365                     {
366                         return sax->parse_error(m_lexer.get_position(),
367                                                 m_lexer.get_token_string(),
368                                                 parse_error::create(101, m_lexer.get_position(), exception_message(token_type::literal_or_value, "value"), nullptr));
369                     }
370                 }
371             }
372             else
373             {
374                 skip_to_state_evaluation = false;
375             }
376 
377             // we reached this line after we successfully parsed a value
378             if (states.empty())
379             {
380                 // empty stack: we reached the end of the hierarchy: done
381                 return true;
382             }
383 
384             if (states.back())  // array
385             {
386                 // comma -> next value
387                 if (get_token() == token_type::value_separator)
388                 {
389                     // parse a new value
390                     get_token();
391                     continue;
392                 }
393 
394                 // closing ]
395                 if (JSON_HEDLEY_LIKELY(last_token == token_type::end_array))
396                 {
397                     if (JSON_HEDLEY_UNLIKELY(!sax->end_array()))
398                     {
399                         return false;
400                     }
401 
402                     // We are done with this array. Before we can parse a
403                     // new value, we need to evaluate the new state first.
404                     // By setting skip_to_state_evaluation to false, we
405                     // are effectively jumping to the beginning of this if.
406                     JSON_ASSERT(!states.empty());
407                     states.pop_back();
408                     skip_to_state_evaluation = true;
409                     continue;
410                 }
411 
412                 return sax->parse_error(m_lexer.get_position(),
413                                         m_lexer.get_token_string(),
414                                         parse_error::create(101, m_lexer.get_position(), exception_message(token_type::end_array, "array"), nullptr));
415             }
416 
417             // states.back() is false -> object
418 
419             // comma -> next value
420             if (get_token() == token_type::value_separator)
421             {
422                 // parse key
423                 if (JSON_HEDLEY_UNLIKELY(get_token() != token_type::value_string))
424                 {
425                     return sax->parse_error(m_lexer.get_position(),
426                                             m_lexer.get_token_string(),
427                                             parse_error::create(101, m_lexer.get_position(), exception_message(token_type::value_string, "object key"), nullptr));
428                 }
429 
430                 if (JSON_HEDLEY_UNLIKELY(!sax->key(m_lexer.get_string())))
431                 {
432                     return false;
433                 }
434 
435                 // parse separator (:)
436                 if (JSON_HEDLEY_UNLIKELY(get_token() != token_type::name_separator))
437                 {
438                     return sax->parse_error(m_lexer.get_position(),
439                                             m_lexer.get_token_string(),
440                                             parse_error::create(101, m_lexer.get_position(), exception_message(token_type::name_separator, "object separator"), nullptr));
441                 }
442 
443                 // parse values
444                 get_token();
445                 continue;
446             }
447 
448             // closing }
449             if (JSON_HEDLEY_LIKELY(last_token == token_type::end_object))
450             {
451                 if (JSON_HEDLEY_UNLIKELY(!sax->end_object()))
452                 {
453                     return false;
454                 }
455 
456                 // We are done with this object. Before we can parse a
457                 // new value, we need to evaluate the new state first.
458                 // By setting skip_to_state_evaluation to false, we
459                 // are effectively jumping to the beginning of this if.
460                 JSON_ASSERT(!states.empty());
461                 states.pop_back();
462                 skip_to_state_evaluation = true;
463                 continue;
464             }
465 
466             return sax->parse_error(m_lexer.get_position(),
467                                     m_lexer.get_token_string(),
468                                     parse_error::create(101, m_lexer.get_position(), exception_message(token_type::end_object, "object"), nullptr));
469         }
470     }
471 
472     /// get next token from lexer
get_token()473     token_type get_token()
474     {
475         return last_token = m_lexer.scan();
476     }
477 
exception_message(const token_type expected,const std::string & context)478     std::string exception_message(const token_type expected, const std::string& context)
479     {
480         std::string error_msg = "syntax error ";
481 
482         if (!context.empty())
483         {
484             error_msg += concat("while parsing ", context, ' ');
485         }
486 
487         error_msg += "- ";
488 
489         if (last_token == token_type::parse_error)
490         {
491             error_msg += concat(m_lexer.get_error_message(), "; last read: '",
492                                 m_lexer.get_token_string(), '\'');
493         }
494         else
495         {
496             error_msg += concat("unexpected ", lexer_t::token_type_name(last_token));
497         }
498 
499         if (expected != token_type::uninitialized)
500         {
501             error_msg += concat("; expected ", lexer_t::token_type_name(expected));
502         }
503 
504         return error_msg;
505     }
506 
507   private:
508     /// callback function
509     const parser_callback_t<BasicJsonType> callback = nullptr;
510     /// the type of the last read token
511     token_type last_token = token_type::uninitialized;
512     /// the lexer
513     lexer_t m_lexer;
514     /// whether to throw exceptions in case of errors
515     const bool allow_exceptions = true;
516 };
517 
518 }  // namespace detail
519 NLOHMANN_JSON_NAMESPACE_END
520