1 // __ _____ _____ _____ 2 // __| | __| | | | JSON for Modern C++ 3 // | | |__ | | | | | | version 3.11.3 4 // |_____|_____|_____|_|___| https://github.com/nlohmann/json 5 // 6 // SPDX-FileCopyrightText: 2013-2023 Niels Lohmann <https://nlohmann.me> 7 // SPDX-License-Identifier: MIT 8 9 #pragma once 10 11 #include <cmath> // isfinite 12 #include <cstdint> // uint8_t 13 #include <functional> // function 14 #include <string> // string 15 #include <utility> // move 16 #include <vector> // vector 17 18 #include <nlohmann/detail/exceptions.hpp> 19 #include <nlohmann/detail/input/input_adapters.hpp> 20 #include <nlohmann/detail/input/json_sax.hpp> 21 #include <nlohmann/detail/input/lexer.hpp> 22 #include <nlohmann/detail/macro_scope.hpp> 23 #include <nlohmann/detail/meta/is_sax.hpp> 24 #include <nlohmann/detail/string_concat.hpp> 25 #include <nlohmann/detail/value_t.hpp> 26 27 NLOHMANN_JSON_NAMESPACE_BEGIN 28 namespace detail 29 { 30 //////////// 31 // parser // 32 //////////// 33 34 enum class parse_event_t : std::uint8_t 35 { 36 /// the parser read `{` and started to process a JSON object 37 object_start, 38 /// the parser read `}` and finished processing a JSON object 39 object_end, 40 /// the parser read `[` and started to process a JSON array 41 array_start, 42 /// the parser read `]` and finished processing a JSON array 43 array_end, 44 /// the parser read a key of a value in an object 45 key, 46 /// the parser finished reading a JSON value 47 value 48 }; 49 50 template<typename BasicJsonType> 51 using parser_callback_t = 52 std::function<bool(int /*depth*/, parse_event_t /*event*/, BasicJsonType& /*parsed*/)>; 53 54 /*! 55 @brief syntax analysis 56 57 This class implements a recursive descent parser. 58 */ 59 template<typename BasicJsonType, typename InputAdapterType> 60 class parser 61 { 62 using number_integer_t = typename BasicJsonType::number_integer_t; 63 using number_unsigned_t = typename BasicJsonType::number_unsigned_t; 64 using number_float_t = typename BasicJsonType::number_float_t; 65 using string_t = typename BasicJsonType::string_t; 66 using lexer_t = lexer<BasicJsonType, InputAdapterType>; 67 using token_type = typename lexer_t::token_type; 68 69 public: 70 /// a parser reading from an input adapter parser(InputAdapterType && adapter,const parser_callback_t<BasicJsonType> cb=nullptr,const bool allow_exceptions_=true,const bool skip_comments=false)71 explicit parser(InputAdapterType&& adapter, 72 const parser_callback_t<BasicJsonType> cb = nullptr, 73 const bool allow_exceptions_ = true, 74 const bool skip_comments = false) 75 : callback(cb) 76 , m_lexer(std::move(adapter), skip_comments) 77 , allow_exceptions(allow_exceptions_) 78 { 79 // read first token 80 get_token(); 81 } 82 83 /*! 84 @brief public parser interface 85 86 @param[in] strict whether to expect the last token to be EOF 87 @param[in,out] result parsed JSON value 88 89 @throw parse_error.101 in case of an unexpected token 90 @throw parse_error.102 if to_unicode fails or surrogate error 91 @throw parse_error.103 if to_unicode fails 92 */ parse(const bool strict,BasicJsonType & result)93 void parse(const bool strict, BasicJsonType& result) 94 { 95 if (callback) 96 { 97 json_sax_dom_callback_parser<BasicJsonType> sdp(result, callback, allow_exceptions); 98 sax_parse_internal(&sdp); 99 100 // in strict mode, input must be completely read 101 if (strict && (get_token() != token_type::end_of_input)) 102 { 103 sdp.parse_error(m_lexer.get_position(), 104 m_lexer.get_token_string(), 105 parse_error::create(101, m_lexer.get_position(), 106 exception_message(token_type::end_of_input, "value"), nullptr)); 107 } 108 109 // in case of an error, return discarded value 110 if (sdp.is_errored()) 111 { 112 result = value_t::discarded; 113 return; 114 } 115 116 // set top-level value to null if it was discarded by the callback 117 // function 118 if (result.is_discarded()) 119 { 120 result = nullptr; 121 } 122 } 123 else 124 { 125 json_sax_dom_parser<BasicJsonType> sdp(result, allow_exceptions); 126 sax_parse_internal(&sdp); 127 128 // in strict mode, input must be completely read 129 if (strict && (get_token() != token_type::end_of_input)) 130 { 131 sdp.parse_error(m_lexer.get_position(), 132 m_lexer.get_token_string(), 133 parse_error::create(101, m_lexer.get_position(), exception_message(token_type::end_of_input, "value"), nullptr)); 134 } 135 136 // in case of an error, return discarded value 137 if (sdp.is_errored()) 138 { 139 result = value_t::discarded; 140 return; 141 } 142 } 143 144 result.assert_invariant(); 145 } 146 147 /*! 148 @brief public accept interface 149 150 @param[in] strict whether to expect the last token to be EOF 151 @return whether the input is a proper JSON text 152 */ accept(const bool strict=true)153 bool accept(const bool strict = true) 154 { 155 json_sax_acceptor<BasicJsonType> sax_acceptor; 156 return sax_parse(&sax_acceptor, strict); 157 } 158 159 template<typename SAX> 160 JSON_HEDLEY_NON_NULL(2) sax_parse(SAX * sax,const bool strict=true)161 bool sax_parse(SAX* sax, const bool strict = true) 162 { 163 (void)detail::is_sax_static_asserts<SAX, BasicJsonType> {}; 164 const bool result = sax_parse_internal(sax); 165 166 // strict mode: next byte must be EOF 167 if (result && strict && (get_token() != token_type::end_of_input)) 168 { 169 return sax->parse_error(m_lexer.get_position(), 170 m_lexer.get_token_string(), 171 parse_error::create(101, m_lexer.get_position(), exception_message(token_type::end_of_input, "value"), nullptr)); 172 } 173 174 return result; 175 } 176 177 private: 178 template<typename SAX> 179 JSON_HEDLEY_NON_NULL(2) sax_parse_internal(SAX * sax)180 bool sax_parse_internal(SAX* sax) 181 { 182 // stack to remember the hierarchy of structured values we are parsing 183 // true = array; false = object 184 std::vector<bool> states; 185 // value to avoid a goto (see comment where set to true) 186 bool skip_to_state_evaluation = false; 187 188 while (true) 189 { 190 if (!skip_to_state_evaluation) 191 { 192 // invariant: get_token() was called before each iteration 193 switch (last_token) 194 { 195 case token_type::begin_object: 196 { 197 if (JSON_HEDLEY_UNLIKELY(!sax->start_object(static_cast<std::size_t>(-1)))) 198 { 199 return false; 200 } 201 202 // closing } -> we are done 203 if (get_token() == token_type::end_object) 204 { 205 if (JSON_HEDLEY_UNLIKELY(!sax->end_object())) 206 { 207 return false; 208 } 209 break; 210 } 211 212 // parse key 213 if (JSON_HEDLEY_UNLIKELY(last_token != token_type::value_string)) 214 { 215 return sax->parse_error(m_lexer.get_position(), 216 m_lexer.get_token_string(), 217 parse_error::create(101, m_lexer.get_position(), exception_message(token_type::value_string, "object key"), nullptr)); 218 } 219 if (JSON_HEDLEY_UNLIKELY(!sax->key(m_lexer.get_string()))) 220 { 221 return false; 222 } 223 224 // parse separator (:) 225 if (JSON_HEDLEY_UNLIKELY(get_token() != token_type::name_separator)) 226 { 227 return sax->parse_error(m_lexer.get_position(), 228 m_lexer.get_token_string(), 229 parse_error::create(101, m_lexer.get_position(), exception_message(token_type::name_separator, "object separator"), nullptr)); 230 } 231 232 // remember we are now inside an object 233 states.push_back(false); 234 235 // parse values 236 get_token(); 237 continue; 238 } 239 240 case token_type::begin_array: 241 { 242 if (JSON_HEDLEY_UNLIKELY(!sax->start_array(static_cast<std::size_t>(-1)))) 243 { 244 return false; 245 } 246 247 // closing ] -> we are done 248 if (get_token() == token_type::end_array) 249 { 250 if (JSON_HEDLEY_UNLIKELY(!sax->end_array())) 251 { 252 return false; 253 } 254 break; 255 } 256 257 // remember we are now inside an array 258 states.push_back(true); 259 260 // parse values (no need to call get_token) 261 continue; 262 } 263 264 case token_type::value_float: 265 { 266 const auto res = m_lexer.get_number_float(); 267 268 if (JSON_HEDLEY_UNLIKELY(!std::isfinite(res))) 269 { 270 return sax->parse_error(m_lexer.get_position(), 271 m_lexer.get_token_string(), 272 out_of_range::create(406, concat("number overflow parsing '", m_lexer.get_token_string(), '\''), nullptr)); 273 } 274 275 if (JSON_HEDLEY_UNLIKELY(!sax->number_float(res, m_lexer.get_string()))) 276 { 277 return false; 278 } 279 280 break; 281 } 282 283 case token_type::literal_false: 284 { 285 if (JSON_HEDLEY_UNLIKELY(!sax->boolean(false))) 286 { 287 return false; 288 } 289 break; 290 } 291 292 case token_type::literal_null: 293 { 294 if (JSON_HEDLEY_UNLIKELY(!sax->null())) 295 { 296 return false; 297 } 298 break; 299 } 300 301 case token_type::literal_true: 302 { 303 if (JSON_HEDLEY_UNLIKELY(!sax->boolean(true))) 304 { 305 return false; 306 } 307 break; 308 } 309 310 case token_type::value_integer: 311 { 312 if (JSON_HEDLEY_UNLIKELY(!sax->number_integer(m_lexer.get_number_integer()))) 313 { 314 return false; 315 } 316 break; 317 } 318 319 case token_type::value_string: 320 { 321 if (JSON_HEDLEY_UNLIKELY(!sax->string(m_lexer.get_string()))) 322 { 323 return false; 324 } 325 break; 326 } 327 328 case token_type::value_unsigned: 329 { 330 if (JSON_HEDLEY_UNLIKELY(!sax->number_unsigned(m_lexer.get_number_unsigned()))) 331 { 332 return false; 333 } 334 break; 335 } 336 337 case token_type::parse_error: 338 { 339 // using "uninitialized" to avoid "expected" message 340 return sax->parse_error(m_lexer.get_position(), 341 m_lexer.get_token_string(), 342 parse_error::create(101, m_lexer.get_position(), exception_message(token_type::uninitialized, "value"), nullptr)); 343 } 344 case token_type::end_of_input: 345 { 346 if (JSON_HEDLEY_UNLIKELY(m_lexer.get_position().chars_read_total == 1)) 347 { 348 return sax->parse_error(m_lexer.get_position(), 349 m_lexer.get_token_string(), 350 parse_error::create(101, m_lexer.get_position(), 351 "attempting to parse an empty input; check that your input string or stream contains the expected JSON", nullptr)); 352 } 353 354 return sax->parse_error(m_lexer.get_position(), 355 m_lexer.get_token_string(), 356 parse_error::create(101, m_lexer.get_position(), exception_message(token_type::literal_or_value, "value"), nullptr)); 357 } 358 case token_type::uninitialized: 359 case token_type::end_array: 360 case token_type::end_object: 361 case token_type::name_separator: 362 case token_type::value_separator: 363 case token_type::literal_or_value: 364 default: // the last token was unexpected 365 { 366 return sax->parse_error(m_lexer.get_position(), 367 m_lexer.get_token_string(), 368 parse_error::create(101, m_lexer.get_position(), exception_message(token_type::literal_or_value, "value"), nullptr)); 369 } 370 } 371 } 372 else 373 { 374 skip_to_state_evaluation = false; 375 } 376 377 // we reached this line after we successfully parsed a value 378 if (states.empty()) 379 { 380 // empty stack: we reached the end of the hierarchy: done 381 return true; 382 } 383 384 if (states.back()) // array 385 { 386 // comma -> next value 387 if (get_token() == token_type::value_separator) 388 { 389 // parse a new value 390 get_token(); 391 continue; 392 } 393 394 // closing ] 395 if (JSON_HEDLEY_LIKELY(last_token == token_type::end_array)) 396 { 397 if (JSON_HEDLEY_UNLIKELY(!sax->end_array())) 398 { 399 return false; 400 } 401 402 // We are done with this array. Before we can parse a 403 // new value, we need to evaluate the new state first. 404 // By setting skip_to_state_evaluation to false, we 405 // are effectively jumping to the beginning of this if. 406 JSON_ASSERT(!states.empty()); 407 states.pop_back(); 408 skip_to_state_evaluation = true; 409 continue; 410 } 411 412 return sax->parse_error(m_lexer.get_position(), 413 m_lexer.get_token_string(), 414 parse_error::create(101, m_lexer.get_position(), exception_message(token_type::end_array, "array"), nullptr)); 415 } 416 417 // states.back() is false -> object 418 419 // comma -> next value 420 if (get_token() == token_type::value_separator) 421 { 422 // parse key 423 if (JSON_HEDLEY_UNLIKELY(get_token() != token_type::value_string)) 424 { 425 return sax->parse_error(m_lexer.get_position(), 426 m_lexer.get_token_string(), 427 parse_error::create(101, m_lexer.get_position(), exception_message(token_type::value_string, "object key"), nullptr)); 428 } 429 430 if (JSON_HEDLEY_UNLIKELY(!sax->key(m_lexer.get_string()))) 431 { 432 return false; 433 } 434 435 // parse separator (:) 436 if (JSON_HEDLEY_UNLIKELY(get_token() != token_type::name_separator)) 437 { 438 return sax->parse_error(m_lexer.get_position(), 439 m_lexer.get_token_string(), 440 parse_error::create(101, m_lexer.get_position(), exception_message(token_type::name_separator, "object separator"), nullptr)); 441 } 442 443 // parse values 444 get_token(); 445 continue; 446 } 447 448 // closing } 449 if (JSON_HEDLEY_LIKELY(last_token == token_type::end_object)) 450 { 451 if (JSON_HEDLEY_UNLIKELY(!sax->end_object())) 452 { 453 return false; 454 } 455 456 // We are done with this object. Before we can parse a 457 // new value, we need to evaluate the new state first. 458 // By setting skip_to_state_evaluation to false, we 459 // are effectively jumping to the beginning of this if. 460 JSON_ASSERT(!states.empty()); 461 states.pop_back(); 462 skip_to_state_evaluation = true; 463 continue; 464 } 465 466 return sax->parse_error(m_lexer.get_position(), 467 m_lexer.get_token_string(), 468 parse_error::create(101, m_lexer.get_position(), exception_message(token_type::end_object, "object"), nullptr)); 469 } 470 } 471 472 /// get next token from lexer get_token()473 token_type get_token() 474 { 475 return last_token = m_lexer.scan(); 476 } 477 exception_message(const token_type expected,const std::string & context)478 std::string exception_message(const token_type expected, const std::string& context) 479 { 480 std::string error_msg = "syntax error "; 481 482 if (!context.empty()) 483 { 484 error_msg += concat("while parsing ", context, ' '); 485 } 486 487 error_msg += "- "; 488 489 if (last_token == token_type::parse_error) 490 { 491 error_msg += concat(m_lexer.get_error_message(), "; last read: '", 492 m_lexer.get_token_string(), '\''); 493 } 494 else 495 { 496 error_msg += concat("unexpected ", lexer_t::token_type_name(last_token)); 497 } 498 499 if (expected != token_type::uninitialized) 500 { 501 error_msg += concat("; expected ", lexer_t::token_type_name(expected)); 502 } 503 504 return error_msg; 505 } 506 507 private: 508 /// callback function 509 const parser_callback_t<BasicJsonType> callback = nullptr; 510 /// the type of the last read token 511 token_type last_token = token_type::uninitialized; 512 /// the lexer 513 lexer_t m_lexer; 514 /// whether to throw exceptions in case of errors 515 const bool allow_exceptions = true; 516 }; 517 518 } // namespace detail 519 NLOHMANN_JSON_NAMESPACE_END 520