1 // __ _____ _____ _____ 2 // __| | __| | | | JSON for Modern C++ 3 // | | |__ | | | | | | version 3.11.2 4 // |_____|_____|_____|_|___| https://github.com/nlohmann/json 5 // 6 // SPDX-FileCopyrightText: 2013-2022 Niels Lohmann <https://nlohmann.me> 7 // SPDX-License-Identifier: MIT 8 9 #pragma once 10 11 #include <cmath> // isfinite 12 #include <cstdint> // uint8_t 13 #include <functional> // function 14 #include <string> // string 15 #include <utility> // move 16 #include <vector> // vector 17 18 #include <nlohmann/detail/exceptions.hpp> 19 #include <nlohmann/detail/input/input_adapters.hpp> 20 #include <nlohmann/detail/input/json_sax.hpp> 21 #include <nlohmann/detail/input/lexer.hpp> 22 #include <nlohmann/detail/macro_scope.hpp> 23 #include <nlohmann/detail/meta/is_sax.hpp> 24 #include <nlohmann/detail/string_concat.hpp> 25 #include <nlohmann/detail/value_t.hpp> 26 27 NLOHMANN_JSON_NAMESPACE_BEGIN 28 namespace detail 29 { 30 //////////// 31 // parser // 32 //////////// 33 34 enum class parse_event_t : std::uint8_t 35 { 36 /// the parser read `{` and started to process a JSON object 37 object_start, 38 /// the parser read `}` and finished processing a JSON object 39 object_end, 40 /// the parser read `[` and started to process a JSON array 41 array_start, 42 /// the parser read `]` and finished processing a JSON array 43 array_end, 44 /// the parser read a key of a value in an object 45 key, 46 /// the parser finished reading a JSON value 47 value 48 }; 49 50 template<typename BasicJsonType> 51 using parser_callback_t = 52 std::function<bool(int /*depth*/, parse_event_t /*event*/, BasicJsonType& /*parsed*/)>; 53 54 /*! 55 @brief syntax analysis 56 57 This class implements a recursive descent parser. 58 */ 59 template<typename BasicJsonType, typename InputAdapterType> 60 class parser 61 { 62 using number_integer_t = typename BasicJsonType::number_integer_t; 63 using number_unsigned_t = typename BasicJsonType::number_unsigned_t; 64 using number_float_t = typename BasicJsonType::number_float_t; 65 using string_t = typename BasicJsonType::string_t; 66 using lexer_t = lexer<BasicJsonType, InputAdapterType>; 67 using token_type = typename lexer_t::token_type; 68 69 public: 70 /// a parser reading from an input adapter parser(InputAdapterType && adapter,const parser_callback_t<BasicJsonType> cb=nullptr,const bool allow_exceptions_=true,const bool skip_comments=false)71 explicit parser(InputAdapterType&& adapter, 72 const parser_callback_t<BasicJsonType> cb = nullptr, 73 const bool allow_exceptions_ = true, 74 const bool skip_comments = false) 75 : callback(cb) 76 , m_lexer(std::move(adapter), skip_comments) 77 , allow_exceptions(allow_exceptions_) 78 { 79 // read first token 80 get_token(); 81 } 82 83 /*! 84 @brief public parser interface 85 86 @param[in] strict whether to expect the last token to be EOF 87 @param[in,out] result parsed JSON value 88 89 @throw parse_error.101 in case of an unexpected token 90 @throw parse_error.102 if to_unicode fails or surrogate error 91 @throw parse_error.103 if to_unicode fails 92 */ parse(const bool strict,BasicJsonType & result)93 void parse(const bool strict, BasicJsonType& result) 94 { 95 if (callback) 96 { 97 json_sax_dom_callback_parser<BasicJsonType> sdp(result, callback, allow_exceptions); 98 sax_parse_internal(&sdp); 99 100 // in strict mode, input must be completely read 101 if (strict && (get_token() != token_type::end_of_input)) 102 { 103 sdp.parse_error(m_lexer.get_position(), 104 m_lexer.get_token_string(), 105 parse_error::create(101, m_lexer.get_position(), 106 exception_message(token_type::end_of_input, "value"), nullptr)); 107 } 108 109 // in case of an error, return discarded value 110 if (sdp.is_errored()) 111 { 112 result = value_t::discarded; 113 return; 114 } 115 116 // set top-level value to null if it was discarded by the callback 117 // function 118 if (result.is_discarded()) 119 { 120 result = nullptr; 121 } 122 } 123 else 124 { 125 json_sax_dom_parser<BasicJsonType> sdp(result, allow_exceptions); 126 sax_parse_internal(&sdp); 127 128 // in strict mode, input must be completely read 129 if (strict && (get_token() != token_type::end_of_input)) 130 { 131 sdp.parse_error(m_lexer.get_position(), 132 m_lexer.get_token_string(), 133 parse_error::create(101, m_lexer.get_position(), exception_message(token_type::end_of_input, "value"), nullptr)); 134 } 135 136 // in case of an error, return discarded value 137 if (sdp.is_errored()) 138 { 139 result = value_t::discarded; 140 return; 141 } 142 } 143 144 result.assert_invariant(); 145 } 146 147 /*! 148 @brief public accept interface 149 150 @param[in] strict whether to expect the last token to be EOF 151 @return whether the input is a proper JSON text 152 */ accept(const bool strict=true)153 bool accept(const bool strict = true) 154 { 155 json_sax_acceptor<BasicJsonType> sax_acceptor; 156 return sax_parse(&sax_acceptor, strict); 157 } 158 159 template<typename SAX> 160 JSON_HEDLEY_NON_NULL(2) sax_parse(SAX * sax,const bool strict=true)161 bool sax_parse(SAX* sax, const bool strict = true) 162 { 163 (void)detail::is_sax_static_asserts<SAX, BasicJsonType> {}; 164 const bool result = sax_parse_internal(sax); 165 166 // strict mode: next byte must be EOF 167 if (result && strict && (get_token() != token_type::end_of_input)) 168 { 169 return sax->parse_error(m_lexer.get_position(), 170 m_lexer.get_token_string(), 171 parse_error::create(101, m_lexer.get_position(), exception_message(token_type::end_of_input, "value"), nullptr)); 172 } 173 174 return result; 175 } 176 177 private: 178 template<typename SAX> 179 JSON_HEDLEY_NON_NULL(2) sax_parse_internal(SAX * sax)180 bool sax_parse_internal(SAX* sax) 181 { 182 // stack to remember the hierarchy of structured values we are parsing 183 // true = array; false = object 184 std::vector<bool> states; 185 // value to avoid a goto (see comment where set to true) 186 bool skip_to_state_evaluation = false; 187 188 while (true) 189 { 190 if (!skip_to_state_evaluation) 191 { 192 // invariant: get_token() was called before each iteration 193 switch (last_token) 194 { 195 case token_type::begin_object: 196 { 197 if (JSON_HEDLEY_UNLIKELY(!sax->start_object(static_cast<std::size_t>(-1)))) 198 { 199 return false; 200 } 201 202 // closing } -> we are done 203 if (get_token() == token_type::end_object) 204 { 205 if (JSON_HEDLEY_UNLIKELY(!sax->end_object())) 206 { 207 return false; 208 } 209 break; 210 } 211 212 // parse key 213 if (JSON_HEDLEY_UNLIKELY(last_token != token_type::value_string)) 214 { 215 return sax->parse_error(m_lexer.get_position(), 216 m_lexer.get_token_string(), 217 parse_error::create(101, m_lexer.get_position(), exception_message(token_type::value_string, "object key"), nullptr)); 218 } 219 if (JSON_HEDLEY_UNLIKELY(!sax->key(m_lexer.get_string()))) 220 { 221 return false; 222 } 223 224 // parse separator (:) 225 if (JSON_HEDLEY_UNLIKELY(get_token() != token_type::name_separator)) 226 { 227 return sax->parse_error(m_lexer.get_position(), 228 m_lexer.get_token_string(), 229 parse_error::create(101, m_lexer.get_position(), exception_message(token_type::name_separator, "object separator"), nullptr)); 230 } 231 232 // remember we are now inside an object 233 states.push_back(false); 234 235 // parse values 236 get_token(); 237 continue; 238 } 239 240 case token_type::begin_array: 241 { 242 if (JSON_HEDLEY_UNLIKELY(!sax->start_array(static_cast<std::size_t>(-1)))) 243 { 244 return false; 245 } 246 247 // closing ] -> we are done 248 if (get_token() == token_type::end_array) 249 { 250 if (JSON_HEDLEY_UNLIKELY(!sax->end_array())) 251 { 252 return false; 253 } 254 break; 255 } 256 257 // remember we are now inside an array 258 states.push_back(true); 259 260 // parse values (no need to call get_token) 261 continue; 262 } 263 264 case token_type::value_float: 265 { 266 const auto res = m_lexer.get_number_float(); 267 268 if (JSON_HEDLEY_UNLIKELY(!std::isfinite(res))) 269 { 270 return sax->parse_error(m_lexer.get_position(), 271 m_lexer.get_token_string(), 272 out_of_range::create(406, concat("number overflow parsing '", m_lexer.get_token_string(), '\''), nullptr)); 273 } 274 275 if (JSON_HEDLEY_UNLIKELY(!sax->number_float(res, m_lexer.get_string()))) 276 { 277 return false; 278 } 279 280 break; 281 } 282 283 case token_type::literal_false: 284 { 285 if (JSON_HEDLEY_UNLIKELY(!sax->boolean(false))) 286 { 287 return false; 288 } 289 break; 290 } 291 292 case token_type::literal_null: 293 { 294 if (JSON_HEDLEY_UNLIKELY(!sax->null())) 295 { 296 return false; 297 } 298 break; 299 } 300 301 case token_type::literal_true: 302 { 303 if (JSON_HEDLEY_UNLIKELY(!sax->boolean(true))) 304 { 305 return false; 306 } 307 break; 308 } 309 310 case token_type::value_integer: 311 { 312 if (JSON_HEDLEY_UNLIKELY(!sax->number_integer(m_lexer.get_number_integer()))) 313 { 314 return false; 315 } 316 break; 317 } 318 319 case token_type::value_string: 320 { 321 if (JSON_HEDLEY_UNLIKELY(!sax->string(m_lexer.get_string()))) 322 { 323 return false; 324 } 325 break; 326 } 327 328 case token_type::value_unsigned: 329 { 330 if (JSON_HEDLEY_UNLIKELY(!sax->number_unsigned(m_lexer.get_number_unsigned()))) 331 { 332 return false; 333 } 334 break; 335 } 336 337 case token_type::parse_error: 338 { 339 // using "uninitialized" to avoid "expected" message 340 return sax->parse_error(m_lexer.get_position(), 341 m_lexer.get_token_string(), 342 parse_error::create(101, m_lexer.get_position(), exception_message(token_type::uninitialized, "value"), nullptr)); 343 } 344 345 case token_type::uninitialized: 346 case token_type::end_array: 347 case token_type::end_object: 348 case token_type::name_separator: 349 case token_type::value_separator: 350 case token_type::end_of_input: 351 case token_type::literal_or_value: 352 default: // the last token was unexpected 353 { 354 return sax->parse_error(m_lexer.get_position(), 355 m_lexer.get_token_string(), 356 parse_error::create(101, m_lexer.get_position(), exception_message(token_type::literal_or_value, "value"), nullptr)); 357 } 358 } 359 } 360 else 361 { 362 skip_to_state_evaluation = false; 363 } 364 365 // we reached this line after we successfully parsed a value 366 if (states.empty()) 367 { 368 // empty stack: we reached the end of the hierarchy: done 369 return true; 370 } 371 372 if (states.back()) // array 373 { 374 // comma -> next value 375 if (get_token() == token_type::value_separator) 376 { 377 // parse a new value 378 get_token(); 379 continue; 380 } 381 382 // closing ] 383 if (JSON_HEDLEY_LIKELY(last_token == token_type::end_array)) 384 { 385 if (JSON_HEDLEY_UNLIKELY(!sax->end_array())) 386 { 387 return false; 388 } 389 390 // We are done with this array. Before we can parse a 391 // new value, we need to evaluate the new state first. 392 // By setting skip_to_state_evaluation to false, we 393 // are effectively jumping to the beginning of this if. 394 JSON_ASSERT(!states.empty()); 395 states.pop_back(); 396 skip_to_state_evaluation = true; 397 continue; 398 } 399 400 return sax->parse_error(m_lexer.get_position(), 401 m_lexer.get_token_string(), 402 parse_error::create(101, m_lexer.get_position(), exception_message(token_type::end_array, "array"), nullptr)); 403 } 404 405 // states.back() is false -> object 406 407 // comma -> next value 408 if (get_token() == token_type::value_separator) 409 { 410 // parse key 411 if (JSON_HEDLEY_UNLIKELY(get_token() != token_type::value_string)) 412 { 413 return sax->parse_error(m_lexer.get_position(), 414 m_lexer.get_token_string(), 415 parse_error::create(101, m_lexer.get_position(), exception_message(token_type::value_string, "object key"), nullptr)); 416 } 417 418 if (JSON_HEDLEY_UNLIKELY(!sax->key(m_lexer.get_string()))) 419 { 420 return false; 421 } 422 423 // parse separator (:) 424 if (JSON_HEDLEY_UNLIKELY(get_token() != token_type::name_separator)) 425 { 426 return sax->parse_error(m_lexer.get_position(), 427 m_lexer.get_token_string(), 428 parse_error::create(101, m_lexer.get_position(), exception_message(token_type::name_separator, "object separator"), nullptr)); 429 } 430 431 // parse values 432 get_token(); 433 continue; 434 } 435 436 // closing } 437 if (JSON_HEDLEY_LIKELY(last_token == token_type::end_object)) 438 { 439 if (JSON_HEDLEY_UNLIKELY(!sax->end_object())) 440 { 441 return false; 442 } 443 444 // We are done with this object. Before we can parse a 445 // new value, we need to evaluate the new state first. 446 // By setting skip_to_state_evaluation to false, we 447 // are effectively jumping to the beginning of this if. 448 JSON_ASSERT(!states.empty()); 449 states.pop_back(); 450 skip_to_state_evaluation = true; 451 continue; 452 } 453 454 return sax->parse_error(m_lexer.get_position(), 455 m_lexer.get_token_string(), 456 parse_error::create(101, m_lexer.get_position(), exception_message(token_type::end_object, "object"), nullptr)); 457 } 458 } 459 460 /// get next token from lexer get_token()461 token_type get_token() 462 { 463 return last_token = m_lexer.scan(); 464 } 465 exception_message(const token_type expected,const std::string & context)466 std::string exception_message(const token_type expected, const std::string& context) 467 { 468 std::string error_msg = "syntax error "; 469 470 if (!context.empty()) 471 { 472 error_msg += concat("while parsing ", context, ' '); 473 } 474 475 error_msg += "- "; 476 477 if (last_token == token_type::parse_error) 478 { 479 error_msg += concat(m_lexer.get_error_message(), "; last read: '", 480 m_lexer.get_token_string(), '\''); 481 } 482 else 483 { 484 error_msg += concat("unexpected ", lexer_t::token_type_name(last_token)); 485 } 486 487 if (expected != token_type::uninitialized) 488 { 489 error_msg += concat("; expected ", lexer_t::token_type_name(expected)); 490 } 491 492 return error_msg; 493 } 494 495 private: 496 /// callback function 497 const parser_callback_t<BasicJsonType> callback = nullptr; 498 /// the type of the last read token 499 token_type last_token = token_type::uninitialized; 500 /// the lexer 501 lexer_t m_lexer; 502 /// whether to throw exceptions in case of errors 503 const bool allow_exceptions = true; 504 }; 505 506 } // namespace detail 507 NLOHMANN_JSON_NAMESPACE_END 508