1 // __ _____ _____ _____
2 // __| | __| | | | JSON for Modern C++
3 // | | |__ | | | | | | version 3.11.2
4 // |_____|_____|_____|_|___| https://github.com/nlohmann/json
5 //
6 // SPDX-FileCopyrightText: 2013-2022 Niels Lohmann <https://nlohmann.me>
7 // SPDX-License-Identifier: MIT
8
9 #pragma once
10
11 #include <algorithm> // generate_n
12 #include <array> // array
13 #include <cmath> // ldexp
14 #include <cstddef> // size_t
15 #include <cstdint> // uint8_t, uint16_t, uint32_t, uint64_t
16 #include <cstdio> // snprintf
17 #include <cstring> // memcpy
18 #include <iterator> // back_inserter
19 #include <limits> // numeric_limits
20 #include <string> // char_traits, string
21 #include <utility> // make_pair, move
22 #include <vector> // vector
23
24 #include <nlohmann/detail/exceptions.hpp>
25 #include <nlohmann/detail/input/input_adapters.hpp>
26 #include <nlohmann/detail/input/json_sax.hpp>
27 #include <nlohmann/detail/input/lexer.hpp>
28 #include <nlohmann/detail/macro_scope.hpp>
29 #include <nlohmann/detail/meta/is_sax.hpp>
30 #include <nlohmann/detail/meta/type_traits.hpp>
31 #include <nlohmann/detail/string_concat.hpp>
32 #include <nlohmann/detail/value_t.hpp>
33
34 NLOHMANN_JSON_NAMESPACE_BEGIN
35 namespace detail
36 {
37
38 /// how to treat CBOR tags
enum class cbor_tag_handler_t
{
    error,  ///< a tag in the input is reported as a parse_error
    ignore, ///< the tag is skipped and only the item following it is parsed
    store   ///< 1/2/4/8-byte tag numbers become the subtype of the binary value that follows
};
45
46 /*!
47 @brief determine system byte order
48
49 @return true if and only if system's byte order is little endian
50
51 @note from https://stackoverflow.com/a/1001328/266378
52 */
static inline bool little_endianness(int num = 1) noexcept
{
    // Copy out the byte stored at the lowest address of `num`. With the
    // default argument 1, that byte is 1 exactly when the least significant
    // byte is stored first.
    char lowest_address_byte{};
    std::memcpy(&lowest_address_byte, &num, sizeof(lowest_address_byte));
    return lowest_address_byte == 1;
}
57
58
59 ///////////////////
60 // binary reader //
61 ///////////////////
62
63 /*!
@brief deserialization of BSON, CBOR, MessagePack, UBJSON, and BJData values
65 */
66 template<typename BasicJsonType, typename InputAdapterType, typename SAX = json_sax_dom_parser<BasicJsonType>>
67 class binary_reader
68 {
69 using number_integer_t = typename BasicJsonType::number_integer_t;
70 using number_unsigned_t = typename BasicJsonType::number_unsigned_t;
71 using number_float_t = typename BasicJsonType::number_float_t;
72 using string_t = typename BasicJsonType::string_t;
73 using binary_t = typename BasicJsonType::binary_t;
74 using json_sax_t = SAX;
75 using char_type = typename InputAdapterType::char_type;
76 using char_int_type = typename std::char_traits<char_type>::int_type;
77
78 public:
79 /*!
80 @brief create a binary reader
81
82 @param[in] adapter input adapter to read from
83 */
binary_reader(InputAdapterType && adapter,const input_format_t format=input_format_t::json)84 explicit binary_reader(InputAdapterType&& adapter, const input_format_t format = input_format_t::json) noexcept : ia(std::move(adapter)), input_format(format)
85 {
86 (void)detail::is_sax_static_asserts<SAX, BasicJsonType> {};
87 }
88
89 // make class move-only
90 binary_reader(const binary_reader&) = delete;
91 binary_reader(binary_reader&&) = default; // NOLINT(hicpp-noexcept-move,performance-noexcept-move-constructor)
92 binary_reader& operator=(const binary_reader&) = delete;
93 binary_reader& operator=(binary_reader&&) = default; // NOLINT(hicpp-noexcept-move,performance-noexcept-move-constructor)
94 ~binary_reader() = default;
95
96 /*!
97 @param[in] format the binary format to parse
98 @param[in] sax_ a SAX event processor
@param[in] strict  whether to expect the input to be consumed completely
100 @param[in] tag_handler how to treat CBOR tags
101
102 @return whether parsing was successful
103 */
104 JSON_HEDLEY_NON_NULL(3)
sax_parse(const input_format_t format,json_sax_t * sax_,const bool strict=true,const cbor_tag_handler_t tag_handler=cbor_tag_handler_t::error)105 bool sax_parse(const input_format_t format,
106 json_sax_t* sax_,
107 const bool strict = true,
108 const cbor_tag_handler_t tag_handler = cbor_tag_handler_t::error)
109 {
110 sax = sax_;
111 bool result = false;
112
113 switch (format)
114 {
115 case input_format_t::bson:
116 result = parse_bson_internal();
117 break;
118
119 case input_format_t::cbor:
120 result = parse_cbor_internal(true, tag_handler);
121 break;
122
123 case input_format_t::msgpack:
124 result = parse_msgpack_internal();
125 break;
126
127 case input_format_t::ubjson:
128 case input_format_t::bjdata:
129 result = parse_ubjson_internal();
130 break;
131
132 case input_format_t::json: // LCOV_EXCL_LINE
133 default: // LCOV_EXCL_LINE
134 JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert) LCOV_EXCL_LINE
135 }
136
137 // strict mode: next byte must be EOF
138 if (result && strict)
139 {
140 if (input_format == input_format_t::ubjson || input_format == input_format_t::bjdata)
141 {
142 get_ignore_noop();
143 }
144 else
145 {
146 get();
147 }
148
149 if (JSON_HEDLEY_UNLIKELY(current != std::char_traits<char_type>::eof()))
150 {
151 return sax->parse_error(chars_read, get_token_string(), parse_error::create(110, chars_read,
152 exception_message(input_format, concat("expected end of input; last byte: 0x", get_token_string()), "value"), nullptr));
153 }
154 }
155
156 return result;
157 }
158
159 private:
160 //////////
161 // BSON //
162 //////////
163
164 /*!
165 @brief Reads in a BSON-object and passes it to the SAX-parser.
166 @return whether a valid BSON-value was passed to the SAX parser
167 */
parse_bson_internal()168 bool parse_bson_internal()
169 {
170 std::int32_t document_size{};
171 get_number<std::int32_t, true>(input_format_t::bson, document_size);
172
173 if (JSON_HEDLEY_UNLIKELY(!sax->start_object(static_cast<std::size_t>(-1))))
174 {
175 return false;
176 }
177
178 if (JSON_HEDLEY_UNLIKELY(!parse_bson_element_list(/*is_array*/false)))
179 {
180 return false;
181 }
182
183 return sax->end_object();
184 }
185
186 /*!
187 @brief Parses a C-style string from the BSON input.
188 @param[in,out] result A reference to the string variable where the read
189 string is to be stored.
@return `true` if the \x00-byte indicating the end of the string was
        encountered before the EOF; `false` indicates an unexpected EOF.
192 */
get_bson_cstr(string_t & result)193 bool get_bson_cstr(string_t& result)
194 {
195 auto out = std::back_inserter(result);
196 while (true)
197 {
198 get();
199 if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::bson, "cstring")))
200 {
201 return false;
202 }
203 if (current == 0x00)
204 {
205 return true;
206 }
207 *out++ = static_cast<typename string_t::value_type>(current);
208 }
209 }
210
211 /*!
212 @brief Parses a zero-terminated string of length @a len from the BSON
213 input.
214 @param[in] len The length (including the zero-byte at the end) of the
215 string to be read.
216 @param[in,out] result A reference to the string variable where the read
217 string is to be stored.
218 @tparam NumberType The type of the length @a len
219 @pre len >= 1
220 @return `true` if the string was successfully parsed
221 */
222 template<typename NumberType>
get_bson_string(const NumberType len,string_t & result)223 bool get_bson_string(const NumberType len, string_t& result)
224 {
225 if (JSON_HEDLEY_UNLIKELY(len < 1))
226 {
227 auto last_token = get_token_string();
228 return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read,
229 exception_message(input_format_t::bson, concat("string length must be at least 1, is ", std::to_string(len)), "string"), nullptr));
230 }
231
232 return get_string(input_format_t::bson, len - static_cast<NumberType>(1), result) && get() != std::char_traits<char_type>::eof();
233 }
234
235 /*!
236 @brief Parses a byte array input of length @a len from the BSON input.
237 @param[in] len The length of the byte array to be read.
238 @param[in,out] result A reference to the binary variable where the read
239 array is to be stored.
240 @tparam NumberType The type of the length @a len
241 @pre len >= 0
242 @return `true` if the byte array was successfully parsed
243 */
244 template<typename NumberType>
get_bson_binary(const NumberType len,binary_t & result)245 bool get_bson_binary(const NumberType len, binary_t& result)
246 {
247 if (JSON_HEDLEY_UNLIKELY(len < 0))
248 {
249 auto last_token = get_token_string();
250 return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read,
251 exception_message(input_format_t::bson, concat("byte array length cannot be negative, is ", std::to_string(len)), "binary"), nullptr));
252 }
253
254 // All BSON binary values have a subtype
255 std::uint8_t subtype{};
256 get_number<std::uint8_t>(input_format_t::bson, subtype);
257 result.set_subtype(subtype);
258
259 return get_binary(input_format_t::bson, len, result);
260 }
261
262 /*!
263 @brief Read a BSON document element of the given @a element_type.
264 @param[in] element_type The BSON element type, c.f. http://bsonspec.org/spec.html
265 @param[in] element_type_parse_position The position in the input stream,
266 where the `element_type` was read.
267 @warning Not all BSON element types are supported yet. An unsupported
268 @a element_type will give rise to a parse_error.114:
269 Unsupported BSON record type 0x...
270 @return whether a valid BSON-object/array was passed to the SAX parser
271 */
parse_bson_element_internal(const char_int_type element_type,const std::size_t element_type_parse_position)272 bool parse_bson_element_internal(const char_int_type element_type,
273 const std::size_t element_type_parse_position)
274 {
275 switch (element_type)
276 {
277 case 0x01: // double
278 {
279 double number{};
280 return get_number<double, true>(input_format_t::bson, number) && sax->number_float(static_cast<number_float_t>(number), "");
281 }
282
283 case 0x02: // string
284 {
285 std::int32_t len{};
286 string_t value;
287 return get_number<std::int32_t, true>(input_format_t::bson, len) && get_bson_string(len, value) && sax->string(value);
288 }
289
290 case 0x03: // object
291 {
292 return parse_bson_internal();
293 }
294
295 case 0x04: // array
296 {
297 return parse_bson_array();
298 }
299
300 case 0x05: // binary
301 {
302 std::int32_t len{};
303 binary_t value;
304 return get_number<std::int32_t, true>(input_format_t::bson, len) && get_bson_binary(len, value) && sax->binary(value);
305 }
306
307 case 0x08: // boolean
308 {
309 return sax->boolean(get() != 0);
310 }
311
312 case 0x0A: // null
313 {
314 return sax->null();
315 }
316
317 case 0x10: // int32
318 {
319 std::int32_t value{};
320 return get_number<std::int32_t, true>(input_format_t::bson, value) && sax->number_integer(value);
321 }
322
323 case 0x12: // int64
324 {
325 std::int64_t value{};
326 return get_number<std::int64_t, true>(input_format_t::bson, value) && sax->number_integer(value);
327 }
328
329 default: // anything else not supported (yet)
330 {
331 std::array<char, 3> cr{{}};
332 static_cast<void>((std::snprintf)(cr.data(), cr.size(), "%.2hhX", static_cast<unsigned char>(element_type))); // NOLINT(cppcoreguidelines-pro-type-vararg,hicpp-vararg)
333 std::string cr_str{cr.data()};
334 return sax->parse_error(element_type_parse_position, cr_str,
335 parse_error::create(114, element_type_parse_position, concat("Unsupported BSON record type 0x", cr_str), nullptr));
336 }
337 }
338 }
339
340 /*!
341 @brief Read a BSON element list (as specified in the BSON-spec)
342
343 The same binary layout is used for objects and arrays, hence it must be
344 indicated with the argument @a is_array which one is expected
345 (true --> array, false --> object).
346
347 @param[in] is_array Determines if the element list being read is to be
348 treated as an object (@a is_array == false), or as an
349 array (@a is_array == true).
350 @return whether a valid BSON-object/array was passed to the SAX parser
351 */
parse_bson_element_list(const bool is_array)352 bool parse_bson_element_list(const bool is_array)
353 {
354 string_t key;
355
356 while (auto element_type = get())
357 {
358 if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::bson, "element list")))
359 {
360 return false;
361 }
362
363 const std::size_t element_type_parse_position = chars_read;
364 if (JSON_HEDLEY_UNLIKELY(!get_bson_cstr(key)))
365 {
366 return false;
367 }
368
369 if (!is_array && !sax->key(key))
370 {
371 return false;
372 }
373
374 if (JSON_HEDLEY_UNLIKELY(!parse_bson_element_internal(element_type, element_type_parse_position)))
375 {
376 return false;
377 }
378
379 // get_bson_cstr only appends
380 key.clear();
381 }
382
383 return true;
384 }
385
386 /*!
387 @brief Reads an array from the BSON input and passes it to the SAX-parser.
388 @return whether a valid BSON-array was passed to the SAX parser
389 */
parse_bson_array()390 bool parse_bson_array()
391 {
392 std::int32_t document_size{};
393 get_number<std::int32_t, true>(input_format_t::bson, document_size);
394
395 if (JSON_HEDLEY_UNLIKELY(!sax->start_array(static_cast<std::size_t>(-1))))
396 {
397 return false;
398 }
399
400 if (JSON_HEDLEY_UNLIKELY(!parse_bson_element_list(/*is_array*/true)))
401 {
402 return false;
403 }
404
405 return sax->end_array();
406 }
407
408 //////////
409 // CBOR //
410 //////////
411
412 /*!
413 @param[in] get_char whether a new character should be retrieved from the
414 input (true) or whether the last read character should
415 be considered instead (false)
416 @param[in] tag_handler how CBOR tags should be treated
417
418 @return whether a valid CBOR value was passed to the SAX parser
419 */
parse_cbor_internal(const bool get_char,const cbor_tag_handler_t tag_handler)420 bool parse_cbor_internal(const bool get_char,
421 const cbor_tag_handler_t tag_handler)
422 {
423 switch (get_char ? get() : current)
424 {
425 // EOF
426 case std::char_traits<char_type>::eof():
427 return unexpect_eof(input_format_t::cbor, "value");
428
429 // Integer 0x00..0x17 (0..23)
430 case 0x00:
431 case 0x01:
432 case 0x02:
433 case 0x03:
434 case 0x04:
435 case 0x05:
436 case 0x06:
437 case 0x07:
438 case 0x08:
439 case 0x09:
440 case 0x0A:
441 case 0x0B:
442 case 0x0C:
443 case 0x0D:
444 case 0x0E:
445 case 0x0F:
446 case 0x10:
447 case 0x11:
448 case 0x12:
449 case 0x13:
450 case 0x14:
451 case 0x15:
452 case 0x16:
453 case 0x17:
454 return sax->number_unsigned(static_cast<number_unsigned_t>(current));
455
456 case 0x18: // Unsigned integer (one-byte uint8_t follows)
457 {
458 std::uint8_t number{};
459 return get_number(input_format_t::cbor, number) && sax->number_unsigned(number);
460 }
461
462 case 0x19: // Unsigned integer (two-byte uint16_t follows)
463 {
464 std::uint16_t number{};
465 return get_number(input_format_t::cbor, number) && sax->number_unsigned(number);
466 }
467
468 case 0x1A: // Unsigned integer (four-byte uint32_t follows)
469 {
470 std::uint32_t number{};
471 return get_number(input_format_t::cbor, number) && sax->number_unsigned(number);
472 }
473
474 case 0x1B: // Unsigned integer (eight-byte uint64_t follows)
475 {
476 std::uint64_t number{};
477 return get_number(input_format_t::cbor, number) && sax->number_unsigned(number);
478 }
479
480 // Negative integer -1-0x00..-1-0x17 (-1..-24)
481 case 0x20:
482 case 0x21:
483 case 0x22:
484 case 0x23:
485 case 0x24:
486 case 0x25:
487 case 0x26:
488 case 0x27:
489 case 0x28:
490 case 0x29:
491 case 0x2A:
492 case 0x2B:
493 case 0x2C:
494 case 0x2D:
495 case 0x2E:
496 case 0x2F:
497 case 0x30:
498 case 0x31:
499 case 0x32:
500 case 0x33:
501 case 0x34:
502 case 0x35:
503 case 0x36:
504 case 0x37:
505 return sax->number_integer(static_cast<std::int8_t>(0x20 - 1 - current));
506
507 case 0x38: // Negative integer (one-byte uint8_t follows)
508 {
509 std::uint8_t number{};
510 return get_number(input_format_t::cbor, number) && sax->number_integer(static_cast<number_integer_t>(-1) - number);
511 }
512
513 case 0x39: // Negative integer -1-n (two-byte uint16_t follows)
514 {
515 std::uint16_t number{};
516 return get_number(input_format_t::cbor, number) && sax->number_integer(static_cast<number_integer_t>(-1) - number);
517 }
518
519 case 0x3A: // Negative integer -1-n (four-byte uint32_t follows)
520 {
521 std::uint32_t number{};
522 return get_number(input_format_t::cbor, number) && sax->number_integer(static_cast<number_integer_t>(-1) - number);
523 }
524
525 case 0x3B: // Negative integer -1-n (eight-byte uint64_t follows)
526 {
527 std::uint64_t number{};
528 return get_number(input_format_t::cbor, number) && sax->number_integer(static_cast<number_integer_t>(-1)
529 - static_cast<number_integer_t>(number));
530 }
531
532 // Binary data (0x00..0x17 bytes follow)
533 case 0x40:
534 case 0x41:
535 case 0x42:
536 case 0x43:
537 case 0x44:
538 case 0x45:
539 case 0x46:
540 case 0x47:
541 case 0x48:
542 case 0x49:
543 case 0x4A:
544 case 0x4B:
545 case 0x4C:
546 case 0x4D:
547 case 0x4E:
548 case 0x4F:
549 case 0x50:
550 case 0x51:
551 case 0x52:
552 case 0x53:
553 case 0x54:
554 case 0x55:
555 case 0x56:
556 case 0x57:
557 case 0x58: // Binary data (one-byte uint8_t for n follows)
558 case 0x59: // Binary data (two-byte uint16_t for n follow)
559 case 0x5A: // Binary data (four-byte uint32_t for n follow)
560 case 0x5B: // Binary data (eight-byte uint64_t for n follow)
561 case 0x5F: // Binary data (indefinite length)
562 {
563 binary_t b;
564 return get_cbor_binary(b) && sax->binary(b);
565 }
566
567 // UTF-8 string (0x00..0x17 bytes follow)
568 case 0x60:
569 case 0x61:
570 case 0x62:
571 case 0x63:
572 case 0x64:
573 case 0x65:
574 case 0x66:
575 case 0x67:
576 case 0x68:
577 case 0x69:
578 case 0x6A:
579 case 0x6B:
580 case 0x6C:
581 case 0x6D:
582 case 0x6E:
583 case 0x6F:
584 case 0x70:
585 case 0x71:
586 case 0x72:
587 case 0x73:
588 case 0x74:
589 case 0x75:
590 case 0x76:
591 case 0x77:
592 case 0x78: // UTF-8 string (one-byte uint8_t for n follows)
593 case 0x79: // UTF-8 string (two-byte uint16_t for n follow)
594 case 0x7A: // UTF-8 string (four-byte uint32_t for n follow)
595 case 0x7B: // UTF-8 string (eight-byte uint64_t for n follow)
596 case 0x7F: // UTF-8 string (indefinite length)
597 {
598 string_t s;
599 return get_cbor_string(s) && sax->string(s);
600 }
601
602 // array (0x00..0x17 data items follow)
603 case 0x80:
604 case 0x81:
605 case 0x82:
606 case 0x83:
607 case 0x84:
608 case 0x85:
609 case 0x86:
610 case 0x87:
611 case 0x88:
612 case 0x89:
613 case 0x8A:
614 case 0x8B:
615 case 0x8C:
616 case 0x8D:
617 case 0x8E:
618 case 0x8F:
619 case 0x90:
620 case 0x91:
621 case 0x92:
622 case 0x93:
623 case 0x94:
624 case 0x95:
625 case 0x96:
626 case 0x97:
627 return get_cbor_array(
628 conditional_static_cast<std::size_t>(static_cast<unsigned int>(current) & 0x1Fu), tag_handler);
629
630 case 0x98: // array (one-byte uint8_t for n follows)
631 {
632 std::uint8_t len{};
633 return get_number(input_format_t::cbor, len) && get_cbor_array(static_cast<std::size_t>(len), tag_handler);
634 }
635
636 case 0x99: // array (two-byte uint16_t for n follow)
637 {
638 std::uint16_t len{};
639 return get_number(input_format_t::cbor, len) && get_cbor_array(static_cast<std::size_t>(len), tag_handler);
640 }
641
642 case 0x9A: // array (four-byte uint32_t for n follow)
643 {
644 std::uint32_t len{};
645 return get_number(input_format_t::cbor, len) && get_cbor_array(conditional_static_cast<std::size_t>(len), tag_handler);
646 }
647
648 case 0x9B: // array (eight-byte uint64_t for n follow)
649 {
650 std::uint64_t len{};
651 return get_number(input_format_t::cbor, len) && get_cbor_array(conditional_static_cast<std::size_t>(len), tag_handler);
652 }
653
654 case 0x9F: // array (indefinite length)
655 return get_cbor_array(static_cast<std::size_t>(-1), tag_handler);
656
657 // map (0x00..0x17 pairs of data items follow)
658 case 0xA0:
659 case 0xA1:
660 case 0xA2:
661 case 0xA3:
662 case 0xA4:
663 case 0xA5:
664 case 0xA6:
665 case 0xA7:
666 case 0xA8:
667 case 0xA9:
668 case 0xAA:
669 case 0xAB:
670 case 0xAC:
671 case 0xAD:
672 case 0xAE:
673 case 0xAF:
674 case 0xB0:
675 case 0xB1:
676 case 0xB2:
677 case 0xB3:
678 case 0xB4:
679 case 0xB5:
680 case 0xB6:
681 case 0xB7:
682 return get_cbor_object(conditional_static_cast<std::size_t>(static_cast<unsigned int>(current) & 0x1Fu), tag_handler);
683
684 case 0xB8: // map (one-byte uint8_t for n follows)
685 {
686 std::uint8_t len{};
687 return get_number(input_format_t::cbor, len) && get_cbor_object(static_cast<std::size_t>(len), tag_handler);
688 }
689
690 case 0xB9: // map (two-byte uint16_t for n follow)
691 {
692 std::uint16_t len{};
693 return get_number(input_format_t::cbor, len) && get_cbor_object(static_cast<std::size_t>(len), tag_handler);
694 }
695
696 case 0xBA: // map (four-byte uint32_t for n follow)
697 {
698 std::uint32_t len{};
699 return get_number(input_format_t::cbor, len) && get_cbor_object(conditional_static_cast<std::size_t>(len), tag_handler);
700 }
701
702 case 0xBB: // map (eight-byte uint64_t for n follow)
703 {
704 std::uint64_t len{};
705 return get_number(input_format_t::cbor, len) && get_cbor_object(conditional_static_cast<std::size_t>(len), tag_handler);
706 }
707
708 case 0xBF: // map (indefinite length)
709 return get_cbor_object(static_cast<std::size_t>(-1), tag_handler);
710
711 case 0xC6: // tagged item
712 case 0xC7:
713 case 0xC8:
714 case 0xC9:
715 case 0xCA:
716 case 0xCB:
717 case 0xCC:
718 case 0xCD:
719 case 0xCE:
720 case 0xCF:
721 case 0xD0:
722 case 0xD1:
723 case 0xD2:
724 case 0xD3:
725 case 0xD4:
726 case 0xD8: // tagged item (1 bytes follow)
727 case 0xD9: // tagged item (2 bytes follow)
728 case 0xDA: // tagged item (4 bytes follow)
729 case 0xDB: // tagged item (8 bytes follow)
730 {
731 switch (tag_handler)
732 {
733 case cbor_tag_handler_t::error:
734 {
735 auto last_token = get_token_string();
736 return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read,
737 exception_message(input_format_t::cbor, concat("invalid byte: 0x", last_token), "value"), nullptr));
738 }
739
740 case cbor_tag_handler_t::ignore:
741 {
742 // ignore binary subtype
743 switch (current)
744 {
745 case 0xD8:
746 {
747 std::uint8_t subtype_to_ignore{};
748 get_number(input_format_t::cbor, subtype_to_ignore);
749 break;
750 }
751 case 0xD9:
752 {
753 std::uint16_t subtype_to_ignore{};
754 get_number(input_format_t::cbor, subtype_to_ignore);
755 break;
756 }
757 case 0xDA:
758 {
759 std::uint32_t subtype_to_ignore{};
760 get_number(input_format_t::cbor, subtype_to_ignore);
761 break;
762 }
763 case 0xDB:
764 {
765 std::uint64_t subtype_to_ignore{};
766 get_number(input_format_t::cbor, subtype_to_ignore);
767 break;
768 }
769 default:
770 break;
771 }
772 return parse_cbor_internal(true, tag_handler);
773 }
774
775 case cbor_tag_handler_t::store:
776 {
777 binary_t b;
778 // use binary subtype and store in binary container
779 switch (current)
780 {
781 case 0xD8:
782 {
783 std::uint8_t subtype{};
784 get_number(input_format_t::cbor, subtype);
785 b.set_subtype(detail::conditional_static_cast<typename binary_t::subtype_type>(subtype));
786 break;
787 }
788 case 0xD9:
789 {
790 std::uint16_t subtype{};
791 get_number(input_format_t::cbor, subtype);
792 b.set_subtype(detail::conditional_static_cast<typename binary_t::subtype_type>(subtype));
793 break;
794 }
795 case 0xDA:
796 {
797 std::uint32_t subtype{};
798 get_number(input_format_t::cbor, subtype);
799 b.set_subtype(detail::conditional_static_cast<typename binary_t::subtype_type>(subtype));
800 break;
801 }
802 case 0xDB:
803 {
804 std::uint64_t subtype{};
805 get_number(input_format_t::cbor, subtype);
806 b.set_subtype(detail::conditional_static_cast<typename binary_t::subtype_type>(subtype));
807 break;
808 }
809 default:
810 return parse_cbor_internal(true, tag_handler);
811 }
812 get();
813 return get_cbor_binary(b) && sax->binary(b);
814 }
815
816 default: // LCOV_EXCL_LINE
817 JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert) LCOV_EXCL_LINE
818 return false; // LCOV_EXCL_LINE
819 }
820 }
821
822 case 0xF4: // false
823 return sax->boolean(false);
824
825 case 0xF5: // true
826 return sax->boolean(true);
827
828 case 0xF6: // null
829 return sax->null();
830
831 case 0xF9: // Half-Precision Float (two-byte IEEE 754)
832 {
833 const auto byte1_raw = get();
834 if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::cbor, "number")))
835 {
836 return false;
837 }
838 const auto byte2_raw = get();
839 if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::cbor, "number")))
840 {
841 return false;
842 }
843
844 const auto byte1 = static_cast<unsigned char>(byte1_raw);
845 const auto byte2 = static_cast<unsigned char>(byte2_raw);
846
847 // code from RFC 7049, Appendix D, Figure 3:
848 // As half-precision floating-point numbers were only added
849 // to IEEE 754 in 2008, today's programming platforms often
850 // still only have limited support for them. It is very
851 // easy to include at least decoding support for them even
852 // without such support. An example of a small decoder for
853 // half-precision floating-point numbers in the C language
854 // is shown in Fig. 3.
855 const auto half = static_cast<unsigned int>((byte1 << 8u) + byte2);
856 const double val = [&half]
857 {
858 const int exp = (half >> 10u) & 0x1Fu;
859 const unsigned int mant = half & 0x3FFu;
860 JSON_ASSERT(0 <= exp&& exp <= 32);
861 JSON_ASSERT(mant <= 1024);
862 switch (exp)
863 {
864 case 0:
865 return std::ldexp(mant, -24);
866 case 31:
867 return (mant == 0)
868 ? std::numeric_limits<double>::infinity()
869 : std::numeric_limits<double>::quiet_NaN();
870 default:
871 return std::ldexp(mant + 1024, exp - 25);
872 }
873 }();
874 return sax->number_float((half & 0x8000u) != 0
875 ? static_cast<number_float_t>(-val)
876 : static_cast<number_float_t>(val), "");
877 }
878
879 case 0xFA: // Single-Precision Float (four-byte IEEE 754)
880 {
881 float number{};
882 return get_number(input_format_t::cbor, number) && sax->number_float(static_cast<number_float_t>(number), "");
883 }
884
885 case 0xFB: // Double-Precision Float (eight-byte IEEE 754)
886 {
887 double number{};
888 return get_number(input_format_t::cbor, number) && sax->number_float(static_cast<number_float_t>(number), "");
889 }
890
891 default: // anything else (0xFF is handled inside the other types)
892 {
893 auto last_token = get_token_string();
894 return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read,
895 exception_message(input_format_t::cbor, concat("invalid byte: 0x", last_token), "value"), nullptr));
896 }
897 }
898 }
899
900 /*!
901 @brief reads a CBOR string
902
903 This function first reads starting bytes to determine the expected
904 string length and then copies this number of bytes into a string.
905 Additionally, CBOR's strings with indefinite lengths are supported.
906
907 @param[out] result created string
908
909 @return whether string creation completed
910 */
get_cbor_string(string_t & result)911 bool get_cbor_string(string_t& result)
912 {
913 if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::cbor, "string")))
914 {
915 return false;
916 }
917
918 switch (current)
919 {
920 // UTF-8 string (0x00..0x17 bytes follow)
921 case 0x60:
922 case 0x61:
923 case 0x62:
924 case 0x63:
925 case 0x64:
926 case 0x65:
927 case 0x66:
928 case 0x67:
929 case 0x68:
930 case 0x69:
931 case 0x6A:
932 case 0x6B:
933 case 0x6C:
934 case 0x6D:
935 case 0x6E:
936 case 0x6F:
937 case 0x70:
938 case 0x71:
939 case 0x72:
940 case 0x73:
941 case 0x74:
942 case 0x75:
943 case 0x76:
944 case 0x77:
945 {
946 return get_string(input_format_t::cbor, static_cast<unsigned int>(current) & 0x1Fu, result);
947 }
948
949 case 0x78: // UTF-8 string (one-byte uint8_t for n follows)
950 {
951 std::uint8_t len{};
952 return get_number(input_format_t::cbor, len) && get_string(input_format_t::cbor, len, result);
953 }
954
955 case 0x79: // UTF-8 string (two-byte uint16_t for n follow)
956 {
957 std::uint16_t len{};
958 return get_number(input_format_t::cbor, len) && get_string(input_format_t::cbor, len, result);
959 }
960
961 case 0x7A: // UTF-8 string (four-byte uint32_t for n follow)
962 {
963 std::uint32_t len{};
964 return get_number(input_format_t::cbor, len) && get_string(input_format_t::cbor, len, result);
965 }
966
967 case 0x7B: // UTF-8 string (eight-byte uint64_t for n follow)
968 {
969 std::uint64_t len{};
970 return get_number(input_format_t::cbor, len) && get_string(input_format_t::cbor, len, result);
971 }
972
973 case 0x7F: // UTF-8 string (indefinite length)
974 {
975 while (get() != 0xFF)
976 {
977 string_t chunk;
978 if (!get_cbor_string(chunk))
979 {
980 return false;
981 }
982 result.append(chunk);
983 }
984 return true;
985 }
986
987 default:
988 {
989 auto last_token = get_token_string();
990 return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read,
991 exception_message(input_format_t::cbor, concat("expected length specification (0x60-0x7B) or indefinite string type (0x7F); last byte: 0x", last_token), "string"), nullptr));
992 }
993 }
994 }
995
996 /*!
997 @brief reads a CBOR byte array
998
999 This function first reads starting bytes to determine the expected
1000 byte array length and then copies this number of bytes into the byte array.
1001 Additionally, CBOR's byte arrays with indefinite lengths are supported.
1002
1003 @param[out] result created byte array
1004
1005 @return whether byte array creation completed
1006 */
get_cbor_binary(binary_t & result)1007 bool get_cbor_binary(binary_t& result)
1008 {
1009 if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::cbor, "binary")))
1010 {
1011 return false;
1012 }
1013
1014 switch (current)
1015 {
1016 // Binary data (0x00..0x17 bytes follow)
1017 case 0x40:
1018 case 0x41:
1019 case 0x42:
1020 case 0x43:
1021 case 0x44:
1022 case 0x45:
1023 case 0x46:
1024 case 0x47:
1025 case 0x48:
1026 case 0x49:
1027 case 0x4A:
1028 case 0x4B:
1029 case 0x4C:
1030 case 0x4D:
1031 case 0x4E:
1032 case 0x4F:
1033 case 0x50:
1034 case 0x51:
1035 case 0x52:
1036 case 0x53:
1037 case 0x54:
1038 case 0x55:
1039 case 0x56:
1040 case 0x57:
1041 {
1042 return get_binary(input_format_t::cbor, static_cast<unsigned int>(current) & 0x1Fu, result);
1043 }
1044
1045 case 0x58: // Binary data (one-byte uint8_t for n follows)
1046 {
1047 std::uint8_t len{};
1048 return get_number(input_format_t::cbor, len) &&
1049 get_binary(input_format_t::cbor, len, result);
1050 }
1051
1052 case 0x59: // Binary data (two-byte uint16_t for n follow)
1053 {
1054 std::uint16_t len{};
1055 return get_number(input_format_t::cbor, len) &&
1056 get_binary(input_format_t::cbor, len, result);
1057 }
1058
1059 case 0x5A: // Binary data (four-byte uint32_t for n follow)
1060 {
1061 std::uint32_t len{};
1062 return get_number(input_format_t::cbor, len) &&
1063 get_binary(input_format_t::cbor, len, result);
1064 }
1065
1066 case 0x5B: // Binary data (eight-byte uint64_t for n follow)
1067 {
1068 std::uint64_t len{};
1069 return get_number(input_format_t::cbor, len) &&
1070 get_binary(input_format_t::cbor, len, result);
1071 }
1072
1073 case 0x5F: // Binary data (indefinite length)
1074 {
1075 while (get() != 0xFF)
1076 {
1077 binary_t chunk;
1078 if (!get_cbor_binary(chunk))
1079 {
1080 return false;
1081 }
1082 result.insert(result.end(), chunk.begin(), chunk.end());
1083 }
1084 return true;
1085 }
1086
1087 default:
1088 {
1089 auto last_token = get_token_string();
1090 return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read,
1091 exception_message(input_format_t::cbor, concat("expected length specification (0x40-0x5B) or indefinite binary array type (0x5F); last byte: 0x", last_token), "binary"), nullptr));
1092 }
1093 }
1094 }
1095
1096 /*!
1097 @param[in] len the length of the array or static_cast<std::size_t>(-1) for an
1098 array of indefinite size
1099 @param[in] tag_handler how CBOR tags should be treated
1100 @return whether array creation completed
1101 */
get_cbor_array(const std::size_t len,const cbor_tag_handler_t tag_handler)1102 bool get_cbor_array(const std::size_t len,
1103 const cbor_tag_handler_t tag_handler)
1104 {
1105 if (JSON_HEDLEY_UNLIKELY(!sax->start_array(len)))
1106 {
1107 return false;
1108 }
1109
1110 if (len != static_cast<std::size_t>(-1))
1111 {
1112 for (std::size_t i = 0; i < len; ++i)
1113 {
1114 if (JSON_HEDLEY_UNLIKELY(!parse_cbor_internal(true, tag_handler)))
1115 {
1116 return false;
1117 }
1118 }
1119 }
1120 else
1121 {
1122 while (get() != 0xFF)
1123 {
1124 if (JSON_HEDLEY_UNLIKELY(!parse_cbor_internal(false, tag_handler)))
1125 {
1126 return false;
1127 }
1128 }
1129 }
1130
1131 return sax->end_array();
1132 }
1133
1134 /*!
1135 @param[in] len the length of the object or static_cast<std::size_t>(-1) for an
1136 object of indefinite size
1137 @param[in] tag_handler how CBOR tags should be treated
1138 @return whether object creation completed
1139 */
get_cbor_object(const std::size_t len,const cbor_tag_handler_t tag_handler)1140 bool get_cbor_object(const std::size_t len,
1141 const cbor_tag_handler_t tag_handler)
1142 {
1143 if (JSON_HEDLEY_UNLIKELY(!sax->start_object(len)))
1144 {
1145 return false;
1146 }
1147
1148 if (len != 0)
1149 {
1150 string_t key;
1151 if (len != static_cast<std::size_t>(-1))
1152 {
1153 for (std::size_t i = 0; i < len; ++i)
1154 {
1155 get();
1156 if (JSON_HEDLEY_UNLIKELY(!get_cbor_string(key) || !sax->key(key)))
1157 {
1158 return false;
1159 }
1160
1161 if (JSON_HEDLEY_UNLIKELY(!parse_cbor_internal(true, tag_handler)))
1162 {
1163 return false;
1164 }
1165 key.clear();
1166 }
1167 }
1168 else
1169 {
1170 while (get() != 0xFF)
1171 {
1172 if (JSON_HEDLEY_UNLIKELY(!get_cbor_string(key) || !sax->key(key)))
1173 {
1174 return false;
1175 }
1176
1177 if (JSON_HEDLEY_UNLIKELY(!parse_cbor_internal(true, tag_handler)))
1178 {
1179 return false;
1180 }
1181 key.clear();
1182 }
1183 }
1184 }
1185
1186 return sax->end_object();
1187 }
1188
1189 /////////////
1190 // MsgPack //
1191 /////////////
1192
1193 /*!
1194 @return whether a valid MessagePack value was passed to the SAX parser
1195 */
parse_msgpack_internal()1196 bool parse_msgpack_internal()
1197 {
1198 switch (get())
1199 {
1200 // EOF
1201 case std::char_traits<char_type>::eof():
1202 return unexpect_eof(input_format_t::msgpack, "value");
1203
1204 // positive fixint
1205 case 0x00:
1206 case 0x01:
1207 case 0x02:
1208 case 0x03:
1209 case 0x04:
1210 case 0x05:
1211 case 0x06:
1212 case 0x07:
1213 case 0x08:
1214 case 0x09:
1215 case 0x0A:
1216 case 0x0B:
1217 case 0x0C:
1218 case 0x0D:
1219 case 0x0E:
1220 case 0x0F:
1221 case 0x10:
1222 case 0x11:
1223 case 0x12:
1224 case 0x13:
1225 case 0x14:
1226 case 0x15:
1227 case 0x16:
1228 case 0x17:
1229 case 0x18:
1230 case 0x19:
1231 case 0x1A:
1232 case 0x1B:
1233 case 0x1C:
1234 case 0x1D:
1235 case 0x1E:
1236 case 0x1F:
1237 case 0x20:
1238 case 0x21:
1239 case 0x22:
1240 case 0x23:
1241 case 0x24:
1242 case 0x25:
1243 case 0x26:
1244 case 0x27:
1245 case 0x28:
1246 case 0x29:
1247 case 0x2A:
1248 case 0x2B:
1249 case 0x2C:
1250 case 0x2D:
1251 case 0x2E:
1252 case 0x2F:
1253 case 0x30:
1254 case 0x31:
1255 case 0x32:
1256 case 0x33:
1257 case 0x34:
1258 case 0x35:
1259 case 0x36:
1260 case 0x37:
1261 case 0x38:
1262 case 0x39:
1263 case 0x3A:
1264 case 0x3B:
1265 case 0x3C:
1266 case 0x3D:
1267 case 0x3E:
1268 case 0x3F:
1269 case 0x40:
1270 case 0x41:
1271 case 0x42:
1272 case 0x43:
1273 case 0x44:
1274 case 0x45:
1275 case 0x46:
1276 case 0x47:
1277 case 0x48:
1278 case 0x49:
1279 case 0x4A:
1280 case 0x4B:
1281 case 0x4C:
1282 case 0x4D:
1283 case 0x4E:
1284 case 0x4F:
1285 case 0x50:
1286 case 0x51:
1287 case 0x52:
1288 case 0x53:
1289 case 0x54:
1290 case 0x55:
1291 case 0x56:
1292 case 0x57:
1293 case 0x58:
1294 case 0x59:
1295 case 0x5A:
1296 case 0x5B:
1297 case 0x5C:
1298 case 0x5D:
1299 case 0x5E:
1300 case 0x5F:
1301 case 0x60:
1302 case 0x61:
1303 case 0x62:
1304 case 0x63:
1305 case 0x64:
1306 case 0x65:
1307 case 0x66:
1308 case 0x67:
1309 case 0x68:
1310 case 0x69:
1311 case 0x6A:
1312 case 0x6B:
1313 case 0x6C:
1314 case 0x6D:
1315 case 0x6E:
1316 case 0x6F:
1317 case 0x70:
1318 case 0x71:
1319 case 0x72:
1320 case 0x73:
1321 case 0x74:
1322 case 0x75:
1323 case 0x76:
1324 case 0x77:
1325 case 0x78:
1326 case 0x79:
1327 case 0x7A:
1328 case 0x7B:
1329 case 0x7C:
1330 case 0x7D:
1331 case 0x7E:
1332 case 0x7F:
1333 return sax->number_unsigned(static_cast<number_unsigned_t>(current));
1334
1335 // fixmap
1336 case 0x80:
1337 case 0x81:
1338 case 0x82:
1339 case 0x83:
1340 case 0x84:
1341 case 0x85:
1342 case 0x86:
1343 case 0x87:
1344 case 0x88:
1345 case 0x89:
1346 case 0x8A:
1347 case 0x8B:
1348 case 0x8C:
1349 case 0x8D:
1350 case 0x8E:
1351 case 0x8F:
1352 return get_msgpack_object(conditional_static_cast<std::size_t>(static_cast<unsigned int>(current) & 0x0Fu));
1353
1354 // fixarray
1355 case 0x90:
1356 case 0x91:
1357 case 0x92:
1358 case 0x93:
1359 case 0x94:
1360 case 0x95:
1361 case 0x96:
1362 case 0x97:
1363 case 0x98:
1364 case 0x99:
1365 case 0x9A:
1366 case 0x9B:
1367 case 0x9C:
1368 case 0x9D:
1369 case 0x9E:
1370 case 0x9F:
1371 return get_msgpack_array(conditional_static_cast<std::size_t>(static_cast<unsigned int>(current) & 0x0Fu));
1372
1373 // fixstr
1374 case 0xA0:
1375 case 0xA1:
1376 case 0xA2:
1377 case 0xA3:
1378 case 0xA4:
1379 case 0xA5:
1380 case 0xA6:
1381 case 0xA7:
1382 case 0xA8:
1383 case 0xA9:
1384 case 0xAA:
1385 case 0xAB:
1386 case 0xAC:
1387 case 0xAD:
1388 case 0xAE:
1389 case 0xAF:
1390 case 0xB0:
1391 case 0xB1:
1392 case 0xB2:
1393 case 0xB3:
1394 case 0xB4:
1395 case 0xB5:
1396 case 0xB6:
1397 case 0xB7:
1398 case 0xB8:
1399 case 0xB9:
1400 case 0xBA:
1401 case 0xBB:
1402 case 0xBC:
1403 case 0xBD:
1404 case 0xBE:
1405 case 0xBF:
1406 case 0xD9: // str 8
1407 case 0xDA: // str 16
1408 case 0xDB: // str 32
1409 {
1410 string_t s;
1411 return get_msgpack_string(s) && sax->string(s);
1412 }
1413
1414 case 0xC0: // nil
1415 return sax->null();
1416
1417 case 0xC2: // false
1418 return sax->boolean(false);
1419
1420 case 0xC3: // true
1421 return sax->boolean(true);
1422
1423 case 0xC4: // bin 8
1424 case 0xC5: // bin 16
1425 case 0xC6: // bin 32
1426 case 0xC7: // ext 8
1427 case 0xC8: // ext 16
1428 case 0xC9: // ext 32
1429 case 0xD4: // fixext 1
1430 case 0xD5: // fixext 2
1431 case 0xD6: // fixext 4
1432 case 0xD7: // fixext 8
1433 case 0xD8: // fixext 16
1434 {
1435 binary_t b;
1436 return get_msgpack_binary(b) && sax->binary(b);
1437 }
1438
1439 case 0xCA: // float 32
1440 {
1441 float number{};
1442 return get_number(input_format_t::msgpack, number) && sax->number_float(static_cast<number_float_t>(number), "");
1443 }
1444
1445 case 0xCB: // float 64
1446 {
1447 double number{};
1448 return get_number(input_format_t::msgpack, number) && sax->number_float(static_cast<number_float_t>(number), "");
1449 }
1450
1451 case 0xCC: // uint 8
1452 {
1453 std::uint8_t number{};
1454 return get_number(input_format_t::msgpack, number) && sax->number_unsigned(number);
1455 }
1456
1457 case 0xCD: // uint 16
1458 {
1459 std::uint16_t number{};
1460 return get_number(input_format_t::msgpack, number) && sax->number_unsigned(number);
1461 }
1462
1463 case 0xCE: // uint 32
1464 {
1465 std::uint32_t number{};
1466 return get_number(input_format_t::msgpack, number) && sax->number_unsigned(number);
1467 }
1468
1469 case 0xCF: // uint 64
1470 {
1471 std::uint64_t number{};
1472 return get_number(input_format_t::msgpack, number) && sax->number_unsigned(number);
1473 }
1474
1475 case 0xD0: // int 8
1476 {
1477 std::int8_t number{};
1478 return get_number(input_format_t::msgpack, number) && sax->number_integer(number);
1479 }
1480
1481 case 0xD1: // int 16
1482 {
1483 std::int16_t number{};
1484 return get_number(input_format_t::msgpack, number) && sax->number_integer(number);
1485 }
1486
1487 case 0xD2: // int 32
1488 {
1489 std::int32_t number{};
1490 return get_number(input_format_t::msgpack, number) && sax->number_integer(number);
1491 }
1492
1493 case 0xD3: // int 64
1494 {
1495 std::int64_t number{};
1496 return get_number(input_format_t::msgpack, number) && sax->number_integer(number);
1497 }
1498
1499 case 0xDC: // array 16
1500 {
1501 std::uint16_t len{};
1502 return get_number(input_format_t::msgpack, len) && get_msgpack_array(static_cast<std::size_t>(len));
1503 }
1504
1505 case 0xDD: // array 32
1506 {
1507 std::uint32_t len{};
1508 return get_number(input_format_t::msgpack, len) && get_msgpack_array(conditional_static_cast<std::size_t>(len));
1509 }
1510
1511 case 0xDE: // map 16
1512 {
1513 std::uint16_t len{};
1514 return get_number(input_format_t::msgpack, len) && get_msgpack_object(static_cast<std::size_t>(len));
1515 }
1516
1517 case 0xDF: // map 32
1518 {
1519 std::uint32_t len{};
1520 return get_number(input_format_t::msgpack, len) && get_msgpack_object(conditional_static_cast<std::size_t>(len));
1521 }
1522
1523 // negative fixint
1524 case 0xE0:
1525 case 0xE1:
1526 case 0xE2:
1527 case 0xE3:
1528 case 0xE4:
1529 case 0xE5:
1530 case 0xE6:
1531 case 0xE7:
1532 case 0xE8:
1533 case 0xE9:
1534 case 0xEA:
1535 case 0xEB:
1536 case 0xEC:
1537 case 0xED:
1538 case 0xEE:
1539 case 0xEF:
1540 case 0xF0:
1541 case 0xF1:
1542 case 0xF2:
1543 case 0xF3:
1544 case 0xF4:
1545 case 0xF5:
1546 case 0xF6:
1547 case 0xF7:
1548 case 0xF8:
1549 case 0xF9:
1550 case 0xFA:
1551 case 0xFB:
1552 case 0xFC:
1553 case 0xFD:
1554 case 0xFE:
1555 case 0xFF:
1556 return sax->number_integer(static_cast<std::int8_t>(current));
1557
1558 default: // anything else
1559 {
1560 auto last_token = get_token_string();
1561 return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read,
1562 exception_message(input_format_t::msgpack, concat("invalid byte: 0x", last_token), "value"), nullptr));
1563 }
1564 }
1565 }
1566
1567 /*!
1568 @brief reads a MessagePack string
1569
1570 This function first reads starting bytes to determine the expected
1571 string length and then copies this number of bytes into a string.
1572
1573 @param[out] result created string
1574
1575 @return whether string creation completed
1576 */
get_msgpack_string(string_t & result)1577 bool get_msgpack_string(string_t& result)
1578 {
1579 if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::msgpack, "string")))
1580 {
1581 return false;
1582 }
1583
1584 switch (current)
1585 {
1586 // fixstr
1587 case 0xA0:
1588 case 0xA1:
1589 case 0xA2:
1590 case 0xA3:
1591 case 0xA4:
1592 case 0xA5:
1593 case 0xA6:
1594 case 0xA7:
1595 case 0xA8:
1596 case 0xA9:
1597 case 0xAA:
1598 case 0xAB:
1599 case 0xAC:
1600 case 0xAD:
1601 case 0xAE:
1602 case 0xAF:
1603 case 0xB0:
1604 case 0xB1:
1605 case 0xB2:
1606 case 0xB3:
1607 case 0xB4:
1608 case 0xB5:
1609 case 0xB6:
1610 case 0xB7:
1611 case 0xB8:
1612 case 0xB9:
1613 case 0xBA:
1614 case 0xBB:
1615 case 0xBC:
1616 case 0xBD:
1617 case 0xBE:
1618 case 0xBF:
1619 {
1620 return get_string(input_format_t::msgpack, static_cast<unsigned int>(current) & 0x1Fu, result);
1621 }
1622
1623 case 0xD9: // str 8
1624 {
1625 std::uint8_t len{};
1626 return get_number(input_format_t::msgpack, len) && get_string(input_format_t::msgpack, len, result);
1627 }
1628
1629 case 0xDA: // str 16
1630 {
1631 std::uint16_t len{};
1632 return get_number(input_format_t::msgpack, len) && get_string(input_format_t::msgpack, len, result);
1633 }
1634
1635 case 0xDB: // str 32
1636 {
1637 std::uint32_t len{};
1638 return get_number(input_format_t::msgpack, len) && get_string(input_format_t::msgpack, len, result);
1639 }
1640
1641 default:
1642 {
1643 auto last_token = get_token_string();
1644 return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read,
1645 exception_message(input_format_t::msgpack, concat("expected length specification (0xA0-0xBF, 0xD9-0xDB); last byte: 0x", last_token), "string"), nullptr));
1646 }
1647 }
1648 }
1649
1650 /*!
1651 @brief reads a MessagePack byte array
1652
1653 This function first reads starting bytes to determine the expected
1654 byte array length and then copies this number of bytes into a byte array.
1655
1656 @param[out] result created byte array
1657
1658 @return whether byte array creation completed
1659 */
get_msgpack_binary(binary_t & result)1660 bool get_msgpack_binary(binary_t& result)
1661 {
1662 // helper function to set the subtype
1663 auto assign_and_return_true = [&result](std::int8_t subtype)
1664 {
1665 result.set_subtype(static_cast<std::uint8_t>(subtype));
1666 return true;
1667 };
1668
1669 switch (current)
1670 {
1671 case 0xC4: // bin 8
1672 {
1673 std::uint8_t len{};
1674 return get_number(input_format_t::msgpack, len) &&
1675 get_binary(input_format_t::msgpack, len, result);
1676 }
1677
1678 case 0xC5: // bin 16
1679 {
1680 std::uint16_t len{};
1681 return get_number(input_format_t::msgpack, len) &&
1682 get_binary(input_format_t::msgpack, len, result);
1683 }
1684
1685 case 0xC6: // bin 32
1686 {
1687 std::uint32_t len{};
1688 return get_number(input_format_t::msgpack, len) &&
1689 get_binary(input_format_t::msgpack, len, result);
1690 }
1691
1692 case 0xC7: // ext 8
1693 {
1694 std::uint8_t len{};
1695 std::int8_t subtype{};
1696 return get_number(input_format_t::msgpack, len) &&
1697 get_number(input_format_t::msgpack, subtype) &&
1698 get_binary(input_format_t::msgpack, len, result) &&
1699 assign_and_return_true(subtype);
1700 }
1701
1702 case 0xC8: // ext 16
1703 {
1704 std::uint16_t len{};
1705 std::int8_t subtype{};
1706 return get_number(input_format_t::msgpack, len) &&
1707 get_number(input_format_t::msgpack, subtype) &&
1708 get_binary(input_format_t::msgpack, len, result) &&
1709 assign_and_return_true(subtype);
1710 }
1711
1712 case 0xC9: // ext 32
1713 {
1714 std::uint32_t len{};
1715 std::int8_t subtype{};
1716 return get_number(input_format_t::msgpack, len) &&
1717 get_number(input_format_t::msgpack, subtype) &&
1718 get_binary(input_format_t::msgpack, len, result) &&
1719 assign_and_return_true(subtype);
1720 }
1721
1722 case 0xD4: // fixext 1
1723 {
1724 std::int8_t subtype{};
1725 return get_number(input_format_t::msgpack, subtype) &&
1726 get_binary(input_format_t::msgpack, 1, result) &&
1727 assign_and_return_true(subtype);
1728 }
1729
1730 case 0xD5: // fixext 2
1731 {
1732 std::int8_t subtype{};
1733 return get_number(input_format_t::msgpack, subtype) &&
1734 get_binary(input_format_t::msgpack, 2, result) &&
1735 assign_and_return_true(subtype);
1736 }
1737
1738 case 0xD6: // fixext 4
1739 {
1740 std::int8_t subtype{};
1741 return get_number(input_format_t::msgpack, subtype) &&
1742 get_binary(input_format_t::msgpack, 4, result) &&
1743 assign_and_return_true(subtype);
1744 }
1745
1746 case 0xD7: // fixext 8
1747 {
1748 std::int8_t subtype{};
1749 return get_number(input_format_t::msgpack, subtype) &&
1750 get_binary(input_format_t::msgpack, 8, result) &&
1751 assign_and_return_true(subtype);
1752 }
1753
1754 case 0xD8: // fixext 16
1755 {
1756 std::int8_t subtype{};
1757 return get_number(input_format_t::msgpack, subtype) &&
1758 get_binary(input_format_t::msgpack, 16, result) &&
1759 assign_and_return_true(subtype);
1760 }
1761
1762 default: // LCOV_EXCL_LINE
1763 return false; // LCOV_EXCL_LINE
1764 }
1765 }
1766
1767 /*!
1768 @param[in] len the length of the array
1769 @return whether array creation completed
1770 */
get_msgpack_array(const std::size_t len)1771 bool get_msgpack_array(const std::size_t len)
1772 {
1773 if (JSON_HEDLEY_UNLIKELY(!sax->start_array(len)))
1774 {
1775 return false;
1776 }
1777
1778 for (std::size_t i = 0; i < len; ++i)
1779 {
1780 if (JSON_HEDLEY_UNLIKELY(!parse_msgpack_internal()))
1781 {
1782 return false;
1783 }
1784 }
1785
1786 return sax->end_array();
1787 }
1788
1789 /*!
1790 @param[in] len the length of the object
1791 @return whether object creation completed
1792 */
get_msgpack_object(const std::size_t len)1793 bool get_msgpack_object(const std::size_t len)
1794 {
1795 if (JSON_HEDLEY_UNLIKELY(!sax->start_object(len)))
1796 {
1797 return false;
1798 }
1799
1800 string_t key;
1801 for (std::size_t i = 0; i < len; ++i)
1802 {
1803 get();
1804 if (JSON_HEDLEY_UNLIKELY(!get_msgpack_string(key) || !sax->key(key)))
1805 {
1806 return false;
1807 }
1808
1809 if (JSON_HEDLEY_UNLIKELY(!parse_msgpack_internal()))
1810 {
1811 return false;
1812 }
1813 key.clear();
1814 }
1815
1816 return sax->end_object();
1817 }
1818
1819 ////////////
1820 // UBJSON //
1821 ////////////
1822
1823 /*!
1824 @param[in] get_char whether a new character should be retrieved from the
1825 input (true, default) or whether the last read
1826 character should be considered instead
1827
1828 @return whether a valid UBJSON value was passed to the SAX parser
1829 */
parse_ubjson_internal(const bool get_char=true)1830 bool parse_ubjson_internal(const bool get_char = true)
1831 {
1832 return get_ubjson_value(get_char ? get_ignore_noop() : current);
1833 }
1834
1835 /*!
1836 @brief reads a UBJSON string
1837
1838 This function is either called after reading the 'S' byte explicitly
1839 indicating a string, or in case of an object key where the 'S' byte can be
1840 left out.
1841
1842 @param[out] result created string
1843 @param[in] get_char whether a new character should be retrieved from the
1844 input (true, default) or whether the last read
1845 character should be considered instead
1846
1847 @return whether string creation completed
1848 */
get_ubjson_string(string_t & result,const bool get_char=true)1849 bool get_ubjson_string(string_t& result, const bool get_char = true)
1850 {
1851 if (get_char)
1852 {
1853 get(); // TODO(niels): may we ignore N here?
1854 }
1855
1856 if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format, "value")))
1857 {
1858 return false;
1859 }
1860
1861 switch (current)
1862 {
1863 case 'U':
1864 {
1865 std::uint8_t len{};
1866 return get_number(input_format, len) && get_string(input_format, len, result);
1867 }
1868
1869 case 'i':
1870 {
1871 std::int8_t len{};
1872 return get_number(input_format, len) && get_string(input_format, len, result);
1873 }
1874
1875 case 'I':
1876 {
1877 std::int16_t len{};
1878 return get_number(input_format, len) && get_string(input_format, len, result);
1879 }
1880
1881 case 'l':
1882 {
1883 std::int32_t len{};
1884 return get_number(input_format, len) && get_string(input_format, len, result);
1885 }
1886
1887 case 'L':
1888 {
1889 std::int64_t len{};
1890 return get_number(input_format, len) && get_string(input_format, len, result);
1891 }
1892
1893 case 'u':
1894 {
1895 if (input_format != input_format_t::bjdata)
1896 {
1897 break;
1898 }
1899 std::uint16_t len{};
1900 return get_number(input_format, len) && get_string(input_format, len, result);
1901 }
1902
1903 case 'm':
1904 {
1905 if (input_format != input_format_t::bjdata)
1906 {
1907 break;
1908 }
1909 std::uint32_t len{};
1910 return get_number(input_format, len) && get_string(input_format, len, result);
1911 }
1912
1913 case 'M':
1914 {
1915 if (input_format != input_format_t::bjdata)
1916 {
1917 break;
1918 }
1919 std::uint64_t len{};
1920 return get_number(input_format, len) && get_string(input_format, len, result);
1921 }
1922
1923 default:
1924 break;
1925 }
1926 auto last_token = get_token_string();
1927 std::string message;
1928
1929 if (input_format != input_format_t::bjdata)
1930 {
1931 message = "expected length type specification (U, i, I, l, L); last byte: 0x" + last_token;
1932 }
1933 else
1934 {
1935 message = "expected length type specification (U, i, u, I, m, l, M, L); last byte: 0x" + last_token;
1936 }
1937 return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format, message, "string"), nullptr));
1938 }
1939
1940 /*!
1941 @param[out] dim an integer vector storing the ND array dimensions
1942 @return whether reading ND array size vector is successful
1943 */
get_ubjson_ndarray_size(std::vector<size_t> & dim)1944 bool get_ubjson_ndarray_size(std::vector<size_t>& dim)
1945 {
1946 std::pair<std::size_t, char_int_type> size_and_type;
1947 size_t dimlen = 0;
1948 bool no_ndarray = true;
1949
1950 if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_type(size_and_type, no_ndarray)))
1951 {
1952 return false;
1953 }
1954
1955 if (size_and_type.first != npos)
1956 {
1957 if (size_and_type.second != 0)
1958 {
1959 if (size_and_type.second != 'N')
1960 {
1961 for (std::size_t i = 0; i < size_and_type.first; ++i)
1962 {
1963 if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_value(dimlen, no_ndarray, size_and_type.second)))
1964 {
1965 return false;
1966 }
1967 dim.push_back(dimlen);
1968 }
1969 }
1970 }
1971 else
1972 {
1973 for (std::size_t i = 0; i < size_and_type.first; ++i)
1974 {
1975 if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_value(dimlen, no_ndarray)))
1976 {
1977 return false;
1978 }
1979 dim.push_back(dimlen);
1980 }
1981 }
1982 }
1983 else
1984 {
1985 while (current != ']')
1986 {
1987 if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_value(dimlen, no_ndarray, current)))
1988 {
1989 return false;
1990 }
1991 dim.push_back(dimlen);
1992 get_ignore_noop();
1993 }
1994 }
1995 return true;
1996 }
1997
1998 /*!
1999 @param[out] result determined size
2000 @param[in,out] is_ndarray for input, `true` means already inside an ndarray vector
2001 or ndarray dimension is not allowed; `false` means ndarray
2002 is allowed; for output, `true` means an ndarray is found;
2003 is_ndarray can only return `true` when its initial value
2004 is `false`
2005 @param[in] prefix type marker if already read, otherwise set to 0
2006
2007 @return whether size determination completed
2008 */
get_ubjson_size_value(std::size_t & result,bool & is_ndarray,char_int_type prefix=0)2009 bool get_ubjson_size_value(std::size_t& result, bool& is_ndarray, char_int_type prefix = 0)
2010 {
2011 if (prefix == 0)
2012 {
2013 prefix = get_ignore_noop();
2014 }
2015
2016 switch (prefix)
2017 {
2018 case 'U':
2019 {
2020 std::uint8_t number{};
2021 if (JSON_HEDLEY_UNLIKELY(!get_number(input_format, number)))
2022 {
2023 return false;
2024 }
2025 result = static_cast<std::size_t>(number);
2026 return true;
2027 }
2028
2029 case 'i':
2030 {
2031 std::int8_t number{};
2032 if (JSON_HEDLEY_UNLIKELY(!get_number(input_format, number)))
2033 {
2034 return false;
2035 }
2036 if (number < 0)
2037 {
2038 return sax->parse_error(chars_read, get_token_string(), parse_error::create(113, chars_read,
2039 exception_message(input_format, "count in an optimized container must be positive", "size"), nullptr));
2040 }
2041 result = static_cast<std::size_t>(number); // NOLINT(bugprone-signed-char-misuse,cert-str34-c): number is not a char
2042 return true;
2043 }
2044
2045 case 'I':
2046 {
2047 std::int16_t number{};
2048 if (JSON_HEDLEY_UNLIKELY(!get_number(input_format, number)))
2049 {
2050 return false;
2051 }
2052 if (number < 0)
2053 {
2054 return sax->parse_error(chars_read, get_token_string(), parse_error::create(113, chars_read,
2055 exception_message(input_format, "count in an optimized container must be positive", "size"), nullptr));
2056 }
2057 result = static_cast<std::size_t>(number);
2058 return true;
2059 }
2060
2061 case 'l':
2062 {
2063 std::int32_t number{};
2064 if (JSON_HEDLEY_UNLIKELY(!get_number(input_format, number)))
2065 {
2066 return false;
2067 }
2068 if (number < 0)
2069 {
2070 return sax->parse_error(chars_read, get_token_string(), parse_error::create(113, chars_read,
2071 exception_message(input_format, "count in an optimized container must be positive", "size"), nullptr));
2072 }
2073 result = static_cast<std::size_t>(number);
2074 return true;
2075 }
2076
2077 case 'L':
2078 {
2079 std::int64_t number{};
2080 if (JSON_HEDLEY_UNLIKELY(!get_number(input_format, number)))
2081 {
2082 return false;
2083 }
2084 if (number < 0)
2085 {
2086 return sax->parse_error(chars_read, get_token_string(), parse_error::create(113, chars_read,
2087 exception_message(input_format, "count in an optimized container must be positive", "size"), nullptr));
2088 }
2089 if (!value_in_range_of<std::size_t>(number))
2090 {
2091 return sax->parse_error(chars_read, get_token_string(), out_of_range::create(408,
2092 exception_message(input_format, "integer value overflow", "size"), nullptr));
2093 }
2094 result = static_cast<std::size_t>(number);
2095 return true;
2096 }
2097
2098 case 'u':
2099 {
2100 if (input_format != input_format_t::bjdata)
2101 {
2102 break;
2103 }
2104 std::uint16_t number{};
2105 if (JSON_HEDLEY_UNLIKELY(!get_number(input_format, number)))
2106 {
2107 return false;
2108 }
2109 result = static_cast<std::size_t>(number);
2110 return true;
2111 }
2112
2113 case 'm':
2114 {
2115 if (input_format != input_format_t::bjdata)
2116 {
2117 break;
2118 }
2119 std::uint32_t number{};
2120 if (JSON_HEDLEY_UNLIKELY(!get_number(input_format, number)))
2121 {
2122 return false;
2123 }
2124 result = conditional_static_cast<std::size_t>(number);
2125 return true;
2126 }
2127
2128 case 'M':
2129 {
2130 if (input_format != input_format_t::bjdata)
2131 {
2132 break;
2133 }
2134 std::uint64_t number{};
2135 if (JSON_HEDLEY_UNLIKELY(!get_number(input_format, number)))
2136 {
2137 return false;
2138 }
2139 if (!value_in_range_of<std::size_t>(number))
2140 {
2141 return sax->parse_error(chars_read, get_token_string(), out_of_range::create(408,
2142 exception_message(input_format, "integer value overflow", "size"), nullptr));
2143 }
2144 result = detail::conditional_static_cast<std::size_t>(number);
2145 return true;
2146 }
2147
2148 case '[':
2149 {
2150 if (input_format != input_format_t::bjdata)
2151 {
2152 break;
2153 }
2154 if (is_ndarray) // ndarray dimensional vector can only contain integers, and can not embed another array
2155 {
2156 return sax->parse_error(chars_read, get_token_string(), parse_error::create(113, chars_read, exception_message(input_format, "ndarray dimentional vector is not allowed", "size"), nullptr));
2157 }
2158 std::vector<size_t> dim;
2159 if (JSON_HEDLEY_UNLIKELY(!get_ubjson_ndarray_size(dim)))
2160 {
2161 return false;
2162 }
2163 if (dim.size() == 1 || (dim.size() == 2 && dim.at(0) == 1)) // return normal array size if 1D row vector
2164 {
2165 result = dim.at(dim.size() - 1);
2166 return true;
2167 }
2168 if (!dim.empty()) // if ndarray, convert to an object in JData annotated array format
2169 {
2170 for (auto i : dim) // test if any dimension in an ndarray is 0, if so, return a 1D empty container
2171 {
2172 if ( i == 0 )
2173 {
2174 result = 0;
2175 return true;
2176 }
2177 }
2178
2179 string_t key = "_ArraySize_";
2180 if (JSON_HEDLEY_UNLIKELY(!sax->start_object(3) || !sax->key(key) || !sax->start_array(dim.size())))
2181 {
2182 return false;
2183 }
2184 result = 1;
2185 for (auto i : dim)
2186 {
2187 result *= i;
2188 if (result == 0 || result == npos) // because dim elements shall not have zeros, result = 0 means overflow happened; it also can't be npos as it is used to initialize size in get_ubjson_size_type()
2189 {
2190 return sax->parse_error(chars_read, get_token_string(), out_of_range::create(408, exception_message(input_format, "excessive ndarray size caused overflow", "size"), nullptr));
2191 }
2192 if (JSON_HEDLEY_UNLIKELY(!sax->number_unsigned(static_cast<number_unsigned_t>(i))))
2193 {
2194 return false;
2195 }
2196 }
2197 is_ndarray = true;
2198 return sax->end_array();
2199 }
2200 result = 0;
2201 return true;
2202 }
2203
2204 default:
2205 break;
2206 }
2207 auto last_token = get_token_string();
2208 std::string message;
2209
2210 if (input_format != input_format_t::bjdata)
2211 {
2212 message = "expected length type specification (U, i, I, l, L) after '#'; last byte: 0x" + last_token;
2213 }
2214 else
2215 {
2216 message = "expected length type specification (U, i, u, I, m, l, M, L) after '#'; last byte: 0x" + last_token;
2217 }
2218 return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format, message, "size"), nullptr));
2219 }
2220
2221 /*!
2222 @brief determine the type and size for a container
2223
2224 In the optimized UBJSON format, a type and a size can be provided to allow
2225 for a more compact representation.
2226
2227 @param[out] result pair of the size and the type
2228 @param[in] inside_ndarray whether the parser is parsing an ND array dimensional vector
2229
2230 @return whether pair creation completed
2231 */
get_ubjson_size_type(std::pair<std::size_t,char_int_type> & result,bool inside_ndarray=false)2232 bool get_ubjson_size_type(std::pair<std::size_t, char_int_type>& result, bool inside_ndarray = false)
2233 {
2234 result.first = npos; // size
2235 result.second = 0; // type
2236 bool is_ndarray = false;
2237
2238 get_ignore_noop();
2239
2240 if (current == '$')
2241 {
2242 result.second = get(); // must not ignore 'N', because 'N' maybe the type
2243 if (input_format == input_format_t::bjdata
2244 && JSON_HEDLEY_UNLIKELY(std::binary_search(bjd_optimized_type_markers.begin(), bjd_optimized_type_markers.end(), result.second)))
2245 {
2246 auto last_token = get_token_string();
2247 return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read,
2248 exception_message(input_format, concat("marker 0x", last_token, " is not a permitted optimized array type"), "type"), nullptr));
2249 }
2250
2251 if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format, "type")))
2252 {
2253 return false;
2254 }
2255
2256 get_ignore_noop();
2257 if (JSON_HEDLEY_UNLIKELY(current != '#'))
2258 {
2259 if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format, "value")))
2260 {
2261 return false;
2262 }
2263 auto last_token = get_token_string();
2264 return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read,
2265 exception_message(input_format, concat("expected '#' after type information; last byte: 0x", last_token), "size"), nullptr));
2266 }
2267
2268 bool is_error = get_ubjson_size_value(result.first, is_ndarray);
2269 if (input_format == input_format_t::bjdata && is_ndarray)
2270 {
2271 if (inside_ndarray)
2272 {
2273 return sax->parse_error(chars_read, get_token_string(), parse_error::create(112, chars_read,
2274 exception_message(input_format, "ndarray can not be recursive", "size"), nullptr));
2275 }
2276 result.second |= (1 << 8); // use bit 8 to indicate ndarray, all UBJSON and BJData markers should be ASCII letters
2277 }
2278 return is_error;
2279 }
2280
2281 if (current == '#')
2282 {
2283 bool is_error = get_ubjson_size_value(result.first, is_ndarray);
2284 if (input_format == input_format_t::bjdata && is_ndarray)
2285 {
2286 return sax->parse_error(chars_read, get_token_string(), parse_error::create(112, chars_read,
2287 exception_message(input_format, "ndarray requires both type and size", "size"), nullptr));
2288 }
2289 return is_error;
2290 }
2291
2292 return true;
2293 }
2294
2295 /*!
2296 @param prefix the previously read or set type prefix
2297 @return whether value creation completed
2298 */
get_ubjson_value(const char_int_type prefix)2299 bool get_ubjson_value(const char_int_type prefix)
2300 {
2301 switch (prefix)
2302 {
2303 case std::char_traits<char_type>::eof(): // EOF
2304 return unexpect_eof(input_format, "value");
2305
2306 case 'T': // true
2307 return sax->boolean(true);
2308 case 'F': // false
2309 return sax->boolean(false);
2310
2311 case 'Z': // null
2312 return sax->null();
2313
2314 case 'U':
2315 {
2316 std::uint8_t number{};
2317 return get_number(input_format, number) && sax->number_unsigned(number);
2318 }
2319
2320 case 'i':
2321 {
2322 std::int8_t number{};
2323 return get_number(input_format, number) && sax->number_integer(number);
2324 }
2325
2326 case 'I':
2327 {
2328 std::int16_t number{};
2329 return get_number(input_format, number) && sax->number_integer(number);
2330 }
2331
2332 case 'l':
2333 {
2334 std::int32_t number{};
2335 return get_number(input_format, number) && sax->number_integer(number);
2336 }
2337
2338 case 'L':
2339 {
2340 std::int64_t number{};
2341 return get_number(input_format, number) && sax->number_integer(number);
2342 }
2343
2344 case 'u':
2345 {
2346 if (input_format != input_format_t::bjdata)
2347 {
2348 break;
2349 }
2350 std::uint16_t number{};
2351 return get_number(input_format, number) && sax->number_unsigned(number);
2352 }
2353
2354 case 'm':
2355 {
2356 if (input_format != input_format_t::bjdata)
2357 {
2358 break;
2359 }
2360 std::uint32_t number{};
2361 return get_number(input_format, number) && sax->number_unsigned(number);
2362 }
2363
2364 case 'M':
2365 {
2366 if (input_format != input_format_t::bjdata)
2367 {
2368 break;
2369 }
2370 std::uint64_t number{};
2371 return get_number(input_format, number) && sax->number_unsigned(number);
2372 }
2373
2374 case 'h':
2375 {
2376 if (input_format != input_format_t::bjdata)
2377 {
2378 break;
2379 }
2380 const auto byte1_raw = get();
2381 if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format, "number")))
2382 {
2383 return false;
2384 }
2385 const auto byte2_raw = get();
2386 if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format, "number")))
2387 {
2388 return false;
2389 }
2390
2391 const auto byte1 = static_cast<unsigned char>(byte1_raw);
2392 const auto byte2 = static_cast<unsigned char>(byte2_raw);
2393
2394 // code from RFC 7049, Appendix D, Figure 3:
2395 // As half-precision floating-point numbers were only added
2396 // to IEEE 754 in 2008, today's programming platforms often
2397 // still only have limited support for them. It is very
2398 // easy to include at least decoding support for them even
2399 // without such support. An example of a small decoder for
2400 // half-precision floating-point numbers in the C language
2401 // is shown in Fig. 3.
2402 const auto half = static_cast<unsigned int>((byte2 << 8u) + byte1);
2403 const double val = [&half]
2404 {
2405 const int exp = (half >> 10u) & 0x1Fu;
2406 const unsigned int mant = half & 0x3FFu;
2407 JSON_ASSERT(0 <= exp&& exp <= 32);
2408 JSON_ASSERT(mant <= 1024);
2409 switch (exp)
2410 {
2411 case 0:
2412 return std::ldexp(mant, -24);
2413 case 31:
2414 return (mant == 0)
2415 ? std::numeric_limits<double>::infinity()
2416 : std::numeric_limits<double>::quiet_NaN();
2417 default:
2418 return std::ldexp(mant + 1024, exp - 25);
2419 }
2420 }();
2421 return sax->number_float((half & 0x8000u) != 0
2422 ? static_cast<number_float_t>(-val)
2423 : static_cast<number_float_t>(val), "");
2424 }
2425
2426 case 'd':
2427 {
2428 float number{};
2429 return get_number(input_format, number) && sax->number_float(static_cast<number_float_t>(number), "");
2430 }
2431
2432 case 'D':
2433 {
2434 double number{};
2435 return get_number(input_format, number) && sax->number_float(static_cast<number_float_t>(number), "");
2436 }
2437
2438 case 'H':
2439 {
2440 return get_ubjson_high_precision_number();
2441 }
2442
2443 case 'C': // char
2444 {
2445 get();
2446 if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format, "char")))
2447 {
2448 return false;
2449 }
2450 if (JSON_HEDLEY_UNLIKELY(current > 127))
2451 {
2452 auto last_token = get_token_string();
2453 return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read,
2454 exception_message(input_format, concat("byte after 'C' must be in range 0x00..0x7F; last byte: 0x", last_token), "char"), nullptr));
2455 }
2456 string_t s(1, static_cast<typename string_t::value_type>(current));
2457 return sax->string(s);
2458 }
2459
2460 case 'S': // string
2461 {
2462 string_t s;
2463 return get_ubjson_string(s) && sax->string(s);
2464 }
2465
2466 case '[': // array
2467 return get_ubjson_array();
2468
2469 case '{': // object
2470 return get_ubjson_object();
2471
2472 default: // anything else
2473 break;
2474 }
2475 auto last_token = get_token_string();
2476 return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format, "invalid byte: 0x" + last_token, "value"), nullptr));
2477 }
2478
2479 /*!
2480 @return whether array creation completed
2481 */
get_ubjson_array()2482 bool get_ubjson_array()
2483 {
2484 std::pair<std::size_t, char_int_type> size_and_type;
2485 if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_type(size_and_type)))
2486 {
2487 return false;
2488 }
2489
2490 // if bit-8 of size_and_type.second is set to 1, encode bjdata ndarray as an object in JData annotated array format (https://github.com/NeuroJSON/jdata):
2491 // {"_ArrayType_" : "typeid", "_ArraySize_" : [n1, n2, ...], "_ArrayData_" : [v1, v2, ...]}
2492
2493 if (input_format == input_format_t::bjdata && size_and_type.first != npos && (size_and_type.second & (1 << 8)) != 0)
2494 {
2495 size_and_type.second &= ~(static_cast<char_int_type>(1) << 8); // use bit 8 to indicate ndarray, here we remove the bit to restore the type marker
2496 auto it = std::lower_bound(bjd_types_map.begin(), bjd_types_map.end(), size_and_type.second, [](const bjd_type & p, char_int_type t)
2497 {
2498 return p.first < t;
2499 });
2500 string_t key = "_ArrayType_";
2501 if (JSON_HEDLEY_UNLIKELY(it == bjd_types_map.end() || it->first != size_and_type.second))
2502 {
2503 auto last_token = get_token_string();
2504 return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read,
2505 exception_message(input_format, "invalid byte: 0x" + last_token, "type"), nullptr));
2506 }
2507
2508 string_t type = it->second; // sax->string() takes a reference
2509 if (JSON_HEDLEY_UNLIKELY(!sax->key(key) || !sax->string(type)))
2510 {
2511 return false;
2512 }
2513
2514 if (size_and_type.second == 'C')
2515 {
2516 size_and_type.second = 'U';
2517 }
2518
2519 key = "_ArrayData_";
2520 if (JSON_HEDLEY_UNLIKELY(!sax->key(key) || !sax->start_array(size_and_type.first) ))
2521 {
2522 return false;
2523 }
2524
2525 for (std::size_t i = 0; i < size_and_type.first; ++i)
2526 {
2527 if (JSON_HEDLEY_UNLIKELY(!get_ubjson_value(size_and_type.second)))
2528 {
2529 return false;
2530 }
2531 }
2532
2533 return (sax->end_array() && sax->end_object());
2534 }
2535
2536 if (size_and_type.first != npos)
2537 {
2538 if (JSON_HEDLEY_UNLIKELY(!sax->start_array(size_and_type.first)))
2539 {
2540 return false;
2541 }
2542
2543 if (size_and_type.second != 0)
2544 {
2545 if (size_and_type.second != 'N')
2546 {
2547 for (std::size_t i = 0; i < size_and_type.first; ++i)
2548 {
2549 if (JSON_HEDLEY_UNLIKELY(!get_ubjson_value(size_and_type.second)))
2550 {
2551 return false;
2552 }
2553 }
2554 }
2555 }
2556 else
2557 {
2558 for (std::size_t i = 0; i < size_and_type.first; ++i)
2559 {
2560 if (JSON_HEDLEY_UNLIKELY(!parse_ubjson_internal()))
2561 {
2562 return false;
2563 }
2564 }
2565 }
2566 }
2567 else
2568 {
2569 if (JSON_HEDLEY_UNLIKELY(!sax->start_array(static_cast<std::size_t>(-1))))
2570 {
2571 return false;
2572 }
2573
2574 while (current != ']')
2575 {
2576 if (JSON_HEDLEY_UNLIKELY(!parse_ubjson_internal(false)))
2577 {
2578 return false;
2579 }
2580 get_ignore_noop();
2581 }
2582 }
2583
2584 return sax->end_array();
2585 }
2586
2587 /*!
2588 @return whether object creation completed
2589 */
get_ubjson_object()2590 bool get_ubjson_object()
2591 {
2592 std::pair<std::size_t, char_int_type> size_and_type;
2593 if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_type(size_and_type)))
2594 {
2595 return false;
2596 }
2597
2598 // do not accept ND-array size in objects in BJData
2599 if (input_format == input_format_t::bjdata && size_and_type.first != npos && (size_and_type.second & (1 << 8)) != 0)
2600 {
2601 auto last_token = get_token_string();
2602 return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read,
2603 exception_message(input_format, "BJData object does not support ND-array size in optimized format", "object"), nullptr));
2604 }
2605
2606 string_t key;
2607 if (size_and_type.first != npos)
2608 {
2609 if (JSON_HEDLEY_UNLIKELY(!sax->start_object(size_and_type.first)))
2610 {
2611 return false;
2612 }
2613
2614 if (size_and_type.second != 0)
2615 {
2616 for (std::size_t i = 0; i < size_and_type.first; ++i)
2617 {
2618 if (JSON_HEDLEY_UNLIKELY(!get_ubjson_string(key) || !sax->key(key)))
2619 {
2620 return false;
2621 }
2622 if (JSON_HEDLEY_UNLIKELY(!get_ubjson_value(size_and_type.second)))
2623 {
2624 return false;
2625 }
2626 key.clear();
2627 }
2628 }
2629 else
2630 {
2631 for (std::size_t i = 0; i < size_and_type.first; ++i)
2632 {
2633 if (JSON_HEDLEY_UNLIKELY(!get_ubjson_string(key) || !sax->key(key)))
2634 {
2635 return false;
2636 }
2637 if (JSON_HEDLEY_UNLIKELY(!parse_ubjson_internal()))
2638 {
2639 return false;
2640 }
2641 key.clear();
2642 }
2643 }
2644 }
2645 else
2646 {
2647 if (JSON_HEDLEY_UNLIKELY(!sax->start_object(static_cast<std::size_t>(-1))))
2648 {
2649 return false;
2650 }
2651
2652 while (current != '}')
2653 {
2654 if (JSON_HEDLEY_UNLIKELY(!get_ubjson_string(key, false) || !sax->key(key)))
2655 {
2656 return false;
2657 }
2658 if (JSON_HEDLEY_UNLIKELY(!parse_ubjson_internal()))
2659 {
2660 return false;
2661 }
2662 get_ignore_noop();
2663 key.clear();
2664 }
2665 }
2666
2667 return sax->end_object();
2668 }
2669
2670 // Note, no reader for UBJSON binary types is implemented because they do
2671 // not exist
2672
get_ubjson_high_precision_number()2673 bool get_ubjson_high_precision_number()
2674 {
2675 // get size of following number string
2676 std::size_t size{};
2677 bool no_ndarray = true;
2678 auto res = get_ubjson_size_value(size, no_ndarray);
2679 if (JSON_HEDLEY_UNLIKELY(!res))
2680 {
2681 return res;
2682 }
2683
2684 // get number string
2685 std::vector<char> number_vector;
2686 for (std::size_t i = 0; i < size; ++i)
2687 {
2688 get();
2689 if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format, "number")))
2690 {
2691 return false;
2692 }
2693 number_vector.push_back(static_cast<char>(current));
2694 }
2695
2696 // parse number string
2697 using ia_type = decltype(detail::input_adapter(number_vector));
2698 auto number_lexer = detail::lexer<BasicJsonType, ia_type>(detail::input_adapter(number_vector), false);
2699 const auto result_number = number_lexer.scan();
2700 const auto number_string = number_lexer.get_token_string();
2701 const auto result_remainder = number_lexer.scan();
2702
2703 using token_type = typename detail::lexer_base<BasicJsonType>::token_type;
2704
2705 if (JSON_HEDLEY_UNLIKELY(result_remainder != token_type::end_of_input))
2706 {
2707 return sax->parse_error(chars_read, number_string, parse_error::create(115, chars_read,
2708 exception_message(input_format, concat("invalid number text: ", number_lexer.get_token_string()), "high-precision number"), nullptr));
2709 }
2710
2711 switch (result_number)
2712 {
2713 case token_type::value_integer:
2714 return sax->number_integer(number_lexer.get_number_integer());
2715 case token_type::value_unsigned:
2716 return sax->number_unsigned(number_lexer.get_number_unsigned());
2717 case token_type::value_float:
2718 return sax->number_float(number_lexer.get_number_float(), std::move(number_string));
2719 case token_type::uninitialized:
2720 case token_type::literal_true:
2721 case token_type::literal_false:
2722 case token_type::literal_null:
2723 case token_type::value_string:
2724 case token_type::begin_array:
2725 case token_type::begin_object:
2726 case token_type::end_array:
2727 case token_type::end_object:
2728 case token_type::name_separator:
2729 case token_type::value_separator:
2730 case token_type::parse_error:
2731 case token_type::end_of_input:
2732 case token_type::literal_or_value:
2733 default:
2734 return sax->parse_error(chars_read, number_string, parse_error::create(115, chars_read,
2735 exception_message(input_format, concat("invalid number text: ", number_lexer.get_token_string()), "high-precision number"), nullptr));
2736 }
2737 }
2738
2739 ///////////////////////
2740 // Utility functions //
2741 ///////////////////////
2742
2743 /*!
2744 @brief get next character from the input
2745
2746 This function provides the interface to the used input adapter. It does
not throw in case the input reached EOF, but returns the negative-valued
`std::char_traits<char_type>::eof()` in that case.
2749
2750 @return character read from the input
2751 */
get()2752 char_int_type get()
2753 {
2754 ++chars_read;
2755 return current = ia.get_character();
2756 }
2757
2758 /*!
2759 @return character read from the input after ignoring all 'N' entries
2760 */
get_ignore_noop()2761 char_int_type get_ignore_noop()
2762 {
2763 do
2764 {
2765 get();
2766 }
2767 while (current == 'N');
2768
2769 return current;
2770 }
2771
/*!
2773 @brief read a number from the input
2774
2775 @tparam NumberType the type of the number
2776 @param[in] format the current format (for diagnostics)
2777 @param[out] result number of type @a NumberType
2778
2779 @return whether conversion completed
2780
2781 @note This function needs to respect the system's endianness, because
2782 bytes in CBOR, MessagePack, and UBJSON are stored in network order
2783 (big endian) and therefore need reordering on little endian systems.
2784 On the other hand, BSON and BJData use little endian and should reorder
2785 on big endian systems.
2786 */
2787 template<typename NumberType, bool InputIsLittleEndian = false>
get_number(const input_format_t format,NumberType & result)2788 bool get_number(const input_format_t format, NumberType& result)
2789 {
2790 // step 1: read input into array with system's byte order
2791 std::array<std::uint8_t, sizeof(NumberType)> vec{};
2792 for (std::size_t i = 0; i < sizeof(NumberType); ++i)
2793 {
2794 get();
2795 if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(format, "number")))
2796 {
2797 return false;
2798 }
2799
2800 // reverse byte order prior to conversion if necessary
2801 if (is_little_endian != (InputIsLittleEndian || format == input_format_t::bjdata))
2802 {
2803 vec[sizeof(NumberType) - i - 1] = static_cast<std::uint8_t>(current);
2804 }
2805 else
2806 {
2807 vec[i] = static_cast<std::uint8_t>(current); // LCOV_EXCL_LINE
2808 }
2809 }
2810
2811 // step 2: convert array into number of type T and return
2812 std::memcpy(&result, vec.data(), sizeof(NumberType));
2813 return true;
2814 }
2815
2816 /*!
2817 @brief create a string by reading characters from the input
2818
@tparam NumberType the integer type of the length @a len
2820 @param[in] format the current format (for diagnostics)
2821 @param[in] len number of characters to read
2822 @param[out] result string created by reading @a len bytes
2823
2824 @return whether string creation completed
2825
2826 @note We can not reserve @a len bytes for the result, because @a len
2827 may be too large. Usually, @ref unexpect_eof() detects the end of
2828 the input before we run out of string memory.
2829 */
2830 template<typename NumberType>
get_string(const input_format_t format,const NumberType len,string_t & result)2831 bool get_string(const input_format_t format,
2832 const NumberType len,
2833 string_t& result)
2834 {
2835 bool success = true;
2836 for (NumberType i = 0; i < len; i++)
2837 {
2838 get();
2839 if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(format, "string")))
2840 {
2841 success = false;
2842 break;
2843 }
2844 result.push_back(static_cast<typename string_t::value_type>(current));
2845 }
2846 return success;
2847 }
2848
2849 /*!
2850 @brief create a byte array by reading bytes from the input
2851
@tparam NumberType the integer type of the length @a len
2853 @param[in] format the current format (for diagnostics)
2854 @param[in] len number of bytes to read
2855 @param[out] result byte array created by reading @a len bytes
2856
2857 @return whether byte array creation completed
2858
2859 @note We can not reserve @a len bytes for the result, because @a len
2860 may be too large. Usually, @ref unexpect_eof() detects the end of
2861 the input before we run out of memory.
2862 */
2863 template<typename NumberType>
get_binary(const input_format_t format,const NumberType len,binary_t & result)2864 bool get_binary(const input_format_t format,
2865 const NumberType len,
2866 binary_t& result)
2867 {
2868 bool success = true;
2869 for (NumberType i = 0; i < len; i++)
2870 {
2871 get();
2872 if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(format, "binary")))
2873 {
2874 success = false;
2875 break;
2876 }
2877 result.push_back(static_cast<std::uint8_t>(current));
2878 }
2879 return success;
2880 }
2881
2882 /*!
2883 @param[in] format the current format (for diagnostics)
2884 @param[in] context further context information (for diagnostics)
2885 @return whether the last read character is not EOF
2886 */
2887 JSON_HEDLEY_NON_NULL(3)
unexpect_eof(const input_format_t format,const char * context) const2888 bool unexpect_eof(const input_format_t format, const char* context) const
2889 {
2890 if (JSON_HEDLEY_UNLIKELY(current == std::char_traits<char_type>::eof()))
2891 {
2892 return sax->parse_error(chars_read, "<end of file>",
2893 parse_error::create(110, chars_read, exception_message(format, "unexpected end of input", context), nullptr));
2894 }
2895 return true;
2896 }
2897
2898 /*!
2899 @return a string representation of the last read byte
2900 */
get_token_string() const2901 std::string get_token_string() const
2902 {
2903 std::array<char, 3> cr{{}};
2904 static_cast<void>((std::snprintf)(cr.data(), cr.size(), "%.2hhX", static_cast<unsigned char>(current))); // NOLINT(cppcoreguidelines-pro-type-vararg,hicpp-vararg)
2905 return std::string{cr.data()};
2906 }
2907
2908 /*!
2909 @param[in] format the current format
2910 @param[in] detail a detailed error message
2911 @param[in] context further context information
2912 @return a message string to use in the parse_error exceptions
2913 */
exception_message(const input_format_t format,const std::string & detail,const std::string & context) const2914 std::string exception_message(const input_format_t format,
2915 const std::string& detail,
2916 const std::string& context) const
2917 {
2918 std::string error_msg = "syntax error while parsing ";
2919
2920 switch (format)
2921 {
2922 case input_format_t::cbor:
2923 error_msg += "CBOR";
2924 break;
2925
2926 case input_format_t::msgpack:
2927 error_msg += "MessagePack";
2928 break;
2929
2930 case input_format_t::ubjson:
2931 error_msg += "UBJSON";
2932 break;
2933
2934 case input_format_t::bson:
2935 error_msg += "BSON";
2936 break;
2937
2938 case input_format_t::bjdata:
2939 error_msg += "BJData";
2940 break;
2941
2942 case input_format_t::json: // LCOV_EXCL_LINE
2943 default: // LCOV_EXCL_LINE
2944 JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert) LCOV_EXCL_LINE
2945 }
2946
2947 return concat(error_msg, ' ', context, ": ", detail);
2948 }
2949
private:
    /// sentinel size: the container readers compare the size reported for an
    /// array/object against it to tell "no element count given" apart from a
    /// real count
    static JSON_INLINE_VARIABLE constexpr std::size_t npos = static_cast<std::size_t>(-1);

    /// input adapter; get() pulls one character at a time from it via
    /// get_character()
    InputAdapterType ia;

    /// the current character; starts out as EOF and is overwritten by every
    /// call to get()
    char_int_type current = std::char_traits<char_type>::eof();

    /// the number of characters read; incremented by get() before each read
    /// and reported as the byte position in parse errors
    std::size_t chars_read = 0;

    /// whether we can assume little endianness; evaluated once per reader and
    /// used by get_number() to decide whether bytes must be reversed
    const bool is_little_endian = little_endianness();

    /// input format; the default `json` is not a binary format
    /// (exception_message() asserts on it) - presumably the constructor
    /// overrides it, TODO confirm
    const input_format_t input_format = input_format_t::json;

    /// the SAX parser that receives all events and parse errors;
    /// NOTE(review): defaults to nullptr and is dereferenced unchecked, so it
    /// is presumably set before any parsing starts - confirm
    json_sax_t* sax = nullptr;
2970
    // The two helper macros below exist so that the same make_array(...)
    // expression can be written once and used twice: inside decltype(...) to
    // name the member's type, and as the member's initializer. Both macros
    // are #undef'd again right after the members are declared.

    // excluded markers in bjdata optimized type
    // NOTE(review): the entries are listed in ascending character order -
    // presumably because the lookup relies on it; confirm against the user
#define JSON_BINARY_READER_MAKE_BJD_OPTIMIZED_TYPE_MARKERS_ \
    make_array<char_int_type>('F', 'H', 'N', 'S', 'T', 'Z', '[', '{')

    // type marker -> type name reported as "_ArrayType_" for BJData ndarrays.
    // The entries MUST stay sorted by marker: get_ubjson_array() searches
    // this table with std::lower_bound.
#define JSON_BINARY_READER_MAKE_BJD_TYPES_MAP_ \
    make_array<bjd_type>( \
    bjd_type{'C', "char"}, \
    bjd_type{'D', "double"}, \
    bjd_type{'I', "int16"}, \
    bjd_type{'L', "int64"}, \
    bjd_type{'M', "uint64"}, \
    bjd_type{'U', "uint8"}, \
    bjd_type{'d', "single"}, \
    bjd_type{'i', "int8"}, \
    bjd_type{'l', "int32"}, \
    bjd_type{'m', "uint32"}, \
    bjd_type{'u', "uint16"})

  // NOTE(review): presumably expands to an access specifier that exposes the
  // tables to the test suite - confirm against the macro's definition
  JSON_PRIVATE_UNLESS_TESTED:
    // lookup tables
    // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes)
    const decltype(JSON_BINARY_READER_MAKE_BJD_OPTIMIZED_TYPE_MARKERS_) bjd_optimized_type_markers =
        JSON_BINARY_READER_MAKE_BJD_OPTIMIZED_TYPE_MARKERS_;

    // (marker, type name) entry of bjd_types_map; it has to be declared
    // before the map member because the macro expansion below names it
    using bjd_type = std::pair<char_int_type, string_t>;
    // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes)
    const decltype(JSON_BINARY_READER_MAKE_BJD_TYPES_MAP_) bjd_types_map =
        JSON_BINARY_READER_MAKE_BJD_TYPES_MAP_;

    // the helper macros are implementation details; do not leak them
#undef JSON_BINARY_READER_MAKE_BJD_OPTIMIZED_TYPE_MARKERS_
#undef JSON_BINARY_READER_MAKE_BJD_TYPES_MAP_
3002 };
3003
// Before C++17, a static constexpr data member that is odr-used still needs
// a definition at namespace scope; since C++17 such members are implicitly
// inline, so the definition is only emitted for older language standards.
#ifndef JSON_HAS_CPP_17
    template<typename BasicJsonType, typename InputAdapterType, typename SAX>
    constexpr std::size_t binary_reader<BasicJsonType, InputAdapterType, SAX>::npos;
#endif
3008
3009 } // namespace detail
3010 NLOHMANN_JSON_NAMESPACE_END
3011