1 /*
2 * Copyright (c) 2024 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #ifndef API_CORE_JSON_JSON_H
17 #define API_CORE_JSON_JSON_H
18
#include <cerrno>
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <new>
22
23 #include <base/containers/string.h>
24 #include <base/containers/string_view.h>
25 #include <base/containers/type_traits.h>
26 #include <base/containers/vector.h>
27 #include <base/namespace.h>
28 #include <core/namespace.h>
29
CORE_BEGIN_NAMESPACE()30 CORE_BEGIN_NAMESPACE()
31 namespace json {
/** String type stored by values tagged with readonly_tag. */
using readonly_string_t = BASE_NS::string_view;
/** String type stored by values tagged with writable_tag. */
using writable_string_t = BASE_NS::string;
/** Container used for JSON arrays and for the key-value pairs of JSON objects. */
template<typename T>
using array_t = BASE_NS::vector<T>;
36
/** Type of a JSON value. Selects which member of the value_t union is in use. */
enum class type : uint8_t {
    /** No value, e.g. a default constructed value or the result of a failed parse. */
    uninitialized = 0,
    /** JSON object, stored as a list of key-value pairs. */
    object,
    /** JSON array, stored as a list of values. */
    array,
    /** JSON string. */
    string,
    /** Number stored as double. */
    floating_point,
    /** Number stored as int64_t. */
    signed_int,
    /** Number stored as uint64_t. */
    unsigned_int,
    /** JSON true or false. */
    boolean,
    /** JSON null. */
    null,
};
49
/** Tag for JSON values which contain read-only strings. The source JSON string must be kept alive for as long as the
 * parsing results are used. */
struct readonly_tag {};
53
/** Tag for JSON values which contain writable strings. The lifetime of the parsing results doesn't depend on the
 * source JSON string. */
struct writable_tag {};
57
/** JSON value; the tag selects the string type. Defaults to read-only strings. */
template<typename T = readonly_tag>
struct value_t;

/** JSON structure which contains read-only strings. The source JSON string must be kept alive for as long as the
 * parsing results are used. */
using value = value_t<readonly_tag>;

/** JSON structure which contains writable strings. String values can be modified and if the instance was generated by
 * the parser, the source string doesn't need to be stored while the instance is used. */
using standalone_value = value_t<writable_tag>;
68
/** Parses 'data' and returns a JSON structure. The value::type will be 'uninitialized' if parsing failed or 'data' is
 * a null pointer.
 * @param data JSON as a null terminated string.
 * @return Parsed JSON structure.
 */
template<typename T = readonly_tag>
value_t<T> parse(const char* data);
75
/** Converts a JSON structure into a string. String values and object keys are passed through escape(). An
 * uninitialized value produces an empty string.
 * @param value JSON structure.
 * @return JSON as string.
 */
template<typename T = readonly_tag>
BASE_NS::string to_string(const value_t<T>& value);
82
/** Replaces JSON escape sequences (\" \\ \/ \b \f \n \r \t and \uXXXX) with the characters they stand for. Code
 * points given as \uXXXX are written as UTF-8.
 * @param str Contents of a JSON string without the surrounding quotes.
 * @return Unescaped string.
 */
BASE_NS::string unescape(BASE_NS::string_view str);

/** Converts a string into the form used inside a JSON string literal. Quotation mark, reverse solidus and the control
 * characters which have a short form are written as two character escapes and bytes outside ASCII are decoded as
 * UTF-8 and written as \uXXXX.
 * @param str String to escape.
 * @return Escaped string without surrounding quotes.
 */
BASE_NS::string escape(BASE_NS::string_view str);
86
87 /** JSON value. */
88 template<typename Tag>
89 struct value_t {
90 /** Type used for JSON strings and JSON object keys. */
91 using string =
92 typename BASE_NS::conditional_t<BASE_NS::is_same_v<Tag, writable_tag>, writable_string_t, readonly_string_t>;
93
94 /** Type used for JSON null */
95 struct null {};
96
97 /** Type used for key-value pairs inside JSON objects. */
98 struct pair {
99 pair(string&& k, value_t&& v) : key(BASE_NS::forward<string>(k)), value(BASE_NS::forward<value_t>(v)) {}
100 string key;
101 value_t value;
102 };
103
104 /** Type used for JSON objects. */
105 using object = array_t<pair>;
106
107 /** Type used for JSON arrays. */
108 using array = array_t<value_t>;
109
110 /** Type of this JSON value. */
111 type type { type::uninitialized };
112 union {
113 object object_;
114 array array_;
115 string string_;
116 double float_;
117 int64_t signed_;
118 uint64_t unsigned_;
119 bool boolean_;
120 };
121
122 value_t() noexcept : type { type::uninitialized } {}
123
124 value_t(object&& value) noexcept : type { type::object }, object_(BASE_NS::move(value)) {}
125
126 value_t(array&& value) noexcept : type { type::array }, array_(BASE_NS::move(value)) {}
127
128 value_t(string value) noexcept : type { type::string }, string_(BASE_NS::move(value)) {}
129
130 value_t(const char* value) noexcept : value_t(string(value)) {}
131
132 template<typename Number, BASE_NS::enable_if_t<BASE_NS::is_arithmetic_v<Number>, bool> = true>
133 value_t(Number value) noexcept
134 {
135 if constexpr (BASE_NS::is_same_v<Number, bool>) {
136 type = type::boolean;
137 boolean_ = value;
138 } else if constexpr (BASE_NS::is_floating_point_v<Number>) {
139 type = type::floating_point;
140 float_ = static_cast<double>(value);
141 } else if constexpr (BASE_NS::is_signed_v<Number>) {
142 type = type::signed_int;
143 signed_ = static_cast<int64_t>(value);
144 } else if constexpr (BASE_NS::is_unsigned_v<Number>) {
145 type = type::unsigned_int;
146 unsigned_ = static_cast<uint64_t>(value);
147 }
148 }
149
150 value_t(null /* value */) noexcept : type { type::null } {}
151
152 template<typename Value>
153 value_t(const array_t<Value>& values) noexcept : type { type::array }, array_()
154 {
155 array_.reserve(values.size());
156 for (const auto& value : values) {
157 array_.emplace_back(value);
158 }
159 }
160
161 template<typename Value, size_t N>
162 value_t(Value (&value)[N]) : type { type::array }, array_()
163 {
164 array_.reserve(N);
165 for (size_t i = 0; i < N; ++i) {
166 array_.emplace_back(value[i]);
167 }
168 }
169
170 value_t(const value_t& other) : type(other.type)
171 {
172 switch (type) {
173 case type::uninitialized:
174 break;
175 case type::object:
176 new (&object_) object(other.object_);
177 break;
178 case type::array:
179 new (&array_) array(other.array_);
180 break;
181 case type::string:
182 new (&string_) string(other.string_);
183 break;
184 case type::floating_point:
185 float_ = other.float_;
186 break;
187 case type::signed_int:
188 signed_ = other.signed_;
189 break;
190 case type::unsigned_int:
191 unsigned_ = other.unsigned_;
192 break;
193 case type::boolean:
194 boolean_ = other.boolean_;
195 break;
196 case type::null:
197 break;
198 default:
199 break;
200 }
201 }
202
203 value_t& operator=(const value_t& other)
204 {
205 if (this != &other) {
206 cleanup();
207 type = other.type;
208 switch (type) {
209 case type::uninitialized:
210 break;
211 case type::object:
212 new (&object_) object(other.object_);
213 break;
214 case type::array:
215 new (&array_) array(other.array_);
216 break;
217 case type::string:
218 new (&string_) string(other.string_);
219 break;
220 case type::floating_point:
221 float_ = other.float_;
222 break;
223 case type::signed_int:
224 signed_ = other.signed_;
225 break;
226 case type::unsigned_int:
227 unsigned_ = other.unsigned_;
228 break;
229 case type::boolean:
230 boolean_ = other.boolean_;
231 break;
232 case type::null:
233 break;
234 default:
235 break;
236 }
237 }
238 return *this;
239 }
240
241 value_t& operator=(string value)
242 {
243 cleanup();
244 type = type::string;
245 new (&string_) string(BASE_NS::move(value));
246 return *this;
247 }
248
249 value_t& operator=(const char* value)
250 {
251 cleanup();
252 type = type::string;
253 new (&string_) string(value);
254 return *this;
255 }
256
257 template<typename Number, BASE_NS::enable_if_t<BASE_NS::is_arithmetic_v<Number>, bool> = true>
258 value_t& operator=(Number value)
259 {
260 cleanup();
261 if constexpr (BASE_NS::is_same_v<Number, bool>) {
262 type = type::boolean;
263 boolean_ = value;
264 } else if constexpr (BASE_NS::is_floating_point_v<Number>) {
265 type = type::floating_point;
266 float_ = static_cast<double>(value);
267 } else if constexpr (BASE_NS::is_signed_v<Number>) {
268 type = type::signed_int;
269 signed_ = static_cast<int64_t>(value);
270 } else if constexpr (BASE_NS::is_unsigned_v<Number>) {
271 type = type::unsigned_int;
272 unsigned_ = static_cast<uint64_t>(value);
273 }
274 return *this;
275 }
276
277 value_t(value_t&& rhs) noexcept : type { BASE_NS::exchange(rhs.type, type::uninitialized) }
278 {
279 switch (type) {
280 case type::uninitialized:
281 break;
282 case type::object:
283 new (&object_) object(BASE_NS::move(rhs.object_));
284 break;
285 case type::array:
286 new (&array_) array(BASE_NS::move(rhs.array_));
287 break;
288 case type::string:
289 new (&string_) string(BASE_NS::move(rhs.string_));
290 break;
291 case type::floating_point:
292 float_ = rhs.float_;
293 break;
294 case type::signed_int:
295 signed_ = rhs.signed_;
296 break;
297 case type::unsigned_int:
298 unsigned_ = rhs.unsigned_;
299 break;
300 case type::boolean:
301 boolean_ = rhs.boolean_;
302 break;
303 case type::null:
304 break;
305 default:
306 break;
307 }
308 }
309
310 value_t& operator=(value_t&& rhs) noexcept
311 {
312 if (this != &rhs) {
313 cleanup();
314 type = BASE_NS::exchange(rhs.type, type::uninitialized);
315 switch (type) {
316 case type::uninitialized:
317 break;
318 case type::object:
319 new (&object_) object(BASE_NS::move(rhs.object_));
320 break;
321 case type::array:
322 new (&array_) array(BASE_NS::move(rhs.array_));
323 break;
324 case type::string:
325 new (&string_) string(BASE_NS::move(rhs.string_));
326 break;
327 case type::floating_point:
328 float_ = rhs.float_;
329 break;
330 case type::signed_int:
331 signed_ = rhs.signed_;
332 break;
333 case type::unsigned_int:
334 unsigned_ = rhs.unsigned_;
335 break;
336 case type::boolean:
337 boolean_ = rhs.boolean_;
338 break;
339 case type::null:
340 break;
341 default:
342 break;
343 }
344 }
345 return *this;
346 }
347
348 template<typename OtherT>
349 operator value_t<OtherT>() const
350 {
351 value_t<OtherT> other;
352 other.type = type;
353 switch (type) {
354 case type::uninitialized:
355 break;
356 case type::object:
357 new (&other.object_) typename value_t<OtherT>::object(BASE_NS::default_allocator());
358 other.object_.reserve(object_.size());
359 for (const auto& p : object_) {
360 other.object_.emplace_back(typename value_t<OtherT>::string(p.key), p.value);
361 }
362 break;
363 case type::array:
364 new (&other.array_) typename value_t<OtherT>::array(BASE_NS::default_allocator());
365 other.array_.reserve(array_.size());
366 for (const auto& v : array_) {
367 other.array_.emplace_back(v);
368 }
369 break;
370 case type::string:
371 new (&other.string_) typename value_t<OtherT>::string(string_);
372 break;
373 case type::floating_point:
374 other.float_ = float_;
375 break;
376 case type::signed_int:
377 other.signed_ = signed_;
378 break;
379 case type::unsigned_int:
380 other.unsigned_ = unsigned_;
381 break;
382 case type::boolean:
383 other.boolean_ = boolean_;
384 break;
385 case type::null:
386 break;
387 default:
388 break;
389 }
390 return other;
391 }
392
393 #if _MSC_VER
394 #pragma warning(push)
395 #pragma warning(disable : 4583)
396 #endif
397 ~value_t()
398 {
399 cleanup();
400 type = type::uninitialized;
401 }
402 #if _MSC_VER
403 #pragma warning(pop)
404 #endif
405 template<typename T>
406 inline void destroy(T& t)
407 {
408 t.~T();
409 }
410
411 void cleanup()
412 {
413 switch (type) {
414 case type::uninitialized:
415 break;
416 case type::object:
417 destroy(object_);
418 break;
419 case type::array:
420 destroy(array_);
421 break;
422 case type::string:
423 destroy(string_);
424 break;
425 case type::floating_point:
426 break;
427 case type::signed_int:
428 break;
429 case type::unsigned_int:
430 break;
431 case type::boolean:
432 break;
433 case type::null:
434 break;
435 default:
436 break;
437 }
438 }
439
440 explicit operator bool() const noexcept
441 {
442 return type != type::uninitialized;
443 }
444
445 bool is_object() const noexcept
446 {
447 return type == type::object;
448 }
449
450 bool is_array() const noexcept
451 {
452 return type == type::array;
453 }
454
455 bool is_string() const noexcept
456 {
457 return type == type::string;
458 }
459
460 bool is_floating_point() const noexcept
461 {
462 return type == type::floating_point;
463 }
464
465 bool is_signed_int() const noexcept
466 {
467 return type == type::signed_int;
468 }
469
470 bool is_unsigned_int() const noexcept
471 {
472 return type == type::unsigned_int;
473 }
474
475 bool is_number() const noexcept
476 {
477 return type == type::floating_point || type == type::signed_int || type == type::unsigned_int;
478 }
479
480 bool is_boolean() const noexcept
481 {
482 return type == type::boolean;
483 }
484
485 bool is_null() const noexcept
486 {
487 return type == type::null;
488 }
489
490 bool empty() const noexcept
491 {
492 if (is_object()) {
493 return object_.empty();
494 } else if (is_array()) {
495 return array_.empty();
496 }
497 return true;
498 }
499
500 template<typename T>
501 T as_number() const
502 {
503 switch (type) {
504 case type::floating_point:
505 return static_cast<T>(float_);
506 case type::signed_int:
507 return static_cast<T>(signed_);
508 case type::unsigned_int:
509 return static_cast<T>(unsigned_);
510 default:
511 return 0;
512 }
513 }
514
515 const value_t* find(BASE_NS::string_view key) const noexcept
516 {
517 if (type == type::object) {
518 for (auto& t : object_) {
519 if (t.key == key) {
520 return &t.value;
521 }
522 }
523 }
524 return nullptr;
525 }
526
527 value_t& operator[](const BASE_NS::string_view& key)
528 {
529 if (type == type::object) {
530 for (auto& t : object_) {
531 if (t.key == key) {
532 return t.value;
533 }
534 }
535 object_.emplace_back(value_t<Tag>::string(key), value_t<Tag> {});
536 return object_.back().value;
537 }
538 return *this;
539 }
540 };
541 } // namespace json
542 CORE_END_NAMESPACE()
543
544 #ifdef JSON_IMPL
545 #include <securec.h>
546
547 #include <base/containers/fixed_string.h>
548 #include <base/util/uid.h>
549
CORE_BEGIN_NAMESPACE()550 CORE_BEGIN_NAMESPACE()
551 namespace json {
/** True for the four characters JSON treats as insignificant whitespace: space, line feed, carriage return, tab. */
inline bool isWhite(char data)
{
    switch (data) {
        case ' ':
        case '\n':
        case '\r':
        case '\t':
            return true;
        default:
            return false;
    }
}
556
/** True for '+' and '-'. */
inline bool isSign(char data)
{
    if (data == '-') {
        return true;
    }
    return data == '+';
}
561
/** True for the decimal digits '0'..'9'. */
inline bool isDigit(char data)
{
    const bool outside = (data < '0') || (data > '9');
    return !outside;
}
566
/** True for hexadecimal digits: '0'..'9', 'a'..'f' and 'A'..'F'. */
inline bool isHex(char data)
{
    if ((data >= '0') && (data <= '9')) {
        return true;
    }
    if ((data >= 'a') && (data <= 'f')) {
        return true;
    }
    return (data >= 'A') && (data <= 'F');
}
571
572 inline const char* trim(const char* data)
573 {
574 while (*data && isWhite(*data)) {
575 data++;
576 }
577 return data;
578 }
579
580 // values
581 template<typename T>
582 const char* parse_string(const char* data, value_t<T>& res)
583 {
584 const char* start = data;
585 for (; *data != 0; data++) {
586 if (*data == '\\' && data[1]) {
587 // escape.. (parse just enough to not stop too early)
588 if (data[1] == '\\' || data[1] == '"' || data[1] == '/' || data[1] == 'b' || data[1] == 'f' ||
589 data[1] == 'n' || data[1] == 'r' || data[1] == 't') {
590 ++data;
591 continue;
592 } else if (data[1] == 'u') {
593 data += 2;
594 for (const char* end = data + 4; data != end; ++data) {
595 if (*data == 0 || !isHex(*data)) {
596 // invalid Unicode
597 return data;
598 }
599 }
600 --data;
601 } else {
602 // invalid escape
603 return data;
604 }
605 } else if (*data == '"') {
606 res = value_t<T> { typename value_t<T>::string { start, static_cast<size_t>(data - start) } };
607 return data + 1;
608 } else if (static_cast<unsigned char>(*data) < 0x20) {
609 // unescaped control
610 return data;
611 }
612 }
613 return data;
614 }
615
616 template<typename T>
617 const char* parse_number(const char* data, value_t<T>& res)
618 {
619 bool negative = false;
620 const char* beg = data;
621 if (*data == '-') {
622 negative = true;
623 data++;
624 if (!isDigit(*data)) {
625 // no digits after '-'
626 return data;
627 }
628 }
629 bool fraction = false;
630 bool exponent = false;
631
632 if (*data == '0') {
633 ++data;
634 // after leading zero only '.', 'e' and 'E' allowed
635 if (*data == '.') {
636 ++data;
637 fraction = true;
638 } else if (*data == 'e' || *data == 'E') {
639 ++data;
640 exponent = true;
641 }
642 } else {
643 while (isDigit(*data)) {
644 ++data;
645 }
646 if (*data == '.') {
647 ++data;
648 fraction = true;
649 } else if (*data == 'e' || *data == 'E') {
650 ++data;
651 exponent = true;
652 }
653 }
654
655 if (fraction) {
656 // fraction must start with a digit
657 if (isDigit(*data)) {
658 ++data;
659 } else {
660 // fraction missing first digit
661 return data;
662 }
663 // fraction may contain digits up to begining of exponent ('e' or 'E')
664 while (isDigit(*data)) {
665 ++data;
666 }
667 if (*data == 'e' || *data == 'E') {
668 ++data;
669 exponent = true;
670 }
671 }
672 if (exponent) {
673 // exponent must start with '-' or '+' followed by a digit, or digit
674 if (*data == '-' || *data == '+') {
675 ++data;
676 }
677 if (isDigit(*data)) {
678 ++data;
679 } else {
680 // exponent missing first digit
681 return data;
682 }
683 while (isDigit(*data)) {
684 ++data;
685 }
686 }
687 if (data != beg) {
688 char* end;
689 if (fraction || exponent) {
690 res = value_t<T>(strtod(beg, &end));
691 } else if (negative) {
692 res = value_t<T>(strtoll(beg, &end, 10));
693 } else {
694 res = value_t<T>(strtoull(beg, &end, 10));
695 }
696 return data;
697 }
698 // invalid json
699 return data;
700 }
701
702 template<typename T>
703 const char* parse_boolean(const char* data, value_t<T>& res)
704 {
705 if (*data == 't') {
706 ++data;
707 const char rue[] = { 'r', 'u', 'e' };
708 for (unsigned i = 0u; i < sizeof(rue); ++i) {
709 if (data[i] == 0 || data[i] != rue[i]) {
710 // non-string starting with 't' but != "true"
711 return data;
712 }
713 }
714
715 res = value_t<T>(true);
716 data += sizeof(rue);
717 } else if (*data == 'f') {
718 ++data;
719 const char alse[] = { 'a', 'l', 's', 'e' };
720 for (unsigned i = 0u; i < sizeof(alse); ++i) {
721 if (data[i] == 0 || data[i] != alse[i]) {
722 // non-string starting with 'f' but != "false"
723 return data;
724 }
725 }
726 res = value_t<T>(false);
727 data += sizeof(alse);
728 } else {
729 // non-string not starting with 'f' or 't'
730 return data;
731 }
732 return data;
733 }
734
735 template<typename T>
736 const char* parse_null(const char* data, value_t<T>& res)
737 {
738 if (*data == 'n') {
739 ++data;
740 const char ull[] = { 'u', 'l', 'l' };
741 for (unsigned i = 0u; i < sizeof(ull); ++i) {
742 if (data[i] == 0 || data[i] != ull[i]) {
743 // non-string starting with 'n' but != "null"
744 return data;
745 }
746 }
747 res = value_t<T>(typename value_t<T>::null {});
748 data += sizeof(ull);
749 } else {
750 // invalid json
751 return data;
752 }
753 return data;
754 }
755
756 template<typename T>
757 void add(value_t<T>& v, value_t<T>&& value)
758 {
759 switch (v.type) {
760 case type::uninitialized:
761 v = BASE_NS::move(value);
762 break;
763 case type::object:
764 v.object_.back().value = BASE_NS::move(value);
765 break;
766 case type::array:
767 v.array_.push_back(BASE_NS::move(value));
768 break;
769 case type::string:
770 case type::floating_point:
771 case type::signed_int:
772 case type::unsigned_int:
773 case type::boolean:
774 case type::null:
775 default:
776 break;
777 }
778 }
779
780 template<typename T>
781 value_t<T> parse(const char* data)
782 {
783 if (!data) {
784 return {};
785 }
786 using jsonValue = value_t<T>;
787 typename jsonValue::array stack;
788 // push an uninitialized value which will get the final value during parsing
789 stack.emplace_back();
790
791 bool acceptValue = true;
792 while (*data) {
793 data = trim(data);
794 if (*data == '{') {
795 // start of an object
796 if (!acceptValue) {
797 return {};
798 }
799 data = trim(data + 1);
800 if (*data == '}') {
801 data = trim(data + 1);
802 // handle empty object.
803 add(stack.back(), jsonValue(typename jsonValue::object {}));
804 acceptValue = false;
805 } else if (*data == '"') {
806 // try to read the key
807 jsonValue key;
808 data = trim(parse_string(data + 1, key));
809
810 if (*data != ':') {
811 // missing : after key
812 return {};
813 }
814 data = trim(data + 1);
815 // push the object with key and missing value on the stack and hope to find a value next
816 stack.emplace_back(typename jsonValue::object {})
817 .object_.emplace_back(BASE_NS::move(key.string_), jsonValue {});
818 acceptValue = true;
819 } else {
820 // missing start of key or end of object
821 return {};
822 }
823 } else if (*data == '}') {
824 // end of an object
825 if (stack.back().type != type::object) {
826 // unexpected }
827 return {};
828 }
829 // check are we missing a value ('{"":}', '{"":"",}' )
830 if (acceptValue) {
831 return {};
832 }
833 data = trim(data + 1);
834 // move this object to the next in the stack
835 auto value = BASE_NS::move(stack.back());
836 stack.pop_back();
837 if (stack.empty()) {
838 // invalid json
839 return {};
840 }
841 add(stack.back(), BASE_NS::move(value));
842 acceptValue = false;
843 } else if (*data == '[') {
844 // start of an array
845 if (!acceptValue) {
846 // unexpected [
847 return {};
848 }
849 data = trim(data + 1);
850 if (*data == ']') {
851 data = trim(data + 1);
852 // handle empty array.
853 add(stack.back(), jsonValue(typename jsonValue::array {}));
854 acceptValue = false;
855 } else {
856 // push the empty array on the stack and hope to find values
857 stack.push_back(typename jsonValue::array {});
858 acceptValue = true;
859 }
860 } else if (*data == ']') {
861 // end of an array
862 if (stack.back().type != type::array) {
863 // unexpected ]
864 return {};
865 }
866 // check are we missing a value ('[1,]' '[1]]')
867 if (acceptValue) {
868 // unexpected ]
869 return {};
870 }
871 data = trim(data + 1);
872
873 auto value = BASE_NS::move(stack.back());
874 stack.pop_back();
875 if (stack.empty()) {
876 // invalid json
877 return {};
878 }
879 add(stack.back(), BASE_NS::move(value));
880 acceptValue = false;
881 } else if (*data == ',') {
882 // comma is allowed when the previous value was complete and we have an incomplete object or array on the
883 // stack.
884 if (!acceptValue && stack.back().type == type::object) {
885 data = trim(data + 1);
886 if (*data != '"') {
887 // missing key for next object
888 return {};
889 }
890 // try to read the key
891 jsonValue key;
892 data = trim(parse_string(data + 1, key));
893
894 if (*data != ':') {
895 // missing value for next object
896 return {};
897 }
898 data = trim(data + 1);
899 stack.back().object_.emplace_back(BASE_NS::move(key.string_), jsonValue {});
900 acceptValue = true;
901 } else if (!acceptValue && stack.back().type == type::array) {
902 data = trim(data + 1);
903 acceptValue = true;
904 } else {
905 // comma allowed only between objects and values inside an array
906 return {};
907 }
908 } else if (*data == '"') {
909 jsonValue value;
910 data = trim(parse_string(data + 1, value));
911 if (acceptValue && value.type == type::string) {
912 add(stack.back(), BASE_NS::move(value));
913 acceptValue = false;
914 } else {
915 // unexpected "
916 return {};
917 }
918 } else if (isSign(*data) || isDigit(*data)) {
919 jsonValue value;
920 data = trim(parse_number(data, value));
921 if (acceptValue && value.type != type::uninitialized) {
922 add(stack.back(), BASE_NS::move(value));
923 acceptValue = false;
924 } else {
925 // failed parsing number
926 return {};
927 }
928 } else if ((*data == 't') || (*data == 'f')) {
929 jsonValue value;
930 data = trim(parse_boolean(data, value));
931 if (acceptValue && value.type == type::boolean) {
932 add(stack.back(), BASE_NS::move(value));
933 acceptValue = false;
934 } else {
935 // failed parsing boolean
936 return {};
937 }
938 } else if (*data == 'n') {
939 jsonValue value;
940 data = trim(parse_null(data, value));
941 if (acceptValue && value.type == type::null) {
942 add(stack.back(), BASE_NS::move(value));
943 acceptValue = false;
944 } else {
945 // failed parsing null
946 return {};
947 }
948 } else {
949 // unexpected character
950 return {};
951 }
952 }
953 // check if we are missing a value ('{"":' '[')
954 if (acceptValue) {
955 return {};
956 }
957
958 auto value = BASE_NS::move(stack.front());
959 return value;
960 }
961
// Explicit instantiations of parse() for the read-only and writable value types.
template value parse(const char*);
template standalone_value parse(const char*);
964 // end of parser
965 template<typename T>
966 void append(BASE_NS::string& out, const typename value_t<T>::string& string)
967 {
968 out += '"';
969 out.append(escape(string));
970 out += '"';
971 }
972
973 template<typename T>
974 void append(BASE_NS::string& out, const typename value_t<T>::object& object)
975 {
976 out += '{';
977 int count = 0;
978 for (const auto& v : object) {
979 if (count++) {
980 out += ',';
981 }
982 append<T>(out, v.key);
983 out += ':';
984 out += to_string(v.value);
985 }
986 out += '}';
987 }
988
989 template<typename T>
990 void append(BASE_NS::string& out, const typename value_t<T>::array& array)
991 {
992 out += '[';
993 int count = 0;
994 for (const auto& v : array) {
995 if (count++) {
996 out += ',';
997 }
998 out += to_string(v);
999 }
1000 out += ']';
1001 }
1002
1003 template<typename T>
1004 void append(BASE_NS::string& out, const double floatingPoint)
1005 {
1006 constexpr const char* FLOATING_FORMAT_STR = "%.17g";
1007 if (const int size = snprintf(nullptr, 0, FLOATING_FORMAT_STR, floatingPoint); size > 0) {
1008 const size_t oldSize = out.size();
1009 out.resize(oldSize + size);
1010 const size_t newSize = out.size();
1011 // "At most bufsz - 1 characters are written." string has size() characters + 1 for null so use size() +
1012 // 1 as the total size. If resize() failed string size() hasn't changed, buffer will point to the null
1013 // character and bufsz will be 1 i.e. only the null character will be written.
1014 snprintf_s(
1015 out.data() + oldSize, newSize + 1 - oldSize, static_cast<size_t>(size), FLOATING_FORMAT_STR, floatingPoint);
1016 }
1017 }
1018
1019 template<typename T>
1020 BASE_NS::string to_string(const value_t<T>& value)
1021 {
1022 BASE_NS::string out;
1023 switch (value.type) {
1024 case type::uninitialized:
1025 break;
1026
1027 case type::object:
1028 append<T>(out, value.object_);
1029 break;
1030
1031 case type::array:
1032 append<T>(out, value.array_);
1033 break;
1034
1035 case type::string:
1036 append<T>(out, value.string_);
1037 break;
1038
1039 case type::floating_point:
1040 append<T>(out, value.float_);
1041 break;
1042
1043 case type::signed_int:
1044 out += BASE_NS::to_string(value.signed_);
1045 break;
1046
1047 case type::unsigned_int:
1048 out += BASE_NS::to_string(value.unsigned_);
1049 break;
1050
1051 case type::boolean:
1052 if (value.boolean_) {
1053 out += "true";
1054 } else {
1055 out += "false";
1056 }
1057 break;
1058
1059 case type::null:
1060 out += "null";
1061 break;
1062
1063 default:
1064 break;
1065 }
1066 return out;
1067 }
1068
// Explicit instantiations of to_string() for the read-only and writable value types.
template BASE_NS::string to_string(const value& value);
template BASE_NS::string to_string(const standalone_value& value);
1071
1072 int codepoint(BASE_NS::string_view str)
1073 {
1074 int code = 0;
1075 for (size_t u = 0U; u < 4U; ++u) {
1076 const char chr = str[u];
1077 code <<= 4U;
1078 code += BASE_NS::HexToDec(chr);
1079 }
1080 return code;
1081 }
1082
1083 BASE_NS::string unescape(BASE_NS::string_view str)
1084 {
1085 BASE_NS::string unescaped;
1086 unescaped.reserve(str.size());
1087 for (size_t i = 0, end = str.size(); i < end;) {
1088 auto chr = str[i];
1089 ++i;
1090 if (chr != '\\') {
1091 unescaped += chr;
1092 } else {
1093 chr = str[i];
1094 ++i;
1095 if (chr == '"') {
1096 unescaped += '"'; // Quotation mark
1097 } else if (chr == '\\') {
1098 unescaped += '\\'; // Reverse solidus
1099 } else if (chr == '/') {
1100 unescaped += '/'; // Solidus.. we do unescape this..
1101 } else if (chr == 'b') {
1102 unescaped += '\b'; // Backspace
1103 } else if (chr == 'f') {
1104 unescaped += '\f'; // Formfeed
1105 } else if (chr == 'n') {
1106 unescaped += '\n'; // Linefeed
1107 } else if (chr == 'r') {
1108 unescaped += '\r'; // Carriage return
1109 } else if (chr == 't') {
1110 unescaped += '\t'; // Horizontal tab
1111 } else if (chr == 'u') { // Unicode character
1112 if ((i + 4U) <= end) { // Expecting 4 hexadecimal values
1113 // Read the Unicode code point.
1114 int code = codepoint(str.substr(i, 4U));
1115 i += 4U;
1116 // Codepoints U+010000 to U+10FFFF are encoded as UTF-16 surrogate pairs. High surrogate between
1117 // 0xD800-0xDBFF and low between 0xDC00-0xDFFF.
1118 if (code >= 0xd800 && code <= 0xdbff) {
1119 // See if there's an other \uXXXX value in the correct range.
1120 if ((i + 6U) <= end) {
1121 auto next = str.substr(i, 6U);
1122 if (next[0] == '\\' && next[1] == 'u') {
1123 next.remove_prefix(2);
1124 int low = codepoint(next);
1125 i += 6U;
1126 if (low >= 0xdc00 && low <= 0xdfff) {
1127 // Surrogate pair encoding: 0x10000 + (Hi - 0xD800) * 0x400 + (Lo - 0xDC00)
1128 code = (static_cast<int>(static_cast<unsigned int>(code) << 10U) + low -
1129 (static_cast<int>(0xd800U << 10U) + 0xdc00 - 0x10000));
1130 }
1131 }
1132 }
1133 }
1134 // Convert code point to UTF-8.
1135 if (code < 0x80) {
1136 // 1-byte characters: 0xxxxxxx (ASCII)
1137 unescaped += static_cast<char>(code);
1138 } else if (code < 0x7ff) {
1139 // 2-byte characters: 110xxxxx 10xxxxxx
1140 unescaped += static_cast<char>(0xc0U | (static_cast<unsigned int>(code) >> 6U));
1141 unescaped += static_cast<char>(0x80U | (static_cast<unsigned int>(code) & 0x3fU));
1142 } else if (code <= 0xffff) {
1143 // 3-byte characters: 1110xxxx 10xxxxxx 10xxxxxx
1144 unescaped += static_cast<char>(0xe0U | (static_cast<unsigned int>(code) >> 12u));
1145 unescaped += static_cast<char>(0x80U | ((static_cast<unsigned int>(code) >> 6U) & 0x3FU));
1146 unescaped += static_cast<char>(0x80U | (static_cast<unsigned int>(code) & 0x3Fu));
1147 } else {
1148 // 4-byte characters: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
1149 unescaped += (static_cast<char>(0xf0U | (static_cast<unsigned int>(code) >> 18U)));
1150 unescaped += (static_cast<char>(0x80U | ((static_cast<unsigned int>(code) >> 12U) & 0x3fU)));
1151 unescaped += (static_cast<char>(0x80U | ((static_cast<unsigned int>(code) >> 6U) & 0x3fU)));
1152 unescaped += (static_cast<char>(0x80U | (static_cast<unsigned int>(code) & 0x3fU)));
1153 }
1154 }
1155 }
1156 }
1157 }
1158 return unescaped;
1159 }
1160
1161 BASE_NS::string escape(BASE_NS::string_view str)
1162 {
1163 BASE_NS::string escaped;
1164 escaped.reserve(str.size());
1165 for (size_t i = 0, end = str.size(); i < end;) {
1166 auto chr = static_cast<uint8_t>(str[i]);
1167 ++i;
1168 if (chr == '"') {
1169 escaped += "\\\""; // Quotation mark
1170 } else if (chr == '\\') {
1171 escaped += "\\\\"; // Reverse solidus
1172 } else if (chr == '\b') {
1173 escaped += "\\b"; // Backspace
1174 } else if (chr == '\f') {
1175 escaped += "\\f"; // Formfeed
1176 } else if (chr == '\n') {
1177 escaped += "\\n"; // Linefeed
1178 } else if (chr == '\r') {
1179 escaped += "\\r"; // Carriage return
1180 } else if (chr == '\t') {
1181 escaped += "\\t"; // Horizontal tab
1182 } else if (chr < 0x80) {
1183 escaped += static_cast<BASE_NS::string::value_type>(chr); // 1-byte characters: 0xxxxxxx (ASCII)
1184 } else {
1185 // Unicode
1186 unsigned code = 0U;
1187 unsigned left = 0U;
1188 // Decode first byte and figure out how many additional bytes are needed for the codepoint.
1189 if (chr < 0xE0) {
1190 // 2-byte characters: 110xxxxx 10xxxxxx
1191 if (i < end) {
1192 code = ((chr & ~0xC0U));
1193 left = 1U;
1194 }
1195 } else if (chr < 0xF0) {
1196 // 3-byte characters: 1110xxxx 10xxxxxx 10xxxxxx
1197 if ((i + 1U) < end) {
1198 code = (chr & ~0xE0U);
1199 left = 2U;
1200 }
1201 } else if (chr < 0xF8) {
1202 // 4-byte characters: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
1203 if ((i + 2U) < end) {
1204 code = (chr & ~0xF0U);
1205 left = 3U;
1206 }
1207 }
1208
1209 // Combine the remaining bytes.
1210 while (left--) {
1211 code = (code << 6U) + (str[i++] & 0x3FU);
1212 }
1213
1214 // Codepoints U+010000 to U+10FFFF are encoded as UTF-16 surrogate pairs.
1215 // Surrogate pair encoding: 0x10000 + (Hi - 0xD800) * 0x400 + (Lo - 0xDC00)
1216 if (code >= 0x010000 && code <= 0x10FFFF) {
1217 // First append the Hi value.
1218 code -= 0x10000U;
1219 const auto hi = (code >> 10U) + 0xD800U;
1220 escaped += '\\';
1221 escaped += 'u';
1222 escaped += BASE_NS::to_hex(hi);
1223
1224 // Calculate the Lo value.
1225 code = (code & 0x3FFU) + 0xDC00U;
1226 }
1227
1228 // Append the codepoint zero padded to four hex values.
1229 escaped += '\\';
1230 escaped += 'u';
1231 const auto codepoint = BASE_NS::to_hex(code);
1232 if (codepoint.size() < 4U) {
1233 escaped.append(4U - codepoint.size(), '0');
1234 }
1235 escaped += codepoint;
1236 }
1237 }
1238 return escaped;
1239 }
1240 } // namespace json
1241 CORE_END_NAMESPACE()
1242 #endif // JSON_IMPL
1243
1244 #endif // !API_CORE_JSON_JSON_H
1245