1 /*
2 * Copyright (c) 2022 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #ifndef API_CORE_JSON_JSON_H
17 #define API_CORE_JSON_JSON_H
18
19 #include <cstdint>
20 #include <cstdio>
21 #include <new>
22
23 #include <base/containers/string.h>
24 #include <base/containers/string_view.h>
25 #include <base/containers/type_traits.h>
26 #include <base/containers/vector.h>
27 #include <base/namespace.h>
28 #include <core/namespace.h>
29
CORE_BEGIN_NAMESPACE()30 CORE_BEGIN_NAMESPACE()
31 namespace json {
32 using readonly_string_t = BASE_NS::string_view;
33 using writable_string_t = BASE_NS::string;
34 template<typename T>
35 using array_t = BASE_NS::vector<T>;
36
/** Discriminator telling which kind of JSON value a value_t currently holds. */
enum class type : uint8_t {
    uninitialized = 0, // no value; also what parse() reports on failure
    object = 1,
    array = 2,
    string = 3,
    floating_point = 4,
    signed_int = 5,
    unsigned_int = 6,
    boolean = 7,
    null = 8,
};
49
/** Tag for JSON values which contain read-only strings. The source JSON string must be kept alive for as long as the
 * parsing results are in use. */
52 struct readonly_tag {};
53
/** Tag for JSON values which contain writable strings. The lifetime of the parsing results doesn't depend on the
 * source JSON string. */
56 struct writable_tag {};
57
58 template<typename T = readonly_tag>
59 struct value_t;
60
/** JSON structure which contains read-only strings. The source JSON string must be kept alive for as long as the
 * parsing results are in use. */
63 using value = value_t<readonly_tag>;
64
65 /** JSON structure which contains writable strings. String values can be modified and if the instance was generated by
66 * the parser, the source string doesn't need to be stored while the instance is used. */
67 using standalone_value = value_t<writable_tag>;
68
/** Parses 'data' and returns a JSON structure. The type of the returned value will be 'uninitialized' if parsing failed.
70 * @param data JSON as a null terminated string.
71 * @return Parsed JSON structure.
72 */
73 template<typename T = readonly_tag>
74 value_t<T> parse(const char* data);
75
76 /** Converts a JSON structure into a string.
77 * @param value JSON structure.
78 * @return JSON as string.
79 */
80 template<typename T = readonly_tag>
81 BASE_NS::string to_string(const value_t<T>& value);
82
83 template<typename T = readonly_tag>
84 void to_string(BASE_NS::string& out, const value_t<T>& value);
85
86 BASE_NS::string unescape(BASE_NS::string_view str);
87
88 void unescape(BASE_NS::string& out, BASE_NS::string_view str);
89
90 BASE_NS::string escape(BASE_NS::string_view str);
91
92 void escape(BASE_NS::string& out, BASE_NS::string_view str);
93
94 /** JSON value. */
95 template<typename Tag>
96 struct value_t {
97 /** Type used for JSON strings and JSON object keys. */
98 using string =
99 typename BASE_NS::conditional_t<BASE_NS::is_same_v<Tag, writable_tag>, writable_string_t, readonly_string_t>;
100
101 /** Type used for JSON null */
102 struct null {};
103
104 /** Type used for key-value pairs inside JSON objects. */
105 struct pair {
106 pair(string&& k, value_t&& v) : key(BASE_NS::forward<string>(k)), value(BASE_NS::forward<value_t>(v)) {}
107 string key;
108 value_t value;
109 };
110
111 /** Type used for JSON objects. */
112 using object = array_t<pair>;
113
114 /** Type used for JSON arrays. */
115 using array = array_t<value_t>;
116
117 /** Type of this JSON value. */
118 type type { type::uninitialized };
119 union {
120 object object_;
121 array array_;
122 string string_;
123 double float_;
124 int64_t signed_;
125 uint64_t unsigned_;
126 bool boolean_;
127 };
128
129 value_t() noexcept : type { type::uninitialized } {}
130
131 value_t(object&& value) noexcept : type { type::object }, object_(BASE_NS::move(value)) {}
132
133 value_t(array&& value) noexcept : type { type::array }, array_(BASE_NS::move(value)) {}
134
135 value_t(string value) noexcept : type { type::string }, string_(BASE_NS::move(value)) {}
136
137 value_t(const char* value) noexcept : value_t(string(value)) {}
138
139 template<typename Number, BASE_NS::enable_if_t<BASE_NS::is_arithmetic_v<Number>, bool> = true>
140 value_t(Number value) noexcept
141 {
142 if constexpr (BASE_NS::is_same_v<Number, bool>) {
143 type = type::boolean;
144 boolean_ = value;
145 } else if constexpr (BASE_NS::is_floating_point_v<Number>) {
146 type = type::floating_point;
147 float_ = static_cast<double>(value);
148 } else if constexpr (BASE_NS::is_signed_v<Number>) {
149 type = type::signed_int;
150 signed_ = static_cast<int64_t>(value);
151 } else if constexpr (BASE_NS::is_unsigned_v<Number>) {
152 type = type::unsigned_int;
153 unsigned_ = static_cast<uint64_t>(value);
154 }
155 }
156
157 value_t(null /* value */) noexcept : type { type::null } {}
158
159 template<typename Value>
160 value_t(const array_t<Value>& values) noexcept : type { type::array }, array_()
161 {
162 array_.reserve(values.size());
163 for (const auto& value : values) {
164 array_.emplace_back(value);
165 }
166 }
167
168 template<typename Value, size_t N>
169 value_t(Value (&value)[N]) : type { type::array }, array_()
170 {
171 array_.reserve(N);
172 for (size_t i = 0; i < N; ++i) {
173 array_.emplace_back(value[i]);
174 }
175 }
176
177 value_t(const value_t& other) : type(other.type)
178 {
179 switch (type) {
180 case type::uninitialized:
181 break;
182 case type::object:
183 new (&object_) object(other.object_);
184 break;
185 case type::array:
186 new (&array_) array(other.array_);
187 break;
188 case type::string:
189 new (&string_) string(other.string_);
190 break;
191 case type::floating_point:
192 float_ = other.float_;
193 break;
194 case type::signed_int:
195 signed_ = other.signed_;
196 break;
197 case type::unsigned_int:
198 unsigned_ = other.unsigned_;
199 break;
200 case type::boolean:
201 boolean_ = other.boolean_;
202 break;
203 case type::null:
204 break;
205 default:
206 break;
207 }
208 }
209
210 value_t& operator=(const value_t& other)
211 {
212 if (this != &other) {
213 cleanup();
214 type = other.type;
215 switch (type) {
216 case type::uninitialized:
217 break;
218 case type::object:
219 new (&object_) object(other.object_);
220 break;
221 case type::array:
222 new (&array_) array(other.array_);
223 break;
224 case type::string:
225 new (&string_) string(other.string_);
226 break;
227 case type::floating_point:
228 float_ = other.float_;
229 break;
230 case type::signed_int:
231 signed_ = other.signed_;
232 break;
233 case type::unsigned_int:
234 unsigned_ = other.unsigned_;
235 break;
236 case type::boolean:
237 boolean_ = other.boolean_;
238 break;
239 case type::null:
240 break;
241 default:
242 break;
243 }
244 }
245 return *this;
246 }
247
248 value_t& operator=(string value)
249 {
250 cleanup();
251 type = type::string;
252 new (&string_) string(BASE_NS::move(value));
253 return *this;
254 }
255
256 value_t& operator=(const char* value)
257 {
258 cleanup();
259 type = type::string;
260 new (&string_) string(value);
261 return *this;
262 }
263
264 template<typename Number, BASE_NS::enable_if_t<BASE_NS::is_arithmetic_v<Number>, bool> = true>
265 value_t& operator=(Number value)
266 {
267 cleanup();
268 if constexpr (BASE_NS::is_same_v<Number, bool>) {
269 type = type::boolean;
270 boolean_ = value;
271 } else if constexpr (BASE_NS::is_floating_point_v<Number>) {
272 type = type::floating_point;
273 float_ = static_cast<double>(value);
274 } else if constexpr (BASE_NS::is_signed_v<Number>) {
275 type = type::signed_int;
276 signed_ = static_cast<int64_t>(value);
277 } else if constexpr (BASE_NS::is_unsigned_v<Number>) {
278 type = type::unsigned_int;
279 unsigned_ = static_cast<uint64_t>(value);
280 }
281 return *this;
282 }
283
284 value_t(value_t&& rhs) noexcept : type { BASE_NS::exchange(rhs.type, type::uninitialized) }
285 {
286 switch (type) {
287 case type::uninitialized:
288 break;
289 case type::object:
290 new (&object_) object(BASE_NS::move(rhs.object_));
291 break;
292 case type::array:
293 new (&array_) array(BASE_NS::move(rhs.array_));
294 break;
295 case type::string:
296 new (&string_) string(BASE_NS::move(rhs.string_));
297 break;
298 case type::floating_point:
299 float_ = rhs.float_;
300 break;
301 case type::signed_int:
302 signed_ = rhs.signed_;
303 break;
304 case type::unsigned_int:
305 unsigned_ = rhs.unsigned_;
306 break;
307 case type::boolean:
308 boolean_ = rhs.boolean_;
309 break;
310 case type::null:
311 break;
312 default:
313 break;
314 }
315 }
316
317 value_t& operator=(value_t&& rhs) noexcept
318 {
319 if (this != &rhs) {
320 cleanup();
321 type = BASE_NS::exchange(rhs.type, type::uninitialized);
322 switch (type) {
323 case type::uninitialized:
324 break;
325 case type::object:
326 new (&object_) object(BASE_NS::move(rhs.object_));
327 break;
328 case type::array:
329 new (&array_) array(BASE_NS::move(rhs.array_));
330 break;
331 case type::string:
332 new (&string_) string(BASE_NS::move(rhs.string_));
333 break;
334 case type::floating_point:
335 float_ = rhs.float_;
336 break;
337 case type::signed_int:
338 signed_ = rhs.signed_;
339 break;
340 case type::unsigned_int:
341 unsigned_ = rhs.unsigned_;
342 break;
343 case type::boolean:
344 boolean_ = rhs.boolean_;
345 break;
346 case type::null:
347 break;
348 default:
349 break;
350 }
351 }
352 return *this;
353 }
354
355 template<typename OtherT>
356 operator value_t<OtherT>() const
357 {
358 value_t<OtherT> other;
359 other.type = type;
360 switch (type) {
361 case type::uninitialized:
362 break;
363 case type::object:
364 new (&other.object_) typename value_t<OtherT>::object(BASE_NS::default_allocator());
365 other.object_.reserve(object_.size());
366 for (const auto& p : object_) {
367 other.object_.emplace_back(typename value_t<OtherT>::string(p.key), p.value);
368 }
369 break;
370 case type::array:
371 new (&other.array_) typename value_t<OtherT>::array(BASE_NS::default_allocator());
372 other.array_.reserve(array_.size());
373 for (const auto& v : array_) {
374 other.array_.emplace_back(v);
375 }
376 break;
377 case type::string:
378 new (&other.string_) typename value_t<OtherT>::string(string_);
379 break;
380 case type::floating_point:
381 other.float_ = float_;
382 break;
383 case type::signed_int:
384 other.signed_ = signed_;
385 break;
386 case type::unsigned_int:
387 other.unsigned_ = unsigned_;
388 break;
389 case type::boolean:
390 other.boolean_ = boolean_;
391 break;
392 case type::null:
393 break;
394 default:
395 break;
396 }
397 return other;
398 }
399
400 #if _MSC_VER
401 #pragma warning(push)
402 #pragma warning(disable : 4583)
403 #endif
404 ~value_t()
405 {
406 cleanup();
407 type = type::uninitialized;
408 }
409 #if _MSC_VER
410 #pragma warning(pop)
411 #endif
412 template<typename T>
413 inline void destroy(T& t)
414 {
415 t.~T();
416 }
417
418 void cleanup()
419 {
420 switch (type) {
421 case type::uninitialized:
422 break;
423 case type::object:
424 destroy(object_);
425 break;
426 case type::array:
427 destroy(array_);
428 break;
429 case type::string:
430 destroy(string_);
431 break;
432 case type::floating_point:
433 break;
434 case type::signed_int:
435 break;
436 case type::unsigned_int:
437 break;
438 case type::boolean:
439 break;
440 case type::null:
441 break;
442 default:
443 break;
444 }
445 }
446
447 explicit operator bool() const noexcept
448 {
449 return type != type::uninitialized;
450 }
451
452 bool is_object() const noexcept
453 {
454 return type == type::object;
455 }
456
457 bool is_array() const noexcept
458 {
459 return type == type::array;
460 }
461
462 bool is_string() const noexcept
463 {
464 return type == type::string;
465 }
466
467 bool is_floating_point() const noexcept
468 {
469 return type == type::floating_point;
470 }
471
472 bool is_signed_int() const noexcept
473 {
474 return type == type::signed_int;
475 }
476
477 bool is_unsigned_int() const noexcept
478 {
479 return type == type::unsigned_int;
480 }
481
482 bool is_number() const noexcept
483 {
484 return type == type::floating_point || type == type::signed_int || type == type::unsigned_int;
485 }
486
487 bool is_boolean() const noexcept
488 {
489 return type == type::boolean;
490 }
491
492 bool is_null() const noexcept
493 {
494 return type == type::null;
495 }
496
497 bool empty() const noexcept
498 {
499 if (is_object()) {
500 return object_.empty();
501 } else if (is_array()) {
502 return array_.empty();
503 }
504 return true;
505 }
506
507 template<typename T>
508 T as_number() const
509 {
510 switch (type) {
511 case type::floating_point:
512 return static_cast<T>(float_);
513 case type::signed_int:
514 return static_cast<T>(signed_);
515 case type::unsigned_int:
516 return static_cast<T>(unsigned_);
517 default:
518 return 0;
519 }
520 }
521
522 const value_t* find(BASE_NS::string_view key) const noexcept
523 {
524 if (type == type::object) {
525 for (auto& t : object_) {
526 if (t.key == key) {
527 return &t.value;
528 }
529 }
530 }
531 return nullptr;
532 }
533
534 value_t& operator[](const BASE_NS::string_view& key)
535 {
536 if (type == type::object) {
537 for (auto& t : object_) {
538 if (t.key == key) {
539 return t.value;
540 }
541 }
542 object_.emplace_back(value_t<Tag>::string(key), value_t<Tag> {});
543 return object_.back().value;
544 }
545 return *this;
546 }
547 };
548 } // namespace json
549 CORE_END_NAMESPACE()
550
551 #ifdef JSON_IMPL
552 #include <securec.h>
553
554 #include <base/containers/fixed_string.h>
555 #include <base/util/uid.h>
556
CORE_BEGIN_NAMESPACE()557 CORE_BEGIN_NAMESPACE()
558 namespace json {
559 namespace {
560 constexpr auto TRUE_STR = BASE_NS::string_view("true");
561 constexpr auto FALSE_STR = BASE_NS::string_view("false");
562 constexpr auto NULL_STR = BASE_NS::string_view("null");
563 constexpr auto UNISTART_STR = BASE_NS::string_view("\\u");
564
/** True for the four characters treated as whitespace here: space, line feed, carriage return and tab. */
inline bool isWhite(char data)
{
    switch (data) {
        case ' ':
        case '\n':
        case '\r':
        case '\t':
            return true;
        default:
            return false;
    }
}
569
/** True for '+' and '-'. */
inline bool isSign(char data)
{
    if (data == '-') {
        return true;
    }
    return data == '+';
}
574
/** True for the decimal digits '0'..'9'. */
inline bool isDigit(char data)
{
    if (data < '0') {
        return false;
    }
    return data <= '9';
}
579
/** True for hexadecimal digits: '0'..'9', 'a'..'f' and 'A'..'F'. */
inline bool isHex(char data)
{
    const bool decimal = (data >= '0') && (data <= '9');
    const bool lower = (data >= 'a') && (data <= 'f');
    const bool upper = (data >= 'A') && (data <= 'F');
    return decimal || lower || upper;
}
584
585 inline const char* trim(const char* data)
586 {
587 while (*data && isWhite(*data)) {
588 data++;
589 }
590 return data;
591 }
592
593 // values
594 template<typename T>
595 const char* parse_string(const char* data, value_t<T>& res)
596 {
597 const char* start = data;
598 for (; *data != 0; data++) {
599 if (*data == '\\' && data[1]) {
600 // escape.. (parse just enough to not stop too early)
601 if (data[1] == '\\' || data[1] == '"' || data[1] == '/' || data[1] == 'b' || data[1] == 'f' ||
602 data[1] == 'n' || data[1] == 'r' || data[1] == 't') {
603 ++data;
604 continue;
605 } else if (data[1] == 'u') {
606 data += 2;
607 for (const char* end = data + 4; data != end; ++data) {
608 if (*data == 0 || !isHex(*data)) {
609 // invalid Unicode
610 return data;
611 }
612 }
613 --data;
614 } else {
615 // invalid escape
616 return data;
617 }
618 } else if (*data == '"') {
619 res = value_t<T> { typename value_t<T>::string { start, static_cast<size_t>(data - start) } };
620 return data + 1;
621 } else if (static_cast<unsigned char>(*data) < 0x20) {
622 // unescaped control
623 return data;
624 }
625 }
626 return data;
627 }
628
629 template<typename T>
630 const char* parse_number(const char* data, value_t<T>& res)
631 {
632 bool negative = false;
633 const char* beg = data;
634 if (*data == '-') {
635 negative = true;
636 data++;
637 if (!isDigit(*data)) {
638 // no digits after '-'
639 return data;
640 }
641 }
642 bool fraction = false;
643 bool exponent = false;
644
645 if (*data == '0') {
646 ++data;
647 // after leading zero only '.', 'e' and 'E' allowed
648 if (*data == '.') {
649 ++data;
650 fraction = true;
651 } else if (*data == 'e' || *data == 'E') {
652 ++data;
653 exponent = true;
654 }
655 } else {
656 while (isDigit(*data)) {
657 ++data;
658 }
659 if (*data == '.') {
660 ++data;
661 fraction = true;
662 } else if (*data == 'e' || *data == 'E') {
663 ++data;
664 exponent = true;
665 }
666 }
667
668 if (fraction) {
669 // fraction must start with a digit
670 if (isDigit(*data)) {
671 ++data;
672 } else {
673 // fraction missing first digit
674 return data;
675 }
676 // fraction may contain digits up to begining of exponent ('e' or 'E')
677 while (isDigit(*data)) {
678 ++data;
679 }
680 if (*data == 'e' || *data == 'E') {
681 ++data;
682 exponent = true;
683 }
684 }
685 if (exponent) {
686 // exponent must start with '-' or '+' followed by a digit, or digit
687 if (*data == '-' || *data == '+') {
688 ++data;
689 }
690 if (isDigit(*data)) {
691 ++data;
692 } else {
693 // exponent missing first digit
694 return data;
695 }
696 while (isDigit(*data)) {
697 ++data;
698 }
699 }
700 if (data != beg) {
701 char* end;
702 if (fraction || exponent) {
703 res = value_t<T>(strtod(beg, &end));
704 } else if (negative) {
705 res = value_t<T>(strtoll(beg, &end, 10));
706 } else {
707 res = value_t<T>(strtoull(beg, &end, 10));
708 }
709 return data;
710 }
711 // invalid json
712 return data;
713 }
714
715 template<typename T>
716 const char* parse_boolean(const char* data, value_t<T>& res)
717 {
718 if (*data == 't') {
719 ++data;
720 const char rue[] = { 'r', 'u', 'e' };
721 for (unsigned i = 0u; i < sizeof(rue); ++i) {
722 if (data[i] == 0 || data[i] != rue[i]) {
723 // non-string starting with 't' but != "true"
724 return data;
725 }
726 }
727
728 res = value_t<T>(true);
729 data += sizeof(rue);
730 } else if (*data == 'f') {
731 ++data;
732 const char alse[] = { 'a', 'l', 's', 'e' };
733 for (unsigned i = 0u; i < sizeof(alse); ++i) {
734 if (data[i] == 0 || data[i] != alse[i]) {
735 // non-string starting with 'f' but != "false"
736 return data;
737 }
738 }
739 res = value_t<T>(false);
740 data += sizeof(alse);
741 } else {
742 // non-string not starting with 'f' or 't'
743 return data;
744 }
745 return data;
746 }
747
748 template<typename T>
749 const char* parse_null(const char* data, value_t<T>& res)
750 {
751 if (*data == 'n') {
752 ++data;
753 const char ull[] = { 'u', 'l', 'l' };
754 for (unsigned i = 0u; i < sizeof(ull); ++i) {
755 if (data[i] == 0 || data[i] != ull[i]) {
756 // non-string starting with 'n' but != "null"
757 return data;
758 }
759 }
760 res = value_t<T>(typename value_t<T>::null {});
761 data += sizeof(ull);
762 } else {
763 // invalid json
764 return data;
765 }
766 return data;
767 }
768
769 template<typename T>
770 void add(value_t<T>& v, value_t<T>&& value)
771 {
772 switch (v.type) {
773 case type::uninitialized:
774 v = BASE_NS::move(value);
775 break;
776 case type::object:
777 v.object_.back().value = BASE_NS::move(value);
778 break;
779 case type::array:
780 v.array_.push_back(BASE_NS::move(value));
781 break;
782 case type::string:
783 case type::floating_point:
784 case type::signed_int:
785 case type::unsigned_int:
786 case type::boolean:
787 case type::null:
788 default:
789 break;
790 }
791 }
792 } // namespace
793
794 template<typename T>
795 value_t<T> parse(const char* data)
796 {
797 if (!data) {
798 return {};
799 }
800 using jsonValue = value_t<T>;
801 typename jsonValue::array stack;
802 // push an uninitialized value which will get the final value during parsing
803 stack.emplace_back();
804
805 bool acceptValue = true;
806 while (*data) {
807 data = trim(data);
808 if (*data == '{') {
809 // start of an object
810 if (!acceptValue) {
811 return {};
812 }
813 data = trim(data + 1);
814 if (*data == '}') {
815 data = trim(data + 1);
816 // handle empty object.
817 add(stack.back(), jsonValue(typename jsonValue::object {}));
818 acceptValue = false;
819 } else if (*data == '"') {
820 // try to read the key
821 jsonValue key;
822 data = trim(parse_string(data + 1, key));
823
824 if (*data != ':') {
825 // missing : after key
826 return {};
827 }
828 data = trim(data + 1);
829 // push the object with key and missing value on the stack and hope to find a value next
830 stack.emplace_back(typename jsonValue::object {})
831 .object_.emplace_back(BASE_NS::move(key.string_), jsonValue {});
832 acceptValue = true;
833 } else {
834 // missing start of key or end of object
835 return {};
836 }
837 } else if (*data == '}') {
838 // end of an object
839 if (stack.back().type != type::object) {
840 // unexpected }
841 return {};
842 }
843 // check are we missing a value ('{"":}', '{"":"",}' )
844 if (acceptValue) {
845 return {};
846 }
847 data = trim(data + 1);
848 // move this object to the next in the stack
849 auto value = BASE_NS::move(stack.back());
850 stack.pop_back();
851 if (stack.empty()) {
852 // invalid json
853 return {};
854 }
855 add(stack.back(), BASE_NS::move(value));
856 acceptValue = false;
857 } else if (*data == '[') {
858 // start of an array
859 if (!acceptValue) {
860 // unexpected [
861 return {};
862 }
863 data = trim(data + 1);
864 if (*data == ']') {
865 data = trim(data + 1);
866 // handle empty array.
867 add(stack.back(), jsonValue(typename jsonValue::array {}));
868 acceptValue = false;
869 } else {
870 // push the empty array on the stack and hope to find values
871 stack.push_back(typename jsonValue::array {});
872 acceptValue = true;
873 }
874 } else if (*data == ']') {
875 // end of an array
876 if (stack.back().type != type::array) {
877 // unexpected ]
878 return {};
879 }
880 // check are we missing a value ('[1,]' '[1]]')
881 if (acceptValue) {
882 // unexpected ]
883 return {};
884 }
885 data = trim(data + 1);
886
887 auto value = BASE_NS::move(stack.back());
888 stack.pop_back();
889 if (stack.empty()) {
890 // invalid json
891 return {};
892 }
893 add(stack.back(), BASE_NS::move(value));
894 acceptValue = false;
895 } else if (*data == ',') {
896 // comma is allowed when the previous value was complete and we have an incomplete object or array on the
897 // stack.
898 if (!acceptValue && stack.back().type == type::object) {
899 data = trim(data + 1);
900 if (*data != '"') {
901 // missing key for next object
902 return {};
903 }
904 // try to read the key
905 jsonValue key;
906 data = trim(parse_string(data + 1, key));
907
908 if (*data != ':') {
909 // missing value for next object
910 return {};
911 }
912 data = trim(data + 1);
913 stack.back().object_.emplace_back(BASE_NS::move(key.string_), jsonValue {});
914 acceptValue = true;
915 } else if (!acceptValue && stack.back().type == type::array) {
916 data = trim(data + 1);
917 acceptValue = true;
918 } else {
919 // comma allowed only between objects and values inside an array
920 return {};
921 }
922 } else if (*data == '"') {
923 jsonValue value;
924 data = trim(parse_string(data + 1, value));
925 if (acceptValue && value.type == type::string) {
926 add(stack.back(), BASE_NS::move(value));
927 acceptValue = false;
928 } else {
929 // unexpected "
930 return {};
931 }
932 } else if (isSign(*data) || isDigit(*data)) {
933 jsonValue value;
934 data = trim(parse_number(data, value));
935 if (acceptValue && value.type != type::uninitialized) {
936 add(stack.back(), BASE_NS::move(value));
937 acceptValue = false;
938 } else {
939 // failed parsing number
940 return {};
941 }
942 } else if ((*data == 't') || (*data == 'f')) {
943 jsonValue value;
944 data = trim(parse_boolean(data, value));
945 if (acceptValue && value.type == type::boolean) {
946 add(stack.back(), BASE_NS::move(value));
947 acceptValue = false;
948 } else {
949 // failed parsing boolean
950 return {};
951 }
952 } else if (*data == 'n') {
953 jsonValue value;
954 data = trim(parse_null(data, value));
955 if (acceptValue && value.type == type::null) {
956 add(stack.back(), BASE_NS::move(value));
957 acceptValue = false;
958 } else {
959 // failed parsing null
960 return {};
961 }
962 } else {
963 // unexpected character
964 return {};
965 }
966 }
967 // check if we are missing a value ('{"":' '[')
968 if (acceptValue) {
969 return {};
970 }
971
972 auto value = BASE_NS::move(stack.front());
973 return value;
974 }
975
976 template value parse(const char*);
977 template standalone_value parse(const char*);
978 // end of parser
979 namespace {
980 template<typename T>
981 void append(BASE_NS::string& out, const typename value_t<T>::string& string)
982 {
983 out += '"';
984 escape(out, string);
985 out += '"';
986 }
987
988 template<typename T>
989 void append(BASE_NS::string& out, const typename value_t<T>::object& object)
990 {
991 out += '{';
992 int count = 0;
993 for (const auto& v : object) {
994 if (count++) {
995 out += ',';
996 }
997 append<T>(out, v.key);
998 out += ':';
999 to_string(out, v.value);
1000 }
1001 out += '}';
1002 }
1003
1004 template<typename T>
1005 void append(BASE_NS::string& out, const typename value_t<T>::array& array)
1006 {
1007 out += '[';
1008 int count = 0;
1009 for (const auto& v : array) {
1010 if (count++) {
1011 out += ',';
1012 }
1013 to_string(out, v);
1014 }
1015 out += ']';
1016 }
1017
1018 template<typename T>
1019 void append(BASE_NS::string& out, const double floatingPoint)
1020 {
1021 constexpr const char* FLOATING_FORMAT_STR = "%.17g";
1022 if (const int size = snprintf(nullptr, 0, FLOATING_FORMAT_STR, floatingPoint); size > 0) {
1023 const size_t oldSize = out.size();
1024 out.resize(oldSize + size);
1025 const size_t newSize = out.size();
1026 // "At most bufsz - 1 characters are written." string has size() characters + 1 for null so use size() +
1027 // 1 as the total size. If resize() failed string size() hasn't changed, buffer will point to the null
1028 // character and bufsz will be 1 i.e. only the null character will be written.
1029 snprintf_s(
1030 out.data() + oldSize, newSize + 1 - oldSize, static_cast<size_t>(size), FLOATING_FORMAT_STR, floatingPoint);
1031 }
1032 }
1033 } // namespace
1034
1035 template<typename T>
1036 BASE_NS::string to_string(const value_t<T>& value)
1037 {
1038 BASE_NS::string out;
1039 to_string(out, value);
1040 return out;
1041 }
1042
1043 template<typename T>
1044 void to_string(BASE_NS::string& out, const value_t<T>& value)
1045 {
1046 switch (value.type) {
1047 case type::uninitialized:
1048 break;
1049
1050 case type::object:
1051 append<T>(out, value.object_);
1052 break;
1053
1054 case type::array:
1055 append<T>(out, value.array_);
1056 break;
1057
1058 case type::string:
1059 append<T>(out, value.string_);
1060 break;
1061
1062 case type::floating_point:
1063 append<T>(out, value.float_);
1064 break;
1065
1066 case type::signed_int:
1067 out += BASE_NS::to_string(value.signed_);
1068 break;
1069
1070 case type::unsigned_int:
1071 out += BASE_NS::to_string(value.unsigned_);
1072 break;
1073
1074 case type::boolean:
1075 if (value.boolean_) {
1076 out += TRUE_STR;
1077 } else {
1078 out += FALSE_STR;
1079 }
1080 break;
1081
1082 case type::null:
1083 out += NULL_STR;
1084 break;
1085
1086 default:
1087 break;
1088 }
1089 }
1090
1091 template BASE_NS::string to_string(const value& value);
1092 template void to_string(BASE_NS::string& out, const value& value);
1093 template BASE_NS::string to_string(const standalone_value& value);
1094 template void to_string(BASE_NS::string& out, const standalone_value& value);
1095
1096 namespace {
1097 int codepoint(BASE_NS::string_view str)
1098 {
1099 int code = 0;
1100 for (size_t u = 0U; u < 4U; ++u) {
1101 const char chr = str[u];
1102 code <<= 4; // Shift by the size of a single hex character
1103 code += BASE_NS::HexToDec(chr);
1104 }
1105 return code;
1106 }
1107 } // namespace
1108
1109 BASE_NS::string unescape(BASE_NS::string_view str)
1110 {
1111 BASE_NS::string unescaped;
1112 unescape(unescaped, str);
1113 return unescaped;
1114 }
1115
1116 void unescape(BASE_NS::string& out, BASE_NS::string_view str)
1117 {
1118 out.reserve(out.size() + str.size());
1119 for (size_t i = 0, end = str.size(); i < end;) {
1120 auto chr = str[i];
1121 ++i;
1122 if (chr != '\\') {
1123 out += chr;
1124 } else {
1125 chr = str[i];
1126 ++i;
1127 if (chr == '"') {
1128 out += '"'; // Quotation mark
1129 } else if (chr == '\\') {
1130 out += '\\'; // Reverse solidus
1131 } else if (chr == '/') {
1132 out += '/'; // Solidus.. we do unescape this..
1133 } else if (chr == 'b') {
1134 out += '\b'; // Backspace
1135 } else if (chr == 'f') {
1136 out += '\f'; // Formfeed
1137 } else if (chr == 'n') {
1138 out += '\n'; // Linefeed
1139 } else if (chr == 'r') {
1140 out += '\r'; // Carriage return
1141 } else if (chr == 't') {
1142 out += '\t'; // Horizontal tab
1143 } else if (chr == 'u') { // Unicode character
1144 if ((i + 4U) <= end) { // Expecting 4 hexadecimal values
1145 // Read the Unicode code point.
1146 int code = codepoint(str.substr(i, 4U));
1147 i += 4U;
1148 // Codepoints U+010000 to U+10FFFF are encoded as UTF-16 surrogate pairs. High surrogate between
1149 // 0xD800-0xDBFF and low between 0xDC00-0xDFFF.
1150 if (code >= 0xd800 && code <= 0xdbff) {
1151 // See if there's an other \uXXXX value in the correct range.
1152 if ((i + 6U) <= end) {
1153 auto next = str.substr(i, 6U);
1154 if (next[0] == '\\' && next[1] == 'u') {
1155 next.remove_prefix(2);
1156 int low = codepoint(next);
1157 i += 6U;
1158 if (low >= 0xdc00 && low <= 0xdfff) {
1159 // Surrogate pair encoding: 0x10000 + (Hi - 0xD800) * 0x400 + (Lo - 0xDC00)
1160 code = (static_cast<int>(static_cast<unsigned int>(code) << 10U) + low -
1161 (static_cast<int>(0xd800U << 10U) + 0xdc00 - 0x10000));
1162 }
1163 }
1164 }
1165 }
1166 // Convert code point to UTF-8.
1167 if (code < 0x80) {
1168 // 1-byte characters: 0xxxxxxx (ASCII)
1169 out += static_cast<char>(code);
1170 } else if (code < 0x7ff) {
1171 // 2-byte characters: 110xxxxx 10xxxxxx
1172 out += static_cast<char>(0xc0U | (static_cast<unsigned int>(code) >> 6U));
1173 out += static_cast<char>(0x80U | (static_cast<unsigned int>(code) & 0x3fU));
1174 } else if (code <= 0xffff) {
1175 // 3-byte characters: 1110xxxx 10xxxxxx 10xxxxxx
1176 out += static_cast<char>(0xe0U | (static_cast<unsigned int>(code) >> 12u));
1177 out += static_cast<char>(0x80U | ((static_cast<unsigned int>(code) >> 6U) & 0x3FU));
1178 out += static_cast<char>(0x80U | (static_cast<unsigned int>(code) & 0x3Fu));
1179 } else {
1180 // 4-byte characters: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
1181 out += (static_cast<char>(0xf0U | (static_cast<unsigned int>(code) >> 18U)));
1182 out += (static_cast<char>(0x80U | ((static_cast<unsigned int>(code) >> 12U) & 0x3fU)));
1183 out += (static_cast<char>(0x80U | ((static_cast<unsigned int>(code) >> 6U) & 0x3fU)));
1184 out += (static_cast<char>(0x80U | (static_cast<unsigned int>(code) & 0x3fU)));
1185 }
1186 }
1187 }
1188 }
1189 }
1190 }
1191
1192 BASE_NS::string escape(BASE_NS::string_view str)
1193 {
1194 BASE_NS::string escaped;
1195 escape(escaped, str);
1196 return escaped;
1197 }
1198
1199 void escape(BASE_NS::string& out, BASE_NS::string_view str)
1200 {
1201 out.reserve(out.size() + str.size());
1202 for (size_t i = 0, end = str.size(); i < end;) {
1203 auto chr = static_cast<uint8_t>(str[i]);
1204 ++i;
1205 if (chr == '"') {
1206 out += "\\\""; // Quotation mark
1207 } else if (chr == '\\') {
1208 out += "\\\\"; // Reverse solidus
1209 } else if (chr == '\b') {
1210 out += "\\b"; // Backspace
1211 } else if (chr == '\f') {
1212 out += "\\f"; // Formfeed
1213 } else if (chr == '\n') {
1214 out += "\\n"; // Linefeed
1215 } else if (chr == '\r') {
1216 out += "\\r"; // Carriage return
1217 } else if (chr == '\t') {
1218 out += "\\t"; // Horizontal tab
1219 } else if (chr < 0x80) {
1220 out += static_cast<BASE_NS::string::value_type>(chr); // 1-byte characters: 0xxxxxxx (ASCII)
1221 } else {
1222 // Unicode
1223 unsigned code = 0U;
1224 unsigned left = 0U;
1225 // Decode first byte and figure out how many additional bytes are needed for the codepoint.
1226 if (chr < 0xE0) {
1227 // 2-byte characters: 110xxxxx 10xxxxxx
1228 if (i < end) {
1229 code = ((chr & ~0xC0U));
1230 left = 1U;
1231 }
1232 } else if (chr < 0xF0) {
1233 // 3-byte characters: 1110xxxx 10xxxxxx 10xxxxxx
1234 if ((i + 1U) < end) {
1235 code = (chr & ~0xE0U);
1236 left = 2U;
1237 }
1238 } else if (chr < 0xF8) {
1239 // 4-byte characters: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
1240 if ((i + 2U) < end) {
1241 code = (chr & ~0xF0U);
1242 left = 3U;
1243 }
1244 }
1245
1246 // Combine the remaining bytes.
1247 while (left--) {
1248 code = (code << 6U) + (str[i++] & 0x3FU);
1249 }
1250
1251 // Codepoints U+010000 to U+10FFFF are encoded as UTF-16 surrogate pairs.
1252 // Surrogate pair encoding: 0x10000 + (Hi - 0xD800) * 0x400 + (Lo - 0xDC00)
1253 if (code >= 0x010000 && code <= 0x10FFFF) {
1254 // First append the Hi value.
1255 code -= 0x10000U;
1256 const auto hi = (code >> 10U) + 0xD800U;
1257 out += UNISTART_STR;
1258 out += BASE_NS::to_hex(hi);
1259
1260 // Calculate the Lo value.
1261 code = (code & 0x3FFU) + 0xDC00U;
1262 }
1263
1264 // Append the codepoint zero padded to four hex values.
1265 out += UNISTART_STR;
1266 const auto codepoint = BASE_NS::to_hex(code);
1267 if (codepoint.size() < 4U) {
1268 out.append(4U - codepoint.size(), '0');
1269 }
1270 out += codepoint;
1271 }
1272 }
1273 }
1274 } // namespace json
1275 CORE_END_NAMESPACE()
1276 #endif // JSON_IMPL
1277
1278 #endif // !API_CORE_JSON_JSON_H
1279