1 //
2 // Copyright 2015-2016 gRPC authors.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16
17 #include <grpc/support/json.h>
18 #include <grpc/support/port_platform.h>
19 #include <inttypes.h>
20 #include <stdlib.h>
21
22 #include <algorithm>
23 #include <map>
24 #include <string>
25 #include <utility>
26 #include <vector>
27
28 #include "absl/base/attributes.h"
29 #include "absl/log/check.h"
30 #include "absl/status/status.h"
31 #include "absl/status/statusor.h"
32 #include "absl/strings/str_cat.h"
33 #include "absl/strings/str_format.h"
34 #include "absl/strings/str_join.h"
35 #include "absl/strings/string_view.h"
36 #include "absl/types/variant.h"
37 #include "src/core/util/json/json.h"
38 #include "src/core/util/match.h"
39
// Maximum container nesting depth: StartContainer() refuses to open another
// object/array once the scope stack already holds this many entries.
#define GRPC_JSON_MAX_DEPTH 255
// Maximum number of recoverable error messages that are recorded; once
// reached, further ones only set the truncated_errors_ flag.
#define GRPC_JSON_MAX_ERRORS 16
42
43 namespace grpc_core {
44
45 namespace {
46
47 class JsonReader {
48 public:
49 static absl::StatusOr<Json> Parse(absl::string_view input);
50
51 private:
52 enum class Status {
53 GRPC_JSON_DONE, // The parser finished successfully.
54 GRPC_JSON_PARSE_ERROR, // The parser found an error in the json stream.
55 GRPC_JSON_INTERNAL_ERROR // The parser got an internal error.
56 };
57
58 enum class State {
59 GRPC_JSON_STATE_OBJECT_KEY_BEGIN,
60 GRPC_JSON_STATE_OBJECT_KEY_STRING,
61 GRPC_JSON_STATE_OBJECT_KEY_END,
62 GRPC_JSON_STATE_VALUE_BEGIN,
63 GRPC_JSON_STATE_VALUE_STRING,
64 GRPC_JSON_STATE_STRING_ESCAPE,
65 GRPC_JSON_STATE_STRING_ESCAPE_U1,
66 GRPC_JSON_STATE_STRING_ESCAPE_U2,
67 GRPC_JSON_STATE_STRING_ESCAPE_U3,
68 GRPC_JSON_STATE_STRING_ESCAPE_U4,
69 GRPC_JSON_STATE_VALUE_NUMBER,
70 GRPC_JSON_STATE_VALUE_NUMBER_WITH_DECIMAL,
71 GRPC_JSON_STATE_VALUE_NUMBER_ZERO,
72 GRPC_JSON_STATE_VALUE_NUMBER_DOT,
73 GRPC_JSON_STATE_VALUE_NUMBER_E,
74 GRPC_JSON_STATE_VALUE_NUMBER_EPM,
75 GRPC_JSON_STATE_VALUE_TRUE_R,
76 GRPC_JSON_STATE_VALUE_TRUE_U,
77 GRPC_JSON_STATE_VALUE_TRUE_E,
78 GRPC_JSON_STATE_VALUE_FALSE_A,
79 GRPC_JSON_STATE_VALUE_FALSE_L,
80 GRPC_JSON_STATE_VALUE_FALSE_S,
81 GRPC_JSON_STATE_VALUE_FALSE_E,
82 GRPC_JSON_STATE_VALUE_NULL_U,
83 GRPC_JSON_STATE_VALUE_NULL_L1,
84 GRPC_JSON_STATE_VALUE_NULL_L2,
85 GRPC_JSON_STATE_VALUE_END,
86 GRPC_JSON_STATE_END
87 };
88
89 // The first non-unicode value is 0x110000. But let's pick
90 // a value high enough to start our error codes from. These
91 // values are safe to return from the read_char function.
92 //
93 static constexpr uint32_t GRPC_JSON_READ_CHAR_EOF = 0x7ffffff0;
94
95 struct Scope {
96 std::string parent_object_key;
97 absl::variant<Json::Object, Json::Array> data;
98
typegrpc_core::__anon77b8be040111::JsonReader::Scope99 Json::Type type() const {
100 return Match(
101 data, [](const Json::Object&) { return Json::Type::kObject; },
102 [](const Json::Array&) { return Json::Type::kArray; });
103 }
104
TakeAsJsongrpc_core::__anon77b8be040111::JsonReader::Scope105 Json TakeAsJson() {
106 return MatchMutable(
107 &data,
108 [&](Json::Object* object) {
109 return Json::FromObject(std::move(*object));
110 },
111 [&](Json::Array* array) {
112 return Json::FromArray(std::move(*array));
113 });
114 }
115 };
116
JsonReader(absl::string_view input)117 explicit JsonReader(absl::string_view input)
118 : original_input_(reinterpret_cast<const uint8_t*>(input.data())),
119 input_(original_input_),
120 remaining_input_(input.size()) {}
121
122 Status Run();
123 uint32_t ReadChar();
124 bool IsComplete();
125
CurrentIndex() const126 size_t CurrentIndex() const { return input_ - original_input_ - 1; }
127
128 GRPC_MUST_USE_RESULT bool StringAddChar(uint32_t c);
129 GRPC_MUST_USE_RESULT bool StringAddUtf32(uint32_t c);
130
131 Json* CreateAndLinkValue();
132 bool StartContainer(Json::Type type);
133 void EndContainer();
134 void SetKey();
135 void SetString();
136 bool SetNumber();
137 void SetTrue();
138 void SetFalse();
139 void SetNull();
140
141 const uint8_t* original_input_;
142 const uint8_t* input_;
143 size_t remaining_input_;
144
145 State state_ = State::GRPC_JSON_STATE_VALUE_BEGIN;
146 bool escaped_string_was_key_ = false;
147 bool container_just_begun_ = false;
148 uint16_t unicode_char_ = 0;
149 uint16_t unicode_high_surrogate_ = 0;
150 std::vector<std::string> errors_;
151 bool truncated_errors_ = false;
152 uint8_t utf8_bytes_remaining_ = 0;
153 uint8_t utf8_first_byte_ = 0;
154
155 Json root_value_;
156 std::vector<Scope> stack_;
157
158 std::string key_;
159 std::string string_;
160 };
161
StringAddChar(uint32_t c)162 bool JsonReader::StringAddChar(uint32_t c) {
163 if (utf8_bytes_remaining_ == 0) {
164 if ((c & 0x80) == 0) {
165 utf8_bytes_remaining_ = 0;
166 } else if ((c & 0xe0) == 0xc0 && c >= 0xc2) {
167 /// For the UTF-8 characters with length of 2 bytes, the range of the
168 /// first byte is [0xc2, 0xdf]. Reference: Table 3-7 in
169 /// https://www.unicode.org/versions/Unicode14.0.0/ch03.pdf
170 utf8_bytes_remaining_ = 1;
171 } else if ((c & 0xf0) == 0xe0) {
172 utf8_bytes_remaining_ = 2;
173 } else if ((c & 0xf8) == 0xf0 && c <= 0xf4) {
174 /// For the UTF-8 characters with length of 4 bytes, the range of the
175 /// first byte is [0xf0, 0xf4]. Reference: Table 3-7 in
176 /// https://www.unicode.org/versions/Unicode14.0.0/ch03.pdf
177 utf8_bytes_remaining_ = 3;
178 } else {
179 return false;
180 }
181 utf8_first_byte_ = c;
182 } else if (utf8_bytes_remaining_ == 1) {
183 if ((c & 0xc0) != 0x80) {
184 return false;
185 }
186 --utf8_bytes_remaining_;
187 } else if (utf8_bytes_remaining_ == 2) {
188 /// For UTF-8 characters starting with 0xe0, their length is 3 bytes, and
189 /// the range of the second byte is [0xa0, 0xbf]. For UTF-8 characters
190 /// starting with 0xed, their length is 3 bytes, and the range of the second
191 /// byte is [0x80, 0x9f]. Reference: Table 3-7 in
192 /// https://www.unicode.org/versions/Unicode14.0.0/ch03.pdf
193 if (((c & 0xc0) != 0x80) || (utf8_first_byte_ == 0xe0 && c < 0xa0) ||
194 (utf8_first_byte_ == 0xed && c > 0x9f)) {
195 return false;
196 }
197 --utf8_bytes_remaining_;
198 } else if (utf8_bytes_remaining_ == 3) {
199 /// For UTF-8 characters starting with 0xf0, their length is 4 bytes, and
200 /// the range of the second byte is [0x90, 0xbf]. For UTF-8 characters
201 /// starting with 0xf4, their length is 4 bytes, and the range of the second
202 /// byte is [0x80, 0x8f]. Reference: Table 3-7 in
203 /// https://www.unicode.org/versions/Unicode14.0.0/ch03.pdf
204 if (((c & 0xc0) != 0x80) || (utf8_first_byte_ == 0xf0 && c < 0x90) ||
205 (utf8_first_byte_ == 0xf4 && c > 0x8f)) {
206 return false;
207 }
208 --utf8_bytes_remaining_;
209 } else {
210 abort();
211 }
212
213 string_.push_back(static_cast<uint8_t>(c));
214 return true;
215 }
216
StringAddUtf32(uint32_t c)217 bool JsonReader::StringAddUtf32(uint32_t c) {
218 if (c <= 0x7f) {
219 return StringAddChar(c);
220 } else if (c <= 0x7ff) {
221 uint32_t b1 = 0xc0 | ((c >> 6) & 0x1f);
222 uint32_t b2 = 0x80 | (c & 0x3f);
223 return StringAddChar(b1) && StringAddChar(b2);
224 } else if (c <= 0xffff) {
225 uint32_t b1 = 0xe0 | ((c >> 12) & 0x0f);
226 uint32_t b2 = 0x80 | ((c >> 6) & 0x3f);
227 uint32_t b3 = 0x80 | (c & 0x3f);
228 return StringAddChar(b1) && StringAddChar(b2) && StringAddChar(b3);
229 } else if (c <= 0x1fffff) {
230 uint32_t b1 = 0xf0 | ((c >> 18) & 0x07);
231 uint32_t b2 = 0x80 | ((c >> 12) & 0x3f);
232 uint32_t b3 = 0x80 | ((c >> 6) & 0x3f);
233 uint32_t b4 = 0x80 | (c & 0x3f);
234 return StringAddChar(b1) && StringAddChar(b2) && StringAddChar(b3) &&
235 StringAddChar(b4);
236 } else {
237 return false;
238 }
239 }
240
ReadChar()241 uint32_t JsonReader::ReadChar() {
242 if (remaining_input_ == 0) return GRPC_JSON_READ_CHAR_EOF;
243 const uint32_t r = *input_++;
244 --remaining_input_;
245 if (r == 0) {
246 remaining_input_ = 0;
247 return GRPC_JSON_READ_CHAR_EOF;
248 }
249 return r;
250 }
251
CreateAndLinkValue()252 Json* JsonReader::CreateAndLinkValue() {
253 if (stack_.empty()) return &root_value_;
254 return MatchMutable(
255 &stack_.back().data,
256 [&](Json::Object* object) { return &(*object)[std::move(key_)]; },
257 [&](Json::Array* array) {
258 array->emplace_back();
259 return &array->back();
260 });
261 }
262
StartContainer(Json::Type type)263 bool JsonReader::StartContainer(Json::Type type) {
264 if (stack_.size() == GRPC_JSON_MAX_DEPTH) {
265 if (errors_.size() == GRPC_JSON_MAX_ERRORS) {
266 truncated_errors_ = true;
267 } else {
268 errors_.push_back(
269 absl::StrFormat("exceeded max stack depth (%d) at index %" PRIuPTR,
270 GRPC_JSON_MAX_DEPTH, CurrentIndex()));
271 }
272 return false;
273 }
274 stack_.emplace_back();
275 Scope& scope = stack_.back();
276 scope.parent_object_key = std::move(key_);
277 if (type == Json::Type::kObject) {
278 scope.data = Json::Object();
279 } else {
280 CHECK(type == Json::Type::kArray);
281 scope.data = Json::Array();
282 }
283 return true;
284 }
285
EndContainer()286 void JsonReader::EndContainer() {
287 CHECK(!stack_.empty());
288 Scope scope = std::move(stack_.back());
289 stack_.pop_back();
290 key_ = std::move(scope.parent_object_key);
291 Json* value = CreateAndLinkValue();
292 *value = scope.TakeAsJson();
293 }
294
SetKey()295 void JsonReader::SetKey() {
296 key_ = std::move(string_);
297 string_.clear();
298 const Json::Object& object = absl::get<Json::Object>(stack_.back().data);
299 if (object.find(key_) != object.end()) {
300 if (errors_.size() == GRPC_JSON_MAX_ERRORS) {
301 truncated_errors_ = true;
302 } else {
303 errors_.push_back(
304 absl::StrFormat("duplicate key \"%s\" at index %" PRIuPTR, key_,
305 CurrentIndex() - key_.size() - 2));
306 }
307 }
308 }
309
SetString()310 void JsonReader::SetString() {
311 Json* value = CreateAndLinkValue();
312 *value = Json::FromString(std::move(string_));
313 string_.clear();
314 }
315
SetNumber()316 bool JsonReader::SetNumber() {
317 Json* value = CreateAndLinkValue();
318 *value = Json::FromNumber(std::move(string_));
319 string_.clear();
320 return true;
321 }
322
SetTrue()323 void JsonReader::SetTrue() {
324 Json* value = CreateAndLinkValue();
325 *value = Json::FromBool(true);
326 string_.clear();
327 }
328
SetFalse()329 void JsonReader::SetFalse() {
330 Json* value = CreateAndLinkValue();
331 *value = Json::FromBool(false);
332 string_.clear();
333 }
334
SetNull()335 void JsonReader::SetNull() { CreateAndLinkValue(); }
336
IsComplete()337 bool JsonReader::IsComplete() {
338 return (stack_.empty() && (state_ == State::GRPC_JSON_STATE_END ||
339 state_ == State::GRPC_JSON_STATE_VALUE_END));
340 }
341
342 // Call this function to start parsing the input. It will return the following:
343 // . GRPC_JSON_DONE if the input got eof, and the parsing finished
344 // successfully.
345 // . GRPC_JSON_PARSE_ERROR if the input was somehow invalid.
346 // . GRPC_JSON_INTERNAL_ERROR if the parser somehow ended into an invalid
347 // internal state.
348 //
Run()349 JsonReader::Status JsonReader::Run() {
350 uint32_t c;
351
352 // This state-machine is a strict implementation of ECMA-404
353 while (true) {
354 c = ReadChar();
355 switch (c) {
356 // Let's process the error case first.
357 case GRPC_JSON_READ_CHAR_EOF:
358 switch (state_) {
359 case State::GRPC_JSON_STATE_VALUE_NUMBER:
360 case State::GRPC_JSON_STATE_VALUE_NUMBER_WITH_DECIMAL:
361 case State::GRPC_JSON_STATE_VALUE_NUMBER_ZERO:
362 case State::GRPC_JSON_STATE_VALUE_NUMBER_EPM:
363 if (!SetNumber()) return Status::GRPC_JSON_PARSE_ERROR;
364 state_ = State::GRPC_JSON_STATE_VALUE_END;
365 break;
366
367 default:
368 break;
369 }
370 if (IsComplete()) {
371 return Status::GRPC_JSON_DONE;
372 }
373 return Status::GRPC_JSON_PARSE_ERROR;
374
375 // Processing whitespaces.
376 case ' ':
377 case '\t':
378 case '\n':
379 case '\r':
380 switch (state_) {
381 case State::GRPC_JSON_STATE_OBJECT_KEY_BEGIN:
382 case State::GRPC_JSON_STATE_OBJECT_KEY_END:
383 case State::GRPC_JSON_STATE_VALUE_BEGIN:
384 case State::GRPC_JSON_STATE_VALUE_END:
385 case State::GRPC_JSON_STATE_END:
386 break;
387
388 case State::GRPC_JSON_STATE_OBJECT_KEY_STRING:
389 case State::GRPC_JSON_STATE_VALUE_STRING:
390 if (c != ' ') return Status::GRPC_JSON_PARSE_ERROR;
391 if (unicode_high_surrogate_ != 0) {
392 return Status::GRPC_JSON_PARSE_ERROR;
393 }
394 if (!StringAddChar(c)) return Status::GRPC_JSON_PARSE_ERROR;
395 break;
396
397 case State::GRPC_JSON_STATE_VALUE_NUMBER:
398 case State::GRPC_JSON_STATE_VALUE_NUMBER_WITH_DECIMAL:
399 case State::GRPC_JSON_STATE_VALUE_NUMBER_ZERO:
400 case State::GRPC_JSON_STATE_VALUE_NUMBER_EPM:
401 if (!SetNumber()) return Status::GRPC_JSON_PARSE_ERROR;
402 state_ = State::GRPC_JSON_STATE_VALUE_END;
403 break;
404
405 default:
406 return Status::GRPC_JSON_PARSE_ERROR;
407 }
408 break;
409
410 // Value, object or array terminations.
411 case ',':
412 case '}':
413 case ']':
414 switch (state_) {
415 case State::GRPC_JSON_STATE_OBJECT_KEY_STRING:
416 case State::GRPC_JSON_STATE_VALUE_STRING:
417 if (unicode_high_surrogate_ != 0) {
418 return Status::GRPC_JSON_PARSE_ERROR;
419 }
420 if (!StringAddChar(c)) return Status::GRPC_JSON_PARSE_ERROR;
421 break;
422
423 case State::GRPC_JSON_STATE_VALUE_NUMBER:
424 case State::GRPC_JSON_STATE_VALUE_NUMBER_WITH_DECIMAL:
425 case State::GRPC_JSON_STATE_VALUE_NUMBER_ZERO:
426 case State::GRPC_JSON_STATE_VALUE_NUMBER_EPM:
427 if (stack_.empty()) {
428 return Status::GRPC_JSON_PARSE_ERROR;
429 } else if (c == '}' &&
430 stack_.back().type() != Json::Type::kObject) {
431 return Status::GRPC_JSON_PARSE_ERROR;
432 } else if (c == ']' && stack_.back().type() != Json::Type::kArray) {
433 return Status::GRPC_JSON_PARSE_ERROR;
434 }
435 if (!SetNumber()) return Status::GRPC_JSON_PARSE_ERROR;
436 state_ = State::GRPC_JSON_STATE_VALUE_END;
437 ABSL_FALLTHROUGH_INTENDED;
438
439 case State::GRPC_JSON_STATE_VALUE_END:
440 case State::GRPC_JSON_STATE_OBJECT_KEY_BEGIN:
441 case State::GRPC_JSON_STATE_VALUE_BEGIN:
442 if (c == ',') {
443 if (state_ != State::GRPC_JSON_STATE_VALUE_END) {
444 return Status::GRPC_JSON_PARSE_ERROR;
445 }
446 if (!stack_.empty() &&
447 stack_.back().type() == Json::Type::kObject) {
448 state_ = State::GRPC_JSON_STATE_OBJECT_KEY_BEGIN;
449 } else if (!stack_.empty() &&
450 stack_.back().type() == Json::Type::kArray) {
451 state_ = State::GRPC_JSON_STATE_VALUE_BEGIN;
452 } else {
453 return Status::GRPC_JSON_PARSE_ERROR;
454 }
455 } else {
456 if (stack_.empty()) {
457 return Status::GRPC_JSON_PARSE_ERROR;
458 }
459 if (c == '}' && stack_.back().type() != Json::Type::kObject) {
460 return Status::GRPC_JSON_PARSE_ERROR;
461 }
462 if (c == '}' &&
463 state_ == State::GRPC_JSON_STATE_OBJECT_KEY_BEGIN &&
464 !container_just_begun_) {
465 return Status::GRPC_JSON_PARSE_ERROR;
466 }
467 if (c == ']' && stack_.back().type() != Json::Type::kArray) {
468 return Status::GRPC_JSON_PARSE_ERROR;
469 }
470 if (c == ']' && state_ == State::GRPC_JSON_STATE_VALUE_BEGIN &&
471 !container_just_begun_) {
472 return Status::GRPC_JSON_PARSE_ERROR;
473 }
474 state_ = State::GRPC_JSON_STATE_VALUE_END;
475 container_just_begun_ = false;
476 EndContainer();
477 if (stack_.empty()) {
478 state_ = State::GRPC_JSON_STATE_END;
479 }
480 }
481 break;
482
483 default:
484 return Status::GRPC_JSON_PARSE_ERROR;
485 }
486 break;
487
488 // In-string escaping.
489 case '\\':
490 switch (state_) {
491 case State::GRPC_JSON_STATE_OBJECT_KEY_STRING:
492 escaped_string_was_key_ = true;
493 state_ = State::GRPC_JSON_STATE_STRING_ESCAPE;
494 break;
495
496 case State::GRPC_JSON_STATE_VALUE_STRING:
497 escaped_string_was_key_ = false;
498 state_ = State::GRPC_JSON_STATE_STRING_ESCAPE;
499 break;
500
501 // This is the \\ case.
502 case State::GRPC_JSON_STATE_STRING_ESCAPE:
503 if (unicode_high_surrogate_ != 0) {
504 return Status::GRPC_JSON_PARSE_ERROR;
505 }
506 if (!StringAddChar('\\')) return Status::GRPC_JSON_PARSE_ERROR;
507 if (escaped_string_was_key_) {
508 state_ = State::GRPC_JSON_STATE_OBJECT_KEY_STRING;
509 } else {
510 state_ = State::GRPC_JSON_STATE_VALUE_STRING;
511 }
512 break;
513
514 default:
515 return Status::GRPC_JSON_PARSE_ERROR;
516 }
517 break;
518
519 default:
520 container_just_begun_ = false;
521 switch (state_) {
522 case State::GRPC_JSON_STATE_OBJECT_KEY_BEGIN:
523 if (c != '"') return Status::GRPC_JSON_PARSE_ERROR;
524 state_ = State::GRPC_JSON_STATE_OBJECT_KEY_STRING;
525 break;
526
527 case State::GRPC_JSON_STATE_OBJECT_KEY_STRING:
528 if (unicode_high_surrogate_ != 0) {
529 return Status::GRPC_JSON_PARSE_ERROR;
530 }
531 if (c == '"') {
532 state_ = State::GRPC_JSON_STATE_OBJECT_KEY_END;
533 // Once the key is parsed, there should no un-matched utf8
534 // encoded bytes.
535 if (utf8_bytes_remaining_ != 0) {
536 return Status::GRPC_JSON_PARSE_ERROR;
537 }
538 SetKey();
539 } else {
540 if (c < 32) return Status::GRPC_JSON_PARSE_ERROR;
541 if (!StringAddChar(c)) return Status::GRPC_JSON_PARSE_ERROR;
542 }
543 break;
544
545 case State::GRPC_JSON_STATE_VALUE_STRING:
546 if (unicode_high_surrogate_ != 0) {
547 return Status::GRPC_JSON_PARSE_ERROR;
548 }
549 if (c == '"') {
550 state_ = State::GRPC_JSON_STATE_VALUE_END;
551 // Once the value is parsed, there should no un-matched utf8
552 // encoded bytes.
553 if (utf8_bytes_remaining_ != 0) {
554 return Status::GRPC_JSON_PARSE_ERROR;
555 }
556 SetString();
557 } else {
558 if (c < 32) return Status::GRPC_JSON_PARSE_ERROR;
559 if (!StringAddChar(c)) return Status::GRPC_JSON_PARSE_ERROR;
560 }
561 break;
562
563 case State::GRPC_JSON_STATE_OBJECT_KEY_END:
564 if (c != ':') return Status::GRPC_JSON_PARSE_ERROR;
565 state_ = State::GRPC_JSON_STATE_VALUE_BEGIN;
566 break;
567
568 case State::GRPC_JSON_STATE_VALUE_BEGIN:
569 switch (c) {
570 case 't':
571 state_ = State::GRPC_JSON_STATE_VALUE_TRUE_R;
572 break;
573
574 case 'f':
575 state_ = State::GRPC_JSON_STATE_VALUE_FALSE_A;
576 break;
577
578 case 'n':
579 state_ = State::GRPC_JSON_STATE_VALUE_NULL_U;
580 break;
581
582 case '"':
583 state_ = State::GRPC_JSON_STATE_VALUE_STRING;
584 break;
585
586 case '0':
587 if (!StringAddChar(c)) return Status::GRPC_JSON_PARSE_ERROR;
588 state_ = State::GRPC_JSON_STATE_VALUE_NUMBER_ZERO;
589 break;
590
591 case '1':
592 case '2':
593 case '3':
594 case '4':
595 case '5':
596 case '6':
597 case '7':
598 case '8':
599 case '9':
600 case '-':
601 if (!StringAddChar(c)) return Status::GRPC_JSON_PARSE_ERROR;
602 state_ = State::GRPC_JSON_STATE_VALUE_NUMBER;
603 break;
604
605 case '{':
606 container_just_begun_ = true;
607 if (!StartContainer(Json::Type::kObject)) {
608 return Status::GRPC_JSON_PARSE_ERROR;
609 }
610 state_ = State::GRPC_JSON_STATE_OBJECT_KEY_BEGIN;
611 break;
612
613 case '[':
614 container_just_begun_ = true;
615 if (!StartContainer(Json::Type::kArray)) {
616 return Status::GRPC_JSON_PARSE_ERROR;
617 }
618 break;
619 default:
620 return Status::GRPC_JSON_PARSE_ERROR;
621 }
622 break;
623
624 case State::GRPC_JSON_STATE_STRING_ESCAPE:
625 if (escaped_string_was_key_) {
626 state_ = State::GRPC_JSON_STATE_OBJECT_KEY_STRING;
627 } else {
628 state_ = State::GRPC_JSON_STATE_VALUE_STRING;
629 }
630 if (unicode_high_surrogate_ && c != 'u') {
631 return Status::GRPC_JSON_PARSE_ERROR;
632 }
633 switch (c) {
634 case '"':
635 case '/':
636 if (!StringAddChar(c)) return Status::GRPC_JSON_PARSE_ERROR;
637 break;
638 case 'b':
639 if (!StringAddChar('\b')) return Status::GRPC_JSON_PARSE_ERROR;
640 break;
641 case 'f':
642 if (!StringAddChar('\f')) return Status::GRPC_JSON_PARSE_ERROR;
643 break;
644 case 'n':
645 if (!StringAddChar('\n')) return Status::GRPC_JSON_PARSE_ERROR;
646 break;
647 case 'r':
648 if (!StringAddChar('\r')) return Status::GRPC_JSON_PARSE_ERROR;
649 break;
650 case 't':
651 if (!StringAddChar('\t')) return Status::GRPC_JSON_PARSE_ERROR;
652 break;
653 case 'u':
654 state_ = State::GRPC_JSON_STATE_STRING_ESCAPE_U1;
655 unicode_char_ = 0;
656 break;
657 default:
658 return Status::GRPC_JSON_PARSE_ERROR;
659 }
660 break;
661
662 case State::GRPC_JSON_STATE_STRING_ESCAPE_U1:
663 case State::GRPC_JSON_STATE_STRING_ESCAPE_U2:
664 case State::GRPC_JSON_STATE_STRING_ESCAPE_U3:
665 case State::GRPC_JSON_STATE_STRING_ESCAPE_U4:
666 if ((c >= '0') && (c <= '9')) {
667 c -= '0';
668 } else if ((c >= 'A') && (c <= 'F')) {
669 c -= 'A' - 10;
670 } else if ((c >= 'a') && (c <= 'f')) {
671 c -= 'a' - 10;
672 } else {
673 return Status::GRPC_JSON_PARSE_ERROR;
674 }
675 unicode_char_ = static_cast<uint16_t>(unicode_char_ << 4);
676 unicode_char_ = static_cast<uint16_t>(unicode_char_ | c);
677
678 switch (state_) {
679 case State::GRPC_JSON_STATE_STRING_ESCAPE_U1:
680 state_ = State::GRPC_JSON_STATE_STRING_ESCAPE_U2;
681 break;
682 case State::GRPC_JSON_STATE_STRING_ESCAPE_U2:
683 state_ = State::GRPC_JSON_STATE_STRING_ESCAPE_U3;
684 break;
685 case State::GRPC_JSON_STATE_STRING_ESCAPE_U3:
686 state_ = State::GRPC_JSON_STATE_STRING_ESCAPE_U4;
687 break;
688 case State::GRPC_JSON_STATE_STRING_ESCAPE_U4:
689 // See grpc_json_writer_escape_string to have a description
690 // of what's going on here.
691 //
692 if ((unicode_char_ & 0xfc00) == 0xd800) {
693 // high surrogate utf-16
694 if (unicode_high_surrogate_ != 0) {
695 return Status::GRPC_JSON_PARSE_ERROR;
696 }
697 unicode_high_surrogate_ = unicode_char_;
698 } else if ((unicode_char_ & 0xfc00) == 0xdc00) {
699 // low surrogate utf-16
700 uint32_t utf32;
701 if (unicode_high_surrogate_ == 0) {
702 return Status::GRPC_JSON_PARSE_ERROR;
703 }
704 utf32 = 0x10000;
705 utf32 += static_cast<uint32_t>(
706 (unicode_high_surrogate_ - 0xd800) * 0x400);
707 utf32 += static_cast<uint32_t>(unicode_char_ - 0xdc00);
708 if (!StringAddUtf32(utf32)) {
709 return Status::GRPC_JSON_PARSE_ERROR;
710 }
711 unicode_high_surrogate_ = 0;
712 } else {
713 // anything else
714 if (unicode_high_surrogate_ != 0) {
715 return Status::GRPC_JSON_PARSE_ERROR;
716 }
717 if (!StringAddUtf32(unicode_char_)) {
718 return Status::GRPC_JSON_PARSE_ERROR;
719 }
720 }
721 if (escaped_string_was_key_) {
722 state_ = State::GRPC_JSON_STATE_OBJECT_KEY_STRING;
723 } else {
724 state_ = State::GRPC_JSON_STATE_VALUE_STRING;
725 }
726 break;
727 default:
728 GPR_UNREACHABLE_CODE(return Status::GRPC_JSON_INTERNAL_ERROR);
729 }
730 break;
731
732 case State::GRPC_JSON_STATE_VALUE_NUMBER:
733 if (!StringAddChar(c)) return Status::GRPC_JSON_PARSE_ERROR;
734 switch (c) {
735 case '0':
736 case '1':
737 case '2':
738 case '3':
739 case '4':
740 case '5':
741 case '6':
742 case '7':
743 case '8':
744 case '9':
745 break;
746 case 'e':
747 case 'E':
748 state_ = State::GRPC_JSON_STATE_VALUE_NUMBER_E;
749 break;
750 case '.':
751 state_ = State::GRPC_JSON_STATE_VALUE_NUMBER_DOT;
752 break;
753 default:
754 return Status::GRPC_JSON_PARSE_ERROR;
755 }
756 break;
757
758 case State::GRPC_JSON_STATE_VALUE_NUMBER_WITH_DECIMAL:
759 if (!StringAddChar(c)) return Status::GRPC_JSON_PARSE_ERROR;
760 switch (c) {
761 case '0':
762 case '1':
763 case '2':
764 case '3':
765 case '4':
766 case '5':
767 case '6':
768 case '7':
769 case '8':
770 case '9':
771 break;
772 case 'e':
773 case 'E':
774 state_ = State::GRPC_JSON_STATE_VALUE_NUMBER_E;
775 break;
776 default:
777 return Status::GRPC_JSON_PARSE_ERROR;
778 }
779 break;
780
781 case State::GRPC_JSON_STATE_VALUE_NUMBER_ZERO:
782 if (c != '.') return Status::GRPC_JSON_PARSE_ERROR;
783 if (!StringAddChar(c)) return Status::GRPC_JSON_PARSE_ERROR;
784 state_ = State::GRPC_JSON_STATE_VALUE_NUMBER_DOT;
785 break;
786
787 case State::GRPC_JSON_STATE_VALUE_NUMBER_DOT:
788 if (!StringAddChar(c)) return Status::GRPC_JSON_PARSE_ERROR;
789 switch (c) {
790 case '0':
791 case '1':
792 case '2':
793 case '3':
794 case '4':
795 case '5':
796 case '6':
797 case '7':
798 case '8':
799 case '9':
800 state_ = State::GRPC_JSON_STATE_VALUE_NUMBER_WITH_DECIMAL;
801 break;
802 default:
803 return Status::GRPC_JSON_PARSE_ERROR;
804 }
805 break;
806
807 case State::GRPC_JSON_STATE_VALUE_NUMBER_E:
808 if (!StringAddChar(c)) return Status::GRPC_JSON_PARSE_ERROR;
809 switch (c) {
810 case '0':
811 case '1':
812 case '2':
813 case '3':
814 case '4':
815 case '5':
816 case '6':
817 case '7':
818 case '8':
819 case '9':
820 case '+':
821 case '-':
822 state_ = State::GRPC_JSON_STATE_VALUE_NUMBER_EPM;
823 break;
824 default:
825 return Status::GRPC_JSON_PARSE_ERROR;
826 }
827 break;
828
829 case State::GRPC_JSON_STATE_VALUE_NUMBER_EPM:
830 if (!StringAddChar(c)) return Status::GRPC_JSON_PARSE_ERROR;
831 switch (c) {
832 case '0':
833 case '1':
834 case '2':
835 case '3':
836 case '4':
837 case '5':
838 case '6':
839 case '7':
840 case '8':
841 case '9':
842 break;
843 default:
844 return Status::GRPC_JSON_PARSE_ERROR;
845 }
846 break;
847
848 case State::GRPC_JSON_STATE_VALUE_TRUE_R:
849 if (c != 'r') return Status::GRPC_JSON_PARSE_ERROR;
850 state_ = State::GRPC_JSON_STATE_VALUE_TRUE_U;
851 break;
852
853 case State::GRPC_JSON_STATE_VALUE_TRUE_U:
854 if (c != 'u') return Status::GRPC_JSON_PARSE_ERROR;
855 state_ = State::GRPC_JSON_STATE_VALUE_TRUE_E;
856 break;
857
858 case State::GRPC_JSON_STATE_VALUE_TRUE_E:
859 if (c != 'e') return Status::GRPC_JSON_PARSE_ERROR;
860 SetTrue();
861 state_ = State::GRPC_JSON_STATE_VALUE_END;
862 break;
863
864 case State::GRPC_JSON_STATE_VALUE_FALSE_A:
865 if (c != 'a') return Status::GRPC_JSON_PARSE_ERROR;
866 state_ = State::GRPC_JSON_STATE_VALUE_FALSE_L;
867 break;
868
869 case State::GRPC_JSON_STATE_VALUE_FALSE_L:
870 if (c != 'l') return Status::GRPC_JSON_PARSE_ERROR;
871 state_ = State::GRPC_JSON_STATE_VALUE_FALSE_S;
872 break;
873
874 case State::GRPC_JSON_STATE_VALUE_FALSE_S:
875 if (c != 's') return Status::GRPC_JSON_PARSE_ERROR;
876 state_ = State::GRPC_JSON_STATE_VALUE_FALSE_E;
877 break;
878
879 case State::GRPC_JSON_STATE_VALUE_FALSE_E:
880 if (c != 'e') return Status::GRPC_JSON_PARSE_ERROR;
881 SetFalse();
882 state_ = State::GRPC_JSON_STATE_VALUE_END;
883 break;
884
885 case State::GRPC_JSON_STATE_VALUE_NULL_U:
886 if (c != 'u') return Status::GRPC_JSON_PARSE_ERROR;
887 state_ = State::GRPC_JSON_STATE_VALUE_NULL_L1;
888 break;
889
890 case State::GRPC_JSON_STATE_VALUE_NULL_L1:
891 if (c != 'l') return Status::GRPC_JSON_PARSE_ERROR;
892 state_ = State::GRPC_JSON_STATE_VALUE_NULL_L2;
893 break;
894
895 case State::GRPC_JSON_STATE_VALUE_NULL_L2:
896 if (c != 'l') return Status::GRPC_JSON_PARSE_ERROR;
897 SetNull();
898 state_ = State::GRPC_JSON_STATE_VALUE_END;
899 break;
900
901 // All of the VALUE_END cases are handled in the specialized case
902 // above.
903 case State::GRPC_JSON_STATE_VALUE_END:
904 switch (c) {
905 case ',':
906 case '}':
907 case ']':
908 GPR_UNREACHABLE_CODE(return Status::GRPC_JSON_INTERNAL_ERROR);
909 break;
910
911 default:
912 return Status::GRPC_JSON_PARSE_ERROR;
913 }
914 break;
915
916 case State::GRPC_JSON_STATE_END:
917 return Status::GRPC_JSON_PARSE_ERROR;
918 }
919 }
920 }
921
922 GPR_UNREACHABLE_CODE(return Status::GRPC_JSON_INTERNAL_ERROR);
923 }
924
Parse(absl::string_view input)925 absl::StatusOr<Json> JsonReader::Parse(absl::string_view input) {
926 JsonReader reader(input);
927 Status status = reader.Run();
928 if (reader.truncated_errors_) {
929 reader.errors_.push_back(
930 "too many errors encountered during JSON parsing -- fix reported "
931 "errors and try again to see additional errors");
932 }
933 if (status == Status::GRPC_JSON_INTERNAL_ERROR) {
934 reader.errors_.push_back(absl::StrCat(
935 "internal error in JSON parser at index ", reader.CurrentIndex()));
936 } else if (status == Status::GRPC_JSON_PARSE_ERROR) {
937 reader.errors_.push_back(
938 absl::StrCat("JSON parse error at index ", reader.CurrentIndex()));
939 }
940 if (!reader.errors_.empty()) {
941 return absl::InvalidArgumentError(absl::StrCat(
942 "JSON parsing failed: [", absl::StrJoin(reader.errors_, "; "), "]"));
943 }
944 return std::move(reader.root_value_);
945 }
946
947 } // namespace
948
JsonParse(absl::string_view json_str)949 absl::StatusOr<Json> JsonParse(absl::string_view json_str) {
950 return JsonReader::Parse(json_str);
951 }
952
953 } // namespace grpc_core
954