1 //
2 // Copyright 2015-2016 gRPC authors.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16
17 #include <grpc/support/port_platform.h>
18
19 #include <inttypes.h>
20 #include <stdlib.h>
21
22 #include <algorithm>
23 #include <map>
24 #include <string>
25 #include <utility>
26 #include <vector>
27
28 #include "absl/base/attributes.h"
29 #include "absl/status/status.h"
30 #include "absl/status/statusor.h"
31 #include "absl/strings/str_cat.h"
32 #include "absl/strings/str_format.h"
33 #include "absl/strings/str_join.h"
34 #include "absl/strings/string_view.h"
35 #include "absl/types/variant.h"
36
37 #include <grpc/support/json.h>
38 #include <grpc/support/log.h>
39
40 #include "src/core/lib/gprpp/match.h"
41 #include "src/core/lib/json/json.h"
42
// Maximum number of simultaneously open containers (objects/arrays).
// StartContainer() records an error and fails once the scope stack already
// holds this many entries.
#define GRPC_JSON_MAX_DEPTH 255
// Maximum number of detailed error messages stored by the reader. Once this
// many are stored, further errors only set the "truncated" flag.
#define GRPC_JSON_MAX_ERRORS 16
45
46 namespace grpc_core {
47
48 namespace {
49
50 class JsonReader {
51 public:
52 static absl::StatusOr<Json> Parse(absl::string_view input);
53
54 private:
55 enum class Status {
56 GRPC_JSON_DONE, // The parser finished successfully.
57 GRPC_JSON_PARSE_ERROR, // The parser found an error in the json stream.
58 GRPC_JSON_INTERNAL_ERROR // The parser got an internal error.
59 };
60
61 enum class State {
62 GRPC_JSON_STATE_OBJECT_KEY_BEGIN,
63 GRPC_JSON_STATE_OBJECT_KEY_STRING,
64 GRPC_JSON_STATE_OBJECT_KEY_END,
65 GRPC_JSON_STATE_VALUE_BEGIN,
66 GRPC_JSON_STATE_VALUE_STRING,
67 GRPC_JSON_STATE_STRING_ESCAPE,
68 GRPC_JSON_STATE_STRING_ESCAPE_U1,
69 GRPC_JSON_STATE_STRING_ESCAPE_U2,
70 GRPC_JSON_STATE_STRING_ESCAPE_U3,
71 GRPC_JSON_STATE_STRING_ESCAPE_U4,
72 GRPC_JSON_STATE_VALUE_NUMBER,
73 GRPC_JSON_STATE_VALUE_NUMBER_WITH_DECIMAL,
74 GRPC_JSON_STATE_VALUE_NUMBER_ZERO,
75 GRPC_JSON_STATE_VALUE_NUMBER_DOT,
76 GRPC_JSON_STATE_VALUE_NUMBER_E,
77 GRPC_JSON_STATE_VALUE_NUMBER_EPM,
78 GRPC_JSON_STATE_VALUE_TRUE_R,
79 GRPC_JSON_STATE_VALUE_TRUE_U,
80 GRPC_JSON_STATE_VALUE_TRUE_E,
81 GRPC_JSON_STATE_VALUE_FALSE_A,
82 GRPC_JSON_STATE_VALUE_FALSE_L,
83 GRPC_JSON_STATE_VALUE_FALSE_S,
84 GRPC_JSON_STATE_VALUE_FALSE_E,
85 GRPC_JSON_STATE_VALUE_NULL_U,
86 GRPC_JSON_STATE_VALUE_NULL_L1,
87 GRPC_JSON_STATE_VALUE_NULL_L2,
88 GRPC_JSON_STATE_VALUE_END,
89 GRPC_JSON_STATE_END
90 };
91
92 // The first non-unicode value is 0x110000. But let's pick
93 // a value high enough to start our error codes from. These
94 // values are safe to return from the read_char function.
95 //
96 static constexpr uint32_t GRPC_JSON_READ_CHAR_EOF = 0x7ffffff0;
97
98 struct Scope {
99 std::string parent_object_key;
100 absl::variant<Json::Object, Json::Array> data;
101
typegrpc_core::__anon3ff7cd990111::JsonReader::Scope102 Json::Type type() const {
103 return Match(
104 data, [](const Json::Object&) { return Json::Type::kObject; },
105 [](const Json::Array&) { return Json::Type::kArray; });
106 }
107
TakeAsJsongrpc_core::__anon3ff7cd990111::JsonReader::Scope108 Json TakeAsJson() {
109 return MatchMutable(
110 &data,
111 [&](Json::Object* object) {
112 return Json::FromObject(std::move(*object));
113 },
114 [&](Json::Array* array) {
115 return Json::FromArray(std::move(*array));
116 });
117 }
118 };
119
JsonReader(absl::string_view input)120 explicit JsonReader(absl::string_view input)
121 : original_input_(reinterpret_cast<const uint8_t*>(input.data())),
122 input_(original_input_),
123 remaining_input_(input.size()) {}
124
125 Status Run();
126 uint32_t ReadChar();
127 bool IsComplete();
128
CurrentIndex() const129 size_t CurrentIndex() const { return input_ - original_input_ - 1; }
130
131 GRPC_MUST_USE_RESULT bool StringAddChar(uint32_t c);
132 GRPC_MUST_USE_RESULT bool StringAddUtf32(uint32_t c);
133
134 Json* CreateAndLinkValue();
135 bool StartContainer(Json::Type type);
136 void EndContainer();
137 void SetKey();
138 void SetString();
139 bool SetNumber();
140 void SetTrue();
141 void SetFalse();
142 void SetNull();
143
144 const uint8_t* original_input_;
145 const uint8_t* input_;
146 size_t remaining_input_;
147
148 State state_ = State::GRPC_JSON_STATE_VALUE_BEGIN;
149 bool escaped_string_was_key_ = false;
150 bool container_just_begun_ = false;
151 uint16_t unicode_char_ = 0;
152 uint16_t unicode_high_surrogate_ = 0;
153 std::vector<std::string> errors_;
154 bool truncated_errors_ = false;
155 uint8_t utf8_bytes_remaining_ = 0;
156 uint8_t utf8_first_byte_ = 0;
157
158 Json root_value_;
159 std::vector<Scope> stack_;
160
161 std::string key_;
162 std::string string_;
163 };
164
StringAddChar(uint32_t c)165 bool JsonReader::StringAddChar(uint32_t c) {
166 if (utf8_bytes_remaining_ == 0) {
167 if ((c & 0x80) == 0) {
168 utf8_bytes_remaining_ = 0;
169 } else if ((c & 0xe0) == 0xc0 && c >= 0xc2) {
170 /// For the UTF-8 characters with length of 2 bytes, the range of the
171 /// first byte is [0xc2, 0xdf]. Reference: Table 3-7 in
172 /// https://www.unicode.org/versions/Unicode14.0.0/ch03.pdf
173 utf8_bytes_remaining_ = 1;
174 } else if ((c & 0xf0) == 0xe0) {
175 utf8_bytes_remaining_ = 2;
176 } else if ((c & 0xf8) == 0xf0 && c <= 0xf4) {
177 /// For the UTF-8 characters with length of 4 bytes, the range of the
178 /// first byte is [0xf0, 0xf4]. Reference: Table 3-7 in
179 /// https://www.unicode.org/versions/Unicode14.0.0/ch03.pdf
180 utf8_bytes_remaining_ = 3;
181 } else {
182 return false;
183 }
184 utf8_first_byte_ = c;
185 } else if (utf8_bytes_remaining_ == 1) {
186 if ((c & 0xc0) != 0x80) {
187 return false;
188 }
189 --utf8_bytes_remaining_;
190 } else if (utf8_bytes_remaining_ == 2) {
191 /// For UTF-8 characters starting with 0xe0, their length is 3 bytes, and
192 /// the range of the second byte is [0xa0, 0xbf]. For UTF-8 characters
193 /// starting with 0xed, their length is 3 bytes, and the range of the second
194 /// byte is [0x80, 0x9f]. Reference: Table 3-7 in
195 /// https://www.unicode.org/versions/Unicode14.0.0/ch03.pdf
196 if (((c & 0xc0) != 0x80) || (utf8_first_byte_ == 0xe0 && c < 0xa0) ||
197 (utf8_first_byte_ == 0xed && c > 0x9f)) {
198 return false;
199 }
200 --utf8_bytes_remaining_;
201 } else if (utf8_bytes_remaining_ == 3) {
202 /// For UTF-8 characters starting with 0xf0, their length is 4 bytes, and
203 /// the range of the second byte is [0x90, 0xbf]. For UTF-8 characters
204 /// starting with 0xf4, their length is 4 bytes, and the range of the second
205 /// byte is [0x80, 0x8f]. Reference: Table 3-7 in
206 /// https://www.unicode.org/versions/Unicode14.0.0/ch03.pdf
207 if (((c & 0xc0) != 0x80) || (utf8_first_byte_ == 0xf0 && c < 0x90) ||
208 (utf8_first_byte_ == 0xf4 && c > 0x8f)) {
209 return false;
210 }
211 --utf8_bytes_remaining_;
212 } else {
213 abort();
214 }
215
216 string_.push_back(static_cast<uint8_t>(c));
217 return true;
218 }
219
StringAddUtf32(uint32_t c)220 bool JsonReader::StringAddUtf32(uint32_t c) {
221 if (c <= 0x7f) {
222 return StringAddChar(c);
223 } else if (c <= 0x7ff) {
224 uint32_t b1 = 0xc0 | ((c >> 6) & 0x1f);
225 uint32_t b2 = 0x80 | (c & 0x3f);
226 return StringAddChar(b1) && StringAddChar(b2);
227 } else if (c <= 0xffff) {
228 uint32_t b1 = 0xe0 | ((c >> 12) & 0x0f);
229 uint32_t b2 = 0x80 | ((c >> 6) & 0x3f);
230 uint32_t b3 = 0x80 | (c & 0x3f);
231 return StringAddChar(b1) && StringAddChar(b2) && StringAddChar(b3);
232 } else if (c <= 0x1fffff) {
233 uint32_t b1 = 0xf0 | ((c >> 18) & 0x07);
234 uint32_t b2 = 0x80 | ((c >> 12) & 0x3f);
235 uint32_t b3 = 0x80 | ((c >> 6) & 0x3f);
236 uint32_t b4 = 0x80 | (c & 0x3f);
237 return StringAddChar(b1) && StringAddChar(b2) && StringAddChar(b3) &&
238 StringAddChar(b4);
239 } else {
240 return false;
241 }
242 }
243
ReadChar()244 uint32_t JsonReader::ReadChar() {
245 if (remaining_input_ == 0) return GRPC_JSON_READ_CHAR_EOF;
246 const uint32_t r = *input_++;
247 --remaining_input_;
248 if (r == 0) {
249 remaining_input_ = 0;
250 return GRPC_JSON_READ_CHAR_EOF;
251 }
252 return r;
253 }
254
CreateAndLinkValue()255 Json* JsonReader::CreateAndLinkValue() {
256 if (stack_.empty()) return &root_value_;
257 return MatchMutable(
258 &stack_.back().data,
259 [&](Json::Object* object) { return &(*object)[std::move(key_)]; },
260 [&](Json::Array* array) {
261 array->emplace_back();
262 return &array->back();
263 });
264 }
265
StartContainer(Json::Type type)266 bool JsonReader::StartContainer(Json::Type type) {
267 if (stack_.size() == GRPC_JSON_MAX_DEPTH) {
268 if (errors_.size() == GRPC_JSON_MAX_ERRORS) {
269 truncated_errors_ = true;
270 } else {
271 errors_.push_back(
272 absl::StrFormat("exceeded max stack depth (%d) at index %" PRIuPTR,
273 GRPC_JSON_MAX_DEPTH, CurrentIndex()));
274 }
275 return false;
276 }
277 stack_.emplace_back();
278 Scope& scope = stack_.back();
279 scope.parent_object_key = std::move(key_);
280 if (type == Json::Type::kObject) {
281 scope.data = Json::Object();
282 } else {
283 GPR_ASSERT(type == Json::Type::kArray);
284 scope.data = Json::Array();
285 }
286 return true;
287 }
288
EndContainer()289 void JsonReader::EndContainer() {
290 GPR_ASSERT(!stack_.empty());
291 Scope scope = std::move(stack_.back());
292 stack_.pop_back();
293 key_ = std::move(scope.parent_object_key);
294 Json* value = CreateAndLinkValue();
295 *value = scope.TakeAsJson();
296 }
297
SetKey()298 void JsonReader::SetKey() {
299 key_ = std::move(string_);
300 string_.clear();
301 const Json::Object& object = absl::get<Json::Object>(stack_.back().data);
302 if (object.find(key_) != object.end()) {
303 if (errors_.size() == GRPC_JSON_MAX_ERRORS) {
304 truncated_errors_ = true;
305 } else {
306 errors_.push_back(
307 absl::StrFormat("duplicate key \"%s\" at index %" PRIuPTR, key_,
308 CurrentIndex() - key_.size() - 2));
309 }
310 }
311 }
312
SetString()313 void JsonReader::SetString() {
314 Json* value = CreateAndLinkValue();
315 *value = Json::FromString(std::move(string_));
316 string_.clear();
317 }
318
SetNumber()319 bool JsonReader::SetNumber() {
320 Json* value = CreateAndLinkValue();
321 *value = Json::FromNumber(std::move(string_));
322 string_.clear();
323 return true;
324 }
325
SetTrue()326 void JsonReader::SetTrue() {
327 Json* value = CreateAndLinkValue();
328 *value = Json::FromBool(true);
329 string_.clear();
330 }
331
SetFalse()332 void JsonReader::SetFalse() {
333 Json* value = CreateAndLinkValue();
334 *value = Json::FromBool(false);
335 string_.clear();
336 }
337
SetNull()338 void JsonReader::SetNull() { CreateAndLinkValue(); }
339
IsComplete()340 bool JsonReader::IsComplete() {
341 return (stack_.empty() && (state_ == State::GRPC_JSON_STATE_END ||
342 state_ == State::GRPC_JSON_STATE_VALUE_END));
343 }
344
345 // Call this function to start parsing the input. It will return the following:
346 // . GRPC_JSON_DONE if the input got eof, and the parsing finished
347 // successfully.
348 // . GRPC_JSON_PARSE_ERROR if the input was somehow invalid.
349 // . GRPC_JSON_INTERNAL_ERROR if the parser somehow ended into an invalid
350 // internal state.
351 //
Run()352 JsonReader::Status JsonReader::Run() {
353 uint32_t c;
354
355 // This state-machine is a strict implementation of ECMA-404
356 while (true) {
357 c = ReadChar();
358 switch (c) {
359 // Let's process the error case first.
360 case GRPC_JSON_READ_CHAR_EOF:
361 switch (state_) {
362 case State::GRPC_JSON_STATE_VALUE_NUMBER:
363 case State::GRPC_JSON_STATE_VALUE_NUMBER_WITH_DECIMAL:
364 case State::GRPC_JSON_STATE_VALUE_NUMBER_ZERO:
365 case State::GRPC_JSON_STATE_VALUE_NUMBER_EPM:
366 if (!SetNumber()) return Status::GRPC_JSON_PARSE_ERROR;
367 state_ = State::GRPC_JSON_STATE_VALUE_END;
368 break;
369
370 default:
371 break;
372 }
373 if (IsComplete()) {
374 return Status::GRPC_JSON_DONE;
375 }
376 return Status::GRPC_JSON_PARSE_ERROR;
377
378 // Processing whitespaces.
379 case ' ':
380 case '\t':
381 case '\n':
382 case '\r':
383 switch (state_) {
384 case State::GRPC_JSON_STATE_OBJECT_KEY_BEGIN:
385 case State::GRPC_JSON_STATE_OBJECT_KEY_END:
386 case State::GRPC_JSON_STATE_VALUE_BEGIN:
387 case State::GRPC_JSON_STATE_VALUE_END:
388 case State::GRPC_JSON_STATE_END:
389 break;
390
391 case State::GRPC_JSON_STATE_OBJECT_KEY_STRING:
392 case State::GRPC_JSON_STATE_VALUE_STRING:
393 if (c != ' ') return Status::GRPC_JSON_PARSE_ERROR;
394 if (unicode_high_surrogate_ != 0) {
395 return Status::GRPC_JSON_PARSE_ERROR;
396 }
397 if (!StringAddChar(c)) return Status::GRPC_JSON_PARSE_ERROR;
398 break;
399
400 case State::GRPC_JSON_STATE_VALUE_NUMBER:
401 case State::GRPC_JSON_STATE_VALUE_NUMBER_WITH_DECIMAL:
402 case State::GRPC_JSON_STATE_VALUE_NUMBER_ZERO:
403 case State::GRPC_JSON_STATE_VALUE_NUMBER_EPM:
404 if (!SetNumber()) return Status::GRPC_JSON_PARSE_ERROR;
405 state_ = State::GRPC_JSON_STATE_VALUE_END;
406 break;
407
408 default:
409 return Status::GRPC_JSON_PARSE_ERROR;
410 }
411 break;
412
413 // Value, object or array terminations.
414 case ',':
415 case '}':
416 case ']':
417 switch (state_) {
418 case State::GRPC_JSON_STATE_OBJECT_KEY_STRING:
419 case State::GRPC_JSON_STATE_VALUE_STRING:
420 if (unicode_high_surrogate_ != 0) {
421 return Status::GRPC_JSON_PARSE_ERROR;
422 }
423 if (!StringAddChar(c)) return Status::GRPC_JSON_PARSE_ERROR;
424 break;
425
426 case State::GRPC_JSON_STATE_VALUE_NUMBER:
427 case State::GRPC_JSON_STATE_VALUE_NUMBER_WITH_DECIMAL:
428 case State::GRPC_JSON_STATE_VALUE_NUMBER_ZERO:
429 case State::GRPC_JSON_STATE_VALUE_NUMBER_EPM:
430 if (stack_.empty()) {
431 return Status::GRPC_JSON_PARSE_ERROR;
432 } else if (c == '}' &&
433 stack_.back().type() != Json::Type::kObject) {
434 return Status::GRPC_JSON_PARSE_ERROR;
435 } else if (c == ']' && stack_.back().type() != Json::Type::kArray) {
436 return Status::GRPC_JSON_PARSE_ERROR;
437 }
438 if (!SetNumber()) return Status::GRPC_JSON_PARSE_ERROR;
439 state_ = State::GRPC_JSON_STATE_VALUE_END;
440 ABSL_FALLTHROUGH_INTENDED;
441
442 case State::GRPC_JSON_STATE_VALUE_END:
443 case State::GRPC_JSON_STATE_OBJECT_KEY_BEGIN:
444 case State::GRPC_JSON_STATE_VALUE_BEGIN:
445 if (c == ',') {
446 if (state_ != State::GRPC_JSON_STATE_VALUE_END) {
447 return Status::GRPC_JSON_PARSE_ERROR;
448 }
449 if (!stack_.empty() &&
450 stack_.back().type() == Json::Type::kObject) {
451 state_ = State::GRPC_JSON_STATE_OBJECT_KEY_BEGIN;
452 } else if (!stack_.empty() &&
453 stack_.back().type() == Json::Type::kArray) {
454 state_ = State::GRPC_JSON_STATE_VALUE_BEGIN;
455 } else {
456 return Status::GRPC_JSON_PARSE_ERROR;
457 }
458 } else {
459 if (stack_.empty()) {
460 return Status::GRPC_JSON_PARSE_ERROR;
461 }
462 if (c == '}' && stack_.back().type() != Json::Type::kObject) {
463 return Status::GRPC_JSON_PARSE_ERROR;
464 }
465 if (c == '}' &&
466 state_ == State::GRPC_JSON_STATE_OBJECT_KEY_BEGIN &&
467 !container_just_begun_) {
468 return Status::GRPC_JSON_PARSE_ERROR;
469 }
470 if (c == ']' && stack_.back().type() != Json::Type::kArray) {
471 return Status::GRPC_JSON_PARSE_ERROR;
472 }
473 if (c == ']' && state_ == State::GRPC_JSON_STATE_VALUE_BEGIN &&
474 !container_just_begun_) {
475 return Status::GRPC_JSON_PARSE_ERROR;
476 }
477 state_ = State::GRPC_JSON_STATE_VALUE_END;
478 container_just_begun_ = false;
479 EndContainer();
480 if (stack_.empty()) {
481 state_ = State::GRPC_JSON_STATE_END;
482 }
483 }
484 break;
485
486 default:
487 return Status::GRPC_JSON_PARSE_ERROR;
488 }
489 break;
490
491 // In-string escaping.
492 case '\\':
493 switch (state_) {
494 case State::GRPC_JSON_STATE_OBJECT_KEY_STRING:
495 escaped_string_was_key_ = true;
496 state_ = State::GRPC_JSON_STATE_STRING_ESCAPE;
497 break;
498
499 case State::GRPC_JSON_STATE_VALUE_STRING:
500 escaped_string_was_key_ = false;
501 state_ = State::GRPC_JSON_STATE_STRING_ESCAPE;
502 break;
503
504 // This is the \\ case.
505 case State::GRPC_JSON_STATE_STRING_ESCAPE:
506 if (unicode_high_surrogate_ != 0) {
507 return Status::GRPC_JSON_PARSE_ERROR;
508 }
509 if (!StringAddChar('\\')) return Status::GRPC_JSON_PARSE_ERROR;
510 if (escaped_string_was_key_) {
511 state_ = State::GRPC_JSON_STATE_OBJECT_KEY_STRING;
512 } else {
513 state_ = State::GRPC_JSON_STATE_VALUE_STRING;
514 }
515 break;
516
517 default:
518 return Status::GRPC_JSON_PARSE_ERROR;
519 }
520 break;
521
522 default:
523 container_just_begun_ = false;
524 switch (state_) {
525 case State::GRPC_JSON_STATE_OBJECT_KEY_BEGIN:
526 if (c != '"') return Status::GRPC_JSON_PARSE_ERROR;
527 state_ = State::GRPC_JSON_STATE_OBJECT_KEY_STRING;
528 break;
529
530 case State::GRPC_JSON_STATE_OBJECT_KEY_STRING:
531 if (unicode_high_surrogate_ != 0) {
532 return Status::GRPC_JSON_PARSE_ERROR;
533 }
534 if (c == '"') {
535 state_ = State::GRPC_JSON_STATE_OBJECT_KEY_END;
536 // Once the key is parsed, there should no un-matched utf8
537 // encoded bytes.
538 if (utf8_bytes_remaining_ != 0) {
539 return Status::GRPC_JSON_PARSE_ERROR;
540 }
541 SetKey();
542 } else {
543 if (c < 32) return Status::GRPC_JSON_PARSE_ERROR;
544 if (!StringAddChar(c)) return Status::GRPC_JSON_PARSE_ERROR;
545 }
546 break;
547
548 case State::GRPC_JSON_STATE_VALUE_STRING:
549 if (unicode_high_surrogate_ != 0) {
550 return Status::GRPC_JSON_PARSE_ERROR;
551 }
552 if (c == '"') {
553 state_ = State::GRPC_JSON_STATE_VALUE_END;
554 // Once the value is parsed, there should no un-matched utf8
555 // encoded bytes.
556 if (utf8_bytes_remaining_ != 0) {
557 return Status::GRPC_JSON_PARSE_ERROR;
558 }
559 SetString();
560 } else {
561 if (c < 32) return Status::GRPC_JSON_PARSE_ERROR;
562 if (!StringAddChar(c)) return Status::GRPC_JSON_PARSE_ERROR;
563 }
564 break;
565
566 case State::GRPC_JSON_STATE_OBJECT_KEY_END:
567 if (c != ':') return Status::GRPC_JSON_PARSE_ERROR;
568 state_ = State::GRPC_JSON_STATE_VALUE_BEGIN;
569 break;
570
571 case State::GRPC_JSON_STATE_VALUE_BEGIN:
572 switch (c) {
573 case 't':
574 state_ = State::GRPC_JSON_STATE_VALUE_TRUE_R;
575 break;
576
577 case 'f':
578 state_ = State::GRPC_JSON_STATE_VALUE_FALSE_A;
579 break;
580
581 case 'n':
582 state_ = State::GRPC_JSON_STATE_VALUE_NULL_U;
583 break;
584
585 case '"':
586 state_ = State::GRPC_JSON_STATE_VALUE_STRING;
587 break;
588
589 case '0':
590 if (!StringAddChar(c)) return Status::GRPC_JSON_PARSE_ERROR;
591 state_ = State::GRPC_JSON_STATE_VALUE_NUMBER_ZERO;
592 break;
593
594 case '1':
595 case '2':
596 case '3':
597 case '4':
598 case '5':
599 case '6':
600 case '7':
601 case '8':
602 case '9':
603 case '-':
604 if (!StringAddChar(c)) return Status::GRPC_JSON_PARSE_ERROR;
605 state_ = State::GRPC_JSON_STATE_VALUE_NUMBER;
606 break;
607
608 case '{':
609 container_just_begun_ = true;
610 if (!StartContainer(Json::Type::kObject)) {
611 return Status::GRPC_JSON_PARSE_ERROR;
612 }
613 state_ = State::GRPC_JSON_STATE_OBJECT_KEY_BEGIN;
614 break;
615
616 case '[':
617 container_just_begun_ = true;
618 if (!StartContainer(Json::Type::kArray)) {
619 return Status::GRPC_JSON_PARSE_ERROR;
620 }
621 break;
622 default:
623 return Status::GRPC_JSON_PARSE_ERROR;
624 }
625 break;
626
627 case State::GRPC_JSON_STATE_STRING_ESCAPE:
628 if (escaped_string_was_key_) {
629 state_ = State::GRPC_JSON_STATE_OBJECT_KEY_STRING;
630 } else {
631 state_ = State::GRPC_JSON_STATE_VALUE_STRING;
632 }
633 if (unicode_high_surrogate_ && c != 'u') {
634 return Status::GRPC_JSON_PARSE_ERROR;
635 }
636 switch (c) {
637 case '"':
638 case '/':
639 if (!StringAddChar(c)) return Status::GRPC_JSON_PARSE_ERROR;
640 break;
641 case 'b':
642 if (!StringAddChar('\b')) return Status::GRPC_JSON_PARSE_ERROR;
643 break;
644 case 'f':
645 if (!StringAddChar('\f')) return Status::GRPC_JSON_PARSE_ERROR;
646 break;
647 case 'n':
648 if (!StringAddChar('\n')) return Status::GRPC_JSON_PARSE_ERROR;
649 break;
650 case 'r':
651 if (!StringAddChar('\r')) return Status::GRPC_JSON_PARSE_ERROR;
652 break;
653 case 't':
654 if (!StringAddChar('\t')) return Status::GRPC_JSON_PARSE_ERROR;
655 break;
656 case 'u':
657 state_ = State::GRPC_JSON_STATE_STRING_ESCAPE_U1;
658 unicode_char_ = 0;
659 break;
660 default:
661 return Status::GRPC_JSON_PARSE_ERROR;
662 }
663 break;
664
665 case State::GRPC_JSON_STATE_STRING_ESCAPE_U1:
666 case State::GRPC_JSON_STATE_STRING_ESCAPE_U2:
667 case State::GRPC_JSON_STATE_STRING_ESCAPE_U3:
668 case State::GRPC_JSON_STATE_STRING_ESCAPE_U4:
669 if ((c >= '0') && (c <= '9')) {
670 c -= '0';
671 } else if ((c >= 'A') && (c <= 'F')) {
672 c -= 'A' - 10;
673 } else if ((c >= 'a') && (c <= 'f')) {
674 c -= 'a' - 10;
675 } else {
676 return Status::GRPC_JSON_PARSE_ERROR;
677 }
678 unicode_char_ = static_cast<uint16_t>(unicode_char_ << 4);
679 unicode_char_ = static_cast<uint16_t>(unicode_char_ | c);
680
681 switch (state_) {
682 case State::GRPC_JSON_STATE_STRING_ESCAPE_U1:
683 state_ = State::GRPC_JSON_STATE_STRING_ESCAPE_U2;
684 break;
685 case State::GRPC_JSON_STATE_STRING_ESCAPE_U2:
686 state_ = State::GRPC_JSON_STATE_STRING_ESCAPE_U3;
687 break;
688 case State::GRPC_JSON_STATE_STRING_ESCAPE_U3:
689 state_ = State::GRPC_JSON_STATE_STRING_ESCAPE_U4;
690 break;
691 case State::GRPC_JSON_STATE_STRING_ESCAPE_U4:
692 // See grpc_json_writer_escape_string to have a description
693 // of what's going on here.
694 //
695 if ((unicode_char_ & 0xfc00) == 0xd800) {
696 // high surrogate utf-16
697 if (unicode_high_surrogate_ != 0) {
698 return Status::GRPC_JSON_PARSE_ERROR;
699 }
700 unicode_high_surrogate_ = unicode_char_;
701 } else if ((unicode_char_ & 0xfc00) == 0xdc00) {
702 // low surrogate utf-16
703 uint32_t utf32;
704 if (unicode_high_surrogate_ == 0) {
705 return Status::GRPC_JSON_PARSE_ERROR;
706 }
707 utf32 = 0x10000;
708 utf32 += static_cast<uint32_t>(
709 (unicode_high_surrogate_ - 0xd800) * 0x400);
710 utf32 += static_cast<uint32_t>(unicode_char_ - 0xdc00);
711 if (!StringAddUtf32(utf32)) {
712 return Status::GRPC_JSON_PARSE_ERROR;
713 }
714 unicode_high_surrogate_ = 0;
715 } else {
716 // anything else
717 if (unicode_high_surrogate_ != 0) {
718 return Status::GRPC_JSON_PARSE_ERROR;
719 }
720 if (!StringAddUtf32(unicode_char_)) {
721 return Status::GRPC_JSON_PARSE_ERROR;
722 }
723 }
724 if (escaped_string_was_key_) {
725 state_ = State::GRPC_JSON_STATE_OBJECT_KEY_STRING;
726 } else {
727 state_ = State::GRPC_JSON_STATE_VALUE_STRING;
728 }
729 break;
730 default:
731 GPR_UNREACHABLE_CODE(return Status::GRPC_JSON_INTERNAL_ERROR);
732 }
733 break;
734
735 case State::GRPC_JSON_STATE_VALUE_NUMBER:
736 if (!StringAddChar(c)) return Status::GRPC_JSON_PARSE_ERROR;
737 switch (c) {
738 case '0':
739 case '1':
740 case '2':
741 case '3':
742 case '4':
743 case '5':
744 case '6':
745 case '7':
746 case '8':
747 case '9':
748 break;
749 case 'e':
750 case 'E':
751 state_ = State::GRPC_JSON_STATE_VALUE_NUMBER_E;
752 break;
753 case '.':
754 state_ = State::GRPC_JSON_STATE_VALUE_NUMBER_DOT;
755 break;
756 default:
757 return Status::GRPC_JSON_PARSE_ERROR;
758 }
759 break;
760
761 case State::GRPC_JSON_STATE_VALUE_NUMBER_WITH_DECIMAL:
762 if (!StringAddChar(c)) return Status::GRPC_JSON_PARSE_ERROR;
763 switch (c) {
764 case '0':
765 case '1':
766 case '2':
767 case '3':
768 case '4':
769 case '5':
770 case '6':
771 case '7':
772 case '8':
773 case '9':
774 break;
775 case 'e':
776 case 'E':
777 state_ = State::GRPC_JSON_STATE_VALUE_NUMBER_E;
778 break;
779 default:
780 return Status::GRPC_JSON_PARSE_ERROR;
781 }
782 break;
783
784 case State::GRPC_JSON_STATE_VALUE_NUMBER_ZERO:
785 if (c != '.') return Status::GRPC_JSON_PARSE_ERROR;
786 if (!StringAddChar(c)) return Status::GRPC_JSON_PARSE_ERROR;
787 state_ = State::GRPC_JSON_STATE_VALUE_NUMBER_DOT;
788 break;
789
790 case State::GRPC_JSON_STATE_VALUE_NUMBER_DOT:
791 if (!StringAddChar(c)) return Status::GRPC_JSON_PARSE_ERROR;
792 switch (c) {
793 case '0':
794 case '1':
795 case '2':
796 case '3':
797 case '4':
798 case '5':
799 case '6':
800 case '7':
801 case '8':
802 case '9':
803 state_ = State::GRPC_JSON_STATE_VALUE_NUMBER_WITH_DECIMAL;
804 break;
805 default:
806 return Status::GRPC_JSON_PARSE_ERROR;
807 }
808 break;
809
810 case State::GRPC_JSON_STATE_VALUE_NUMBER_E:
811 if (!StringAddChar(c)) return Status::GRPC_JSON_PARSE_ERROR;
812 switch (c) {
813 case '0':
814 case '1':
815 case '2':
816 case '3':
817 case '4':
818 case '5':
819 case '6':
820 case '7':
821 case '8':
822 case '9':
823 case '+':
824 case '-':
825 state_ = State::GRPC_JSON_STATE_VALUE_NUMBER_EPM;
826 break;
827 default:
828 return Status::GRPC_JSON_PARSE_ERROR;
829 }
830 break;
831
832 case State::GRPC_JSON_STATE_VALUE_NUMBER_EPM:
833 if (!StringAddChar(c)) return Status::GRPC_JSON_PARSE_ERROR;
834 switch (c) {
835 case '0':
836 case '1':
837 case '2':
838 case '3':
839 case '4':
840 case '5':
841 case '6':
842 case '7':
843 case '8':
844 case '9':
845 break;
846 default:
847 return Status::GRPC_JSON_PARSE_ERROR;
848 }
849 break;
850
851 case State::GRPC_JSON_STATE_VALUE_TRUE_R:
852 if (c != 'r') return Status::GRPC_JSON_PARSE_ERROR;
853 state_ = State::GRPC_JSON_STATE_VALUE_TRUE_U;
854 break;
855
856 case State::GRPC_JSON_STATE_VALUE_TRUE_U:
857 if (c != 'u') return Status::GRPC_JSON_PARSE_ERROR;
858 state_ = State::GRPC_JSON_STATE_VALUE_TRUE_E;
859 break;
860
861 case State::GRPC_JSON_STATE_VALUE_TRUE_E:
862 if (c != 'e') return Status::GRPC_JSON_PARSE_ERROR;
863 SetTrue();
864 state_ = State::GRPC_JSON_STATE_VALUE_END;
865 break;
866
867 case State::GRPC_JSON_STATE_VALUE_FALSE_A:
868 if (c != 'a') return Status::GRPC_JSON_PARSE_ERROR;
869 state_ = State::GRPC_JSON_STATE_VALUE_FALSE_L;
870 break;
871
872 case State::GRPC_JSON_STATE_VALUE_FALSE_L:
873 if (c != 'l') return Status::GRPC_JSON_PARSE_ERROR;
874 state_ = State::GRPC_JSON_STATE_VALUE_FALSE_S;
875 break;
876
877 case State::GRPC_JSON_STATE_VALUE_FALSE_S:
878 if (c != 's') return Status::GRPC_JSON_PARSE_ERROR;
879 state_ = State::GRPC_JSON_STATE_VALUE_FALSE_E;
880 break;
881
882 case State::GRPC_JSON_STATE_VALUE_FALSE_E:
883 if (c != 'e') return Status::GRPC_JSON_PARSE_ERROR;
884 SetFalse();
885 state_ = State::GRPC_JSON_STATE_VALUE_END;
886 break;
887
888 case State::GRPC_JSON_STATE_VALUE_NULL_U:
889 if (c != 'u') return Status::GRPC_JSON_PARSE_ERROR;
890 state_ = State::GRPC_JSON_STATE_VALUE_NULL_L1;
891 break;
892
893 case State::GRPC_JSON_STATE_VALUE_NULL_L1:
894 if (c != 'l') return Status::GRPC_JSON_PARSE_ERROR;
895 state_ = State::GRPC_JSON_STATE_VALUE_NULL_L2;
896 break;
897
898 case State::GRPC_JSON_STATE_VALUE_NULL_L2:
899 if (c != 'l') return Status::GRPC_JSON_PARSE_ERROR;
900 SetNull();
901 state_ = State::GRPC_JSON_STATE_VALUE_END;
902 break;
903
904 // All of the VALUE_END cases are handled in the specialized case
905 // above.
906 case State::GRPC_JSON_STATE_VALUE_END:
907 switch (c) {
908 case ',':
909 case '}':
910 case ']':
911 GPR_UNREACHABLE_CODE(return Status::GRPC_JSON_INTERNAL_ERROR);
912 break;
913
914 default:
915 return Status::GRPC_JSON_PARSE_ERROR;
916 }
917 break;
918
919 case State::GRPC_JSON_STATE_END:
920 return Status::GRPC_JSON_PARSE_ERROR;
921 }
922 }
923 }
924
925 GPR_UNREACHABLE_CODE(return Status::GRPC_JSON_INTERNAL_ERROR);
926 }
927
Parse(absl::string_view input)928 absl::StatusOr<Json> JsonReader::Parse(absl::string_view input) {
929 JsonReader reader(input);
930 Status status = reader.Run();
931 if (reader.truncated_errors_) {
932 reader.errors_.push_back(
933 "too many errors encountered during JSON parsing -- fix reported "
934 "errors and try again to see additional errors");
935 }
936 if (status == Status::GRPC_JSON_INTERNAL_ERROR) {
937 reader.errors_.push_back(absl::StrCat(
938 "internal error in JSON parser at index ", reader.CurrentIndex()));
939 } else if (status == Status::GRPC_JSON_PARSE_ERROR) {
940 reader.errors_.push_back(
941 absl::StrCat("JSON parse error at index ", reader.CurrentIndex()));
942 }
943 if (!reader.errors_.empty()) {
944 return absl::InvalidArgumentError(absl::StrCat(
945 "JSON parsing failed: [", absl::StrJoin(reader.errors_, "; "), "]"));
946 }
947 return std::move(reader.root_value_);
948 }
949
950 } // namespace
951
JsonParse(absl::string_view json_str)952 absl::StatusOr<Json> JsonParse(absl::string_view json_str) {
953 return JsonReader::Parse(json_str);
954 }
955
956 } // namespace grpc_core
957