1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc. All rights reserved.
3 //
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file or at
6 // https://developers.google.com/open-source/licenses/bsd
7
8 #include "google/protobuf/json/internal/parser.h"
9
10 #include <cfloat>
11 #include <cmath>
12 #include <cstdint>
13 #include <cstring>
14 #include <limits>
15 #include <memory>
16 #include <string>
17 #include <utility>
18
19 #include "google/protobuf/type.pb.h"
20 #include "absl/base/attributes.h"
21 #include "absl/container/flat_hash_set.h"
22 #include "absl/log/absl_check.h"
23 #include "absl/log/absl_log.h"
24 #include "absl/status/status.h"
25 #include "absl/status/statusor.h"
26 #include "absl/strings/ascii.h"
27 #include "absl/strings/escaping.h"
28 #include "absl/strings/match.h"
29 #include "absl/strings/numbers.h"
30 #include "absl/strings/str_format.h"
31 #include "absl/strings/str_split.h"
32 #include "absl/strings/string_view.h"
33 #include "absl/types/optional.h"
34 #include "absl/types/span.h"
35 #include "google/protobuf/descriptor.h"
36 #include "google/protobuf/dynamic_message.h"
37 #include "google/protobuf/io/zero_copy_sink.h"
38 #include "google/protobuf/io/zero_copy_stream.h"
39 #include "google/protobuf/io/zero_copy_stream_impl_lite.h"
40 #include "google/protobuf/json/internal/descriptor_traits.h"
41 #include "google/protobuf/json/internal/lexer.h"
42 #include "google/protobuf/json/internal/parser_traits.h"
43 #include "google/protobuf/message.h"
44 #include "google/protobuf/util/type_resolver.h"
45 #include "google/protobuf/stubs/status_macros.h"
46
47 // Must be included last.
48 #include "google/protobuf/port_def.inc"
49
50 namespace google {
51 namespace protobuf {
52 namespace json_internal {
53 namespace {
54 // This file contains code that drives a JsonLexer to visit a JSON document and
55 // convert it into some form of proto.
56 //
57 // This semantic layer is duplicated: proto2-ish code can deserialize directly
58 // into a message, whereas proto3-ish code deserializes into a byte stream,
59 // using TypeResolvers instead of Descriptors.
60 //
// The parsing code is templated over which of these two reflection + output
// combinations is used. The traits types that collect the per-instantiation
// functionality can be found in parser_traits.h.
64
65 // This table maps an unsigned `char` value, interpreted as an ASCII character,
66 // to a corresponding value in the base64 alphabet (both traditional and
67 // "web-safe" characters are included).
68 //
69 // If a character is not valid base64, it maps to -1; this is used by the bit
70 // operations that assemble a base64-encoded word to determine if an error
71 // occurred, by checking the sign bit.
constexpr signed char kBase64Table[256] = {
    -1,       -1,       -1,       -1,       -1,       -1,        -1,
    -1,       -1,       -1,       -1,       -1,       -1,        -1,
    -1,       -1,       -1,       -1,       -1,       -1,        -1,
    -1,       -1,       -1,       -1,       -1,       -1,        -1,
    -1,       -1,       -1,       -1,       -1,       -1,        -1,
    -1,       -1,       -1,       -1,       -1,       -1,        -1,
    -1,       62 /*+*/, -1,       62 /*-*/, -1,       63 /*/ */, 52 /*0*/,
    53 /*1*/, 54 /*2*/, 55 /*3*/, 56 /*4*/, 57 /*5*/, 58 /*6*/,  59 /*7*/,
    60 /*8*/, 61 /*9*/, -1,       -1,       -1,       -1,        -1,
    -1,       -1,       0 /*A*/,  1 /*B*/,  2 /*C*/,  3 /*D*/,   4 /*E*/,
    5 /*F*/,  6 /*G*/,  7 /*H*/,  8 /*I*/,  9 /*J*/,  10 /*K*/,  11 /*L*/,
    12 /*M*/, 13 /*N*/, 14 /*O*/, 15 /*P*/, 16 /*Q*/, 17 /*R*/,  18 /*S*/,
    19 /*T*/, 20 /*U*/, 21 /*V*/, 22 /*W*/, 23 /*X*/, 24 /*Y*/,  25 /*Z*/,
    -1,       -1,       -1,       -1,       63 /*_*/, -1,        26 /*a*/,
    27 /*b*/, 28 /*c*/, 29 /*d*/, 30 /*e*/, 31 /*f*/, 32 /*g*/,  33 /*h*/,
    34 /*i*/, 35 /*j*/, 36 /*k*/, 37 /*l*/, 38 /*m*/, 39 /*n*/,  40 /*o*/,
    41 /*p*/, 42 /*q*/, 43 /*r*/, 44 /*s*/, 45 /*t*/, 46 /*u*/,  47 /*v*/,
    48 /*w*/, 49 /*x*/, 50 /*y*/, 51 /*z*/, -1,       -1,        -1,
    -1,       -1,       -1,       -1,       -1,       -1,        -1,
    -1,       -1,       -1,       -1,       -1,       -1,        -1,
    -1,       -1,       -1,       -1,       -1,       -1,        -1,
    -1,       -1,       -1,       -1,       -1,       -1,        -1,
    -1,       -1,       -1,       -1,       -1,       -1,        -1,
    -1,       -1,       -1,       -1,       -1,       -1,        -1,
    -1,       -1,       -1,       -1,       -1,       -1,        -1,
    -1,       -1,       -1,       -1,       -1,       -1,        -1,
    -1,       -1,       -1,       -1,       -1,       -1,        -1,
    -1,       -1,       -1,       -1,       -1,       -1,        -1,
    -1,       -1,       -1,       -1,       -1,       -1,        -1,
    -1,       -1,       -1,       -1,       -1,       -1,        -1,
    -1,       -1,       -1,       -1,       -1,       -1,        -1,
    -1,       -1,       -1,       -1,       -1,       -1,        -1,
    -1,       -1,       -1,       -1,       -1,       -1,        -1,
    -1,       -1,       -1,       -1,       -1,       -1,        -1,
    -1,       -1,       -1,       -1,       -1,       -1,        -1,
    -1,       -1,       -1,       -1,       -1,       -1,        -1,
    -1,       -1,       -1,       -1};
110
Base64Lookup(char c)111 uint32_t Base64Lookup(char c) {
112 // Sign-extend return value so high bit will be set on any unexpected char.
113 return static_cast<uint32_t>(kBase64Table[static_cast<uint8_t>(c)]);
114 }
115
// Decodes `base64` in-place, shrinking the length as appropriate.
//
// Accepts both the traditional and "web-safe" alphabets (see kBase64Table)
// and optional trailing '=' padding. Returns a span over the prefix of
// `base64` that now holds the decoded bytes, or InvalidArgumentError on any
// malformed input.
absl::StatusOr<absl::Span<char>> DecodeBase64InPlace(absl::Span<char> base64) {
  // We decode in place. This is safe because this is a new buffer (not
  // aliasing the input) and because base64 decoding shrinks 4 bytes into 3.
  char* out = base64.data();
  const char* ptr = base64.data();
  const char* end = ptr + base64.size();
  // Largest prefix that is a whole number of 4-char base64 groups.
  const char* end4 = ptr + (base64.size() & ~3u);

  for (; ptr < end4; ptr += 4, out += 3) {
    // Pack four 6-bit values into 24 bits. Base64Lookup() sign-extends -1
    // for invalid characters, so any bad char forces the high bit of `val`.
    auto val = Base64Lookup(ptr[0]) << 18 | Base64Lookup(ptr[1]) << 12 |
               Base64Lookup(ptr[2]) << 6 | Base64Lookup(ptr[3]) << 0;

    if (static_cast<int32_t>(val) < 0) {
      // Junk chars or padding. Remove trailing padding, if any.
      if (end - ptr == 4 && ptr[3] == '=') {
        if (ptr[2] == '=') {
          end -= 2;
        } else {
          end -= 1;
        }
      }
      // Fall through to the tail handling below, which re-validates the
      // remaining (now unpadded) group.
      break;
    }

    out[0] = val >> 16;
    out[1] = (val >> 8) & 0xff;
    out[2] = val & 0xff;
  }

  if (ptr < end) {
    // Trailing partial group: 2 or 3 base64 chars decode to 1 or 2 bytes.
    // Any other remaining length (including a lone char, or junk found
    // mid-buffer by the loop above) leaves `val` negative and is rejected.
    uint32_t val = ~0u;
    switch (end - ptr) {
      case 2:
        val = Base64Lookup(ptr[0]) << 18 | Base64Lookup(ptr[1]) << 12;
        out[0] = val >> 16;
        out += 1;
        break;
      case 3:
        val = Base64Lookup(ptr[0]) << 18 | Base64Lookup(ptr[1]) << 12 |
              Base64Lookup(ptr[2]) << 6;
        out[0] = val >> 16;
        out[1] = (val >> 8) & 0xff;
        out += 2;
        break;
    }

    if (static_cast<int32_t>(val) < 0) {
      return absl::InvalidArgumentError("corrupt base64");
    }
  }

  return absl::Span<char>(base64.data(),
                          static_cast<size_t>(out - base64.data()));
}
171
// Parses a JSON number (possibly quoted) into the integer type `T`.
//
// A literal that does not parse directly as `T` is re-parsed as a double
// (accepting spellings such as "1e5"), which must then be finite, lie in
// [lo, hi] (so the cast to T is well-defined), and have no fractional part.
// Quoted numbers only take the direct integer path and reject whitespace.
//
// Returns the parsed value together with its source location.
template <typename T>
absl::StatusOr<LocationWith<T>> ParseIntInner(JsonLexer& lex, double lo,
                                              double hi) {
  absl::StatusOr<JsonLexer::Kind> kind = lex.PeekKind();
  RETURN_IF_ERROR(kind.status());

  LocationWith<T> n;
  switch (*kind) {
    case JsonLexer::kNum: {
      absl::StatusOr<LocationWith<MaybeOwnedString>> x = lex.ParseRawNumber();
      RETURN_IF_ERROR(x.status());
      n.loc = x->loc;
      // Fast path: a plain integer literal that fits in T.
      if (absl::SimpleAtoi(x->value.AsView(), &n.value)) {
        break;
      }

      // Slow path: accept decimal/exponent spellings by way of double.
      double d;
      if (!absl::SimpleAtod(x->value.AsView(), &d) || !std::isfinite(d)) {
        return x->loc.Invalid(
            absl::StrFormat("invalid number: '%s'", x->value.AsView()));
      }

      // Conversion overflow here would be UB.
      if (lo > d || d > hi) {
        return lex.Invalid("JSON number out of range for int");
      }
      n.value = static_cast<T>(d);
      // Round-trip check: rejects doubles with a fractional part.
      if (d - static_cast<double>(n.value) != 0) {
        return lex.Invalid(
            "expected integer, but JSON number had fractional part");
      }
      break;
    }
    case JsonLexer::kStr: {
      absl::StatusOr<LocationWith<MaybeOwnedString>> str = lex.ParseUtf8();
      RETURN_IF_ERROR(str.status());
      // SimpleAtoi will ignore leading and trailing whitespace, so we need
      // to check for it ourselves.
      for (char c : str->value.AsView()) {
        if (absl::ascii_isspace(c)) {
          return lex.Invalid("non-number characters in quoted number");
        }
      }
      if (!absl::SimpleAtoi(str->value.AsView(), &n.value)) {
        return str->loc.Invalid("non-number characters in quoted number");
      }
      n.loc = str->loc;
      break;
    }
    default:
      return lex.Invalid("expected number or string");
  }

  return n;
}
227
228 template <typename Traits>
ParseInt(JsonLexer & lex,Field<Traits> field)229 absl::StatusOr<int64_t> ParseInt(JsonLexer& lex, Field<Traits> field) {
230 absl::StatusOr<LocationWith<int64_t>> n =
231 ParseIntInner<int64_t>(lex, -9007199254740992.0, 9007199254740992.0);
232 RETURN_IF_ERROR(n.status());
233
234 if (Traits::Is32Bit(field)) {
235 if (std::numeric_limits<int32_t>::min() > n->value ||
236 n->value > std::numeric_limits<int32_t>::max()) {
237 return n->loc.Invalid("integer out of range");
238 }
239 }
240
241 return n->value;
242 }
243
244 template <typename Traits>
ParseUInt(JsonLexer & lex,Field<Traits> field)245 absl::StatusOr<uint64_t> ParseUInt(JsonLexer& lex, Field<Traits> field) {
246 absl::StatusOr<LocationWith<uint64_t>> n =
247 ParseIntInner<uint64_t>(lex, 0, 18014398509481984.0);
248 RETURN_IF_ERROR(n.status());
249
250 if (Traits::Is32Bit(field)) {
251 if (n->value > std::numeric_limits<uint32_t>::max()) {
252 return n->loc.Invalid("integer out of range");
253 }
254 }
255
256 return n->value;
257 }
258
259 template <typename Traits>
ParseFp(JsonLexer & lex,Field<Traits> field)260 absl::StatusOr<double> ParseFp(JsonLexer& lex, Field<Traits> field) {
261 absl::StatusOr<JsonLexer::Kind> kind = lex.PeekKind();
262 RETURN_IF_ERROR(kind.status());
263
264 double n;
265 switch (*kind) {
266 case JsonLexer::kNum: {
267 absl::StatusOr<LocationWith<double>> d = lex.ParseNumber();
268 RETURN_IF_ERROR(d.status());
269 n = d->value;
270 break;
271 }
272 case JsonLexer::kStr: {
273 absl::StatusOr<LocationWith<MaybeOwnedString>> str = lex.ParseUtf8();
274 RETURN_IF_ERROR(str.status());
275
276 if (str->value == "NaN") {
277 n = NAN;
278 } else if (str->value == "Infinity") {
279 n = INFINITY;
280 } else if (str->value == "-Infinity") {
281 n = -INFINITY;
282 } else if (!absl::SimpleAtod(str->value.AsView(), &n)) {
283 return str->loc.Invalid("non-number characters in quoted number");
284 }
285 break;
286 }
287 default:
288 return lex.Invalid("expected number or string");
289 }
290
291 if (Traits::Is32Bit(field)) {
292 // Detect out-of-range 32-bit floats by seeing whether the conversion result
293 // is still finite. Finite extreme values may have textual representations
294 // that parse to 64-bit values outside the 32-bit range, but which are
295 // closer to the 32-bit extreme than to the "next value with the same
296 // precision".
297 if (std::isfinite(n) && !std::isfinite(static_cast<float>(n))) {
298 return lex.Invalid("float out of range");
299 }
300 }
301
302 return n;
303 }
304
// Parses a string or bytes field. Bytes fields are base64-decoded after
// lexing; string fields are returned as-is.
template <typename Traits>
absl::StatusOr<std::string> ParseStrOrBytes(JsonLexer& lex,
                                            Field<Traits> field) {
  absl::StatusOr<LocationWith<MaybeOwnedString>> str = lex.ParseUtf8();
  RETURN_IF_ERROR(str.status());

  if (Traits::FieldType(field) == FieldDescriptor::TYPE_BYTES) {
    // ToString() appears to expose the owned buffer by reference (it is
    // bound to a non-const std::string& here), so the base64 payload can
    // be decoded in place and then truncated to the decoded length.
    std::string& b64 = str->value.ToString();
    absl::StatusOr<absl::Span<char>> decoded =
        DecodeBase64InPlace(absl::MakeSpan(&b64[0], b64.size()));
    if (!decoded.ok()) {
      // Re-report the decode failure with the string's source location.
      return str->loc.Invalid(decoded.status().message());
    }
    b64.resize(decoded->size());
  }

  return std::move(str->value.ToString());
}
323
324 template <typename Traits>
ParseEnumFromStr(JsonLexer & lex,MaybeOwnedString & str,Field<Traits> field)325 absl::StatusOr<absl::optional<int32_t>> ParseEnumFromStr(JsonLexer& lex,
326 MaybeOwnedString& str,
327 Field<Traits> field) {
328 absl::StatusOr<int32_t> value = Traits::EnumNumberByName(
329 field, str.AsView(), lex.options().case_insensitive_enum_parsing);
330 if (value.ok()) {
331 return absl::optional<int32_t>(*value);
332 }
333
334 int32_t i;
335 if (absl::SimpleAtoi(str.AsView(), &i)) {
336 return absl::optional<int32_t>(i);
337 } else if (lex.options().ignore_unknown_fields) {
338 return {absl::nullopt};
339 }
340
341 return value.status();
342 }
343
344 // Parses an enum; can return nullopt if a quoted enumerator that we don't
345 // know about is received and `ignore_unknown_fields` is set.
346 template <typename Traits>
ParseEnum(JsonLexer & lex,Field<Traits> field)347 absl::StatusOr<absl::optional<int32_t>> ParseEnum(JsonLexer& lex,
348 Field<Traits> field) {
349 absl::StatusOr<JsonLexer::Kind> kind = lex.PeekKind();
350 RETURN_IF_ERROR(kind.status());
351
352 int32_t n = 0;
353 switch (*kind) {
354 case JsonLexer::kStr: {
355 absl::StatusOr<LocationWith<MaybeOwnedString>> str = lex.ParseUtf8();
356 RETURN_IF_ERROR(str.status());
357
358 auto e = ParseEnumFromStr<Traits>(lex, str->value, field);
359 RETURN_IF_ERROR(e.status());
360 if (!e->has_value()) {
361 return {absl::nullopt};
362 }
363 n = **e;
364 break;
365 }
366 case JsonLexer::kNum:
367 return ParseInt<Traits>(lex, field);
368 default:
369 return lex.Invalid("expected number or string");
370 }
371
372 return n;
373 }
374
375 // Mutually recursive with functions that follow.
376 template <typename Traits>
377 absl::Status ParseMessage(JsonLexer& lex, const Desc<Traits>& desc,
378 Msg<Traits>& msg, bool any_reparse);
379 template <typename Traits>
380 absl::Status ParseField(JsonLexer& lex, const Desc<Traits>& desc,
381 absl::string_view name, Msg<Traits>& msg);
382
// Parses one singular (non-repeated) field value from the lexer into `msg`.
//
// A JSON `null` is consumed and otherwise ignored for every field type
// except two well-known types: a google.protobuf.NullValue enum is set to 0,
// and a google.protobuf.Value message gets its field number 1 (the
// null_value case) set to 0.
template <typename Traits>
absl::Status ParseSingular(JsonLexer& lex, Field<Traits> field,
                           Msg<Traits>& msg) {
  auto field_type = Traits::FieldType(field);
  if (lex.Peek(JsonLexer::kNull)) {
    auto message_type = ClassifyMessage(Traits::FieldTypeName(field));
    switch (field_type) {
      case FieldDescriptor::TYPE_ENUM:
        // NullValue enums record null explicitly as their 0 value.
        if (message_type == MessageType::kNull) {
          Traits::SetEnum(field, msg, 0);
        }
        break;
      case FieldDescriptor::TYPE_MESSAGE: {
        // google.protobuf.Value records null by setting field number 1.
        if (message_type == MessageType::kValue) {
          return Traits::NewMsg(
              field, msg,
              [&](const Desc<Traits>& type, Msg<Traits>& msg) -> absl::Status {
                auto field = Traits::FieldByNumber(type, 1);
                ABSL_DCHECK(field.has_value());
                RETURN_IF_ERROR(lex.Expect("null"));
                Traits::SetEnum(Traits::MustHaveField(type, 1), msg, 0);
                return absl::OkStatus();
              });
        }
        break;
      }
      default:
        break;
    }
    // For everything else, consume the null and leave the field unset.
    return lex.Expect("null");
  }

  switch (field_type) {
    case FieldDescriptor::TYPE_FLOAT: {
      auto x = ParseFp<Traits>(lex, field);
      RETURN_IF_ERROR(x.status());
      Traits::SetFloat(field, msg, *x);
      break;
    }
    case FieldDescriptor::TYPE_DOUBLE: {
      auto x = ParseFp<Traits>(lex, field);
      RETURN_IF_ERROR(x.status());
      Traits::SetDouble(field, msg, *x);
      break;
    }

    case FieldDescriptor::TYPE_SFIXED64:
    case FieldDescriptor::TYPE_SINT64:
    case FieldDescriptor::TYPE_INT64: {
      auto x = ParseInt<Traits>(lex, field);
      RETURN_IF_ERROR(x.status());
      Traits::SetInt64(field, msg, *x);
      break;
    }
    case FieldDescriptor::TYPE_FIXED64:
    case FieldDescriptor::TYPE_UINT64: {
      auto x = ParseUInt<Traits>(lex, field);
      RETURN_IF_ERROR(x.status());
      Traits::SetUInt64(field, msg, *x);
      break;
    }

    case FieldDescriptor::TYPE_SFIXED32:
    case FieldDescriptor::TYPE_SINT32:
    case FieldDescriptor::TYPE_INT32: {
      // ParseInt has already range-checked 32-bit fields; the cast here
      // cannot overflow.
      auto x = ParseInt<Traits>(lex, field);
      RETURN_IF_ERROR(x.status());
      Traits::SetInt32(field, msg, static_cast<int32_t>(*x));
      break;
    }
    case FieldDescriptor::TYPE_FIXED32:
    case FieldDescriptor::TYPE_UINT32: {
      auto x = ParseUInt<Traits>(lex, field);
      RETURN_IF_ERROR(x.status());
      Traits::SetUInt32(field, msg, static_cast<uint32_t>(*x));
      break;
    }
    case FieldDescriptor::TYPE_BOOL: {
      absl::StatusOr<JsonLexer::Kind> kind = lex.PeekKind();
      RETURN_IF_ERROR(kind.status());

      switch (*kind) {
        case JsonLexer::kTrue:
          RETURN_IF_ERROR(lex.Expect("true"));
          Traits::SetBool(field, msg, true);
          break;
        case JsonLexer::kFalse:
          RETURN_IF_ERROR(lex.Expect("false"));
          Traits::SetBool(field, msg, false);
          break;
        case JsonLexer::kStr: {
          // Quoted bools are only tolerated in legacy mode; otherwise
          // fall into the error default via the `bad` label below.
          if (!lex.options().allow_legacy_syntax) {
            goto bad;
          }

          auto x = lex.ParseUtf8();
          RETURN_IF_ERROR(x.status());

          bool flag;
          if (!absl::SimpleAtob(x->value, &flag)) {
            // Is this error a lie? Do we accept things other than "true" and
            // "false" because SimpleAtob does? Absolutely!
            return x->loc.Invalid("expected 'true' or 'false'");
          }
          Traits::SetBool(field, msg, flag);

          break;
        }
        bad:
        default:
          return lex.Invalid("expected 'true' or 'false'");
      }
      break;
    }
    case FieldDescriptor::TYPE_STRING:
    case FieldDescriptor::TYPE_BYTES: {
      auto x = ParseStrOrBytes<Traits>(lex, field);
      RETURN_IF_ERROR(x.status());
      Traits::SetString(field, msg, *x);
      break;
    }
    case FieldDescriptor::TYPE_ENUM: {
      absl::StatusOr<absl::optional<int32_t>> x = ParseEnum<Traits>(lex, field);
      RETURN_IF_ERROR(x.status());

      // nullopt means an unknown enumerator was ignored; implicit-presence
      // fields still record the default (0).
      if (x->has_value() || Traits::IsImplicitPresence(field)) {
        Traits::SetEnum(field, msg, x->value_or(0));
      }
      break;
    }
    case FieldDescriptor::TYPE_MESSAGE:
    case FieldDescriptor::TYPE_GROUP: {
      return Traits::NewMsg(
          field, msg,
          [&](const Desc<Traits>& type, Msg<Traits>& msg) -> absl::Status {
            return ParseMessage<Traits>(lex, type, msg,
                                        /*any_reparse=*/false);
          });
    }
    default:
      return lex.Invalid(
          absl::StrCat("unsupported field type: ", Traits::FieldType(field)));
  }

  return absl::OkStatus();
}
529
530 template <typename Traits>
EmitNull(JsonLexer & lex,Field<Traits> field,Msg<Traits> & msg)531 absl::Status EmitNull(JsonLexer& lex, Field<Traits> field, Msg<Traits>& msg) {
532 switch (Traits::FieldType(field)) {
533 case FieldDescriptor::TYPE_FLOAT:
534 Traits::SetFloat(field, msg, 0);
535 break;
536 case FieldDescriptor::TYPE_DOUBLE:
537 Traits::SetDouble(field, msg, 0);
538 break;
539 case FieldDescriptor::TYPE_SFIXED64:
540 case FieldDescriptor::TYPE_SINT64:
541 case FieldDescriptor::TYPE_INT64:
542 Traits::SetInt64(field, msg, 0);
543 break;
544 case FieldDescriptor::TYPE_FIXED64:
545 case FieldDescriptor::TYPE_UINT64:
546 Traits::SetUInt64(field, msg, 0);
547 break;
548 case FieldDescriptor::TYPE_SFIXED32:
549 case FieldDescriptor::TYPE_SINT32:
550 case FieldDescriptor::TYPE_INT32:
551 Traits::SetInt32(field, msg, 0);
552 break;
553 case FieldDescriptor::TYPE_FIXED32:
554 case FieldDescriptor::TYPE_UINT32:
555 Traits::SetUInt32(field, msg, 0);
556 break;
557 case FieldDescriptor::TYPE_BOOL:
558 Traits::SetBool(field, msg, false);
559 break;
560 case FieldDescriptor::TYPE_STRING:
561 case FieldDescriptor::TYPE_BYTES:
562 Traits::SetString(field, msg, "");
563 break;
564 case FieldDescriptor::TYPE_ENUM:
565 Traits::SetEnum(field, msg, 0);
566 break;
567 case FieldDescriptor::TYPE_MESSAGE:
568 case FieldDescriptor::TYPE_GROUP:
569 return Traits::NewMsg(field, msg,
570 [](const auto&, const auto&) -> absl::Status {
571 return absl::OkStatus();
572 });
573 default:
574 return lex.Invalid(
575 absl::StrCat("unsupported field type: ", Traits::FieldType(field)));
576 }
577 return absl::OkStatus();
578 }
579
// Parses a repeated field, visiting each element of the JSON array. A null
// in place of the whole array is consumed and treated as empty.
template <typename Traits>
absl::Status ParseArray(JsonLexer& lex, Field<Traits> field, Msg<Traits>& msg) {
  if (lex.Peek(JsonLexer::kNull)) {
    return lex.Expect("null");
  }

  return lex.VisitArray([&]() -> absl::Status {
    lex.path().NextRepeated();
    MessageType type = ClassifyMessage(Traits::FieldTypeName(field));

    if (lex.Peek(JsonLexer::kNull)) {
      // Value and NullValue elements can represent null directly, so hand
      // the null to the singular parser.
      if (type == MessageType::kValue) {
        return ParseSingular<Traits>(lex, field, msg);
      }
      if (type == MessageType::kNull) {
        return ParseSingular<Traits>(lex, field, msg);
      }

      // Legacy mode turns a null element into a default-valued element.
      if (lex.options().allow_legacy_syntax) {
        RETURN_IF_ERROR(lex.Expect("null"));
        return EmitNull<Traits>(lex, field, msg);
      }
      return lex.Invalid("null cannot occur inside of repeated fields");
    }

    // Note that this is sufficient to catch when we are inside of a ListValue,
    // because a ListValue's sole field is of type Value. Thus, we only need to
    // classify cases in which we are inside of an array and parsing messages
    // that look like arrays.
    //
    // This will also correctly handle e.g. writing out a ListValue with the
    // legacy syntax of `{"values": [[0], [1], [2]]}`, which does not go through
    // the custom parser handler.
    bool can_flatten =
        type != MessageType::kValue && type != MessageType::kList;
    if (can_flatten && lex.options().allow_legacy_syntax &&
        lex.Peek(JsonLexer::kArr)) {
      // You read that right. In legacy mode, if we encounter an array within
      // an array, we just flatten it as part of the current array!
      //
      // This DOES NOT apply when parsing a google.protobuf.Value or a
      // google.protobuf.ListValue!
      return ParseArray<Traits>(lex, field, msg);
    }
    return ParseSingular<Traits>(lex, field, msg);
  });
}
627
// Parses a map field. JSON object keys arrive as strings and are converted
// here to the map's declared key type; each value is parsed as a singular
// field. A null in place of the whole map is consumed and treated as empty.
template <typename Traits>
absl::Status ParseMap(JsonLexer& lex, Field<Traits> field, Msg<Traits>& msg) {
  if (lex.Peek(JsonLexer::kNull)) {
    return lex.Expect("null");
  }

  // JSON objects permit duplicate keys, but map semantics reject them.
  absl::flat_hash_set<std::string> keys_seen;
  return lex.VisitObject(
      [&](LocationWith<MaybeOwnedString>& key) -> absl::Status {
        lex.path().NextRepeated();
        auto insert_result = keys_seen.emplace(key.value.AsView());
        if (!insert_result.second) {
          return key.loc.Invalid(absl::StrFormat(
              "got unexpectedly-repeated repeated map key: '%s'",
              key.value.AsView()));
        }
        // Each map entry is a synthetic message whose KeyField holds the
        // key and whose ValueField holds the value.
        return Traits::NewMsg(
            field, msg,
            [&](const Desc<Traits>& type, Msg<Traits>& entry) -> absl::Status {
              auto key_field = Traits::KeyField(type);
              // Convert the JSON string key to the declared key type.
              switch (Traits::FieldType(key_field)) {
                case FieldDescriptor::TYPE_INT64:
                case FieldDescriptor::TYPE_SINT64:
                case FieldDescriptor::TYPE_SFIXED64: {
                  int64_t n;
                  if (!absl::SimpleAtoi(key.value.AsView(), &n)) {
                    return key.loc.Invalid(
                        "non-number characters in quoted number");
                  }
                  Traits::SetInt64(key_field, entry, n);
                  break;
                }
                case FieldDescriptor::TYPE_UINT64:
                case FieldDescriptor::TYPE_FIXED64: {
                  uint64_t n;
                  if (!absl::SimpleAtoi(key.value.AsView(), &n)) {
                    return key.loc.Invalid(
                        "non-number characters in quoted number");
                  }
                  Traits::SetUInt64(key_field, entry, n);
                  break;
                }
                case FieldDescriptor::TYPE_INT32:
                case FieldDescriptor::TYPE_SINT32:
                case FieldDescriptor::TYPE_SFIXED32: {
                  int32_t n;
                  if (!absl::SimpleAtoi(key.value.AsView(), &n)) {
                    return key.loc.Invalid(
                        "non-number characters in quoted number");
                  }
                  Traits::SetInt32(key_field, entry, n);
                  break;
                }
                case FieldDescriptor::TYPE_UINT32:
                case FieldDescriptor::TYPE_FIXED32: {
                  uint32_t n;
                  if (!absl::SimpleAtoi(key.value.AsView(), &n)) {
                    return key.loc.Invalid(
                        "non-number characters in quoted number");
                  }
                  Traits::SetUInt32(key_field, entry, n);
                  break;
                }
                case FieldDescriptor::TYPE_BOOL: {
                  // Only the exact strings "true" and "false" are accepted.
                  if (key.value == "true") {
                    Traits::SetBool(key_field, entry, true);
                  } else if (key.value == "false") {
                    Traits::SetBool(key_field, entry, false);
                  } else {
                    return key.loc.Invalid(absl::StrFormat(
                        "expected bool string, got '%s'", key.value.AsView()));
                  }
                  break;
                }
                case FieldDescriptor::TYPE_STRING: {
                  Traits::SetString(key_field, entry,
                                    std::move(key.value.ToString()));
                  break;
                }
                default:
                  return lex.Invalid("unsupported map key type");
              }

              return ParseSingular<Traits>(lex, Traits::ValueField(type),
                                           entry);
            });
      });
}
716
TakeTimeDigitsWithSuffixAndAdvance(absl::string_view & data,int max_digits,absl::string_view end)717 absl::optional<uint32_t> TakeTimeDigitsWithSuffixAndAdvance(
718 absl::string_view& data, int max_digits, absl::string_view end) {
719 ABSL_DCHECK_LE(max_digits, 9);
720
721 uint32_t val = 0;
722 int limit = max_digits;
723 while (!data.empty()) {
724 if (limit-- < 0) {
725 return absl::nullopt;
726 }
727 uint32_t digit = data[0] - '0';
728 if (digit >= 10) {
729 break;
730 }
731
732 val *= 10;
733 val += digit;
734 data = data.substr(1);
735 }
736 if (!absl::StartsWith(data, end)) {
737 return absl::nullopt;
738 }
739
740 data = data.substr(end.size());
741 return val;
742 }
743
TakeNanosAndAdvance(absl::string_view & data)744 absl::optional<int32_t> TakeNanosAndAdvance(absl::string_view& data) {
745 int32_t frac_secs = 0;
746 size_t frac_digits = 0;
747 if (absl::StartsWith(data, ".")) {
748 for (char c : data.substr(1)) {
749 if (!absl::ascii_isdigit(c)) {
750 break;
751 }
752 ++frac_digits;
753 }
754 auto digits = data.substr(1, frac_digits);
755 if (frac_digits == 0 || frac_digits > 9 ||
756 !absl::SimpleAtoi(digits, &frac_secs)) {
757 return absl::nullopt;
758 }
759 data = data.substr(frac_digits + 1);
760 }
761 for (int i = 0; i < 9 - frac_digits; ++i) {
762 frac_secs *= 10;
763 }
764 return frac_secs;
765 }
766
// Parses a google.protobuf.Timestamp from its RFC 3339-style JSON string
// form (e.g. "1972-01-01T10:00:20.021Z" or with a "+hh:mm" offset), storing
// the result in fields 1 (seconds) and 2 (nanos) of `msg`. A null is
// consumed and leaves the message untouched.
template <typename Traits>
absl::Status ParseTimestamp(JsonLexer& lex, const Desc<Traits>& desc,
                            Msg<Traits>& msg) {
  if (lex.Peek(JsonLexer::kNull)) {
    return lex.Expect("null");
  }

  absl::StatusOr<LocationWith<MaybeOwnedString>> str = lex.ParseUtf8();
  RETURN_IF_ERROR(str.status());

  absl::string_view data = str->value.AsView();
  // "YYYY-MM-DDTHH:MM:SSZ" is 20 characters; anything shorter is invalid.
  if (data.size() < 20) {
    return str->loc.Invalid("timestamp string too short");
  }

  int64_t secs;
  {
    /* 1972-01-01T01:00:00 */
    auto year = TakeTimeDigitsWithSuffixAndAdvance(data, 4, "-");
    if (!year.has_value() || *year == 0) {
      return str->loc.Invalid("bad year in timestamp");
    }
    auto mon = TakeTimeDigitsWithSuffixAndAdvance(data, 2, "-");
    if (!mon.has_value() || *mon == 0) {
      return str->loc.Invalid("bad month in timestamp");
    }
    auto day = TakeTimeDigitsWithSuffixAndAdvance(data, 2, "T");
    if (!day.has_value() || *day == 0) {
      return str->loc.Invalid("bad day in timestamp");
    }
    auto hour = TakeTimeDigitsWithSuffixAndAdvance(data, 2, ":");
    if (!hour.has_value()) {
      return str->loc.Invalid("bad hours in timestamp");
    }
    auto min = TakeTimeDigitsWithSuffixAndAdvance(data, 2, ":");
    if (!min.has_value()) {
      return str->loc.Invalid("bad minutes in timestamp");
    }
    auto sec = TakeTimeDigitsWithSuffixAndAdvance(data, 2, "");
    if (!sec.has_value()) {
      return str->loc.Invalid("bad seconds in timestamp");
    }

    // Civil-date to epoch-days conversion. This appears to follow the
    // standard "days from civil" technique: shift to March-based years so
    // the leap day is the last day of the shifted year, apply the
    // Gregorian 4/100/400 leap rule, and use an integer polynomial for the
    // days preceding each March-based month. Constants are taken as-is
    // from the original implementation — verify against a civil-time
    // reference before touching them.
    uint32_t m_adj = *mon - 3;  // March-based month.
    uint32_t carry = m_adj > *mon ? 1 : 0;  // Month wrapped (Jan/Feb).

    uint32_t year_base = 4800;  // Before min year, multiple of 400.
    uint32_t y_adj = *year + year_base - carry;

    uint32_t month_days = ((m_adj + carry * 12) * 62719 + 769) / 2048;
    uint32_t leap_days = y_adj / 4 - y_adj / 100 + y_adj / 400;
    // 2472632 re-bases the shifted epoch onto 1970-01-01.
    int32_t epoch_days =
        y_adj * 365 + leap_days + month_days + (*day - 1) - 2472632;

    secs = int64_t{epoch_days} * 86400 + *hour * 3600 + *min * 60 + *sec;
  }

  auto nanos = TakeNanosAndAdvance(data);
  if (!nanos.has_value()) {
    return str->loc.Invalid("timestamp had bad nanoseconds");
  }

  if (data.empty()) {
    return str->loc.Invalid("timestamp missing timezone offset");
  }

  {
    // [+-]hh:mm or Z
    bool neg = false;
    switch (data[0]) {
      case '-':
        neg = true;
        ABSL_FALLTHROUGH_INTENDED;
      case '+': {
        if (data.size() != 6) {
          return str->loc.Invalid("timestamp offset of wrong size.");
        }

        data = data.substr(1);
        auto hour = TakeTimeDigitsWithSuffixAndAdvance(data, 2, ":");
        auto mins = TakeTimeDigitsWithSuffixAndAdvance(data, 2, "");
        if (!hour.has_value() || !mins.has_value()) {
          return str->loc.Invalid("timestamp offset has bad hours and minutes");
        }

        // A positive offset means local time is ahead of UTC, so it is
        // subtracted to recover UTC seconds.
        int64_t offset = (*hour * 60 + *mins) * 60;
        secs += (neg ? offset : -offset);
        break;
      }
      // Lowercase z is not accepted, per the spec.
      case 'Z':
        if (data.size() == 1) {
          break;
        }
        ABSL_FALLTHROUGH_INTENDED;
      default:
        return str->loc.Invalid("bad timezone offset");
    }
  }

  // Timestamp: field 1 is `seconds`, field 2 is `nanos`.
  Traits::SetInt64(Traits::MustHaveField(desc, 1), msg, secs);
  Traits::SetInt32(Traits::MustHaveField(desc, 2), msg, *nanos);

  return absl::OkStatus();
}
872
873 template <typename Traits>
ParseDuration(JsonLexer & lex,const Desc<Traits> & desc,Msg<Traits> & msg)874 absl::Status ParseDuration(JsonLexer& lex, const Desc<Traits>& desc,
875 Msg<Traits>& msg) {
876 if (lex.Peek(JsonLexer::kNull)) {
877 return lex.Expect("null");
878 }
879
880 constexpr int64_t kMaxSeconds = int64_t{3652500} * 86400;
881
882 absl::StatusOr<LocationWith<MaybeOwnedString>> str = lex.ParseUtf8();
883 RETURN_IF_ERROR(str.status());
884
885 size_t int_part_end = 0;
886 for (char c : str->value.AsView()) {
887 if (!absl::ascii_isdigit(c) && c != '-') {
888 break;
889 }
890 ++int_part_end;
891 }
892 if (int_part_end == 0) {
893 return str->loc.Invalid("duration must start with an integer");
894 }
895
896 absl::string_view sec_digits = str->value.AsView().substr(0, int_part_end);
897 int64_t secs;
898 if (!absl::SimpleAtoi(sec_digits, &secs)) {
899 return str->loc.Invalid("duration had bad seconds");
900 }
901
902 if (secs > kMaxSeconds || secs < -kMaxSeconds) {
903 return str->loc.Invalid("duration out of range");
904 }
905
906 absl::string_view rest = str->value.AsView().substr(int_part_end);
907 auto nanos = TakeNanosAndAdvance(rest);
908 if (!nanos.has_value()) {
909 return str->loc.Invalid("duration had bad nanoseconds");
910 }
911
912 bool isNegative = (secs < 0) || absl::StartsWith(sec_digits, "-");
913 if (isNegative) {
914 *nanos *= -1;
915 }
916
917 if (rest != "s") {
918 return str->loc.Invalid("duration must end with a single 's'");
919 }
920
921 Traits::SetInt64(Traits::MustHaveField(desc, 1), msg, secs);
922 Traits::SetInt32(Traits::MustHaveField(desc, 2), msg, *nanos);
923
924 return absl::OkStatus();
925 }
926
927 template <typename Traits>
ParseFieldMask(JsonLexer & lex,const Desc<Traits> & desc,Msg<Traits> & msg)928 absl::Status ParseFieldMask(JsonLexer& lex, const Desc<Traits>& desc,
929 Msg<Traits>& msg) {
930 absl::StatusOr<LocationWith<MaybeOwnedString>> str = lex.ParseUtf8();
931 RETURN_IF_ERROR(str.status());
932 auto paths = str->value.AsView();
933
934 // The special case of the empty string is not handled correctly below,
935 // because StrSplit("", ',') is [""], not [].
936 if (paths.empty()) {
937 return absl::OkStatus();
938 }
939
940 // google.protobuf.FieldMask has a single field with number 1.
941 auto paths_field = Traits::MustHaveField(desc, 1);
942 for (absl::string_view path : absl::StrSplit(paths, ',')) {
943 std::string snake_path;
944 // Assume approximately six-letter words, so add one extra space for an
945 // underscore for every six bytes.
946 snake_path.reserve(path.size() * 7 / 6);
947 for (char c : path) {
948 if (absl::ascii_isdigit(c) || absl::ascii_islower(c) || c == '.') {
949 snake_path.push_back(c);
950 } else if (absl::ascii_isupper(c)) {
951 snake_path.push_back('_');
952 snake_path.push_back(absl::ascii_tolower(c));
953 } else if (lex.options().allow_legacy_syntax) {
954 snake_path.push_back(c);
955 } else {
956 return str->loc.Invalid("unexpected character in FieldMask");
957 }
958 }
959 Traits::SetString(paths_field, msg, snake_path);
960 }
961
962 return absl::OkStatus();
963 }
964
// Parses a google.protobuf.Any. The payload type is only known once the
// "@type" key has been seen, and JSON allows it to appear anywhere in the
// object, so the whole object is buffered and then re-lexed.
template <typename Traits>
absl::Status ParseAny(JsonLexer& lex, const Desc<Traits>& desc,
                      Msg<Traits>& msg) {
  // Buffer an entire object. Because @type can occur anywhere, we're forced
  // to do this.
  RETURN_IF_ERROR(lex.SkipToToken());
  auto mark = lex.BeginMark();

  // Search for @type, buffering the entire object along the way so we can
  // reparse it.
  absl::optional<MaybeOwnedString> type_url;
  RETURN_IF_ERROR(lex.VisitObject(
      [&](const LocationWith<MaybeOwnedString>& key) -> absl::Status {
        if (key.value == "@type") {
          if (type_url.has_value()) {
            return key.loc.Invalid("repeated @type in Any");
          }

          absl::StatusOr<LocationWith<MaybeOwnedString>> maybe_url =
              lex.ParseUtf8();
          RETURN_IF_ERROR(maybe_url.status());
          type_url = std::move(maybe_url)->value;
          return absl::OkStatus();
        }
        // Every other field is skipped on this pass; the re-parse below
        // picks them up.
        return lex.SkipValue();
      }));

  // Build a new lexer over the skipped object.
  absl::string_view any_text = mark.value.UpToUnread();
  io::ArrayInputStream in(any_text.data(), any_text.size());
  // Copying lex.options() is important; it inherits the recursion
  // limit.
  JsonLexer any_lex(&in, lex.options(), &lex.path(), mark.loc);

  if (!type_url.has_value() && !lex.options().allow_legacy_syntax) {
    return mark.loc.Invalid("missing @type in Any");
  }

  if (type_url.has_value()) {
    // Any: field 1 is `type_url`, field 2 is `value`; the payload message
    // type is resolved dynamically from the URL.
    Traits::SetString(Traits::MustHaveField(desc, 1), msg, type_url->AsView());
    return Traits::NewDynamic(
        Traits::MustHaveField(desc, 2), type_url->ToString(), msg,
        [&](const Desc<Traits>& desc, Msg<Traits>& msg) {
          auto pop = any_lex.path().Push("<any>", FieldDescriptor::TYPE_MESSAGE,
                                         Traits::TypeName(desc));
          return ParseMessage<Traits>(any_lex, desc, msg,
                                      /*any_reparse=*/true);
        });
  } else {
    // Empty {} is accepted in legacy mode.
    ABSL_DCHECK(lex.options().allow_legacy_syntax);
    RETURN_IF_ERROR(any_lex.VisitObject([&](auto&) {
      return mark.loc.Invalid(
          "in legacy mode, missing @type in Any is only allowed for an empty "
          "object");
    }));
    return absl::OkStatus();
  }
}
1024
// These are mutually recursive with ParseValue: a Value may contain a Struct
// or a ListValue, each of which in turn contains more Values.
template <typename Traits>
absl::Status ParseStructValue(JsonLexer& lex, const Desc<Traits>& desc,
                              Msg<Traits>& msg);
template <typename Traits>
absl::Status ParseListValue(JsonLexer& lex, const Desc<Traits>& desc,
                            Msg<Traits>& msg);
1032
1033 template <typename Traits>
ParseValue(JsonLexer & lex,const Desc<Traits> & desc,Msg<Traits> & msg)1034 absl::Status ParseValue(JsonLexer& lex, const Desc<Traits>& desc,
1035 Msg<Traits>& msg) {
1036 auto kind = lex.PeekKind();
1037 RETURN_IF_ERROR(kind.status());
1038 // NOTE: The field numbers 1 through 6 are the numbers of the oneof fields
1039 // in google.protobuf.Value. Conformance tests verify the correctness of
1040 // these numbers.
1041 switch (*kind) {
1042 case JsonLexer::kNull: {
1043 auto field = Traits::MustHaveField(desc, 1);
1044 auto pop =
1045 lex.path().Push(Traits::FieldName(field), Traits::FieldType(field),
1046 Traits::FieldTypeName(field));
1047
1048 RETURN_IF_ERROR(lex.Expect("null"));
1049 Traits::SetEnum(field, msg, 0);
1050 break;
1051 }
1052 case JsonLexer::kNum: {
1053 auto field = Traits::MustHaveField(desc, 2);
1054 auto pop =
1055 lex.path().Push(Traits::FieldName(field), Traits::FieldType(field),
1056 Traits::FieldTypeName(field));
1057
1058 auto number = lex.ParseNumber();
1059 RETURN_IF_ERROR(number.status());
1060 Traits::SetDouble(field, msg, number->value);
1061 break;
1062 }
1063 case JsonLexer::kStr: {
1064 auto field = Traits::MustHaveField(desc, 3);
1065 auto pop =
1066 lex.path().Push(Traits::FieldName(field), Traits::FieldType(field),
1067 Traits::FieldTypeName(field));
1068
1069 auto str = lex.ParseUtf8();
1070 RETURN_IF_ERROR(str.status());
1071 Traits::SetString(field, msg, std::move(str->value.ToString()));
1072 break;
1073 }
1074 case JsonLexer::kFalse:
1075 case JsonLexer::kTrue: {
1076 auto field = Traits::MustHaveField(desc, 4);
1077 auto pop =
1078 lex.path().Push(Traits::FieldName(field), Traits::FieldType(field),
1079 Traits::FieldTypeName(field));
1080
1081 // "Quoted" bools, including non-standard Abseil Atob bools, are not
1082 // supported, because all strings are treated as genuine JSON strings.
1083 if (*kind == JsonLexer::kTrue) {
1084 RETURN_IF_ERROR(lex.Expect("true"));
1085 Traits::SetBool(field, msg, true);
1086 } else {
1087 RETURN_IF_ERROR(lex.Expect("false"));
1088 Traits::SetBool(field, msg, false);
1089 }
1090 break;
1091 }
1092 case JsonLexer::kObj: {
1093 auto field = Traits::MustHaveField(desc, 5);
1094 auto pop =
1095 lex.path().Push(Traits::FieldName(field), Traits::FieldType(field),
1096 Traits::FieldTypeName(field));
1097
1098 return Traits::NewMsg(field, msg, [&](auto& desc, auto& msg) {
1099 return ParseStructValue<Traits>(lex, desc, msg);
1100 });
1101 }
1102 case JsonLexer::kArr: {
1103 auto field = Traits::MustHaveField(desc, 6);
1104 auto pop =
1105 lex.path().Push(Traits::FieldName(field), Traits::FieldType(field),
1106 Traits::FieldTypeName(field));
1107
1108 return Traits::NewMsg(field, msg, [&](auto& desc, auto& msg) {
1109 return ParseListValue<Traits>(lex, desc, msg);
1110 });
1111 }
1112 }
1113
1114 return absl::OkStatus();
1115 }
1116
1117 template <typename Traits>
ParseStructValue(JsonLexer & lex,const Desc<Traits> & desc,Msg<Traits> & msg)1118 absl::Status ParseStructValue(JsonLexer& lex, const Desc<Traits>& desc,
1119 Msg<Traits>& msg) {
1120 auto entry_field = Traits::MustHaveField(desc, 1);
1121 auto pop = lex.path().Push("<struct>", FieldDescriptor::TYPE_MESSAGE,
1122 Traits::FieldTypeName(entry_field));
1123
1124 // Structs are always cleared even if set to {}.
1125 Traits::RecordAsSeen(entry_field, msg);
1126
1127 // Parsing a map does the right thing: Struct has a single map<string,
1128 // Value> field; keys are correctly parsed as strings, and the values
1129 // recurse into ParseMessage, which will be routed into ParseValue. This
1130 // results in some extra overhead, but performance is not what we're going
1131 // for here.
1132 return ParseMap<Traits>(lex, entry_field, msg);
1133 }
1134
1135 template <typename Traits>
ParseListValue(JsonLexer & lex,const Desc<Traits> & desc,Msg<Traits> & msg)1136 absl::Status ParseListValue(JsonLexer& lex, const Desc<Traits>& desc,
1137 Msg<Traits>& msg) {
1138 auto entry_field = Traits::MustHaveField(desc, 1);
1139 auto pop = lex.path().Push("<list>", FieldDescriptor::TYPE_MESSAGE,
1140 Traits::FieldTypeName(entry_field));
1141
1142 // ListValues are always cleared even if set to [].
1143 Traits::RecordAsSeen(entry_field, msg);
1144 // Parsing an array does the right thing: see the analogous comment in
1145 // ParseStructValue.
1146 return ParseArray<Traits>(lex, entry_field, msg);
1147 }
1148
1149 template <typename Traits>
ParseField(JsonLexer & lex,const Desc<Traits> & desc,absl::string_view name,Msg<Traits> & msg)1150 absl::Status ParseField(JsonLexer& lex, const Desc<Traits>& desc,
1151 absl::string_view name, Msg<Traits>& msg) {
1152 absl::optional<Field<Traits>> field;
1153 if (absl::StartsWith(name, "[") && absl::EndsWith(name, "]")) {
1154 absl::string_view extn_name = name.substr(1, name.size() - 2);
1155 field = Traits::ExtensionByName(desc, extn_name);
1156
1157 if (field.has_value()) {
1158 // The check for whether this is an invalid field occurs below, since it
1159 // is combined for both extension and non-extension fields.
1160 auto correct_type_name = Traits::TypeName(desc);
1161 if (Traits::TypeName(Traits::ContainingType(*field)) !=
1162 correct_type_name) {
1163 return lex.Invalid(absl::StrFormat(
1164 "'%s' is a known extension name, but is not an extension "
1165 "of '%s' as expected",
1166 extn_name, correct_type_name));
1167 }
1168 }
1169 } else {
1170 field = Traits::FieldByName(desc, name);
1171 }
1172
1173 if (!field.has_value()) {
1174 if (!lex.options().ignore_unknown_fields) {
1175 return lex.Invalid(absl::StrFormat("no such field: '%s'", name));
1176 }
1177 return lex.SkipValue();
1178 }
1179
1180 auto pop = lex.path().Push(name, Traits::FieldType(*field),
1181 Traits::FieldTypeName(*field));
1182
1183 if (Traits::HasParsed(
1184 *field, msg,
1185 /*allow_repeated_non_oneof=*/lex.options().allow_legacy_syntax) &&
1186 !lex.Peek(JsonLexer::kNull)) {
1187 return lex.Invalid(absl::StrFormat(
1188 "'%s' has already been set (either directly or as part of a oneof)",
1189 name));
1190 }
1191
1192 if (Traits::IsMap(*field)) {
1193 return ParseMap<Traits>(lex, *field, msg);
1194 }
1195
1196 if (Traits::IsRepeated(*field)) {
1197 if (lex.options().allow_legacy_syntax && !lex.Peek(JsonLexer::kArr)) {
1198 // The original ESF parser permits a single element in place of an array
1199 // thereof.
1200 return ParseSingular<Traits>(lex, *field, msg);
1201 }
1202 return ParseArray<Traits>(lex, *field, msg);
1203 }
1204
1205 return ParseSingular<Traits>(lex, *field, msg);
1206 }
1207
1208 template <typename Traits>
ParseMessage(JsonLexer & lex,const Desc<Traits> & desc,Msg<Traits> & msg,bool any_reparse)1209 absl::Status ParseMessage(JsonLexer& lex, const Desc<Traits>& desc,
1210 Msg<Traits>& msg, bool any_reparse) {
1211 MessageType type = ClassifyMessage(Traits::TypeName(desc));
1212 if (!any_reparse) {
1213 switch (type) {
1214 case MessageType::kAny:
1215 return ParseAny<Traits>(lex, desc, msg);
1216 case MessageType::kValue:
1217 return ParseValue<Traits>(lex, desc, msg);
1218 case MessageType::kStruct:
1219 return ParseStructValue<Traits>(lex, desc, msg);
1220 default:
1221 break;
1222 }
1223 // For some types, the ESF parser permits parsing the "non-special" version.
1224 // It is not clear if this counts as out-of-spec, but we're treating it as
1225 // such.
1226 bool is_upcoming_object = lex.Peek(JsonLexer::kObj);
1227 if (!(is_upcoming_object && lex.options().allow_legacy_syntax)) {
1228 switch (type) {
1229 case MessageType::kList:
1230 return ParseListValue<Traits>(lex, desc, msg);
1231 case MessageType::kWrapper: {
1232 return ParseSingular<Traits>(lex, Traits::MustHaveField(desc, 1),
1233 msg);
1234 }
1235 case MessageType::kTimestamp:
1236 return ParseTimestamp<Traits>(lex, desc, msg);
1237 case MessageType::kDuration:
1238 return ParseDuration<Traits>(lex, desc, msg);
1239 case MessageType::kFieldMask:
1240 return ParseFieldMask<Traits>(lex, desc, msg);
1241 default:
1242 break;
1243 }
1244 }
1245 }
1246
1247 return lex.VisitObject(
1248 [&](LocationWith<MaybeOwnedString>& name) -> absl::Status {
1249 // If this is a well-known type, we expect its contents to be inside
1250 // of a JSON field named "value".
1251 if (any_reparse) {
1252 if (name.value == "@type") {
1253 RETURN_IF_ERROR(lex.SkipValue());
1254 return absl::OkStatus();
1255 }
1256 if (type != MessageType::kNotWellKnown) {
1257 if (name.value != "value") {
1258 return lex.Invalid(
1259 "fields in a well-known-typed Any must be @type or value");
1260 }
1261 // Parse the upcoming value as the message itself. This is *not*
1262 // an Any reparse because we do not expect to see @type in the
1263 // upcoming value.
1264 return ParseMessage<Traits>(lex, desc, msg,
1265 /*any_reparse=*/false);
1266 }
1267 }
1268
1269 return ParseField<Traits>(lex, desc, name.value.ToString(), msg);
1270 });
1271 }
1272 } // namespace
1273
JsonStreamToMessage(io::ZeroCopyInputStream * input,Message * message,json_internal::ParseOptions options)1274 absl::Status JsonStreamToMessage(io::ZeroCopyInputStream* input,
1275 Message* message,
1276 json_internal::ParseOptions options) {
1277 MessagePath path(message->GetDescriptor()->full_name());
1278 JsonLexer lex(input, options, &path);
1279
1280 ParseProto2Descriptor::Msg msg(message);
1281 absl::Status s =
1282 ParseMessage<ParseProto2Descriptor>(lex, *message->GetDescriptor(), msg,
1283 /*any_reparse=*/false);
1284 if (s.ok() && !lex.AtEof()) {
1285 s = absl::InvalidArgumentError(
1286 "extraneous characters after end of JSON object");
1287 }
1288
1289 if (PROTOBUF_DEBUG) {
1290 ABSL_DLOG(INFO) << "json2/status: " << s;
1291 ABSL_DLOG(INFO) << "json2/output: " << message->DebugString();
1292 }
1293 return s;
1294 }
1295
JsonToBinaryStream(google::protobuf::util::TypeResolver * resolver,const std::string & type_url,io::ZeroCopyInputStream * json_input,io::ZeroCopyOutputStream * binary_output,json_internal::ParseOptions options)1296 absl::Status JsonToBinaryStream(google::protobuf::util::TypeResolver* resolver,
1297 const std::string& type_url,
1298 io::ZeroCopyInputStream* json_input,
1299 io::ZeroCopyOutputStream* binary_output,
1300 json_internal::ParseOptions options) {
1301 // NOTE: Most of the contortions in this function are to allow for capture of
1302 // input and output of the parser in ABSL_DLOG mode. Destruction order is very
1303 // critical in this function, because io::ZeroCopy*Stream types usually only
1304 // flush on destruction.
1305
1306 // For ABSL_DLOG, we would like to print out the input and output, which
1307 // requires buffering both instead of doing "zero copy". This block, and the
1308 // one at the end of the function, set up and tear down interception of the
1309 // input and output streams.
1310 std::string copy;
1311 std::string out;
1312 absl::optional<io::ArrayInputStream> tee_input;
1313 absl::optional<io::StringOutputStream> tee_output;
1314 if (PROTOBUF_DEBUG) {
1315 const void* data;
1316 int len;
1317 while (json_input->Next(&data, &len)) {
1318 copy.resize(copy.size() + len);
1319 std::memcpy(©[copy.size() - len], data, len);
1320 }
1321 tee_input.emplace(copy.data(), copy.size());
1322 tee_output.emplace(&out);
1323 ABSL_DLOG(INFO) << "json2/input: " << absl::CHexEscape(copy);
1324 }
1325
1326 // This scope forces the CodedOutputStream inside of `msg` to flush before we
1327 // possibly handle logging the binary protobuf output.
1328 absl::Status s;
1329 {
1330 MessagePath path(type_url);
1331 JsonLexer lex(tee_input.has_value() ? &*tee_input : json_input, options,
1332 &path);
1333 Msg<ParseProto3Type> msg(tee_output.has_value() ? &*tee_output
1334 : binary_output);
1335
1336 ResolverPool pool(resolver);
1337 auto desc = pool.FindMessage(type_url);
1338 RETURN_IF_ERROR(desc.status());
1339
1340 s = ParseMessage<ParseProto3Type>(lex, **desc, msg, /*any_reparse=*/false);
1341 if (s.ok() && !lex.AtEof()) {
1342 s = absl::InvalidArgumentError(
1343 "extraneous characters after end of JSON object");
1344 }
1345 }
1346
1347 if (PROTOBUF_DEBUG) {
1348 tee_output.reset(); // Flush the output stream.
1349 io::zc_sink_internal::ZeroCopyStreamByteSink(binary_output)
1350 .Append(out.data(), out.size());
1351 ABSL_DLOG(INFO) << "json2/status: " << s;
1352 ABSL_DLOG(INFO) << "json2/output: " << absl::BytesToHexString(out);
1353 }
1354
1355 return s;
1356 }
1357 } // namespace json_internal
1358 } // namespace protobuf
1359 } // namespace google
1360
1361 #include "google/protobuf/port_undef.inc"
1362