• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 //
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file or at
6 // https://developers.google.com/open-source/licenses/bsd
7 
8 #include "google/protobuf/json/internal/parser.h"
9 
10 #include <cfloat>
11 #include <cmath>
12 #include <cstdint>
13 #include <cstring>
14 #include <limits>
15 #include <memory>
16 #include <string>
17 #include <utility>
18 
19 #include "google/protobuf/type.pb.h"
20 #include "absl/base/attributes.h"
21 #include "absl/container/flat_hash_set.h"
22 #include "absl/log/absl_check.h"
23 #include "absl/log/absl_log.h"
24 #include "absl/status/status.h"
25 #include "absl/status/statusor.h"
26 #include "absl/strings/ascii.h"
27 #include "absl/strings/escaping.h"
28 #include "absl/strings/match.h"
29 #include "absl/strings/numbers.h"
30 #include "absl/strings/str_format.h"
31 #include "absl/strings/str_split.h"
32 #include "absl/strings/string_view.h"
33 #include "absl/types/optional.h"
34 #include "absl/types/span.h"
35 #include "google/protobuf/descriptor.h"
36 #include "google/protobuf/dynamic_message.h"
37 #include "google/protobuf/io/zero_copy_sink.h"
38 #include "google/protobuf/io/zero_copy_stream.h"
39 #include "google/protobuf/io/zero_copy_stream_impl_lite.h"
40 #include "google/protobuf/json/internal/descriptor_traits.h"
41 #include "google/protobuf/json/internal/lexer.h"
42 #include "google/protobuf/json/internal/parser_traits.h"
43 #include "google/protobuf/message.h"
44 #include "google/protobuf/util/type_resolver.h"
45 #include "google/protobuf/stubs/status_macros.h"
46 
47 // Must be included last.
48 #include "google/protobuf/port_def.inc"
49 
50 namespace google {
51 namespace protobuf {
52 namespace json_internal {
53 namespace {
54 // This file contains code that drives a JsonLexer to visit a JSON document and
55 // convert it into some form of proto.
56 //
57 // This semantic layer is duplicated: proto2-ish code can deserialize directly
58 // into a message, whereas proto3-ish code deserializes into a byte stream,
59 // using TypeResolvers instead of Descriptors.
60 //
61 // The parsing code is templated over which of these two reflection + output
62 // combinations is used. The traits types that collect the per-instantiation
63 // functionality can be found in json_util2_parser_traits-inl.h.
64 
// Lookup table from a byte, read as an ASCII code point, to its 6-bit value in
// the base64 alphabet. The traditional alphabet ('+' and '/') and the
// "web-safe" alphabet ('-' and '_') are both recognized.
//
// Any byte that is not a base64 digit -- the padding character '=' included --
// maps to -1. Decoders OR the sign-extended entries of a group together, so
// the sign bit of the combined word tells whether any byte was invalid.
constexpr signed char kBase64Table[256] = {
    // 0x00 - 0x0f: control characters
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0x10 - 0x1f: control characters
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0x20 - 0x2f: '+' and '-' are both 62, '/' is 63
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, 62, -1, 63,
    // 0x30 - 0x3f: '0'..'9' are 52..61
    52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1,
    // 0x40 - 0x4f: 'A'..'O' are 0..14
    -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
    // 0x50 - 0x5f: 'P'..'Z' are 15..25, '_' is 63
    15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, 63,
    // 0x60 - 0x6f: 'a'..'o' are 26..40
    -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
    // 0x70 - 0x7f: 'p'..'z' are 41..51
    41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1,
    // 0x80 - 0xff: never valid
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1};
110 
Base64Lookup(char c)111 uint32_t Base64Lookup(char c) {
112   // Sign-extend return value so high bit will be set on any unexpected char.
113   return static_cast<uint32_t>(kBase64Table[static_cast<uint8_t>(c)]);
114 }
115 
116 // Decodes `base64` in-place, shrinking the length as appropriate.
DecodeBase64InPlace(absl::Span<char> base64)117 absl::StatusOr<absl::Span<char>> DecodeBase64InPlace(absl::Span<char> base64) {
118   // We decode in place. This is safe because this is a new buffer (not
119   // aliasing the input) and because base64 decoding shrinks 4 bytes into 3.
120   char* out = base64.data();
121   const char* ptr = base64.data();
122   const char* end = ptr + base64.size();
123   const char* end4 = ptr + (base64.size() & ~3u);
124 
125   for (; ptr < end4; ptr += 4, out += 3) {
126     auto val = Base64Lookup(ptr[0]) << 18 | Base64Lookup(ptr[1]) << 12 |
127                Base64Lookup(ptr[2]) << 6 | Base64Lookup(ptr[3]) << 0;
128 
129     if (static_cast<int32_t>(val) < 0) {
130       // Junk chars or padding. Remove trailing padding, if any.
131       if (end - ptr == 4 && ptr[3] == '=') {
132         if (ptr[2] == '=') {
133           end -= 2;
134         } else {
135           end -= 1;
136         }
137       }
138       break;
139     }
140 
141     out[0] = val >> 16;
142     out[1] = (val >> 8) & 0xff;
143     out[2] = val & 0xff;
144   }
145 
146   if (ptr < end) {
147     uint32_t val = ~0u;
148     switch (end - ptr) {
149       case 2:
150         val = Base64Lookup(ptr[0]) << 18 | Base64Lookup(ptr[1]) << 12;
151         out[0] = val >> 16;
152         out += 1;
153         break;
154       case 3:
155         val = Base64Lookup(ptr[0]) << 18 | Base64Lookup(ptr[1]) << 12 |
156               Base64Lookup(ptr[2]) << 6;
157         out[0] = val >> 16;
158         out[1] = (val >> 8) & 0xff;
159         out += 2;
160         break;
161     }
162 
163     if (static_cast<int32_t>(val) < 0) {
164       return absl::InvalidArgumentError("corrupt base64");
165     }
166   }
167 
168   return absl::Span<char>(base64.data(),
169                           static_cast<size_t>(out - base64.data()));
170 }
171 
172 template <typename T>
ParseIntInner(JsonLexer & lex,double lo,double hi)173 absl::StatusOr<LocationWith<T>> ParseIntInner(JsonLexer& lex, double lo,
174                                               double hi) {
175   absl::StatusOr<JsonLexer::Kind> kind = lex.PeekKind();
176   RETURN_IF_ERROR(kind.status());
177 
178   LocationWith<T> n;
179   switch (*kind) {
180     case JsonLexer::kNum: {
181       absl::StatusOr<LocationWith<MaybeOwnedString>> x = lex.ParseRawNumber();
182       RETURN_IF_ERROR(x.status());
183       n.loc = x->loc;
184       if (absl::SimpleAtoi(x->value.AsView(), &n.value)) {
185         break;
186       }
187 
188       double d;
189       if (!absl::SimpleAtod(x->value.AsView(), &d) || !std::isfinite(d)) {
190         return x->loc.Invalid(
191             absl::StrFormat("invalid number: '%s'", x->value.AsView()));
192       }
193 
194       // Conversion overflow here would be UB.
195       if (lo > d || d > hi) {
196         return lex.Invalid("JSON number out of range for int");
197       }
198       n.value = static_cast<T>(d);
199       if (d - static_cast<double>(n.value) != 0) {
200         return lex.Invalid(
201             "expected integer, but JSON number had fractional part");
202       }
203       break;
204     }
205     case JsonLexer::kStr: {
206       absl::StatusOr<LocationWith<MaybeOwnedString>> str = lex.ParseUtf8();
207       RETURN_IF_ERROR(str.status());
208       // SimpleAtoi will ignore leading and trailing whitespace, so we need
209       // to check for it ourselves.
210       for (char c : str->value.AsView()) {
211         if (absl::ascii_isspace(c)) {
212           return lex.Invalid("non-number characters in quoted number");
213         }
214       }
215       if (!absl::SimpleAtoi(str->value.AsView(), &n.value)) {
216         return str->loc.Invalid("non-number characters in quoted number");
217       }
218       n.loc = str->loc;
219       break;
220     }
221     default:
222       return lex.Invalid("expected number or string");
223   }
224 
225   return n;
226 }
227 
228 template <typename Traits>
ParseInt(JsonLexer & lex,Field<Traits> field)229 absl::StatusOr<int64_t> ParseInt(JsonLexer& lex, Field<Traits> field) {
230   absl::StatusOr<LocationWith<int64_t>> n =
231       ParseIntInner<int64_t>(lex, -9007199254740992.0, 9007199254740992.0);
232   RETURN_IF_ERROR(n.status());
233 
234   if (Traits::Is32Bit(field)) {
235     if (std::numeric_limits<int32_t>::min() > n->value ||
236         n->value > std::numeric_limits<int32_t>::max()) {
237       return n->loc.Invalid("integer out of range");
238     }
239   }
240 
241   return n->value;
242 }
243 
244 template <typename Traits>
ParseUInt(JsonLexer & lex,Field<Traits> field)245 absl::StatusOr<uint64_t> ParseUInt(JsonLexer& lex, Field<Traits> field) {
246   absl::StatusOr<LocationWith<uint64_t>> n =
247       ParseIntInner<uint64_t>(lex, 0, 18014398509481984.0);
248   RETURN_IF_ERROR(n.status());
249 
250   if (Traits::Is32Bit(field)) {
251     if (n->value > std::numeric_limits<uint32_t>::max()) {
252       return n->loc.Invalid("integer out of range");
253     }
254   }
255 
256   return n->value;
257 }
258 
259 template <typename Traits>
ParseFp(JsonLexer & lex,Field<Traits> field)260 absl::StatusOr<double> ParseFp(JsonLexer& lex, Field<Traits> field) {
261   absl::StatusOr<JsonLexer::Kind> kind = lex.PeekKind();
262   RETURN_IF_ERROR(kind.status());
263 
264   double n;
265   switch (*kind) {
266     case JsonLexer::kNum: {
267       absl::StatusOr<LocationWith<double>> d = lex.ParseNumber();
268       RETURN_IF_ERROR(d.status());
269       n = d->value;
270       break;
271     }
272     case JsonLexer::kStr: {
273       absl::StatusOr<LocationWith<MaybeOwnedString>> str = lex.ParseUtf8();
274       RETURN_IF_ERROR(str.status());
275 
276       if (str->value == "NaN") {
277         n = NAN;
278       } else if (str->value == "Infinity") {
279         n = INFINITY;
280       } else if (str->value == "-Infinity") {
281         n = -INFINITY;
282       } else if (!absl::SimpleAtod(str->value.AsView(), &n)) {
283         return str->loc.Invalid("non-number characters in quoted number");
284       }
285       break;
286     }
287     default:
288       return lex.Invalid("expected number or string");
289   }
290 
291   if (Traits::Is32Bit(field)) {
292     // Detect out-of-range 32-bit floats by seeing whether the conversion result
293     // is still finite. Finite extreme values may have textual representations
294     // that parse to 64-bit values outside the 32-bit range, but which are
295     // closer to the 32-bit extreme than to the "next value with the same
296     // precision".
297     if (std::isfinite(n) && !std::isfinite(static_cast<float>(n))) {
298       return lex.Invalid("float out of range");
299     }
300   }
301 
302   return n;
303 }
304 
305 template <typename Traits>
ParseStrOrBytes(JsonLexer & lex,Field<Traits> field)306 absl::StatusOr<std::string> ParseStrOrBytes(JsonLexer& lex,
307                                             Field<Traits> field) {
308   absl::StatusOr<LocationWith<MaybeOwnedString>> str = lex.ParseUtf8();
309   RETURN_IF_ERROR(str.status());
310 
311   if (Traits::FieldType(field) == FieldDescriptor::TYPE_BYTES) {
312     std::string& b64 = str->value.ToString();
313     absl::StatusOr<absl::Span<char>> decoded =
314         DecodeBase64InPlace(absl::MakeSpan(&b64[0], b64.size()));
315     if (!decoded.ok()) {
316       return str->loc.Invalid(decoded.status().message());
317     }
318     b64.resize(decoded->size());
319   }
320 
321   return std::move(str->value.ToString());
322 }
323 
324 template <typename Traits>
ParseEnumFromStr(JsonLexer & lex,MaybeOwnedString & str,Field<Traits> field)325 absl::StatusOr<absl::optional<int32_t>> ParseEnumFromStr(JsonLexer& lex,
326                                                          MaybeOwnedString& str,
327                                                          Field<Traits> field) {
328   absl::StatusOr<int32_t> value = Traits::EnumNumberByName(
329       field, str.AsView(), lex.options().case_insensitive_enum_parsing);
330   if (value.ok()) {
331     return absl::optional<int32_t>(*value);
332   }
333 
334   int32_t i;
335   if (absl::SimpleAtoi(str.AsView(), &i)) {
336     return absl::optional<int32_t>(i);
337   } else if (lex.options().ignore_unknown_fields) {
338     return {absl::nullopt};
339   }
340 
341   return value.status();
342 }
343 
344 // Parses an enum; can return nullopt if a quoted enumerator that we don't
345 // know about is received and `ignore_unknown_fields` is set.
346 template <typename Traits>
ParseEnum(JsonLexer & lex,Field<Traits> field)347 absl::StatusOr<absl::optional<int32_t>> ParseEnum(JsonLexer& lex,
348                                                   Field<Traits> field) {
349   absl::StatusOr<JsonLexer::Kind> kind = lex.PeekKind();
350   RETURN_IF_ERROR(kind.status());
351 
352   int32_t n = 0;
353   switch (*kind) {
354     case JsonLexer::kStr: {
355       absl::StatusOr<LocationWith<MaybeOwnedString>> str = lex.ParseUtf8();
356       RETURN_IF_ERROR(str.status());
357 
358       auto e = ParseEnumFromStr<Traits>(lex, str->value, field);
359       RETURN_IF_ERROR(e.status());
360       if (!e->has_value()) {
361         return {absl::nullopt};
362       }
363       n = **e;
364       break;
365     }
366     case JsonLexer::kNum:
367       return ParseInt<Traits>(lex, field);
368     default:
369       return lex.Invalid("expected number or string");
370   }
371 
372   return n;
373 }
374 
// Mutually recursive with functions that follow.
//
// These are declared ahead of their definitions because the field-level
// parsers below need to call back into message-level parsing (for example,
// ParseSingular calls ParseMessage for message- and group-typed fields).

// Parses a JSON value from `lex` into `msg`, whose type is described by
// `desc`. NOTE(review): the meaning of `any_reparse` is not visible from this
// declaration; callers in this part of the file pass `false`.
template <typename Traits>
absl::Status ParseMessage(JsonLexer& lex, const Desc<Traits>& desc,
                          Msg<Traits>& msg, bool any_reparse);
// Parses the value of the field called `name` of message type `desc` from
// `lex` into `msg`. NOTE(review): presumably invoked once per JSON object
// member; confirm against the definition.
template <typename Traits>
absl::Status ParseField(JsonLexer& lex, const Desc<Traits>& desc,
                        absl::string_view name, Msg<Traits>& msg);
382 
383 template <typename Traits>
ParseSingular(JsonLexer & lex,Field<Traits> field,Msg<Traits> & msg)384 absl::Status ParseSingular(JsonLexer& lex, Field<Traits> field,
385                            Msg<Traits>& msg) {
386   auto field_type = Traits::FieldType(field);
387   if (lex.Peek(JsonLexer::kNull)) {
388     auto message_type = ClassifyMessage(Traits::FieldTypeName(field));
389     switch (field_type) {
390       case FieldDescriptor::TYPE_ENUM:
391         if (message_type == MessageType::kNull) {
392           Traits::SetEnum(field, msg, 0);
393         }
394         break;
395       case FieldDescriptor::TYPE_MESSAGE: {
396         if (message_type == MessageType::kValue) {
397           return Traits::NewMsg(
398               field, msg,
399               [&](const Desc<Traits>& type, Msg<Traits>& msg) -> absl::Status {
400                 auto field = Traits::FieldByNumber(type, 1);
401                 ABSL_DCHECK(field.has_value());
402                 RETURN_IF_ERROR(lex.Expect("null"));
403                 Traits::SetEnum(Traits::MustHaveField(type, 1), msg, 0);
404                 return absl::OkStatus();
405               });
406         }
407         break;
408       }
409       default:
410         break;
411     }
412     return lex.Expect("null");
413   }
414 
415   switch (field_type) {
416     case FieldDescriptor::TYPE_FLOAT: {
417       auto x = ParseFp<Traits>(lex, field);
418       RETURN_IF_ERROR(x.status());
419       Traits::SetFloat(field, msg, *x);
420       break;
421     }
422     case FieldDescriptor::TYPE_DOUBLE: {
423       auto x = ParseFp<Traits>(lex, field);
424       RETURN_IF_ERROR(x.status());
425       Traits::SetDouble(field, msg, *x);
426       break;
427     }
428 
429     case FieldDescriptor::TYPE_SFIXED64:
430     case FieldDescriptor::TYPE_SINT64:
431     case FieldDescriptor::TYPE_INT64: {
432       auto x = ParseInt<Traits>(lex, field);
433       RETURN_IF_ERROR(x.status());
434       Traits::SetInt64(field, msg, *x);
435       break;
436     }
437     case FieldDescriptor::TYPE_FIXED64:
438     case FieldDescriptor::TYPE_UINT64: {
439       auto x = ParseUInt<Traits>(lex, field);
440       RETURN_IF_ERROR(x.status());
441       Traits::SetUInt64(field, msg, *x);
442       break;
443     }
444 
445     case FieldDescriptor::TYPE_SFIXED32:
446     case FieldDescriptor::TYPE_SINT32:
447     case FieldDescriptor::TYPE_INT32: {
448       auto x = ParseInt<Traits>(lex, field);
449       RETURN_IF_ERROR(x.status());
450       Traits::SetInt32(field, msg, static_cast<int32_t>(*x));
451       break;
452     }
453     case FieldDescriptor::TYPE_FIXED32:
454     case FieldDescriptor::TYPE_UINT32: {
455       auto x = ParseUInt<Traits>(lex, field);
456       RETURN_IF_ERROR(x.status());
457       Traits::SetUInt32(field, msg, static_cast<uint32_t>(*x));
458       break;
459     }
460     case FieldDescriptor::TYPE_BOOL: {
461       absl::StatusOr<JsonLexer::Kind> kind = lex.PeekKind();
462       RETURN_IF_ERROR(kind.status());
463 
464       switch (*kind) {
465         case JsonLexer::kTrue:
466           RETURN_IF_ERROR(lex.Expect("true"));
467           Traits::SetBool(field, msg, true);
468           break;
469         case JsonLexer::kFalse:
470           RETURN_IF_ERROR(lex.Expect("false"));
471           Traits::SetBool(field, msg, false);
472           break;
473         case JsonLexer::kStr: {
474           if (!lex.options().allow_legacy_syntax) {
475             goto bad;
476           }
477 
478           auto x = lex.ParseUtf8();
479           RETURN_IF_ERROR(x.status());
480 
481           bool flag;
482           if (!absl::SimpleAtob(x->value, &flag)) {
483             // Is this error a lie? Do we accept things otyher than "true" and
484             // "false" because SimpleAtob does? Absolutely!
485             return x->loc.Invalid("expected 'true' or 'false'");
486           }
487           Traits::SetBool(field, msg, flag);
488 
489           break;
490         }
491         bad:
492         default:
493           return lex.Invalid("expected 'true' or 'false'");
494       }
495       break;
496     }
497     case FieldDescriptor::TYPE_STRING:
498     case FieldDescriptor::TYPE_BYTES: {
499       auto x = ParseStrOrBytes<Traits>(lex, field);
500       RETURN_IF_ERROR(x.status());
501       Traits::SetString(field, msg, *x);
502       break;
503     }
504     case FieldDescriptor::TYPE_ENUM: {
505       absl::StatusOr<absl::optional<int32_t>> x = ParseEnum<Traits>(lex, field);
506       RETURN_IF_ERROR(x.status());
507 
508       if (x->has_value() || Traits::IsImplicitPresence(field)) {
509         Traits::SetEnum(field, msg, x->value_or(0));
510       }
511       break;
512     }
513     case FieldDescriptor::TYPE_MESSAGE:
514     case FieldDescriptor::TYPE_GROUP: {
515       return Traits::NewMsg(
516           field, msg,
517           [&](const Desc<Traits>& type, Msg<Traits>& msg) -> absl::Status {
518             return ParseMessage<Traits>(lex, type, msg,
519                                         /*any_reparse=*/false);
520           });
521     }
522     default:
523       return lex.Invalid(
524           absl::StrCat("unsupported field type: ", Traits::FieldType(field)));
525   }
526 
527   return absl::OkStatus();
528 }
529 
530 template <typename Traits>
EmitNull(JsonLexer & lex,Field<Traits> field,Msg<Traits> & msg)531 absl::Status EmitNull(JsonLexer& lex, Field<Traits> field, Msg<Traits>& msg) {
532   switch (Traits::FieldType(field)) {
533     case FieldDescriptor::TYPE_FLOAT:
534       Traits::SetFloat(field, msg, 0);
535       break;
536     case FieldDescriptor::TYPE_DOUBLE:
537       Traits::SetDouble(field, msg, 0);
538       break;
539     case FieldDescriptor::TYPE_SFIXED64:
540     case FieldDescriptor::TYPE_SINT64:
541     case FieldDescriptor::TYPE_INT64:
542       Traits::SetInt64(field, msg, 0);
543       break;
544     case FieldDescriptor::TYPE_FIXED64:
545     case FieldDescriptor::TYPE_UINT64:
546       Traits::SetUInt64(field, msg, 0);
547       break;
548     case FieldDescriptor::TYPE_SFIXED32:
549     case FieldDescriptor::TYPE_SINT32:
550     case FieldDescriptor::TYPE_INT32:
551       Traits::SetInt32(field, msg, 0);
552       break;
553     case FieldDescriptor::TYPE_FIXED32:
554     case FieldDescriptor::TYPE_UINT32:
555       Traits::SetUInt32(field, msg, 0);
556       break;
557     case FieldDescriptor::TYPE_BOOL:
558       Traits::SetBool(field, msg, false);
559       break;
560     case FieldDescriptor::TYPE_STRING:
561     case FieldDescriptor::TYPE_BYTES:
562       Traits::SetString(field, msg, "");
563       break;
564     case FieldDescriptor::TYPE_ENUM:
565       Traits::SetEnum(field, msg, 0);
566       break;
567     case FieldDescriptor::TYPE_MESSAGE:
568     case FieldDescriptor::TYPE_GROUP:
569       return Traits::NewMsg(field, msg,
570                             [](const auto&, const auto&) -> absl::Status {
571                               return absl::OkStatus();
572                             });
573     default:
574       return lex.Invalid(
575           absl::StrCat("unsupported field type: ", Traits::FieldType(field)));
576   }
577   return absl::OkStatus();
578 }
579 
580 template <typename Traits>
ParseArray(JsonLexer & lex,Field<Traits> field,Msg<Traits> & msg)581 absl::Status ParseArray(JsonLexer& lex, Field<Traits> field, Msg<Traits>& msg) {
582   if (lex.Peek(JsonLexer::kNull)) {
583     return lex.Expect("null");
584   }
585 
586   return lex.VisitArray([&]() -> absl::Status {
587     lex.path().NextRepeated();
588     MessageType type = ClassifyMessage(Traits::FieldTypeName(field));
589 
590     if (lex.Peek(JsonLexer::kNull)) {
591       if (type == MessageType::kValue) {
592         return ParseSingular<Traits>(lex, field, msg);
593       }
594       if (type == MessageType::kNull) {
595         return ParseSingular<Traits>(lex, field, msg);
596       }
597 
598       if (lex.options().allow_legacy_syntax) {
599         RETURN_IF_ERROR(lex.Expect("null"));
600         return EmitNull<Traits>(lex, field, msg);
601       }
602       return lex.Invalid("null cannot occur inside of repeated fields");
603     }
604 
605     // Note that this is sufficient to catch when we are inside of a ListValue,
606     // because a ListValue's sole field is of type Value. Thus, we only need to
607     // classify cases in which we are inside of an array and parsing messages
608     // that like looking like arrays.
609     //
610     // This will also correctly handle e.g. writing out a ListValue with the
611     // legacy syntax of `{"values": [[0], [1], [2]]}`, which does not go through
612     // the custom parser handler.
613     bool can_flatten =
614         type != MessageType::kValue && type != MessageType::kList;
615     if (can_flatten && lex.options().allow_legacy_syntax &&
616         lex.Peek(JsonLexer::kArr)) {
617       // You read that right. In legacy mode, if we encounter an array within
618       // an array, we just flatten it as part of the current array!
619       //
620       // This DOES NOT apply when parsing a google.protobuf.Value or a
621       // google.protobuf.ListValue!
622       return ParseArray<Traits>(lex, field, msg);
623     }
624     return ParseSingular<Traits>(lex, field, msg);
625   });
626 }
627 
628 template <typename Traits>
ParseMap(JsonLexer & lex,Field<Traits> field,Msg<Traits> & msg)629 absl::Status ParseMap(JsonLexer& lex, Field<Traits> field, Msg<Traits>& msg) {
630   if (lex.Peek(JsonLexer::kNull)) {
631     return lex.Expect("null");
632   }
633 
634   absl::flat_hash_set<std::string> keys_seen;
635   return lex.VisitObject(
636       [&](LocationWith<MaybeOwnedString>& key) -> absl::Status {
637         lex.path().NextRepeated();
638         auto insert_result = keys_seen.emplace(key.value.AsView());
639         if (!insert_result.second) {
640           return key.loc.Invalid(absl::StrFormat(
641               "got unexpectedly-repeated repeated map key: '%s'",
642               key.value.AsView()));
643         }
644         return Traits::NewMsg(
645             field, msg,
646             [&](const Desc<Traits>& type, Msg<Traits>& entry) -> absl::Status {
647               auto key_field = Traits::KeyField(type);
648               switch (Traits::FieldType(key_field)) {
649                 case FieldDescriptor::TYPE_INT64:
650                 case FieldDescriptor::TYPE_SINT64:
651                 case FieldDescriptor::TYPE_SFIXED64: {
652                   int64_t n;
653                   if (!absl::SimpleAtoi(key.value.AsView(), &n)) {
654                     return key.loc.Invalid(
655                         "non-number characters in quoted number");
656                   }
657                   Traits::SetInt64(key_field, entry, n);
658                   break;
659                 }
660                 case FieldDescriptor::TYPE_UINT64:
661                 case FieldDescriptor::TYPE_FIXED64: {
662                   uint64_t n;
663                   if (!absl::SimpleAtoi(key.value.AsView(), &n)) {
664                     return key.loc.Invalid(
665                         "non-number characters in quoted number");
666                   }
667                   Traits::SetUInt64(key_field, entry, n);
668                   break;
669                 }
670                 case FieldDescriptor::TYPE_INT32:
671                 case FieldDescriptor::TYPE_SINT32:
672                 case FieldDescriptor::TYPE_SFIXED32: {
673                   int32_t n;
674                   if (!absl::SimpleAtoi(key.value.AsView(), &n)) {
675                     return key.loc.Invalid(
676                         "non-number characters in quoted number");
677                   }
678                   Traits::SetInt32(key_field, entry, n);
679                   break;
680                 }
681                 case FieldDescriptor::TYPE_UINT32:
682                 case FieldDescriptor::TYPE_FIXED32: {
683                   uint32_t n;
684                   if (!absl::SimpleAtoi(key.value.AsView(), &n)) {
685                     return key.loc.Invalid(
686                         "non-number characters in quoted number");
687                   }
688                   Traits::SetUInt32(key_field, entry, n);
689                   break;
690                 }
691                 case FieldDescriptor::TYPE_BOOL: {
692                   if (key.value == "true") {
693                     Traits::SetBool(key_field, entry, true);
694                   } else if (key.value == "false") {
695                     Traits::SetBool(key_field, entry, false);
696                   } else {
697                     return key.loc.Invalid(absl::StrFormat(
698                         "expected bool string, got '%s'", key.value.AsView()));
699                   }
700                   break;
701                 }
702                 case FieldDescriptor::TYPE_STRING: {
703                   Traits::SetString(key_field, entry,
704                                     std::move(key.value.ToString()));
705                   break;
706                 }
707                 default:
708                   return lex.Invalid("unsupported map key type");
709               }
710 
711               return ParseSingular<Traits>(lex, Traits::ValueField(type),
712                                            entry);
713             });
714       });
715 }
716 
TakeTimeDigitsWithSuffixAndAdvance(absl::string_view & data,int max_digits,absl::string_view end)717 absl::optional<uint32_t> TakeTimeDigitsWithSuffixAndAdvance(
718     absl::string_view& data, int max_digits, absl::string_view end) {
719   ABSL_DCHECK_LE(max_digits, 9);
720 
721   uint32_t val = 0;
722   int limit = max_digits;
723   while (!data.empty()) {
724     if (limit-- < 0) {
725       return absl::nullopt;
726     }
727     uint32_t digit = data[0] - '0';
728     if (digit >= 10) {
729       break;
730     }
731 
732     val *= 10;
733     val += digit;
734     data = data.substr(1);
735   }
736   if (!absl::StartsWith(data, end)) {
737     return absl::nullopt;
738   }
739 
740   data = data.substr(end.size());
741   return val;
742 }
743 
TakeNanosAndAdvance(absl::string_view & data)744 absl::optional<int32_t> TakeNanosAndAdvance(absl::string_view& data) {
745   int32_t frac_secs = 0;
746   size_t frac_digits = 0;
747   if (absl::StartsWith(data, ".")) {
748     for (char c : data.substr(1)) {
749       if (!absl::ascii_isdigit(c)) {
750         break;
751       }
752       ++frac_digits;
753     }
754     auto digits = data.substr(1, frac_digits);
755     if (frac_digits == 0 || frac_digits > 9 ||
756         !absl::SimpleAtoi(digits, &frac_secs)) {
757       return absl::nullopt;
758     }
759     data = data.substr(frac_digits + 1);
760   }
761   for (int i = 0; i < 9 - frac_digits; ++i) {
762     frac_secs *= 10;
763   }
764   return frac_secs;
765 }
766 
767 template <typename Traits>
ParseTimestamp(JsonLexer & lex,const Desc<Traits> & desc,Msg<Traits> & msg)768 absl::Status ParseTimestamp(JsonLexer& lex, const Desc<Traits>& desc,
769                             Msg<Traits>& msg) {
770   if (lex.Peek(JsonLexer::kNull)) {
771     return lex.Expect("null");
772   }
773 
774   absl::StatusOr<LocationWith<MaybeOwnedString>> str = lex.ParseUtf8();
775   RETURN_IF_ERROR(str.status());
776 
777   absl::string_view data = str->value.AsView();
778   if (data.size() < 20) {
779     return str->loc.Invalid("timestamp string too short");
780   }
781 
782   int64_t secs;
783   {
784     /* 1972-01-01T01:00:00 */
785     auto year = TakeTimeDigitsWithSuffixAndAdvance(data, 4, "-");
786     if (!year.has_value() || *year == 0) {
787       return str->loc.Invalid("bad year in timestamp");
788     }
789     auto mon = TakeTimeDigitsWithSuffixAndAdvance(data, 2, "-");
790     if (!mon.has_value() || *mon == 0) {
791       return str->loc.Invalid("bad month in timestamp");
792     }
793     auto day = TakeTimeDigitsWithSuffixAndAdvance(data, 2, "T");
794     if (!day.has_value() || *day == 0) {
795       return str->loc.Invalid("bad day in timestamp");
796     }
797     auto hour = TakeTimeDigitsWithSuffixAndAdvance(data, 2, ":");
798     if (!hour.has_value()) {
799       return str->loc.Invalid("bad hours in timestamp");
800     }
801     auto min = TakeTimeDigitsWithSuffixAndAdvance(data, 2, ":");
802     if (!min.has_value()) {
803       return str->loc.Invalid("bad minutes in timestamp");
804     }
805     auto sec = TakeTimeDigitsWithSuffixAndAdvance(data, 2, "");
806     if (!sec.has_value()) {
807       return str->loc.Invalid("bad seconds in timestamp");
808     }
809 
810     uint32_t m_adj = *mon - 3;  // March-based month.
811     uint32_t carry = m_adj > *mon ? 1 : 0;
812 
813     uint32_t year_base = 4800;  // Before min year, multiple of 400.
814     uint32_t y_adj = *year + year_base - carry;
815 
816     uint32_t month_days = ((m_adj + carry * 12) * 62719 + 769) / 2048;
817     uint32_t leap_days = y_adj / 4 - y_adj / 100 + y_adj / 400;
818     int32_t epoch_days =
819         y_adj * 365 + leap_days + month_days + (*day - 1) - 2472632;
820 
821     secs = int64_t{epoch_days} * 86400 + *hour * 3600 + *min * 60 + *sec;
822   }
823 
824   auto nanos = TakeNanosAndAdvance(data);
825   if (!nanos.has_value()) {
826     return str->loc.Invalid("timestamp had bad nanoseconds");
827   }
828 
829   if (data.empty()) {
830     return str->loc.Invalid("timestamp missing timezone offset");
831   }
832 
833   {
834     // [+-]hh:mm or Z
835     bool neg = false;
836     switch (data[0]) {
837       case '-':
838         neg = true;
839         ABSL_FALLTHROUGH_INTENDED;
840       case '+': {
841         if (data.size() != 6) {
842           return str->loc.Invalid("timestamp offset of wrong size.");
843         }
844 
845         data = data.substr(1);
846         auto hour = TakeTimeDigitsWithSuffixAndAdvance(data, 2, ":");
847         auto mins = TakeTimeDigitsWithSuffixAndAdvance(data, 2, "");
848         if (!hour.has_value() || !mins.has_value()) {
849           return str->loc.Invalid("timestamp offset has bad hours and minutes");
850         }
851 
852         int64_t offset = (*hour * 60 + *mins) * 60;
853         secs += (neg ? offset : -offset);
854         break;
855       }
856       // Lowercase z is not accepted, per the spec.
857       case 'Z':
858         if (data.size() == 1) {
859           break;
860         }
861         ABSL_FALLTHROUGH_INTENDED;
862       default:
863         return str->loc.Invalid("bad timezone offset");
864     }
865   }
866 
867   Traits::SetInt64(Traits::MustHaveField(desc, 1), msg, secs);
868   Traits::SetInt32(Traits::MustHaveField(desc, 2), msg, *nanos);
869 
870   return absl::OkStatus();
871 }
872 
873 template <typename Traits>
ParseDuration(JsonLexer & lex,const Desc<Traits> & desc,Msg<Traits> & msg)874 absl::Status ParseDuration(JsonLexer& lex, const Desc<Traits>& desc,
875                            Msg<Traits>& msg) {
876   if (lex.Peek(JsonLexer::kNull)) {
877     return lex.Expect("null");
878   }
879 
880   constexpr int64_t kMaxSeconds = int64_t{3652500} * 86400;
881 
882   absl::StatusOr<LocationWith<MaybeOwnedString>> str = lex.ParseUtf8();
883   RETURN_IF_ERROR(str.status());
884 
885   size_t int_part_end = 0;
886   for (char c : str->value.AsView()) {
887     if (!absl::ascii_isdigit(c) && c != '-') {
888       break;
889     }
890     ++int_part_end;
891   }
892   if (int_part_end == 0) {
893     return str->loc.Invalid("duration must start with an integer");
894   }
895 
896   absl::string_view sec_digits = str->value.AsView().substr(0, int_part_end);
897   int64_t secs;
898   if (!absl::SimpleAtoi(sec_digits, &secs)) {
899     return str->loc.Invalid("duration had bad seconds");
900   }
901 
902   if (secs > kMaxSeconds || secs < -kMaxSeconds) {
903     return str->loc.Invalid("duration out of range");
904   }
905 
906   absl::string_view rest = str->value.AsView().substr(int_part_end);
907   auto nanos = TakeNanosAndAdvance(rest);
908   if (!nanos.has_value()) {
909     return str->loc.Invalid("duration had bad nanoseconds");
910   }
911 
912   bool isNegative = (secs < 0) || absl::StartsWith(sec_digits, "-");
913   if (isNegative) {
914     *nanos *= -1;
915   }
916 
917   if (rest != "s") {
918     return str->loc.Invalid("duration must end with a single 's'");
919   }
920 
921   Traits::SetInt64(Traits::MustHaveField(desc, 1), msg, secs);
922   Traits::SetInt32(Traits::MustHaveField(desc, 2), msg, *nanos);
923 
924   return absl::OkStatus();
925 }
926 
927 template <typename Traits>
ParseFieldMask(JsonLexer & lex,const Desc<Traits> & desc,Msg<Traits> & msg)928 absl::Status ParseFieldMask(JsonLexer& lex, const Desc<Traits>& desc,
929                             Msg<Traits>& msg) {
930   absl::StatusOr<LocationWith<MaybeOwnedString>> str = lex.ParseUtf8();
931   RETURN_IF_ERROR(str.status());
932   auto paths = str->value.AsView();
933 
934   // The special case of the empty string is not handled correctly below,
935   // because StrSplit("", ',') is [""], not [].
936   if (paths.empty()) {
937     return absl::OkStatus();
938   }
939 
940   // google.protobuf.FieldMask has a single field with number 1.
941   auto paths_field = Traits::MustHaveField(desc, 1);
942   for (absl::string_view path : absl::StrSplit(paths, ',')) {
943     std::string snake_path;
944     // Assume approximately six-letter words, so add one extra space for an
945     // underscore for every six bytes.
946     snake_path.reserve(path.size() * 7 / 6);
947     for (char c : path) {
948       if (absl::ascii_isdigit(c) || absl::ascii_islower(c) || c == '.') {
949         snake_path.push_back(c);
950       } else if (absl::ascii_isupper(c)) {
951         snake_path.push_back('_');
952         snake_path.push_back(absl::ascii_tolower(c));
953       } else if (lex.options().allow_legacy_syntax) {
954         snake_path.push_back(c);
955       } else {
956         return str->loc.Invalid("unexpected character in FieldMask");
957       }
958     }
959     Traits::SetString(paths_field, msg, snake_path);
960   }
961 
962   return absl::OkStatus();
963 }
964 
965 template <typename Traits>
ParseAny(JsonLexer & lex,const Desc<Traits> & desc,Msg<Traits> & msg)966 absl::Status ParseAny(JsonLexer& lex, const Desc<Traits>& desc,
967                       Msg<Traits>& msg) {
968   // Buffer an entire object. Because @type can occur anywhere, we're forced
969   // to do this.
970   RETURN_IF_ERROR(lex.SkipToToken());
971   auto mark = lex.BeginMark();
972 
973   // Search for @type, buffering the entire object along the way so we can
974   // reparse it.
975   absl::optional<MaybeOwnedString> type_url;
976   RETURN_IF_ERROR(lex.VisitObject(
977       [&](const LocationWith<MaybeOwnedString>& key) -> absl::Status {
978         if (key.value == "@type") {
979           if (type_url.has_value()) {
980             return key.loc.Invalid("repeated @type in Any");
981           }
982 
983           absl::StatusOr<LocationWith<MaybeOwnedString>> maybe_url =
984               lex.ParseUtf8();
985           RETURN_IF_ERROR(maybe_url.status());
986           type_url = std::move(maybe_url)->value;
987           return absl::OkStatus();
988         }
989         return lex.SkipValue();
990       }));
991 
992   // Build a new lexer over the skipped object.
993   absl::string_view any_text = mark.value.UpToUnread();
994   io::ArrayInputStream in(any_text.data(), any_text.size());
995   // Copying lex.options() is important; it inherits the recursion
996   // limit.
997   JsonLexer any_lex(&in, lex.options(), &lex.path(), mark.loc);
998 
999   if (!type_url.has_value() && !lex.options().allow_legacy_syntax) {
1000     return mark.loc.Invalid("missing @type in Any");
1001   }
1002 
1003   if (type_url.has_value()) {
1004     Traits::SetString(Traits::MustHaveField(desc, 1), msg, type_url->AsView());
1005     return Traits::NewDynamic(
1006         Traits::MustHaveField(desc, 2), type_url->ToString(), msg,
1007         [&](const Desc<Traits>& desc, Msg<Traits>& msg) {
1008           auto pop = any_lex.path().Push("<any>", FieldDescriptor::TYPE_MESSAGE,
1009                                          Traits::TypeName(desc));
1010           return ParseMessage<Traits>(any_lex, desc, msg,
1011                                       /*any_reparse=*/true);
1012         });
1013   } else {
1014     // Empty {} is accepted in legacy mode.
1015     ABSL_DCHECK(lex.options().allow_legacy_syntax);
1016     RETURN_IF_ERROR(any_lex.VisitObject([&](auto&) {
1017       return mark.loc.Invalid(
1018           "in legacy mode, missing @type in Any is only allowed for an empty "
1019           "object");
1020     }));
1021     return absl::OkStatus();
1022   }
1023 }
1024 
1025 // These are mutually recursive with ParseValue.
1026 template <typename Traits>
1027 absl::Status ParseStructValue(JsonLexer& lex, const Desc<Traits>& desc,
1028                               Msg<Traits>& msg);
1029 template <typename Traits>
1030 absl::Status ParseListValue(JsonLexer& lex, const Desc<Traits>& desc,
1031                             Msg<Traits>& msg);
1032 
1033 template <typename Traits>
ParseValue(JsonLexer & lex,const Desc<Traits> & desc,Msg<Traits> & msg)1034 absl::Status ParseValue(JsonLexer& lex, const Desc<Traits>& desc,
1035                         Msg<Traits>& msg) {
1036   auto kind = lex.PeekKind();
1037   RETURN_IF_ERROR(kind.status());
1038   // NOTE: The field numbers 1 through 6 are the numbers of the oneof fields
1039   // in google.protobuf.Value. Conformance tests verify the correctness of
1040   // these numbers.
1041   switch (*kind) {
1042     case JsonLexer::kNull: {
1043       auto field = Traits::MustHaveField(desc, 1);
1044       auto pop =
1045           lex.path().Push(Traits::FieldName(field), Traits::FieldType(field),
1046                           Traits::FieldTypeName(field));
1047 
1048       RETURN_IF_ERROR(lex.Expect("null"));
1049       Traits::SetEnum(field, msg, 0);
1050       break;
1051     }
1052     case JsonLexer::kNum: {
1053       auto field = Traits::MustHaveField(desc, 2);
1054       auto pop =
1055           lex.path().Push(Traits::FieldName(field), Traits::FieldType(field),
1056                           Traits::FieldTypeName(field));
1057 
1058       auto number = lex.ParseNumber();
1059       RETURN_IF_ERROR(number.status());
1060       Traits::SetDouble(field, msg, number->value);
1061       break;
1062     }
1063     case JsonLexer::kStr: {
1064       auto field = Traits::MustHaveField(desc, 3);
1065       auto pop =
1066           lex.path().Push(Traits::FieldName(field), Traits::FieldType(field),
1067                           Traits::FieldTypeName(field));
1068 
1069       auto str = lex.ParseUtf8();
1070       RETURN_IF_ERROR(str.status());
1071       Traits::SetString(field, msg, std::move(str->value.ToString()));
1072       break;
1073     }
1074     case JsonLexer::kFalse:
1075     case JsonLexer::kTrue: {
1076       auto field = Traits::MustHaveField(desc, 4);
1077       auto pop =
1078           lex.path().Push(Traits::FieldName(field), Traits::FieldType(field),
1079                           Traits::FieldTypeName(field));
1080 
1081       // "Quoted" bools, including non-standard Abseil Atob bools, are not
1082       // supported, because all strings are treated as genuine JSON strings.
1083       if (*kind == JsonLexer::kTrue) {
1084         RETURN_IF_ERROR(lex.Expect("true"));
1085         Traits::SetBool(field, msg, true);
1086       } else {
1087         RETURN_IF_ERROR(lex.Expect("false"));
1088         Traits::SetBool(field, msg, false);
1089       }
1090       break;
1091     }
1092     case JsonLexer::kObj: {
1093       auto field = Traits::MustHaveField(desc, 5);
1094       auto pop =
1095           lex.path().Push(Traits::FieldName(field), Traits::FieldType(field),
1096                           Traits::FieldTypeName(field));
1097 
1098       return Traits::NewMsg(field, msg, [&](auto& desc, auto& msg) {
1099         return ParseStructValue<Traits>(lex, desc, msg);
1100       });
1101     }
1102     case JsonLexer::kArr: {
1103       auto field = Traits::MustHaveField(desc, 6);
1104       auto pop =
1105           lex.path().Push(Traits::FieldName(field), Traits::FieldType(field),
1106                           Traits::FieldTypeName(field));
1107 
1108       return Traits::NewMsg(field, msg, [&](auto& desc, auto& msg) {
1109         return ParseListValue<Traits>(lex, desc, msg);
1110       });
1111     }
1112   }
1113 
1114   return absl::OkStatus();
1115 }
1116 
1117 template <typename Traits>
ParseStructValue(JsonLexer & lex,const Desc<Traits> & desc,Msg<Traits> & msg)1118 absl::Status ParseStructValue(JsonLexer& lex, const Desc<Traits>& desc,
1119                               Msg<Traits>& msg) {
1120   auto entry_field = Traits::MustHaveField(desc, 1);
1121   auto pop = lex.path().Push("<struct>", FieldDescriptor::TYPE_MESSAGE,
1122                              Traits::FieldTypeName(entry_field));
1123 
1124   // Structs are always cleared even if set to {}.
1125   Traits::RecordAsSeen(entry_field, msg);
1126 
1127   // Parsing a map does the right thing: Struct has a single map<string,
1128   // Value> field; keys are correctly parsed as strings, and the values
1129   // recurse into ParseMessage, which will be routed into ParseValue. This
1130   // results in some extra overhead, but performance is not what we're going
1131   // for here.
1132   return ParseMap<Traits>(lex, entry_field, msg);
1133 }
1134 
1135 template <typename Traits>
ParseListValue(JsonLexer & lex,const Desc<Traits> & desc,Msg<Traits> & msg)1136 absl::Status ParseListValue(JsonLexer& lex, const Desc<Traits>& desc,
1137                             Msg<Traits>& msg) {
1138   auto entry_field = Traits::MustHaveField(desc, 1);
1139   auto pop = lex.path().Push("<list>", FieldDescriptor::TYPE_MESSAGE,
1140                              Traits::FieldTypeName(entry_field));
1141 
1142   // ListValues are always cleared even if set to [].
1143   Traits::RecordAsSeen(entry_field, msg);
1144   // Parsing an array does the right thing: see the analogous comment in
1145   // ParseStructValue.
1146   return ParseArray<Traits>(lex, entry_field, msg);
1147 }
1148 
1149 template <typename Traits>
ParseField(JsonLexer & lex,const Desc<Traits> & desc,absl::string_view name,Msg<Traits> & msg)1150 absl::Status ParseField(JsonLexer& lex, const Desc<Traits>& desc,
1151                         absl::string_view name, Msg<Traits>& msg) {
1152   absl::optional<Field<Traits>> field;
1153   if (absl::StartsWith(name, "[") && absl::EndsWith(name, "]")) {
1154     absl::string_view extn_name = name.substr(1, name.size() - 2);
1155     field = Traits::ExtensionByName(desc, extn_name);
1156 
1157     if (field.has_value()) {
1158       // The check for whether this is an invalid field occurs below, since it
1159       // is combined for both extension and non-extension fields.
1160       auto correct_type_name = Traits::TypeName(desc);
1161       if (Traits::TypeName(Traits::ContainingType(*field)) !=
1162           correct_type_name) {
1163         return lex.Invalid(absl::StrFormat(
1164             "'%s' is a known extension name, but is not an extension "
1165             "of '%s' as expected",
1166             extn_name, correct_type_name));
1167       }
1168     }
1169   } else {
1170     field = Traits::FieldByName(desc, name);
1171   }
1172 
1173   if (!field.has_value()) {
1174     if (!lex.options().ignore_unknown_fields) {
1175       return lex.Invalid(absl::StrFormat("no such field: '%s'", name));
1176     }
1177     return lex.SkipValue();
1178   }
1179 
1180   auto pop = lex.path().Push(name, Traits::FieldType(*field),
1181                              Traits::FieldTypeName(*field));
1182 
1183   if (Traits::HasParsed(
1184           *field, msg,
1185           /*allow_repeated_non_oneof=*/lex.options().allow_legacy_syntax) &&
1186       !lex.Peek(JsonLexer::kNull)) {
1187     return lex.Invalid(absl::StrFormat(
1188         "'%s' has already been set (either directly or as part of a oneof)",
1189         name));
1190   }
1191 
1192   if (Traits::IsMap(*field)) {
1193     return ParseMap<Traits>(lex, *field, msg);
1194   }
1195 
1196   if (Traits::IsRepeated(*field)) {
1197     if (lex.options().allow_legacy_syntax && !lex.Peek(JsonLexer::kArr)) {
1198       // The original ESF parser permits a single element in place of an array
1199       // thereof.
1200       return ParseSingular<Traits>(lex, *field, msg);
1201     }
1202     return ParseArray<Traits>(lex, *field, msg);
1203   }
1204 
1205   return ParseSingular<Traits>(lex, *field, msg);
1206 }
1207 
1208 template <typename Traits>
ParseMessage(JsonLexer & lex,const Desc<Traits> & desc,Msg<Traits> & msg,bool any_reparse)1209 absl::Status ParseMessage(JsonLexer& lex, const Desc<Traits>& desc,
1210                           Msg<Traits>& msg, bool any_reparse) {
1211   MessageType type = ClassifyMessage(Traits::TypeName(desc));
1212   if (!any_reparse) {
1213     switch (type) {
1214       case MessageType::kAny:
1215         return ParseAny<Traits>(lex, desc, msg);
1216       case MessageType::kValue:
1217         return ParseValue<Traits>(lex, desc, msg);
1218       case MessageType::kStruct:
1219         return ParseStructValue<Traits>(lex, desc, msg);
1220       default:
1221         break;
1222     }
1223     // For some types, the ESF parser permits parsing the "non-special" version.
1224     // It is not clear if this counts as out-of-spec, but we're treating it as
1225     // such.
1226     bool is_upcoming_object = lex.Peek(JsonLexer::kObj);
1227     if (!(is_upcoming_object && lex.options().allow_legacy_syntax)) {
1228       switch (type) {
1229         case MessageType::kList:
1230           return ParseListValue<Traits>(lex, desc, msg);
1231         case MessageType::kWrapper: {
1232           return ParseSingular<Traits>(lex, Traits::MustHaveField(desc, 1),
1233                                        msg);
1234         }
1235         case MessageType::kTimestamp:
1236           return ParseTimestamp<Traits>(lex, desc, msg);
1237         case MessageType::kDuration:
1238           return ParseDuration<Traits>(lex, desc, msg);
1239         case MessageType::kFieldMask:
1240           return ParseFieldMask<Traits>(lex, desc, msg);
1241         default:
1242           break;
1243       }
1244     }
1245   }
1246 
1247   return lex.VisitObject(
1248       [&](LocationWith<MaybeOwnedString>& name) -> absl::Status {
1249         // If this is a well-known type, we expect its contents to be inside
1250         // of a JSON field named "value".
1251         if (any_reparse) {
1252           if (name.value == "@type") {
1253             RETURN_IF_ERROR(lex.SkipValue());
1254             return absl::OkStatus();
1255           }
1256           if (type != MessageType::kNotWellKnown) {
1257             if (name.value != "value") {
1258               return lex.Invalid(
1259                   "fields in a well-known-typed Any must be @type or value");
1260             }
1261             // Parse the upcoming value as the message itself. This is *not*
1262             // an Any reparse because we do not expect to see @type in the
1263             // upcoming value.
1264             return ParseMessage<Traits>(lex, desc, msg,
1265                                         /*any_reparse=*/false);
1266           }
1267         }
1268 
1269         return ParseField<Traits>(lex, desc, name.value.ToString(), msg);
1270       });
1271 }
1272 }  // namespace
1273 
JsonStreamToMessage(io::ZeroCopyInputStream * input,Message * message,json_internal::ParseOptions options)1274 absl::Status JsonStreamToMessage(io::ZeroCopyInputStream* input,
1275                                  Message* message,
1276                                  json_internal::ParseOptions options) {
1277   MessagePath path(message->GetDescriptor()->full_name());
1278   JsonLexer lex(input, options, &path);
1279 
1280   ParseProto2Descriptor::Msg msg(message);
1281   absl::Status s =
1282       ParseMessage<ParseProto2Descriptor>(lex, *message->GetDescriptor(), msg,
1283                                           /*any_reparse=*/false);
1284   if (s.ok() && !lex.AtEof()) {
1285     s = absl::InvalidArgumentError(
1286         "extraneous characters after end of JSON object");
1287   }
1288 
1289   if (PROTOBUF_DEBUG) {
1290     ABSL_DLOG(INFO) << "json2/status: " << s;
1291     ABSL_DLOG(INFO) << "json2/output: " << message->DebugString();
1292   }
1293   return s;
1294 }
1295 
JsonToBinaryStream(google::protobuf::util::TypeResolver * resolver,const std::string & type_url,io::ZeroCopyInputStream * json_input,io::ZeroCopyOutputStream * binary_output,json_internal::ParseOptions options)1296 absl::Status JsonToBinaryStream(google::protobuf::util::TypeResolver* resolver,
1297                                 const std::string& type_url,
1298                                 io::ZeroCopyInputStream* json_input,
1299                                 io::ZeroCopyOutputStream* binary_output,
1300                                 json_internal::ParseOptions options) {
1301   // NOTE: Most of the contortions in this function are to allow for capture of
1302   // input and output of the parser in ABSL_DLOG mode. Destruction order is very
1303   // critical in this function, because io::ZeroCopy*Stream types usually only
1304   // flush on destruction.
1305 
1306   // For ABSL_DLOG, we would like to print out the input and output, which
1307   // requires buffering both instead of doing "zero copy". This block, and the
1308   // one at the end of the function, set up and tear down interception of the
1309   // input and output streams.
1310   std::string copy;
1311   std::string out;
1312   absl::optional<io::ArrayInputStream> tee_input;
1313   absl::optional<io::StringOutputStream> tee_output;
1314   if (PROTOBUF_DEBUG) {
1315     const void* data;
1316     int len;
1317     while (json_input->Next(&data, &len)) {
1318       copy.resize(copy.size() + len);
1319       std::memcpy(&copy[copy.size() - len], data, len);
1320     }
1321     tee_input.emplace(copy.data(), copy.size());
1322     tee_output.emplace(&out);
1323     ABSL_DLOG(INFO) << "json2/input: " << absl::CHexEscape(copy);
1324   }
1325 
1326   // This scope forces the CodedOutputStream inside of `msg` to flush before we
1327   // possibly handle logging the binary protobuf output.
1328   absl::Status s;
1329   {
1330     MessagePath path(type_url);
1331     JsonLexer lex(tee_input.has_value() ? &*tee_input : json_input, options,
1332                   &path);
1333     Msg<ParseProto3Type> msg(tee_output.has_value() ? &*tee_output
1334                                                     : binary_output);
1335 
1336     ResolverPool pool(resolver);
1337     auto desc = pool.FindMessage(type_url);
1338     RETURN_IF_ERROR(desc.status());
1339 
1340     s = ParseMessage<ParseProto3Type>(lex, **desc, msg, /*any_reparse=*/false);
1341     if (s.ok() && !lex.AtEof()) {
1342       s = absl::InvalidArgumentError(
1343           "extraneous characters after end of JSON object");
1344     }
1345   }
1346 
1347   if (PROTOBUF_DEBUG) {
1348     tee_output.reset();  // Flush the output stream.
1349     io::zc_sink_internal::ZeroCopyStreamByteSink(binary_output)
1350         .Append(out.data(), out.size());
1351     ABSL_DLOG(INFO) << "json2/status: " << s;
1352     ABSL_DLOG(INFO) << "json2/output: " << absl::BytesToHexString(out);
1353   }
1354 
1355   return s;
1356 }
1357 }  // namespace json_internal
1358 }  // namespace protobuf
1359 }  // namespace google
1360 
1361 #include "google/protobuf/port_undef.inc"
1362