• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 //
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file or at
6 // https://developers.google.com/open-source/licenses/bsd
7 
8 #include "google/protobuf/json/internal/untyped_message.h"
9 
10 #include <algorithm>
11 #include <cfloat>
12 #include <cstdint>
13 #include <memory>
14 #include <sstream>
15 #include <string>
16 #include <type_traits>
17 #include <utility>
18 #include <vector>
19 
20 #include "google/protobuf/type.pb.h"
21 #include "absl/container/flat_hash_map.h"
22 #include "absl/log/absl_check.h"
23 #include "absl/log/absl_log.h"
24 #include "absl/status/status.h"
25 #include "absl/strings/str_cat.h"
26 #include "absl/strings/str_format.h"
27 #include "absl/strings/string_view.h"
28 #include "absl/types/optional.h"
29 #include "absl/types/span.h"
30 #include "absl/types/variant.h"
31 #include "google/protobuf/io/coded_stream.h"
32 #include "google/protobuf/port.h"
33 #include "google/protobuf/util/type_resolver.h"
34 #include "google/protobuf/wire_format_lite.h"
35 #include "utf8_validity.h"
36 #include "google/protobuf/stubs/status_macros.h"
37 
38 // Must be included last.
39 #include "google/protobuf/port_def.inc"
40 
41 namespace google {
42 namespace protobuf {
43 namespace json_internal {
44 using ::google::protobuf::Field;
45 using ::google::protobuf::internal::WireFormatLite;
46 
MessageType() const47 absl::StatusOr<const ResolverPool::Message*> ResolverPool::Field::MessageType()
48     const {
49   ABSL_CHECK(proto().kind() == google::protobuf::Field::TYPE_MESSAGE ||
50              proto().kind() == google::protobuf::Field::TYPE_GROUP)
51       << proto().kind();
52   if (type_ == nullptr) {
53     auto type = pool_->FindMessage(proto().type_url());
54     RETURN_IF_ERROR(type.status());
55     type_ = *type;
56   }
57   return reinterpret_cast<const Message*>(type_);
58 }
59 
EnumType() const60 absl::StatusOr<const ResolverPool::Enum*> ResolverPool::Field::EnumType()
61     const {
62   ABSL_CHECK(proto().kind() == google::protobuf::Field::TYPE_ENUM)
63       << proto().kind();
64   if (type_ == nullptr) {
65     auto type = pool_->FindEnum(proto().type_url());
66     RETURN_IF_ERROR(type.status());
67     type_ = *type;
68   }
69   return reinterpret_cast<const Enum*>(type_);
70 }
71 
FieldsByIndex() const72 absl::Span<const ResolverPool::Field> ResolverPool::Message::FieldsByIndex()
73     const {
74   if (raw_.fields_size() > 0 && fields_ == nullptr) {
75     fields_ = std::unique_ptr<Field[]>(new Field[raw_.fields_size()]);
76     for (size_t i = 0; i < raw_.fields_size(); ++i) {
77       fields_[i].pool_ = pool_;
78       fields_[i].raw_ = &raw_.fields(i);
79       fields_[i].parent_ = this;
80     }
81   }
82 
83   return absl::MakeSpan(fields_.get(), proto().fields_size());
84 }
85 
FindField(absl::string_view name) const86 const ResolverPool::Field* ResolverPool::Message::FindField(
87     absl::string_view name) const {
88   if (raw_.fields_size() == 0) {
89     return nullptr;
90   }
91 
92   if (fields_by_name_.empty()) {
93     const Field* found = nullptr;
94     for (auto& field : FieldsByIndex()) {
95       if (field.proto().name() == name || field.proto().json_name() == name) {
96         found = &field;
97       }
98       fields_by_name_.try_emplace(field.proto().name(), &field);
99       fields_by_name_.try_emplace(field.proto().json_name(), &field);
100     }
101     return found;
102   }
103 
104   auto it = fields_by_name_.find(name);
105   return it == fields_by_name_.end() ? nullptr : it->second;
106 }
107 
FindField(int32_t number) const108 const ResolverPool::Field* ResolverPool::Message::FindField(
109     int32_t number) const {
110   if (raw_.fields_size() == 0) {
111     return nullptr;
112   }
113 
114   bool is_small = raw_.fields_size() < 8;
115   if (is_small || fields_by_number_.empty()) {
116     const Field* found = nullptr;
117     for (auto& field : FieldsByIndex()) {
118       if (field.proto().number() == number) {
119         found = &field;
120       }
121       if (!is_small) {
122         fields_by_number_.try_emplace(field.proto().number(), &field);
123       }
124     }
125     return found;
126   }
127 
128   auto it = fields_by_number_.find(number);
129   return it == fields_by_number_.end() ? nullptr : it->second;
130 }
131 
FindMessage(absl::string_view url)132 absl::StatusOr<const ResolverPool::Message*> ResolverPool::FindMessage(
133     absl::string_view url) {
134   auto it = messages_.find(url);
135   if (it != messages_.end()) {
136     return it->second.get();
137   }
138 
139   auto msg = absl::WrapUnique(new Message(this));
140   std::string url_buf(url);
141   RETURN_IF_ERROR(resolver_->ResolveMessageType(url_buf, &msg->raw_));
142 
143   return messages_.try_emplace(std::move(url_buf), std::move(msg))
144       .first->second.get();
145 }
146 
FindEnum(absl::string_view url)147 absl::StatusOr<const ResolverPool::Enum*> ResolverPool::FindEnum(
148     absl::string_view url) {
149   auto it = enums_.find(url);
150   if (it != enums_.end()) {
151     return it->second.get();
152   }
153 
154   auto enoom = absl::WrapUnique(new Enum(this));
155   std::string url_buf(url);
156   RETURN_IF_ERROR(resolver_->ResolveEnumType(url_buf, &enoom->raw_));
157 
158   return enums_.try_emplace(std::move(url_buf), std::move(enoom))
159       .first->second.get();
160 }
161 
MakeEndGroupWithoutGroupError(int field_number)162 PROTOBUF_NOINLINE static absl::Status MakeEndGroupWithoutGroupError(
163     int field_number) {
164   return absl::InvalidArgumentError(absl::StrFormat(
165       "attempted to close group %d before SGROUP tag", field_number));
166 }
167 
MakeEndGroupMismatchError(int field_number,int current_group)168 PROTOBUF_NOINLINE static absl::Status MakeEndGroupMismatchError(
169     int field_number, int current_group) {
170   return absl::InvalidArgumentError(
171       absl::StrFormat("attempted to close group %d while inside group %d",
172                       field_number, current_group));
173 }
174 
MakeFieldNotGroupError(int field_number)175 PROTOBUF_NOINLINE static absl::Status MakeFieldNotGroupError(int field_number) {
176   return absl::InvalidArgumentError(
177       absl::StrFormat("field number %d is not a group", field_number));
178 }
179 
MakeUnexpectedEofError()180 PROTOBUF_NOINLINE static absl::Status MakeUnexpectedEofError() {
181   return absl::InvalidArgumentError("unexpected EOF");
182 }
183 
MakeUnknownWireTypeError(int wire_type)184 PROTOBUF_NOINLINE static absl::Status MakeUnknownWireTypeError(int wire_type) {
185   return absl::InvalidArgumentError(
186       absl::StrCat("unknown wire type: ", wire_type));
187 }
188 
MakeProto3Utf8Error()189 PROTOBUF_NOINLINE static absl::Status MakeProto3Utf8Error() {
190   return absl::InvalidArgumentError("proto3 strings must be UTF-8");
191 }
192 
MakeInvalidLengthDelimType(int kind,int field_number)193 PROTOBUF_NOINLINE static absl::Status MakeInvalidLengthDelimType(
194     int kind, int field_number) {
195   return absl::InvalidArgumentError(absl::StrFormat(
196       "field type %d (number %d) does not support type 2 records", kind,
197       field_number));
198 }
199 
MakeTooDeepError()200 PROTOBUF_NOINLINE static absl::Status MakeTooDeepError() {
201   return absl::InvalidArgumentError("allowed depth exceeded");
202 }
203 
Decode(io::CodedInputStream & stream,absl::optional<int32_t> current_group)204 absl::Status UntypedMessage::Decode(io::CodedInputStream& stream,
205                                     absl::optional<int32_t> current_group) {
206   std::vector<int32_t> group_stack;
207   while (true) {
208     uint32_t tag = stream.ReadTag();
209     if (tag == 0) {
210       return absl::OkStatus();
211     }
212 
213     int32_t field_number = tag >> 3;
214     int32_t wire_type = tag & 7;
215 
216     // EGROUP markers can show up as "unknown fields", so we need to handle them
217     // before we even do field lookup. Being inside of a group behaves as if a
218     // special field has been added to the message.
219     if (wire_type == WireFormatLite::WIRETYPE_END_GROUP &&
220         group_stack.empty()) {
221       if (!current_group.has_value()) {
222         return MakeEndGroupWithoutGroupError(field_number);
223       }
224       if (field_number != *current_group) {
225         return MakeEndGroupMismatchError(field_number, *current_group);
226       }
227       return absl::OkStatus();
228     }
229 
230     const auto* field = desc_->FindField(field_number);
231     if (!group_stack.empty() || field == nullptr) {
232       // Skip unknown field. If the group-stack is non-empty, we are in the
233       // process of working through an unknown group.
234       switch (wire_type) {
235         case WireFormatLite::WIRETYPE_VARINT: {
236           uint64_t x;
237           if (!stream.ReadVarint64(&x)) {
238             return MakeUnexpectedEofError();
239           }
240           continue;
241         }
242         case WireFormatLite::WIRETYPE_FIXED64: {
243           uint64_t x;
244           if (!stream.ReadLittleEndian64(&x)) {
245             return MakeUnexpectedEofError();
246           }
247           continue;
248         }
249         case WireFormatLite::WIRETYPE_FIXED32: {
250           uint32_t x;
251           if (!stream.ReadLittleEndian32(&x)) {
252             return MakeUnexpectedEofError();
253           }
254           continue;
255         }
256         case WireFormatLite::WIRETYPE_LENGTH_DELIMITED: {
257           uint32_t x;
258           if (!stream.ReadVarint32(&x)) {
259             return MakeUnexpectedEofError();
260           }
261           stream.Skip(x);
262           continue;
263         }
264         case WireFormatLite::WIRETYPE_START_GROUP: {
265           group_stack.push_back(field_number);
266           continue;
267         }
268         case WireFormatLite::WIRETYPE_END_GROUP: {
269           if (group_stack.empty()) {
270             return MakeEndGroupWithoutGroupError(field_number);
271           }
272           if (field_number != group_stack.back()) {
273             return MakeEndGroupMismatchError(field_number, group_stack.back());
274           }
275           group_stack.pop_back();
276           continue;
277         }
278         default:
279           return MakeUnknownWireTypeError(wire_type);
280       }
281     }
282     switch (wire_type) {
283       case WireFormatLite::WIRETYPE_VARINT:
284         RETURN_IF_ERROR(DecodeVarint(stream, *field));
285         break;
286       case WireFormatLite::WIRETYPE_FIXED64:
287         RETURN_IF_ERROR(Decode64Bit(stream, *field));
288         break;
289       case WireFormatLite::WIRETYPE_FIXED32:
290         RETURN_IF_ERROR(Decode32Bit(stream, *field));
291         break;
292       case WireFormatLite::WIRETYPE_LENGTH_DELIMITED:
293         RETURN_IF_ERROR(DecodeDelimited(stream, *field));
294         break;
295       case WireFormatLite::WIRETYPE_START_GROUP: {
296         if (field->proto().kind() != Field::TYPE_GROUP) {
297           return MakeFieldNotGroupError(field->proto().number());
298         }
299         auto group_desc = field->MessageType();
300         RETURN_IF_ERROR(group_desc.status());
301 
302         UntypedMessage group(*group_desc);
303         RETURN_IF_ERROR(group.Decode(stream, field_number));
304         RETURN_IF_ERROR(InsertField(*field, std::move(group)));
305         break;
306       }
307       case WireFormatLite::WIRETYPE_END_GROUP:
308         ABSL_LOG(FATAL) << "unreachable";
309         break;
310       default:
311         return MakeUnknownWireTypeError(wire_type);
312     }
313   }
314 
315   return absl::OkStatus();
316 }
317 
DecodeVarint(io::CodedInputStream & stream,const ResolverPool::Field & field)318 absl::Status UntypedMessage::DecodeVarint(io::CodedInputStream& stream,
319                                           const ResolverPool::Field& field) {
320   switch (field.proto().kind()) {
321     case Field::TYPE_BOOL: {
322       char byte;
323       if (!stream.ReadRaw(&byte, 1)) {
324         return absl::InvalidArgumentError("unexpected EOF");
325       }
326       switch (byte) {
327         case 0:
328           RETURN_IF_ERROR(InsertField(field, kFalse));
329           break;
330         case 1:
331           RETURN_IF_ERROR(InsertField(field, kTrue));
332           break;
333         default:
334           return absl::InvalidArgumentError(
335               absl::StrFormat("bad value for bool: \\x%02x", byte));
336       }
337       break;
338     }
339     case Field::TYPE_INT32:
340     case Field::TYPE_SINT32:
341     case Field::TYPE_UINT32:
342     case Field::TYPE_ENUM: {
343       uint32_t x;
344       if (!stream.ReadVarint32(&x)) {
345         return absl::InvalidArgumentError("unexpected EOF");
346       }
347       if (field.proto().kind() == Field::TYPE_UINT32) {
348         RETURN_IF_ERROR(InsertField(field, x));
349         break;
350       }
351       if (field.proto().kind() == Field::TYPE_SINT32) {
352         x = WireFormatLite::ZigZagDecode32(x);
353       }
354       RETURN_IF_ERROR(InsertField(field, static_cast<int32_t>(x)));
355       break;
356     }
357     case Field::TYPE_INT64:
358     case Field::TYPE_SINT64:
359     case Field::TYPE_UINT64: {
360       uint64_t x;
361       if (!stream.ReadVarint64(&x)) {
362         return absl::InvalidArgumentError("unexpected EOF");
363       }
364       if (field.proto().kind() == Field::TYPE_UINT64) {
365         RETURN_IF_ERROR(InsertField(field, x));
366         break;
367       }
368       if (field.proto().kind() == Field::TYPE_SINT64) {
369         x = WireFormatLite::ZigZagDecode64(x);
370       }
371       RETURN_IF_ERROR(InsertField(field, static_cast<int64_t>(x)));
372       break;
373     }
374     default:
375       return absl::InvalidArgumentError(absl::StrFormat(
376           "field type %d (number %d) does not support varint fields",
377           field.proto().kind(), field.proto().number()));
378   }
379   return absl::OkStatus();
380 }
381 
Decode64Bit(io::CodedInputStream & stream,const ResolverPool::Field & field)382 absl::Status UntypedMessage::Decode64Bit(io::CodedInputStream& stream,
383                                          const ResolverPool::Field& field) {
384   switch (field.proto().kind()) {
385     case Field::TYPE_FIXED64: {
386       uint64_t x;
387       if (!stream.ReadLittleEndian64(&x)) {
388         return absl::InvalidArgumentError("unexpected EOF");
389       }
390       RETURN_IF_ERROR(InsertField(field, x));
391       break;
392     }
393     case Field::TYPE_SFIXED64: {
394       uint64_t x;
395       if (!stream.ReadLittleEndian64(&x)) {
396         return absl::InvalidArgumentError("unexpected EOF");
397       }
398       RETURN_IF_ERROR(InsertField(field, static_cast<int64_t>(x)));
399       break;
400     }
401     case Field::TYPE_DOUBLE: {
402       uint64_t x;
403       if (!stream.ReadLittleEndian64(&x)) {
404         return absl::InvalidArgumentError("unexpected EOF");
405       }
406       RETURN_IF_ERROR(InsertField(field, absl::bit_cast<double>(x)));
407       break;
408     }
409     default:
410       return absl::InvalidArgumentError(
411           absl::StrFormat("field type %d (number %d) does not support "
412                           "type 64-bit fields",
413                           field.proto().kind(), field.proto().number()));
414   }
415   return absl::OkStatus();
416 }
417 
Decode32Bit(io::CodedInputStream & stream,const ResolverPool::Field & field)418 absl::Status UntypedMessage::Decode32Bit(io::CodedInputStream& stream,
419                                          const ResolverPool::Field& field) {
420   switch (field.proto().kind()) {
421     case Field::TYPE_FIXED32: {
422       uint32_t x;
423       if (!stream.ReadLittleEndian32(&x)) {
424         return absl::InvalidArgumentError("unexpected EOF");
425       }
426       RETURN_IF_ERROR(InsertField(field, x));
427       break;
428     }
429     case Field::TYPE_SFIXED32: {
430       uint32_t x;
431       if (!stream.ReadLittleEndian32(&x)) {
432         return absl::InvalidArgumentError("unexpected EOF");
433       }
434       RETURN_IF_ERROR(InsertField(field, static_cast<int32_t>(x)));
435       break;
436     }
437     case Field::TYPE_FLOAT: {
438       uint32_t x;
439       if (!stream.ReadLittleEndian32(&x)) {
440         return absl::InvalidArgumentError("unexpected EOF");
441       }
442       RETURN_IF_ERROR(InsertField(field, absl::bit_cast<float>(x)));
443       break;
444     }
445     default:
446       return absl::InvalidArgumentError(absl::StrFormat(
447           "field type %d (number %d) does not support 32-bit fields",
448           field.proto().kind(), field.proto().number()));
449   }
450   return absl::OkStatus();
451 }
452 
DecodeDelimited(io::CodedInputStream & stream,const ResolverPool::Field & field)453 absl::Status UntypedMessage::DecodeDelimited(io::CodedInputStream& stream,
454                                              const ResolverPool::Field& field) {
455   if (!stream.IncrementRecursionDepth()) {
456     return MakeTooDeepError();
457   }
458   auto limit = stream.ReadLengthAndPushLimit();
459   if (limit == 0) {
460     return MakeUnexpectedEofError();
461   }
462 
463   switch (field.proto().kind()) {
464     case Field::TYPE_STRING:
465     case Field::TYPE_BYTES: {
466       std::string buf;
467       if (!stream.ReadString(&buf, stream.BytesUntilLimit())) {
468         return MakeUnexpectedEofError();
469       }
470       if (field.proto().kind() == Field::TYPE_STRING) {
471         if (desc_->proto().syntax() == google::protobuf::SYNTAX_PROTO3 &&
472             !utf8_range::IsStructurallyValid(buf)) {
473           return MakeProto3Utf8Error();
474         }
475       }
476 
477       RETURN_IF_ERROR(InsertField(field, std::move(buf)));
478       break;
479     }
480     case Field::TYPE_MESSAGE: {
481       auto inner_desc = field.MessageType();
482       RETURN_IF_ERROR(inner_desc.status());
483 
484       auto inner = ParseFromStream(*inner_desc, stream);
485       RETURN_IF_ERROR(inner.status());
486       RETURN_IF_ERROR(InsertField(field, std::move(*inner)));
487       break;
488     }
489     default: {
490       // This is definitely a packed field.
491       while (stream.BytesUntilLimit() > 0) {
492         switch (field.proto().kind()) {
493           case Field::TYPE_BOOL:
494           case Field::TYPE_INT32:
495           case Field::TYPE_SINT32:
496           case Field::TYPE_UINT32:
497           case Field::TYPE_ENUM:
498           case Field::TYPE_INT64:
499           case Field::TYPE_SINT64:
500           case Field::TYPE_UINT64:
501             RETURN_IF_ERROR(DecodeVarint(stream, field));
502             break;
503           case Field::TYPE_FIXED64:
504           case Field::TYPE_SFIXED64:
505           case Field::TYPE_DOUBLE:
506             RETURN_IF_ERROR(Decode64Bit(stream, field));
507             break;
508           case Field::TYPE_FIXED32:
509           case Field::TYPE_SFIXED32:
510           case Field::TYPE_FLOAT:
511             RETURN_IF_ERROR(Decode32Bit(stream, field));
512             break;
513           default:
514             return MakeInvalidLengthDelimType(field.proto().kind(),
515                                               field.proto().number());
516         }
517       }
518       break;
519     }
520   }
521   stream.DecrementRecursionDepthAndPopLimit(limit);
522   return absl::OkStatus();
523 }
524 
525 template <typename T>
InsertField(const ResolverPool::Field & field,T && value)526 absl::Status UntypedMessage::InsertField(const ResolverPool::Field& field,
527                                          T&& value) {
528   int32_t number = field.proto().number();
529   auto emplace_result = fields_.try_emplace(number, std::forward<T>(value));
530   if (emplace_result.second) {
531     return absl::OkStatus();
532   }
533 
534   if (field.proto().cardinality() !=
535       google::protobuf::Field::CARDINALITY_REPEATED) {
536     return absl::InvalidArgumentError(
537         absl::StrCat("repeated entries for singular field number ", number));
538   }
539 
540   Value& slot = emplace_result.first->second;
541   using value_type = std::decay_t<T>;
542   if (auto* extant = absl::get_if<value_type>(&slot)) {
543     std::vector<value_type> repeated;
544     repeated.push_back(std::move(*extant));
545     repeated.push_back(std::forward<T>(value));
546 
547     slot = std::move(repeated);
548   } else if (auto* extant = absl::get_if<std::vector<value_type>>(&slot)) {
549     extant->push_back(std::forward<T>(value));
550   } else {
551     absl::optional<absl::string_view> name =
552         google::protobuf::internal::RttiTypeName<value_type>();
553     if (!name.has_value()) {
554       name = "<unknown>";
555     }
556 
557     return absl::InvalidArgumentError(
558         absl::StrFormat("inconsistent types for field number %d: tried to "
559                         "insert '%s', but index was %d",
560                         number, *name, slot.index()));
561   }
562 
563   return absl::OkStatus();
564 }
565 
566 }  // namespace json_internal
567 }  // namespace protobuf
568 }  // namespace google
569 
570 #include "google/protobuf/port_undef.inc"
571