1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc. All rights reserved.
3 //
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file or at
6 // https://developers.google.com/open-source/licenses/bsd
7
8 #include "google/protobuf/json/internal/untyped_message.h"
9
10 #include <algorithm>
11 #include <cfloat>
12 #include <cstdint>
13 #include <memory>
14 #include <sstream>
15 #include <string>
16 #include <type_traits>
17 #include <utility>
18 #include <vector>
19
20 #include "google/protobuf/type.pb.h"
21 #include "absl/container/flat_hash_map.h"
22 #include "absl/log/absl_check.h"
23 #include "absl/log/absl_log.h"
24 #include "absl/status/status.h"
25 #include "absl/strings/str_cat.h"
26 #include "absl/strings/str_format.h"
27 #include "absl/strings/string_view.h"
28 #include "absl/types/optional.h"
29 #include "absl/types/span.h"
30 #include "absl/types/variant.h"
31 #include "google/protobuf/io/coded_stream.h"
32 #include "google/protobuf/port.h"
33 #include "google/protobuf/util/type_resolver.h"
34 #include "google/protobuf/wire_format_lite.h"
35 #include "utf8_validity.h"
36 #include "google/protobuf/stubs/status_macros.h"
37
38 // Must be included last.
39 #include "google/protobuf/port_def.inc"
40
41 namespace google {
42 namespace protobuf {
43 namespace json_internal {
44 using ::google::protobuf::Field;
45 using ::google::protobuf::internal::WireFormatLite;
46
MessageType() const47 absl::StatusOr<const ResolverPool::Message*> ResolverPool::Field::MessageType()
48 const {
49 ABSL_CHECK(proto().kind() == google::protobuf::Field::TYPE_MESSAGE ||
50 proto().kind() == google::protobuf::Field::TYPE_GROUP)
51 << proto().kind();
52 if (type_ == nullptr) {
53 auto type = pool_->FindMessage(proto().type_url());
54 RETURN_IF_ERROR(type.status());
55 type_ = *type;
56 }
57 return reinterpret_cast<const Message*>(type_);
58 }
59
EnumType() const60 absl::StatusOr<const ResolverPool::Enum*> ResolverPool::Field::EnumType()
61 const {
62 ABSL_CHECK(proto().kind() == google::protobuf::Field::TYPE_ENUM)
63 << proto().kind();
64 if (type_ == nullptr) {
65 auto type = pool_->FindEnum(proto().type_url());
66 RETURN_IF_ERROR(type.status());
67 type_ = *type;
68 }
69 return reinterpret_cast<const Enum*>(type_);
70 }
71
FieldsByIndex() const72 absl::Span<const ResolverPool::Field> ResolverPool::Message::FieldsByIndex()
73 const {
74 if (raw_.fields_size() > 0 && fields_ == nullptr) {
75 fields_ = std::unique_ptr<Field[]>(new Field[raw_.fields_size()]);
76 for (size_t i = 0; i < raw_.fields_size(); ++i) {
77 fields_[i].pool_ = pool_;
78 fields_[i].raw_ = &raw_.fields(i);
79 fields_[i].parent_ = this;
80 }
81 }
82
83 return absl::MakeSpan(fields_.get(), proto().fields_size());
84 }
85
FindField(absl::string_view name) const86 const ResolverPool::Field* ResolverPool::Message::FindField(
87 absl::string_view name) const {
88 if (raw_.fields_size() == 0) {
89 return nullptr;
90 }
91
92 if (fields_by_name_.empty()) {
93 const Field* found = nullptr;
94 for (auto& field : FieldsByIndex()) {
95 if (field.proto().name() == name || field.proto().json_name() == name) {
96 found = &field;
97 }
98 fields_by_name_.try_emplace(field.proto().name(), &field);
99 fields_by_name_.try_emplace(field.proto().json_name(), &field);
100 }
101 return found;
102 }
103
104 auto it = fields_by_name_.find(name);
105 return it == fields_by_name_.end() ? nullptr : it->second;
106 }
107
FindField(int32_t number) const108 const ResolverPool::Field* ResolverPool::Message::FindField(
109 int32_t number) const {
110 if (raw_.fields_size() == 0) {
111 return nullptr;
112 }
113
114 bool is_small = raw_.fields_size() < 8;
115 if (is_small || fields_by_number_.empty()) {
116 const Field* found = nullptr;
117 for (auto& field : FieldsByIndex()) {
118 if (field.proto().number() == number) {
119 found = &field;
120 }
121 if (!is_small) {
122 fields_by_number_.try_emplace(field.proto().number(), &field);
123 }
124 }
125 return found;
126 }
127
128 auto it = fields_by_number_.find(number);
129 return it == fields_by_number_.end() ? nullptr : it->second;
130 }
131
FindMessage(absl::string_view url)132 absl::StatusOr<const ResolverPool::Message*> ResolverPool::FindMessage(
133 absl::string_view url) {
134 auto it = messages_.find(url);
135 if (it != messages_.end()) {
136 return it->second.get();
137 }
138
139 auto msg = absl::WrapUnique(new Message(this));
140 std::string url_buf(url);
141 RETURN_IF_ERROR(resolver_->ResolveMessageType(url_buf, &msg->raw_));
142
143 return messages_.try_emplace(std::move(url_buf), std::move(msg))
144 .first->second.get();
145 }
146
FindEnum(absl::string_view url)147 absl::StatusOr<const ResolverPool::Enum*> ResolverPool::FindEnum(
148 absl::string_view url) {
149 auto it = enums_.find(url);
150 if (it != enums_.end()) {
151 return it->second.get();
152 }
153
154 auto enoom = absl::WrapUnique(new Enum(this));
155 std::string url_buf(url);
156 RETURN_IF_ERROR(resolver_->ResolveEnumType(url_buf, &enoom->raw_));
157
158 return enums_.try_emplace(std::move(url_buf), std::move(enoom))
159 .first->second.get();
160 }
161
MakeEndGroupWithoutGroupError(int field_number)162 PROTOBUF_NOINLINE static absl::Status MakeEndGroupWithoutGroupError(
163 int field_number) {
164 return absl::InvalidArgumentError(absl::StrFormat(
165 "attempted to close group %d before SGROUP tag", field_number));
166 }
167
MakeEndGroupMismatchError(int field_number,int current_group)168 PROTOBUF_NOINLINE static absl::Status MakeEndGroupMismatchError(
169 int field_number, int current_group) {
170 return absl::InvalidArgumentError(
171 absl::StrFormat("attempted to close group %d while inside group %d",
172 field_number, current_group));
173 }
174
MakeFieldNotGroupError(int field_number)175 PROTOBUF_NOINLINE static absl::Status MakeFieldNotGroupError(int field_number) {
176 return absl::InvalidArgumentError(
177 absl::StrFormat("field number %d is not a group", field_number));
178 }
179
MakeUnexpectedEofError()180 PROTOBUF_NOINLINE static absl::Status MakeUnexpectedEofError() {
181 return absl::InvalidArgumentError("unexpected EOF");
182 }
183
MakeUnknownWireTypeError(int wire_type)184 PROTOBUF_NOINLINE static absl::Status MakeUnknownWireTypeError(int wire_type) {
185 return absl::InvalidArgumentError(
186 absl::StrCat("unknown wire type: ", wire_type));
187 }
188
MakeProto3Utf8Error()189 PROTOBUF_NOINLINE static absl::Status MakeProto3Utf8Error() {
190 return absl::InvalidArgumentError("proto3 strings must be UTF-8");
191 }
192
MakeInvalidLengthDelimType(int kind,int field_number)193 PROTOBUF_NOINLINE static absl::Status MakeInvalidLengthDelimType(
194 int kind, int field_number) {
195 return absl::InvalidArgumentError(absl::StrFormat(
196 "field type %d (number %d) does not support type 2 records", kind,
197 field_number));
198 }
199
MakeTooDeepError()200 PROTOBUF_NOINLINE static absl::Status MakeTooDeepError() {
201 return absl::InvalidArgumentError("allowed depth exceeded");
202 }
203
Decode(io::CodedInputStream & stream,absl::optional<int32_t> current_group)204 absl::Status UntypedMessage::Decode(io::CodedInputStream& stream,
205 absl::optional<int32_t> current_group) {
206 std::vector<int32_t> group_stack;
207 while (true) {
208 uint32_t tag = stream.ReadTag();
209 if (tag == 0) {
210 return absl::OkStatus();
211 }
212
213 int32_t field_number = tag >> 3;
214 int32_t wire_type = tag & 7;
215
216 // EGROUP markers can show up as "unknown fields", so we need to handle them
217 // before we even do field lookup. Being inside of a group behaves as if a
218 // special field has been added to the message.
219 if (wire_type == WireFormatLite::WIRETYPE_END_GROUP &&
220 group_stack.empty()) {
221 if (!current_group.has_value()) {
222 return MakeEndGroupWithoutGroupError(field_number);
223 }
224 if (field_number != *current_group) {
225 return MakeEndGroupMismatchError(field_number, *current_group);
226 }
227 return absl::OkStatus();
228 }
229
230 const auto* field = desc_->FindField(field_number);
231 if (!group_stack.empty() || field == nullptr) {
232 // Skip unknown field. If the group-stack is non-empty, we are in the
233 // process of working through an unknown group.
234 switch (wire_type) {
235 case WireFormatLite::WIRETYPE_VARINT: {
236 uint64_t x;
237 if (!stream.ReadVarint64(&x)) {
238 return MakeUnexpectedEofError();
239 }
240 continue;
241 }
242 case WireFormatLite::WIRETYPE_FIXED64: {
243 uint64_t x;
244 if (!stream.ReadLittleEndian64(&x)) {
245 return MakeUnexpectedEofError();
246 }
247 continue;
248 }
249 case WireFormatLite::WIRETYPE_FIXED32: {
250 uint32_t x;
251 if (!stream.ReadLittleEndian32(&x)) {
252 return MakeUnexpectedEofError();
253 }
254 continue;
255 }
256 case WireFormatLite::WIRETYPE_LENGTH_DELIMITED: {
257 uint32_t x;
258 if (!stream.ReadVarint32(&x)) {
259 return MakeUnexpectedEofError();
260 }
261 stream.Skip(x);
262 continue;
263 }
264 case WireFormatLite::WIRETYPE_START_GROUP: {
265 group_stack.push_back(field_number);
266 continue;
267 }
268 case WireFormatLite::WIRETYPE_END_GROUP: {
269 if (group_stack.empty()) {
270 return MakeEndGroupWithoutGroupError(field_number);
271 }
272 if (field_number != group_stack.back()) {
273 return MakeEndGroupMismatchError(field_number, group_stack.back());
274 }
275 group_stack.pop_back();
276 continue;
277 }
278 default:
279 return MakeUnknownWireTypeError(wire_type);
280 }
281 }
282 switch (wire_type) {
283 case WireFormatLite::WIRETYPE_VARINT:
284 RETURN_IF_ERROR(DecodeVarint(stream, *field));
285 break;
286 case WireFormatLite::WIRETYPE_FIXED64:
287 RETURN_IF_ERROR(Decode64Bit(stream, *field));
288 break;
289 case WireFormatLite::WIRETYPE_FIXED32:
290 RETURN_IF_ERROR(Decode32Bit(stream, *field));
291 break;
292 case WireFormatLite::WIRETYPE_LENGTH_DELIMITED:
293 RETURN_IF_ERROR(DecodeDelimited(stream, *field));
294 break;
295 case WireFormatLite::WIRETYPE_START_GROUP: {
296 if (field->proto().kind() != Field::TYPE_GROUP) {
297 return MakeFieldNotGroupError(field->proto().number());
298 }
299 auto group_desc = field->MessageType();
300 RETURN_IF_ERROR(group_desc.status());
301
302 UntypedMessage group(*group_desc);
303 RETURN_IF_ERROR(group.Decode(stream, field_number));
304 RETURN_IF_ERROR(InsertField(*field, std::move(group)));
305 break;
306 }
307 case WireFormatLite::WIRETYPE_END_GROUP:
308 ABSL_LOG(FATAL) << "unreachable";
309 break;
310 default:
311 return MakeUnknownWireTypeError(wire_type);
312 }
313 }
314
315 return absl::OkStatus();
316 }
317
DecodeVarint(io::CodedInputStream & stream,const ResolverPool::Field & field)318 absl::Status UntypedMessage::DecodeVarint(io::CodedInputStream& stream,
319 const ResolverPool::Field& field) {
320 switch (field.proto().kind()) {
321 case Field::TYPE_BOOL: {
322 char byte;
323 if (!stream.ReadRaw(&byte, 1)) {
324 return absl::InvalidArgumentError("unexpected EOF");
325 }
326 switch (byte) {
327 case 0:
328 RETURN_IF_ERROR(InsertField(field, kFalse));
329 break;
330 case 1:
331 RETURN_IF_ERROR(InsertField(field, kTrue));
332 break;
333 default:
334 return absl::InvalidArgumentError(
335 absl::StrFormat("bad value for bool: \\x%02x", byte));
336 }
337 break;
338 }
339 case Field::TYPE_INT32:
340 case Field::TYPE_SINT32:
341 case Field::TYPE_UINT32:
342 case Field::TYPE_ENUM: {
343 uint32_t x;
344 if (!stream.ReadVarint32(&x)) {
345 return absl::InvalidArgumentError("unexpected EOF");
346 }
347 if (field.proto().kind() == Field::TYPE_UINT32) {
348 RETURN_IF_ERROR(InsertField(field, x));
349 break;
350 }
351 if (field.proto().kind() == Field::TYPE_SINT32) {
352 x = WireFormatLite::ZigZagDecode32(x);
353 }
354 RETURN_IF_ERROR(InsertField(field, static_cast<int32_t>(x)));
355 break;
356 }
357 case Field::TYPE_INT64:
358 case Field::TYPE_SINT64:
359 case Field::TYPE_UINT64: {
360 uint64_t x;
361 if (!stream.ReadVarint64(&x)) {
362 return absl::InvalidArgumentError("unexpected EOF");
363 }
364 if (field.proto().kind() == Field::TYPE_UINT64) {
365 RETURN_IF_ERROR(InsertField(field, x));
366 break;
367 }
368 if (field.proto().kind() == Field::TYPE_SINT64) {
369 x = WireFormatLite::ZigZagDecode64(x);
370 }
371 RETURN_IF_ERROR(InsertField(field, static_cast<int64_t>(x)));
372 break;
373 }
374 default:
375 return absl::InvalidArgumentError(absl::StrFormat(
376 "field type %d (number %d) does not support varint fields",
377 field.proto().kind(), field.proto().number()));
378 }
379 return absl::OkStatus();
380 }
381
Decode64Bit(io::CodedInputStream & stream,const ResolverPool::Field & field)382 absl::Status UntypedMessage::Decode64Bit(io::CodedInputStream& stream,
383 const ResolverPool::Field& field) {
384 switch (field.proto().kind()) {
385 case Field::TYPE_FIXED64: {
386 uint64_t x;
387 if (!stream.ReadLittleEndian64(&x)) {
388 return absl::InvalidArgumentError("unexpected EOF");
389 }
390 RETURN_IF_ERROR(InsertField(field, x));
391 break;
392 }
393 case Field::TYPE_SFIXED64: {
394 uint64_t x;
395 if (!stream.ReadLittleEndian64(&x)) {
396 return absl::InvalidArgumentError("unexpected EOF");
397 }
398 RETURN_IF_ERROR(InsertField(field, static_cast<int64_t>(x)));
399 break;
400 }
401 case Field::TYPE_DOUBLE: {
402 uint64_t x;
403 if (!stream.ReadLittleEndian64(&x)) {
404 return absl::InvalidArgumentError("unexpected EOF");
405 }
406 RETURN_IF_ERROR(InsertField(field, absl::bit_cast<double>(x)));
407 break;
408 }
409 default:
410 return absl::InvalidArgumentError(
411 absl::StrFormat("field type %d (number %d) does not support "
412 "type 64-bit fields",
413 field.proto().kind(), field.proto().number()));
414 }
415 return absl::OkStatus();
416 }
417
Decode32Bit(io::CodedInputStream & stream,const ResolverPool::Field & field)418 absl::Status UntypedMessage::Decode32Bit(io::CodedInputStream& stream,
419 const ResolverPool::Field& field) {
420 switch (field.proto().kind()) {
421 case Field::TYPE_FIXED32: {
422 uint32_t x;
423 if (!stream.ReadLittleEndian32(&x)) {
424 return absl::InvalidArgumentError("unexpected EOF");
425 }
426 RETURN_IF_ERROR(InsertField(field, x));
427 break;
428 }
429 case Field::TYPE_SFIXED32: {
430 uint32_t x;
431 if (!stream.ReadLittleEndian32(&x)) {
432 return absl::InvalidArgumentError("unexpected EOF");
433 }
434 RETURN_IF_ERROR(InsertField(field, static_cast<int32_t>(x)));
435 break;
436 }
437 case Field::TYPE_FLOAT: {
438 uint32_t x;
439 if (!stream.ReadLittleEndian32(&x)) {
440 return absl::InvalidArgumentError("unexpected EOF");
441 }
442 RETURN_IF_ERROR(InsertField(field, absl::bit_cast<float>(x)));
443 break;
444 }
445 default:
446 return absl::InvalidArgumentError(absl::StrFormat(
447 "field type %d (number %d) does not support 32-bit fields",
448 field.proto().kind(), field.proto().number()));
449 }
450 return absl::OkStatus();
451 }
452
DecodeDelimited(io::CodedInputStream & stream,const ResolverPool::Field & field)453 absl::Status UntypedMessage::DecodeDelimited(io::CodedInputStream& stream,
454 const ResolverPool::Field& field) {
455 if (!stream.IncrementRecursionDepth()) {
456 return MakeTooDeepError();
457 }
458 auto limit = stream.ReadLengthAndPushLimit();
459 if (limit == 0) {
460 return MakeUnexpectedEofError();
461 }
462
463 switch (field.proto().kind()) {
464 case Field::TYPE_STRING:
465 case Field::TYPE_BYTES: {
466 std::string buf;
467 if (!stream.ReadString(&buf, stream.BytesUntilLimit())) {
468 return MakeUnexpectedEofError();
469 }
470 if (field.proto().kind() == Field::TYPE_STRING) {
471 if (desc_->proto().syntax() == google::protobuf::SYNTAX_PROTO3 &&
472 !utf8_range::IsStructurallyValid(buf)) {
473 return MakeProto3Utf8Error();
474 }
475 }
476
477 RETURN_IF_ERROR(InsertField(field, std::move(buf)));
478 break;
479 }
480 case Field::TYPE_MESSAGE: {
481 auto inner_desc = field.MessageType();
482 RETURN_IF_ERROR(inner_desc.status());
483
484 auto inner = ParseFromStream(*inner_desc, stream);
485 RETURN_IF_ERROR(inner.status());
486 RETURN_IF_ERROR(InsertField(field, std::move(*inner)));
487 break;
488 }
489 default: {
490 // This is definitely a packed field.
491 while (stream.BytesUntilLimit() > 0) {
492 switch (field.proto().kind()) {
493 case Field::TYPE_BOOL:
494 case Field::TYPE_INT32:
495 case Field::TYPE_SINT32:
496 case Field::TYPE_UINT32:
497 case Field::TYPE_ENUM:
498 case Field::TYPE_INT64:
499 case Field::TYPE_SINT64:
500 case Field::TYPE_UINT64:
501 RETURN_IF_ERROR(DecodeVarint(stream, field));
502 break;
503 case Field::TYPE_FIXED64:
504 case Field::TYPE_SFIXED64:
505 case Field::TYPE_DOUBLE:
506 RETURN_IF_ERROR(Decode64Bit(stream, field));
507 break;
508 case Field::TYPE_FIXED32:
509 case Field::TYPE_SFIXED32:
510 case Field::TYPE_FLOAT:
511 RETURN_IF_ERROR(Decode32Bit(stream, field));
512 break;
513 default:
514 return MakeInvalidLengthDelimType(field.proto().kind(),
515 field.proto().number());
516 }
517 }
518 break;
519 }
520 }
521 stream.DecrementRecursionDepthAndPopLimit(limit);
522 return absl::OkStatus();
523 }
524
525 template <typename T>
InsertField(const ResolverPool::Field & field,T && value)526 absl::Status UntypedMessage::InsertField(const ResolverPool::Field& field,
527 T&& value) {
528 int32_t number = field.proto().number();
529 auto emplace_result = fields_.try_emplace(number, std::forward<T>(value));
530 if (emplace_result.second) {
531 return absl::OkStatus();
532 }
533
534 if (field.proto().cardinality() !=
535 google::protobuf::Field::CARDINALITY_REPEATED) {
536 return absl::InvalidArgumentError(
537 absl::StrCat("repeated entries for singular field number ", number));
538 }
539
540 Value& slot = emplace_result.first->second;
541 using value_type = std::decay_t<T>;
542 if (auto* extant = absl::get_if<value_type>(&slot)) {
543 std::vector<value_type> repeated;
544 repeated.push_back(std::move(*extant));
545 repeated.push_back(std::forward<T>(value));
546
547 slot = std::move(repeated);
548 } else if (auto* extant = absl::get_if<std::vector<value_type>>(&slot)) {
549 extant->push_back(std::forward<T>(value));
550 } else {
551 absl::optional<absl::string_view> name =
552 google::protobuf::internal::RttiTypeName<value_type>();
553 if (!name.has_value()) {
554 name = "<unknown>";
555 }
556
557 return absl::InvalidArgumentError(
558 absl::StrFormat("inconsistent types for field number %d: tried to "
559 "insert '%s', but index was %d",
560 number, *name, slot.index()));
561 }
562
563 return absl::OkStatus();
564 }
565
566 } // namespace json_internal
567 } // namespace protobuf
568 } // namespace google
569
570 #include "google/protobuf/port_undef.inc"
571