• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 //
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file or at
6 // https://developers.google.com/open-source/licenses/bsd
7 
8 #ifndef GOOGLE_PROTOBUF_JSON_INTERNAL_PARSER_TRAITS_H__
9 #define GOOGLE_PROTOBUF_JSON_INTERNAL_PARSER_TRAITS_H__
10 
11 #include <cfloat>
12 #include <cmath>
13 #include <cstdint>
14 #include <limits>
15 #include <memory>
16 #include <string>
17 #include <utility>
18 
19 #include "google/protobuf/type.pb.h"
20 #include "absl/base/attributes.h"
21 #include "absl/base/casts.h"
22 #include "absl/container/flat_hash_map.h"
23 #include "absl/container/flat_hash_set.h"
24 #include "absl/status/status.h"
25 #include "absl/strings/str_format.h"
26 #include "absl/strings/string_view.h"
27 #include "google/protobuf/io/coded_stream.h"
28 #include "google/protobuf/io/zero_copy_stream.h"
29 #include "google/protobuf/io/zero_copy_stream_impl_lite.h"
30 #include "google/protobuf/json/internal/descriptor_traits.h"
31 #include "google/protobuf/wire_format_lite.h"
32 #include "google/protobuf/stubs/status_macros.h"
33 
34 // Must be included last.
35 #include "google/protobuf/port_def.inc"
36 
37 namespace google {
38 namespace protobuf {
39 namespace json_internal {
40 using ::google::protobuf::internal::WireFormatLite;
41 
// See the comment in json_util2_parser.cc for more information.
//
// The type traits in this file describe how to parse to a protobuf
// representation used by the JSON API, either via proto reflection or by
// emitting wire format to an output stream.
47 
// Shorthand for a traits type's nested `Msg`, so that function signatures do
// not have to spell out `typename Traits::Msg`.
template <class Traits>
using Msg = typename Traits::Msg;
52 
53 struct ParseProto2Descriptor : Proto2Descriptor {
54   // A message value that fields can be written to, but not read from.
55   class Msg {
56    public:
MsgParseProto2Descriptor57     explicit Msg(Message* msg) : msg_(msg) {}
58 
59    private:
60     friend ParseProto2Descriptor;
61     Message* msg_;
62     // Because `msg` might already have oneofs set, we need to track which were
63     // set *during* the parse separately.
64     absl::flat_hash_set<int> parsed_oneofs_indices_;
65     absl::flat_hash_set<int> parsed_fields_;
66   };
67 
HasParsedParseProto2Descriptor68   static bool HasParsed(Field f, const Msg& msg,
69                         bool allow_repeated_non_oneof) {
70     if (f->real_containing_oneof()) {
71       return msg.parsed_oneofs_indices_.contains(
72           f->real_containing_oneof()->index());
73     }
74     if (allow_repeated_non_oneof) {
75       return false;
76     }
77     return msg.parsed_fields_.contains(f->number());
78   }
79 
80   /// Functions for writing fields. ///
81 
82   // Marks a field as having been "seen". This will clear the field if it is
83   // the first occurrence thereof.
84   //
85   // All setters call this function automatically, but it may also be called
86   // eagerly to clear a pre-existing value that might not be overwritten, such
87   // as when parsing a repeated field.
RecordAsSeenParseProto2Descriptor88   static void RecordAsSeen(Field f, Msg& msg) {
89     bool inserted = msg.parsed_fields_.insert(f->number()).second;
90     if (inserted) {
91       msg.msg_->GetReflection()->ClearField(msg.msg_, f);
92     }
93 
94     if (f->real_containing_oneof() != nullptr) {
95       msg.parsed_oneofs_indices_.insert(f->real_containing_oneof()->index());
96     }
97   }
98 
99   // Adds a new message and calls body on it.
100   //
101   // Body should have a signature `absl::Status(const Desc&, Msg&)`.
102   template <typename F>
NewMsgParseProto2Descriptor103   static absl::Status NewMsg(Field f, Msg& msg, F body) {
104     RecordAsSeen(f, msg);
105 
106     Message* new_msg;
107     if (f->is_repeated()) {
108       new_msg = msg.msg_->GetReflection()->AddMessage(msg.msg_, f);
109     } else {
110       new_msg = msg.msg_->GetReflection()->MutableMessage(msg.msg_, f);
111     }
112     Msg wrapper(new_msg);
113     return body(*f->message_type(), wrapper);
114   }
115 
116   // Adds a new dynamic message with the given type name and calls body on it.
117   //
118   // Body should have a signature `absl::Status(const Desc&, Msg&)`.
119   template <typename F>
NewDynamicParseProto2Descriptor120   static absl::Status NewDynamic(Field f, const std::string& type_url, Msg& msg,
121                                  F body) {
122     RecordAsSeen(f, msg);
123     return WithDynamicType(
124         *f->containing_type(), type_url, [&](const Desc& desc) -> absl::Status {
125           DynamicMessageFactory factory;
126           std::unique_ptr<Message> dynamic(factory.GetPrototype(&desc)->New());
127           Msg wrapper(dynamic.get());
128           RETURN_IF_ERROR(body(desc, wrapper));
129 
130           if (f->is_repeated()) {
131             msg.msg_->GetReflection()->AddString(
132                 msg.msg_, f, dynamic->SerializePartialAsString());
133           } else {
134             msg.msg_->GetReflection()->SetString(
135                 msg.msg_, f, dynamic->SerializePartialAsString());
136           }
137           return absl::OkStatus();
138         });
139   }
140 
SetFloatParseProto2Descriptor141   static void SetFloat(Field f, Msg& msg, float x) {
142     RecordAsSeen(f, msg);
143     if (f->is_repeated()) {
144       msg.msg_->GetReflection()->AddFloat(msg.msg_, f, x);
145     } else {
146       msg.msg_->GetReflection()->SetFloat(msg.msg_, f, x);
147     }
148   }
149 
SetDoubleParseProto2Descriptor150   static void SetDouble(Field f, Msg& msg, double x) {
151     if (f->is_repeated()) {
152       msg.msg_->GetReflection()->AddDouble(msg.msg_, f, x);
153     } else {
154       msg.msg_->GetReflection()->SetDouble(msg.msg_, f, x);
155     }
156   }
157 
SetInt64ParseProto2Descriptor158   static void SetInt64(Field f, Msg& msg, int64_t x) {
159     RecordAsSeen(f, msg);
160     if (f->is_repeated()) {
161       msg.msg_->GetReflection()->AddInt64(msg.msg_, f, x);
162     } else {
163       msg.msg_->GetReflection()->SetInt64(msg.msg_, f, x);
164     }
165   }
166 
SetUInt64ParseProto2Descriptor167   static void SetUInt64(Field f, Msg& msg, uint64_t x) {
168     RecordAsSeen(f, msg);
169     if (f->is_repeated()) {
170       msg.msg_->GetReflection()->AddUInt64(msg.msg_, f, x);
171     } else {
172       msg.msg_->GetReflection()->SetUInt64(msg.msg_, f, x);
173     }
174   }
175 
SetInt32ParseProto2Descriptor176   static void SetInt32(Field f, Msg& msg, int32_t x) {
177     RecordAsSeen(f, msg);
178     if (f->is_repeated()) {
179       msg.msg_->GetReflection()->AddInt32(msg.msg_, f, x);
180     } else {
181       msg.msg_->GetReflection()->SetInt32(msg.msg_, f, x);
182     }
183   }
184 
SetUInt32ParseProto2Descriptor185   static void SetUInt32(Field f, Msg& msg, uint32_t x) {
186     RecordAsSeen(f, msg);
187     if (f->is_repeated()) {
188       msg.msg_->GetReflection()->AddUInt32(msg.msg_, f, x);
189     } else {
190       msg.msg_->GetReflection()->SetUInt32(msg.msg_, f, x);
191     }
192   }
193 
SetBoolParseProto2Descriptor194   static void SetBool(Field f, Msg& msg, bool x) {
195     RecordAsSeen(f, msg);
196     if (f->is_repeated()) {
197       msg.msg_->GetReflection()->AddBool(msg.msg_, f, x);
198     } else {
199       msg.msg_->GetReflection()->SetBool(msg.msg_, f, x);
200     }
201   }
202 
SetStringParseProto2Descriptor203   static void SetString(Field f, Msg& msg, absl::string_view x) {
204     RecordAsSeen(f, msg);
205     if (f->is_repeated()) {
206       msg.msg_->GetReflection()->AddString(msg.msg_, f, std::string(x));
207     } else {
208       msg.msg_->GetReflection()->SetString(msg.msg_, f, std::string(x));
209     }
210   }
211 
SetEnumParseProto2Descriptor212   static void SetEnum(Field f, Msg& msg, int32_t x) {
213     RecordAsSeen(f, msg);
214     if (f->is_repeated()) {
215       msg.msg_->GetReflection()->AddEnumValue(msg.msg_, f, x);
216     } else {
217       msg.msg_->GetReflection()->SetEnumValue(msg.msg_, f, x);
218     }
219   }
220 };
221 
222 // Traits for proto3-ish deserialization.
223 //
224 // This includes a rudimentary proto serializer, since message fields are
225 // written directly instead of being reflectively written to a proto field.
226 //
227 // See MessageTraits for API docs.
228 struct ParseProto3Type : Proto3Type {
229   class Msg {
230    public:
MsgParseProto3Type231     explicit Msg(io::ZeroCopyOutputStream* stream) : stream_(stream) {}
232 
233    private:
234     friend ParseProto3Type;
235     io::CodedOutputStream stream_;
236     absl::flat_hash_set<int32_t> parsed_oneofs_indices_;
237     absl::flat_hash_set<int32_t> parsed_fields_;
238   };
239 
HasParsedParseProto3Type240   static bool HasParsed(Field f, const Msg& msg,
241                         bool allow_repeated_non_oneof) {
242     if (f->proto().oneof_index() != 0) {
243       return msg.parsed_oneofs_indices_.contains(f->proto().oneof_index());
244     }
245     if (allow_repeated_non_oneof) {
246       return false;
247     }
248     return msg.parsed_fields_.contains(f->proto().number());
249   }
250 
251   /// Functions for writing fields. ///
252 
RecordAsSeenParseProto3Type253   static void RecordAsSeen(Field f, Msg& msg) {
254     msg.parsed_fields_.insert(f->proto().number());
255     if (f->proto().oneof_index() != 0) {
256       msg.parsed_oneofs_indices_.insert(f->proto().oneof_index());
257     }
258   }
259 
260   template <typename F>
NewMsgParseProto3Type261   static absl::Status NewMsg(Field f, Msg& msg, F body) {
262     return NewDynamic(f, f->proto().type_url(), msg, body);
263   }
264 
265   template <typename F>
NewDynamicParseProto3Type266   static absl::Status NewDynamic(Field f, const std::string& type_url, Msg& msg,
267                                  F body) {
268     RecordAsSeen(f, msg);
269     return WithDynamicType(
270         f->parent(), type_url, [&](const Desc& desc) -> absl::Status {
271           if (f->proto().kind() == google::protobuf::Field::TYPE_GROUP) {
272             msg.stream_.WriteTag(f->proto().number() << 3 |
273                                  WireFormatLite::WIRETYPE_START_GROUP);
274             RETURN_IF_ERROR(body(desc, msg));
275             msg.stream_.WriteTag(f->proto().number() << 3 |
276                                  WireFormatLite::WIRETYPE_END_GROUP);
277             return absl::OkStatus();
278           }
279 
280           std::string out;
281           io::StringOutputStream stream(&out);
282           Msg new_msg(&stream);
283           RETURN_IF_ERROR(body(desc, new_msg));
284 
285           new_msg.stream_.Trim();  // Should probably be called "Flush()".
286           absl::string_view written(
287               out.data(), static_cast<size_t>(new_msg.stream_.ByteCount()));
288           SetString(f, msg, written);
289           return absl::OkStatus();
290         });
291   }
292 
SetFloatParseProto3Type293   static void SetFloat(Field f, Msg& msg, float x) {
294     RecordAsSeen(f, msg);
295     msg.stream_.WriteTag(f->proto().number() << 3 |
296                          WireFormatLite::WIRETYPE_FIXED32);
297     msg.stream_.WriteLittleEndian32(absl::bit_cast<uint32_t>(x));
298   }
299 
SetDoubleParseProto3Type300   static void SetDouble(Field f, Msg& msg, double x) {
301     RecordAsSeen(f, msg);
302     msg.stream_.WriteTag(f->proto().number() << 3 |
303                          WireFormatLite::WIRETYPE_FIXED64);
304     msg.stream_.WriteLittleEndian64(absl::bit_cast<uint64_t>(x));
305   }
306 
SetInt64ParseProto3Type307   static void SetInt64(Field f, Msg& msg, int64_t x) {
308     SetInt<int64_t, google::protobuf::Field::TYPE_INT64,
309            google::protobuf::Field::TYPE_SFIXED64,
310            google::protobuf::Field::TYPE_SINT64>(f, msg, x);
311   }
312 
SetUInt64ParseProto3Type313   static void SetUInt64(Field f, Msg& msg, uint64_t x) {
314     SetInt<uint64_t, google::protobuf::Field::TYPE_UINT64,
315            google::protobuf::Field::TYPE_FIXED64,
316            google::protobuf::Field::TYPE_UNKNOWN>(f, msg, x);
317   }
318 
SetInt32ParseProto3Type319   static void SetInt32(Field f, Msg& msg, int32_t x) {
320     SetInt<int32_t, google::protobuf::Field::TYPE_INT32,
321            google::protobuf::Field::TYPE_SFIXED32,
322            google::protobuf::Field::TYPE_SINT32>(f, msg, x);
323   }
324 
SetUInt32ParseProto3Type325   static void SetUInt32(Field f, Msg& msg, uint32_t x) {
326     SetInt<uint32_t, google::protobuf::Field::TYPE_UINT32,
327            google::protobuf::Field::TYPE_FIXED32,
328            google::protobuf::Field::TYPE_UNKNOWN>(f, msg, x);
329   }
330 
SetBoolParseProto3Type331   static void SetBool(Field f, Msg& msg, bool x) {
332     RecordAsSeen(f, msg);
333     msg.stream_.WriteTag(f->proto().number() << 3);
334     char b = x ? 0x01 : 0x00;
335     msg.stream_.WriteRaw(&b, 1);
336   }
337 
SetStringParseProto3Type338   static void SetString(Field f, Msg& msg, absl::string_view x) {
339     RecordAsSeen(f, msg);
340     msg.stream_.WriteTag(f->proto().number() << 3 |
341                          WireFormatLite::WIRETYPE_LENGTH_DELIMITED);
342     msg.stream_.WriteVarint64(static_cast<uint64_t>(x.size()));
343     msg.stream_.WriteRaw(x.data(), x.size());
344   }
345 
SetEnumParseProto3Type346   static void SetEnum(Field f, Msg& msg, int32_t x) {
347     RecordAsSeen(f, msg);
348     msg.stream_.WriteTag(f->proto().number() << 3);
349     // Sign extension is deliberate here.
350     msg.stream_.WriteVarint32(x);
351   }
352 
353  private:
354   using Kind = google::protobuf::Field::Kind;
355   // Sets a field of *some* integer type, with the given kinds for the possible
356   // encodings. This avoids quadruplicating this code in the helpers for the
357   // four major integer types.
358   template <typename Int, Kind varint, Kind fixed, Kind zigzag>
SetIntParseProto3Type359   static void SetInt(Field f, Msg& msg, Int x) {
360     RecordAsSeen(f, msg);
361     switch (f->proto().kind()) {
362       case zigzag:
363         // Regardless of the integer type, ZigZag64 will do the right thing,
364         // because ZigZag is not dependent on the width of the integer: it is
365         // always `2 * abs(n) + (n < 0)`.
366         x = static_cast<Int>(
367             internal::WireFormatLite::ZigZagEncode64(static_cast<int64_t>(x)));
368         ABSL_FALLTHROUGH_INTENDED;
369       case varint:
370         msg.stream_.WriteTag(f->proto().number() << 3 |
371                              WireFormatLite::WIRETYPE_VARINT);
372         if (sizeof(Int) == 4) {
373           msg.stream_.WriteVarint32(static_cast<uint32_t>(x));
374         } else {
375           msg.stream_.WriteVarint64(static_cast<uint64_t>(x));
376         }
377         break;
378       case fixed: {
379         if (sizeof(Int) == 4) {
380           msg.stream_.WriteTag(f->proto().number() << 3 |
381                                WireFormatLite::WIRETYPE_FIXED32);
382           msg.stream_.WriteLittleEndian32(static_cast<uint32_t>(x));
383         } else {
384           msg.stream_.WriteTag(f->proto().number() << 3 |
385                                WireFormatLite::WIRETYPE_FIXED64);
386           msg.stream_.WriteLittleEndian64(static_cast<uint64_t>(x));
387         }
388         break;
389       }
390       default: {  // Unreachable.
391       }
392     }
393   }
394 };
395 }  // namespace json_internal
396 }  // namespace protobuf
397 }  // namespace google
398 
399 #include "google/protobuf/port_undef.inc"
400 #endif  // GOOGLE_PROTOBUF_JSON_INTERNAL_PARSER_TRAITS_H__
401