1 // Protocol Buffers - Google's data interchange format 2 // Copyright 2008 Google Inc. All rights reserved. 3 // 4 // Use of this source code is governed by a BSD-style 5 // license that can be found in the LICENSE file or at 6 // https://developers.google.com/open-source/licenses/bsd 7 8 #ifndef GOOGLE_PROTOBUF_JSON_INTERNAL_PARSER_TRAITS_H__ 9 #define GOOGLE_PROTOBUF_JSON_INTERNAL_PARSER_TRAITS_H__ 10 11 #include <cfloat> 12 #include <cmath> 13 #include <cstdint> 14 #include <limits> 15 #include <memory> 16 #include <string> 17 #include <utility> 18 19 #include "google/protobuf/type.pb.h" 20 #include "absl/base/attributes.h" 21 #include "absl/base/casts.h" 22 #include "absl/container/flat_hash_map.h" 23 #include "absl/container/flat_hash_set.h" 24 #include "absl/status/status.h" 25 #include "absl/strings/str_format.h" 26 #include "absl/strings/string_view.h" 27 #include "google/protobuf/io/coded_stream.h" 28 #include "google/protobuf/io/zero_copy_stream.h" 29 #include "google/protobuf/io/zero_copy_stream_impl_lite.h" 30 #include "google/protobuf/json/internal/descriptor_traits.h" 31 #include "google/protobuf/wire_format_lite.h" 32 #include "google/protobuf/stubs/status_macros.h" 33 34 // Must be included last. 35 #include "google/protobuf/port_def.inc" 36 37 namespace google { 38 namespace protobuf { 39 namespace json_internal { 40 using ::google::protobuf::internal::WireFormatLite; 41 42 // See the comment in json_util2_parser.cc for more information. 43 // 44 // The type traits in this file describe how to parse to a protobuf 45 // representation used by the JSON API, either via proto reflection or by 46 // emitting wire format to an output stream. 47 48 // Helper alias templates to avoid needing to write `typename` in function 49 // signatures. 50 template <typename Traits> 51 using Msg = typename Traits::Msg; 52 53 struct ParseProto2Descriptor : Proto2Descriptor { 54 // A message value that fields can be written to, but not read from. 55 class Msg { 56 public: MsgParseProto2Descriptor57 explicit Msg(Message* msg) : msg_(msg) {} 58 59 private: 60 friend ParseProto2Descriptor; 61 Message* msg_; 62 // Because `msg` might already have oneofs set, we need to track which were 63 // set *during* the parse separately. 64 absl::flat_hash_set<int> parsed_oneofs_indices_; 65 absl::flat_hash_set<int> parsed_fields_; 66 }; 67 HasParsedParseProto2Descriptor68 static bool HasParsed(Field f, const Msg& msg, 69 bool allow_repeated_non_oneof) { 70 if (f->real_containing_oneof()) { 71 return msg.parsed_oneofs_indices_.contains( 72 f->real_containing_oneof()->index()); 73 } 74 if (allow_repeated_non_oneof) { 75 return false; 76 } 77 return msg.parsed_fields_.contains(f->number()); 78 } 79 80 /// Functions for writing fields. /// 81 82 // Marks a field as having been "seen". This will clear the field if it is 83 // the first occurrence thereof. 84 // 85 // All setters call this function automatically, but it may also be called 86 // eagerly to clear a pre-existing value that might not be overwritten, such 87 // as when parsing a repeated field. RecordAsSeenParseProto2Descriptor88 static void RecordAsSeen(Field f, Msg& msg) { 89 bool inserted = msg.parsed_fields_.insert(f->number()).second; 90 if (inserted) { 91 msg.msg_->GetReflection()->ClearField(msg.msg_, f); 92 } 93 94 if (f->real_containing_oneof() != nullptr) { 95 msg.parsed_oneofs_indices_.insert(f->real_containing_oneof()->index()); 96 } 97 } 98 99 // Adds a new message and calls body on it. 100 // 101 // Body should have a signature `absl::Status(const Desc&, Msg&)`. 102 template <typename F> NewMsgParseProto2Descriptor103 static absl::Status NewMsg(Field f, Msg& msg, F body) { 104 RecordAsSeen(f, msg); 105 106 Message* new_msg; 107 if (f->is_repeated()) { 108 new_msg = msg.msg_->GetReflection()->AddMessage(msg.msg_, f); 109 } else { 110 new_msg = msg.msg_->GetReflection()->MutableMessage(msg.msg_, f); 111 } 112 Msg wrapper(new_msg); 113 return body(*f->message_type(), wrapper); 114 } 115 116 // Adds a new dynamic message with the given type name and calls body on it. 117 // 118 // Body should have a signature `absl::Status(const Desc&, Msg&)`. 119 template <typename F> NewDynamicParseProto2Descriptor120 static absl::Status NewDynamic(Field f, const std::string& type_url, Msg& msg, 121 F body) { 122 RecordAsSeen(f, msg); 123 return WithDynamicType( 124 *f->containing_type(), type_url, [&](const Desc& desc) -> absl::Status { 125 DynamicMessageFactory factory; 126 std::unique_ptr<Message> dynamic(factory.GetPrototype(&desc)->New()); 127 Msg wrapper(dynamic.get()); 128 RETURN_IF_ERROR(body(desc, wrapper)); 129 130 if (f->is_repeated()) { 131 msg.msg_->GetReflection()->AddString( 132 msg.msg_, f, dynamic->SerializePartialAsString()); 133 } else { 134 msg.msg_->GetReflection()->SetString( 135 msg.msg_, f, dynamic->SerializePartialAsString()); 136 } 137 return absl::OkStatus(); 138 }); 139 } 140 SetFloatParseProto2Descriptor141 static void SetFloat(Field f, Msg& msg, float x) { 142 RecordAsSeen(f, msg); 143 if (f->is_repeated()) { 144 msg.msg_->GetReflection()->AddFloat(msg.msg_, f, x); 145 } else { 146 msg.msg_->GetReflection()->SetFloat(msg.msg_, f, x); 147 } 148 } 149 SetDoubleParseProto2Descriptor150 static void SetDouble(Field f, Msg& msg, double x) { 151 if (f->is_repeated()) { 152 msg.msg_->GetReflection()->AddDouble(msg.msg_, f, x); 153 } else { 154 msg.msg_->GetReflection()->SetDouble(msg.msg_, f, x); 155 } 156 } 157 SetInt64ParseProto2Descriptor158 static void SetInt64(Field f, Msg& msg, int64_t x) { 159 RecordAsSeen(f, msg); 160 if (f->is_repeated()) { 161 msg.msg_->GetReflection()->AddInt64(msg.msg_, f, x); 162 } else { 163 msg.msg_->GetReflection()->SetInt64(msg.msg_, f, x); 164 } 165 } 166 SetUInt64ParseProto2Descriptor167 static void SetUInt64(Field f, Msg& msg, uint64_t x) { 168 RecordAsSeen(f, msg); 169 if (f->is_repeated()) { 170 msg.msg_->GetReflection()->AddUInt64(msg.msg_, f, x); 171 } else { 172 msg.msg_->GetReflection()->SetUInt64(msg.msg_, f, x); 173 } 174 } 175 SetInt32ParseProto2Descriptor176 static void SetInt32(Field f, Msg& msg, int32_t x) { 177 RecordAsSeen(f, msg); 178 if (f->is_repeated()) { 179 msg.msg_->GetReflection()->AddInt32(msg.msg_, f, x); 180 } else { 181 msg.msg_->GetReflection()->SetInt32(msg.msg_, f, x); 182 } 183 } 184 SetUInt32ParseProto2Descriptor185 static void SetUInt32(Field f, Msg& msg, uint32_t x) { 186 RecordAsSeen(f, msg); 187 if (f->is_repeated()) { 188 msg.msg_->GetReflection()->AddUInt32(msg.msg_, f, x); 189 } else { 190 msg.msg_->GetReflection()->SetUInt32(msg.msg_, f, x); 191 } 192 } 193 SetBoolParseProto2Descriptor194 static void SetBool(Field f, Msg& msg, bool x) { 195 RecordAsSeen(f, msg); 196 if (f->is_repeated()) { 197 msg.msg_->GetReflection()->AddBool(msg.msg_, f, x); 198 } else { 199 msg.msg_->GetReflection()->SetBool(msg.msg_, f, x); 200 } 201 } 202 SetStringParseProto2Descriptor203 static void SetString(Field f, Msg& msg, absl::string_view x) { 204 RecordAsSeen(f, msg); 205 if (f->is_repeated()) { 206 msg.msg_->GetReflection()->AddString(msg.msg_, f, std::string(x)); 207 } else { 208 msg.msg_->GetReflection()->SetString(msg.msg_, f, std::string(x)); 209 } 210 } 211 SetEnumParseProto2Descriptor212 static void SetEnum(Field f, Msg& msg, int32_t x) { 213 RecordAsSeen(f, msg); 214 if (f->is_repeated()) { 215 msg.msg_->GetReflection()->AddEnumValue(msg.msg_, f, x); 216 } else { 217 msg.msg_->GetReflection()->SetEnumValue(msg.msg_, f, x); 218 } 219 } 220 }; 221 222 // Traits for proto3-ish deserialization. 223 // 224 // This includes a rudimentary proto serializer, since message fields are 225 // written directly instead of being reflectively written to a proto field. 226 // 227 // See MessageTraits for API docs. 228 struct ParseProto3Type : Proto3Type { 229 class Msg { 230 public: MsgParseProto3Type231 explicit Msg(io::ZeroCopyOutputStream* stream) : stream_(stream) {} 232 233 private: 234 friend ParseProto3Type; 235 io::CodedOutputStream stream_; 236 absl::flat_hash_set<int32_t> parsed_oneofs_indices_; 237 absl::flat_hash_set<int32_t> parsed_fields_; 238 }; 239 HasParsedParseProto3Type240 static bool HasParsed(Field f, const Msg& msg, 241 bool allow_repeated_non_oneof) { 242 if (f->proto().oneof_index() != 0) { 243 return msg.parsed_oneofs_indices_.contains(f->proto().oneof_index()); 244 } 245 if (allow_repeated_non_oneof) { 246 return false; 247 } 248 return msg.parsed_fields_.contains(f->proto().number()); 249 } 250 251 /// Functions for writing fields. /// 252 RecordAsSeenParseProto3Type253 static void RecordAsSeen(Field f, Msg& msg) { 254 msg.parsed_fields_.insert(f->proto().number()); 255 if (f->proto().oneof_index() != 0) { 256 msg.parsed_oneofs_indices_.insert(f->proto().oneof_index()); 257 } 258 } 259 260 template <typename F> NewMsgParseProto3Type261 static absl::Status NewMsg(Field f, Msg& msg, F body) { 262 return NewDynamic(f, f->proto().type_url(), msg, body); 263 } 264 265 template <typename F> NewDynamicParseProto3Type266 static absl::Status NewDynamic(Field f, const std::string& type_url, Msg& msg, 267 F body) { 268 RecordAsSeen(f, msg); 269 return WithDynamicType( 270 f->parent(), type_url, [&](const Desc& desc) -> absl::Status { 271 if (f->proto().kind() == google::protobuf::Field::TYPE_GROUP) { 272 msg.stream_.WriteTag(f->proto().number() << 3 | 273 WireFormatLite::WIRETYPE_START_GROUP); 274 RETURN_IF_ERROR(body(desc, msg)); 275 msg.stream_.WriteTag(f->proto().number() << 3 | 276 WireFormatLite::WIRETYPE_END_GROUP); 277 return absl::OkStatus(); 278 } 279 280 std::string out; 281 io::StringOutputStream stream(&out); 282 Msg new_msg(&stream); 283 RETURN_IF_ERROR(body(desc, new_msg)); 284 285 new_msg.stream_.Trim(); // Should probably be called "Flush()". 286 absl::string_view written( 287 out.data(), static_cast<size_t>(new_msg.stream_.ByteCount())); 288 SetString(f, msg, written); 289 return absl::OkStatus(); 290 }); 291 } 292 SetFloatParseProto3Type293 static void SetFloat(Field f, Msg& msg, float x) { 294 RecordAsSeen(f, msg); 295 msg.stream_.WriteTag(f->proto().number() << 3 | 296 WireFormatLite::WIRETYPE_FIXED32); 297 msg.stream_.WriteLittleEndian32(absl::bit_cast<uint32_t>(x)); 298 } 299 SetDoubleParseProto3Type300 static void SetDouble(Field f, Msg& msg, double x) { 301 RecordAsSeen(f, msg); 302 msg.stream_.WriteTag(f->proto().number() << 3 | 303 WireFormatLite::WIRETYPE_FIXED64); 304 msg.stream_.WriteLittleEndian64(absl::bit_cast<uint64_t>(x)); 305 } 306 SetInt64ParseProto3Type307 static void SetInt64(Field f, Msg& msg, int64_t x) { 308 SetInt<int64_t, google::protobuf::Field::TYPE_INT64, 309 google::protobuf::Field::TYPE_SFIXED64, 310 google::protobuf::Field::TYPE_SINT64>(f, msg, x); 311 } 312 SetUInt64ParseProto3Type313 static void SetUInt64(Field f, Msg& msg, uint64_t x) { 314 SetInt<uint64_t, google::protobuf::Field::TYPE_UINT64, 315 google::protobuf::Field::TYPE_FIXED64, 316 google::protobuf::Field::TYPE_UNKNOWN>(f, msg, x); 317 } 318 SetInt32ParseProto3Type319 static void SetInt32(Field f, Msg& msg, int32_t x) { 320 SetInt<int32_t, google::protobuf::Field::TYPE_INT32, 321 google::protobuf::Field::TYPE_SFIXED32, 322 google::protobuf::Field::TYPE_SINT32>(f, msg, x); 323 } 324 SetUInt32ParseProto3Type325 static void SetUInt32(Field f, Msg& msg, uint32_t x) { 326 SetInt<uint32_t, google::protobuf::Field::TYPE_UINT32, 327 google::protobuf::Field::TYPE_FIXED32, 328 google::protobuf::Field::TYPE_UNKNOWN>(f, msg, x); 329 } 330 SetBoolParseProto3Type331 static void SetBool(Field f, Msg& msg, bool x) { 332 RecordAsSeen(f, msg); 333 msg.stream_.WriteTag(f->proto().number() << 3); 334 char b = x ? 0x01 : 0x00; 335 msg.stream_.WriteRaw(&b, 1); 336 } 337 SetStringParseProto3Type338 static void SetString(Field f, Msg& msg, absl::string_view x) { 339 RecordAsSeen(f, msg); 340 msg.stream_.WriteTag(f->proto().number() << 3 | 341 WireFormatLite::WIRETYPE_LENGTH_DELIMITED); 342 msg.stream_.WriteVarint64(static_cast<uint64_t>(x.size())); 343 msg.stream_.WriteRaw(x.data(), x.size()); 344 } 345 SetEnumParseProto3Type346 static void SetEnum(Field f, Msg& msg, int32_t x) { 347 RecordAsSeen(f, msg); 348 msg.stream_.WriteTag(f->proto().number() << 3); 349 // Sign extension is deliberate here. 350 msg.stream_.WriteVarint32(x); 351 } 352 353 private: 354 using Kind = google::protobuf::Field::Kind; 355 // Sets a field of *some* integer type, with the given kinds for the possible 356 // encodings. This avoids quadruplicating this code in the helpers for the 357 // four major integer types. 358 template <typename Int, Kind varint, Kind fixed, Kind zigzag> SetIntParseProto3Type359 static void SetInt(Field f, Msg& msg, Int x) { 360 RecordAsSeen(f, msg); 361 switch (f->proto().kind()) { 362 case zigzag: 363 // Regardless of the integer type, ZigZag64 will do the right thing, 364 // because ZigZag is not dependent on the width of the integer: it is 365 // always `2 * abs(n) + (n < 0)`. 366 x = static_cast<Int>( 367 internal::WireFormatLite::ZigZagEncode64(static_cast<int64_t>(x))); 368 ABSL_FALLTHROUGH_INTENDED; 369 case varint: 370 msg.stream_.WriteTag(f->proto().number() << 3 | 371 WireFormatLite::WIRETYPE_VARINT); 372 if (sizeof(Int) == 4) { 373 msg.stream_.WriteVarint32(static_cast<uint32_t>(x)); 374 } else { 375 msg.stream_.WriteVarint64(static_cast<uint64_t>(x)); 376 } 377 break; 378 case fixed: { 379 if (sizeof(Int) == 4) { 380 msg.stream_.WriteTag(f->proto().number() << 3 | 381 WireFormatLite::WIRETYPE_FIXED32); 382 msg.stream_.WriteLittleEndian32(static_cast<uint32_t>(x)); 383 } else { 384 msg.stream_.WriteTag(f->proto().number() << 3 | 385 WireFormatLite::WIRETYPE_FIXED64); 386 msg.stream_.WriteLittleEndian64(static_cast<uint64_t>(x)); 387 } 388 break; 389 } 390 default: { // Unreachable. 391 } 392 } 393 } 394 }; 395 } // namespace json_internal 396 } // namespace protobuf 397 } // namespace google 398 399 #include "google/protobuf/port_undef.inc" 400 #endif // GOOGLE_PROTOBUF_JSON_INTERNAL_PARSER_TRAITS_H__ 401