• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 //
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file or at
6 // https://developers.google.com/open-source/licenses/bsd
7 
8 // Author: kenton@google.com (Kenton Varda)
9 //         atenasio@google.com (Chris Atenasio) (ZigZag transform)
10 //  Based on original Protocol Buffers design by
11 //  Sanjay Ghemawat, Jeff Dean, and others.
12 //
13 // This header is logically internal, but is made public because it is used
14 // from protocol-compiler-generated code, which may reside in other components.
15 
16 #ifndef GOOGLE_PROTOBUF_WIRE_FORMAT_H__
17 #define GOOGLE_PROTOBUF_WIRE_FORMAT_H__
18 
19 #include <cstddef>
20 #include <cstdint>
21 
22 #include "absl/base/casts.h"
23 #include "absl/log/absl_check.h"
24 #include "absl/strings/cord.h"
25 #include "absl/strings/string_view.h"
26 #include "google/protobuf/descriptor.h"
27 #include "google/protobuf/generated_message_util.h"
28 #include "google/protobuf/io/coded_stream.h"
29 #include "google/protobuf/message.h"
30 #include "google/protobuf/metadata_lite.h"
31 #include "google/protobuf/parse_context.h"
32 #include "google/protobuf/wire_format_lite.h"
33 
34 #ifdef SWIG
35 #error "You cannot SWIG proto headers"
36 #endif
37 
38 // Must be included last.
39 #include "google/protobuf/port_def.inc"
40 
41 namespace google {
42 namespace protobuf {
43 class MapKey;           // map_field.h
44 class UnknownFieldSet;  // unknown_field_set.h
45 }  // namespace protobuf
46 }  // namespace google
47 
48 namespace google {
49 namespace protobuf {
50 namespace internal {
51 
52 class TcParser;
53 
54 // This class is for internal use by the protocol buffer library and by
55 // protocol-compiler-generated message classes.  It must not be called
56 // directly by clients.
57 //
58 // This class contains code for implementing the binary protocol buffer
59 // wire format via reflection.  The WireFormatLite class implements the
60 // non-reflection based routines.
61 //
62 // This class is really a namespace that contains only static methods
63 class PROTOBUF_EXPORT WireFormat {
64  public:
65   WireFormat() = delete;
66 
67   // Given a field return its WireType
68   static inline WireFormatLite::WireType WireTypeForField(
69       const FieldDescriptor* field);
70 
71   // Given a FieldDescriptor::Type return its WireType
72   static inline WireFormatLite::WireType WireTypeForFieldType(
73       FieldDescriptor::Type type);
74 
75   // Compute the byte size of a tag.  For groups, this includes both the start
76   // and end tags.
77   static inline size_t TagSize(int field_number, FieldDescriptor::Type type);
78 
79   // These procedures can be used to implement the methods of Message which
80   // handle parsing and serialization of the protocol buffer wire format
81   // using only the Reflection interface.  When you ask the protocol
82   // compiler to optimize for code size rather than speed, it will implement
83   // those methods in terms of these procedures.  Of course, these are much
84   // slower than the specialized implementations which the protocol compiler
85   // generates when told to optimize for speed.
86 
87   // Read a message in protocol buffer wire format.
88   //
89   // This procedure reads either to the end of the input stream or through
90   // a WIRETYPE_END_GROUP tag ending the message, whichever comes first.
91   // It returns false if the input is invalid.
92   //
93   // Required fields are NOT checked by this method.  You must call
94   // IsInitialized() on the resulting message yourself.
95   static bool ParseAndMergePartial(io::CodedInputStream* input,
96                                    Message* message);
97 
98   // This is meant for internal protobuf use (WireFormat is an internal class).
99   // This is the reflective implementation of the _InternalParse functionality.
100   static const char* _InternalParse(Message* msg, const char* ptr,
101                                     internal::ParseContext* ctx);
102 
103   // Serialize a message in protocol buffer wire format.
104   //
105   // Any embedded messages within the message must have their correct sizes
106   // cached.  However, the top-level message need not; its size is passed as
107   // a parameter to this procedure.
108   //
109   // These return false iff the underlying stream returns a write error.
SerializeWithCachedSizes(const Message & message,int size,io::CodedOutputStream * output)110   static void SerializeWithCachedSizes(const Message& message, int size,
111                                        io::CodedOutputStream* output) {
112     int expected_endpoint = output->ByteCount() + size;
113     output->SetCur(
114         _InternalSerialize(message, output->Cur(), output->EpsCopy()));
115     ABSL_CHECK_EQ(output->ByteCount(), expected_endpoint)
116         << ": Protocol message serialized to a size different from what was "
117            "originally expected.  Perhaps it was modified by another thread "
118            "during serialization?";
119   }
120   static uint8_t* _InternalSerialize(const Message& message, uint8_t* target,
121                                      io::EpsCopyOutputStream* stream);
122 
123   // Implements Message::ByteSize() via reflection.  WARNING:  The result
124   // of this method is *not* cached anywhere.  However, all embedded messages
125   // will have their ByteSize() methods called, so their sizes will be cached.
126   // Therefore, calling this method is sufficient to allow you to call
127   // WireFormat::SerializeWithCachedSizes() on the same object.
128   static size_t ByteSize(const Message& message);
129 
130   // -----------------------------------------------------------------
131   // Helpers for dealing with unknown fields
132 
133   // Skips a field value of the given WireType.  The input should start
134   // positioned immediately after the tag.  If unknown_fields is non-nullptr,
135   // the contents of the field will be added to it.
136   static bool SkipField(io::CodedInputStream* input, uint32_t tag,
137                         UnknownFieldSet* unknown_fields);
138 
139   // Reads and ignores a message from the input.  If unknown_fields is
140   // non-nullptr, the contents will be added to it.
141   static bool SkipMessage(io::CodedInputStream* input,
142                           UnknownFieldSet* unknown_fields);
143 
144   // Read a packed enum field. If the is_valid function is not nullptr, values
145   // for which is_valid(value) returns false are appended to
146   // unknown_fields_stream.
147   static bool ReadPackedEnumPreserveUnknowns(io::CodedInputStream* input,
148                                              uint32_t field_number,
149                                              bool (*is_valid)(int),
150                                              UnknownFieldSet* unknown_fields,
151                                              RepeatedField<int>* values);
152 
153   // Write the contents of an UnknownFieldSet to the output.
SerializeUnknownFields(const UnknownFieldSet & unknown_fields,io::CodedOutputStream * output)154   static void SerializeUnknownFields(const UnknownFieldSet& unknown_fields,
155                                      io::CodedOutputStream* output) {
156     output->SetCur(InternalSerializeUnknownFieldsToArray(
157         unknown_fields, output->Cur(), output->EpsCopy()));
158   }
159   // Same as above, except writing directly to the provided buffer.
160   // Requires that the buffer have sufficient capacity for
161   // ComputeUnknownFieldsSize(unknown_fields).
162   //
163   // Returns a pointer past the last written byte.
SerializeUnknownFieldsToArray(const UnknownFieldSet & unknown_fields,uint8_t * target)164   static uint8_t* SerializeUnknownFieldsToArray(
165       const UnknownFieldSet& unknown_fields, uint8_t* target) {
166     io::EpsCopyOutputStream stream(
167         target, static_cast<int>(ComputeUnknownFieldsSize(unknown_fields)),
168         io::CodedOutputStream::IsDefaultSerializationDeterministic());
169     return InternalSerializeUnknownFieldsToArray(unknown_fields, target,
170                                                  &stream);
171   }
172   static uint8_t* InternalSerializeUnknownFieldsToArray(
173       const UnknownFieldSet& unknown_fields, uint8_t* target,
174       io::EpsCopyOutputStream* stream);
175 
176   // Same thing except for messages that have the message_set_wire_format
177   // option.
SerializeUnknownMessageSetItems(const UnknownFieldSet & unknown_fields,io::CodedOutputStream * output)178   static void SerializeUnknownMessageSetItems(
179       const UnknownFieldSet& unknown_fields, io::CodedOutputStream* output) {
180     output->SetCur(InternalSerializeUnknownMessageSetItemsToArray(
181         unknown_fields, output->Cur(), output->EpsCopy()));
182   }
183   // Same as above, except writing directly to the provided buffer.
184   // Requires that the buffer have sufficient capacity for
185   // ComputeUnknownMessageSetItemsSize(unknown_fields).
186   //
187   // Returns a pointer past the last written byte.
188   static uint8_t* SerializeUnknownMessageSetItemsToArray(
189       const UnknownFieldSet& unknown_fields, uint8_t* target);
190   static uint8_t* InternalSerializeUnknownMessageSetItemsToArray(
191       const UnknownFieldSet& unknown_fields, uint8_t* target,
192       io::EpsCopyOutputStream* stream);
193 
194   // Compute the size of the UnknownFieldSet on the wire.
195   static size_t ComputeUnknownFieldsSize(const UnknownFieldSet& unknown_fields);
196 
197   // Same thing except for messages that have the message_set_wire_format
198   // option.
199   static size_t ComputeUnknownMessageSetItemsSize(
200       const UnknownFieldSet& unknown_fields);
201 
202   // Helper functions for encoding and decoding tags.  (Inlined below and in
203   // _inl.h)
204   //
205   // This is different from MakeTag(field->number(), field->type()) in the
206   // case of packed repeated fields.
207   static uint32_t MakeTag(const FieldDescriptor* field);
208 
209   // Parse a single field.  The input should start out positioned immediately
210   // after the tag.
211   static bool ParseAndMergeField(
212       uint32_t tag,
213       const FieldDescriptor* field,  // May be nullptr for unknown
214       Message* message, io::CodedInputStream* input);
215 
216   // Serialize a single field.
SerializeFieldWithCachedSizes(const FieldDescriptor * field,const Message & message,io::CodedOutputStream * output)217   static void SerializeFieldWithCachedSizes(
218       const FieldDescriptor* field,  // Cannot be nullptr
219       const Message& message, io::CodedOutputStream* output) {
220     output->SetCur(InternalSerializeField(field, message, output->Cur(),
221                                           output->EpsCopy()));
222   }
223   static uint8_t* InternalSerializeField(
224       const FieldDescriptor* field,  // Cannot be nullptr
225       const Message& message, uint8_t* target, io::EpsCopyOutputStream* stream);
226 
227   // Compute size of a single field.  If the field is a message type, this
228   // will call ByteSize() for the embedded message, insuring that it caches
229   // its size.
230   static size_t FieldByteSize(const FieldDescriptor* field,  // Can't be nullptr
231                               const Message& message);
232 
233   // Parse/serialize a MessageSet::Item group.  Used with messages that use
234   // option message_set_wire_format = true.
235   static bool ParseAndMergeMessageSetItem(io::CodedInputStream* input,
236                                           Message* message);
SerializeMessageSetItemWithCachedSizes(const FieldDescriptor * field,const Message & message,io::CodedOutputStream * output)237   static void SerializeMessageSetItemWithCachedSizes(
238       const FieldDescriptor* field, const Message& message,
239       io::CodedOutputStream* output) {
240     output->SetCur(InternalSerializeMessageSetItem(
241         field, message, output->Cur(), output->EpsCopy()));
242   }
243   static uint8_t* InternalSerializeMessageSetItem(
244       const FieldDescriptor* field, const Message& message, uint8_t* target,
245       io::EpsCopyOutputStream* stream);
246   static size_t MessageSetItemByteSize(const FieldDescriptor* field,
247                                        const Message& message);
248 
249   // Computes the byte size of a field, excluding tags. For packed fields, it
250   // only includes the size of the raw data, and not the size of the total
251   // length, but for other length-prefixed types, the size of the length is
252   // included.
253   static size_t FieldDataOnlyByteSize(
254       const FieldDescriptor* field,  // Cannot be nullptr
255       const Message& message);
256 
257   enum Operation {
258     PARSE = 0,
259     SERIALIZE = 1,
260   };
261 
262   // Verifies that a string field is valid UTF8, logging an error if not.
263   // This function will not be called by newly generated protobuf code
264   // but remains present to support existing code.
265   static void VerifyUTF8String(const char* data, int size, Operation op);
266   // The NamedField variant takes a field name in order to produce an
267   // informative error message if verification fails.
268   static void VerifyUTF8StringNamedField(const char* data, int size,
269                                          Operation op,
270                                          absl::string_view field_name);
271 
272  private:
273   struct MessageSetParser;
274   friend class TcParser;
275   // Skip a MessageSet field.
276   static bool SkipMessageSetField(io::CodedInputStream* input,
277                                   uint32_t field_number,
278                                   UnknownFieldSet* unknown_fields);
279 
280   // Parse a MessageSet field.
281   static bool ParseAndMergeMessageSetField(uint32_t field_number,
282                                            const FieldDescriptor* field,
283                                            Message* message,
284                                            io::CodedInputStream* input);
285   // Parses the value from the wire that belongs to tag.
286   static const char* _InternalParseAndMergeField(Message* msg, const char* ptr,
287                                                  internal::ParseContext* ctx,
288                                                  uint64_t tag,
289                                                  const Reflection* reflection,
290                                                  const FieldDescriptor* field);
291 };
292 
293 // Subclass of FieldSkipper which saves skipped fields to an UnknownFieldSet.
294 class PROTOBUF_EXPORT UnknownFieldSetFieldSkipper : public FieldSkipper {
295  public:
UnknownFieldSetFieldSkipper(UnknownFieldSet * unknown_fields)296   explicit UnknownFieldSetFieldSkipper(UnknownFieldSet* unknown_fields)
297       : unknown_fields_(unknown_fields) {}
298   ~UnknownFieldSetFieldSkipper() override = default;
299 
300   // implements FieldSkipper -----------------------------------------
301   bool SkipField(io::CodedInputStream* input, uint32_t tag) override;
302   bool SkipMessage(io::CodedInputStream* input) override;
303   void SkipUnknownEnum(int field_number, int value) override;
304 
305  protected:
306   UnknownFieldSet* unknown_fields_;
307 };
308 
309 // inline methods ====================================================
310 
WireTypeForField(const FieldDescriptor * field)311 inline WireFormatLite::WireType WireFormat::WireTypeForField(
312     const FieldDescriptor* field) {
313   if (field->is_packed()) {
314     return WireFormatLite::WIRETYPE_LENGTH_DELIMITED;
315   } else {
316     return WireTypeForFieldType(field->type());
317   }
318 }
319 
WireTypeForFieldType(FieldDescriptor::Type type)320 inline WireFormatLite::WireType WireFormat::WireTypeForFieldType(
321     FieldDescriptor::Type type) {
322   // Some compilers don't like enum -> enum casts, so we implicit_cast to
323   // int first.
324   return WireFormatLite::WireTypeForFieldType(
325       static_cast<WireFormatLite::FieldType>(absl::implicit_cast<int>(type)));
326 }
327 
MakeTag(const FieldDescriptor * field)328 inline uint32_t WireFormat::MakeTag(const FieldDescriptor* field) {
329   return WireFormatLite::MakeTag(field->number(), WireTypeForField(field));
330 }
331 
TagSize(int field_number,FieldDescriptor::Type type)332 inline size_t WireFormat::TagSize(int field_number,
333                                   FieldDescriptor::Type type) {
334   // Some compilers don't like enum -> enum casts, so we implicit_cast to
335   // int first.
336   return WireFormatLite::TagSize(
337       field_number,
338       static_cast<WireFormatLite::FieldType>(absl::implicit_cast<int>(type)));
339 }
340 
VerifyUTF8String(const char * data,int size,WireFormat::Operation op)341 inline void WireFormat::VerifyUTF8String(const char* data, int size,
342                                          WireFormat::Operation op) {
343 #ifdef GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED
344   WireFormatLite::VerifyUtf8String(data, size,
345                                    static_cast<WireFormatLite::Operation>(op),
346                                    /* field_name = */ "");
347 #else
348   // Avoid the compiler warning about unused variables.
349   (void)data;
350   (void)size;
351   (void)op;
352 #endif
353 }
354 
VerifyUTF8StringNamedField(const char * data,int size,WireFormat::Operation op,const absl::string_view field_name)355 inline void WireFormat::VerifyUTF8StringNamedField(
356     const char* data, int size, WireFormat::Operation op,
357     const absl::string_view field_name) {
358 #ifdef GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED
359   WireFormatLite::VerifyUtf8String(
360       data, size, static_cast<WireFormatLite::Operation>(op), field_name);
361 #else
362   // Avoid the compiler warning about unused variables.
363   (void)data;
364   (void)size;
365   (void)op;
366   (void)field_name;
367 #endif
368 }
369 
370 
InternalSerializeUnknownMessageSetItemsToArray(const UnknownFieldSet & unknown_fields,uint8_t * target,io::EpsCopyOutputStream * stream)371 inline uint8_t* InternalSerializeUnknownMessageSetItemsToArray(
372     const UnknownFieldSet& unknown_fields, uint8_t* target,
373     io::EpsCopyOutputStream* stream) {
374   return WireFormat::InternalSerializeUnknownMessageSetItemsToArray(
375       unknown_fields, target, stream);
376 }
377 
ComputeUnknownMessageSetItemsSize(const UnknownFieldSet & unknown_fields)378 inline size_t ComputeUnknownMessageSetItemsSize(
379     const UnknownFieldSet& unknown_fields) {
380   return WireFormat::ComputeUnknownMessageSetItemsSize(unknown_fields);
381 }
382 
// Compute the size of the UnknownFieldSet on the wire.
// NOTE(review): the roles of `size` and `cached_size` are not visible from
// this header -- presumably `size` is a running total and the result is
// stored through `cached_size`; confirm against the definition.
PROTOBUF_EXPORT
size_t ComputeUnknownFieldsSize(const InternalMetadata& metadata, size_t size,
                                CachedSize* cached_size);

// NOTE(review): declared here, defined elsewhere.  Judging by the name, this
// presumably returns the byte size of a map key's payload without its tag --
// confirm against the definition.
size_t MapKeyDataOnlyByteSize(const FieldDescriptor* field,
                              const MapKey& value);

// NOTE(review): declared here, defined elsewhere.  Presumably writes the map
// key `value` for `field` at `target` via `stream` and returns the advanced
// write pointer, like the other uint8_t*-returning serializers in this
// header -- confirm against the definition.
uint8_t* SerializeMapKeyWithCachedSizes(const FieldDescriptor* field,
                                        const MapKey& value, uint8_t* target,
                                        io::EpsCopyOutputStream* stream);
394 }  // namespace internal
395 }  // namespace protobuf
396 }  // namespace google
397 
398 #include "google/protobuf/port_undef.inc"
399 
400 #endif  // GOOGLE_PROTOBUF_WIRE_FORMAT_H__
401