1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc. All rights reserved.
3 //
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file or at
6 // https://developers.google.com/open-source/licenses/bsd
7
8 // Author: kenton@google.com (Kenton Varda)
9 // atenasio@google.com (Chris Atenasio) (ZigZag transform)
10 // Based on original Protocol Buffers design by
11 // Sanjay Ghemawat, Jeff Dean, and others.
12 //
13 // This header is logically internal, but is made public because it is used
14 // from protocol-compiler-generated code, which may reside in other components.
15
16 #ifndef GOOGLE_PROTOBUF_WIRE_FORMAT_H__
17 #define GOOGLE_PROTOBUF_WIRE_FORMAT_H__
18
19 #include <cstddef>
20 #include <cstdint>
21
22 #include "absl/base/casts.h"
23 #include "absl/log/absl_check.h"
24 #include "absl/strings/cord.h"
25 #include "absl/strings/string_view.h"
26 #include "google/protobuf/descriptor.h"
27 #include "google/protobuf/generated_message_util.h"
28 #include "google/protobuf/io/coded_stream.h"
29 #include "google/protobuf/message.h"
30 #include "google/protobuf/metadata_lite.h"
31 #include "google/protobuf/parse_context.h"
32 #include "google/protobuf/wire_format_lite.h"
33
34 #ifdef SWIG
35 #error "You cannot SWIG proto headers"
36 #endif
37
38 // Must be included last.
39 #include "google/protobuf/port_def.inc"
40
41 namespace google {
42 namespace protobuf {
43 class MapKey; // map_field.h
44 class UnknownFieldSet; // unknown_field_set.h
45 } // namespace protobuf
46 } // namespace google
47
48 namespace google {
49 namespace protobuf {
50 namespace internal {
51
52 class TcParser;
53
54 // This class is for internal use by the protocol buffer library and by
55 // protocol-compiler-generated message classes. It must not be called
56 // directly by clients.
57 //
58 // This class contains code for implementing the binary protocol buffer
59 // wire format via reflection. The WireFormatLite class implements the
60 // non-reflection based routines.
61 //
62 // This class is really a namespace that contains only static methods
63 class PROTOBUF_EXPORT WireFormat {
64 public:
65 WireFormat() = delete;
66
67 // Given a field return its WireType
68 static inline WireFormatLite::WireType WireTypeForField(
69 const FieldDescriptor* field);
70
71 // Given a FieldDescriptor::Type return its WireType
72 static inline WireFormatLite::WireType WireTypeForFieldType(
73 FieldDescriptor::Type type);
74
75 // Compute the byte size of a tag. For groups, this includes both the start
76 // and end tags.
77 static inline size_t TagSize(int field_number, FieldDescriptor::Type type);
78
79 // These procedures can be used to implement the methods of Message which
80 // handle parsing and serialization of the protocol buffer wire format
81 // using only the Reflection interface. When you ask the protocol
82 // compiler to optimize for code size rather than speed, it will implement
83 // those methods in terms of these procedures. Of course, these are much
84 // slower than the specialized implementations which the protocol compiler
85 // generates when told to optimize for speed.
86
87 // Read a message in protocol buffer wire format.
88 //
89 // This procedure reads either to the end of the input stream or through
90 // a WIRETYPE_END_GROUP tag ending the message, whichever comes first.
91 // It returns false if the input is invalid.
92 //
93 // Required fields are NOT checked by this method. You must call
94 // IsInitialized() on the resulting message yourself.
95 static bool ParseAndMergePartial(io::CodedInputStream* input,
96 Message* message);
97
98 // This is meant for internal protobuf use (WireFormat is an internal class).
99 // This is the reflective implementation of the _InternalParse functionality.
100 static const char* _InternalParse(Message* msg, const char* ptr,
101 internal::ParseContext* ctx);
102
103 // Serialize a message in protocol buffer wire format.
104 //
105 // Any embedded messages within the message must have their correct sizes
106 // cached. However, the top-level message need not; its size is passed as
107 // a parameter to this procedure.
108 //
109 // These return false iff the underlying stream returns a write error.
SerializeWithCachedSizes(const Message & message,int size,io::CodedOutputStream * output)110 static void SerializeWithCachedSizes(const Message& message, int size,
111 io::CodedOutputStream* output) {
112 int expected_endpoint = output->ByteCount() + size;
113 output->SetCur(
114 _InternalSerialize(message, output->Cur(), output->EpsCopy()));
115 ABSL_CHECK_EQ(output->ByteCount(), expected_endpoint)
116 << ": Protocol message serialized to a size different from what was "
117 "originally expected. Perhaps it was modified by another thread "
118 "during serialization?";
119 }
120 static uint8_t* _InternalSerialize(const Message& message, uint8_t* target,
121 io::EpsCopyOutputStream* stream);
122
123 // Implements Message::ByteSize() via reflection. WARNING: The result
124 // of this method is *not* cached anywhere. However, all embedded messages
125 // will have their ByteSize() methods called, so their sizes will be cached.
126 // Therefore, calling this method is sufficient to allow you to call
127 // WireFormat::SerializeWithCachedSizes() on the same object.
128 static size_t ByteSize(const Message& message);
129
130 // -----------------------------------------------------------------
131 // Helpers for dealing with unknown fields
132
133 // Skips a field value of the given WireType. The input should start
134 // positioned immediately after the tag. If unknown_fields is non-nullptr,
135 // the contents of the field will be added to it.
136 static bool SkipField(io::CodedInputStream* input, uint32_t tag,
137 UnknownFieldSet* unknown_fields);
138
139 // Reads and ignores a message from the input. If unknown_fields is
140 // non-nullptr, the contents will be added to it.
141 static bool SkipMessage(io::CodedInputStream* input,
142 UnknownFieldSet* unknown_fields);
143
144 // Read a packed enum field. If the is_valid function is not nullptr, values
145 // for which is_valid(value) returns false are appended to
146 // unknown_fields_stream.
147 static bool ReadPackedEnumPreserveUnknowns(io::CodedInputStream* input,
148 uint32_t field_number,
149 bool (*is_valid)(int),
150 UnknownFieldSet* unknown_fields,
151 RepeatedField<int>* values);
152
153 // Write the contents of an UnknownFieldSet to the output.
SerializeUnknownFields(const UnknownFieldSet & unknown_fields,io::CodedOutputStream * output)154 static void SerializeUnknownFields(const UnknownFieldSet& unknown_fields,
155 io::CodedOutputStream* output) {
156 output->SetCur(InternalSerializeUnknownFieldsToArray(
157 unknown_fields, output->Cur(), output->EpsCopy()));
158 }
159 // Same as above, except writing directly to the provided buffer.
160 // Requires that the buffer have sufficient capacity for
161 // ComputeUnknownFieldsSize(unknown_fields).
162 //
163 // Returns a pointer past the last written byte.
SerializeUnknownFieldsToArray(const UnknownFieldSet & unknown_fields,uint8_t * target)164 static uint8_t* SerializeUnknownFieldsToArray(
165 const UnknownFieldSet& unknown_fields, uint8_t* target) {
166 io::EpsCopyOutputStream stream(
167 target, static_cast<int>(ComputeUnknownFieldsSize(unknown_fields)),
168 io::CodedOutputStream::IsDefaultSerializationDeterministic());
169 return InternalSerializeUnknownFieldsToArray(unknown_fields, target,
170 &stream);
171 }
172 static uint8_t* InternalSerializeUnknownFieldsToArray(
173 const UnknownFieldSet& unknown_fields, uint8_t* target,
174 io::EpsCopyOutputStream* stream);
175
176 // Same thing except for messages that have the message_set_wire_format
177 // option.
SerializeUnknownMessageSetItems(const UnknownFieldSet & unknown_fields,io::CodedOutputStream * output)178 static void SerializeUnknownMessageSetItems(
179 const UnknownFieldSet& unknown_fields, io::CodedOutputStream* output) {
180 output->SetCur(InternalSerializeUnknownMessageSetItemsToArray(
181 unknown_fields, output->Cur(), output->EpsCopy()));
182 }
183 // Same as above, except writing directly to the provided buffer.
184 // Requires that the buffer have sufficient capacity for
185 // ComputeUnknownMessageSetItemsSize(unknown_fields).
186 //
187 // Returns a pointer past the last written byte.
188 static uint8_t* SerializeUnknownMessageSetItemsToArray(
189 const UnknownFieldSet& unknown_fields, uint8_t* target);
190 static uint8_t* InternalSerializeUnknownMessageSetItemsToArray(
191 const UnknownFieldSet& unknown_fields, uint8_t* target,
192 io::EpsCopyOutputStream* stream);
193
194 // Compute the size of the UnknownFieldSet on the wire.
195 static size_t ComputeUnknownFieldsSize(const UnknownFieldSet& unknown_fields);
196
197 // Same thing except for messages that have the message_set_wire_format
198 // option.
199 static size_t ComputeUnknownMessageSetItemsSize(
200 const UnknownFieldSet& unknown_fields);
201
202 // Helper functions for encoding and decoding tags. (Inlined below and in
203 // _inl.h)
204 //
205 // This is different from MakeTag(field->number(), field->type()) in the
206 // case of packed repeated fields.
207 static uint32_t MakeTag(const FieldDescriptor* field);
208
209 // Parse a single field. The input should start out positioned immediately
210 // after the tag.
211 static bool ParseAndMergeField(
212 uint32_t tag,
213 const FieldDescriptor* field, // May be nullptr for unknown
214 Message* message, io::CodedInputStream* input);
215
216 // Serialize a single field.
SerializeFieldWithCachedSizes(const FieldDescriptor * field,const Message & message,io::CodedOutputStream * output)217 static void SerializeFieldWithCachedSizes(
218 const FieldDescriptor* field, // Cannot be nullptr
219 const Message& message, io::CodedOutputStream* output) {
220 output->SetCur(InternalSerializeField(field, message, output->Cur(),
221 output->EpsCopy()));
222 }
223 static uint8_t* InternalSerializeField(
224 const FieldDescriptor* field, // Cannot be nullptr
225 const Message& message, uint8_t* target, io::EpsCopyOutputStream* stream);
226
227 // Compute size of a single field. If the field is a message type, this
228 // will call ByteSize() for the embedded message, insuring that it caches
229 // its size.
230 static size_t FieldByteSize(const FieldDescriptor* field, // Can't be nullptr
231 const Message& message);
232
233 // Parse/serialize a MessageSet::Item group. Used with messages that use
234 // option message_set_wire_format = true.
235 static bool ParseAndMergeMessageSetItem(io::CodedInputStream* input,
236 Message* message);
SerializeMessageSetItemWithCachedSizes(const FieldDescriptor * field,const Message & message,io::CodedOutputStream * output)237 static void SerializeMessageSetItemWithCachedSizes(
238 const FieldDescriptor* field, const Message& message,
239 io::CodedOutputStream* output) {
240 output->SetCur(InternalSerializeMessageSetItem(
241 field, message, output->Cur(), output->EpsCopy()));
242 }
243 static uint8_t* InternalSerializeMessageSetItem(
244 const FieldDescriptor* field, const Message& message, uint8_t* target,
245 io::EpsCopyOutputStream* stream);
246 static size_t MessageSetItemByteSize(const FieldDescriptor* field,
247 const Message& message);
248
249 // Computes the byte size of a field, excluding tags. For packed fields, it
250 // only includes the size of the raw data, and not the size of the total
251 // length, but for other length-prefixed types, the size of the length is
252 // included.
253 static size_t FieldDataOnlyByteSize(
254 const FieldDescriptor* field, // Cannot be nullptr
255 const Message& message);
256
257 enum Operation {
258 PARSE = 0,
259 SERIALIZE = 1,
260 };
261
262 // Verifies that a string field is valid UTF8, logging an error if not.
263 // This function will not be called by newly generated protobuf code
264 // but remains present to support existing code.
265 static void VerifyUTF8String(const char* data, int size, Operation op);
266 // The NamedField variant takes a field name in order to produce an
267 // informative error message if verification fails.
268 static void VerifyUTF8StringNamedField(const char* data, int size,
269 Operation op,
270 absl::string_view field_name);
271
272 private:
273 struct MessageSetParser;
274 friend class TcParser;
275 // Skip a MessageSet field.
276 static bool SkipMessageSetField(io::CodedInputStream* input,
277 uint32_t field_number,
278 UnknownFieldSet* unknown_fields);
279
280 // Parse a MessageSet field.
281 static bool ParseAndMergeMessageSetField(uint32_t field_number,
282 const FieldDescriptor* field,
283 Message* message,
284 io::CodedInputStream* input);
285 // Parses the value from the wire that belongs to tag.
286 static const char* _InternalParseAndMergeField(Message* msg, const char* ptr,
287 internal::ParseContext* ctx,
288 uint64_t tag,
289 const Reflection* reflection,
290 const FieldDescriptor* field);
291 };
292
293 // Subclass of FieldSkipper which saves skipped fields to an UnknownFieldSet.
294 class PROTOBUF_EXPORT UnknownFieldSetFieldSkipper : public FieldSkipper {
295 public:
UnknownFieldSetFieldSkipper(UnknownFieldSet * unknown_fields)296 explicit UnknownFieldSetFieldSkipper(UnknownFieldSet* unknown_fields)
297 : unknown_fields_(unknown_fields) {}
298 ~UnknownFieldSetFieldSkipper() override = default;
299
300 // implements FieldSkipper -----------------------------------------
301 bool SkipField(io::CodedInputStream* input, uint32_t tag) override;
302 bool SkipMessage(io::CodedInputStream* input) override;
303 void SkipUnknownEnum(int field_number, int value) override;
304
305 protected:
306 UnknownFieldSet* unknown_fields_;
307 };
308
309 // inline methods ====================================================
310
WireTypeForField(const FieldDescriptor * field)311 inline WireFormatLite::WireType WireFormat::WireTypeForField(
312 const FieldDescriptor* field) {
313 if (field->is_packed()) {
314 return WireFormatLite::WIRETYPE_LENGTH_DELIMITED;
315 } else {
316 return WireTypeForFieldType(field->type());
317 }
318 }
319
WireTypeForFieldType(FieldDescriptor::Type type)320 inline WireFormatLite::WireType WireFormat::WireTypeForFieldType(
321 FieldDescriptor::Type type) {
322 // Some compilers don't like enum -> enum casts, so we implicit_cast to
323 // int first.
324 return WireFormatLite::WireTypeForFieldType(
325 static_cast<WireFormatLite::FieldType>(absl::implicit_cast<int>(type)));
326 }
327
MakeTag(const FieldDescriptor * field)328 inline uint32_t WireFormat::MakeTag(const FieldDescriptor* field) {
329 return WireFormatLite::MakeTag(field->number(), WireTypeForField(field));
330 }
331
TagSize(int field_number,FieldDescriptor::Type type)332 inline size_t WireFormat::TagSize(int field_number,
333 FieldDescriptor::Type type) {
334 // Some compilers don't like enum -> enum casts, so we implicit_cast to
335 // int first.
336 return WireFormatLite::TagSize(
337 field_number,
338 static_cast<WireFormatLite::FieldType>(absl::implicit_cast<int>(type)));
339 }
340
VerifyUTF8String(const char * data,int size,WireFormat::Operation op)341 inline void WireFormat::VerifyUTF8String(const char* data, int size,
342 WireFormat::Operation op) {
343 #ifdef GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED
344 WireFormatLite::VerifyUtf8String(data, size,
345 static_cast<WireFormatLite::Operation>(op),
346 /* field_name = */ "");
347 #else
348 // Avoid the compiler warning about unused variables.
349 (void)data;
350 (void)size;
351 (void)op;
352 #endif
353 }
354
VerifyUTF8StringNamedField(const char * data,int size,WireFormat::Operation op,const absl::string_view field_name)355 inline void WireFormat::VerifyUTF8StringNamedField(
356 const char* data, int size, WireFormat::Operation op,
357 const absl::string_view field_name) {
358 #ifdef GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED
359 WireFormatLite::VerifyUtf8String(
360 data, size, static_cast<WireFormatLite::Operation>(op), field_name);
361 #else
362 // Avoid the compiler warning about unused variables.
363 (void)data;
364 (void)size;
365 (void)op;
366 (void)field_name;
367 #endif
368 }
369
370
InternalSerializeUnknownMessageSetItemsToArray(const UnknownFieldSet & unknown_fields,uint8_t * target,io::EpsCopyOutputStream * stream)371 inline uint8_t* InternalSerializeUnknownMessageSetItemsToArray(
372 const UnknownFieldSet& unknown_fields, uint8_t* target,
373 io::EpsCopyOutputStream* stream) {
374 return WireFormat::InternalSerializeUnknownMessageSetItemsToArray(
375 unknown_fields, target, stream);
376 }
377
ComputeUnknownMessageSetItemsSize(const UnknownFieldSet & unknown_fields)378 inline size_t ComputeUnknownMessageSetItemsSize(
379 const UnknownFieldSet& unknown_fields) {
380 return WireFormat::ComputeUnknownMessageSetItemsSize(unknown_fields);
381 }
382
383 // Compute the size of the UnknownFieldSet on the wire.
384 PROTOBUF_EXPORT
385 size_t ComputeUnknownFieldsSize(const InternalMetadata& metadata, size_t size,
386 CachedSize* cached_size);
387
388 size_t MapKeyDataOnlyByteSize(const FieldDescriptor* field,
389 const MapKey& value);
390
391 uint8_t* SerializeMapKeyWithCachedSizes(const FieldDescriptor* field,
392 const MapKey& value, uint8_t* target,
393 io::EpsCopyOutputStream* stream);
394 } // namespace internal
395 } // namespace protobuf
396 } // namespace google
397
398 #include "google/protobuf/port_undef.inc"
399
400 #endif // GOOGLE_PROTOBUF_WIRE_FORMAT_H__
401