1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc. All rights reserved.
3 // http://code.google.com/p/protobuf/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 // * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 // * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 // * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31 // Author: kenton@google.com (Kenton Varda)
32 // Based on original Protocol Buffers design by
33 // Sanjay Ghemawat, Jeff Dean, and others.
34
35 #include <stack>
36 #include <string>
37 #include <vector>
38
39 #include <google/protobuf/wire_format.h>
40
41 #include <google/protobuf/stubs/common.h>
42 #include <google/protobuf/descriptor.h>
43 #include <google/protobuf/wire_format_lite_inl.h>
44 #include <google/protobuf/descriptor.pb.h>
45 #include <google/protobuf/io/coded_stream.h>
46 #include <google/protobuf/io/zero_copy_stream.h>
47 #include <google/protobuf/io/zero_copy_stream_impl.h>
48 #include <google/protobuf/unknown_field_set.h>
49
50
51 namespace google {
52 namespace protobuf {
53 namespace internal {
54
55 using internal::WireFormatLite;
56
57 namespace {
58
59 // This function turns out to be convenient when using some macros later.
GetEnumNumber(const EnumValueDescriptor * descriptor)60 inline int GetEnumNumber(const EnumValueDescriptor* descriptor) {
61 return descriptor->number();
62 }
63
64 } // anonymous namespace
65
66 // ===================================================================
67
SkipField(io::CodedInputStream * input,uint32 tag)68 bool UnknownFieldSetFieldSkipper::SkipField(
69 io::CodedInputStream* input, uint32 tag) {
70 return WireFormat::SkipField(input, tag, unknown_fields_);
71 }
72
SkipMessage(io::CodedInputStream * input)73 bool UnknownFieldSetFieldSkipper::SkipMessage(io::CodedInputStream* input) {
74 return WireFormat::SkipMessage(input, unknown_fields_);
75 }
76
SkipUnknownEnum(int field_number,int value)77 void UnknownFieldSetFieldSkipper::SkipUnknownEnum(
78 int field_number, int value) {
79 unknown_fields_->AddVarint(field_number, value);
80 }
81
SkipField(io::CodedInputStream * input,uint32 tag,UnknownFieldSet * unknown_fields)82 bool WireFormat::SkipField(io::CodedInputStream* input, uint32 tag,
83 UnknownFieldSet* unknown_fields) {
84 int number = WireFormatLite::GetTagFieldNumber(tag);
85
86 switch (WireFormatLite::GetTagWireType(tag)) {
87 case WireFormatLite::WIRETYPE_VARINT: {
88 uint64 value;
89 if (!input->ReadVarint64(&value)) return false;
90 if (unknown_fields != NULL) unknown_fields->AddVarint(number, value);
91 return true;
92 }
93 case WireFormatLite::WIRETYPE_FIXED64: {
94 uint64 value;
95 if (!input->ReadLittleEndian64(&value)) return false;
96 if (unknown_fields != NULL) unknown_fields->AddFixed64(number, value);
97 return true;
98 }
99 case WireFormatLite::WIRETYPE_LENGTH_DELIMITED: {
100 uint32 length;
101 if (!input->ReadVarint32(&length)) return false;
102 if (unknown_fields == NULL) {
103 if (!input->Skip(length)) return false;
104 } else {
105 if (!input->ReadString(unknown_fields->AddLengthDelimited(number),
106 length)) {
107 return false;
108 }
109 }
110 return true;
111 }
112 case WireFormatLite::WIRETYPE_START_GROUP: {
113 if (!input->IncrementRecursionDepth()) return false;
114 if (!SkipMessage(input, (unknown_fields == NULL) ?
115 NULL : unknown_fields->AddGroup(number))) {
116 return false;
117 }
118 input->DecrementRecursionDepth();
119 // Check that the ending tag matched the starting tag.
120 if (!input->LastTagWas(WireFormatLite::MakeTag(
121 WireFormatLite::GetTagFieldNumber(tag),
122 WireFormatLite::WIRETYPE_END_GROUP))) {
123 return false;
124 }
125 return true;
126 }
127 case WireFormatLite::WIRETYPE_END_GROUP: {
128 return false;
129 }
130 case WireFormatLite::WIRETYPE_FIXED32: {
131 uint32 value;
132 if (!input->ReadLittleEndian32(&value)) return false;
133 if (unknown_fields != NULL) unknown_fields->AddFixed32(number, value);
134 return true;
135 }
136 default: {
137 return false;
138 }
139 }
140 }
141
SkipMessage(io::CodedInputStream * input,UnknownFieldSet * unknown_fields)142 bool WireFormat::SkipMessage(io::CodedInputStream* input,
143 UnknownFieldSet* unknown_fields) {
144 while(true) {
145 uint32 tag = input->ReadTag();
146 if (tag == 0) {
147 // End of input. This is a valid place to end, so return true.
148 return true;
149 }
150
151 WireFormatLite::WireType wire_type = WireFormatLite::GetTagWireType(tag);
152
153 if (wire_type == WireFormatLite::WIRETYPE_END_GROUP) {
154 // Must be the end of the message.
155 return true;
156 }
157
158 if (!SkipField(input, tag, unknown_fields)) return false;
159 }
160 }
161
SerializeUnknownFields(const UnknownFieldSet & unknown_fields,io::CodedOutputStream * output)162 void WireFormat::SerializeUnknownFields(const UnknownFieldSet& unknown_fields,
163 io::CodedOutputStream* output) {
164 for (int i = 0; i < unknown_fields.field_count(); i++) {
165 const UnknownField& field = unknown_fields.field(i);
166 switch (field.type()) {
167 case UnknownField::TYPE_VARINT:
168 output->WriteVarint32(WireFormatLite::MakeTag(field.number(),
169 WireFormatLite::WIRETYPE_VARINT));
170 output->WriteVarint64(field.varint());
171 break;
172 case UnknownField::TYPE_FIXED32:
173 output->WriteVarint32(WireFormatLite::MakeTag(field.number(),
174 WireFormatLite::WIRETYPE_FIXED32));
175 output->WriteLittleEndian32(field.fixed32());
176 break;
177 case UnknownField::TYPE_FIXED64:
178 output->WriteVarint32(WireFormatLite::MakeTag(field.number(),
179 WireFormatLite::WIRETYPE_FIXED64));
180 output->WriteLittleEndian64(field.fixed64());
181 break;
182 case UnknownField::TYPE_LENGTH_DELIMITED:
183 output->WriteVarint32(WireFormatLite::MakeTag(field.number(),
184 WireFormatLite::WIRETYPE_LENGTH_DELIMITED));
185 output->WriteVarint32(field.length_delimited().size());
186 output->WriteString(field.length_delimited());
187 break;
188 case UnknownField::TYPE_GROUP:
189 output->WriteVarint32(WireFormatLite::MakeTag(field.number(),
190 WireFormatLite::WIRETYPE_START_GROUP));
191 SerializeUnknownFields(field.group(), output);
192 output->WriteVarint32(WireFormatLite::MakeTag(field.number(),
193 WireFormatLite::WIRETYPE_END_GROUP));
194 break;
195 }
196 }
197 }
198
SerializeUnknownFieldsToArray(const UnknownFieldSet & unknown_fields,uint8 * target)199 uint8* WireFormat::SerializeUnknownFieldsToArray(
200 const UnknownFieldSet& unknown_fields,
201 uint8* target) {
202 for (int i = 0; i < unknown_fields.field_count(); i++) {
203 const UnknownField& field = unknown_fields.field(i);
204
205 switch (field.type()) {
206 case UnknownField::TYPE_VARINT:
207 target = WireFormatLite::WriteInt64ToArray(
208 field.number(), field.varint(), target);
209 break;
210 case UnknownField::TYPE_FIXED32:
211 target = WireFormatLite::WriteFixed32ToArray(
212 field.number(), field.fixed32(), target);
213 break;
214 case UnknownField::TYPE_FIXED64:
215 target = WireFormatLite::WriteFixed64ToArray(
216 field.number(), field.fixed64(), target);
217 break;
218 case UnknownField::TYPE_LENGTH_DELIMITED:
219 target = WireFormatLite::WriteBytesToArray(
220 field.number(), field.length_delimited(), target);
221 break;
222 case UnknownField::TYPE_GROUP:
223 target = WireFormatLite::WriteTagToArray(
224 field.number(), WireFormatLite::WIRETYPE_START_GROUP, target);
225 target = SerializeUnknownFieldsToArray(field.group(), target);
226 target = WireFormatLite::WriteTagToArray(
227 field.number(), WireFormatLite::WIRETYPE_END_GROUP, target);
228 break;
229 }
230 }
231 return target;
232 }
233
SerializeUnknownMessageSetItems(const UnknownFieldSet & unknown_fields,io::CodedOutputStream * output)234 void WireFormat::SerializeUnknownMessageSetItems(
235 const UnknownFieldSet& unknown_fields,
236 io::CodedOutputStream* output) {
237 for (int i = 0; i < unknown_fields.field_count(); i++) {
238 const UnknownField& field = unknown_fields.field(i);
239 // The only unknown fields that are allowed to exist in a MessageSet are
240 // messages, which are length-delimited.
241 if (field.type() == UnknownField::TYPE_LENGTH_DELIMITED) {
242 const string& data = field.length_delimited();
243
244 // Start group.
245 output->WriteVarint32(WireFormatLite::kMessageSetItemStartTag);
246
247 // Write type ID.
248 output->WriteVarint32(WireFormatLite::kMessageSetTypeIdTag);
249 output->WriteVarint32(field.number());
250
251 // Write message.
252 output->WriteVarint32(WireFormatLite::kMessageSetMessageTag);
253 output->WriteVarint32(data.size());
254 output->WriteString(data);
255
256 // End group.
257 output->WriteVarint32(WireFormatLite::kMessageSetItemEndTag);
258 }
259 }
260 }
261
SerializeUnknownMessageSetItemsToArray(const UnknownFieldSet & unknown_fields,uint8 * target)262 uint8* WireFormat::SerializeUnknownMessageSetItemsToArray(
263 const UnknownFieldSet& unknown_fields,
264 uint8* target) {
265 for (int i = 0; i < unknown_fields.field_count(); i++) {
266 const UnknownField& field = unknown_fields.field(i);
267
268 // The only unknown fields that are allowed to exist in a MessageSet are
269 // messages, which are length-delimited.
270 if (field.type() == UnknownField::TYPE_LENGTH_DELIMITED) {
271 const string& data = field.length_delimited();
272
273 // Start group.
274 target = io::CodedOutputStream::WriteTagToArray(
275 WireFormatLite::kMessageSetItemStartTag, target);
276
277 // Write type ID.
278 target = io::CodedOutputStream::WriteTagToArray(
279 WireFormatLite::kMessageSetTypeIdTag, target);
280 target = io::CodedOutputStream::WriteVarint32ToArray(
281 field.number(), target);
282
283 // Write message.
284 target = io::CodedOutputStream::WriteTagToArray(
285 WireFormatLite::kMessageSetMessageTag, target);
286 target = io::CodedOutputStream::WriteVarint32ToArray(data.size(), target);
287 target = io::CodedOutputStream::WriteStringToArray(data, target);
288
289 // End group.
290 target = io::CodedOutputStream::WriteTagToArray(
291 WireFormatLite::kMessageSetItemEndTag, target);
292 }
293 }
294
295 return target;
296 }
297
ComputeUnknownFieldsSize(const UnknownFieldSet & unknown_fields)298 int WireFormat::ComputeUnknownFieldsSize(
299 const UnknownFieldSet& unknown_fields) {
300 int size = 0;
301 for (int i = 0; i < unknown_fields.field_count(); i++) {
302 const UnknownField& field = unknown_fields.field(i);
303
304 switch (field.type()) {
305 case UnknownField::TYPE_VARINT:
306 size += io::CodedOutputStream::VarintSize32(
307 WireFormatLite::MakeTag(field.number(),
308 WireFormatLite::WIRETYPE_VARINT));
309 size += io::CodedOutputStream::VarintSize64(field.varint());
310 break;
311 case UnknownField::TYPE_FIXED32:
312 size += io::CodedOutputStream::VarintSize32(
313 WireFormatLite::MakeTag(field.number(),
314 WireFormatLite::WIRETYPE_FIXED32));
315 size += sizeof(int32);
316 break;
317 case UnknownField::TYPE_FIXED64:
318 size += io::CodedOutputStream::VarintSize32(
319 WireFormatLite::MakeTag(field.number(),
320 WireFormatLite::WIRETYPE_FIXED64));
321 size += sizeof(int64);
322 break;
323 case UnknownField::TYPE_LENGTH_DELIMITED:
324 size += io::CodedOutputStream::VarintSize32(
325 WireFormatLite::MakeTag(field.number(),
326 WireFormatLite::WIRETYPE_LENGTH_DELIMITED));
327 size += io::CodedOutputStream::VarintSize32(
328 field.length_delimited().size());
329 size += field.length_delimited().size();
330 break;
331 case UnknownField::TYPE_GROUP:
332 size += io::CodedOutputStream::VarintSize32(
333 WireFormatLite::MakeTag(field.number(),
334 WireFormatLite::WIRETYPE_START_GROUP));
335 size += ComputeUnknownFieldsSize(field.group());
336 size += io::CodedOutputStream::VarintSize32(
337 WireFormatLite::MakeTag(field.number(),
338 WireFormatLite::WIRETYPE_END_GROUP));
339 break;
340 }
341 }
342
343 return size;
344 }
345
ComputeUnknownMessageSetItemsSize(const UnknownFieldSet & unknown_fields)346 int WireFormat::ComputeUnknownMessageSetItemsSize(
347 const UnknownFieldSet& unknown_fields) {
348 int size = 0;
349 for (int i = 0; i < unknown_fields.field_count(); i++) {
350 const UnknownField& field = unknown_fields.field(i);
351
352 // The only unknown fields that are allowed to exist in a MessageSet are
353 // messages, which are length-delimited.
354 if (field.type() == UnknownField::TYPE_LENGTH_DELIMITED) {
355 size += WireFormatLite::kMessageSetItemTagsSize;
356 size += io::CodedOutputStream::VarintSize32(field.number());
357 size += io::CodedOutputStream::VarintSize32(
358 field.length_delimited().size());
359 size += field.length_delimited().size();
360 }
361 }
362
363 return size;
364 }
365
366 // ===================================================================
367
ParseAndMergePartial(io::CodedInputStream * input,Message * message)368 bool WireFormat::ParseAndMergePartial(io::CodedInputStream* input,
369 Message* message) {
370 const Descriptor* descriptor = message->GetDescriptor();
371 const Reflection* message_reflection = message->GetReflection();
372
373 while(true) {
374 uint32 tag = input->ReadTag();
375 if (tag == 0) {
376 // End of input. This is a valid place to end, so return true.
377 return true;
378 }
379
380 if (WireFormatLite::GetTagWireType(tag) ==
381 WireFormatLite::WIRETYPE_END_GROUP) {
382 // Must be the end of the message.
383 return true;
384 }
385
386 const FieldDescriptor* field = NULL;
387
388 if (descriptor != NULL) {
389 int field_number = WireFormatLite::GetTagFieldNumber(tag);
390 field = descriptor->FindFieldByNumber(field_number);
391
392 // If that failed, check if the field is an extension.
393 if (field == NULL && descriptor->IsExtensionNumber(field_number)) {
394 if (input->GetExtensionPool() == NULL) {
395 field = message_reflection->FindKnownExtensionByNumber(field_number);
396 } else {
397 field = input->GetExtensionPool()
398 ->FindExtensionByNumber(descriptor, field_number);
399 }
400 }
401
402 // If that failed, but we're a MessageSet, and this is the tag for a
403 // MessageSet item, then parse that.
404 if (field == NULL &&
405 descriptor->options().message_set_wire_format() &&
406 tag == WireFormatLite::kMessageSetItemStartTag) {
407 if (!ParseAndMergeMessageSetItem(input, message)) {
408 return false;
409 }
410 continue; // Skip ParseAndMergeField(); already taken care of.
411 }
412 }
413
414 if (!ParseAndMergeField(tag, field, message, input)) {
415 return false;
416 }
417 }
418 }
419
ParseAndMergeField(uint32 tag,const FieldDescriptor * field,Message * message,io::CodedInputStream * input)420 bool WireFormat::ParseAndMergeField(
421 uint32 tag,
422 const FieldDescriptor* field, // May be NULL for unknown
423 Message* message,
424 io::CodedInputStream* input) {
425 const Reflection* message_reflection = message->GetReflection();
426
427 enum { UNKNOWN, NORMAL_FORMAT, PACKED_FORMAT } value_format;
428
429 if (field == NULL) {
430 value_format = UNKNOWN;
431 } else if (WireFormatLite::GetTagWireType(tag) ==
432 WireTypeForFieldType(field->type())) {
433 value_format = NORMAL_FORMAT;
434 } else if (field->is_packable() &&
435 WireFormatLite::GetTagWireType(tag) ==
436 WireFormatLite::WIRETYPE_LENGTH_DELIMITED) {
437 value_format = PACKED_FORMAT;
438 } else {
439 // We don't recognize this field. Either the field number is unknown
440 // or the wire type doesn't match. Put it in our unknown field set.
441 value_format = UNKNOWN;
442 }
443
444 if (value_format == UNKNOWN) {
445 return SkipField(input, tag,
446 message_reflection->MutableUnknownFields(message));
447 } else if (value_format == PACKED_FORMAT) {
448 uint32 length;
449 if (!input->ReadVarint32(&length)) return false;
450 io::CodedInputStream::Limit limit = input->PushLimit(length);
451
452 switch (field->type()) {
453 #define HANDLE_PACKED_TYPE(TYPE, CPPTYPE, CPPTYPE_METHOD) \
454 case FieldDescriptor::TYPE_##TYPE: { \
455 while (input->BytesUntilLimit() > 0) { \
456 CPPTYPE value; \
457 if (!WireFormatLite::ReadPrimitive< \
458 CPPTYPE, WireFormatLite::TYPE_##TYPE>(input, &value)) \
459 return false; \
460 message_reflection->Add##CPPTYPE_METHOD(message, field, value); \
461 } \
462 break; \
463 }
464
465 HANDLE_PACKED_TYPE( INT32, int32, Int32)
466 HANDLE_PACKED_TYPE( INT64, int64, Int64)
467 HANDLE_PACKED_TYPE(SINT32, int32, Int32)
468 HANDLE_PACKED_TYPE(SINT64, int64, Int64)
469 HANDLE_PACKED_TYPE(UINT32, uint32, UInt32)
470 HANDLE_PACKED_TYPE(UINT64, uint64, UInt64)
471
472 HANDLE_PACKED_TYPE( FIXED32, uint32, UInt32)
473 HANDLE_PACKED_TYPE( FIXED64, uint64, UInt64)
474 HANDLE_PACKED_TYPE(SFIXED32, int32, Int32)
475 HANDLE_PACKED_TYPE(SFIXED64, int64, Int64)
476
477 HANDLE_PACKED_TYPE(FLOAT , float , Float )
478 HANDLE_PACKED_TYPE(DOUBLE, double, Double)
479
480 HANDLE_PACKED_TYPE(BOOL, bool, Bool)
481 #undef HANDLE_PACKED_TYPE
482
483 case FieldDescriptor::TYPE_ENUM: {
484 while (input->BytesUntilLimit() > 0) {
485 int value;
486 if (!WireFormatLite::ReadPrimitive<int, WireFormatLite::TYPE_ENUM>(
487 input, &value)) return false;
488 const EnumValueDescriptor* enum_value =
489 field->enum_type()->FindValueByNumber(value);
490 if (enum_value != NULL) {
491 message_reflection->AddEnum(message, field, enum_value);
492 }
493 }
494
495 break;
496 }
497
498 case FieldDescriptor::TYPE_STRING:
499 case FieldDescriptor::TYPE_GROUP:
500 case FieldDescriptor::TYPE_MESSAGE:
501 case FieldDescriptor::TYPE_BYTES:
502 // Can't have packed fields of these types: these should be caught by
503 // the protocol compiler.
504 return false;
505 break;
506 }
507
508 input->PopLimit(limit);
509 } else {
510 // Non-packed value (value_format == NORMAL_FORMAT)
511 switch (field->type()) {
512 #define HANDLE_TYPE(TYPE, CPPTYPE, CPPTYPE_METHOD) \
513 case FieldDescriptor::TYPE_##TYPE: { \
514 CPPTYPE value; \
515 if (!WireFormatLite::ReadPrimitive< \
516 CPPTYPE, WireFormatLite::TYPE_##TYPE>(input, &value)) \
517 return false; \
518 if (field->is_repeated()) { \
519 message_reflection->Add##CPPTYPE_METHOD(message, field, value); \
520 } else { \
521 message_reflection->Set##CPPTYPE_METHOD(message, field, value); \
522 } \
523 break; \
524 }
525
526 HANDLE_TYPE( INT32, int32, Int32)
527 HANDLE_TYPE( INT64, int64, Int64)
528 HANDLE_TYPE(SINT32, int32, Int32)
529 HANDLE_TYPE(SINT64, int64, Int64)
530 HANDLE_TYPE(UINT32, uint32, UInt32)
531 HANDLE_TYPE(UINT64, uint64, UInt64)
532
533 HANDLE_TYPE( FIXED32, uint32, UInt32)
534 HANDLE_TYPE( FIXED64, uint64, UInt64)
535 HANDLE_TYPE(SFIXED32, int32, Int32)
536 HANDLE_TYPE(SFIXED64, int64, Int64)
537
538 HANDLE_TYPE(FLOAT , float , Float )
539 HANDLE_TYPE(DOUBLE, double, Double)
540
541 HANDLE_TYPE(BOOL, bool, Bool)
542 #undef HANDLE_TYPE
543
544 case FieldDescriptor::TYPE_ENUM: {
545 int value;
546 if (!WireFormatLite::ReadPrimitive<int, WireFormatLite::TYPE_ENUM>(
547 input, &value)) return false;
548 const EnumValueDescriptor* enum_value =
549 field->enum_type()->FindValueByNumber(value);
550 if (enum_value != NULL) {
551 if (field->is_repeated()) {
552 message_reflection->AddEnum(message, field, enum_value);
553 } else {
554 message_reflection->SetEnum(message, field, enum_value);
555 }
556 } else {
557 // The enum value is not one of the known values. Add it to the
558 // UnknownFieldSet.
559 int64 sign_extended_value = static_cast<int64>(value);
560 message_reflection->MutableUnknownFields(message)
561 ->AddVarint(WireFormatLite::GetTagFieldNumber(tag),
562 sign_extended_value);
563 }
564 break;
565 }
566
567 // Handle strings separately so that we can optimize the ctype=CORD case.
568 case FieldDescriptor::TYPE_STRING: {
569 string value;
570 if (!WireFormatLite::ReadString(input, &value)) return false;
571 VerifyUTF8String(value.data(), value.length(), PARSE);
572 if (field->is_repeated()) {
573 message_reflection->AddString(message, field, value);
574 } else {
575 message_reflection->SetString(message, field, value);
576 }
577 break;
578 }
579
580 case FieldDescriptor::TYPE_BYTES: {
581 string value;
582 if (!WireFormatLite::ReadBytes(input, &value)) return false;
583 if (field->is_repeated()) {
584 message_reflection->AddString(message, field, value);
585 } else {
586 message_reflection->SetString(message, field, value);
587 }
588 break;
589 }
590
591 case FieldDescriptor::TYPE_GROUP: {
592 Message* sub_message;
593 if (field->is_repeated()) {
594 sub_message = message_reflection->AddMessage(
595 message, field, input->GetExtensionFactory());
596 } else {
597 sub_message = message_reflection->MutableMessage(
598 message, field, input->GetExtensionFactory());
599 }
600
601 if (!WireFormatLite::ReadGroup(WireFormatLite::GetTagFieldNumber(tag),
602 input, sub_message))
603 return false;
604 break;
605 }
606
607 case FieldDescriptor::TYPE_MESSAGE: {
608 Message* sub_message;
609 if (field->is_repeated()) {
610 sub_message = message_reflection->AddMessage(
611 message, field, input->GetExtensionFactory());
612 } else {
613 sub_message = message_reflection->MutableMessage(
614 message, field, input->GetExtensionFactory());
615 }
616
617 if (!WireFormatLite::ReadMessage(input, sub_message)) return false;
618 break;
619 }
620 }
621 }
622
623 return true;
624 }
625
ParseAndMergeMessageSetItem(io::CodedInputStream * input,Message * message)626 bool WireFormat::ParseAndMergeMessageSetItem(
627 io::CodedInputStream* input,
628 Message* message) {
629 const Reflection* message_reflection = message->GetReflection();
630
631 // This method parses a group which should contain two fields:
632 // required int32 type_id = 2;
633 // required data message = 3;
634
635 // Once we see a type_id, we'll construct a fake tag for this extension
636 // which is the tag it would have had under the proto2 extensions wire
637 // format.
638 uint32 fake_tag = 0;
639
640 // Once we see a type_id, we'll look up the FieldDescriptor for the
641 // extension.
642 const FieldDescriptor* field = NULL;
643
644 // If we see message data before the type_id, we'll append it to this so
645 // we can parse it later. This will probably never happen in practice,
646 // as no MessageSet encoder I know of writes the message before the type ID.
647 // But, it's technically valid so we should allow it.
648 // TODO(kenton): Use a Cord instead? Do I care?
649 string message_data;
650
651 while (true) {
652 uint32 tag = input->ReadTag();
653 if (tag == 0) return false;
654
655 switch (tag) {
656 case WireFormatLite::kMessageSetTypeIdTag: {
657 uint32 type_id;
658 if (!input->ReadVarint32(&type_id)) return false;
659 fake_tag = WireFormatLite::MakeTag(
660 type_id, WireFormatLite::WIRETYPE_LENGTH_DELIMITED);
661 field = message_reflection->FindKnownExtensionByNumber(type_id);
662
663 if (!message_data.empty()) {
664 // We saw some message data before the type_id. Have to parse it
665 // now.
666 io::ArrayInputStream raw_input(message_data.data(),
667 message_data.size());
668 io::CodedInputStream sub_input(&raw_input);
669 if (!ParseAndMergeField(fake_tag, field, message,
670 &sub_input)) {
671 return false;
672 }
673 message_data.clear();
674 }
675
676 break;
677 }
678
679 case WireFormatLite::kMessageSetMessageTag: {
680 if (fake_tag == 0) {
681 // We haven't seen a type_id yet. Append this data to message_data.
682 string temp;
683 uint32 length;
684 if (!input->ReadVarint32(&length)) return false;
685 if (!input->ReadString(&temp, length)) return false;
686 message_data.append(temp);
687 } else {
688 // Already saw type_id, so we can parse this directly.
689 if (!ParseAndMergeField(fake_tag, field, message, input)) {
690 return false;
691 }
692 }
693
694 break;
695 }
696
697 case WireFormatLite::kMessageSetItemEndTag: {
698 return true;
699 }
700
701 default: {
702 if (!SkipField(input, tag, NULL)) return false;
703 }
704 }
705 }
706 }
707
708 // ===================================================================
709
SerializeWithCachedSizes(const Message & message,int size,io::CodedOutputStream * output)710 void WireFormat::SerializeWithCachedSizes(
711 const Message& message,
712 int size, io::CodedOutputStream* output) {
713 const Descriptor* descriptor = message.GetDescriptor();
714 const Reflection* message_reflection = message.GetReflection();
715 int expected_endpoint = output->ByteCount() + size;
716
717 vector<const FieldDescriptor*> fields;
718 message_reflection->ListFields(message, &fields);
719 for (int i = 0; i < fields.size(); i++) {
720 SerializeFieldWithCachedSizes(fields[i], message, output);
721 }
722
723 if (descriptor->options().message_set_wire_format()) {
724 SerializeUnknownMessageSetItems(
725 message_reflection->GetUnknownFields(message), output);
726 } else {
727 SerializeUnknownFields(
728 message_reflection->GetUnknownFields(message), output);
729 }
730
731 GOOGLE_CHECK_EQ(output->ByteCount(), expected_endpoint)
732 << ": Protocol message serialized to a size different from what was "
733 "originally expected. Perhaps it was modified by another thread "
734 "during serialization?";
735 }
736
SerializeFieldWithCachedSizes(const FieldDescriptor * field,const Message & message,io::CodedOutputStream * output)737 void WireFormat::SerializeFieldWithCachedSizes(
738 const FieldDescriptor* field,
739 const Message& message,
740 io::CodedOutputStream* output) {
741 const Reflection* message_reflection = message.GetReflection();
742
743 if (field->is_extension() &&
744 field->containing_type()->options().message_set_wire_format() &&
745 field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE &&
746 !field->is_repeated()) {
747 SerializeMessageSetItemWithCachedSizes(field, message, output);
748 return;
749 }
750
751 int count = 0;
752
753 if (field->is_repeated()) {
754 count = message_reflection->FieldSize(message, field);
755 } else if (message_reflection->HasField(message, field)) {
756 count = 1;
757 }
758
759 const bool is_packed = field->options().packed();
760 if (is_packed && count > 0) {
761 WireFormatLite::WriteTag(field->number(),
762 WireFormatLite::WIRETYPE_LENGTH_DELIMITED, output);
763 const int data_size = FieldDataOnlyByteSize(field, message);
764 output->WriteVarint32(data_size);
765 }
766
767 for (int j = 0; j < count; j++) {
768 switch (field->type()) {
769 #define HANDLE_PRIMITIVE_TYPE(TYPE, CPPTYPE, TYPE_METHOD, CPPTYPE_METHOD) \
770 case FieldDescriptor::TYPE_##TYPE: { \
771 const CPPTYPE value = field->is_repeated() ? \
772 message_reflection->GetRepeated##CPPTYPE_METHOD( \
773 message, field, j) : \
774 message_reflection->Get##CPPTYPE_METHOD( \
775 message, field); \
776 if (is_packed) { \
777 WireFormatLite::Write##TYPE_METHOD##NoTag(value, output); \
778 } else { \
779 WireFormatLite::Write##TYPE_METHOD(field->number(), value, output); \
780 } \
781 break; \
782 }
783
784 HANDLE_PRIMITIVE_TYPE( INT32, int32, Int32, Int32)
785 HANDLE_PRIMITIVE_TYPE( INT64, int64, Int64, Int64)
786 HANDLE_PRIMITIVE_TYPE(SINT32, int32, SInt32, Int32)
787 HANDLE_PRIMITIVE_TYPE(SINT64, int64, SInt64, Int64)
788 HANDLE_PRIMITIVE_TYPE(UINT32, uint32, UInt32, UInt32)
789 HANDLE_PRIMITIVE_TYPE(UINT64, uint64, UInt64, UInt64)
790
791 HANDLE_PRIMITIVE_TYPE( FIXED32, uint32, Fixed32, UInt32)
792 HANDLE_PRIMITIVE_TYPE( FIXED64, uint64, Fixed64, UInt64)
793 HANDLE_PRIMITIVE_TYPE(SFIXED32, int32, SFixed32, Int32)
794 HANDLE_PRIMITIVE_TYPE(SFIXED64, int64, SFixed64, Int64)
795
796 HANDLE_PRIMITIVE_TYPE(FLOAT , float , Float , Float )
797 HANDLE_PRIMITIVE_TYPE(DOUBLE, double, Double, Double)
798
799 HANDLE_PRIMITIVE_TYPE(BOOL, bool, Bool, Bool)
800 #undef HANDLE_PRIMITIVE_TYPE
801
802 #define HANDLE_TYPE(TYPE, TYPE_METHOD, CPPTYPE_METHOD) \
803 case FieldDescriptor::TYPE_##TYPE: \
804 WireFormatLite::Write##TYPE_METHOD( \
805 field->number(), \
806 field->is_repeated() ? \
807 message_reflection->GetRepeated##CPPTYPE_METHOD( \
808 message, field, j) : \
809 message_reflection->Get##CPPTYPE_METHOD(message, field), \
810 output); \
811 break;
812
813 HANDLE_TYPE(GROUP , Group , Message)
814 HANDLE_TYPE(MESSAGE, Message, Message)
815 #undef HANDLE_TYPE
816
817 case FieldDescriptor::TYPE_ENUM: {
818 const EnumValueDescriptor* value = field->is_repeated() ?
819 message_reflection->GetRepeatedEnum(message, field, j) :
820 message_reflection->GetEnum(message, field);
821 if (is_packed) {
822 WireFormatLite::WriteEnumNoTag(value->number(), output);
823 } else {
824 WireFormatLite::WriteEnum(field->number(), value->number(), output);
825 }
826 break;
827 }
828
829 // Handle strings separately so that we can get string references
830 // instead of copying.
831 case FieldDescriptor::TYPE_STRING: {
832 string scratch;
833 const string& value = field->is_repeated() ?
834 message_reflection->GetRepeatedStringReference(
835 message, field, j, &scratch) :
836 message_reflection->GetStringReference(message, field, &scratch);
837 VerifyUTF8String(value.data(), value.length(), SERIALIZE);
838 WireFormatLite::WriteString(field->number(), value, output);
839 break;
840 }
841
842 case FieldDescriptor::TYPE_BYTES: {
843 string scratch;
844 const string& value = field->is_repeated() ?
845 message_reflection->GetRepeatedStringReference(
846 message, field, j, &scratch) :
847 message_reflection->GetStringReference(message, field, &scratch);
848 WireFormatLite::WriteBytes(field->number(), value, output);
849 break;
850 }
851 }
852 }
853 }
854
SerializeMessageSetItemWithCachedSizes(const FieldDescriptor * field,const Message & message,io::CodedOutputStream * output)855 void WireFormat::SerializeMessageSetItemWithCachedSizes(
856 const FieldDescriptor* field,
857 const Message& message,
858 io::CodedOutputStream* output) {
859 const Reflection* message_reflection = message.GetReflection();
860
861 // Start group.
862 output->WriteVarint32(WireFormatLite::kMessageSetItemStartTag);
863
864 // Write type ID.
865 output->WriteVarint32(WireFormatLite::kMessageSetTypeIdTag);
866 output->WriteVarint32(field->number());
867
868 // Write message.
869 output->WriteVarint32(WireFormatLite::kMessageSetMessageTag);
870
871 const Message& sub_message = message_reflection->GetMessage(message, field);
872 output->WriteVarint32(sub_message.GetCachedSize());
873 sub_message.SerializeWithCachedSizes(output);
874
875 // End group.
876 output->WriteVarint32(WireFormatLite::kMessageSetItemEndTag);
877 }
878
879 // ===================================================================
880
ByteSize(const Message & message)881 int WireFormat::ByteSize(const Message& message) {
882 const Descriptor* descriptor = message.GetDescriptor();
883 const Reflection* message_reflection = message.GetReflection();
884
885 int our_size = 0;
886
887 vector<const FieldDescriptor*> fields;
888 message_reflection->ListFields(message, &fields);
889 for (int i = 0; i < fields.size(); i++) {
890 our_size += FieldByteSize(fields[i], message);
891 }
892
893 if (descriptor->options().message_set_wire_format()) {
894 our_size += ComputeUnknownMessageSetItemsSize(
895 message_reflection->GetUnknownFields(message));
896 } else {
897 our_size += ComputeUnknownFieldsSize(
898 message_reflection->GetUnknownFields(message));
899 }
900
901 return our_size;
902 }
903
FieldByteSize(const FieldDescriptor * field,const Message & message)904 int WireFormat::FieldByteSize(
905 const FieldDescriptor* field,
906 const Message& message) {
907 const Reflection* message_reflection = message.GetReflection();
908
909 if (field->is_extension() &&
910 field->containing_type()->options().message_set_wire_format() &&
911 field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE &&
912 !field->is_repeated()) {
913 return MessageSetItemByteSize(field, message);
914 }
915
916 int count = 0;
917 if (field->is_repeated()) {
918 count = message_reflection->FieldSize(message, field);
919 } else if (message_reflection->HasField(message, field)) {
920 count = 1;
921 }
922
923 const int data_size = FieldDataOnlyByteSize(field, message);
924 int our_size = data_size;
925 if (field->options().packed()) {
926 if (data_size > 0) {
927 // Packed fields get serialized like a string, not their native type.
928 // Technically this doesn't really matter; the size only changes if it's
929 // a GROUP
930 our_size += TagSize(field->number(), FieldDescriptor::TYPE_STRING);
931 our_size += io::CodedOutputStream::VarintSize32(data_size);
932 }
933 } else {
934 our_size += count * TagSize(field->number(), field->type());
935 }
936 return our_size;
937 }
938
FieldDataOnlyByteSize(const FieldDescriptor * field,const Message & message)939 int WireFormat::FieldDataOnlyByteSize(
940 const FieldDescriptor* field,
941 const Message& message) {
942 const Reflection* message_reflection = message.GetReflection();
943
944 int count = 0;
945 if (field->is_repeated()) {
946 count = message_reflection->FieldSize(message, field);
947 } else if (message_reflection->HasField(message, field)) {
948 count = 1;
949 }
950
951 int data_size = 0;
952 switch (field->type()) {
953 #define HANDLE_TYPE(TYPE, TYPE_METHOD, CPPTYPE_METHOD) \
954 case FieldDescriptor::TYPE_##TYPE: \
955 if (field->is_repeated()) { \
956 for (int j = 0; j < count; j++) { \
957 data_size += WireFormatLite::TYPE_METHOD##Size( \
958 message_reflection->GetRepeated##CPPTYPE_METHOD( \
959 message, field, j)); \
960 } \
961 } else { \
962 data_size += WireFormatLite::TYPE_METHOD##Size( \
963 message_reflection->Get##CPPTYPE_METHOD(message, field)); \
964 } \
965 break;
966
967 #define HANDLE_FIXED_TYPE(TYPE, TYPE_METHOD) \
968 case FieldDescriptor::TYPE_##TYPE: \
969 data_size += count * WireFormatLite::k##TYPE_METHOD##Size; \
970 break;
971
972 HANDLE_TYPE( INT32, Int32, Int32)
973 HANDLE_TYPE( INT64, Int64, Int64)
974 HANDLE_TYPE(SINT32, SInt32, Int32)
975 HANDLE_TYPE(SINT64, SInt64, Int64)
976 HANDLE_TYPE(UINT32, UInt32, UInt32)
977 HANDLE_TYPE(UINT64, UInt64, UInt64)
978
979 HANDLE_FIXED_TYPE( FIXED32, Fixed32)
980 HANDLE_FIXED_TYPE( FIXED64, Fixed64)
981 HANDLE_FIXED_TYPE(SFIXED32, SFixed32)
982 HANDLE_FIXED_TYPE(SFIXED64, SFixed64)
983
984 HANDLE_FIXED_TYPE(FLOAT , Float )
985 HANDLE_FIXED_TYPE(DOUBLE, Double)
986
987 HANDLE_FIXED_TYPE(BOOL, Bool)
988
989 HANDLE_TYPE(GROUP , Group , Message)
990 HANDLE_TYPE(MESSAGE, Message, Message)
991 #undef HANDLE_TYPE
992 #undef HANDLE_FIXED_TYPE
993
994 case FieldDescriptor::TYPE_ENUM: {
995 if (field->is_repeated()) {
996 for (int j = 0; j < count; j++) {
997 data_size += WireFormatLite::EnumSize(
998 message_reflection->GetRepeatedEnum(message, field, j)->number());
999 }
1000 } else {
1001 data_size += WireFormatLite::EnumSize(
1002 message_reflection->GetEnum(message, field)->number());
1003 }
1004 break;
1005 }
1006
1007 // Handle strings separately so that we can get string references
1008 // instead of copying.
1009 case FieldDescriptor::TYPE_STRING:
1010 case FieldDescriptor::TYPE_BYTES: {
1011 for (int j = 0; j < count; j++) {
1012 string scratch;
1013 const string& value = field->is_repeated() ?
1014 message_reflection->GetRepeatedStringReference(
1015 message, field, j, &scratch) :
1016 message_reflection->GetStringReference(message, field, &scratch);
1017 data_size += WireFormatLite::StringSize(value);
1018 }
1019 break;
1020 }
1021 }
1022 return data_size;
1023 }
1024
MessageSetItemByteSize(const FieldDescriptor * field,const Message & message)1025 int WireFormat::MessageSetItemByteSize(
1026 const FieldDescriptor* field,
1027 const Message& message) {
1028 const Reflection* message_reflection = message.GetReflection();
1029
1030 int our_size = WireFormatLite::kMessageSetItemTagsSize;
1031
1032 // type_id
1033 our_size += io::CodedOutputStream::VarintSize32(field->number());
1034
1035 // message
1036 const Message& sub_message = message_reflection->GetMessage(message, field);
1037 int message_size = sub_message.ByteSize();
1038
1039 our_size += io::CodedOutputStream::VarintSize32(message_size);
1040 our_size += message_size;
1041
1042 return our_size;
1043 }
1044
VerifyUTF8StringFallback(const char * data,int size,Operation op)1045 void WireFormat::VerifyUTF8StringFallback(const char* data,
1046 int size,
1047 Operation op) {
1048 if (!IsStructurallyValidUTF8(data, size)) {
1049 const char* operation_str = NULL;
1050 switch (op) {
1051 case PARSE:
1052 operation_str = "parsing";
1053 break;
1054 case SERIALIZE:
1055 operation_str = "serializing";
1056 break;
1057 // no default case: have the compiler warn if a case is not covered.
1058 }
1059 GOOGLE_LOG(ERROR) << "Encountered string containing invalid UTF-8 data while "
1060 << operation_str
1061 << " protocol buffer. Strings must contain only UTF-8; "
1062 "use the 'bytes' type for raw bytes.";
1063 }
1064 }
1065
1066
1067 } // namespace internal
1068 } // namespace protobuf
1069 } // namespace google
1070