1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc. All rights reserved.
3 // http://code.google.com/p/protobuf/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 // * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 // * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 // * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31 // Author: kenton@google.com (Kenton Varda)
32 // Based on original Protocol Buffers design by
33 // Sanjay Ghemawat, Jeff Dean, and others.
34 //
35 // DynamicMessage is implemented by constructing a data structure which
36 // has roughly the same memory layout as a generated message would have.
37 // Then, we use GeneratedMessageReflection to implement our reflection
38 // interface. All the other operations we need to implement (e.g.
39 // parsing, copying, etc.) are already implemented in terms of
40 // Reflection, so the rest is easy.
41 //
42 // The up side of this strategy is that it's very efficient. We don't
43 // need to use hash_maps or generic representations of fields. The
44 // down side is that this is a low-level memory management hack which
45 // can be tricky to get right.
46 //
47 // As mentioned in the header, we only expose a DynamicMessageFactory
48 // publicly, not the DynamicMessage class itself. This is because
// GeneratedMessageReflection wants to have a pointer to a "default"
50 // copy of the class, with all fields initialized to their default
51 // values. We only want to construct one of these per message type,
52 // so DynamicMessageFactory stores a cache of default messages for
53 // each type it sees (each unique Descriptor pointer). The code
54 // refers to the "default" copy of the class as the "prototype".
55 //
56 // Note on memory allocation: This module often calls "operator new()"
57 // to allocate untyped memory, rather than calling something like
58 // "new uint8[]". This is because "operator new()" means "Give me some
59 // space which I can use as I please." while "new uint8[]" means "Give
// me an array of 8-bit integers.".  In practice, the latter may return
61 // a pointer that is not aligned correctly for general use. I believe
62 // Item 8 of "More Effective C++" discusses this in more detail, though
63 // I don't have the book on me right now so I'm not sure.
64
65 #include <algorithm>
66 #include <google/protobuf/stubs/hash.h>
67
68 #include <google/protobuf/stubs/common.h>
69
70 #include <google/protobuf/dynamic_message.h>
71 #include <google/protobuf/descriptor.h>
72 #include <google/protobuf/descriptor.pb.h>
73 #include <google/protobuf/generated_message_util.h>
74 #include <google/protobuf/generated_message_reflection.h>
75 #include <google/protobuf/reflection_ops.h>
76 #include <google/protobuf/repeated_field.h>
77 #include <google/protobuf/extension_set.h>
78 #include <google/protobuf/wire_format.h>
79
80 namespace google {
81 namespace protobuf {
82
83 using internal::WireFormat;
84 using internal::ExtensionSet;
85 using internal::GeneratedMessageReflection;
86
87
88 // ===================================================================
89 // Some helper tables and functions...
90
91 namespace {
92
93 // Compute the byte size of the in-memory representation of the field.
FieldSpaceUsed(const FieldDescriptor * field)94 int FieldSpaceUsed(const FieldDescriptor* field) {
95 typedef FieldDescriptor FD; // avoid line wrapping
96 if (field->label() == FD::LABEL_REPEATED) {
97 switch (field->cpp_type()) {
98 case FD::CPPTYPE_INT32 : return sizeof(RepeatedField<int32 >);
99 case FD::CPPTYPE_INT64 : return sizeof(RepeatedField<int64 >);
100 case FD::CPPTYPE_UINT32 : return sizeof(RepeatedField<uint32 >);
101 case FD::CPPTYPE_UINT64 : return sizeof(RepeatedField<uint64 >);
102 case FD::CPPTYPE_DOUBLE : return sizeof(RepeatedField<double >);
103 case FD::CPPTYPE_FLOAT : return sizeof(RepeatedField<float >);
104 case FD::CPPTYPE_BOOL : return sizeof(RepeatedField<bool >);
105 case FD::CPPTYPE_ENUM : return sizeof(RepeatedField<int >);
106 case FD::CPPTYPE_MESSAGE: return sizeof(RepeatedPtrField<Message>);
107
108 case FD::CPPTYPE_STRING:
109 switch (field->options().ctype()) {
110 default: // TODO(kenton): Support other string reps.
111 case FieldOptions::STRING:
112 return sizeof(RepeatedPtrField<string>);
113 }
114 break;
115 }
116 } else {
117 switch (field->cpp_type()) {
118 case FD::CPPTYPE_INT32 : return sizeof(int32 );
119 case FD::CPPTYPE_INT64 : return sizeof(int64 );
120 case FD::CPPTYPE_UINT32 : return sizeof(uint32 );
121 case FD::CPPTYPE_UINT64 : return sizeof(uint64 );
122 case FD::CPPTYPE_DOUBLE : return sizeof(double );
123 case FD::CPPTYPE_FLOAT : return sizeof(float );
124 case FD::CPPTYPE_BOOL : return sizeof(bool );
125 case FD::CPPTYPE_ENUM : return sizeof(int );
126 case FD::CPPTYPE_MESSAGE: return sizeof(Message*);
127
128 case FD::CPPTYPE_STRING:
129 switch (field->options().ctype()) {
130 default: // TODO(kenton): Support other string reps.
131 case FieldOptions::STRING:
132 return sizeof(string*);
133 }
134 break;
135 }
136 }
137
138 GOOGLE_LOG(DFATAL) << "Can't get here.";
139 return 0;
140 }
141
// Integer division of |i| by |j| that rounds the quotient up instead of
// truncating it (for non-negative |i| and positive |j|).
inline int DivideRoundingUp(int i, int j) {
  const int round_up_bias = j - 1;
  const int biased_dividend = i + round_up_bias;
  return biased_dividend / j;
}
145
146 static const int kSafeAlignment = sizeof(uint64);
147
// Rounds |offset| up to the nearest multiple of |alignment| (for non-negative
// |offset| and positive |alignment|).
inline int AlignTo(int offset, int alignment) {
  // Number of whole |alignment|-sized units needed to cover |offset| bytes.
  const int unit_count = (offset + (alignment - 1)) / alignment;
  return unit_count * alignment;
}
151
152 // Rounds the given byte offset up to the next offset aligned such that any
153 // type may be stored at it.
AlignOffset(int offset)154 inline int AlignOffset(int offset) {
155 return AlignTo(offset, kSafeAlignment);
156 }
157
// Size of type T in bits, computed as eight bits per byte of sizeof(T).
#define bitsizeof(T) (8 * sizeof(T))
159
160 } // namespace
161
162 // ===================================================================
163
164 class DynamicMessage : public Message {
165 public:
166 struct TypeInfo {
167 int size;
168 int has_bits_offset;
169 int unknown_fields_offset;
170 int extensions_offset;
171
172 // Not owned by the TypeInfo.
173 DynamicMessageFactory* factory; // The factory that created this object.
174 const DescriptorPool* pool; // The factory's DescriptorPool.
175 const Descriptor* type; // Type of this DynamicMessage.
176
177 // Warning: The order in which the following pointers are defined is
178 // important (the prototype must be deleted *before* the offsets).
179 scoped_array<int> offsets;
180 scoped_ptr<const GeneratedMessageReflection> reflection;
181 scoped_ptr<const DynamicMessage> prototype;
182 };
183
184 DynamicMessage(const TypeInfo* type_info);
185 ~DynamicMessage();
186
187 // Called on the prototype after construction to initialize message fields.
188 void CrossLinkPrototypes();
189
190 // implements Message ----------------------------------------------
191
192 Message* New() const;
193
194 int GetCachedSize() const;
195 void SetCachedSize(int size) const;
196
197 Metadata GetMetadata() const;
198
199 private:
200 GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(DynamicMessage);
201
is_prototype() const202 inline bool is_prototype() const {
203 return type_info_->prototype == this ||
204 // If type_info_->prototype is NULL, then we must be constructing
205 // the prototype now, which means we must be the prototype.
206 type_info_->prototype == NULL;
207 }
208
OffsetToPointer(int offset)209 inline void* OffsetToPointer(int offset) {
210 return reinterpret_cast<uint8*>(this) + offset;
211 }
OffsetToPointer(int offset) const212 inline const void* OffsetToPointer(int offset) const {
213 return reinterpret_cast<const uint8*>(this) + offset;
214 }
215
216 const TypeInfo* type_info_;
217
218 // TODO(kenton): Make this an atomic<int> when C++ supports it.
219 mutable int cached_byte_size_;
220 };
221
DynamicMessage(const TypeInfo * type_info)222 DynamicMessage::DynamicMessage(const TypeInfo* type_info)
223 : type_info_(type_info),
224 cached_byte_size_(0) {
225 // We need to call constructors for various fields manually and set
226 // default values where appropriate. We use placement new to call
227 // constructors. If you haven't heard of placement new, I suggest Googling
228 // it now. We use placement new even for primitive types that don't have
229 // constructors for consistency. (In theory, placement new should be used
230 // any time you are trying to convert untyped memory to typed memory, though
231 // in practice that's not strictly necessary for types that don't have a
232 // constructor.)
233
234 const Descriptor* descriptor = type_info_->type;
235
236 new(OffsetToPointer(type_info_->unknown_fields_offset)) UnknownFieldSet;
237
238 if (type_info_->extensions_offset != -1) {
239 new(OffsetToPointer(type_info_->extensions_offset)) ExtensionSet;
240 }
241
242 for (int i = 0; i < descriptor->field_count(); i++) {
243 const FieldDescriptor* field = descriptor->field(i);
244 void* field_ptr = OffsetToPointer(type_info_->offsets[i]);
245 switch (field->cpp_type()) {
246 #define HANDLE_TYPE(CPPTYPE, TYPE) \
247 case FieldDescriptor::CPPTYPE_##CPPTYPE: \
248 if (!field->is_repeated()) { \
249 new(field_ptr) TYPE(field->default_value_##TYPE()); \
250 } else { \
251 new(field_ptr) RepeatedField<TYPE>(); \
252 } \
253 break;
254
255 HANDLE_TYPE(INT32 , int32 );
256 HANDLE_TYPE(INT64 , int64 );
257 HANDLE_TYPE(UINT32, uint32);
258 HANDLE_TYPE(UINT64, uint64);
259 HANDLE_TYPE(DOUBLE, double);
260 HANDLE_TYPE(FLOAT , float );
261 HANDLE_TYPE(BOOL , bool );
262 #undef HANDLE_TYPE
263
264 case FieldDescriptor::CPPTYPE_ENUM:
265 if (!field->is_repeated()) {
266 new(field_ptr) int(field->default_value_enum()->number());
267 } else {
268 new(field_ptr) RepeatedField<int>();
269 }
270 break;
271
272 case FieldDescriptor::CPPTYPE_STRING:
273 switch (field->options().ctype()) {
274 default: // TODO(kenton): Support other string reps.
275 case FieldOptions::STRING:
276 if (!field->is_repeated()) {
277 if (is_prototype()) {
278 new(field_ptr) const string*(&field->default_value_string());
279 } else {
280 string* default_value =
281 *reinterpret_cast<string* const*>(
282 type_info_->prototype->OffsetToPointer(
283 type_info_->offsets[i]));
284 new(field_ptr) string*(default_value);
285 }
286 } else {
287 new(field_ptr) RepeatedPtrField<string>();
288 }
289 break;
290 }
291 break;
292
293 case FieldDescriptor::CPPTYPE_MESSAGE: {
294 if (!field->is_repeated()) {
295 new(field_ptr) Message*(NULL);
296 } else {
297 new(field_ptr) RepeatedPtrField<Message>();
298 }
299 break;
300 }
301 }
302 }
303 }
304
~DynamicMessage()305 DynamicMessage::~DynamicMessage() {
306 const Descriptor* descriptor = type_info_->type;
307
308 reinterpret_cast<UnknownFieldSet*>(
309 OffsetToPointer(type_info_->unknown_fields_offset))->~UnknownFieldSet();
310
311 if (type_info_->extensions_offset != -1) {
312 reinterpret_cast<ExtensionSet*>(
313 OffsetToPointer(type_info_->extensions_offset))->~ExtensionSet();
314 }
315
316 // We need to manually run the destructors for repeated fields and strings,
317 // just as we ran their constructors in the the DynamicMessage constructor.
318 // Additionally, if any singular embedded messages have been allocated, we
319 // need to delete them, UNLESS we are the prototype message of this type,
320 // in which case any embedded messages are other prototypes and shouldn't
321 // be touched.
322 for (int i = 0; i < descriptor->field_count(); i++) {
323 const FieldDescriptor* field = descriptor->field(i);
324 void* field_ptr = OffsetToPointer(type_info_->offsets[i]);
325
326 if (field->is_repeated()) {
327 switch (field->cpp_type()) {
328 #define HANDLE_TYPE(UPPERCASE, LOWERCASE) \
329 case FieldDescriptor::CPPTYPE_##UPPERCASE : \
330 reinterpret_cast<RepeatedField<LOWERCASE>*>(field_ptr) \
331 ->~RepeatedField<LOWERCASE>(); \
332 break
333
334 HANDLE_TYPE( INT32, int32);
335 HANDLE_TYPE( INT64, int64);
336 HANDLE_TYPE(UINT32, uint32);
337 HANDLE_TYPE(UINT64, uint64);
338 HANDLE_TYPE(DOUBLE, double);
339 HANDLE_TYPE( FLOAT, float);
340 HANDLE_TYPE( BOOL, bool);
341 HANDLE_TYPE( ENUM, int);
342 #undef HANDLE_TYPE
343
344 case FieldDescriptor::CPPTYPE_STRING:
345 switch (field->options().ctype()) {
346 default: // TODO(kenton): Support other string reps.
347 case FieldOptions::STRING:
348 reinterpret_cast<RepeatedPtrField<string>*>(field_ptr)
349 ->~RepeatedPtrField<string>();
350 break;
351 }
352 break;
353
354 case FieldDescriptor::CPPTYPE_MESSAGE:
355 reinterpret_cast<RepeatedPtrField<Message>*>(field_ptr)
356 ->~RepeatedPtrField<Message>();
357 break;
358 }
359
360 } else if (field->cpp_type() == FieldDescriptor::CPPTYPE_STRING) {
361 switch (field->options().ctype()) {
362 default: // TODO(kenton): Support other string reps.
363 case FieldOptions::STRING: {
364 string* ptr = *reinterpret_cast<string**>(field_ptr);
365 if (ptr != &field->default_value_string()) {
366 delete ptr;
367 }
368 break;
369 }
370 }
371 } else if ((field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) &&
372 !is_prototype()) {
373 Message* message = *reinterpret_cast<Message**>(field_ptr);
374 if (message != NULL) {
375 delete message;
376 }
377 }
378 }
379 }
380
CrossLinkPrototypes()381 void DynamicMessage::CrossLinkPrototypes() {
382 // This should only be called on the prototype message.
383 GOOGLE_CHECK(is_prototype());
384
385 DynamicMessageFactory* factory = type_info_->factory;
386 const Descriptor* descriptor = type_info_->type;
387
388 // Cross-link default messages.
389 for (int i = 0; i < descriptor->field_count(); i++) {
390 const FieldDescriptor* field = descriptor->field(i);
391 void* field_ptr = OffsetToPointer(type_info_->offsets[i]);
392
393 if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE &&
394 !field->is_repeated()) {
395 // For fields with message types, we need to cross-link with the
396 // prototype for the field's type.
397 // For singular fields, the field is just a pointer which should
398 // point to the prototype.
399 *reinterpret_cast<const Message**>(field_ptr) =
400 factory->GetPrototypeNoLock(field->message_type());
401 }
402 }
403 }
404
New() const405 Message* DynamicMessage::New() const {
406 void* new_base = reinterpret_cast<uint8*>(operator new(type_info_->size));
407 memset(new_base, 0, type_info_->size);
408 return new(new_base) DynamicMessage(type_info_);
409 }
410
GetCachedSize() const411 int DynamicMessage::GetCachedSize() const {
412 return cached_byte_size_;
413 }
414
SetCachedSize(int size) const415 void DynamicMessage::SetCachedSize(int size) const {
416 // This is theoretically not thread-compatible, but in practice it works
417 // because if multiple threads write this simultaneously, they will be
418 // writing the exact same value.
419 cached_byte_size_ = size;
420 }
421
GetMetadata() const422 Metadata DynamicMessage::GetMetadata() const {
423 Metadata metadata;
424 metadata.descriptor = type_info_->type;
425 metadata.reflection = type_info_->reflection.get();
426 return metadata;
427 }
428
429 // ===================================================================
430
431 struct DynamicMessageFactory::PrototypeMap {
432 typedef hash_map<const Descriptor*, const DynamicMessage::TypeInfo*> Map;
433 Map map_;
434 };
435
DynamicMessageFactory()436 DynamicMessageFactory::DynamicMessageFactory()
437 : pool_(NULL), delegate_to_generated_factory_(false),
438 prototypes_(new PrototypeMap) {
439 }
440
DynamicMessageFactory(const DescriptorPool * pool)441 DynamicMessageFactory::DynamicMessageFactory(const DescriptorPool* pool)
442 : pool_(pool), delegate_to_generated_factory_(false),
443 prototypes_(new PrototypeMap) {
444 }
445
~DynamicMessageFactory()446 DynamicMessageFactory::~DynamicMessageFactory() {
447 for (PrototypeMap::Map::iterator iter = prototypes_->map_.begin();
448 iter != prototypes_->map_.end(); ++iter) {
449 delete iter->second;
450 }
451 }
452
GetPrototype(const Descriptor * type)453 const Message* DynamicMessageFactory::GetPrototype(const Descriptor* type) {
454 MutexLock lock(&prototypes_mutex_);
455 return GetPrototypeNoLock(type);
456 }
457
GetPrototypeNoLock(const Descriptor * type)458 const Message* DynamicMessageFactory::GetPrototypeNoLock(
459 const Descriptor* type) {
460 if (delegate_to_generated_factory_ &&
461 type->file()->pool() == DescriptorPool::generated_pool()) {
462 return MessageFactory::generated_factory()->GetPrototype(type);
463 }
464
465 const DynamicMessage::TypeInfo** target = &prototypes_->map_[type];
466 if (*target != NULL) {
467 // Already exists.
468 return (*target)->prototype.get();
469 }
470
471 DynamicMessage::TypeInfo* type_info = new DynamicMessage::TypeInfo;
472 *target = type_info;
473
474 type_info->type = type;
475 type_info->pool = (pool_ == NULL) ? type->file()->pool() : pool_;
476 type_info->factory = this;
477
478 // We need to construct all the structures passed to
479 // GeneratedMessageReflection's constructor. This includes:
480 // - A block of memory that contains space for all the message's fields.
481 // - An array of integers indicating the byte offset of each field within
482 // this block.
483 // - A big bitfield containing a bit for each field indicating whether
484 // or not that field is set.
485
486 // Compute size and offsets.
487 int* offsets = new int[type->field_count()];
488 type_info->offsets.reset(offsets);
489
490 // Decide all field offsets by packing in order.
491 // We place the DynamicMessage object itself at the beginning of the allocated
492 // space.
493 int size = sizeof(DynamicMessage);
494 size = AlignOffset(size);
495
496 // Next the has_bits, which is an array of uint32s.
497 type_info->has_bits_offset = size;
498 int has_bits_array_size =
499 DivideRoundingUp(type->field_count(), bitsizeof(uint32));
500 size += has_bits_array_size * sizeof(uint32);
501 size = AlignOffset(size);
502
503 // The ExtensionSet, if any.
504 if (type->extension_range_count() > 0) {
505 type_info->extensions_offset = size;
506 size += sizeof(ExtensionSet);
507 size = AlignOffset(size);
508 } else {
509 // No extensions.
510 type_info->extensions_offset = -1;
511 }
512
513 // All the fields.
514 for (int i = 0; i < type->field_count(); i++) {
515 // Make sure field is aligned to avoid bus errors.
516 int field_size = FieldSpaceUsed(type->field(i));
517 size = AlignTo(size, min(kSafeAlignment, field_size));
518 offsets[i] = size;
519 size += field_size;
520 }
521
522 // Add the UnknownFieldSet to the end.
523 size = AlignOffset(size);
524 type_info->unknown_fields_offset = size;
525 size += sizeof(UnknownFieldSet);
526
527 // Align the final size to make sure no clever allocators think that
528 // alignment is not necessary.
529 size = AlignOffset(size);
530 type_info->size = size;
531
532 // Allocate the prototype.
533 void* base = operator new(size);
534 memset(base, 0, size);
535 DynamicMessage* prototype = new(base) DynamicMessage(type_info);
536 type_info->prototype.reset(prototype);
537
538 // Construct the reflection object.
539 type_info->reflection.reset(
540 new GeneratedMessageReflection(
541 type_info->type,
542 type_info->prototype.get(),
543 type_info->offsets.get(),
544 type_info->has_bits_offset,
545 type_info->unknown_fields_offset,
546 type_info->extensions_offset,
547 type_info->pool,
548 this,
549 type_info->size));
550
551 // Cross link prototypes.
552 prototype->CrossLinkPrototypes();
553
554 return prototype;
555 }
556
557 } // namespace protobuf
558 } // namespace google
559