• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 // http://code.google.com/p/protobuf/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 //     * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 //     * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 //     * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 // Author: kenton@google.com (Kenton Varda)
32 //  Based on original Protocol Buffers design by
33 //  Sanjay Ghemawat, Jeff Dean, and others.
34 //
35 // DynamicMessage is implemented by constructing a data structure which
36 // has roughly the same memory layout as a generated message would have.
37 // Then, we use GeneratedMessageReflection to implement our reflection
38 // interface.  All the other operations we need to implement (e.g.
39 // parsing, copying, etc.) are already implemented in terms of
40 // Reflection, so the rest is easy.
41 //
42 // The up side of this strategy is that it's very efficient.  We don't
43 // need to use hash_maps or generic representations of fields.  The
44 // down side is that this is a low-level memory management hack which
45 // can be tricky to get right.
46 //
47 // As mentioned in the header, we only expose a DynamicMessageFactory
48 // publicly, not the DynamicMessage class itself.  This is because
49 // GeneratedMessageReflection wants to have a pointer to a "default"
50 // copy of the class, with all fields initialized to their default
51 // values.  We only want to construct one of these per message type,
52 // so DynamicMessageFactory stores a cache of default messages for
53 // each type it sees (each unique Descriptor pointer).  The code
54 // refers to the "default" copy of the class as the "prototype".
55 //
56 // Note on memory allocation:  This module often calls "operator new()"
57 // to allocate untyped memory, rather than calling something like
58 // "new uint8[]".  This is because "operator new()" means "Give me some
59 // space which I can use as I please." while "new uint8[]" means "Give
60 // me an array of 8-bit integers.".  In practice, the latter may return
61 // a pointer that is not aligned correctly for general use.  I believe
62 // Item 8 of "More Effective C++" discusses this in more detail, though
63 // I don't have the book on me right now so I'm not sure.
64 
65 #include <algorithm>
66 #include <google/protobuf/stubs/hash.h>
67 
68 #include <google/protobuf/stubs/common.h>
69 
70 #include <google/protobuf/dynamic_message.h>
71 #include <google/protobuf/descriptor.h>
72 #include <google/protobuf/descriptor.pb.h>
73 #include <google/protobuf/generated_message_util.h>
74 #include <google/protobuf/generated_message_reflection.h>
75 #include <google/protobuf/reflection_ops.h>
76 #include <google/protobuf/repeated_field.h>
77 #include <google/protobuf/extension_set.h>
78 #include <google/protobuf/wire_format.h>
79 
80 namespace google {
81 namespace protobuf {
82 
83 using internal::WireFormat;
84 using internal::ExtensionSet;
85 using internal::GeneratedMessageReflection;
86 
87 
88 // ===================================================================
89 // Some helper tables and functions...
90 
91 namespace {
92 
93 // Compute the byte size of the in-memory representation of the field.
FieldSpaceUsed(const FieldDescriptor * field)94 int FieldSpaceUsed(const FieldDescriptor* field) {
95   typedef FieldDescriptor FD;  // avoid line wrapping
96   if (field->label() == FD::LABEL_REPEATED) {
97     switch (field->cpp_type()) {
98       case FD::CPPTYPE_INT32  : return sizeof(RepeatedField<int32   >);
99       case FD::CPPTYPE_INT64  : return sizeof(RepeatedField<int64   >);
100       case FD::CPPTYPE_UINT32 : return sizeof(RepeatedField<uint32  >);
101       case FD::CPPTYPE_UINT64 : return sizeof(RepeatedField<uint64  >);
102       case FD::CPPTYPE_DOUBLE : return sizeof(RepeatedField<double  >);
103       case FD::CPPTYPE_FLOAT  : return sizeof(RepeatedField<float   >);
104       case FD::CPPTYPE_BOOL   : return sizeof(RepeatedField<bool    >);
105       case FD::CPPTYPE_ENUM   : return sizeof(RepeatedField<int     >);
106       case FD::CPPTYPE_MESSAGE: return sizeof(RepeatedPtrField<Message>);
107 
108       case FD::CPPTYPE_STRING:
109         switch (field->options().ctype()) {
110           default:  // TODO(kenton):  Support other string reps.
111           case FieldOptions::STRING:
112             return sizeof(RepeatedPtrField<string>);
113         }
114         break;
115     }
116   } else {
117     switch (field->cpp_type()) {
118       case FD::CPPTYPE_INT32  : return sizeof(int32   );
119       case FD::CPPTYPE_INT64  : return sizeof(int64   );
120       case FD::CPPTYPE_UINT32 : return sizeof(uint32  );
121       case FD::CPPTYPE_UINT64 : return sizeof(uint64  );
122       case FD::CPPTYPE_DOUBLE : return sizeof(double  );
123       case FD::CPPTYPE_FLOAT  : return sizeof(float   );
124       case FD::CPPTYPE_BOOL   : return sizeof(bool    );
125       case FD::CPPTYPE_ENUM   : return sizeof(int     );
126       case FD::CPPTYPE_MESSAGE: return sizeof(Message*);
127 
128       case FD::CPPTYPE_STRING:
129         switch (field->options().ctype()) {
130           default:  // TODO(kenton):  Support other string reps.
131           case FieldOptions::STRING:
132             return sizeof(string*);
133         }
134         break;
135     }
136   }
137 
138   GOOGLE_LOG(DFATAL) << "Can't get here.";
139   return 0;
140 }
141 
// Divides i by j, rounding the quotient up (for non-negative i and
// positive j) by biasing the numerator with j - 1 before the division.
inline int DivideRoundingUp(int i, int j) {
  const int bias = j - 1;
  return (i + bias) / j;
}
145 
146 static const int kSafeAlignment = sizeof(uint64);
147 
// Rounds |offset| up to the nearest multiple of |alignment| (for
// non-negative offsets and positive alignments).
inline int AlignTo(int offset, int alignment) {
  // Number of whole alignment units needed to cover |offset|, rounded up.
  const int units = (offset + (alignment - 1)) / alignment;
  return units * alignment;
}
151 
152 // Rounds the given byte offset up to the next offset aligned such that any
153 // type may be stored at it.
AlignOffset(int offset)154 inline int AlignOffset(int offset) {
155   return AlignTo(offset, kSafeAlignment);
156 }
157 
// Size of type T in bits, computed as eight bits per sizeof unit.
#define bitsizeof(T) (8 * sizeof(T))
159 
160 }  // namespace
161 
162 // ===================================================================
163 
164 class DynamicMessage : public Message {
165  public:
166   struct TypeInfo {
167     int size;
168     int has_bits_offset;
169     int unknown_fields_offset;
170     int extensions_offset;
171 
172     // Not owned by the TypeInfo.
173     DynamicMessageFactory* factory;  // The factory that created this object.
174     const DescriptorPool* pool;      // The factory's DescriptorPool.
175     const Descriptor* type;          // Type of this DynamicMessage.
176 
177     // Warning:  The order in which the following pointers are defined is
178     //   important (the prototype must be deleted *before* the offsets).
179     scoped_array<int> offsets;
180     scoped_ptr<const GeneratedMessageReflection> reflection;
181     scoped_ptr<const DynamicMessage> prototype;
182   };
183 
184   DynamicMessage(const TypeInfo* type_info);
185   ~DynamicMessage();
186 
187   // Called on the prototype after construction to initialize message fields.
188   void CrossLinkPrototypes();
189 
190   // implements Message ----------------------------------------------
191 
192   Message* New() const;
193 
194   int GetCachedSize() const;
195   void SetCachedSize(int size) const;
196 
197   Metadata GetMetadata() const;
198 
199  private:
200   GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(DynamicMessage);
201 
is_prototype() const202   inline bool is_prototype() const {
203     return type_info_->prototype == this ||
204            // If type_info_->prototype is NULL, then we must be constructing
205            // the prototype now, which means we must be the prototype.
206            type_info_->prototype == NULL;
207   }
208 
OffsetToPointer(int offset)209   inline void* OffsetToPointer(int offset) {
210     return reinterpret_cast<uint8*>(this) + offset;
211   }
OffsetToPointer(int offset) const212   inline const void* OffsetToPointer(int offset) const {
213     return reinterpret_cast<const uint8*>(this) + offset;
214   }
215 
216   const TypeInfo* type_info_;
217 
218   // TODO(kenton):  Make this an atomic<int> when C++ supports it.
219   mutable int cached_byte_size_;
220 };
221 
DynamicMessage(const TypeInfo * type_info)222 DynamicMessage::DynamicMessage(const TypeInfo* type_info)
223   : type_info_(type_info),
224     cached_byte_size_(0) {
225   // We need to call constructors for various fields manually and set
226   // default values where appropriate.  We use placement new to call
227   // constructors.  If you haven't heard of placement new, I suggest Googling
228   // it now.  We use placement new even for primitive types that don't have
229   // constructors for consistency.  (In theory, placement new should be used
230   // any time you are trying to convert untyped memory to typed memory, though
231   // in practice that's not strictly necessary for types that don't have a
232   // constructor.)
233 
234   const Descriptor* descriptor = type_info_->type;
235 
236   new(OffsetToPointer(type_info_->unknown_fields_offset)) UnknownFieldSet;
237 
238   if (type_info_->extensions_offset != -1) {
239     new(OffsetToPointer(type_info_->extensions_offset)) ExtensionSet;
240   }
241 
242   for (int i = 0; i < descriptor->field_count(); i++) {
243     const FieldDescriptor* field = descriptor->field(i);
244     void* field_ptr = OffsetToPointer(type_info_->offsets[i]);
245     switch (field->cpp_type()) {
246 #define HANDLE_TYPE(CPPTYPE, TYPE)                                           \
247       case FieldDescriptor::CPPTYPE_##CPPTYPE:                               \
248         if (!field->is_repeated()) {                                         \
249           new(field_ptr) TYPE(field->default_value_##TYPE());                \
250         } else {                                                             \
251           new(field_ptr) RepeatedField<TYPE>();                              \
252         }                                                                    \
253         break;
254 
255       HANDLE_TYPE(INT32 , int32 );
256       HANDLE_TYPE(INT64 , int64 );
257       HANDLE_TYPE(UINT32, uint32);
258       HANDLE_TYPE(UINT64, uint64);
259       HANDLE_TYPE(DOUBLE, double);
260       HANDLE_TYPE(FLOAT , float );
261       HANDLE_TYPE(BOOL  , bool  );
262 #undef HANDLE_TYPE
263 
264       case FieldDescriptor::CPPTYPE_ENUM:
265         if (!field->is_repeated()) {
266           new(field_ptr) int(field->default_value_enum()->number());
267         } else {
268           new(field_ptr) RepeatedField<int>();
269         }
270         break;
271 
272       case FieldDescriptor::CPPTYPE_STRING:
273         switch (field->options().ctype()) {
274           default:  // TODO(kenton):  Support other string reps.
275           case FieldOptions::STRING:
276             if (!field->is_repeated()) {
277               if (is_prototype()) {
278                 new(field_ptr) const string*(&field->default_value_string());
279               } else {
280                 string* default_value =
281                   *reinterpret_cast<string* const*>(
282                     type_info_->prototype->OffsetToPointer(
283                       type_info_->offsets[i]));
284                 new(field_ptr) string*(default_value);
285               }
286             } else {
287               new(field_ptr) RepeatedPtrField<string>();
288             }
289             break;
290         }
291         break;
292 
293       case FieldDescriptor::CPPTYPE_MESSAGE: {
294         if (!field->is_repeated()) {
295           new(field_ptr) Message*(NULL);
296         } else {
297           new(field_ptr) RepeatedPtrField<Message>();
298         }
299         break;
300       }
301     }
302   }
303 }
304 
~DynamicMessage()305 DynamicMessage::~DynamicMessage() {
306   const Descriptor* descriptor = type_info_->type;
307 
308   reinterpret_cast<UnknownFieldSet*>(
309     OffsetToPointer(type_info_->unknown_fields_offset))->~UnknownFieldSet();
310 
311   if (type_info_->extensions_offset != -1) {
312     reinterpret_cast<ExtensionSet*>(
313       OffsetToPointer(type_info_->extensions_offset))->~ExtensionSet();
314   }
315 
316   // We need to manually run the destructors for repeated fields and strings,
317   // just as we ran their constructors in the the DynamicMessage constructor.
318   // Additionally, if any singular embedded messages have been allocated, we
319   // need to delete them, UNLESS we are the prototype message of this type,
320   // in which case any embedded messages are other prototypes and shouldn't
321   // be touched.
322   for (int i = 0; i < descriptor->field_count(); i++) {
323     const FieldDescriptor* field = descriptor->field(i);
324     void* field_ptr = OffsetToPointer(type_info_->offsets[i]);
325 
326     if (field->is_repeated()) {
327       switch (field->cpp_type()) {
328 #define HANDLE_TYPE(UPPERCASE, LOWERCASE)                                     \
329         case FieldDescriptor::CPPTYPE_##UPPERCASE :                           \
330           reinterpret_cast<RepeatedField<LOWERCASE>*>(field_ptr)              \
331               ->~RepeatedField<LOWERCASE>();                                  \
332           break
333 
334         HANDLE_TYPE( INT32,  int32);
335         HANDLE_TYPE( INT64,  int64);
336         HANDLE_TYPE(UINT32, uint32);
337         HANDLE_TYPE(UINT64, uint64);
338         HANDLE_TYPE(DOUBLE, double);
339         HANDLE_TYPE( FLOAT,  float);
340         HANDLE_TYPE(  BOOL,   bool);
341         HANDLE_TYPE(  ENUM,    int);
342 #undef HANDLE_TYPE
343 
344         case FieldDescriptor::CPPTYPE_STRING:
345           switch (field->options().ctype()) {
346             default:  // TODO(kenton):  Support other string reps.
347             case FieldOptions::STRING:
348               reinterpret_cast<RepeatedPtrField<string>*>(field_ptr)
349                   ->~RepeatedPtrField<string>();
350               break;
351           }
352           break;
353 
354         case FieldDescriptor::CPPTYPE_MESSAGE:
355           reinterpret_cast<RepeatedPtrField<Message>*>(field_ptr)
356               ->~RepeatedPtrField<Message>();
357           break;
358       }
359 
360     } else if (field->cpp_type() == FieldDescriptor::CPPTYPE_STRING) {
361       switch (field->options().ctype()) {
362         default:  // TODO(kenton):  Support other string reps.
363         case FieldOptions::STRING: {
364           string* ptr = *reinterpret_cast<string**>(field_ptr);
365           if (ptr != &field->default_value_string()) {
366             delete ptr;
367           }
368           break;
369         }
370       }
371     } else if ((field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) &&
372                !is_prototype()) {
373       Message* message = *reinterpret_cast<Message**>(field_ptr);
374       if (message != NULL) {
375         delete message;
376       }
377     }
378   }
379 }
380 
CrossLinkPrototypes()381 void DynamicMessage::CrossLinkPrototypes() {
382   // This should only be called on the prototype message.
383   GOOGLE_CHECK(is_prototype());
384 
385   DynamicMessageFactory* factory = type_info_->factory;
386   const Descriptor* descriptor = type_info_->type;
387 
388   // Cross-link default messages.
389   for (int i = 0; i < descriptor->field_count(); i++) {
390     const FieldDescriptor* field = descriptor->field(i);
391     void* field_ptr = OffsetToPointer(type_info_->offsets[i]);
392 
393     if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE &&
394         !field->is_repeated()) {
395       // For fields with message types, we need to cross-link with the
396       // prototype for the field's type.
397       // For singular fields, the field is just a pointer which should
398       // point to the prototype.
399       *reinterpret_cast<const Message**>(field_ptr) =
400         factory->GetPrototypeNoLock(field->message_type());
401     }
402   }
403 }
404 
New() const405 Message* DynamicMessage::New() const {
406   void* new_base = reinterpret_cast<uint8*>(operator new(type_info_->size));
407   memset(new_base, 0, type_info_->size);
408   return new(new_base) DynamicMessage(type_info_);
409 }
410 
GetCachedSize() const411 int DynamicMessage::GetCachedSize() const {
412   return cached_byte_size_;
413 }
414 
SetCachedSize(int size) const415 void DynamicMessage::SetCachedSize(int size) const {
416   // This is theoretically not thread-compatible, but in practice it works
417   // because if multiple threads write this simultaneously, they will be
418   // writing the exact same value.
419   cached_byte_size_ = size;
420 }
421 
GetMetadata() const422 Metadata DynamicMessage::GetMetadata() const {
423   Metadata metadata;
424   metadata.descriptor = type_info_->type;
425   metadata.reflection = type_info_->reflection.get();
426   return metadata;
427 }
428 
429 // ===================================================================
430 
431 struct DynamicMessageFactory::PrototypeMap {
432   typedef hash_map<const Descriptor*, const DynamicMessage::TypeInfo*> Map;
433   Map map_;
434 };
435 
DynamicMessageFactory()436 DynamicMessageFactory::DynamicMessageFactory()
437   : pool_(NULL), delegate_to_generated_factory_(false),
438     prototypes_(new PrototypeMap) {
439 }
440 
DynamicMessageFactory(const DescriptorPool * pool)441 DynamicMessageFactory::DynamicMessageFactory(const DescriptorPool* pool)
442   : pool_(pool), delegate_to_generated_factory_(false),
443     prototypes_(new PrototypeMap) {
444 }
445 
~DynamicMessageFactory()446 DynamicMessageFactory::~DynamicMessageFactory() {
447   for (PrototypeMap::Map::iterator iter = prototypes_->map_.begin();
448        iter != prototypes_->map_.end(); ++iter) {
449     delete iter->second;
450   }
451 }
452 
GetPrototype(const Descriptor * type)453 const Message* DynamicMessageFactory::GetPrototype(const Descriptor* type) {
454   MutexLock lock(&prototypes_mutex_);
455   return GetPrototypeNoLock(type);
456 }
457 
GetPrototypeNoLock(const Descriptor * type)458 const Message* DynamicMessageFactory::GetPrototypeNoLock(
459     const Descriptor* type) {
460   if (delegate_to_generated_factory_ &&
461       type->file()->pool() == DescriptorPool::generated_pool()) {
462     return MessageFactory::generated_factory()->GetPrototype(type);
463   }
464 
465   const DynamicMessage::TypeInfo** target = &prototypes_->map_[type];
466   if (*target != NULL) {
467     // Already exists.
468     return (*target)->prototype.get();
469   }
470 
471   DynamicMessage::TypeInfo* type_info = new DynamicMessage::TypeInfo;
472   *target = type_info;
473 
474   type_info->type = type;
475   type_info->pool = (pool_ == NULL) ? type->file()->pool() : pool_;
476   type_info->factory = this;
477 
478   // We need to construct all the structures passed to
479   // GeneratedMessageReflection's constructor.  This includes:
480   // - A block of memory that contains space for all the message's fields.
481   // - An array of integers indicating the byte offset of each field within
482   //   this block.
483   // - A big bitfield containing a bit for each field indicating whether
484   //   or not that field is set.
485 
486   // Compute size and offsets.
487   int* offsets = new int[type->field_count()];
488   type_info->offsets.reset(offsets);
489 
490   // Decide all field offsets by packing in order.
491   // We place the DynamicMessage object itself at the beginning of the allocated
492   // space.
493   int size = sizeof(DynamicMessage);
494   size = AlignOffset(size);
495 
496   // Next the has_bits, which is an array of uint32s.
497   type_info->has_bits_offset = size;
498   int has_bits_array_size =
499     DivideRoundingUp(type->field_count(), bitsizeof(uint32));
500   size += has_bits_array_size * sizeof(uint32);
501   size = AlignOffset(size);
502 
503   // The ExtensionSet, if any.
504   if (type->extension_range_count() > 0) {
505     type_info->extensions_offset = size;
506     size += sizeof(ExtensionSet);
507     size = AlignOffset(size);
508   } else {
509     // No extensions.
510     type_info->extensions_offset = -1;
511   }
512 
513   // All the fields.
514   for (int i = 0; i < type->field_count(); i++) {
515     // Make sure field is aligned to avoid bus errors.
516     int field_size = FieldSpaceUsed(type->field(i));
517     size = AlignTo(size, min(kSafeAlignment, field_size));
518     offsets[i] = size;
519     size += field_size;
520   }
521 
522   // Add the UnknownFieldSet to the end.
523   size = AlignOffset(size);
524   type_info->unknown_fields_offset = size;
525   size += sizeof(UnknownFieldSet);
526 
527   // Align the final size to make sure no clever allocators think that
528   // alignment is not necessary.
529   size = AlignOffset(size);
530   type_info->size = size;
531 
532   // Allocate the prototype.
533   void* base = operator new(size);
534   memset(base, 0, size);
535   DynamicMessage* prototype = new(base) DynamicMessage(type_info);
536   type_info->prototype.reset(prototype);
537 
538   // Construct the reflection object.
539   type_info->reflection.reset(
540     new GeneratedMessageReflection(
541       type_info->type,
542       type_info->prototype.get(),
543       type_info->offsets.get(),
544       type_info->has_bits_offset,
545       type_info->unknown_fields_offset,
546       type_info->extensions_offset,
547       type_info->pool,
548       this,
549       type_info->size));
550 
551   // Cross link prototypes.
552   prototype->CrossLinkPrototypes();
553 
554   return prototype;
555 }
556 
557 }  // namespace protobuf
558 }  // namespace google
559