// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file or at
// https://developers.google.com/open-source/licenses/bsd

// Author: kenton@google.com (Kenton Varda)
// Based on original Protocol Buffers design by
// Sanjay Ghemawat, Jeff Dean, and others.
//
// This header is logically internal, but is made public because it is used
// from protocol-compiler-generated code, which may reside in other components.

#ifndef GOOGLE_PROTOBUF_GENERATED_MESSAGE_REFLECTION_H__
#define GOOGLE_PROTOBUF_GENERATED_MESSAGE_REFLECTION_H__

#include <atomic>
#include <cstddef>
#include <cstdint>
#include <string>

#include "absl/base/call_once.h"
#include "absl/log/absl_check.h"
#include "google/protobuf/descriptor.h"
#include "google/protobuf/generated_enum_reflection.h"
#include "google/protobuf/port.h"
#include "google/protobuf/unknown_field_set.h"

// Must be included last.
#include "google/protobuf/port_def.inc"

#ifdef SWIG
#error "You cannot SWIG proto headers"
#endif

namespace google {
namespace protobuf {
class MapKey;
class MapValueRef;
class MessageLayoutInspector;
class Message;
struct Metadata;

namespace io {
class CodedOutputStream;
}
}  // namespace protobuf
}  // namespace google

namespace google {
namespace protobuf {
namespace internal {
class DefaultEmptyOneof;
// Defined in other files.
class ExtensionSet;  // extension_set.h
class WeakFieldMap;  // weak_field_map.h

// Tag used on offsets for fields that don't have a real offset.
// For example, weak message fields go into the WeakFieldMap and not in an
// actual field.
constexpr uint32_t kInvalidFieldOffsetTag = 0x40000000u;

// Mask used on offsets for split fields.
constexpr uint32_t kSplitFieldOffsetMask = 0x80000000u;
constexpr uint32_t kLazyMask = 0x1u;
constexpr uint32_t kInlinedMask = 0x1u;
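
// For illustration only (a sketch, not normative): an entry in the offsets
// array packs the field's byte offset together with these tag bits, e.g.:
//
//   uint32_t packed = byte_offset
//                     | kSplitFieldOffsetMask  // set if the field is split
//                     | kInlinedMask;          // set if the string is inlined
//
// ReflectionSchema::OffsetValue() below strips the tag bits to recover the
// raw byte offset.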

// This struct describes the internal layout of the message; it is used to
// act on the message reflectively.
//   default_instance:  The default instance of the message.  This is only
//                  used to obtain pointers to default instances of embedded
//                  messages, which GetMessage() will return if the particular
//                  sub-message has not been initialized yet.  (Thus, all
//                  embedded message fields *must* have non-null pointers
//                  in the default instance.)
//   offsets:       An array of uint32s giving the byte offsets.
//                  For each oneof or weak field, the offset is relative to the
//                  default_instance.  These can be computed at compile time
//                  using the PROTO2_GENERATED_DEFAULT_ONEOF_FIELD_OFFSET()
//                  macro.  For each non-oneof field, the offset is relative to
//                  the start of the message object.  These can be computed at
//                  compile time using the
//                  PROTO2_GENERATED_MESSAGE_FIELD_OFFSET() macro.
//                  Besides offsets for all fields, this array also contains
//                  offsets for oneof unions.  The offset of the i-th oneof
//                  union is offsets[descriptor->field_count() + i].
//   has_bit_indices:  Mapping from field indexes to their index in the has
//                  bit array.
//   has_bits_offset:  Offset in the message of an array of uint32s of size
//                  descriptor->field_count()/32, rounded up.  This is a
//                  bitfield where each bit indicates whether or not the
//                  corresponding field of the message has been initialized.
//                  The bit for field index i is obtained by the expression:
//                    has_bits[i / 32] & (1 << (i % 32))
//   unknown_fields_offset:  Offset in the message of the UnknownFieldSet for
//                  the message.
//   extensions_offset:  Offset in the message of the ExtensionSet for the
//                  message, or -1 if the message type has no extension
//                  ranges.
//   oneof_case_offset:  Offset in the message of an array of uint32s of
//                  size descriptor->oneof_decl_count().  Each uint32_t
//                  indicates what field is set for each oneof.
//   object_size:   The size of a message object of this type, as measured
//                  by sizeof().
//   arena_offset:  If a message doesn't have an unknown_field_set that stores
//                  the arena, it must have a direct pointer to the arena.
//   weak_field_map_offset:  If the message proto has weak fields, this is the
//                  offset of _weak_field_map_ in the generated proto.
//                  Otherwise -1.
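//
// For illustration only (a sketch, not part of the API): under the layout
// described above, the hasbit for a field could be read from a raw message
// pointer roughly like this:
//
//   const char* base = reinterpret_cast<const char*>(&message);
//   const uint32_t* has_bits =
//       reinterpret_cast<const uint32_t*>(base + schema.HasBitsOffset());
//   const uint32_t i = schema.HasBitIndex(field);  // bit index, low-to-high
//   const bool has = (has_bits[i / 32] & (1u << (i % 32))) != 0;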
struct ReflectionSchema {
 public:
  // Size of a google::protobuf::Message object of this type.
  uint32_t GetObjectSize() const { return static_cast<uint32_t>(object_size_); }

  bool InRealOneof(const FieldDescriptor* field) const {
    return field->real_containing_oneof();
  }

  // Offset of a non-oneof field.  Getting a field offset is slightly more
  // efficient when we know statically that it is not a oneof field.
  uint32_t GetFieldOffsetNonOneof(const FieldDescriptor* field) const {
    ABSL_DCHECK(!InRealOneof(field));
    return OffsetValue(offsets_[field->index()], field->type());
  }

  // Offset of any field.
  uint32_t GetFieldOffset(const FieldDescriptor* field) const {
    if (InRealOneof(field)) {
      size_t offset =
          static_cast<size_t>(field->containing_type()->field_count()) +
          field->containing_oneof()->index();
      return OffsetValue(offsets_[offset], field->type());
    } else {
      return GetFieldOffsetNonOneof(field);
    }
  }
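
  // For illustration only (a sketch, not part of the API): a field's address
  // can be computed by adding the returned offset to the message base, e.g.:
  //
  //   const char* base = reinterpret_cast<const char*>(&message);
  //   const void* field_ptr = base + schema.GetFieldOffset(field);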

  bool IsFieldInlined(const FieldDescriptor* field) const {
    return Inlined(offsets_[field->index()], field->type());
  }

  uint32_t GetOneofCaseOffset(const OneofDescriptor* oneof_descriptor) const {
    return static_cast<uint32_t>(oneof_case_offset_) +
           static_cast<uint32_t>(
               static_cast<size_t>(oneof_descriptor->index()) *
               sizeof(uint32_t));
  }
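
  // For illustration only (sketch): the returned offset locates a uint32_t
  // identifying which field, if any, is currently set in the oneof (0 means
  // none):
  //
  //   const uint32_t oneof_case = *reinterpret_cast<const uint32_t*>(
  //       base + schema.GetOneofCaseOffset(oneof));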

  bool HasHasbits() const { return has_bits_offset_ != -1; }

  // Bit index within the bit array of hasbits.  Bit order is low-to-high.
  uint32_t HasBitIndex(const FieldDescriptor* field) const {
    if (has_bits_offset_ == -1) return static_cast<uint32_t>(-1);
    ABSL_DCHECK(HasHasbits());
    return has_bit_indices_[field->index()];
  }

  // Byte offset of the hasbits array.
  uint32_t HasBitsOffset() const {
    ABSL_DCHECK(HasHasbits());
    return static_cast<uint32_t>(has_bits_offset_);
  }

  bool HasInlinedString() const { return inlined_string_donated_offset_ != -1; }

  // Bit index within the bit array of _inlined_string_donated_.  Bit order is
  // low-to-high.
  uint32_t InlinedStringIndex(const FieldDescriptor* field) const {
    ABSL_DCHECK(HasInlinedString());
    return inlined_string_indices_[field->index()];
  }

  // Byte offset of the _inlined_string_donated_ array.
  uint32_t InlinedStringDonatedOffset() const {
    ABSL_DCHECK(HasInlinedString());
    return static_cast<uint32_t>(inlined_string_donated_offset_);
  }

  // The offset of the InternalMetadataWithArena member.
  // For Lite this will actually be an InternalMetadataWithArenaLite.
  // The schema doesn't contain enough information to distinguish between
  // these two cases.
  uint32_t GetMetadataOffset() const {
    return static_cast<uint32_t>(metadata_offset_);
  }

  // Whether this message has an ExtensionSet.
  bool HasExtensionSet() const { return extensions_offset_ != -1; }

  // The offset of the ExtensionSet in this message.
  uint32_t GetExtensionSetOffset() const {
    ABSL_DCHECK(HasExtensionSet());
    return static_cast<uint32_t>(extensions_offset_);
  }

  // The offset of the WeakFieldMap when the message contains weak fields.
  // The default is 0 for now.
  int GetWeakFieldMapOffset() const { return weak_field_map_offset_; }

  bool IsDefaultInstance(const Message& message) const {
    return &message == default_instance_;
  }

  // Returns a pointer to the default value for this field.  The size and type
  // of the underlying data depends on the field's type.
  const void* GetFieldDefault(const FieldDescriptor* field) const {
    return reinterpret_cast<const uint8_t*>(default_instance_) +
           OffsetValue(offsets_[field->index()], field->type());
  }
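
  // For illustration only (sketch): the returned pointer must be cast
  // according to the field's C++ type before use, e.g. for an int32 field:
  //
  //   const int32_t default_value =
  //       *reinterpret_cast<const int32_t*>(schema.GetFieldDefault(field));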

  // Returns true if the field is implicitly backed by LazyField.
  bool IsEagerlyVerifiedLazyField(const FieldDescriptor* field) const {
    ABSL_DCHECK_EQ(field->type(), FieldDescriptor::TYPE_MESSAGE);
    (void)field;
    return false;
  }

  bool IsSplit() const { return split_offset_ != -1; }

  bool IsSplit(const FieldDescriptor* field) const {
    return split_offset_ != -1 &&
           (offsets_[field->index()] & kSplitFieldOffsetMask) != 0;
  }

  // Byte offset of _split_.
  uint32_t SplitOffset() const {
    ABSL_DCHECK(IsSplit());
    return static_cast<uint32_t>(split_offset_);
  }

  uint32_t SizeofSplit() const {
    ABSL_DCHECK(IsSplit());
    return static_cast<uint32_t>(sizeof_split_);
  }

  bool HasWeakFields() const { return weak_field_map_offset_ > 0; }

  // These members are intended to be private, but we cannot actually make them
  // private because this prevents us from using aggregate initialization of
  // them, i.e.:
  //
  //   ReflectionSchema schema = {a, b, c, d, e, ...};
  // private:
  const Message* default_instance_;
  const uint32_t* offsets_;
  const uint32_t* has_bit_indices_;
  int has_bits_offset_;
  int metadata_offset_;
  int extensions_offset_;
  int oneof_case_offset_;
  int object_size_;
  int weak_field_map_offset_;
  const uint32_t* inlined_string_indices_;
  int inlined_string_donated_offset_;
  int split_offset_;
  int sizeof_split_;

  // We tag offset values to provide additional data about fields (such as
  // "unused" or "lazy" or "inlined").
  static uint32_t OffsetValue(uint32_t v, FieldDescriptor::Type type) {
    if (type == FieldDescriptor::TYPE_MESSAGE ||
        type == FieldDescriptor::TYPE_STRING ||
        type == FieldDescriptor::TYPE_BYTES) {
      return v & (~kSplitFieldOffsetMask) & (~kInlinedMask) & (~kLazyMask);
    }
    return v & (~kSplitFieldOffsetMask);
  }

  static bool Inlined(uint32_t v, FieldDescriptor::Type type) {
    if (type == FieldDescriptor::TYPE_STRING ||
        type == FieldDescriptor::TYPE_BYTES) {
      return (v & kInlinedMask) != 0u;
    } else {
      // Non-string/byte fields are not inlined.
      return false;
    }
  }
};

// Structs that the code generator emits directly to describe a message.
// These should never be used directly except to build a ReflectionSchema
// object.
//
// EXPERIMENTAL: these are changing rapidly, and may completely disappear
// or merge with ReflectionSchema.
struct MigrationSchema {
  int32_t offsets_index;
  int32_t has_bit_indices_index;
  int32_t inlined_string_indices_index;
  int object_size;
};

// This struct tries to reduce unnecessary padding.
// The num_xxx fields might not sit next to their respective pointers, but
// this saves padding.
struct PROTOBUF_EXPORT DescriptorTable {
  mutable bool is_initialized;
  bool is_eager;
  int size;  // of serialized descriptor
  const char* descriptor;
  const char* filename;
  absl::once_flag* once;
  const DescriptorTable* const* deps;
  int num_deps;
  int num_messages;
  const MigrationSchema* schemas;
  const Message* const* default_instances;
  const uint32_t* offsets;
  // AssignDescriptors() populates the following descriptor arrays.
  const EnumDescriptor** file_level_enum_descriptors;
  const ServiceDescriptor** file_level_service_descriptors;
};
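
// For illustration only: a generated .pb.cc file would define one of these as
// an aggregate, in field order (all names and values below are hypothetical):
//
//   const DescriptorTable descriptor_table_foo_2eproto = {
//       false, false, 123, kSerializedDescriptor, "foo.proto",
//       &descriptor_table_foo_2eproto_once, nullptr, 0, 1,
//       schemas, default_instances, offsets,
//       file_level_enum_descriptors, file_level_service_descriptors,
//   };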

// AssignDescriptors() pulls the compiled FileDescriptor from the DescriptorPool
// and uses it to populate all of the global variables which store pointers to
// the descriptor objects.  It also constructs the reflection objects.  It is
// called the first time anyone calls descriptor() or GetReflection() on one of
// the types defined in the file.  AssignDescriptors() is thread-safe.
void PROTOBUF_EXPORT AssignDescriptors(const DescriptorTable* table);
// As above, but the caller did the call_once call already.
void PROTOBUF_EXPORT
AssignDescriptorsOnceInnerCall(const DescriptorTable* table);
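
// For illustration only (a sketch of the intended call pattern, not
// normative; descriptor_table_foo_2eproto is hypothetical):
//
//   // Typical lazy, thread-safe path:
//   AssignDescriptors(&descriptor_table_foo_2eproto);
//
//   // When the caller performs the call_once guard itself:
//   absl::call_once(*table->once,
//                   [&] { AssignDescriptorsOnceInnerCall(table); });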

// These functions cannot be in the lite runtime, so we put them in the
// reflection library.
PROTOBUF_EXPORT void UnknownFieldSetSerializer(const uint8_t* base,
                                               uint32_t offset, uint32_t tag,
                                               uint32_t has_offset,
                                               io::CodedOutputStream* output);

PROTOBUF_EXPORT void InitializeFileDescriptorDefaultInstances();

PROTOBUF_EXPORT void AddDescriptors(const DescriptorTable* table);

struct PROTOBUF_EXPORT AddDescriptorsRunner {
  explicit AddDescriptorsRunner(const DescriptorTable* table);
};

// Retrieves the existing prototype out of a descriptor table.
// If it doesn't exist:
//  - If force_build is true, asks the generated message factory for one.
//  - Otherwise, returns null.
const Message* GetPrototypeForWeakDescriptor(const DescriptorTable* table,
                                             int index, bool force_build);

struct DenseEnumCacheInfo {
  std::atomic<const std::string**> cache;
  int min_val;
  int max_val;
  const EnumDescriptor* (*descriptor_fn)();
};
PROTOBUF_EXPORT const std::string& NameOfDenseEnumSlow(int v,
                                                       DenseEnumCacheInfo*);

// Similar to the routine NameOfEnum, this routine returns the name of an enum.
// Unlike that routine, it allocates, on demand, a block of pointers to the
// std::string objects allocated by reflection to store the enum names.  This
// way, as long as the enum values are fairly dense, looking them up can be
// very fast.  This assumes all the enums fall in the range
// [min_val .. max_val].
template <const EnumDescriptor* (*descriptor_fn)(), int min_val, int max_val>
const std::string& NameOfDenseEnum(int v) {
  static_assert(max_val - min_val >= 0, "Too many enums between min and max.");
  static DenseEnumCacheInfo deci = {/* atomic ptr */ {}, min_val, max_val,
                                    descriptor_fn};
  const std::string** cache = deci.cache.load(std::memory_order_acquire);
  if (PROTOBUF_PREDICT_TRUE(cache != nullptr)) {
    if (PROTOBUF_PREDICT_TRUE(v >= min_val && v <= max_val)) {
      return *cache[v - min_val];
    }
  }
  return NameOfDenseEnumSlow(v, &deci);
}
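
// For illustration only: generated code for an enum Color defined in
// foo.proto might expose a fast name lookup like this (all names here are
// hypothetical):
//
//   const std::string& Color_Name(Color value) {
//     return ::google::protobuf::internal::NameOfDenseEnum<
//         &Color_descriptor, /*min_val=*/0, /*max_val=*/2>(
//         static_cast<int>(value));
//   }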

// Returns whether this type of field is stored in the split struct as a raw
// pointer.
PROTOBUF_EXPORT bool SplitFieldHasExtraIndirection(
    const FieldDescriptor* field);

}  // namespace internal
}  // namespace protobuf
}  // namespace google

#include "google/protobuf/port_undef.inc"

#endif  // GOOGLE_PROTOBUF_GENERATED_MESSAGE_REFLECTION_H__