// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc.  All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file or at
// https://developers.google.com/open-source/licenses/bsd

// Author: kenton@google.com (Kenton Varda)
//  Based on original Protocol Buffers design by
//  Sanjay Ghemawat, Jeff Dean, and others.
11 
12 #include "google/protobuf/compiler/cpp/enum.h"
13 
14 #include <algorithm>
15 #include <cstddef>
16 #include <cstdint>
17 #include <limits>
18 #include <string>
19 #include <utility>
20 #include <vector>
21 
22 #include "absl/algorithm/container.h"
23 #include "absl/container/btree_map.h"
24 #include "absl/container/btree_set.h"
25 #include "absl/container/flat_hash_map.h"
26 #include "absl/strings/str_cat.h"
27 #include "absl/strings/string_view.h"
28 #include "google/protobuf/compiler/cpp/generator.h"
29 #include "google/protobuf/compiler/cpp/helpers.h"
30 #include "google/protobuf/compiler/cpp/names.h"
31 #include "google/protobuf/compiler/cpp/options.h"
32 #include "google/protobuf/descriptor.h"
33 #include "google/protobuf/generated_enum_util.h"
34 
35 namespace google {
36 namespace protobuf {
37 namespace compiler {
38 namespace cpp {
39 namespace {
40 using Sub = ::google::protobuf::io::Printer::Sub;
41 
EnumVars(const EnumDescriptor * enum_,const Options & options,const EnumValueDescriptor * min,const EnumValueDescriptor * max)42 absl::flat_hash_map<absl::string_view, std::string> EnumVars(
43     const EnumDescriptor* enum_, const Options& options,
44     const EnumValueDescriptor* min, const EnumValueDescriptor* max) {
45   auto classname = ClassName(enum_, false);
46   return {
47       {"Enum", std::string(enum_->name())},
48       {"Enum_", ResolveKeyword(enum_->name())},
49       {"Msg_Enum", classname},
50       {"::Msg_Enum", QualifiedClassName(enum_, options)},
51       {"Msg_Enum_",
52        enum_->containing_type() == nullptr ? "" : absl::StrCat(classname, "_")},
53       {"kMin", absl::StrCat(min->number())},
54       {"kMax", absl::StrCat(max->number())},
55       {"return_type", CppGenerator::GetResolvedSourceFeatures(*enum_)
56                               .GetExtension(::pb::cpp)
57                               .enum_name_uses_string_view()
58                           ? "::absl::string_view"
59                           : "const std::string&"},
60   };
61 }
62 
63 // The ARRAYSIZE constant is the max enum value plus 1. If the max enum value
64 // is kint32max, ARRAYSIZE will overflow. In such cases we should omit the
65 // generation of the ARRAYSIZE constant.
ShouldGenerateArraySize(const EnumDescriptor * descriptor)66 bool ShouldGenerateArraySize(const EnumDescriptor* descriptor) {
67   int32_t max_value = descriptor->value(0)->number();
68   for (int i = 0; i < descriptor->value_count(); i++) {
69     if (descriptor->value(i)->number() > max_value) {
70       max_value = descriptor->value(i)->number();
71     }
72   }
73   return max_value != std::numeric_limits<int32_t>::max();
74 }
75 }  // namespace
FromEnum(const EnumDescriptor * descriptor)76 EnumGenerator::ValueLimits EnumGenerator::ValueLimits::FromEnum(
77     const EnumDescriptor* descriptor) {
78   const EnumValueDescriptor* min_desc = descriptor->value(0);
79   const EnumValueDescriptor* max_desc = descriptor->value(0);
80 
81   for (int i = 1; i < descriptor->value_count(); ++i) {
82     if (descriptor->value(i)->number() < min_desc->number()) {
83       min_desc = descriptor->value(i);
84     }
85     if (descriptor->value(i)->number() > max_desc->number()) {
86       max_desc = descriptor->value(i);
87     }
88   }
89 
90   return EnumGenerator::ValueLimits{min_desc, max_desc};
91 }
92 
EnumGenerator(const EnumDescriptor * descriptor,const Options & options)93 EnumGenerator::EnumGenerator(const EnumDescriptor* descriptor,
94                              const Options& options)
95     : enum_(descriptor),
96       options_(options),
97       generate_array_size_(ShouldGenerateArraySize(descriptor)),
98       has_reflection_(HasDescriptorMethods(enum_->file(), options_)),
99       limits_(ValueLimits::FromEnum(enum_)) {
100   // The conditions here for what is "sparse" are not rigorously
101   // chosen.
102   size_t values_range = static_cast<size_t>(limits_.max->number()) -
103                         static_cast<size_t>(limits_.min->number());
104   size_t total_values = static_cast<size_t>(enum_->value_count());
105   should_cache_ = has_reflection_ &&
106                   (values_range < 16u || values_range < total_values * 2u);
107 }
108 
GenerateDefinition(io::Printer * p)109 void EnumGenerator::GenerateDefinition(io::Printer* p) {
110   auto v1 = p->WithVars(EnumVars(enum_, options_, limits_.min, limits_.max));
111 
112   auto v2 = p->WithVars({
113       Sub("Msg_Enum_Enum_MIN",
114           absl::StrCat(p->LookupVar("Msg_Enum_"), enum_->name(), "_MIN"))
115           .AnnotatedAs(enum_),
116       Sub("Msg_Enum_Enum_MAX",
117           absl::StrCat(p->LookupVar("Msg_Enum_"), enum_->name(), "_MAX"))
118           .AnnotatedAs(enum_),
119   });
120   p->Emit(
121       {
122           {"values",
123            [&] {
124              for (int i = 0; i < enum_->value_count(); ++i) {
125                const auto* value = enum_->value(i);
126                p->Emit(
127                    {
128                        Sub("Msg_Enum_VALUE",
129                            absl::StrCat(p->LookupVar("Msg_Enum_"),
130                                         EnumValueName(value)))
131                            .AnnotatedAs(value),
132                        {"kNumber", Int32ToString(value->number())},
133                        {"DEPRECATED",
134                         value->options().deprecated() ? "[[deprecated]]" : ""},
135                    },
136                    R"cc(
137                      $Msg_Enum_VALUE$$ DEPRECATED$ = $kNumber$,
138                    )cc");
139              }
140            }},
141           // Only emit annotations for the $Msg_Enum$ used in the `enum`
142           // definition.
143           Sub("Msg_Enum_annotated", p->LookupVar("Msg_Enum"))
144               .AnnotatedAs(enum_),
145           {"open_enum_sentinels",
146            [&] {
147              if (enum_->is_closed()) {
148                return;
149              }
150 
151              // For open enum semantics: generate min and max sentinel values
152              // equal to INT32_MIN and INT32_MAX
153              p->Emit({{"Msg_Enum_Msg_Enum_",
154                        absl::StrCat(p->LookupVar("Msg_Enum"), "_",
155                                     p->LookupVar("Msg_Enum_"))}},
156                      R"cc(
157                        $Msg_Enum_Msg_Enum_$INT_MIN_SENTINEL_DO_NOT_USE_ =
158                            std::numeric_limits<::int32_t>::min(),
159                        $Msg_Enum_Msg_Enum_$INT_MAX_SENTINEL_DO_NOT_USE_ =
160                            std::numeric_limits<::int32_t>::max(),
161                      )cc");
162            }},
163       },
164       R"cc(
165         enum $Msg_Enum_annotated$ : int {
166           $values$,
167           $open_enum_sentinels$,
168         };
169 
170         $dllexport_decl $bool $Msg_Enum$_IsValid(int value);
171         $dllexport_decl $extern const uint32_t $Msg_Enum$_internal_data_[];
172         constexpr $Msg_Enum$ $Msg_Enum_Enum_MIN$ = static_cast<$Msg_Enum$>($kMin$);
173         constexpr $Msg_Enum$ $Msg_Enum_Enum_MAX$ = static_cast<$Msg_Enum$>($kMax$);
174       )cc");
175 
176   if (generate_array_size_) {
177     p->Emit({Sub("Msg_Enum_Enum_ARRAYSIZE",
178                  absl::StrCat(p->LookupVar("Msg_Enum_"), enum_->name(),
179                               "_ARRAYSIZE"))
180                  .AnnotatedAs(enum_)},
181             R"cc(
182               constexpr int $Msg_Enum_Enum_ARRAYSIZE$ = $kMax$ + 1;
183             )cc");
184   }
185 
186   if (has_reflection_) {
187     p->Emit(R"cc(
188       $dllexport_decl $const ::$proto_ns$::EnumDescriptor*
189       $Msg_Enum$_descriptor();
190     )cc");
191   } else {
192     p->Emit(R"cc(
193       $return_type$ $Msg_Enum$_Name($Msg_Enum$ value);
194     )cc");
195   }
196 
197   // There are three possible implementations of $Enum$_Name() and
198   // $Msg_Enum$_Parse(), depending on whether we are using a dense enum name
199   // cache or not, and whether or not we have reflection. Very little code is
200   // shared between the three, so it is split into three Emit() calls.
201 
202   // Can't use WithVars here, since callbacks can only be passed to Emit()
203   // directly. Because this includes $Enum$, it must be a callback.
204   auto write_assert = [&] {
205     p->Emit(R"cc(
206       static_assert(std::is_same<T, $Msg_Enum$>::value ||
207                         std::is_integral<T>::value,
208                     "Incorrect type passed to $Enum$_Name().");
209     )cc");
210   };
211 
212   if (should_cache_ || !has_reflection_) {
213     p->Emit({{"static_assert", write_assert}}, R"cc(
214       template <typename T>
215       $return_type$ $Msg_Enum$_Name(T value) {
216         $static_assert$;
217         return $Msg_Enum$_Name(static_cast<$Msg_Enum$>(value));
218       }
219     )cc");
220     if (should_cache_) {
221       // Using the NameOfEnum routine can be slow, so we create a small
222       // cache of pointers to the std::string objects that reflection
223       // stores internally.  This cache is a simple contiguous array of
224       // pointers, so if the enum values are sparse, it's not worth it.
225       p->Emit(R"cc(
226         template <>
227         inline $return_type$ $Msg_Enum$_Name($Msg_Enum$ value) {
228           return ::$proto_ns$::internal::NameOfDenseEnum<$Msg_Enum$_descriptor,
229                                                          $kMin$, $kMax$>(
230               static_cast<int>(value));
231         }
232       )cc");
233     }
234   } else {
235     p->Emit({{"static_assert", write_assert}}, R"cc(
236       template <typename T>
237       $return_type$ $Msg_Enum$_Name(T value) {
238         $static_assert$;
239         return ::$proto_ns$::internal::NameOfEnum($Msg_Enum$_descriptor(), value);
240       }
241     )cc");
242   }
243 
244   if (has_reflection_) {
245     p->Emit(R"cc(
246       inline bool $Msg_Enum$_Parse(absl::string_view name, $Msg_Enum$* value) {
247         return ::$proto_ns$::internal::ParseNamedEnum<$Msg_Enum$>(
248             $Msg_Enum$_descriptor(), name, value);
249       }
250     )cc");
251   } else {
252     p->Emit(R"cc(
253       bool $Msg_Enum$_Parse(absl::string_view name, $Msg_Enum$* value);
254     )cc");
255   }
256 }
257 
GenerateGetEnumDescriptorSpecializations(io::Printer * p)258 void EnumGenerator::GenerateGetEnumDescriptorSpecializations(io::Printer* p) {
259   auto v = p->WithVars(EnumVars(enum_, options_, limits_.min, limits_.max));
260 
261   p->Emit(R"cc(
262     template <>
263     struct is_proto_enum<$::Msg_Enum$> : std::true_type {};
264   )cc");
265   if (!has_reflection_) {
266     return;
267   }
268   p->Emit(R"cc(
269     template <>
270     inline const EnumDescriptor* GetEnumDescriptor<$::Msg_Enum$>() {
271       return $::Msg_Enum$_descriptor();
272     }
273   )cc");
274 }
275 
276 
GenerateSymbolImports(io::Printer * p) const277 void EnumGenerator::GenerateSymbolImports(io::Printer* p) const {
278   auto v = p->WithVars(EnumVars(enum_, options_, limits_.min, limits_.max));
279 
280   p->Emit({Sub("Enum_", p->LookupVar("Enum_")).AnnotatedAs(enum_)}, R"cc(
281     using $Enum_$ = $Msg_Enum$;
282   )cc");
283 
284   for (int j = 0; j < enum_->value_count(); ++j) {
285     const auto* value = enum_->value(j);
286     p->Emit(
287         {
288             Sub("VALUE", EnumValueName(enum_->value(j))).AnnotatedAs(value),
289             {"DEPRECATED",
290              value->options().deprecated() ? "[[deprecated]]" : ""},
291         },
292         R"cc(
293           $DEPRECATED $static constexpr $Enum_$ $VALUE$ = $Msg_Enum$_$VALUE$;
294         )cc");
295   }
296 
297   p->Emit(
298       {
299           Sub("Enum_MIN", absl::StrCat(enum_->name(), "_MIN"))
300               .AnnotatedAs(enum_),
301           Sub("Enum_MAX", absl::StrCat(enum_->name(), "_MAX"))
302               .AnnotatedAs(enum_),
303       },
304       R"cc(
305         static inline bool $Enum$_IsValid(int value) {
306           return $Msg_Enum$_IsValid(value);
307         }
308         static constexpr $Enum_$ $Enum_MIN$ = $Msg_Enum$_$Enum$_MIN;
309         static constexpr $Enum_$ $Enum_MAX$ = $Msg_Enum$_$Enum$_MAX;
310       )cc");
311 
312   if (generate_array_size_) {
313     p->Emit(
314         {
315             Sub("Enum_ARRAYSIZE", absl::StrCat(enum_->name(), "_ARRAYSIZE"))
316                 .AnnotatedAs(enum_),
317         },
318         R"cc(
319           static constexpr int $Enum_ARRAYSIZE$ = $Msg_Enum$_$Enum$_ARRAYSIZE;
320         )cc");
321   }
322 
323   if (has_reflection_) {
324     p->Emit(R"cc(
325       static inline const ::$proto_ns$::EnumDescriptor* $Enum$_descriptor() {
326         return $Msg_Enum$_descriptor();
327       }
328     )cc");
329   }
330 
331   p->Emit(R"cc(
332     template <typename T>
333     static inline $return_type$ $Enum$_Name(T value) {
334       return $Msg_Enum$_Name(value);
335     }
336     static inline bool $Enum$_Parse(absl::string_view name, $Enum_$* value) {
337       return $Msg_Enum$_Parse(name, value);
338     }
339   )cc");
340 }
341 
GenerateMethods(int idx,io::Printer * p)342 void EnumGenerator::GenerateMethods(int idx, io::Printer* p) {
343   auto v = p->WithVars(EnumVars(enum_, options_, limits_.min, limits_.max));
344 
345   if (has_reflection_) {
346     p->Emit({{"idx", idx}}, R"cc(
347       const ::$proto_ns$::EnumDescriptor* $Msg_Enum$_descriptor() {
348         ::$proto_ns$::internal::AssignDescriptors(&$desc_table$);
349         return $file_level_enum_descriptors$[$idx$];
350       }
351     )cc");
352   }
353 
354   // Multiple values may have the same number. Sort and dedup.
355   std::vector<int> numbers;
356   numbers.reserve(enum_->value_count());
357   for (int i = 0; i < enum_->value_count(); ++i) {
358     numbers.push_back(enum_->value(i)->number());
359   }
360   // Sort and deduplicate `numbers`.
361   absl::c_sort(numbers);
362   numbers.erase(std::unique(numbers.begin(), numbers.end()), numbers.end());
363 
364   // We now generate the XXX_IsValid functions, as well as their encoded enum
365   // data.
366   // For simple enums we skip the generic ValidateEnum call and use better
367   // codegen. It matches the speed of the previous switch-based codegen.
368   // For more complex enums we use the new algorithm with the encoded data.
369   // Always generate the data array, even on the simple cases because someone
370   // might be using it for TDP entries. If it is not used in the end, the linker
371   // will drop it.
372   p->Emit({{"encoded",
373             [&] {
374               for (uint32_t n : google::protobuf::internal::GenerateEnumData(numbers)) {
375                 p->Emit({{"n", n}}, "$n$u, ");
376               }
377             }}},
378           R"cc(
379             PROTOBUF_CONSTINIT const uint32_t $Msg_Enum$_internal_data_[] = {
380                 $encoded$};
381           )cc");
382 
383   if (numbers.front() + static_cast<int64_t>(numbers.size()) - 1 ==
384       numbers.back()) {
385     // They are sequential. Do a simple range check.
386     p->Emit({{"min", numbers.front()}, {"max", numbers.back()}},
387             R"cc(
388               bool $Msg_Enum$_IsValid(int value) {
389                 return $min$ <= value && value <= $max$;
390               }
391             )cc");
392   } else if (numbers.front() >= 0 && numbers.back() < 64) {
393     // Not sequential, but they fit in a 64-bit bitmap.
394     uint64_t bitmap = 0;
395     for (int n : numbers) {
396       bitmap |= uint64_t{1} << n;
397     }
398     p->Emit({{"bitmap", bitmap}, {"max", numbers.back()}},
399             R"cc(
400               bool $Msg_Enum$_IsValid(int value) {
401                 return 0 <= value && value <= $max$ && (($bitmap$u >> value) & 1) != 0;
402               }
403             )cc");
404   } else {
405     // More complex struct. Use enum data structure for lookup.
406     p->Emit(
407         R"cc(
408           bool $Msg_Enum$_IsValid(int value) {
409             return ::_pbi::ValidateEnum(value, $Msg_Enum$_internal_data_);
410           }
411         )cc");
412   }
413 
414   if (!has_reflection_) {
415     // In lite mode (where descriptors are unavailable), we generate separate
416     // tables for mapping between enum names and numbers. The _entries table
417     // contains the bulk of the data and is sorted by name, while
418     // _entries_by_number is sorted by number and just contains pointers into
419     // _entries. The two tables allow mapping from name to number and number to
420     // name, both in time logarithmic in the number of enum entries. This could
421     // probably be made faster, but for now the tables are intended to be simple
422     // and compact.
423     //
424     // Enums with allow_alias = true support multiple entries with the same
425     // numerical value. In cases where there are multiple names for the same
426     // number, we treat the first name appearing in the .proto file as the
427     // canonical one.
428 
429     absl::btree_map<std::string, int> name_to_number;
430     absl::flat_hash_map<int, std::string> number_to_canonical_name;
431     for (int i = 0; i < enum_->value_count(); ++i) {
432       const auto* value = enum_->value(i);
433       name_to_number.emplace(value->name(), value->number());
434 
435       // The same number may appear with multiple names, so we use emplace() to
436       // let the first name win.
437       number_to_canonical_name.emplace(value->number(), value->name());
438     }
439 
440     // Build the offset table for the strings table.
441     struct Offset {
442       int number;
443       size_t index, byte_offset, len;
444     };
445     std::vector<Offset> offsets;
446     size_t index = 0;
447     size_t offset = 0;
448     for (const auto& e : name_to_number) {
449       offsets.push_back(Offset{e.second, index, offset, e.first.size()});
450       ++index;
451       offset += e.first.size();
452     }
453     absl::c_stable_sort(offsets, [](const auto& a, const auto& b) {
454       return a.byte_offset < b.byte_offset;
455     });
456 
457     std::vector<Offset> offsets_by_number = offsets;
458     absl::c_stable_sort(offsets_by_number, [](const auto& a, const auto& b) {
459       return a.number < b.number;
460     });
461 
462     offsets_by_number.erase(
463         std::unique(
464             offsets_by_number.begin(), offsets_by_number.end(),
465             [](const auto& a, const auto& b) { return a.number == b.number; }),
466         offsets_by_number.end());
467 
468     p->Emit(
469         {
470             {"num_unique", number_to_canonical_name.size()},
471             {"num_declared", enum_->value_count()},
472             {"names",
473              // We concatenate all the names for a given enum into one big
474              // string literal. If instead we store an array of string
475              // literals, the linker seems to put all enum strings for a given
476              // .proto file in the same section, which hinders its ability to
477              // strip out unused strings.
478              [&] {
479                for (const auto& e : name_to_number) {
480                  p->Emit({{"name", e.first}}, R"cc(
481                    "$name$"
482                  )cc");
483                }
484              }},
485             {"entries",
486              [&] {
487                for (const auto& offset : offsets) {
488                  p->Emit({{"number", offset.number},
489                           {"offset", offset.byte_offset},
490                           {"len", offset.len}},
491                          R"cc(
492                            {{&$Msg_Enum$_names[$offset$], $len$}, $number$},
493                          )cc");
494                }
495              }},
496             {"entries_by_number",
497              [&] {
498                for (const auto& offset : offsets_by_number) {
499                  p->Emit({{"number", offset.number},
500                           {"index", offset.index},
501                           {"name", number_to_canonical_name[offset.number]}},
502                          R"cc(
503                            $index$,  // $number$ -> $name$
504                          )cc");
505                }
506              }},
507         },
508         R"cc(
509           static ::$proto_ns$::internal::ExplicitlyConstructed<std::string>
510               $Msg_Enum$_strings[$num_unique$] = {};
511 
512           static const char $Msg_Enum$_names[] = {
513               $names$,
514           };
515 
516           static const ::$proto_ns$::internal::EnumEntry $Msg_Enum$_entries[] =
517               {
518                   $entries$,
519           };
520 
521           static const int $Msg_Enum$_entries_by_number[] = {
522               $entries_by_number$,
523           };
524 
525           $return_type$ $Msg_Enum$_Name($Msg_Enum$ value) {
526             static const bool kDummy =
527                 ::$proto_ns$::internal::InitializeEnumStrings(
528                     $Msg_Enum$_entries, $Msg_Enum$_entries_by_number,
529                     $num_unique$, $Msg_Enum$_strings);
530             (void)kDummy;
531 
532             int idx = ::$proto_ns$::internal::LookUpEnumName(
533                 $Msg_Enum$_entries, $Msg_Enum$_entries_by_number, $num_unique$,
534                 value);
535             return idx == -1 ? ::$proto_ns$::internal::GetEmptyString()
536                              : $Msg_Enum$_strings[idx].get();
537           }
538 
539           bool $Msg_Enum$_Parse(absl::string_view name, $Msg_Enum$* value) {
540             int int_value;
541             bool success = ::$proto_ns$::internal::LookUpEnumValue(
542                 $Msg_Enum$_entries, $num_declared$, name, &int_value);
543             if (success) {
544               *value = static_cast<$Msg_Enum$>(int_value);
545             }
546             return success;
547           }
548         )cc");
549   }
550 
551   if (enum_->containing_type() != nullptr) {
552     // Before C++17, we must define the static constants which were
553     // declared in the header, to give the linker a place to put them.
554     // But MSVC++ pre-2015 and post-2017 (version 15.5+) insists that we not.
555     p->Emit(
556         {
557             {"Msg_", ClassName(enum_->containing_type(), false)},
558             {"constexpr_storage",
559              [&] {
560                for (int i = 0; i < enum_->value_count(); i++) {
561                  p->Emit({{"VALUE", EnumValueName(enum_->value(i))}},
562                          R"cc(
563                            constexpr $Msg_Enum$ $Msg_$::$VALUE$;
564                          )cc");
565                }
566              }},
567             {"array_size",
568              [&] {
569                if (generate_array_size_) {
570                  p->Emit(R"cc(
571                    constexpr int $Msg_$::$Enum$_ARRAYSIZE;
572                  )cc");
573                }
574              }},
575         },
576         R"(
577           #if (__cplusplus < 201703) && \
578             (!defined(_MSC_VER) || (_MSC_VER >= 1900 && _MSC_VER < 1912))
579 
580           $constexpr_storage$;
581           constexpr $Msg_Enum$ $Msg_$::$Enum$_MIN;
582           constexpr $Msg_Enum$ $Msg_$::$Enum$_MAX;
583           $array_size$;
584 
585           #endif  // (__cplusplus < 201703) &&
586                   // (!defined(_MSC_VER) || (_MSC_VER >= 1900 && _MSC_VER < 1912))
587         )");
588   }
589 }
590 }  // namespace cpp
591 }  // namespace compiler
592 }  // namespace protobuf
593 }  // namespace google
594