1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc. All rights reserved.
3 //
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file or at
6 // https://developers.google.com/open-source/licenses/bsd
7
8 // Author: kenton@google.com (Kenton Varda)
9 // Based on original Protocol Buffers design by
10 // Sanjay Ghemawat, Jeff Dean, and others.
11
12 #include "google/protobuf/compiler/cpp/enum.h"
13
14 #include <algorithm>
15 #include <cstddef>
16 #include <cstdint>
17 #include <limits>
18 #include <string>
19 #include <utility>
20 #include <vector>
21
22 #include "absl/algorithm/container.h"
23 #include "absl/container/btree_map.h"
24 #include "absl/container/btree_set.h"
25 #include "absl/container/flat_hash_map.h"
26 #include "absl/strings/str_cat.h"
27 #include "absl/strings/string_view.h"
28 #include "google/protobuf/compiler/cpp/generator.h"
29 #include "google/protobuf/compiler/cpp/helpers.h"
30 #include "google/protobuf/compiler/cpp/names.h"
31 #include "google/protobuf/compiler/cpp/options.h"
32 #include "google/protobuf/descriptor.h"
33 #include "google/protobuf/generated_enum_util.h"
34
35 namespace google {
36 namespace protobuf {
37 namespace compiler {
38 namespace cpp {
39 namespace {
40 using Sub = ::google::protobuf::io::Printer::Sub;
41
EnumVars(const EnumDescriptor * enum_,const Options & options,const EnumValueDescriptor * min,const EnumValueDescriptor * max)42 absl::flat_hash_map<absl::string_view, std::string> EnumVars(
43 const EnumDescriptor* enum_, const Options& options,
44 const EnumValueDescriptor* min, const EnumValueDescriptor* max) {
45 auto classname = ClassName(enum_, false);
46 return {
47 {"Enum", std::string(enum_->name())},
48 {"Enum_", ResolveKeyword(enum_->name())},
49 {"Msg_Enum", classname},
50 {"::Msg_Enum", QualifiedClassName(enum_, options)},
51 {"Msg_Enum_",
52 enum_->containing_type() == nullptr ? "" : absl::StrCat(classname, "_")},
53 {"kMin", absl::StrCat(min->number())},
54 {"kMax", absl::StrCat(max->number())},
55 {"return_type", CppGenerator::GetResolvedSourceFeatures(*enum_)
56 .GetExtension(::pb::cpp)
57 .enum_name_uses_string_view()
58 ? "::absl::string_view"
59 : "const std::string&"},
60 };
61 }
62
63 // The ARRAYSIZE constant is the max enum value plus 1. If the max enum value
64 // is kint32max, ARRAYSIZE will overflow. In such cases we should omit the
65 // generation of the ARRAYSIZE constant.
ShouldGenerateArraySize(const EnumDescriptor * descriptor)66 bool ShouldGenerateArraySize(const EnumDescriptor* descriptor) {
67 int32_t max_value = descriptor->value(0)->number();
68 for (int i = 0; i < descriptor->value_count(); i++) {
69 if (descriptor->value(i)->number() > max_value) {
70 max_value = descriptor->value(i)->number();
71 }
72 }
73 return max_value != std::numeric_limits<int32_t>::max();
74 }
75 } // namespace
FromEnum(const EnumDescriptor * descriptor)76 EnumGenerator::ValueLimits EnumGenerator::ValueLimits::FromEnum(
77 const EnumDescriptor* descriptor) {
78 const EnumValueDescriptor* min_desc = descriptor->value(0);
79 const EnumValueDescriptor* max_desc = descriptor->value(0);
80
81 for (int i = 1; i < descriptor->value_count(); ++i) {
82 if (descriptor->value(i)->number() < min_desc->number()) {
83 min_desc = descriptor->value(i);
84 }
85 if (descriptor->value(i)->number() > max_desc->number()) {
86 max_desc = descriptor->value(i);
87 }
88 }
89
90 return EnumGenerator::ValueLimits{min_desc, max_desc};
91 }
92
EnumGenerator(const EnumDescriptor * descriptor,const Options & options)93 EnumGenerator::EnumGenerator(const EnumDescriptor* descriptor,
94 const Options& options)
95 : enum_(descriptor),
96 options_(options),
97 generate_array_size_(ShouldGenerateArraySize(descriptor)),
98 has_reflection_(HasDescriptorMethods(enum_->file(), options_)),
99 limits_(ValueLimits::FromEnum(enum_)) {
100 // The conditions here for what is "sparse" are not rigorously
101 // chosen.
102 size_t values_range = static_cast<size_t>(limits_.max->number()) -
103 static_cast<size_t>(limits_.min->number());
104 size_t total_values = static_cast<size_t>(enum_->value_count());
105 should_cache_ = has_reflection_ &&
106 (values_range < 16u || values_range < total_values * 2u);
107 }
108
GenerateDefinition(io::Printer * p)109 void EnumGenerator::GenerateDefinition(io::Printer* p) {
110 auto v1 = p->WithVars(EnumVars(enum_, options_, limits_.min, limits_.max));
111
112 auto v2 = p->WithVars({
113 Sub("Msg_Enum_Enum_MIN",
114 absl::StrCat(p->LookupVar("Msg_Enum_"), enum_->name(), "_MIN"))
115 .AnnotatedAs(enum_),
116 Sub("Msg_Enum_Enum_MAX",
117 absl::StrCat(p->LookupVar("Msg_Enum_"), enum_->name(), "_MAX"))
118 .AnnotatedAs(enum_),
119 });
120 p->Emit(
121 {
122 {"values",
123 [&] {
124 for (int i = 0; i < enum_->value_count(); ++i) {
125 const auto* value = enum_->value(i);
126 p->Emit(
127 {
128 Sub("Msg_Enum_VALUE",
129 absl::StrCat(p->LookupVar("Msg_Enum_"),
130 EnumValueName(value)))
131 .AnnotatedAs(value),
132 {"kNumber", Int32ToString(value->number())},
133 {"DEPRECATED",
134 value->options().deprecated() ? "[[deprecated]]" : ""},
135 },
136 R"cc(
137 $Msg_Enum_VALUE$$ DEPRECATED$ = $kNumber$,
138 )cc");
139 }
140 }},
141 // Only emit annotations for the $Msg_Enum$ used in the `enum`
142 // definition.
143 Sub("Msg_Enum_annotated", p->LookupVar("Msg_Enum"))
144 .AnnotatedAs(enum_),
145 {"open_enum_sentinels",
146 [&] {
147 if (enum_->is_closed()) {
148 return;
149 }
150
151 // For open enum semantics: generate min and max sentinel values
152 // equal to INT32_MIN and INT32_MAX
153 p->Emit({{"Msg_Enum_Msg_Enum_",
154 absl::StrCat(p->LookupVar("Msg_Enum"), "_",
155 p->LookupVar("Msg_Enum_"))}},
156 R"cc(
157 $Msg_Enum_Msg_Enum_$INT_MIN_SENTINEL_DO_NOT_USE_ =
158 std::numeric_limits<::int32_t>::min(),
159 $Msg_Enum_Msg_Enum_$INT_MAX_SENTINEL_DO_NOT_USE_ =
160 std::numeric_limits<::int32_t>::max(),
161 )cc");
162 }},
163 },
164 R"cc(
165 enum $Msg_Enum_annotated$ : int {
166 $values$,
167 $open_enum_sentinels$,
168 };
169
170 $dllexport_decl $bool $Msg_Enum$_IsValid(int value);
171 $dllexport_decl $extern const uint32_t $Msg_Enum$_internal_data_[];
172 constexpr $Msg_Enum$ $Msg_Enum_Enum_MIN$ = static_cast<$Msg_Enum$>($kMin$);
173 constexpr $Msg_Enum$ $Msg_Enum_Enum_MAX$ = static_cast<$Msg_Enum$>($kMax$);
174 )cc");
175
176 if (generate_array_size_) {
177 p->Emit({Sub("Msg_Enum_Enum_ARRAYSIZE",
178 absl::StrCat(p->LookupVar("Msg_Enum_"), enum_->name(),
179 "_ARRAYSIZE"))
180 .AnnotatedAs(enum_)},
181 R"cc(
182 constexpr int $Msg_Enum_Enum_ARRAYSIZE$ = $kMax$ + 1;
183 )cc");
184 }
185
186 if (has_reflection_) {
187 p->Emit(R"cc(
188 $dllexport_decl $const ::$proto_ns$::EnumDescriptor*
189 $Msg_Enum$_descriptor();
190 )cc");
191 } else {
192 p->Emit(R"cc(
193 $return_type$ $Msg_Enum$_Name($Msg_Enum$ value);
194 )cc");
195 }
196
197 // There are three possible implementations of $Enum$_Name() and
198 // $Msg_Enum$_Parse(), depending on whether we are using a dense enum name
199 // cache or not, and whether or not we have reflection. Very little code is
200 // shared between the three, so it is split into three Emit() calls.
201
202 // Can't use WithVars here, since callbacks can only be passed to Emit()
203 // directly. Because this includes $Enum$, it must be a callback.
204 auto write_assert = [&] {
205 p->Emit(R"cc(
206 static_assert(std::is_same<T, $Msg_Enum$>::value ||
207 std::is_integral<T>::value,
208 "Incorrect type passed to $Enum$_Name().");
209 )cc");
210 };
211
212 if (should_cache_ || !has_reflection_) {
213 p->Emit({{"static_assert", write_assert}}, R"cc(
214 template <typename T>
215 $return_type$ $Msg_Enum$_Name(T value) {
216 $static_assert$;
217 return $Msg_Enum$_Name(static_cast<$Msg_Enum$>(value));
218 }
219 )cc");
220 if (should_cache_) {
221 // Using the NameOfEnum routine can be slow, so we create a small
222 // cache of pointers to the std::string objects that reflection
223 // stores internally. This cache is a simple contiguous array of
224 // pointers, so if the enum values are sparse, it's not worth it.
225 p->Emit(R"cc(
226 template <>
227 inline $return_type$ $Msg_Enum$_Name($Msg_Enum$ value) {
228 return ::$proto_ns$::internal::NameOfDenseEnum<$Msg_Enum$_descriptor,
229 $kMin$, $kMax$>(
230 static_cast<int>(value));
231 }
232 )cc");
233 }
234 } else {
235 p->Emit({{"static_assert", write_assert}}, R"cc(
236 template <typename T>
237 $return_type$ $Msg_Enum$_Name(T value) {
238 $static_assert$;
239 return ::$proto_ns$::internal::NameOfEnum($Msg_Enum$_descriptor(), value);
240 }
241 )cc");
242 }
243
244 if (has_reflection_) {
245 p->Emit(R"cc(
246 inline bool $Msg_Enum$_Parse(absl::string_view name, $Msg_Enum$* value) {
247 return ::$proto_ns$::internal::ParseNamedEnum<$Msg_Enum$>(
248 $Msg_Enum$_descriptor(), name, value);
249 }
250 )cc");
251 } else {
252 p->Emit(R"cc(
253 bool $Msg_Enum$_Parse(absl::string_view name, $Msg_Enum$* value);
254 )cc");
255 }
256 }
257
GenerateGetEnumDescriptorSpecializations(io::Printer * p)258 void EnumGenerator::GenerateGetEnumDescriptorSpecializations(io::Printer* p) {
259 auto v = p->WithVars(EnumVars(enum_, options_, limits_.min, limits_.max));
260
261 p->Emit(R"cc(
262 template <>
263 struct is_proto_enum<$::Msg_Enum$> : std::true_type {};
264 )cc");
265 if (!has_reflection_) {
266 return;
267 }
268 p->Emit(R"cc(
269 template <>
270 inline const EnumDescriptor* GetEnumDescriptor<$::Msg_Enum$>() {
271 return $::Msg_Enum$_descriptor();
272 }
273 )cc");
274 }
275
276
GenerateSymbolImports(io::Printer * p) const277 void EnumGenerator::GenerateSymbolImports(io::Printer* p) const {
278 auto v = p->WithVars(EnumVars(enum_, options_, limits_.min, limits_.max));
279
280 p->Emit({Sub("Enum_", p->LookupVar("Enum_")).AnnotatedAs(enum_)}, R"cc(
281 using $Enum_$ = $Msg_Enum$;
282 )cc");
283
284 for (int j = 0; j < enum_->value_count(); ++j) {
285 const auto* value = enum_->value(j);
286 p->Emit(
287 {
288 Sub("VALUE", EnumValueName(enum_->value(j))).AnnotatedAs(value),
289 {"DEPRECATED",
290 value->options().deprecated() ? "[[deprecated]]" : ""},
291 },
292 R"cc(
293 $DEPRECATED $static constexpr $Enum_$ $VALUE$ = $Msg_Enum$_$VALUE$;
294 )cc");
295 }
296
297 p->Emit(
298 {
299 Sub("Enum_MIN", absl::StrCat(enum_->name(), "_MIN"))
300 .AnnotatedAs(enum_),
301 Sub("Enum_MAX", absl::StrCat(enum_->name(), "_MAX"))
302 .AnnotatedAs(enum_),
303 },
304 R"cc(
305 static inline bool $Enum$_IsValid(int value) {
306 return $Msg_Enum$_IsValid(value);
307 }
308 static constexpr $Enum_$ $Enum_MIN$ = $Msg_Enum$_$Enum$_MIN;
309 static constexpr $Enum_$ $Enum_MAX$ = $Msg_Enum$_$Enum$_MAX;
310 )cc");
311
312 if (generate_array_size_) {
313 p->Emit(
314 {
315 Sub("Enum_ARRAYSIZE", absl::StrCat(enum_->name(), "_ARRAYSIZE"))
316 .AnnotatedAs(enum_),
317 },
318 R"cc(
319 static constexpr int $Enum_ARRAYSIZE$ = $Msg_Enum$_$Enum$_ARRAYSIZE;
320 )cc");
321 }
322
323 if (has_reflection_) {
324 p->Emit(R"cc(
325 static inline const ::$proto_ns$::EnumDescriptor* $Enum$_descriptor() {
326 return $Msg_Enum$_descriptor();
327 }
328 )cc");
329 }
330
331 p->Emit(R"cc(
332 template <typename T>
333 static inline $return_type$ $Enum$_Name(T value) {
334 return $Msg_Enum$_Name(value);
335 }
336 static inline bool $Enum$_Parse(absl::string_view name, $Enum_$* value) {
337 return $Msg_Enum$_Parse(name, value);
338 }
339 )cc");
340 }
341
GenerateMethods(int idx,io::Printer * p)342 void EnumGenerator::GenerateMethods(int idx, io::Printer* p) {
343 auto v = p->WithVars(EnumVars(enum_, options_, limits_.min, limits_.max));
344
345 if (has_reflection_) {
346 p->Emit({{"idx", idx}}, R"cc(
347 const ::$proto_ns$::EnumDescriptor* $Msg_Enum$_descriptor() {
348 ::$proto_ns$::internal::AssignDescriptors(&$desc_table$);
349 return $file_level_enum_descriptors$[$idx$];
350 }
351 )cc");
352 }
353
354 // Multiple values may have the same number. Sort and dedup.
355 std::vector<int> numbers;
356 numbers.reserve(enum_->value_count());
357 for (int i = 0; i < enum_->value_count(); ++i) {
358 numbers.push_back(enum_->value(i)->number());
359 }
360 // Sort and deduplicate `numbers`.
361 absl::c_sort(numbers);
362 numbers.erase(std::unique(numbers.begin(), numbers.end()), numbers.end());
363
364 // We now generate the XXX_IsValid functions, as well as their encoded enum
365 // data.
366 // For simple enums we skip the generic ValidateEnum call and use better
367 // codegen. It matches the speed of the previous switch-based codegen.
368 // For more complex enums we use the new algorithm with the encoded data.
369 // Always generate the data array, even on the simple cases because someone
370 // might be using it for TDP entries. If it is not used in the end, the linker
371 // will drop it.
372 p->Emit({{"encoded",
373 [&] {
374 for (uint32_t n : google::protobuf::internal::GenerateEnumData(numbers)) {
375 p->Emit({{"n", n}}, "$n$u, ");
376 }
377 }}},
378 R"cc(
379 PROTOBUF_CONSTINIT const uint32_t $Msg_Enum$_internal_data_[] = {
380 $encoded$};
381 )cc");
382
383 if (numbers.front() + static_cast<int64_t>(numbers.size()) - 1 ==
384 numbers.back()) {
385 // They are sequential. Do a simple range check.
386 p->Emit({{"min", numbers.front()}, {"max", numbers.back()}},
387 R"cc(
388 bool $Msg_Enum$_IsValid(int value) {
389 return $min$ <= value && value <= $max$;
390 }
391 )cc");
392 } else if (numbers.front() >= 0 && numbers.back() < 64) {
393 // Not sequential, but they fit in a 64-bit bitmap.
394 uint64_t bitmap = 0;
395 for (int n : numbers) {
396 bitmap |= uint64_t{1} << n;
397 }
398 p->Emit({{"bitmap", bitmap}, {"max", numbers.back()}},
399 R"cc(
400 bool $Msg_Enum$_IsValid(int value) {
401 return 0 <= value && value <= $max$ && (($bitmap$u >> value) & 1) != 0;
402 }
403 )cc");
404 } else {
405 // More complex struct. Use enum data structure for lookup.
406 p->Emit(
407 R"cc(
408 bool $Msg_Enum$_IsValid(int value) {
409 return ::_pbi::ValidateEnum(value, $Msg_Enum$_internal_data_);
410 }
411 )cc");
412 }
413
414 if (!has_reflection_) {
415 // In lite mode (where descriptors are unavailable), we generate separate
416 // tables for mapping between enum names and numbers. The _entries table
417 // contains the bulk of the data and is sorted by name, while
418 // _entries_by_number is sorted by number and just contains pointers into
419 // _entries. The two tables allow mapping from name to number and number to
420 // name, both in time logarithmic in the number of enum entries. This could
421 // probably be made faster, but for now the tables are intended to be simple
422 // and compact.
423 //
424 // Enums with allow_alias = true support multiple entries with the same
425 // numerical value. In cases where there are multiple names for the same
426 // number, we treat the first name appearing in the .proto file as the
427 // canonical one.
428
429 absl::btree_map<std::string, int> name_to_number;
430 absl::flat_hash_map<int, std::string> number_to_canonical_name;
431 for (int i = 0; i < enum_->value_count(); ++i) {
432 const auto* value = enum_->value(i);
433 name_to_number.emplace(value->name(), value->number());
434
435 // The same number may appear with multiple names, so we use emplace() to
436 // let the first name win.
437 number_to_canonical_name.emplace(value->number(), value->name());
438 }
439
440 // Build the offset table for the strings table.
441 struct Offset {
442 int number;
443 size_t index, byte_offset, len;
444 };
445 std::vector<Offset> offsets;
446 size_t index = 0;
447 size_t offset = 0;
448 for (const auto& e : name_to_number) {
449 offsets.push_back(Offset{e.second, index, offset, e.first.size()});
450 ++index;
451 offset += e.first.size();
452 }
453 absl::c_stable_sort(offsets, [](const auto& a, const auto& b) {
454 return a.byte_offset < b.byte_offset;
455 });
456
457 std::vector<Offset> offsets_by_number = offsets;
458 absl::c_stable_sort(offsets_by_number, [](const auto& a, const auto& b) {
459 return a.number < b.number;
460 });
461
462 offsets_by_number.erase(
463 std::unique(
464 offsets_by_number.begin(), offsets_by_number.end(),
465 [](const auto& a, const auto& b) { return a.number == b.number; }),
466 offsets_by_number.end());
467
468 p->Emit(
469 {
470 {"num_unique", number_to_canonical_name.size()},
471 {"num_declared", enum_->value_count()},
472 {"names",
473 // We concatenate all the names for a given enum into one big
474 // string literal. If instead we store an array of string
475 // literals, the linker seems to put all enum strings for a given
476 // .proto file in the same section, which hinders its ability to
477 // strip out unused strings.
478 [&] {
479 for (const auto& e : name_to_number) {
480 p->Emit({{"name", e.first}}, R"cc(
481 "$name$"
482 )cc");
483 }
484 }},
485 {"entries",
486 [&] {
487 for (const auto& offset : offsets) {
488 p->Emit({{"number", offset.number},
489 {"offset", offset.byte_offset},
490 {"len", offset.len}},
491 R"cc(
492 {{&$Msg_Enum$_names[$offset$], $len$}, $number$},
493 )cc");
494 }
495 }},
496 {"entries_by_number",
497 [&] {
498 for (const auto& offset : offsets_by_number) {
499 p->Emit({{"number", offset.number},
500 {"index", offset.index},
501 {"name", number_to_canonical_name[offset.number]}},
502 R"cc(
503 $index$, // $number$ -> $name$
504 )cc");
505 }
506 }},
507 },
508 R"cc(
509 static ::$proto_ns$::internal::ExplicitlyConstructed<std::string>
510 $Msg_Enum$_strings[$num_unique$] = {};
511
512 static const char $Msg_Enum$_names[] = {
513 $names$,
514 };
515
516 static const ::$proto_ns$::internal::EnumEntry $Msg_Enum$_entries[] =
517 {
518 $entries$,
519 };
520
521 static const int $Msg_Enum$_entries_by_number[] = {
522 $entries_by_number$,
523 };
524
525 $return_type$ $Msg_Enum$_Name($Msg_Enum$ value) {
526 static const bool kDummy =
527 ::$proto_ns$::internal::InitializeEnumStrings(
528 $Msg_Enum$_entries, $Msg_Enum$_entries_by_number,
529 $num_unique$, $Msg_Enum$_strings);
530 (void)kDummy;
531
532 int idx = ::$proto_ns$::internal::LookUpEnumName(
533 $Msg_Enum$_entries, $Msg_Enum$_entries_by_number, $num_unique$,
534 value);
535 return idx == -1 ? ::$proto_ns$::internal::GetEmptyString()
536 : $Msg_Enum$_strings[idx].get();
537 }
538
539 bool $Msg_Enum$_Parse(absl::string_view name, $Msg_Enum$* value) {
540 int int_value;
541 bool success = ::$proto_ns$::internal::LookUpEnumValue(
542 $Msg_Enum$_entries, $num_declared$, name, &int_value);
543 if (success) {
544 *value = static_cast<$Msg_Enum$>(int_value);
545 }
546 return success;
547 }
548 )cc");
549 }
550
551 if (enum_->containing_type() != nullptr) {
552 // Before C++17, we must define the static constants which were
553 // declared in the header, to give the linker a place to put them.
554 // But MSVC++ pre-2015 and post-2017 (version 15.5+) insists that we not.
555 p->Emit(
556 {
557 {"Msg_", ClassName(enum_->containing_type(), false)},
558 {"constexpr_storage",
559 [&] {
560 for (int i = 0; i < enum_->value_count(); i++) {
561 p->Emit({{"VALUE", EnumValueName(enum_->value(i))}},
562 R"cc(
563 constexpr $Msg_Enum$ $Msg_$::$VALUE$;
564 )cc");
565 }
566 }},
567 {"array_size",
568 [&] {
569 if (generate_array_size_) {
570 p->Emit(R"cc(
571 constexpr int $Msg_$::$Enum$_ARRAYSIZE;
572 )cc");
573 }
574 }},
575 },
576 R"(
577 #if (__cplusplus < 201703) && \
578 (!defined(_MSC_VER) || (_MSC_VER >= 1900 && _MSC_VER < 1912))
579
580 $constexpr_storage$;
581 constexpr $Msg_Enum$ $Msg_$::$Enum$_MIN;
582 constexpr $Msg_Enum$ $Msg_$::$Enum$_MAX;
583 $array_size$;
584
585 #endif // (__cplusplus < 201703) &&
586 // (!defined(_MSC_VER) || (_MSC_VER >= 1900 && _MSC_VER < 1912))
587 )");
588 }
589 }
590 } // namespace cpp
591 } // namespace compiler
592 } // namespace protobuf
593 } // namespace google
594