1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc. All rights reserved.
3 //
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file or at
6 // https://developers.google.com/open-source/licenses/bsd
7
8 // Author: kenton@google.com (Kenton Varda)
9 // Based on original Protocol Buffers design by
10 // Sanjay Ghemawat, Jeff Dean, and others.
11
12 #include "google/protobuf/compiler/cpp/helpers.h"
13
14 #include <algorithm>
15 #include <cstddef>
16 #include <cstdint>
17 #include <limits>
18 #include <memory>
19 #include <new>
20 #include <queue>
21 #include <string>
22 #include <type_traits>
23 #include <utility>
24 #include <vector>
25
26 #include "absl/base/thread_annotations.h"
27 #include "absl/container/flat_hash_map.h"
28 #include "absl/container/flat_hash_set.h"
29 #include "absl/log/absl_check.h"
30 #include "absl/log/absl_log.h"
31 #include "absl/memory/memory.h"
32 #include "absl/strings/ascii.h"
33 #include "absl/strings/cord.h"
34 #include "absl/strings/escaping.h"
35 #include "absl/strings/match.h"
36 #include "absl/strings/str_cat.h"
37 #include "absl/strings/str_format.h"
38 #include "absl/strings/str_join.h"
39 #include "absl/strings/str_replace.h"
40 #include "absl/strings/str_split.h"
41 #include "absl/strings/string_view.h"
42 #include "absl/strings/substitute.h"
43 #include "absl/synchronization/mutex.h"
44 #include "absl/types/optional.h"
45 #include "absl/types/span.h"
46 #include "google/protobuf/arenastring.h"
47 #include "google/protobuf/compiler/code_generator.h"
48 #include "google/protobuf/compiler/code_generator_lite.h"
49 #include "google/protobuf/compiler/cpp/names.h"
50 #include "google/protobuf/compiler/cpp/options.h"
51 #include "google/protobuf/compiler/scc.h"
52 #include "google/protobuf/descriptor.h"
53 #include "google/protobuf/descriptor.pb.h"
54 #include "google/protobuf/dynamic_message.h"
55 #include "google/protobuf/generated_message_reflection.h"
56 #include "google/protobuf/generated_message_tctable_impl.h"
57 #include "google/protobuf/io/printer.h"
58 #include "google/protobuf/io/strtod.h"
59 #include "google/protobuf/map.h"
60 #include "google/protobuf/repeated_ptr_field.h"
61 #include "google/protobuf/wire_format.h"
62 #include "google/protobuf/wire_format_lite.h"
63
64
65 // Must be last.
66 #include "google/protobuf/port_def.inc"
67
68 namespace google {
69 namespace protobuf {
70 namespace compiler {
71 namespace cpp {
72 namespace {
// Simple name of the well-known Any message type.
constexpr absl::string_view kAnyMessageName = "Any";
// Path of the .proto file that declares the Any message.
constexpr absl::string_view kAnyProtoFile = "google/protobuf/any.proto";
75
// Identifiers that must not be used verbatim for generated C++ symbols.
// Keywords() loads this table into a hash set; ResolveKeyword(), FieldName()
// and EnumValueName() append a trailing underscore to any name found in it.
static const char* const kKeywordList[] = {
    // clang-format off
    "NULL",
    "alignas",
    "alignof",
    "and",
    "and_eq",
    "asm",
    "assert",
    "auto",
    "bitand",
    "bitor",
    "bool",
    "break",
    "case",
    "catch",
    "char",
    "class",
    "compl",
    "const",
    "constexpr",
    "const_cast",
    "continue",
    "decltype",
    "default",
    "delete",
    "do",
    "double",
    "dynamic_cast",
    "else",
    "enum",
    "explicit",
    "export",
    "extern",
    "false",
    "float",
    "for",
    "friend",
    "goto",
    "if",
    "inline",
    "int",
    "long",
    "mutable",
    "namespace",
    "new",
    "noexcept",
    "not",
    "not_eq",
    "nullptr",
    "operator",
    "or",
    "or_eq",
    "private",
    "protected",
    "public",
    "register",
    "reinterpret_cast",
    "return",
    "short",
    "signed",
    "sizeof",
    "static",
    "static_assert",
    "static_cast",
    "struct",
    "switch",
    "template",
    "this",
    "thread_local",
    "throw",
    "true",
    "try",
    "typedef",
    "typeid",
    "typename",
    "union",
    "unsigned",
    "using",
    "virtual",
    "void",
    "volatile",
    "wchar_t",
    "while",
    "xor",
    "xor_eq",
    "char8_t",
    "char16_t",
    "char32_t",
    "concept",
    "consteval",
    "constinit",
    "co_await",
    "co_return",
    "co_yield",
    "requires",
    // clang-format on
};
174
Keywords()175 const absl::flat_hash_set<absl::string_view>& Keywords() {
176 static const auto* keywords = [] {
177 auto* keywords = new absl::flat_hash_set<absl::string_view>();
178
179 for (const auto keyword : kKeywordList) {
180 keywords->emplace(keyword);
181 }
182 return keywords;
183 }();
184 return *keywords;
185 }
186
IntTypeName(const Options & options,absl::string_view type)187 std::string IntTypeName(const Options& options, absl::string_view type) {
188 return absl::StrCat("::", type, "_t");
189 }
190
191
192
193 } // namespace
194
IsLazy(const FieldDescriptor * field,const Options & options,MessageSCCAnalyzer * scc_analyzer)195 bool IsLazy(const FieldDescriptor* field, const Options& options,
196 MessageSCCAnalyzer* scc_analyzer) {
197 return IsLazilyVerifiedLazy(field, options) ||
198 IsEagerlyVerifiedLazy(field, options, scc_analyzer);
199 }
200
201 // Returns true if "field" is a message field that is backed by LazyField per
202 // profile (go/pdlazy).
IsLazyByProfile(const FieldDescriptor * field,const Options & options,MessageSCCAnalyzer * scc_analyzer)203 inline bool IsLazyByProfile(const FieldDescriptor* field,
204 const Options& options,
205 MessageSCCAnalyzer* scc_analyzer) {
206 return false;
207 }
208
// Reports whether `field` is an eagerly verified lazy field. This
// implementation never marks a field as such and always returns false.
bool IsEagerlyVerifiedLazy(const FieldDescriptor* /*field*/,
                           const Options& /*options*/,
                           MessageSCCAnalyzer* /*scc_analyzer*/) {
  return false;
}
213
// Reports whether `field` is a lazily verified lazy field. This
// implementation never marks a field as such and always returns false.
bool IsLazilyVerifiedLazy(const FieldDescriptor* /*field*/,
                          const Options& /*options*/) {
  return false;
}
218
GetLazyStyle(const FieldDescriptor * field,const Options & options,MessageSCCAnalyzer * scc_analyzer)219 internal::field_layout::TransformValidation GetLazyStyle(
220 const FieldDescriptor* field, const Options& options,
221 MessageSCCAnalyzer* scc_analyzer) {
222 if (IsEagerlyVerifiedLazy(field, options, scc_analyzer)) {
223 return internal::field_layout::kTvEager;
224 }
225 if (IsLazilyVerifiedLazy(field, options)) {
226 return internal::field_layout::kTvLazy;
227 }
228 return {};
229 }
230
MessageVars(const Descriptor * desc)231 absl::flat_hash_map<absl::string_view, std::string> MessageVars(
232 const Descriptor* desc) {
233 absl::string_view prefix = "_impl_.";
234 return {
235 {"any_metadata", absl::StrCat(prefix, "_any_metadata_")},
236 {"cached_size", absl::StrCat(prefix, "_cached_size_")},
237 {"extensions", absl::StrCat(prefix, "_extensions_")},
238 {"has_bits", absl::StrCat(prefix, "_has_bits_")},
239 {"inlined_string_donated_array",
240 absl::StrCat(prefix, "_inlined_string_donated_")},
241 {"oneof_case", absl::StrCat(prefix, "_oneof_case_")},
242 {"tracker", "Impl_::_tracker_"},
243 {"weak_field_map", absl::StrCat(prefix, "_weak_field_map_")},
244 {"split", absl::StrCat(prefix, "_split_")},
245 {"cached_split_ptr", "cached_split_ptr"},
246 };
247 }
248
SetCommonMessageDataVariables(const Descriptor * descriptor,absl::flat_hash_map<absl::string_view,std::string> * variables)249 void SetCommonMessageDataVariables(
250 const Descriptor* descriptor,
251 absl::flat_hash_map<absl::string_view, std::string>* variables) {
252 for (auto& pair : MessageVars(descriptor)) {
253 variables->emplace(pair);
254 }
255 }
256
UnknownFieldsVars(const Descriptor * desc,const Options & opts)257 absl::flat_hash_map<absl::string_view, std::string> UnknownFieldsVars(
258 const Descriptor* desc, const Options& opts) {
259 std::string unknown_fields_type;
260 std::string default_instance;
261 if (UseUnknownFieldSet(desc->file(), opts)) {
262 unknown_fields_type =
263 absl::StrCat("::", ProtobufNamespace(opts), "::UnknownFieldSet");
264 default_instance = absl::StrCat(unknown_fields_type, "::default_instance");
265 } else {
266 unknown_fields_type =
267 PrimitiveTypeName(opts, FieldDescriptor::CPPTYPE_STRING);
268 default_instance = absl::StrCat("::", ProtobufNamespace(opts),
269 "::internal::GetEmptyString");
270 }
271
272 return {
273 {"unknown_fields",
274 absl::Substitute("_internal_metadata_.unknown_fields<$0>($1)",
275 unknown_fields_type, default_instance)},
276 {"unknown_fields_type", unknown_fields_type},
277 {"have_unknown_fields", "_internal_metadata_.have_unknown_fields()"},
278 {"mutable_unknown_fields",
279 absl::Substitute("_internal_metadata_.mutable_unknown_fields<$0>()",
280 unknown_fields_type)},
281 };
282 }
283
SetUnknownFieldsVariable(const Descriptor * descriptor,const Options & options,absl::flat_hash_map<absl::string_view,std::string> * variables)284 void SetUnknownFieldsVariable(
285 const Descriptor* descriptor, const Options& options,
286 absl::flat_hash_map<absl::string_view, std::string>* variables) {
287 for (auto& pair : UnknownFieldsVars(descriptor, options)) {
288 variables->emplace(pair);
289 }
290 }
291
UnderscoresToCamelCase(absl::string_view input,bool cap_next_letter)292 std::string UnderscoresToCamelCase(absl::string_view input,
293 bool cap_next_letter) {
294 std::string result;
295 // Note: I distrust ctype.h due to locales.
296 for (size_t i = 0; i < input.size(); ++i) {
297 if ('a' <= input[i] && input[i] <= 'z') {
298 if (cap_next_letter) {
299 result += input[i] + ('A' - 'a');
300 } else {
301 result += input[i];
302 }
303 cap_next_letter = false;
304 } else if ('A' <= input[i] && input[i] <= 'Z') {
305 // Capital letters are left as-is.
306 result += input[i];
307 cap_next_letter = false;
308 } else if ('0' <= input[i] && input[i] <= '9') {
309 result += input[i];
310 cap_next_letter = true;
311 } else {
312 cap_next_letter = true;
313 }
314 }
315 return result;
316 }
317
// C++ line-comment banner made of '=' characters, terminated by a newline.
// Presumably emitted into generated code between major sections.
const char kThickSeparator[] =
    "// ===================================================================\n";
// C++ line-comment banner made of '-' characters, terminated by a newline.
// Presumably emitted into generated code between minor sections.
const char kThinSeparator[] =
    "// -------------------------------------------------------------------\n";
322
CanInitializeByZeroing(const FieldDescriptor * field,const Options & options,MessageSCCAnalyzer * scc_analyzer)323 bool CanInitializeByZeroing(const FieldDescriptor* field,
324 const Options& options,
325 MessageSCCAnalyzer* scc_analyzer) {
326 static_assert(
327 std::numeric_limits<float>::is_iec559 &&
328 std::numeric_limits<double>::is_iec559,
329 "proto / abseil requires iec559, which has zero initialized floats.");
330
331 if (field->is_repeated() || field->is_extension()) return false;
332 switch (field->cpp_type()) {
333 case FieldDescriptor::CPPTYPE_ENUM:
334 return field->default_value_enum()->number() == 0;
335 case FieldDescriptor::CPPTYPE_INT32:
336 return field->default_value_int32() == 0;
337 case FieldDescriptor::CPPTYPE_INT64:
338 return field->default_value_int64() == 0;
339 case FieldDescriptor::CPPTYPE_UINT32:
340 return field->default_value_uint32() == 0;
341 case FieldDescriptor::CPPTYPE_UINT64:
342 return field->default_value_uint64() == 0;
343 case FieldDescriptor::CPPTYPE_FLOAT:
344 return field->default_value_float() == 0;
345 case FieldDescriptor::CPPTYPE_DOUBLE:
346 return field->default_value_double() == 0;
347 case FieldDescriptor::CPPTYPE_BOOL:
348 return field->default_value_bool() == false;
349 case FieldDescriptor::CPPTYPE_MESSAGE:
350 return true;
351 default:
352 return false;
353 }
354 }
355
CanClearByZeroing(const FieldDescriptor * field)356 bool CanClearByZeroing(const FieldDescriptor* field) {
357 if (field->is_repeated() || field->is_extension()) return false;
358 switch (field->cpp_type()) {
359 case FieldDescriptor::CPPTYPE_ENUM:
360 return field->default_value_enum()->number() == 0;
361 case FieldDescriptor::CPPTYPE_INT32:
362 return field->default_value_int32() == 0;
363 case FieldDescriptor::CPPTYPE_INT64:
364 return field->default_value_int64() == 0;
365 case FieldDescriptor::CPPTYPE_UINT32:
366 return field->default_value_uint32() == 0;
367 case FieldDescriptor::CPPTYPE_UINT64:
368 return field->default_value_uint64() == 0;
369 case FieldDescriptor::CPPTYPE_FLOAT:
370 return field->default_value_float() == 0;
371 case FieldDescriptor::CPPTYPE_DOUBLE:
372 return field->default_value_double() == 0;
373 case FieldDescriptor::CPPTYPE_BOOL:
374 return field->default_value_bool() == false;
375 default:
376 return false;
377 }
378 }
379
380 // Determines if swap can be implemented via memcpy.
HasTrivialSwap(const FieldDescriptor * field,const Options & options,MessageSCCAnalyzer * scc_analyzer)381 bool HasTrivialSwap(const FieldDescriptor* field, const Options& options,
382 MessageSCCAnalyzer* scc_analyzer) {
383 if (field->is_repeated() || field->is_extension()) return false;
384 switch (field->cpp_type()) {
385 case FieldDescriptor::CPPTYPE_ENUM:
386 case FieldDescriptor::CPPTYPE_INT32:
387 case FieldDescriptor::CPPTYPE_INT64:
388 case FieldDescriptor::CPPTYPE_UINT32:
389 case FieldDescriptor::CPPTYPE_UINT64:
390 case FieldDescriptor::CPPTYPE_FLOAT:
391 case FieldDescriptor::CPPTYPE_DOUBLE:
392 case FieldDescriptor::CPPTYPE_BOOL:
393 return true;
394 case FieldDescriptor::CPPTYPE_MESSAGE:
395 // Non-repeated, non-lazy message fields are simply raw pointers, so we
396 // can swap them with memcpy.
397 return !IsLazy(field, options, scc_analyzer);
398 default:
399 return false;
400 }
401 }
402
ClassName(const Descriptor * descriptor)403 std::string ClassName(const Descriptor* descriptor) {
404 const Descriptor* parent = descriptor->containing_type();
405 std::string res;
406 if (parent) absl::StrAppend(&res, ClassName(parent), "_");
407 absl::StrAppend(&res, descriptor->name());
408 if (IsMapEntryMessage(descriptor)) absl::StrAppend(&res, "_DoNotUse");
409 return ResolveKeyword(res);
410 }
411
ClassName(const EnumDescriptor * enum_descriptor)412 std::string ClassName(const EnumDescriptor* enum_descriptor) {
413 if (enum_descriptor->containing_type() == nullptr) {
414 return ResolveKeyword(enum_descriptor->name());
415 } else {
416 return absl::StrCat(ClassName(enum_descriptor->containing_type()), "_",
417 enum_descriptor->name());
418 }
419 }
420
QualifiedClassName(const Descriptor * d,const Options & options)421 std::string QualifiedClassName(const Descriptor* d, const Options& options) {
422 return QualifiedFileLevelSymbol(d->file(), ClassName(d), options);
423 }
424
QualifiedClassName(const EnumDescriptor * d,const Options & options)425 std::string QualifiedClassName(const EnumDescriptor* d,
426 const Options& options) {
427 return QualifiedFileLevelSymbol(d->file(), ClassName(d), options);
428 }
429
QualifiedClassName(const Descriptor * d)430 std::string QualifiedClassName(const Descriptor* d) {
431 return QualifiedClassName(d, Options());
432 }
433
QualifiedClassName(const EnumDescriptor * d)434 std::string QualifiedClassName(const EnumDescriptor* d) {
435 return QualifiedClassName(d, Options());
436 }
437
ExtensionName(const FieldDescriptor * d)438 std::string ExtensionName(const FieldDescriptor* d) {
439 if (const Descriptor* scope = d->extension_scope())
440 return absl::StrCat(ClassName(scope), "::", ResolveKeyword(d->name()));
441 return ResolveKeyword(d->name());
442 }
443
QualifiedExtensionName(const FieldDescriptor * d,const Options & options)444 std::string QualifiedExtensionName(const FieldDescriptor* d,
445 const Options& options) {
446 ABSL_DCHECK(d->is_extension());
447 return QualifiedFileLevelSymbol(d->file(), ExtensionName(d), options);
448 }
449
QualifiedExtensionName(const FieldDescriptor * d)450 std::string QualifiedExtensionName(const FieldDescriptor* d) {
451 return QualifiedExtensionName(d, Options());
452 }
453
ResolveKeyword(absl::string_view name)454 std::string ResolveKeyword(absl::string_view name) {
455 if (Keywords().count(name) > 0) {
456 return absl::StrCat(name, "_");
457 }
458 return std::string(name);
459 }
460
DotsToColons(absl::string_view name)461 std::string DotsToColons(absl::string_view name) {
462 std::vector<std::string> scope = absl::StrSplit(name, '.', absl::SkipEmpty());
463 for (auto& word : scope) {
464 word = ResolveKeyword(word);
465 }
466 return absl::StrJoin(scope, "::");
467 }
468
Namespace(absl::string_view package)469 std::string Namespace(absl::string_view package) {
470 if (package.empty()) return "";
471 return absl::StrCat("::", DotsToColons(package));
472 }
473
Namespace(const FileDescriptor * d)474 std::string Namespace(const FileDescriptor* d) { return Namespace(d, {}); }
Namespace(const FileDescriptor * d,const Options & options)475 std::string Namespace(const FileDescriptor* d, const Options& options) {
476 return Namespace(d->package());
477 }
478
Namespace(const Descriptor * d)479 std::string Namespace(const Descriptor* d) { return Namespace(d, {}); }
Namespace(const Descriptor * d,const Options & options)480 std::string Namespace(const Descriptor* d, const Options& options) {
481 return Namespace(d->file(), options);
482 }
483
Namespace(const FieldDescriptor * d)484 std::string Namespace(const FieldDescriptor* d) { return Namespace(d, {}); }
Namespace(const FieldDescriptor * d,const Options & options)485 std::string Namespace(const FieldDescriptor* d, const Options& options) {
486 return Namespace(d->file(), options);
487 }
488
Namespace(const EnumDescriptor * d)489 std::string Namespace(const EnumDescriptor* d) { return Namespace(d, {}); }
Namespace(const EnumDescriptor * d,const Options & options)490 std::string Namespace(const EnumDescriptor* d, const Options& options) {
491 return Namespace(d->file(), options);
492 }
493
DefaultInstanceType(const Descriptor * descriptor,const Options &,bool split)494 std::string DefaultInstanceType(const Descriptor* descriptor,
495 const Options& /*options*/, bool split) {
496 return ClassName(descriptor) + (split ? "__Impl_Split" : "") +
497 "DefaultTypeInternal";
498 }
499
DefaultInstanceName(const Descriptor * descriptor,const Options &,bool split)500 std::string DefaultInstanceName(const Descriptor* descriptor,
501 const Options& /*options*/, bool split) {
502 return absl::StrCat("_", ClassName(descriptor, false),
503 (split ? "__Impl_Split" : ""), "_default_instance_");
504 }
505
DefaultInstancePtr(const Descriptor * descriptor,const Options & options,bool split)506 std::string DefaultInstancePtr(const Descriptor* descriptor,
507 const Options& options, bool split) {
508 return absl::StrCat(DefaultInstanceName(descriptor, options, split), "ptr_");
509 }
510
QualifiedDefaultInstanceName(const Descriptor * descriptor,const Options & options,bool split)511 std::string QualifiedDefaultInstanceName(const Descriptor* descriptor,
512 const Options& options, bool split) {
513 return QualifiedFileLevelSymbol(
514 descriptor->file(), DefaultInstanceName(descriptor, options, split),
515 options);
516 }
517
QualifiedDefaultInstancePtr(const Descriptor * descriptor,const Options & options,bool split)518 std::string QualifiedDefaultInstancePtr(const Descriptor* descriptor,
519 const Options& options, bool split) {
520 return absl::StrCat(QualifiedDefaultInstanceName(descriptor, options, split),
521 "ptr_");
522 }
523
DescriptorTableName(const FileDescriptor * file,const Options & options)524 std::string DescriptorTableName(const FileDescriptor* file,
525 const Options& options) {
526 return UniqueName("descriptor_table", file, options);
527 }
528
FileDllExport(const FileDescriptor * file,const Options & options)529 std::string FileDllExport(const FileDescriptor* file, const Options& options) {
530 return UniqueName("PROTOBUF_INTERNAL_EXPORT", file, options);
531 }
532
SuperClassName(const Descriptor * descriptor,const Options & options)533 std::string SuperClassName(const Descriptor* descriptor,
534 const Options& options) {
535 if (!HasDescriptorMethods(descriptor->file(), options)) {
536 return absl::StrCat("::", ProtobufNamespace(options), "::MessageLite");
537 }
538 auto simple_base = SimpleBaseClass(descriptor, options);
539 if (simple_base.empty()) {
540 return absl::StrCat("::", ProtobufNamespace(options), "::Message");
541 }
542 return absl::StrCat("::", ProtobufNamespace(options),
543 "::internal::", simple_base);
544 }
545
FieldName(const FieldDescriptor * field)546 std::string FieldName(const FieldDescriptor* field) {
547 std::string result = std::string(field->name());
548 absl::AsciiStrToLower(&result);
549 if (Keywords().count(result) > 0) {
550 result.append("_");
551 }
552 return result;
553 }
554
FieldMemberName(const FieldDescriptor * field,bool split)555 std::string FieldMemberName(const FieldDescriptor* field, bool split) {
556 absl::string_view prefix = "_impl_.";
557 absl::string_view split_prefix = split ? "_split_->" : "";
558 if (field->real_containing_oneof() == nullptr) {
559 return absl::StrCat(prefix, split_prefix, FieldName(field), "_");
560 }
561 // Oneof fields are never split.
562 ABSL_CHECK(!split);
563 return absl::StrCat(prefix, field->containing_oneof()->name(), "_.",
564 FieldName(field), "_");
565 }
566
OneofCaseConstantName(const FieldDescriptor * field)567 std::string OneofCaseConstantName(const FieldDescriptor* field) {
568 ABSL_DCHECK(field->containing_oneof());
569 std::string field_name = UnderscoresToCamelCase(field->name(), true);
570 return absl::StrCat("k", field_name);
571 }
572
QualifiedOneofCaseConstantName(const FieldDescriptor * field)573 std::string QualifiedOneofCaseConstantName(const FieldDescriptor* field) {
574 ABSL_DCHECK(field->containing_oneof());
575 const std::string qualification =
576 QualifiedClassName(field->containing_type());
577 return absl::StrCat(qualification, "::", OneofCaseConstantName(field));
578 }
579
EnumValueName(const EnumValueDescriptor * enum_value)580 std::string EnumValueName(const EnumValueDescriptor* enum_value) {
581 std::string result = std::string(enum_value->name());
582 if (Keywords().count(result) > 0) {
583 result.append("_");
584 }
585 return result;
586 }
587
EstimateAlignmentSize(const FieldDescriptor * field)588 int EstimateAlignmentSize(const FieldDescriptor* field) {
589 if (field == nullptr) return 0;
590 if (field->is_repeated()) return 8;
591 switch (field->cpp_type()) {
592 case FieldDescriptor::CPPTYPE_BOOL:
593 return 1;
594
595 case FieldDescriptor::CPPTYPE_INT32:
596 case FieldDescriptor::CPPTYPE_UINT32:
597 case FieldDescriptor::CPPTYPE_ENUM:
598 case FieldDescriptor::CPPTYPE_FLOAT:
599 return 4;
600
601 case FieldDescriptor::CPPTYPE_INT64:
602 case FieldDescriptor::CPPTYPE_UINT64:
603 case FieldDescriptor::CPPTYPE_DOUBLE:
604 case FieldDescriptor::CPPTYPE_STRING:
605 case FieldDescriptor::CPPTYPE_MESSAGE:
606 return 8;
607 }
608 ABSL_LOG(FATAL) << "Can't get here.";
609 return -1; // Make compiler happy.
610 }
611
EstimateSize(const FieldDescriptor * field)612 int EstimateSize(const FieldDescriptor* field) {
613 if (field == nullptr) return 0;
614 if (field->is_repeated()) {
615 if (field->is_map()) {
616 return sizeof(google::protobuf::Map<int32_t, int32_t>);
617 }
618 return field->cpp_type() < FieldDescriptor::CPPTYPE_STRING || IsCord(field)
619 ? sizeof(RepeatedField<int32_t>)
620 : sizeof(internal::RepeatedPtrFieldBase);
621 }
622 switch (field->cpp_type()) {
623 case FieldDescriptor::CPPTYPE_BOOL:
624 return 1;
625
626 case FieldDescriptor::CPPTYPE_INT32:
627 case FieldDescriptor::CPPTYPE_UINT32:
628 case FieldDescriptor::CPPTYPE_ENUM:
629 case FieldDescriptor::CPPTYPE_FLOAT:
630 return 4;
631
632 case FieldDescriptor::CPPTYPE_INT64:
633 case FieldDescriptor::CPPTYPE_UINT64:
634 case FieldDescriptor::CPPTYPE_DOUBLE:
635 case FieldDescriptor::CPPTYPE_MESSAGE:
636 return 8;
637
638 case FieldDescriptor::CPPTYPE_STRING:
639 if (IsCord(field)) return sizeof(absl::Cord);
640 return sizeof(internal::ArenaStringPtr);
641 }
642 ABSL_LOG(FATAL) << "Can't get here.";
643 return -1; // Make compiler happy.
644 }
645
FieldConstantName(const FieldDescriptor * field)646 std::string FieldConstantName(const FieldDescriptor* field) {
647 std::string field_name = UnderscoresToCamelCase(field->name(), true);
648 std::string result = absl::StrCat("k", field_name, "FieldNumber");
649
650 if (!field->is_extension() &&
651 field->containing_type()->FindFieldByCamelcaseName(
652 field->camelcase_name()) != field) {
653 // This field's camelcase name is not unique. As a hack, add the field
654 // number to the constant name. This makes the constant rather useless,
655 // but what can we do?
656 absl::StrAppend(&result, "_", field->number());
657 }
658
659 return result;
660 }
661
FieldMessageTypeName(const FieldDescriptor * field,const Options & options)662 std::string FieldMessageTypeName(const FieldDescriptor* field,
663 const Options& options) {
664 // Note: The Google-internal version of Protocol Buffers uses this function
665 // as a hook point for hacks to support legacy code.
666 return QualifiedClassName(field->message_type(), options);
667 }
668
StripProto(absl::string_view filename)669 std::string StripProto(absl::string_view filename) {
670 /*
671 * TODO remove this proxy method
672 * once Google's internal codebase will become ready
673 */
674 return compiler::StripProto(filename);
675 }
676
PrimitiveTypeName(FieldDescriptor::CppType type)677 const char* PrimitiveTypeName(FieldDescriptor::CppType type) {
678 switch (type) {
679 case FieldDescriptor::CPPTYPE_INT32:
680 return "::int32_t";
681 case FieldDescriptor::CPPTYPE_INT64:
682 return "::int64_t";
683 case FieldDescriptor::CPPTYPE_UINT32:
684 return "::uint32_t";
685 case FieldDescriptor::CPPTYPE_UINT64:
686 return "::uint64_t";
687 case FieldDescriptor::CPPTYPE_DOUBLE:
688 return "double";
689 case FieldDescriptor::CPPTYPE_FLOAT:
690 return "float";
691 case FieldDescriptor::CPPTYPE_BOOL:
692 return "bool";
693 case FieldDescriptor::CPPTYPE_ENUM:
694 return "int";
695 case FieldDescriptor::CPPTYPE_STRING:
696 return "std::string";
697 case FieldDescriptor::CPPTYPE_MESSAGE:
698 return nullptr;
699
700 // No default because we want the compiler to complain if any new
701 // CppTypes are added.
702 }
703
704 ABSL_LOG(FATAL) << "Can't get here.";
705 return nullptr;
706 }
707
PrimitiveTypeName(const Options & options,FieldDescriptor::CppType type)708 std::string PrimitiveTypeName(const Options& options,
709 FieldDescriptor::CppType type) {
710 switch (type) {
711 case FieldDescriptor::CPPTYPE_INT32:
712 return IntTypeName(options, "int32");
713 case FieldDescriptor::CPPTYPE_INT64:
714 return IntTypeName(options, "int64");
715 case FieldDescriptor::CPPTYPE_UINT32:
716 return IntTypeName(options, "uint32");
717 case FieldDescriptor::CPPTYPE_UINT64:
718 return IntTypeName(options, "uint64");
719 case FieldDescriptor::CPPTYPE_DOUBLE:
720 return "double";
721 case FieldDescriptor::CPPTYPE_FLOAT:
722 return "float";
723 case FieldDescriptor::CPPTYPE_BOOL:
724 return "bool";
725 case FieldDescriptor::CPPTYPE_ENUM:
726 return "int";
727 case FieldDescriptor::CPPTYPE_STRING:
728 return "std::string";
729 case FieldDescriptor::CPPTYPE_MESSAGE:
730 return "";
731
732 // No default because we want the compiler to complain if any new
733 // CppTypes are added.
734 }
735
736 ABSL_LOG(FATAL) << "Can't get here.";
737 return "";
738 }
739
DeclaredTypeMethodName(FieldDescriptor::Type type)740 const char* DeclaredTypeMethodName(FieldDescriptor::Type type) {
741 switch (type) {
742 case FieldDescriptor::TYPE_INT32:
743 return "Int32";
744 case FieldDescriptor::TYPE_INT64:
745 return "Int64";
746 case FieldDescriptor::TYPE_UINT32:
747 return "UInt32";
748 case FieldDescriptor::TYPE_UINT64:
749 return "UInt64";
750 case FieldDescriptor::TYPE_SINT32:
751 return "SInt32";
752 case FieldDescriptor::TYPE_SINT64:
753 return "SInt64";
754 case FieldDescriptor::TYPE_FIXED32:
755 return "Fixed32";
756 case FieldDescriptor::TYPE_FIXED64:
757 return "Fixed64";
758 case FieldDescriptor::TYPE_SFIXED32:
759 return "SFixed32";
760 case FieldDescriptor::TYPE_SFIXED64:
761 return "SFixed64";
762 case FieldDescriptor::TYPE_FLOAT:
763 return "Float";
764 case FieldDescriptor::TYPE_DOUBLE:
765 return "Double";
766
767 case FieldDescriptor::TYPE_BOOL:
768 return "Bool";
769 case FieldDescriptor::TYPE_ENUM:
770 return "Enum";
771
772 case FieldDescriptor::TYPE_STRING:
773 return "String";
774 case FieldDescriptor::TYPE_BYTES:
775 return "Bytes";
776 case FieldDescriptor::TYPE_GROUP:
777 return "Group";
778 case FieldDescriptor::TYPE_MESSAGE:
779 return "Message";
780
781 // No default because we want the compiler to complain if any new
782 // types are added.
783 }
784 ABSL_LOG(FATAL) << "Can't get here.";
785 return "";
786 }
787
Int32ToString(int number)788 std::string Int32ToString(int number) {
789 if (number == std::numeric_limits<int32_t>::min()) {
790 // This needs to be special-cased, see explanation here:
791 // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=52661
792 return absl::StrCat(number + 1, " - 1");
793 } else {
794 return absl::StrCat(number);
795 }
796 }
797
Int64ToString(int64_t number)798 static std::string Int64ToString(int64_t number) {
799 if (number == std::numeric_limits<int64_t>::min()) {
800 // This needs to be special-cased, see explanation here:
801 // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=52661
802 return absl::StrCat("::int64_t{", number + 1, "} - 1");
803 }
804 return absl::StrCat("::int64_t{", number, "}");
805 }
806
UInt64ToString(uint64_t number)807 static std::string UInt64ToString(uint64_t number) {
808 return absl::StrCat("::uint64_t{", number, "u}");
809 }
810
DefaultValue(const FieldDescriptor * field)811 std::string DefaultValue(const FieldDescriptor* field) {
812 return DefaultValue(Options(), field);
813 }
814
DefaultValue(const Options & options,const FieldDescriptor * field)815 std::string DefaultValue(const Options& options, const FieldDescriptor* field) {
816 switch (field->cpp_type()) {
817 case FieldDescriptor::CPPTYPE_INT32:
818 return Int32ToString(field->default_value_int32());
819 case FieldDescriptor::CPPTYPE_UINT32:
820 return absl::StrCat(field->default_value_uint32(), "u");
821 case FieldDescriptor::CPPTYPE_INT64:
822 return Int64ToString(field->default_value_int64());
823 case FieldDescriptor::CPPTYPE_UINT64:
824 return UInt64ToString(field->default_value_uint64());
825 case FieldDescriptor::CPPTYPE_DOUBLE: {
826 double value = field->default_value_double();
827 if (value == std::numeric_limits<double>::infinity()) {
828 return "std::numeric_limits<double>::infinity()";
829 } else if (value == -std::numeric_limits<double>::infinity()) {
830 return "-std::numeric_limits<double>::infinity()";
831 } else if (value != value) {
832 return "std::numeric_limits<double>::quiet_NaN()";
833 } else {
834 return io::SimpleDtoa(value);
835 }
836 }
837 case FieldDescriptor::CPPTYPE_FLOAT: {
838 float value = field->default_value_float();
839 if (value == std::numeric_limits<float>::infinity()) {
840 return "std::numeric_limits<float>::infinity()";
841 } else if (value == -std::numeric_limits<float>::infinity()) {
842 return "-std::numeric_limits<float>::infinity()";
843 } else if (value != value) {
844 return "std::numeric_limits<float>::quiet_NaN()";
845 } else {
846 std::string float_value = io::SimpleFtoa(value);
847 // If floating point value contains a period (.) or an exponent
848 // (either E or e), then append suffix 'f' to make it a float
849 // literal.
850 if (float_value.find_first_of(".eE") != std::string::npos) {
851 float_value.push_back('f');
852 }
853 return float_value;
854 }
855 }
856 case FieldDescriptor::CPPTYPE_BOOL:
857 return field->default_value_bool() ? "true" : "false";
858 case FieldDescriptor::CPPTYPE_ENUM:
859 // Lazy: Generate a static_cast because we don't have a helper function
860 // that constructs the full name of an enum value.
861 return absl::Substitute(
862 "static_cast< $0 >($1)", ClassName(field->enum_type(), true),
863 Int32ToString(field->default_value_enum()->number()));
864 case FieldDescriptor::CPPTYPE_STRING:
865 return absl::StrCat(
866 "\"", EscapeTrigraphs(absl::CEscape(field->default_value_string())),
867 "\"");
868 case FieldDescriptor::CPPTYPE_MESSAGE:
869 return absl::StrCat("*", FieldMessageTypeName(field, options),
870 "::internal_default_instance()");
871 }
872 // Can't actually get here; make compiler happy. (We could add a default
873 // case above but then we wouldn't get the nice compiler warning when a
874 // new type is added.)
875 ABSL_LOG(FATAL) << "Can't get here.";
876 return "";
877 }
878
879 // Convert a file name into a valid identifier.
FilenameIdentifier(absl::string_view filename)880 std::string FilenameIdentifier(absl::string_view filename) {
881 std::string result;
882 for (size_t i = 0; i < filename.size(); ++i) {
883 if (absl::ascii_isalnum(filename[i])) {
884 result.push_back(filename[i]);
885 } else {
886 // Not alphanumeric. To avoid any possibility of name conflicts we
887 // use the hex code for the character.
888 absl::StrAppend(&result, "_",
889 absl::Hex(static_cast<uint8_t>(filename[i])));
890 }
891 }
892 return result;
893 }
894
UniqueName(absl::string_view name,absl::string_view filename,const Options & options)895 std::string UniqueName(absl::string_view name, absl::string_view filename,
896 const Options& options) {
897 return absl::StrCat(name, "_", FilenameIdentifier(filename));
898 }
899
900 // Return the qualified C++ name for a file level symbol.
QualifiedFileLevelSymbol(const FileDescriptor * file,absl::string_view name,const Options & options)901 std::string QualifiedFileLevelSymbol(const FileDescriptor* file,
902 absl::string_view name,
903 const Options& options) {
904 if (file->package().empty()) {
905 return absl::StrCat("::", name);
906 }
907 return absl::StrCat(Namespace(file, options), "::", name);
908 }
909
910 // Escape C++ trigraphs by escaping question marks to \?
EscapeTrigraphs(absl::string_view to_escape)911 std::string EscapeTrigraphs(absl::string_view to_escape) {
912 return absl::StrReplaceAll(to_escape, {{"?", "\\?"}});
913 }
914
915 // Escaped function name to eliminate naming conflict.
SafeFunctionName(const Descriptor * descriptor,const FieldDescriptor * field,absl::string_view prefix)916 std::string SafeFunctionName(const Descriptor* descriptor,
917 const FieldDescriptor* field,
918 absl::string_view prefix) {
919 // Do not use FieldName() since it will escape keywords.
920 std::string name = std::string(field->name());
921 absl::AsciiStrToLower(&name);
922 std::string function_name = absl::StrCat(prefix, name);
923 if (descriptor->FindFieldByName(function_name)) {
924 // Single underscore will also make it conflicting with the private data
925 // member. We use double underscore to escape function names.
926 function_name.append("__");
927 } else if (Keywords().count(name) > 0) {
928 // If the field name is a keyword, we append the underscore back to keep it
929 // consistent with other function names.
930 function_name.append("_");
931 }
932 return function_name;
933 }
934
IsProfileDriven(const Options & options)935 bool IsProfileDriven(const Options& options) {
936 return !options.bootstrap && !options.opensource_runtime &&
937 options.access_info_map != nullptr;
938 }
939
// This implementation ignores both arguments and never classifies a field as
// rarely present.
bool IsRarelyPresent(const FieldDescriptor* field, const Options& options) {
  static_cast<void>(field);
  static_cast<void>(options);
  return false;
}
943
// This implementation ignores both arguments and never classifies a field as
// likely present.
bool IsLikelyPresent(const FieldDescriptor* field, const Options& options) {
  static_cast<void>(field);
  static_cast<void>(options);
  return false;
}
947
// This implementation ignores both arguments and reports a presence
// probability of 1 for every field.
float GetPresenceProbability(const FieldDescriptor* field,
                             const Options& options) {
  static_cast<void>(field);
  static_cast<void>(options);
  return 1.0f;
}
952
IsStringInliningEnabled(const Options & options)953 bool IsStringInliningEnabled(const Options& options) {
954 return options.force_inline_string || IsProfileDriven(options);
955 }
956
CanStringBeInlined(const FieldDescriptor * field)957 bool CanStringBeInlined(const FieldDescriptor* field) {
958 // TODO: Handle inlining for any.proto.
959 if (IsAnyMessage(field->containing_type())) return false;
960 if (field->containing_type()->options().map_entry()) return false;
961 if (field->is_repeated()) return false;
962
963 // We rely on has bits to distinguish field presence for release_$name$. When
964 // there is no hasbit, we cannot use the address of the string instance when
965 // the field has been inlined.
966 if (!internal::cpp::HasHasbit(field)) return false;
967
968 if (!IsString(field)) return false;
969 if (!field->default_value_string().empty()) return false;
970
971 return true;
972 }
973
// This implementation never inlines strings: both arguments are ignored and
// the answer is always false.
bool IsStringInlined(const FieldDescriptor* field, const Options& options) {
  static_cast<void>(field);
  static_cast<void>(options);
  return false;
}
979
HasLazyFields(const Descriptor * descriptor,const Options & options,MessageSCCAnalyzer * scc_analyzer)980 static bool HasLazyFields(const Descriptor* descriptor, const Options& options,
981 MessageSCCAnalyzer* scc_analyzer) {
982 for (int field_idx = 0; field_idx < descriptor->field_count(); field_idx++) {
983 if (IsLazy(descriptor->field(field_idx), options, scc_analyzer)) {
984 return true;
985 }
986 }
987 for (int idx = 0; idx < descriptor->extension_count(); idx++) {
988 if (IsLazy(descriptor->extension(idx), options, scc_analyzer)) {
989 return true;
990 }
991 }
992 for (int idx = 0; idx < descriptor->nested_type_count(); idx++) {
993 if (HasLazyFields(descriptor->nested_type(idx), options, scc_analyzer)) {
994 return true;
995 }
996 }
997 return false;
998 }
999
1000 // Does the given FileDescriptor use lazy fields?
HasLazyFields(const FileDescriptor * file,const Options & options,MessageSCCAnalyzer * scc_analyzer)1001 bool HasLazyFields(const FileDescriptor* file, const Options& options,
1002 MessageSCCAnalyzer* scc_analyzer) {
1003 for (int i = 0; i < file->message_type_count(); i++) {
1004 const Descriptor* descriptor(file->message_type(i));
1005 if (HasLazyFields(descriptor, options, scc_analyzer)) {
1006 return true;
1007 }
1008 }
1009 for (int field_idx = 0; field_idx < file->extension_count(); field_idx++) {
1010 if (IsLazy(file->extension(field_idx), options, scc_analyzer)) {
1011 return true;
1012 }
1013 }
1014 return false;
1015 }
1016
// This implementation never requests verification for a message; all
// arguments are ignored.
bool ShouldVerify(const Descriptor* descriptor, const Options& options,
                  MessageSCCAnalyzer* scc_analyzer) {
  static_cast<void>(descriptor);
  static_cast<void>(options);
  static_cast<void>(scc_analyzer);
  return false;
}
1024
// This implementation never requests verification for a file; all arguments
// are ignored.
bool ShouldVerify(const FileDescriptor* file, const Options& options,
                  MessageSCCAnalyzer* scc_analyzer) {
  static_cast<void>(file);
  static_cast<void>(options);
  static_cast<void>(scc_analyzer);
  return false;
}
1032
// This implementation never requests recursive verification; `field` is
// ignored.
bool ShouldVerifyRecursively(const FieldDescriptor* field) {
  static_cast<void>(field);
  return false;
}
1037
ShouldVerifySimple(const Descriptor * descriptor)1038 VerifySimpleType ShouldVerifySimple(const Descriptor* descriptor) {
1039 (void)descriptor;
1040 return VerifySimpleType::kCustom;
1041 }
1042
// Message-level overload: this implementation never splits a message.
bool ShouldSplit(const Descriptor*, const Options&) {
  return false;
}
// Field-level overload: this implementation never splits a field.
bool ShouldSplit(const FieldDescriptor*, const Options&) {
  return false;
}
1045
// This implementation never forces allocation on construction; both arguments
// are ignored.
bool ShouldForceAllocationOnConstruction(const Descriptor* desc,
                                         const Options& options) {
  static_cast<void>(desc);
  static_cast<void>(options);
  return false;
}
1052
// With no profile to consult, every message is assumed to be present, so this
// always returns true and ignores its arguments.
bool IsPresentMessage(const Descriptor* descriptor, const Options& options) {
  static_cast<void>(descriptor);
  static_cast<void>(options);
  return true;
}
1059
FindHottestField(const std::vector<const FieldDescriptor * > & fields,const Options & options)1060 const FieldDescriptor* FindHottestField(
1061 const std::vector<const FieldDescriptor*>& fields, const Options& options) {
1062 (void)fields;
1063 (void)options;
1064 return nullptr;
1065 }
1066
HasRepeatedFields(const Descriptor * descriptor)1067 static bool HasRepeatedFields(const Descriptor* descriptor) {
1068 for (int i = 0; i < descriptor->field_count(); ++i) {
1069 if (descriptor->field(i)->label() == FieldDescriptor::LABEL_REPEATED) {
1070 return true;
1071 }
1072 }
1073 for (int i = 0; i < descriptor->nested_type_count(); ++i) {
1074 if (HasRepeatedFields(descriptor->nested_type(i))) return true;
1075 }
1076 return false;
1077 }
1078
HasRepeatedFields(const FileDescriptor * file)1079 bool HasRepeatedFields(const FileDescriptor* file) {
1080 for (int i = 0; i < file->message_type_count(); ++i) {
1081 if (HasRepeatedFields(file->message_type(i))) return true;
1082 }
1083 return false;
1084 }
1085
IsStringPieceField(const FieldDescriptor * field,const Options & options)1086 static bool IsStringPieceField(const FieldDescriptor* field,
1087 const Options& options) {
1088 return field->cpp_type() == FieldDescriptor::CPPTYPE_STRING &&
1089 internal::cpp::EffectiveStringCType(field) ==
1090 FieldOptions::STRING_PIECE;
1091 }
1092
HasStringPieceFields(const Descriptor * descriptor,const Options & options)1093 static bool HasStringPieceFields(const Descriptor* descriptor,
1094 const Options& options) {
1095 for (int i = 0; i < descriptor->field_count(); ++i) {
1096 if (IsStringPieceField(descriptor->field(i), options)) return true;
1097 }
1098 for (int i = 0; i < descriptor->nested_type_count(); ++i) {
1099 if (HasStringPieceFields(descriptor->nested_type(i), options)) return true;
1100 }
1101 return false;
1102 }
1103
HasStringPieceFields(const FileDescriptor * file,const Options & options)1104 bool HasStringPieceFields(const FileDescriptor* file, const Options& options) {
1105 for (int i = 0; i < file->message_type_count(); ++i) {
1106 if (HasStringPieceFields(file->message_type(i), options)) return true;
1107 }
1108 return false;
1109 }
1110
IsCordField(const FieldDescriptor * field,const Options & options)1111 static bool IsCordField(const FieldDescriptor* field, const Options& options) {
1112 return field->cpp_type() == FieldDescriptor::CPPTYPE_STRING &&
1113 internal::cpp::EffectiveStringCType(field) == FieldOptions::CORD;
1114 }
1115
HasCordFields(const Descriptor * descriptor,const Options & options)1116 static bool HasCordFields(const Descriptor* descriptor,
1117 const Options& options) {
1118 for (int i = 0; i < descriptor->field_count(); ++i) {
1119 if (IsCordField(descriptor->field(i), options)) return true;
1120 }
1121 for (int i = 0; i < descriptor->nested_type_count(); ++i) {
1122 if (HasCordFields(descriptor->nested_type(i), options)) return true;
1123 }
1124 return false;
1125 }
1126
HasCordFields(const FileDescriptor * file,const Options & options)1127 bool HasCordFields(const FileDescriptor* file, const Options& options) {
1128 for (int i = 0; i < file->message_type_count(); ++i) {
1129 if (HasCordFields(file->message_type(i), options)) return true;
1130 }
1131 return false;
1132 }
1133
HasExtensionsOrExtendableMessage(const Descriptor * descriptor)1134 static bool HasExtensionsOrExtendableMessage(const Descriptor* descriptor) {
1135 if (descriptor->extension_range_count() > 0) return true;
1136 if (descriptor->extension_count() > 0) return true;
1137 for (int i = 0; i < descriptor->nested_type_count(); ++i) {
1138 if (HasExtensionsOrExtendableMessage(descriptor->nested_type(i))) {
1139 return true;
1140 }
1141 }
1142 return false;
1143 }
1144
HasExtensionsOrExtendableMessage(const FileDescriptor * file)1145 bool HasExtensionsOrExtendableMessage(const FileDescriptor* file) {
1146 if (file->extension_count() > 0) return true;
1147 for (int i = 0; i < file->message_type_count(); ++i) {
1148 if (HasExtensionsOrExtendableMessage(file->message_type(i))) return true;
1149 }
1150 return false;
1151 }
1152
HasMapFields(const Descriptor * descriptor)1153 static bool HasMapFields(const Descriptor* descriptor) {
1154 for (int i = 0; i < descriptor->field_count(); ++i) {
1155 if (descriptor->field(i)->is_map()) {
1156 return true;
1157 }
1158 }
1159 for (int i = 0; i < descriptor->nested_type_count(); ++i) {
1160 if (HasMapFields(descriptor->nested_type(i))) return true;
1161 }
1162 return false;
1163 }
1164
HasMapFields(const FileDescriptor * file)1165 bool HasMapFields(const FileDescriptor* file) {
1166 for (int i = 0; i < file->message_type_count(); ++i) {
1167 if (HasMapFields(file->message_type(i))) return true;
1168 }
1169 return false;
1170 }
1171
// This implementation reports that no message has a V2 table; `descriptor`
// is ignored.
bool HasV2Table(const Descriptor* descriptor) {
  static_cast<void>(descriptor);
  return false;
}
1175
HasV2Table(const FileDescriptor * file)1176 bool HasV2Table(const FileDescriptor* file) {
1177 for (int i = 0; i < file->message_type_count(); ++i) {
1178 if (HasV2Table(file->message_type(i))) return true;
1179 }
1180 return false;
1181 }
1182
HasEnumDefinitions(const Descriptor * message_type)1183 static bool HasEnumDefinitions(const Descriptor* message_type) {
1184 if (message_type->enum_type_count() > 0) return true;
1185 for (int i = 0; i < message_type->nested_type_count(); ++i) {
1186 if (HasEnumDefinitions(message_type->nested_type(i))) return true;
1187 }
1188 return false;
1189 }
1190
HasEnumDefinitions(const FileDescriptor * file)1191 bool HasEnumDefinitions(const FileDescriptor* file) {
1192 if (file->enum_type_count() > 0) return true;
1193 for (int i = 0; i < file->message_type_count(); ++i) {
1194 if (HasEnumDefinitions(file->message_type(i))) return true;
1195 }
1196 return false;
1197 }
1198
IsStringOrMessage(const FieldDescriptor * field)1199 bool IsStringOrMessage(const FieldDescriptor* field) {
1200 switch (field->cpp_type()) {
1201 case FieldDescriptor::CPPTYPE_INT32:
1202 case FieldDescriptor::CPPTYPE_INT64:
1203 case FieldDescriptor::CPPTYPE_UINT32:
1204 case FieldDescriptor::CPPTYPE_UINT64:
1205 case FieldDescriptor::CPPTYPE_DOUBLE:
1206 case FieldDescriptor::CPPTYPE_FLOAT:
1207 case FieldDescriptor::CPPTYPE_BOOL:
1208 case FieldDescriptor::CPPTYPE_ENUM:
1209 return false;
1210 case FieldDescriptor::CPPTYPE_STRING:
1211 case FieldDescriptor::CPPTYPE_MESSAGE:
1212 return true;
1213 }
1214
1215 ABSL_LOG(FATAL) << "Can't get here.";
1216 return false;
1217 }
1218
IsAnyMessage(const FileDescriptor * descriptor)1219 bool IsAnyMessage(const FileDescriptor* descriptor) {
1220 return descriptor->name() == kAnyProtoFile;
1221 }
1222
IsAnyMessage(const Descriptor * descriptor)1223 bool IsAnyMessage(const Descriptor* descriptor) {
1224 return descriptor->name() == kAnyMessageName &&
1225 IsAnyMessage(descriptor->file());
1226 }
1227
IsWellKnownMessage(const FileDescriptor * file)1228 bool IsWellKnownMessage(const FileDescriptor* file) {
1229 static const auto* well_known_files = new absl::flat_hash_set<std::string>{
1230 "google/protobuf/any.proto",
1231 "google/protobuf/api.proto",
1232 "google/protobuf/compiler/plugin.proto",
1233 "google/protobuf/descriptor.proto",
1234 "google/protobuf/duration.proto",
1235 "google/protobuf/empty.proto",
1236 "google/protobuf/field_mask.proto",
1237 "google/protobuf/source_context.proto",
1238 "google/protobuf/struct.proto",
1239 "google/protobuf/timestamp.proto",
1240 "google/protobuf/type.proto",
1241 "google/protobuf/wrappers.proto",
1242 };
1243 return well_known_files->find(file->name()) != well_known_files->end();
1244 }
1245
// Moves the printer from the currently open namespace stack to the namespace
// given by `name`.
//
// `name` is split on "::" (empty components are skipped). Namespaces that are
// open but not part of the common leading prefix with the new stack are
// closed, innermost first; then the remaining components of the new stack are
// opened in order. Finally the new stack replaces `name_stack_`.
// `loc` is forwarded to every Emit() call.
void NamespaceOpener::ChangeTo(absl::string_view name,
                               io::Printer::SourceLocation loc) {
  std::vector<std::string> new_stack =
      absl::StrSplit(name, "::", absl::SkipEmpty());
  // Length of the longest prefix shared by the old and the new stack.
  size_t len = std::min(name_stack_.size(), new_stack.size());
  size_t common_idx = 0;
  while (common_idx < len) {
    if (name_stack_[common_idx] != new_stack[common_idx]) {
      break;
    }
    ++common_idx;
  }

  // Close everything beyond the shared prefix, starting with the innermost
  // namespace (hence the reverse iteration and the `i - 1` index).
  for (size_t i = name_stack_.size(); i > common_idx; i--) {
    p_->Emit({{"ns", name_stack_[i - 1]}}, R"(
      } // namespace $ns$
    )",
             loc);
  }
  // Open the namespaces of the new stack that are not part of the prefix.
  for (size_t i = common_idx; i < new_stack.size(); ++i) {
    p_->Emit({{"ns", new_stack[i]}}, R"(
      namespace $ns$ {
    )",
             loc);
  }

  name_stack_ = std::move(new_stack);
}
1274
// Emits the UTF-8 validation call for `field` through printer `p`.
//
// Nothing is emitted unless the field's type is TYPE_STRING. The check mode
// comes from internal::cpp::GetUtf8CheckMode(), which is given the field and
// whether the file is optimized for LITE_RUNTIME:
//   * kStrict: calls WireFormatLite::<strict_function>; when `for_parse` is
//     true the call is wrapped in DO_(...) and uses PARSE, otherwise it uses
//     SERIALIZE.
//   * kVerify: calls WireFormat::<verify_function> with PARSE or SERIALIZE.
//   * kNone:   emits nothing.
// `params` is substituted verbatim in front of the operation argument.
// NOTE(review): $pbi$ and $pkg.Msg.field$ are not defined in this function;
// presumably they are already set on the printer by the caller -- confirm.
static void GenerateUtf8CheckCode(io::Printer* p, const FieldDescriptor* field,
                                  const Options& options, bool for_parse,
                                  absl::string_view params,
                                  absl::string_view strict_function,
                                  absl::string_view verify_function) {
  if (field->type() != FieldDescriptor::TYPE_STRING) return;

  // Variables referenced by the templates below; `v` keeps them registered
  // for the remainder of this function.
  auto v = p->WithVars({
      {"params", params},
      {"Strict", strict_function},
      {"Verify", verify_function},
  });

  bool is_lite =
      GetOptimizeFor(field->file(), options) == FileOptions::LITE_RUNTIME;
  switch (internal::cpp::GetUtf8CheckMode(field, is_lite)) {
    case internal::cpp::Utf8CheckMode::kStrict:
      if (for_parse) {
        p->Emit(R"cc(
          DO_($pbi$::WireFormatLite::$Strict$(
              $params$ $pbi$::WireFormatLite::PARSE, "$pkg.Msg.field$"));
        )cc");
      } else {
        p->Emit(R"cc(
          $pbi$::WireFormatLite::$Strict$(
              $params$ $pbi$::WireFormatLite::SERIALIZE, "$pkg.Msg.field$");
        )cc");
      }
      break;

    case internal::cpp::Utf8CheckMode::kVerify:
      if (for_parse) {
        p->Emit(R"cc(
          $pbi$::WireFormat::$Verify$($params$ $pbi$::WireFormat::PARSE,
                                      "$pkg.Msg.field$");
        )cc");
      } else {
        p->Emit(R"cc(
          $pbi$::WireFormat::$Verify$($params$ $pbi$::WireFormat::SERIALIZE,
                                      "$pkg.Msg.field$");
        )cc");
      }
      break;

    case internal::cpp::Utf8CheckMode::kNone:
      break;
  }
}
1323
GenerateUtf8CheckCodeForString(const FieldDescriptor * field,const Options & options,bool for_parse,absl::string_view parameters,const Formatter & format)1324 void GenerateUtf8CheckCodeForString(const FieldDescriptor* field,
1325 const Options& options, bool for_parse,
1326 absl::string_view parameters,
1327 const Formatter& format) {
1328 GenerateUtf8CheckCode(format.printer(), field, options, for_parse, parameters,
1329 "VerifyUtf8String", "VerifyUTF8StringNamedField");
1330 }
1331
GenerateUtf8CheckCodeForCord(const FieldDescriptor * field,const Options & options,bool for_parse,absl::string_view parameters,const Formatter & format)1332 void GenerateUtf8CheckCodeForCord(const FieldDescriptor* field,
1333 const Options& options, bool for_parse,
1334 absl::string_view parameters,
1335 const Formatter& format) {
1336 GenerateUtf8CheckCode(format.printer(), field, options, for_parse, parameters,
1337 "VerifyUtf8Cord", "VerifyUTF8CordNamedField");
1338 }
1339
GenerateUtf8CheckCodeForString(io::Printer * p,const FieldDescriptor * field,const Options & options,bool for_parse,absl::string_view parameters)1340 void GenerateUtf8CheckCodeForString(io::Printer* p,
1341 const FieldDescriptor* field,
1342 const Options& options, bool for_parse,
1343 absl::string_view parameters) {
1344 GenerateUtf8CheckCode(p, field, options, for_parse, parameters,
1345 "VerifyUtf8String", "VerifyUTF8StringNamedField");
1346 }
1347
GenerateUtf8CheckCodeForCord(io::Printer * p,const FieldDescriptor * field,const Options & options,bool for_parse,absl::string_view parameters)1348 void GenerateUtf8CheckCodeForCord(io::Printer* p, const FieldDescriptor* field,
1349 const Options& options, bool for_parse,
1350 absl::string_view parameters) {
1351 GenerateUtf8CheckCode(p, field, options, for_parse, parameters,
1352 "VerifyUtf8Cord", "VerifyUTF8CordNamedField");
1353 }
1354
FlattenMessagesInFile(const FileDescriptor * file,std::vector<const Descriptor * > * result)1355 void FlattenMessagesInFile(const FileDescriptor* file,
1356 std::vector<const Descriptor*>* result) {
1357 internal::cpp::VisitDescriptorsInFileOrder(file,
1358 [&](const Descriptor* descriptor) {
1359 result->push_back(descriptor);
1360 return std::false_type{};
1361 });
1362 }
1363
// TopologicalSortMessagesInFile topologically sorts and returns a vector of
// proto descriptors defined in the file provided as input. The underlying
// graph is defined using dependency relationship between protos. For example,
// if proto A contains proto B as a member, then proto B would be ordered before
// proto A in a topological ordering, assuming there is no mutual dependence
// between the two protos. The topological order is used to emit proto
// declarations so that a proto is declared after all the protos it is dependent
// on have been declared (again assuming no mutual dependence). This is needed
// in cases where we may declare proto B as a member of proto A using an object,
// instead of a pointer.
//
// The proto dependency graph can have cycles. So instead of directly working
// with protos, we compute strongly connected components (SCCs) composed of
// protos with mutual dependence. The dependency graph on SCCs is a directed
// acyclic graph (DAG) and therefore a topological order can be computed for it
// i.e. an order where an SCC is ordered after all other SCCs it is dependent on
// have been ordered.
//
// The function below first constructs the SCC graph and then computes a
// deterministic topological order for the graph.
//
// For computing the SCC graph, we follow the following steps:
// 1. Collect the descriptors for the messages in the file.
// 2. Construct a map for descriptor to SCC mapping.
// 3. Construct a map for dependence between SCCs, referred to as
// child_to_parent_scc_map below. This map is constructed by running a BFS on
// the SCCs.
//
// For computing a deterministic topological order on the graph computed in step
// 3 above, we do the following:
// 1. Since the graph on SCCs is a DAG, therefore there will be at least one SCC
// that does not depend on other SCCs. We first construct a list of all such
// SCCs.
// 2. Next we run a BFS starting with the list of SCCs computed in step 1. For
// each SCC, we track the number of the SCC it is dependent on and the number of
// those SCC that have been ordered. Once all the SCCs an SCC is dependent on
// have been ordered, this SCC is added to list of SCCs that are to be ordered
// next.
// 3. Within an SCC, the descriptors are ordered on the basis of the full_name()
// of the descriptors.
//
// Returns an empty vector when the file defines no messages. Only descriptors
// defined in `file` appear in the result, even though the traversal follows
// SCC children regardless of where they are defined.
std::vector<const Descriptor*> TopologicalSortMessagesInFile(
    const FileDescriptor* file, MessageSCCAnalyzer& scc_analyzer) {
  // Collect the messages defined in this file.
  std::vector<const Descriptor*> messages_in_file = FlattenMessagesInFile(file);
  if (messages_in_file.empty()) return {};
  // Populate the map from the descriptor to the SCC to which the descriptor
  // belongs.
  absl::flat_hash_map<const Descriptor*, const SCC*> descriptor_to_scc_map;
  descriptor_to_scc_map.reserve(messages_in_file.size());
  for (const Descriptor* d : messages_in_file) {
    descriptor_to_scc_map.emplace(d, scc_analyzer.GetSCC(d));
  }
  ABSL_DCHECK(messages_in_file.size() == descriptor_to_scc_map.size())
      << "messages_in_file has duplicate messages!";
  // Each parent SCC has information about the child SCCs i.e. SCCs for fields
  // that are contained in the protos that belong to the parent SCC. Use this
  // information to construct the inverse map from child SCC to parent SCC.
  absl::flat_hash_map<const SCC*, absl::flat_hash_set<const SCC*>>
      child_to_parent_scc_map;
  // For recording the number of edges from each SCC to other SCCs in the
  // forward map.
  absl::flat_hash_map<const SCC*, int> scc_to_outgoing_edges_map;
  // Seed the BFS with the SCC of every message in the file. An SCC holding
  // several of these messages is pushed once per message; reprocessing it is
  // harmless because edges are only counted on first insertion below.
  std::queue<const SCC*> sccs_to_process;
  for (const auto& p : descriptor_to_scc_map) {
    sccs_to_process.push(p.second);
  }
  // Run a BFS to fill the two data structures: child_to_parent_scc_map and
  // scc_to_outgoing_edges_map.
  while (!sccs_to_process.empty()) {
    const SCC* scc = sccs_to_process.front();
    sccs_to_process.pop();
    // operator[] also registers SCCs without children, with a count of 0.
    auto& count = scc_to_outgoing_edges_map[scc];
    for (const auto& child : scc->children) {
      // Test whether this child has been seen thus far. We do not know if the
      // children SCC vector contains unique children SCC.
      auto& parent_set = child_to_parent_scc_map[child];
      if (parent_set.empty()) {
        // Just added.
        sccs_to_process.push(child);
      }
      // Count the edge scc -> child only the first time it is recorded.
      auto ret = parent_set.insert(scc);
      if (ret.second) {
        ++count;
      }
    }
  }
  std::vector<const SCC*> next_scc_q;
  // Find out the SCCs that do not have an outgoing edge i.e. the protos in this
  // SCC do not depend on protos other than the ones in this SCC.
  for (const auto& p : scc_to_outgoing_edges_map) {
    if (p.second == 0) {
      next_scc_q.push_back(p.first);
    }
  }
  ABSL_DCHECK(!next_scc_q.empty()) << "No independent components!";
  // Topologically sort the SCCs.
  // If an SCC no longer has an outgoing edge i.e. all the SCCs it depends on
  // have been ordered, then this SCC is now a candidate for ordering.
  std::vector<const Descriptor*> sorted_messages;
  while (!next_scc_q.empty()) {
    std::vector<const SCC*> current_scc_q;
    current_scc_q.swap(next_scc_q);
    // SCCs present in the current_scc_q are topologically equivalent to each
    // other. Therefore they can be added to the output in any order. We sort
    // these SCCs by the full_name() of the first descriptor that belongs to the
    // SCC. This works well since the descriptors in each SCC are sorted by
    // full_name() and also that a descriptor can be part of only one SCC.
    std::sort(current_scc_q.begin(), current_scc_q.end(),
              [](const SCC* a, const SCC* b) {
                ABSL_DCHECK(!a->descriptors.empty()) << "No descriptors!";
                ABSL_DCHECK(!b->descriptors.empty()) << "No descriptors!";
                const Descriptor* ad = a->descriptors[0];
                const Descriptor* bd = b->descriptors[0];
                return ad->full_name() < bd->full_name();
              });
    // Note: SCCs are taken from the back of the ascending-sorted vector, so
    // within one batch they are emitted in descending full_name() order.
    while (!current_scc_q.empty()) {
      const SCC* scc = current_scc_q.back();
      current_scc_q.pop_back();
      // Messages in an SCC are already sorted on full_name(). So we can emit
      // them right away.
      for (const Descriptor* d : scc->descriptors) {
        // Only push messages that are defined in the file.
        if (descriptor_to_scc_map.contains(d)) {
          sorted_messages.push_back(d);
        }
      }
      // Find all the SCCs that are dependent on the current SCC.
      const auto& parents = child_to_parent_scc_map.find(scc);
      if (parents == child_to_parent_scc_map.end()) continue;
      for (const SCC* parent : parents->second) {
        auto it = scc_to_outgoing_edges_map.find(parent);
        ABSL_CHECK(it != scc_to_outgoing_edges_map.end());
        ABSL_CHECK(it->second > 0);
        // Reduce the dependency count for the SCC. In case the dependency
        // count reaches 0, add the SCC to the list of SCCs to be ordered next.
        it->second--;
        if (it->second == 0) {
          next_scc_q.push_back(parent);
        }
      }
    }
  }
  // Every SCC must have been ordered, i.e. all counts must be back at zero.
  for (const auto& p : scc_to_outgoing_edges_map) {
    ABSL_DCHECK(p.second == 0) << "SCC left behind!";
  }
  return sorted_messages;
}
1511
HasWeakFields(const Descriptor * descriptor,const Options & options)1512 bool HasWeakFields(const Descriptor* descriptor, const Options& options) {
1513 for (int i = 0; i < descriptor->field_count(); i++) {
1514 if (IsWeak(descriptor->field(i), options)) return true;
1515 }
1516 return false;
1517 }
1518
HasWeakFields(const FileDescriptor * file,const Options & options)1519 bool HasWeakFields(const FileDescriptor* file, const Options& options) {
1520 for (int i = 0; i < file->message_type_count(); ++i) {
1521 if (HasWeakFields(file->message_type(i), options)) return true;
1522 }
1523 return false;
1524 }
1525
UsingImplicitWeakDescriptor(const FileDescriptor * file,const Options & options)1526 bool UsingImplicitWeakDescriptor(const FileDescriptor* file,
1527 const Options& options) {
1528 return HasDescriptorMethods(file, options) &&
1529 !IsBootstrapProto(options, file) &&
1530 options.descriptor_implicit_weak_messages &&
1531 !options.opensource_runtime;
1532 }
1533
StrongReferenceToType(const Descriptor * desc,const Options & options)1534 std::string StrongReferenceToType(const Descriptor* desc,
1535 const Options& options) {
1536 const auto name = QualifiedDefaultInstanceName(desc, options);
1537 return absl::StrFormat("::%s::internal::StrongPointer<decltype(%s)*, &%s>()",
1538 ProtobufNamespace(options), name, name);
1539 }
1540
WeakDescriptorDataSection(absl::string_view prefix,const Descriptor * descriptor,int index_in_file_messages,const Options & options)1541 std::string WeakDescriptorDataSection(absl::string_view prefix,
1542 const Descriptor* descriptor,
1543 int index_in_file_messages,
1544 const Options& options) {
1545 const auto* file = descriptor->file();
1546
1547 // To make a compact name we use the index of the object in its file
1548 // of its name.
1549 // So the name could be `pb_def_3_HASH` instead of
1550 // `pd_def_VeryLongClassName_WithNesting_AndMoreNames_HASH`
1551 // We need a know common prefix to merge the sections later on.
1552 return UniqueName(absl::StrCat("pb_", prefix, "_", index_in_file_messages),
1553 file, options);
1554 }
1555
UsingImplicitWeakFields(const FileDescriptor * file,const Options & options)1556 bool UsingImplicitWeakFields(const FileDescriptor* file,
1557 const Options& options) {
1558 return options.lite_implicit_weak_fields &&
1559 GetOptimizeFor(file, options) == FileOptions::LITE_RUNTIME;
1560 }
1561
IsImplicitWeakField(const FieldDescriptor * field,const Options & options,MessageSCCAnalyzer * scc_analyzer)1562 bool IsImplicitWeakField(const FieldDescriptor* field, const Options& options,
1563 MessageSCCAnalyzer* scc_analyzer) {
1564 return UsingImplicitWeakFields(field->file(), options) &&
1565 field->type() == FieldDescriptor::TYPE_MESSAGE &&
1566 !field->is_required() && !field->is_map() && !field->is_extension() &&
1567 !IsWellKnownMessage(field->message_type()->file()) &&
1568 field->message_type()->file()->name() !=
1569 "net/proto2/proto/descriptor.proto" &&
1570 // We do not support implicit weak fields between messages in the same
1571 // strongly-connected component.
1572 scc_analyzer->GetSCC(field->containing_type()) !=
1573 scc_analyzer->GetSCC(field->message_type());
1574 }
1575
GetSCCAnalysis(const SCC * scc)1576 MessageAnalysis MessageSCCAnalyzer::GetSCCAnalysis(const SCC* scc) {
1577 auto it = analysis_cache_.find(scc);
1578 if (it != analysis_cache_.end()) return it->second;
1579
1580 MessageAnalysis result;
1581 if (UsingImplicitWeakFields(scc->GetFile(), options_)) {
1582 result.contains_weak = true;
1583 }
1584 for (size_t i = 0; i < scc->descriptors.size(); ++i) {
1585 const Descriptor* descriptor = scc->descriptors[i];
1586 if (descriptor->extension_range_count() > 0) {
1587 result.contains_extension = true;
1588 }
1589 for (int j = 0; j < descriptor->field_count(); j++) {
1590 const FieldDescriptor* field = descriptor->field(j);
1591 if (field->is_required()) {
1592 result.contains_required = true;
1593 }
1594 if (field->options().weak()) {
1595 result.contains_weak = true;
1596 }
1597 switch (field->type()) {
1598 case FieldDescriptor::TYPE_STRING:
1599 case FieldDescriptor::TYPE_BYTES: {
1600 if (field->cpp_string_type() ==
1601 FieldDescriptor::CppStringType::kCord) {
1602 result.contains_cord = true;
1603 }
1604 break;
1605 }
1606 case FieldDescriptor::TYPE_GROUP:
1607 case FieldDescriptor::TYPE_MESSAGE: {
1608 const SCC* child = analyzer_.GetSCC(field->message_type());
1609 if (child != scc) {
1610 MessageAnalysis analysis = GetSCCAnalysis(child);
1611 result.contains_cord |= analysis.contains_cord;
1612 result.contains_extension |= analysis.contains_extension;
1613 if (!ShouldIgnoreRequiredFieldCheck(field, options_)) {
1614 result.contains_required |= analysis.contains_required;
1615 }
1616 result.contains_weak |= analysis.contains_weak;
1617 } else {
1618 // This field points back into the same SCC hence the messages
1619 // in the SCC are recursive. Note if SCC contains more than two
1620 // nodes it has to be recursive, however this test also works for
1621 // a single node that is recursive.
1622 result.is_recursive = true;
1623 }
1624 break;
1625 }
1626 default:
1627 break;
1628 }
1629 }
1630 }
1631 // We deliberately only insert the result here. After we contracted the SCC
1632 // in the graph, the graph should be a DAG. Hence we shouldn't need to mark
1633 // nodes visited as we can never return to them. By inserting them here
1634 // we will go in an infinite loop if the SCC is not correct.
1635 return analysis_cache_[scc] = std::move(result);
1636 }
1637
ListAllFields(const Descriptor * d,std::vector<const FieldDescriptor * > * fields)1638 void ListAllFields(const Descriptor* d,
1639 std::vector<const FieldDescriptor*>* fields) {
1640 // Collect sub messages
1641 for (int i = 0; i < d->nested_type_count(); i++) {
1642 ListAllFields(d->nested_type(i), fields);
1643 }
1644 // Collect message level extensions.
1645 for (int i = 0; i < d->extension_count(); i++) {
1646 fields->push_back(d->extension(i));
1647 }
1648 // Add types of fields necessary
1649 for (int i = 0; i < d->field_count(); i++) {
1650 fields->push_back(d->field(i));
1651 }
1652 }
1653
ListAllFields(const FileDescriptor * d,std::vector<const FieldDescriptor * > * fields)1654 void ListAllFields(const FileDescriptor* d,
1655 std::vector<const FieldDescriptor*>* fields) {
1656 // Collect file level message.
1657 for (int i = 0; i < d->message_type_count(); i++) {
1658 ListAllFields(d->message_type(i), fields);
1659 }
1660 // Collect message level extensions.
1661 for (int i = 0; i < d->extension_count(); i++) {
1662 fields->push_back(d->extension(i));
1663 }
1664 }
1665
ListAllTypesForServices(const FileDescriptor * fd,std::vector<const Descriptor * > * types)1666 void ListAllTypesForServices(const FileDescriptor* fd,
1667 std::vector<const Descriptor*>* types) {
1668 for (int i = 0; i < fd->service_count(); i++) {
1669 const ServiceDescriptor* sd = fd->service(i);
1670 for (int j = 0; j < sd->method_count(); j++) {
1671 const MethodDescriptor* method = sd->method(j);
1672 types->push_back(method->input_type());
1673 types->push_back(method->output_type());
1674 }
1675 }
1676 }
1677
GetBootstrapBasename(const Options & options,absl::string_view basename,std::string * bootstrap_basename)1678 bool GetBootstrapBasename(const Options& options, absl::string_view basename,
1679 std::string* bootstrap_basename) {
1680 if (options.opensource_runtime) {
1681 return false;
1682 }
1683
1684 static const auto* bootstrap_mapping =
1685 // TODO Replace these with string_view once we remove
1686 // StringPiece.
1687 new absl::flat_hash_map<absl::string_view, std::string>{
1688 {"net/proto2/proto/descriptor",
1689 "third_party/protobuf/descriptor"},
1690 {"third_party/protobuf/cpp_features",
1691 "third_party/protobuf/cpp_features"},
1692 {"third_party/protobuf/compiler/plugin",
1693 "third_party/protobuf/compiler/plugin"},
1694 {"net/proto2/compiler/proto/profile",
1695 "net/proto2/compiler/proto/profile_bootstrap"},
1696 };
1697 auto iter = bootstrap_mapping->find(basename);
1698 if (iter == bootstrap_mapping->end()) {
1699 *bootstrap_basename = std::string(basename);
1700 return false;
1701 } else {
1702 *bootstrap_basename = iter->second;
1703 return true;
1704 }
1705 }
1706
IsBootstrapProto(const Options & options,const FileDescriptor * file)1707 bool IsBootstrapProto(const Options& options, const FileDescriptor* file) {
1708 std::string my_name = StripProto(file->name());
1709 return GetBootstrapBasename(options, my_name, &my_name);
1710 }
1711
// Special-cases code generation for the bootstrap protos known to
// GetBootstrapBasename().
//
// Returns true when the caller should abort normal code generation for this
// file because forwarding stubs were emitted instead; returns false when
// normal generation should proceed.
//
//   * `*basename` is not a bootstrap proto: nothing happens, returns false.
//   * `bootstrap_flag` is set: `*basename` is rewritten to the bootstrap
//     basename and false is returned, so generation continues under the new
//     name.
//   * otherwise: forwarding `.pb.h` and `.proto.h` headers, a `.pb.cc`
//     containing a single newline, and the two `.meta` outputs are opened
//     through `generator_context`, and true is returned.
//
// NOTE(review): GetBootstrapBasename() returns false whenever
// options.opensource_runtime is set, so the opensource_runtime branches
// further down look unreachable from here — confirm before relying on them.
bool MaybeBootstrap(const Options& options, GeneratorContext* generator_context,
                    bool bootstrap_flag, std::string* basename) {
  std::string bootstrap_basename;
  if (!GetBootstrapBasename(options, *basename, &bootstrap_basename)) {
    return false;
  }

  if (bootstrap_flag) {
    // Adjust basename, but don't abort code generation.
    *basename = bootstrap_basename;
    return false;
  }

  // Forwarding <basename>.pb.h: an include-guarded header that only includes
  // the bootstrap header.
  auto pb_h = absl::WrapUnique(
      generator_context->Open(absl::StrCat(*basename, ".pb.h")));

  io::Printer p(pb_h.get());
  p.Emit(
      {
          {"fwd_to", bootstrap_basename},
          {"file", FilenameIdentifier(*basename)},
          {"fwd_to_suffix", options.opensource_runtime ? "pb" : "proto"},
          // Callback substitution: emits a SWIG-only %include of the
          // bootstrap .pb.h, and nothing at all for the opensource runtime.
          {"swig_evil",
           [&] {
             if (options.opensource_runtime) {
               return;
             }
             p.Emit(R"(
               #ifdef SWIG
               %include "$fwd_to$.pb.h"
               #endif // SWIG
             )");
           }},
      },
      R"(
          #ifndef PROTOBUF_INCLUDED_$file$_FORWARD_PB_H
          #define PROTOBUF_INCLUDED_$file$_FORWARD_PB_H
          #include "$fwd_to$.$fwd_to_suffix$.h" // IWYU pragma: export
          #endif // PROTOBUF_INCLUDED_$file$_FORWARD_PB_H
          $swig_evil$;
      )");

  // Forwarding <basename>.proto.h: same idea, always forwarding to the
  // bootstrap .proto.h.
  auto proto_h = absl::WrapUnique(
      generator_context->Open(absl::StrCat(*basename, ".proto.h")));
  io::Printer(proto_h.get())
      .Emit(
          {
              {"fwd_to", bootstrap_basename},
              {"file", FilenameIdentifier(*basename)},
          },
          R"(
            #ifndef PROTOBUF_INCLUDED_$file$_FORWARD_PROTO_H
            #define PROTOBUF_INCLUDED_$file$_FORWARD_PROTO_H
            #include "$fwd_to$.proto.h" // IWYU pragma: export
            #endif // PROTOBUF_INCLUDED_$file$_FORWARD_PROTO_H
          )");

  // <basename>.pb.cc gets just a newline written to it.
  auto pb_cc = absl::WrapUnique(
      generator_context->Open(absl::StrCat(*basename, ".pb.cc")));
  io::Printer(pb_cc.get()).PrintRaw("\n");

  // The .meta outputs are opened and their streams destroyed right away;
  // nothing is written to them here.
  (void)absl::WrapUnique(
      generator_context->Open(absl::StrCat(*basename, ".pb.h.meta")));

  (void)absl::WrapUnique(
      generator_context->Open(absl::StrCat(*basename, ".proto.h.meta")));

  // Abort code generation.
  return true;
}
1782
HasExtensionFromFile(const Message & msg,const FileDescriptor * file,const Options & options,bool * has_opt_codesize_extension)1783 static bool HasExtensionFromFile(const Message& msg, const FileDescriptor* file,
1784 const Options& options,
1785 bool* has_opt_codesize_extension) {
1786 std::vector<const FieldDescriptor*> fields;
1787 auto reflection = msg.GetReflection();
1788 reflection->ListFields(msg, &fields);
1789 for (auto field : fields) {
1790 const auto* field_msg = field->message_type();
1791 if (field_msg == nullptr) {
1792 // It so happens that enums Is_Valid are still generated so enums work.
1793 // Only messages have potential problems.
1794 continue;
1795 }
1796 // If this option has an extension set AND that extension is defined in the
1797 // same file we have bootstrap problem.
1798 if (field->is_extension()) {
1799 const auto* msg_extension_file = field->message_type()->file();
1800 if (msg_extension_file == file) return true;
1801 if (has_opt_codesize_extension &&
1802 GetOptimizeFor(msg_extension_file, options) ==
1803 FileOptions::CODE_SIZE) {
1804 *has_opt_codesize_extension = true;
1805 }
1806 }
1807 // Recurse in this field to see if there is a problem in there
1808 if (field->is_repeated()) {
1809 for (int i = 0; i < reflection->FieldSize(msg, field); i++) {
1810 if (HasExtensionFromFile(reflection->GetRepeatedMessage(msg, field, i),
1811 file, options, has_opt_codesize_extension)) {
1812 return true;
1813 }
1814 }
1815 } else {
1816 if (HasExtensionFromFile(reflection->GetMessage(msg, field), file,
1817 options, has_opt_codesize_extension)) {
1818 return true;
1819 }
1820 }
1821 }
1822 return false;
1823 }
1824
HasBootstrapProblem(const FileDescriptor * file,const Options & options,bool * has_opt_codesize_extension)1825 static bool HasBootstrapProblem(const FileDescriptor* file,
1826 const Options& options,
1827 bool* has_opt_codesize_extension) {
1828 struct BootstrapGlobals {
1829 absl::Mutex mutex;
1830 absl::flat_hash_set<const FileDescriptor*> cached ABSL_GUARDED_BY(mutex);
1831 absl::flat_hash_set<const FileDescriptor*> non_cached
1832 ABSL_GUARDED_BY(mutex);
1833 };
1834 static auto& bootstrap_cache = *new BootstrapGlobals();
1835
1836 absl::MutexLock lock(&bootstrap_cache.mutex);
1837 if (bootstrap_cache.cached.contains(file)) return true;
1838 if (bootstrap_cache.non_cached.contains(file)) return false;
1839
1840 // In order to build the data structures for the reflective parse, it needs
1841 // to parse the serialized descriptor describing all the messages defined in
1842 // this file. Obviously this presents a bootstrap problem for descriptor
1843 // messages.
1844 if (file->name() == "net/proto2/proto/descriptor.proto" ||
1845 file->name() == "google/protobuf/descriptor.proto") {
1846 return true;
1847 }
1848 // Unfortunately we're not done yet. The descriptor option messages allow
1849 // for extensions. So we need to be able to parse these extensions in order
1850 // to parse the file descriptor for a file that has custom options. This is a
1851 // problem when these custom options extensions are defined in the same file.
1852 FileDescriptorProto linkedin_fd_proto;
1853 const DescriptorPool* pool = file->pool();
1854 const Descriptor* fd_proto_descriptor =
1855 pool->FindMessageTypeByName(linkedin_fd_proto.GetTypeName());
1856 // Not all pools have descriptor.proto in them. In these cases there for sure
1857 // are no custom options.
1858 if (fd_proto_descriptor == nullptr) return false;
1859
1860 // It's easier to inspect file as a proto, because we can use reflection on
1861 // the proto to iterate over all content.
1862 file->CopyTo(&linkedin_fd_proto);
1863
1864 // linkedin_fd_proto is a generated proto linked in the proto compiler. As
1865 // such it doesn't know the extensions that are potentially present in the
1866 // descriptor pool constructed from the protos that are being compiled. These
1867 // custom options are therefore in the unknown fields.
1868 // By building the corresponding FileDescriptorProto in the pool constructed
1869 // by the protos that are being compiled, ie. file's pool, the unknown fields
1870 // are converted to extensions.
1871 DynamicMessageFactory factory(pool);
1872 Message* fd_proto = factory.GetPrototype(fd_proto_descriptor)->New();
1873 fd_proto->ParseFromString(linkedin_fd_proto.SerializeAsString());
1874
1875 bool res = HasExtensionFromFile(*fd_proto, file, options,
1876 has_opt_codesize_extension);
1877 if (res) {
1878 bootstrap_cache.cached.insert(file);
1879 } else {
1880 bootstrap_cache.non_cached.insert(file);
1881 }
1882 delete fd_proto;
1883 return res;
1884 }
1885
GetOptimizeFor(const FileDescriptor * file,const Options & options,bool * has_opt_codesize_extension)1886 FileOptions_OptimizeMode GetOptimizeFor(const FileDescriptor* file,
1887 const Options& options,
1888 bool* has_opt_codesize_extension) {
1889 if (has_opt_codesize_extension) *has_opt_codesize_extension = false;
1890 switch (options.enforce_mode) {
1891 case EnforceOptimizeMode::kSpeed:
1892 return FileOptions::SPEED;
1893 case EnforceOptimizeMode::kLiteRuntime:
1894 return FileOptions::LITE_RUNTIME;
1895 case EnforceOptimizeMode::kCodeSize:
1896 if (file->options().optimize_for() == FileOptions::LITE_RUNTIME) {
1897 return FileOptions::LITE_RUNTIME;
1898 }
1899 if (HasBootstrapProblem(file, options, has_opt_codesize_extension)) {
1900 return FileOptions::SPEED;
1901 }
1902 return FileOptions::CODE_SIZE;
1903 case EnforceOptimizeMode::kNoEnforcement:
1904 if (file->options().optimize_for() == FileOptions::CODE_SIZE) {
1905 if (HasBootstrapProblem(file, options, has_opt_codesize_extension)) {
1906 ABSL_LOG(WARNING)
1907 << "Proto states optimize_for = CODE_SIZE, but we "
1908 "cannot honor that because it contains custom option "
1909 "extensions defined in the same proto.";
1910 return FileOptions::SPEED;
1911 }
1912 }
1913 return file->options().optimize_for();
1914 }
1915
1916 ABSL_LOG(FATAL) << "Unknown optimization enforcement requested.";
1917 // The phony return below serves to silence a warning from GCC 8.
1918 return FileOptions::SPEED;
1919 }
1920
HasMessageFieldOrExtension(const Descriptor * desc)1921 bool HasMessageFieldOrExtension(const Descriptor* desc) {
1922 if (desc->extension_range_count() > 0) return true;
1923 for (const auto* f : FieldRange(desc)) {
1924 if (f->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) return true;
1925 }
1926 return false;
1927 }
1928
AnnotatedAccessors(const FieldDescriptor * field,absl::Span<const absl::string_view> prefixes,absl::optional<google::protobuf::io::AnnotationCollector::Semantic> semantic)1929 std::vector<io::Printer::Sub> AnnotatedAccessors(
1930 const FieldDescriptor* field, absl::Span<const absl::string_view> prefixes,
1931 absl::optional<google::protobuf::io::AnnotationCollector::Semantic> semantic) {
1932 auto field_name = FieldName(field);
1933
1934 std::vector<io::Printer::Sub> vars;
1935 for (auto prefix : prefixes) {
1936 vars.push_back(io::Printer::Sub(absl::StrCat(prefix, "name"),
1937 absl::StrCat(prefix, field_name))
1938 .AnnotatedAs({field, semantic}));
1939 }
1940
1941 return vars;
1942 }
1943
IsFileDescriptorProto(const FileDescriptor * file,const Options & options)1944 bool IsFileDescriptorProto(const FileDescriptor* file, const Options& options) {
1945 if (Namespace(file, options) !=
1946 absl::StrCat("::", ProtobufNamespace(options))) {
1947 return false;
1948 }
1949 for (int i = 0; i < file->message_type_count(); ++i) {
1950 if (file->message_type(i)->name() == "FileDescriptorProto") return true;
1951 }
1952 return false;
1953 }
1954
ShouldGenerateClass(const Descriptor * descriptor,const Options & options)1955 bool ShouldGenerateClass(const Descriptor* descriptor, const Options& options) {
1956 return !IsMapEntryMessage(descriptor) ||
1957 HasDescriptorMethods(descriptor->file(), options);
1958 }
1959
HasOnDeserializeTracker(const Descriptor * descriptor,const Options & options)1960 bool HasOnDeserializeTracker(const Descriptor* descriptor,
1961 const Options& options) {
1962 return HasTracker(descriptor, options) &&
1963 !options.field_listener_options.forbidden_field_listener_events
1964 .contains("deserialize");
1965 }
1966
1967
NeedsPostLoopHandler(const Descriptor * descriptor,const Options & options)1968 bool NeedsPostLoopHandler(const Descriptor* descriptor,
1969 const Options& options) {
1970 if (HasOnDeserializeTracker(descriptor, options)) {
1971 return true;
1972 }
1973 return false;
1974 }
1975
1976 } // namespace cpp
1977 } // namespace compiler
1978 } // namespace protobuf
1979 } // namespace google
1980
1981 #include "google/protobuf/port_undef.inc"
1982