• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 //
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file or at
6 // https://developers.google.com/open-source/licenses/bsd
7 
8 // Author: kenton@google.com (Kenton Varda)
9 //  Based on original Protocol Buffers design by
10 //  Sanjay Ghemawat, Jeff Dean, and others.
11 
12 #include "google/protobuf/compiler/cpp/helpers.h"
13 
14 #include <algorithm>
15 #include <cstddef>
16 #include <cstdint>
17 #include <limits>
18 #include <memory>
19 #include <new>
20 #include <queue>
21 #include <string>
22 #include <type_traits>
23 #include <utility>
24 #include <vector>
25 
26 #include "absl/base/thread_annotations.h"
27 #include "absl/container/flat_hash_map.h"
28 #include "absl/container/flat_hash_set.h"
29 #include "absl/log/absl_check.h"
30 #include "absl/log/absl_log.h"
31 #include "absl/memory/memory.h"
32 #include "absl/strings/ascii.h"
33 #include "absl/strings/cord.h"
34 #include "absl/strings/escaping.h"
35 #include "absl/strings/match.h"
36 #include "absl/strings/str_cat.h"
37 #include "absl/strings/str_format.h"
38 #include "absl/strings/str_join.h"
39 #include "absl/strings/str_replace.h"
40 #include "absl/strings/str_split.h"
41 #include "absl/strings/string_view.h"
42 #include "absl/strings/substitute.h"
43 #include "absl/synchronization/mutex.h"
44 #include "absl/types/optional.h"
45 #include "absl/types/span.h"
46 #include "google/protobuf/arenastring.h"
47 #include "google/protobuf/compiler/code_generator.h"
48 #include "google/protobuf/compiler/code_generator_lite.h"
49 #include "google/protobuf/compiler/cpp/names.h"
50 #include "google/protobuf/compiler/cpp/options.h"
51 #include "google/protobuf/compiler/scc.h"
52 #include "google/protobuf/descriptor.h"
53 #include "google/protobuf/descriptor.pb.h"
54 #include "google/protobuf/dynamic_message.h"
55 #include "google/protobuf/generated_message_reflection.h"
56 #include "google/protobuf/generated_message_tctable_impl.h"
57 #include "google/protobuf/io/printer.h"
58 #include "google/protobuf/io/strtod.h"
59 #include "google/protobuf/map.h"
60 #include "google/protobuf/repeated_ptr_field.h"
61 #include "google/protobuf/wire_format.h"
62 #include "google/protobuf/wire_format_lite.h"
63 
64 
65 // Must be last.
66 #include "google/protobuf/port_def.inc"
67 
68 namespace google {
69 namespace protobuf {
70 namespace compiler {
71 namespace cpp {
72 namespace {
73 constexpr absl::string_view kAnyMessageName = "Any";
74 constexpr absl::string_view kAnyProtoFile = "google/protobuf/any.proto";
75 
// Identifiers that generated C++ code must not use verbatim.  Besides the
// language keywords and alternative operator tokens, the list also contains
// the commonly defined macros `NULL` and `assert`.  Keywords() turns this
// table into a hash set for lookups.
const char* const kKeywordList[] = {
    // clang-format off
    "NULL", "alignas", "alignof", "and", "and_eq", "asm", "assert", "auto",
    "bitand", "bitor", "bool", "break",
    "case", "catch", "char", "class", "compl", "const", "constexpr",
    "const_cast", "continue",
    "decltype", "default", "delete", "do", "double", "dynamic_cast",
    "else", "enum", "explicit", "export", "extern",
    "false", "float", "for", "friend",
    "goto",
    "if", "inline", "int",
    "long",
    "mutable",
    "namespace", "new", "noexcept", "not", "not_eq", "nullptr",
    "operator", "or", "or_eq",
    "private", "protected", "public",
    "register", "reinterpret_cast", "return",
    "short", "signed", "sizeof", "static", "static_assert", "static_cast",
    "struct", "switch",
    "template", "this", "thread_local", "throw", "true", "try", "typedef",
    "typeid", "typename",
    "union", "unsigned", "using",
    "virtual", "void", "volatile",
    "wchar_t", "while",
    "xor", "xor_eq",
    // Later additions, kept at the end of the table.
    "char8_t", "char16_t", "char32_t", "concept", "consteval", "constinit",
    "co_await", "co_return", "co_yield", "requires",
    // clang-format on
};
174 
Keywords()175 const absl::flat_hash_set<absl::string_view>& Keywords() {
176   static const auto* keywords = [] {
177     auto* keywords = new absl::flat_hash_set<absl::string_view>();
178 
179     for (const auto keyword : kKeywordList) {
180       keywords->emplace(keyword);
181     }
182     return keywords;
183   }();
184   return *keywords;
185 }
186 
IntTypeName(const Options & options,absl::string_view type)187 std::string IntTypeName(const Options& options, absl::string_view type) {
188   return absl::StrCat("::", type, "_t");
189 }
190 
191 
192 
193 }  // namespace
194 
IsLazy(const FieldDescriptor * field,const Options & options,MessageSCCAnalyzer * scc_analyzer)195 bool IsLazy(const FieldDescriptor* field, const Options& options,
196             MessageSCCAnalyzer* scc_analyzer) {
197   return IsLazilyVerifiedLazy(field, options) ||
198          IsEagerlyVerifiedLazy(field, options, scc_analyzer);
199 }
200 
201 // Returns true if "field" is a message field that is backed by LazyField per
202 // profile (go/pdlazy).
IsLazyByProfile(const FieldDescriptor * field,const Options & options,MessageSCCAnalyzer * scc_analyzer)203 inline bool IsLazyByProfile(const FieldDescriptor* field,
204                             const Options& options,
205                             MessageSCCAnalyzer* scc_analyzer) {
206   return false;
207 }
208 
// Reports whether `field` is an eagerly verified lazy field.  In this
// implementation no field ever is: the arguments are unused and the answer is
// always false.
bool IsEagerlyVerifiedLazy(const FieldDescriptor* field, const Options& options,
                           MessageSCCAnalyzer* scc_analyzer) {
  constexpr bool kEagerlyVerifiedLazy = false;
  return kEagerlyVerifiedLazy;
}
213 
// Reports whether `field` is a lazily verified lazy field.  In this
// implementation no field ever is: the arguments are unused and the answer is
// always false.
bool IsLazilyVerifiedLazy(const FieldDescriptor* field,
                          const Options& options) {
  constexpr bool kLazilyVerifiedLazy = false;
  return kLazilyVerifiedLazy;
}
218 
GetLazyStyle(const FieldDescriptor * field,const Options & options,MessageSCCAnalyzer * scc_analyzer)219 internal::field_layout::TransformValidation GetLazyStyle(
220     const FieldDescriptor* field, const Options& options,
221     MessageSCCAnalyzer* scc_analyzer) {
222   if (IsEagerlyVerifiedLazy(field, options, scc_analyzer)) {
223     return internal::field_layout::kTvEager;
224   }
225   if (IsLazilyVerifiedLazy(field, options)) {
226     return internal::field_layout::kTvLazy;
227   }
228   return {};
229 }
230 
MessageVars(const Descriptor * desc)231 absl::flat_hash_map<absl::string_view, std::string> MessageVars(
232     const Descriptor* desc) {
233   absl::string_view prefix = "_impl_.";
234   return {
235       {"any_metadata", absl::StrCat(prefix, "_any_metadata_")},
236       {"cached_size", absl::StrCat(prefix, "_cached_size_")},
237       {"extensions", absl::StrCat(prefix, "_extensions_")},
238       {"has_bits", absl::StrCat(prefix, "_has_bits_")},
239       {"inlined_string_donated_array",
240        absl::StrCat(prefix, "_inlined_string_donated_")},
241       {"oneof_case", absl::StrCat(prefix, "_oneof_case_")},
242       {"tracker", "Impl_::_tracker_"},
243       {"weak_field_map", absl::StrCat(prefix, "_weak_field_map_")},
244       {"split", absl::StrCat(prefix, "_split_")},
245       {"cached_split_ptr", "cached_split_ptr"},
246   };
247 }
248 
SetCommonMessageDataVariables(const Descriptor * descriptor,absl::flat_hash_map<absl::string_view,std::string> * variables)249 void SetCommonMessageDataVariables(
250     const Descriptor* descriptor,
251     absl::flat_hash_map<absl::string_view, std::string>* variables) {
252   for (auto& pair : MessageVars(descriptor)) {
253     variables->emplace(pair);
254   }
255 }
256 
UnknownFieldsVars(const Descriptor * desc,const Options & opts)257 absl::flat_hash_map<absl::string_view, std::string> UnknownFieldsVars(
258     const Descriptor* desc, const Options& opts) {
259   std::string unknown_fields_type;
260   std::string default_instance;
261   if (UseUnknownFieldSet(desc->file(), opts)) {
262     unknown_fields_type =
263         absl::StrCat("::", ProtobufNamespace(opts), "::UnknownFieldSet");
264     default_instance = absl::StrCat(unknown_fields_type, "::default_instance");
265   } else {
266     unknown_fields_type =
267         PrimitiveTypeName(opts, FieldDescriptor::CPPTYPE_STRING);
268     default_instance = absl::StrCat("::", ProtobufNamespace(opts),
269                                     "::internal::GetEmptyString");
270   }
271 
272   return {
273       {"unknown_fields",
274        absl::Substitute("_internal_metadata_.unknown_fields<$0>($1)",
275                         unknown_fields_type, default_instance)},
276       {"unknown_fields_type", unknown_fields_type},
277       {"have_unknown_fields", "_internal_metadata_.have_unknown_fields()"},
278       {"mutable_unknown_fields",
279        absl::Substitute("_internal_metadata_.mutable_unknown_fields<$0>()",
280                         unknown_fields_type)},
281   };
282 }
283 
SetUnknownFieldsVariable(const Descriptor * descriptor,const Options & options,absl::flat_hash_map<absl::string_view,std::string> * variables)284 void SetUnknownFieldsVariable(
285     const Descriptor* descriptor, const Options& options,
286     absl::flat_hash_map<absl::string_view, std::string>* variables) {
287   for (auto& pair : UnknownFieldsVars(descriptor, options)) {
288     variables->emplace(pair);
289   }
290 }
291 
UnderscoresToCamelCase(absl::string_view input,bool cap_next_letter)292 std::string UnderscoresToCamelCase(absl::string_view input,
293                                    bool cap_next_letter) {
294   std::string result;
295   // Note:  I distrust ctype.h due to locales.
296   for (size_t i = 0; i < input.size(); ++i) {
297     if ('a' <= input[i] && input[i] <= 'z') {
298       if (cap_next_letter) {
299         result += input[i] + ('A' - 'a');
300       } else {
301         result += input[i];
302       }
303       cap_next_letter = false;
304     } else if ('A' <= input[i] && input[i] <= 'Z') {
305       // Capital letters are left as-is.
306       result += input[i];
307       cap_next_letter = false;
308     } else if ('0' <= input[i] && input[i] <= '9') {
309       result += input[i];
310       cap_next_letter = true;
311     } else {
312       cap_next_letter = true;
313     }
314   }
315   return result;
316 }
317 
// Comment lines ('=' and '-' rules, each terminated by a newline) used as
// visual section dividers.
const char kThickSeparator[] =
    "// ===================================================================\n";
const char kThinSeparator[] =
    "// -------------------------------------------------------------------\n";
322 
CanInitializeByZeroing(const FieldDescriptor * field,const Options & options,MessageSCCAnalyzer * scc_analyzer)323 bool CanInitializeByZeroing(const FieldDescriptor* field,
324                             const Options& options,
325                             MessageSCCAnalyzer* scc_analyzer) {
326   static_assert(
327       std::numeric_limits<float>::is_iec559 &&
328           std::numeric_limits<double>::is_iec559,
329       "proto / abseil requires iec559, which has zero initialized floats.");
330 
331   if (field->is_repeated() || field->is_extension()) return false;
332   switch (field->cpp_type()) {
333     case FieldDescriptor::CPPTYPE_ENUM:
334       return field->default_value_enum()->number() == 0;
335     case FieldDescriptor::CPPTYPE_INT32:
336       return field->default_value_int32() == 0;
337     case FieldDescriptor::CPPTYPE_INT64:
338       return field->default_value_int64() == 0;
339     case FieldDescriptor::CPPTYPE_UINT32:
340       return field->default_value_uint32() == 0;
341     case FieldDescriptor::CPPTYPE_UINT64:
342       return field->default_value_uint64() == 0;
343     case FieldDescriptor::CPPTYPE_FLOAT:
344       return field->default_value_float() == 0;
345     case FieldDescriptor::CPPTYPE_DOUBLE:
346       return field->default_value_double() == 0;
347     case FieldDescriptor::CPPTYPE_BOOL:
348       return field->default_value_bool() == false;
349     case FieldDescriptor::CPPTYPE_MESSAGE:
350       return true;
351     default:
352       return false;
353   }
354 }
355 
CanClearByZeroing(const FieldDescriptor * field)356 bool CanClearByZeroing(const FieldDescriptor* field) {
357   if (field->is_repeated() || field->is_extension()) return false;
358   switch (field->cpp_type()) {
359     case FieldDescriptor::CPPTYPE_ENUM:
360       return field->default_value_enum()->number() == 0;
361     case FieldDescriptor::CPPTYPE_INT32:
362       return field->default_value_int32() == 0;
363     case FieldDescriptor::CPPTYPE_INT64:
364       return field->default_value_int64() == 0;
365     case FieldDescriptor::CPPTYPE_UINT32:
366       return field->default_value_uint32() == 0;
367     case FieldDescriptor::CPPTYPE_UINT64:
368       return field->default_value_uint64() == 0;
369     case FieldDescriptor::CPPTYPE_FLOAT:
370       return field->default_value_float() == 0;
371     case FieldDescriptor::CPPTYPE_DOUBLE:
372       return field->default_value_double() == 0;
373     case FieldDescriptor::CPPTYPE_BOOL:
374       return field->default_value_bool() == false;
375     default:
376       return false;
377   }
378 }
379 
380 // Determines if swap can be implemented via memcpy.
HasTrivialSwap(const FieldDescriptor * field,const Options & options,MessageSCCAnalyzer * scc_analyzer)381 bool HasTrivialSwap(const FieldDescriptor* field, const Options& options,
382                     MessageSCCAnalyzer* scc_analyzer) {
383   if (field->is_repeated() || field->is_extension()) return false;
384   switch (field->cpp_type()) {
385     case FieldDescriptor::CPPTYPE_ENUM:
386     case FieldDescriptor::CPPTYPE_INT32:
387     case FieldDescriptor::CPPTYPE_INT64:
388     case FieldDescriptor::CPPTYPE_UINT32:
389     case FieldDescriptor::CPPTYPE_UINT64:
390     case FieldDescriptor::CPPTYPE_FLOAT:
391     case FieldDescriptor::CPPTYPE_DOUBLE:
392     case FieldDescriptor::CPPTYPE_BOOL:
393       return true;
394     case FieldDescriptor::CPPTYPE_MESSAGE:
395       // Non-repeated, non-lazy message fields are simply raw pointers, so we
396       // can swap them with memcpy.
397       return !IsLazy(field, options, scc_analyzer);
398     default:
399       return false;
400   }
401 }
402 
ClassName(const Descriptor * descriptor)403 std::string ClassName(const Descriptor* descriptor) {
404   const Descriptor* parent = descriptor->containing_type();
405   std::string res;
406   if (parent) absl::StrAppend(&res, ClassName(parent), "_");
407   absl::StrAppend(&res, descriptor->name());
408   if (IsMapEntryMessage(descriptor)) absl::StrAppend(&res, "_DoNotUse");
409   return ResolveKeyword(res);
410 }
411 
ClassName(const EnumDescriptor * enum_descriptor)412 std::string ClassName(const EnumDescriptor* enum_descriptor) {
413   if (enum_descriptor->containing_type() == nullptr) {
414     return ResolveKeyword(enum_descriptor->name());
415   } else {
416     return absl::StrCat(ClassName(enum_descriptor->containing_type()), "_",
417                         enum_descriptor->name());
418   }
419 }
420 
QualifiedClassName(const Descriptor * d,const Options & options)421 std::string QualifiedClassName(const Descriptor* d, const Options& options) {
422   return QualifiedFileLevelSymbol(d->file(), ClassName(d), options);
423 }
424 
QualifiedClassName(const EnumDescriptor * d,const Options & options)425 std::string QualifiedClassName(const EnumDescriptor* d,
426                                const Options& options) {
427   return QualifiedFileLevelSymbol(d->file(), ClassName(d), options);
428 }
429 
QualifiedClassName(const Descriptor * d)430 std::string QualifiedClassName(const Descriptor* d) {
431   return QualifiedClassName(d, Options());
432 }
433 
QualifiedClassName(const EnumDescriptor * d)434 std::string QualifiedClassName(const EnumDescriptor* d) {
435   return QualifiedClassName(d, Options());
436 }
437 
ExtensionName(const FieldDescriptor * d)438 std::string ExtensionName(const FieldDescriptor* d) {
439   if (const Descriptor* scope = d->extension_scope())
440     return absl::StrCat(ClassName(scope), "::", ResolveKeyword(d->name()));
441   return ResolveKeyword(d->name());
442 }
443 
QualifiedExtensionName(const FieldDescriptor * d,const Options & options)444 std::string QualifiedExtensionName(const FieldDescriptor* d,
445                                    const Options& options) {
446   ABSL_DCHECK(d->is_extension());
447   return QualifiedFileLevelSymbol(d->file(), ExtensionName(d), options);
448 }
449 
QualifiedExtensionName(const FieldDescriptor * d)450 std::string QualifiedExtensionName(const FieldDescriptor* d) {
451   return QualifiedExtensionName(d, Options());
452 }
453 
ResolveKeyword(absl::string_view name)454 std::string ResolveKeyword(absl::string_view name) {
455   if (Keywords().count(name) > 0) {
456     return absl::StrCat(name, "_");
457   }
458   return std::string(name);
459 }
460 
DotsToColons(absl::string_view name)461 std::string DotsToColons(absl::string_view name) {
462   std::vector<std::string> scope = absl::StrSplit(name, '.', absl::SkipEmpty());
463   for (auto& word : scope) {
464     word = ResolveKeyword(word);
465   }
466   return absl::StrJoin(scope, "::");
467 }
468 
Namespace(absl::string_view package)469 std::string Namespace(absl::string_view package) {
470   if (package.empty()) return "";
471   return absl::StrCat("::", DotsToColons(package));
472 }
473 
Namespace(const FileDescriptor * d)474 std::string Namespace(const FileDescriptor* d) { return Namespace(d, {}); }
Namespace(const FileDescriptor * d,const Options & options)475 std::string Namespace(const FileDescriptor* d, const Options& options) {
476   return Namespace(d->package());
477 }
478 
Namespace(const Descriptor * d)479 std::string Namespace(const Descriptor* d) { return Namespace(d, {}); }
Namespace(const Descriptor * d,const Options & options)480 std::string Namespace(const Descriptor* d, const Options& options) {
481   return Namespace(d->file(), options);
482 }
483 
Namespace(const FieldDescriptor * d)484 std::string Namespace(const FieldDescriptor* d) { return Namespace(d, {}); }
Namespace(const FieldDescriptor * d,const Options & options)485 std::string Namespace(const FieldDescriptor* d, const Options& options) {
486   return Namespace(d->file(), options);
487 }
488 
Namespace(const EnumDescriptor * d)489 std::string Namespace(const EnumDescriptor* d) { return Namespace(d, {}); }
Namespace(const EnumDescriptor * d,const Options & options)490 std::string Namespace(const EnumDescriptor* d, const Options& options) {
491   return Namespace(d->file(), options);
492 }
493 
DefaultInstanceType(const Descriptor * descriptor,const Options &,bool split)494 std::string DefaultInstanceType(const Descriptor* descriptor,
495                                 const Options& /*options*/, bool split) {
496   return ClassName(descriptor) + (split ? "__Impl_Split" : "") +
497          "DefaultTypeInternal";
498 }
499 
DefaultInstanceName(const Descriptor * descriptor,const Options &,bool split)500 std::string DefaultInstanceName(const Descriptor* descriptor,
501                                 const Options& /*options*/, bool split) {
502   return absl::StrCat("_", ClassName(descriptor, false),
503                       (split ? "__Impl_Split" : ""), "_default_instance_");
504 }
505 
DefaultInstancePtr(const Descriptor * descriptor,const Options & options,bool split)506 std::string DefaultInstancePtr(const Descriptor* descriptor,
507                                const Options& options, bool split) {
508   return absl::StrCat(DefaultInstanceName(descriptor, options, split), "ptr_");
509 }
510 
QualifiedDefaultInstanceName(const Descriptor * descriptor,const Options & options,bool split)511 std::string QualifiedDefaultInstanceName(const Descriptor* descriptor,
512                                          const Options& options, bool split) {
513   return QualifiedFileLevelSymbol(
514       descriptor->file(), DefaultInstanceName(descriptor, options, split),
515       options);
516 }
517 
QualifiedDefaultInstancePtr(const Descriptor * descriptor,const Options & options,bool split)518 std::string QualifiedDefaultInstancePtr(const Descriptor* descriptor,
519                                         const Options& options, bool split) {
520   return absl::StrCat(QualifiedDefaultInstanceName(descriptor, options, split),
521                       "ptr_");
522 }
523 
DescriptorTableName(const FileDescriptor * file,const Options & options)524 std::string DescriptorTableName(const FileDescriptor* file,
525                                 const Options& options) {
526   return UniqueName("descriptor_table", file, options);
527 }
528 
FileDllExport(const FileDescriptor * file,const Options & options)529 std::string FileDllExport(const FileDescriptor* file, const Options& options) {
530   return UniqueName("PROTOBUF_INTERNAL_EXPORT", file, options);
531 }
532 
SuperClassName(const Descriptor * descriptor,const Options & options)533 std::string SuperClassName(const Descriptor* descriptor,
534                            const Options& options) {
535   if (!HasDescriptorMethods(descriptor->file(), options)) {
536     return absl::StrCat("::", ProtobufNamespace(options), "::MessageLite");
537   }
538   auto simple_base = SimpleBaseClass(descriptor, options);
539   if (simple_base.empty()) {
540     return absl::StrCat("::", ProtobufNamespace(options), "::Message");
541   }
542   return absl::StrCat("::", ProtobufNamespace(options),
543                       "::internal::", simple_base);
544 }
545 
FieldName(const FieldDescriptor * field)546 std::string FieldName(const FieldDescriptor* field) {
547   std::string result = std::string(field->name());
548   absl::AsciiStrToLower(&result);
549   if (Keywords().count(result) > 0) {
550     result.append("_");
551   }
552   return result;
553 }
554 
FieldMemberName(const FieldDescriptor * field,bool split)555 std::string FieldMemberName(const FieldDescriptor* field, bool split) {
556   absl::string_view prefix = "_impl_.";
557   absl::string_view split_prefix = split ? "_split_->" : "";
558   if (field->real_containing_oneof() == nullptr) {
559     return absl::StrCat(prefix, split_prefix, FieldName(field), "_");
560   }
561   // Oneof fields are never split.
562   ABSL_CHECK(!split);
563   return absl::StrCat(prefix, field->containing_oneof()->name(), "_.",
564                       FieldName(field), "_");
565 }
566 
OneofCaseConstantName(const FieldDescriptor * field)567 std::string OneofCaseConstantName(const FieldDescriptor* field) {
568   ABSL_DCHECK(field->containing_oneof());
569   std::string field_name = UnderscoresToCamelCase(field->name(), true);
570   return absl::StrCat("k", field_name);
571 }
572 
QualifiedOneofCaseConstantName(const FieldDescriptor * field)573 std::string QualifiedOneofCaseConstantName(const FieldDescriptor* field) {
574   ABSL_DCHECK(field->containing_oneof());
575   const std::string qualification =
576       QualifiedClassName(field->containing_type());
577   return absl::StrCat(qualification, "::", OneofCaseConstantName(field));
578 }
579 
EnumValueName(const EnumValueDescriptor * enum_value)580 std::string EnumValueName(const EnumValueDescriptor* enum_value) {
581   std::string result = std::string(enum_value->name());
582   if (Keywords().count(result) > 0) {
583     result.append("_");
584   }
585   return result;
586 }
587 
EstimateAlignmentSize(const FieldDescriptor * field)588 int EstimateAlignmentSize(const FieldDescriptor* field) {
589   if (field == nullptr) return 0;
590   if (field->is_repeated()) return 8;
591   switch (field->cpp_type()) {
592     case FieldDescriptor::CPPTYPE_BOOL:
593       return 1;
594 
595     case FieldDescriptor::CPPTYPE_INT32:
596     case FieldDescriptor::CPPTYPE_UINT32:
597     case FieldDescriptor::CPPTYPE_ENUM:
598     case FieldDescriptor::CPPTYPE_FLOAT:
599       return 4;
600 
601     case FieldDescriptor::CPPTYPE_INT64:
602     case FieldDescriptor::CPPTYPE_UINT64:
603     case FieldDescriptor::CPPTYPE_DOUBLE:
604     case FieldDescriptor::CPPTYPE_STRING:
605     case FieldDescriptor::CPPTYPE_MESSAGE:
606       return 8;
607   }
608   ABSL_LOG(FATAL) << "Can't get here.";
609   return -1;  // Make compiler happy.
610 }
611 
EstimateSize(const FieldDescriptor * field)612 int EstimateSize(const FieldDescriptor* field) {
613   if (field == nullptr) return 0;
614   if (field->is_repeated()) {
615     if (field->is_map()) {
616       return sizeof(google::protobuf::Map<int32_t, int32_t>);
617     }
618     return field->cpp_type() < FieldDescriptor::CPPTYPE_STRING || IsCord(field)
619                ? sizeof(RepeatedField<int32_t>)
620                : sizeof(internal::RepeatedPtrFieldBase);
621   }
622   switch (field->cpp_type()) {
623     case FieldDescriptor::CPPTYPE_BOOL:
624       return 1;
625 
626     case FieldDescriptor::CPPTYPE_INT32:
627     case FieldDescriptor::CPPTYPE_UINT32:
628     case FieldDescriptor::CPPTYPE_ENUM:
629     case FieldDescriptor::CPPTYPE_FLOAT:
630       return 4;
631 
632     case FieldDescriptor::CPPTYPE_INT64:
633     case FieldDescriptor::CPPTYPE_UINT64:
634     case FieldDescriptor::CPPTYPE_DOUBLE:
635     case FieldDescriptor::CPPTYPE_MESSAGE:
636       return 8;
637 
638     case FieldDescriptor::CPPTYPE_STRING:
639       if (IsCord(field)) return sizeof(absl::Cord);
640       return sizeof(internal::ArenaStringPtr);
641   }
642   ABSL_LOG(FATAL) << "Can't get here.";
643   return -1;  // Make compiler happy.
644 }
645 
FieldConstantName(const FieldDescriptor * field)646 std::string FieldConstantName(const FieldDescriptor* field) {
647   std::string field_name = UnderscoresToCamelCase(field->name(), true);
648   std::string result = absl::StrCat("k", field_name, "FieldNumber");
649 
650   if (!field->is_extension() &&
651       field->containing_type()->FindFieldByCamelcaseName(
652           field->camelcase_name()) != field) {
653     // This field's camelcase name is not unique.  As a hack, add the field
654     // number to the constant name.  This makes the constant rather useless,
655     // but what can we do?
656     absl::StrAppend(&result, "_", field->number());
657   }
658 
659   return result;
660 }
661 
FieldMessageTypeName(const FieldDescriptor * field,const Options & options)662 std::string FieldMessageTypeName(const FieldDescriptor* field,
663                                  const Options& options) {
664   // Note:  The Google-internal version of Protocol Buffers uses this function
665   //   as a hook point for hacks to support legacy code.
666   return QualifiedClassName(field->message_type(), options);
667 }
668 
StripProto(absl::string_view filename)669 std::string StripProto(absl::string_view filename) {
670   /*
671    * TODO remove this proxy method
672    * once Google's internal codebase will become ready
673    */
674   return compiler::StripProto(filename);
675 }
676 
PrimitiveTypeName(FieldDescriptor::CppType type)677 const char* PrimitiveTypeName(FieldDescriptor::CppType type) {
678   switch (type) {
679     case FieldDescriptor::CPPTYPE_INT32:
680       return "::int32_t";
681     case FieldDescriptor::CPPTYPE_INT64:
682       return "::int64_t";
683     case FieldDescriptor::CPPTYPE_UINT32:
684       return "::uint32_t";
685     case FieldDescriptor::CPPTYPE_UINT64:
686       return "::uint64_t";
687     case FieldDescriptor::CPPTYPE_DOUBLE:
688       return "double";
689     case FieldDescriptor::CPPTYPE_FLOAT:
690       return "float";
691     case FieldDescriptor::CPPTYPE_BOOL:
692       return "bool";
693     case FieldDescriptor::CPPTYPE_ENUM:
694       return "int";
695     case FieldDescriptor::CPPTYPE_STRING:
696       return "std::string";
697     case FieldDescriptor::CPPTYPE_MESSAGE:
698       return nullptr;
699 
700       // No default because we want the compiler to complain if any new
701       // CppTypes are added.
702   }
703 
704   ABSL_LOG(FATAL) << "Can't get here.";
705   return nullptr;
706 }
707 
PrimitiveTypeName(const Options & options,FieldDescriptor::CppType type)708 std::string PrimitiveTypeName(const Options& options,
709                               FieldDescriptor::CppType type) {
710   switch (type) {
711     case FieldDescriptor::CPPTYPE_INT32:
712       return IntTypeName(options, "int32");
713     case FieldDescriptor::CPPTYPE_INT64:
714       return IntTypeName(options, "int64");
715     case FieldDescriptor::CPPTYPE_UINT32:
716       return IntTypeName(options, "uint32");
717     case FieldDescriptor::CPPTYPE_UINT64:
718       return IntTypeName(options, "uint64");
719     case FieldDescriptor::CPPTYPE_DOUBLE:
720       return "double";
721     case FieldDescriptor::CPPTYPE_FLOAT:
722       return "float";
723     case FieldDescriptor::CPPTYPE_BOOL:
724       return "bool";
725     case FieldDescriptor::CPPTYPE_ENUM:
726       return "int";
727     case FieldDescriptor::CPPTYPE_STRING:
728       return "std::string";
729     case FieldDescriptor::CPPTYPE_MESSAGE:
730       return "";
731 
732       // No default because we want the compiler to complain if any new
733       // CppTypes are added.
734   }
735 
736   ABSL_LOG(FATAL) << "Can't get here.";
737   return "";
738 }
739 
DeclaredTypeMethodName(FieldDescriptor::Type type)740 const char* DeclaredTypeMethodName(FieldDescriptor::Type type) {
741   switch (type) {
742     case FieldDescriptor::TYPE_INT32:
743       return "Int32";
744     case FieldDescriptor::TYPE_INT64:
745       return "Int64";
746     case FieldDescriptor::TYPE_UINT32:
747       return "UInt32";
748     case FieldDescriptor::TYPE_UINT64:
749       return "UInt64";
750     case FieldDescriptor::TYPE_SINT32:
751       return "SInt32";
752     case FieldDescriptor::TYPE_SINT64:
753       return "SInt64";
754     case FieldDescriptor::TYPE_FIXED32:
755       return "Fixed32";
756     case FieldDescriptor::TYPE_FIXED64:
757       return "Fixed64";
758     case FieldDescriptor::TYPE_SFIXED32:
759       return "SFixed32";
760     case FieldDescriptor::TYPE_SFIXED64:
761       return "SFixed64";
762     case FieldDescriptor::TYPE_FLOAT:
763       return "Float";
764     case FieldDescriptor::TYPE_DOUBLE:
765       return "Double";
766 
767     case FieldDescriptor::TYPE_BOOL:
768       return "Bool";
769     case FieldDescriptor::TYPE_ENUM:
770       return "Enum";
771 
772     case FieldDescriptor::TYPE_STRING:
773       return "String";
774     case FieldDescriptor::TYPE_BYTES:
775       return "Bytes";
776     case FieldDescriptor::TYPE_GROUP:
777       return "Group";
778     case FieldDescriptor::TYPE_MESSAGE:
779       return "Message";
780 
781       // No default because we want the compiler to complain if any new
782       // types are added.
783   }
784   ABSL_LOG(FATAL) << "Can't get here.";
785   return "";
786 }
787 
Int32ToString(int number)788 std::string Int32ToString(int number) {
789   if (number == std::numeric_limits<int32_t>::min()) {
790     // This needs to be special-cased, see explanation here:
791     // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=52661
792     return absl::StrCat(number + 1, " - 1");
793   } else {
794     return absl::StrCat(number);
795   }
796 }
797 
Int64ToString(int64_t number)798 static std::string Int64ToString(int64_t number) {
799   if (number == std::numeric_limits<int64_t>::min()) {
800     // This needs to be special-cased, see explanation here:
801     // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=52661
802     return absl::StrCat("::int64_t{", number + 1, "} - 1");
803   }
804   return absl::StrCat("::int64_t{", number, "}");
805 }
806 
UInt64ToString(uint64_t number)807 static std::string UInt64ToString(uint64_t number) {
808   return absl::StrCat("::uint64_t{", number, "u}");
809 }
810 
DefaultValue(const FieldDescriptor * field)811 std::string DefaultValue(const FieldDescriptor* field) {
812   return DefaultValue(Options(), field);
813 }
814 
DefaultValue(const Options & options,const FieldDescriptor * field)815 std::string DefaultValue(const Options& options, const FieldDescriptor* field) {
816   switch (field->cpp_type()) {
817     case FieldDescriptor::CPPTYPE_INT32:
818       return Int32ToString(field->default_value_int32());
819     case FieldDescriptor::CPPTYPE_UINT32:
820       return absl::StrCat(field->default_value_uint32(), "u");
821     case FieldDescriptor::CPPTYPE_INT64:
822       return Int64ToString(field->default_value_int64());
823     case FieldDescriptor::CPPTYPE_UINT64:
824       return UInt64ToString(field->default_value_uint64());
825     case FieldDescriptor::CPPTYPE_DOUBLE: {
826       double value = field->default_value_double();
827       if (value == std::numeric_limits<double>::infinity()) {
828         return "std::numeric_limits<double>::infinity()";
829       } else if (value == -std::numeric_limits<double>::infinity()) {
830         return "-std::numeric_limits<double>::infinity()";
831       } else if (value != value) {
832         return "std::numeric_limits<double>::quiet_NaN()";
833       } else {
834         return io::SimpleDtoa(value);
835       }
836     }
837     case FieldDescriptor::CPPTYPE_FLOAT: {
838       float value = field->default_value_float();
839       if (value == std::numeric_limits<float>::infinity()) {
840         return "std::numeric_limits<float>::infinity()";
841       } else if (value == -std::numeric_limits<float>::infinity()) {
842         return "-std::numeric_limits<float>::infinity()";
843       } else if (value != value) {
844         return "std::numeric_limits<float>::quiet_NaN()";
845       } else {
846         std::string float_value = io::SimpleFtoa(value);
847         // If floating point value contains a period (.) or an exponent
848         // (either E or e), then append suffix 'f' to make it a float
849         // literal.
850         if (float_value.find_first_of(".eE") != std::string::npos) {
851           float_value.push_back('f');
852         }
853         return float_value;
854       }
855     }
856     case FieldDescriptor::CPPTYPE_BOOL:
857       return field->default_value_bool() ? "true" : "false";
858     case FieldDescriptor::CPPTYPE_ENUM:
859       // Lazy:  Generate a static_cast because we don't have a helper function
860       //   that constructs the full name of an enum value.
861       return absl::Substitute(
862           "static_cast< $0 >($1)", ClassName(field->enum_type(), true),
863           Int32ToString(field->default_value_enum()->number()));
864     case FieldDescriptor::CPPTYPE_STRING:
865       return absl::StrCat(
866           "\"", EscapeTrigraphs(absl::CEscape(field->default_value_string())),
867           "\"");
868     case FieldDescriptor::CPPTYPE_MESSAGE:
869       return absl::StrCat("*", FieldMessageTypeName(field, options),
870                           "::internal_default_instance()");
871   }
872   // Can't actually get here; make compiler happy.  (We could add a default
873   // case above but then we wouldn't get the nice compiler warning when a
874   // new type is added.)
875   ABSL_LOG(FATAL) << "Can't get here.";
876   return "";
877 }
878 
879 // Convert a file name into a valid identifier.
FilenameIdentifier(absl::string_view filename)880 std::string FilenameIdentifier(absl::string_view filename) {
881   std::string result;
882   for (size_t i = 0; i < filename.size(); ++i) {
883     if (absl::ascii_isalnum(filename[i])) {
884       result.push_back(filename[i]);
885     } else {
886       // Not alphanumeric.  To avoid any possibility of name conflicts we
887       // use the hex code for the character.
888       absl::StrAppend(&result, "_",
889                       absl::Hex(static_cast<uint8_t>(filename[i])));
890     }
891   }
892   return result;
893 }
894 
UniqueName(absl::string_view name,absl::string_view filename,const Options & options)895 std::string UniqueName(absl::string_view name, absl::string_view filename,
896                        const Options& options) {
897   return absl::StrCat(name, "_", FilenameIdentifier(filename));
898 }
899 
900 // Return the qualified C++ name for a file level symbol.
QualifiedFileLevelSymbol(const FileDescriptor * file,absl::string_view name,const Options & options)901 std::string QualifiedFileLevelSymbol(const FileDescriptor* file,
902                                      absl::string_view name,
903                                      const Options& options) {
904   if (file->package().empty()) {
905     return absl::StrCat("::", name);
906   }
907   return absl::StrCat(Namespace(file, options), "::", name);
908 }
909 
910 // Escape C++ trigraphs by escaping question marks to \?
EscapeTrigraphs(absl::string_view to_escape)911 std::string EscapeTrigraphs(absl::string_view to_escape) {
912   return absl::StrReplaceAll(to_escape, {{"?", "\\?"}});
913 }
914 
915 // Escaped function name to eliminate naming conflict.
SafeFunctionName(const Descriptor * descriptor,const FieldDescriptor * field,absl::string_view prefix)916 std::string SafeFunctionName(const Descriptor* descriptor,
917                              const FieldDescriptor* field,
918                              absl::string_view prefix) {
919   // Do not use FieldName() since it will escape keywords.
920   std::string name = std::string(field->name());
921   absl::AsciiStrToLower(&name);
922   std::string function_name = absl::StrCat(prefix, name);
923   if (descriptor->FindFieldByName(function_name)) {
924     // Single underscore will also make it conflicting with the private data
925     // member. We use double underscore to escape function names.
926     function_name.append("__");
927   } else if (Keywords().count(name) > 0) {
928     // If the field name is a keyword, we append the underscore back to keep it
929     // consistent with other function names.
930     function_name.append("_");
931   }
932   return function_name;
933 }
934 
IsProfileDriven(const Options & options)935 bool IsProfileDriven(const Options& options) {
936   return !options.bootstrap && !options.opensource_runtime &&
937          options.access_info_map != nullptr;
938 }
939 
// Always returns false in this implementation; neither argument is consulted.
bool IsRarelyPresent(const FieldDescriptor* field, const Options& options) {
  static_cast<void>(field);
  static_cast<void>(options);
  return false;
}
943 
// Always returns false in this implementation; neither argument is consulted.
bool IsLikelyPresent(const FieldDescriptor* field, const Options& options) {
  static_cast<void>(field);
  static_cast<void>(options);
  return false;
}
947 
// Always returns 1 in this implementation; neither argument is consulted.
float GetPresenceProbability(const FieldDescriptor* field,
                             const Options& options) {
  static_cast<void>(field);
  static_cast<void>(options);
  return 1.0f;
}
952 
IsStringInliningEnabled(const Options & options)953 bool IsStringInliningEnabled(const Options& options) {
954   return options.force_inline_string || IsProfileDriven(options);
955 }
956 
CanStringBeInlined(const FieldDescriptor * field)957 bool CanStringBeInlined(const FieldDescriptor* field) {
958   // TODO: Handle inlining for any.proto.
959   if (IsAnyMessage(field->containing_type())) return false;
960   if (field->containing_type()->options().map_entry()) return false;
961   if (field->is_repeated()) return false;
962 
963   // We rely on has bits to distinguish field presence for release_$name$.  When
964   // there is no hasbit, we cannot use the address of the string instance when
965   // the field has been inlined.
966   if (!internal::cpp::HasHasbit(field)) return false;
967 
968   if (!IsString(field)) return false;
969   if (!field->default_value_string().empty()) return false;
970 
971   return true;
972 }
973 
// Always returns false in this implementation; neither argument is consulted.
bool IsStringInlined(const FieldDescriptor* field, const Options& options) {
  static_cast<void>(field);
  static_cast<void>(options);
  return false;
}
979 
HasLazyFields(const Descriptor * descriptor,const Options & options,MessageSCCAnalyzer * scc_analyzer)980 static bool HasLazyFields(const Descriptor* descriptor, const Options& options,
981                           MessageSCCAnalyzer* scc_analyzer) {
982   for (int field_idx = 0; field_idx < descriptor->field_count(); field_idx++) {
983     if (IsLazy(descriptor->field(field_idx), options, scc_analyzer)) {
984       return true;
985     }
986   }
987   for (int idx = 0; idx < descriptor->extension_count(); idx++) {
988     if (IsLazy(descriptor->extension(idx), options, scc_analyzer)) {
989       return true;
990     }
991   }
992   for (int idx = 0; idx < descriptor->nested_type_count(); idx++) {
993     if (HasLazyFields(descriptor->nested_type(idx), options, scc_analyzer)) {
994       return true;
995     }
996   }
997   return false;
998 }
999 
1000 // Does the given FileDescriptor use lazy fields?
HasLazyFields(const FileDescriptor * file,const Options & options,MessageSCCAnalyzer * scc_analyzer)1001 bool HasLazyFields(const FileDescriptor* file, const Options& options,
1002                    MessageSCCAnalyzer* scc_analyzer) {
1003   for (int i = 0; i < file->message_type_count(); i++) {
1004     const Descriptor* descriptor(file->message_type(i));
1005     if (HasLazyFields(descriptor, options, scc_analyzer)) {
1006       return true;
1007     }
1008   }
1009   for (int field_idx = 0; field_idx < file->extension_count(); field_idx++) {
1010     if (IsLazy(file->extension(field_idx), options, scc_analyzer)) {
1011       return true;
1012     }
1013   }
1014   return false;
1015 }
1016 
// Always returns false in this implementation; no argument is consulted.
bool ShouldVerify(const Descriptor* descriptor, const Options& options,
                  MessageSCCAnalyzer* scc_analyzer) {
  static_cast<void>(descriptor);
  static_cast<void>(options);
  static_cast<void>(scc_analyzer);
  return false;
}
1024 
// Always returns false in this implementation; no argument is consulted.
bool ShouldVerify(const FileDescriptor* file, const Options& options,
                  MessageSCCAnalyzer* scc_analyzer) {
  static_cast<void>(file);
  static_cast<void>(options);
  static_cast<void>(scc_analyzer);
  return false;
}
1032 
// Always returns false in this implementation; `field` is not consulted.
bool ShouldVerifyRecursively(const FieldDescriptor* field) {
  static_cast<void>(field);
  return false;
}
1037 
ShouldVerifySimple(const Descriptor * descriptor)1038 VerifySimpleType ShouldVerifySimple(const Descriptor* descriptor) {
1039   (void)descriptor;
1040   return VerifySimpleType::kCustom;
1041 }
1042 
// Always returns false in this implementation; the arguments are ignored.
bool ShouldSplit(const Descriptor*, const Options&) {
  return false;
}
// Always returns false in this implementation; the arguments are ignored.
bool ShouldSplit(const FieldDescriptor*, const Options&) {
  return false;
}
1045 
// Always returns false in this implementation; neither argument is consulted.
bool ShouldForceAllocationOnConstruction(const Descriptor* desc,
                                         const Options& options) {
  static_cast<void>(desc);
  static_cast<void>(options);
  return false;
}
1052 
// Always returns true in this implementation; neither argument is consulted.
bool IsPresentMessage(const Descriptor* descriptor, const Options& options) {
  static_cast<void>(descriptor);
  static_cast<void>(options);
  // Assume that the message is present if there is no profile.
  return true;
}
1059 
FindHottestField(const std::vector<const FieldDescriptor * > & fields,const Options & options)1060 const FieldDescriptor* FindHottestField(
1061     const std::vector<const FieldDescriptor*>& fields, const Options& options) {
1062   (void)fields;
1063   (void)options;
1064   return nullptr;
1065 }
1066 
HasRepeatedFields(const Descriptor * descriptor)1067 static bool HasRepeatedFields(const Descriptor* descriptor) {
1068   for (int i = 0; i < descriptor->field_count(); ++i) {
1069     if (descriptor->field(i)->label() == FieldDescriptor::LABEL_REPEATED) {
1070       return true;
1071     }
1072   }
1073   for (int i = 0; i < descriptor->nested_type_count(); ++i) {
1074     if (HasRepeatedFields(descriptor->nested_type(i))) return true;
1075   }
1076   return false;
1077 }
1078 
HasRepeatedFields(const FileDescriptor * file)1079 bool HasRepeatedFields(const FileDescriptor* file) {
1080   for (int i = 0; i < file->message_type_count(); ++i) {
1081     if (HasRepeatedFields(file->message_type(i))) return true;
1082   }
1083   return false;
1084 }
1085 
IsStringPieceField(const FieldDescriptor * field,const Options & options)1086 static bool IsStringPieceField(const FieldDescriptor* field,
1087                                const Options& options) {
1088   return field->cpp_type() == FieldDescriptor::CPPTYPE_STRING &&
1089          internal::cpp::EffectiveStringCType(field) ==
1090              FieldOptions::STRING_PIECE;
1091 }
1092 
HasStringPieceFields(const Descriptor * descriptor,const Options & options)1093 static bool HasStringPieceFields(const Descriptor* descriptor,
1094                                  const Options& options) {
1095   for (int i = 0; i < descriptor->field_count(); ++i) {
1096     if (IsStringPieceField(descriptor->field(i), options)) return true;
1097   }
1098   for (int i = 0; i < descriptor->nested_type_count(); ++i) {
1099     if (HasStringPieceFields(descriptor->nested_type(i), options)) return true;
1100   }
1101   return false;
1102 }
1103 
HasStringPieceFields(const FileDescriptor * file,const Options & options)1104 bool HasStringPieceFields(const FileDescriptor* file, const Options& options) {
1105   for (int i = 0; i < file->message_type_count(); ++i) {
1106     if (HasStringPieceFields(file->message_type(i), options)) return true;
1107   }
1108   return false;
1109 }
1110 
IsCordField(const FieldDescriptor * field,const Options & options)1111 static bool IsCordField(const FieldDescriptor* field, const Options& options) {
1112   return field->cpp_type() == FieldDescriptor::CPPTYPE_STRING &&
1113          internal::cpp::EffectiveStringCType(field) == FieldOptions::CORD;
1114 }
1115 
HasCordFields(const Descriptor * descriptor,const Options & options)1116 static bool HasCordFields(const Descriptor* descriptor,
1117                           const Options& options) {
1118   for (int i = 0; i < descriptor->field_count(); ++i) {
1119     if (IsCordField(descriptor->field(i), options)) return true;
1120   }
1121   for (int i = 0; i < descriptor->nested_type_count(); ++i) {
1122     if (HasCordFields(descriptor->nested_type(i), options)) return true;
1123   }
1124   return false;
1125 }
1126 
HasCordFields(const FileDescriptor * file,const Options & options)1127 bool HasCordFields(const FileDescriptor* file, const Options& options) {
1128   for (int i = 0; i < file->message_type_count(); ++i) {
1129     if (HasCordFields(file->message_type(i), options)) return true;
1130   }
1131   return false;
1132 }
1133 
HasExtensionsOrExtendableMessage(const Descriptor * descriptor)1134 static bool HasExtensionsOrExtendableMessage(const Descriptor* descriptor) {
1135   if (descriptor->extension_range_count() > 0) return true;
1136   if (descriptor->extension_count() > 0) return true;
1137   for (int i = 0; i < descriptor->nested_type_count(); ++i) {
1138     if (HasExtensionsOrExtendableMessage(descriptor->nested_type(i))) {
1139       return true;
1140     }
1141   }
1142   return false;
1143 }
1144 
HasExtensionsOrExtendableMessage(const FileDescriptor * file)1145 bool HasExtensionsOrExtendableMessage(const FileDescriptor* file) {
1146   if (file->extension_count() > 0) return true;
1147   for (int i = 0; i < file->message_type_count(); ++i) {
1148     if (HasExtensionsOrExtendableMessage(file->message_type(i))) return true;
1149   }
1150   return false;
1151 }
1152 
HasMapFields(const Descriptor * descriptor)1153 static bool HasMapFields(const Descriptor* descriptor) {
1154   for (int i = 0; i < descriptor->field_count(); ++i) {
1155     if (descriptor->field(i)->is_map()) {
1156       return true;
1157     }
1158   }
1159   for (int i = 0; i < descriptor->nested_type_count(); ++i) {
1160     if (HasMapFields(descriptor->nested_type(i))) return true;
1161   }
1162   return false;
1163 }
1164 
HasMapFields(const FileDescriptor * file)1165 bool HasMapFields(const FileDescriptor* file) {
1166   for (int i = 0; i < file->message_type_count(); ++i) {
1167     if (HasMapFields(file->message_type(i))) return true;
1168   }
1169   return false;
1170 }
1171 
// Always returns false in this implementation; `descriptor` is not consulted.
bool HasV2Table(const Descriptor* descriptor) {
  static_cast<void>(descriptor);
  return false;
}
1175 
HasV2Table(const FileDescriptor * file)1176 bool HasV2Table(const FileDescriptor* file) {
1177   for (int i = 0; i < file->message_type_count(); ++i) {
1178     if (HasV2Table(file->message_type(i))) return true;
1179   }
1180   return false;
1181 }
1182 
HasEnumDefinitions(const Descriptor * message_type)1183 static bool HasEnumDefinitions(const Descriptor* message_type) {
1184   if (message_type->enum_type_count() > 0) return true;
1185   for (int i = 0; i < message_type->nested_type_count(); ++i) {
1186     if (HasEnumDefinitions(message_type->nested_type(i))) return true;
1187   }
1188   return false;
1189 }
1190 
HasEnumDefinitions(const FileDescriptor * file)1191 bool HasEnumDefinitions(const FileDescriptor* file) {
1192   if (file->enum_type_count() > 0) return true;
1193   for (int i = 0; i < file->message_type_count(); ++i) {
1194     if (HasEnumDefinitions(file->message_type(i))) return true;
1195   }
1196   return false;
1197 }
1198 
IsStringOrMessage(const FieldDescriptor * field)1199 bool IsStringOrMessage(const FieldDescriptor* field) {
1200   switch (field->cpp_type()) {
1201     case FieldDescriptor::CPPTYPE_INT32:
1202     case FieldDescriptor::CPPTYPE_INT64:
1203     case FieldDescriptor::CPPTYPE_UINT32:
1204     case FieldDescriptor::CPPTYPE_UINT64:
1205     case FieldDescriptor::CPPTYPE_DOUBLE:
1206     case FieldDescriptor::CPPTYPE_FLOAT:
1207     case FieldDescriptor::CPPTYPE_BOOL:
1208     case FieldDescriptor::CPPTYPE_ENUM:
1209       return false;
1210     case FieldDescriptor::CPPTYPE_STRING:
1211     case FieldDescriptor::CPPTYPE_MESSAGE:
1212       return true;
1213   }
1214 
1215   ABSL_LOG(FATAL) << "Can't get here.";
1216   return false;
1217 }
1218 
IsAnyMessage(const FileDescriptor * descriptor)1219 bool IsAnyMessage(const FileDescriptor* descriptor) {
1220   return descriptor->name() == kAnyProtoFile;
1221 }
1222 
IsAnyMessage(const Descriptor * descriptor)1223 bool IsAnyMessage(const Descriptor* descriptor) {
1224   return descriptor->name() == kAnyMessageName &&
1225          IsAnyMessage(descriptor->file());
1226 }
1227 
IsWellKnownMessage(const FileDescriptor * file)1228 bool IsWellKnownMessage(const FileDescriptor* file) {
1229   static const auto* well_known_files = new absl::flat_hash_set<std::string>{
1230       "google/protobuf/any.proto",
1231       "google/protobuf/api.proto",
1232       "google/protobuf/compiler/plugin.proto",
1233       "google/protobuf/descriptor.proto",
1234       "google/protobuf/duration.proto",
1235       "google/protobuf/empty.proto",
1236       "google/protobuf/field_mask.proto",
1237       "google/protobuf/source_context.proto",
1238       "google/protobuf/struct.proto",
1239       "google/protobuf/timestamp.proto",
1240       "google/protobuf/type.proto",
1241       "google/protobuf/wrappers.proto",
1242   };
1243   return well_known_files->find(file->name()) != well_known_files->end();
1244 }
1245 
ChangeTo(absl::string_view name,io::Printer::SourceLocation loc)1246 void NamespaceOpener::ChangeTo(absl::string_view name,
1247                                io::Printer::SourceLocation loc) {
1248   std::vector<std::string> new_stack =
1249       absl::StrSplit(name, "::", absl::SkipEmpty());
1250   size_t len = std::min(name_stack_.size(), new_stack.size());
1251   size_t common_idx = 0;
1252   while (common_idx < len) {
1253     if (name_stack_[common_idx] != new_stack[common_idx]) {
1254       break;
1255     }
1256     ++common_idx;
1257   }
1258 
1259   for (size_t i = name_stack_.size(); i > common_idx; i--) {
1260     p_->Emit({{"ns", name_stack_[i - 1]}}, R"(
1261       }  // namespace $ns$
1262     )",
1263              loc);
1264   }
1265   for (size_t i = common_idx; i < new_stack.size(); ++i) {
1266     p_->Emit({{"ns", new_stack[i]}}, R"(
1267       namespace $ns$ {
1268     )",
1269              loc);
1270   }
1271 
1272   name_stack_ = std::move(new_stack);
1273 }
1274 
GenerateUtf8CheckCode(io::Printer * p,const FieldDescriptor * field,const Options & options,bool for_parse,absl::string_view params,absl::string_view strict_function,absl::string_view verify_function)1275 static void GenerateUtf8CheckCode(io::Printer* p, const FieldDescriptor* field,
1276                                   const Options& options, bool for_parse,
1277                                   absl::string_view params,
1278                                   absl::string_view strict_function,
1279                                   absl::string_view verify_function) {
1280   if (field->type() != FieldDescriptor::TYPE_STRING) return;
1281 
1282   auto v = p->WithVars({
1283       {"params", params},
1284       {"Strict", strict_function},
1285       {"Verify", verify_function},
1286   });
1287 
1288   bool is_lite =
1289       GetOptimizeFor(field->file(), options) == FileOptions::LITE_RUNTIME;
1290   switch (internal::cpp::GetUtf8CheckMode(field, is_lite)) {
1291     case internal::cpp::Utf8CheckMode::kStrict:
1292       if (for_parse) {
1293         p->Emit(R"cc(
1294           DO_($pbi$::WireFormatLite::$Strict$(
1295               $params$ $pbi$::WireFormatLite::PARSE, "$pkg.Msg.field$"));
1296         )cc");
1297       } else {
1298         p->Emit(R"cc(
1299           $pbi$::WireFormatLite::$Strict$(
1300               $params$ $pbi$::WireFormatLite::SERIALIZE, "$pkg.Msg.field$");
1301         )cc");
1302       }
1303       break;
1304 
1305     case internal::cpp::Utf8CheckMode::kVerify:
1306       if (for_parse) {
1307         p->Emit(R"cc(
1308           $pbi$::WireFormat::$Verify$($params$ $pbi$::WireFormat::PARSE,
1309                                       "$pkg.Msg.field$");
1310         )cc");
1311       } else {
1312         p->Emit(R"cc(
1313           $pbi$::WireFormat::$Verify$($params$ $pbi$::WireFormat::SERIALIZE,
1314                                       "$pkg.Msg.field$");
1315         )cc");
1316       }
1317       break;
1318 
1319     case internal::cpp::Utf8CheckMode::kNone:
1320       break;
1321   }
1322 }
1323 
GenerateUtf8CheckCodeForString(const FieldDescriptor * field,const Options & options,bool for_parse,absl::string_view parameters,const Formatter & format)1324 void GenerateUtf8CheckCodeForString(const FieldDescriptor* field,
1325                                     const Options& options, bool for_parse,
1326                                     absl::string_view parameters,
1327                                     const Formatter& format) {
1328   GenerateUtf8CheckCode(format.printer(), field, options, for_parse, parameters,
1329                         "VerifyUtf8String", "VerifyUTF8StringNamedField");
1330 }
1331 
GenerateUtf8CheckCodeForCord(const FieldDescriptor * field,const Options & options,bool for_parse,absl::string_view parameters,const Formatter & format)1332 void GenerateUtf8CheckCodeForCord(const FieldDescriptor* field,
1333                                   const Options& options, bool for_parse,
1334                                   absl::string_view parameters,
1335                                   const Formatter& format) {
1336   GenerateUtf8CheckCode(format.printer(), field, options, for_parse, parameters,
1337                         "VerifyUtf8Cord", "VerifyUTF8CordNamedField");
1338 }
1339 
GenerateUtf8CheckCodeForString(io::Printer * p,const FieldDescriptor * field,const Options & options,bool for_parse,absl::string_view parameters)1340 void GenerateUtf8CheckCodeForString(io::Printer* p,
1341                                     const FieldDescriptor* field,
1342                                     const Options& options, bool for_parse,
1343                                     absl::string_view parameters) {
1344   GenerateUtf8CheckCode(p, field, options, for_parse, parameters,
1345                         "VerifyUtf8String", "VerifyUTF8StringNamedField");
1346 }
1347 
GenerateUtf8CheckCodeForCord(io::Printer * p,const FieldDescriptor * field,const Options & options,bool for_parse,absl::string_view parameters)1348 void GenerateUtf8CheckCodeForCord(io::Printer* p, const FieldDescriptor* field,
1349                                   const Options& options, bool for_parse,
1350                                   absl::string_view parameters) {
1351   GenerateUtf8CheckCode(p, field, options, for_parse, parameters,
1352                         "VerifyUtf8Cord", "VerifyUTF8CordNamedField");
1353 }
1354 
FlattenMessagesInFile(const FileDescriptor * file,std::vector<const Descriptor * > * result)1355 void FlattenMessagesInFile(const FileDescriptor* file,
1356                            std::vector<const Descriptor*>* result) {
1357   internal::cpp::VisitDescriptorsInFileOrder(file,
1358                                              [&](const Descriptor* descriptor) {
1359                                                result->push_back(descriptor);
1360                                                return std::false_type{};
1361                                              });
1362 }
1363 
1364 // TopologicalSortMessagesInFile topologically sorts and returns a vector of
1365 // proto descriptors defined in the file provided as input.  The underlying
1366 // graph is defined using dependency relationship between protos.  For example,
1367 // if proto A contains proto B as a member, then proto B would be ordered before
1368 // proto A in a topological ordering, assuming there is no mutual dependence
1369 // between the two protos.  The topological order is used to emit proto
1370 // declarations so that a proto is declared after all the protos it is dependent
1371 // on have been declared (again assuming no mutual dependence).  This is needed
1372 // in cases where we may declare proto B as a member of proto A using an object,
1373 // instead of a pointer.
1374 //
// The proto dependency graph can have cycles.  So instead of working directly
// with protos, we compute strongly connected components (SCCs) composed of
// protos with mutual dependence.  The dependency graph on SCCs is a directed
// acyclic graph (DAG), and therefore a topological order can be computed for
// it, i.e. an order in which an SCC is placed only after all the SCCs it
// depends on have been placed.
1381 //
1382 // The function below first constructs the SCC graph and then computes a
1383 // deterministic topological order for the graph.
1384 //
1385 // For computing the SCC graph, we follow the following steps:
1386 // 1. Collect the descriptors for the messages in the file.
1387 // 2. Construct a map for descriptor to SCC mapping.
// 3. Construct a map for dependence between SCCs, referred to as
// child_to_parent_scc_map below.  This map is constructed by running a BFS on
// the SCCs.
1391 //
1392 // For computing a deterministic topological order on the graph computed in step
1393 // 3 above, we do the following:
1394 // 1. Since the graph on SCCs is a DAG, therefore there will be at least one SCC
1395 // that does not depend on other SCCs.  We first construct a list of all such
1396 // SCCs.
1397 // 2. Next we run a BFS starting with the list of SCCs computed in step 1.  For
1398 // each SCC, we track the number of the SCC it is dependent on and the number of
1399 // those SCC that have been ordered.  Once all the SCCs an SCC is dependent on
1400 // have been ordered, this SCC is added to list of SCCs that are to be ordered
1401 // next.
1402 // 3. Within an SCC, the descriptors are ordered on the basis of the full_name()
1403 // of the descriptors.
TopologicalSortMessagesInFile(const FileDescriptor * file,MessageSCCAnalyzer & scc_analyzer)1404 std::vector<const Descriptor*> TopologicalSortMessagesInFile(
1405     const FileDescriptor* file, MessageSCCAnalyzer& scc_analyzer) {
1406   // Collect the messages defined in this file.
1407   std::vector<const Descriptor*> messages_in_file = FlattenMessagesInFile(file);
1408   if (messages_in_file.empty()) return {};
1409   // Populate the map from the descriptor to the SCC to which the descriptor
1410   // belongs.
1411   absl::flat_hash_map<const Descriptor*, const SCC*> descriptor_to_scc_map;
1412   descriptor_to_scc_map.reserve(messages_in_file.size());
1413   for (const Descriptor* d : messages_in_file) {
1414     descriptor_to_scc_map.emplace(d, scc_analyzer.GetSCC(d));
1415   }
1416   ABSL_DCHECK(messages_in_file.size() == descriptor_to_scc_map.size())
1417       << "messages_in_file has duplicate messages!";
1418   // Each parent SCC has information about the child SCCs i.e. SCCs for fields
1419   // that are contained in the protos that belong to the parent SCC.  Use this
1420   // information to construct the inverse map from child SCC to parent SCC.
1421   absl::flat_hash_map<const SCC*, absl::flat_hash_set<const SCC*>>
1422       child_to_parent_scc_map;
1423   // For recording the number of edges from each SCC to other SCCs in the
1424   // forward map.
1425   absl::flat_hash_map<const SCC*, int> scc_to_outgoing_edges_map;
1426   std::queue<const SCC*> sccs_to_process;
1427   for (const auto& p : descriptor_to_scc_map) {
1428     sccs_to_process.push(p.second);
1429   }
1430   // Run a BFS to fill the two data structures: child_to_parent_scc_map and
1431   // scc_to_outgoing_edges_map.
1432   while (!sccs_to_process.empty()) {
1433     const SCC* scc = sccs_to_process.front();
1434     sccs_to_process.pop();
1435     auto& count = scc_to_outgoing_edges_map[scc];
1436     for (const auto& child : scc->children) {
1437       // Test whether this child has been seen thus far.  We do not know if the
1438       // children SCC vector contains unique children SCC.
1439       auto& parent_set = child_to_parent_scc_map[child];
1440       if (parent_set.empty()) {
1441         // Just added.
1442         sccs_to_process.push(child);
1443       }
1444       auto ret = parent_set.insert(scc);
1445       if (ret.second) {
1446         ++count;
1447       }
1448     }
1449   }
1450   std::vector<const SCC*> next_scc_q;
1451   // Find out the SCCs that do not have an outgoing edge i.e. the protos in this
1452   // SCC do not depend on protos other than the ones in this SCC.
1453   for (const auto& p : scc_to_outgoing_edges_map) {
1454     if (p.second == 0) {
1455       next_scc_q.push_back(p.first);
1456     }
1457   }
1458   ABSL_DCHECK(!next_scc_q.empty()) << "No independent components!";
1459   // Topologically sort the SCCs.
1460   // If an SCC no longer has an outgoing edge i.e. all the SCCs it depends on
1461   // have been ordered, then this SCC is now a candidate for ordering.
1462   std::vector<const Descriptor*> sorted_messages;
1463   while (!next_scc_q.empty()) {
1464     std::vector<const SCC*> current_scc_q;
1465     current_scc_q.swap(next_scc_q);
1466     // SCCs present in the current_scc_q are topologically equivalent to each
1467     // other.  Therefore they can be added to the output in any order.  We sort
1468     // these SCCs by the full_name() of the first descriptor that belongs to the
1469     // SCC.  This works well since the descriptors in each SCC are sorted by
1470     // full_name() and also that a descriptor can be part of only one SCC.
1471     std::sort(current_scc_q.begin(), current_scc_q.end(),
1472               [](const SCC* a, const SCC* b) {
1473                 ABSL_DCHECK(!a->descriptors.empty()) << "No descriptors!";
1474                 ABSL_DCHECK(!b->descriptors.empty()) << "No descriptors!";
1475                 const Descriptor* ad = a->descriptors[0];
1476                 const Descriptor* bd = b->descriptors[0];
1477                 return ad->full_name() < bd->full_name();
1478               });
1479     while (!current_scc_q.empty()) {
1480       const SCC* scc = current_scc_q.back();
1481       current_scc_q.pop_back();
1482       // Messages in an SCC are already sorted on full_name().  So we can emit
1483       // them right away.
1484       for (const Descriptor* d : scc->descriptors) {
1485         // Only push messages that are defined in the file.
1486         if (descriptor_to_scc_map.contains(d)) {
1487           sorted_messages.push_back(d);
1488         }
1489       }
1490       // Find all the SCCs that are dependent on the current SCC.
1491       const auto& parents = child_to_parent_scc_map.find(scc);
1492       if (parents == child_to_parent_scc_map.end()) continue;
1493       for (const SCC* parent : parents->second) {
1494         auto it = scc_to_outgoing_edges_map.find(parent);
1495         ABSL_CHECK(it != scc_to_outgoing_edges_map.end());
1496         ABSL_CHECK(it->second > 0);
1497         // Reduce the dependency count for the SCC.  In case the dependency
1498         // count reaches 0, add the SCC to the list of SCCs to be ordered next.
1499         it->second--;
1500         if (it->second == 0) {
1501           next_scc_q.push_back(parent);
1502         }
1503       }
1504     }
1505   }
1506   for (const auto& p : scc_to_outgoing_edges_map) {
1507     ABSL_DCHECK(p.second == 0) << "SCC left behind!";
1508   }
1509   return sorted_messages;
1510 }
1511 
HasWeakFields(const Descriptor * descriptor,const Options & options)1512 bool HasWeakFields(const Descriptor* descriptor, const Options& options) {
1513   for (int i = 0; i < descriptor->field_count(); i++) {
1514     if (IsWeak(descriptor->field(i), options)) return true;
1515   }
1516   return false;
1517 }
1518 
HasWeakFields(const FileDescriptor * file,const Options & options)1519 bool HasWeakFields(const FileDescriptor* file, const Options& options) {
1520   for (int i = 0; i < file->message_type_count(); ++i) {
1521     if (HasWeakFields(file->message_type(i), options)) return true;
1522   }
1523   return false;
1524 }
1525 
UsingImplicitWeakDescriptor(const FileDescriptor * file,const Options & options)1526 bool UsingImplicitWeakDescriptor(const FileDescriptor* file,
1527                                  const Options& options) {
1528   return HasDescriptorMethods(file, options) &&
1529          !IsBootstrapProto(options, file) &&
1530          options.descriptor_implicit_weak_messages &&
1531          !options.opensource_runtime;
1532 }
1533 
StrongReferenceToType(const Descriptor * desc,const Options & options)1534 std::string StrongReferenceToType(const Descriptor* desc,
1535                                   const Options& options) {
1536   const auto name = QualifiedDefaultInstanceName(desc, options);
1537   return absl::StrFormat("::%s::internal::StrongPointer<decltype(%s)*, &%s>()",
1538                          ProtobufNamespace(options), name, name);
1539 }
1540 
WeakDescriptorDataSection(absl::string_view prefix,const Descriptor * descriptor,int index_in_file_messages,const Options & options)1541 std::string WeakDescriptorDataSection(absl::string_view prefix,
1542                                       const Descriptor* descriptor,
1543                                       int index_in_file_messages,
1544                                       const Options& options) {
1545   const auto* file = descriptor->file();
1546 
1547   // To make a compact name we use the index of the object in its file
1548   // of its name.
1549   // So the name could be `pb_def_3_HASH` instead of
1550   // `pd_def_VeryLongClassName_WithNesting_AndMoreNames_HASH`
1551   // We need a know common prefix to merge the sections later on.
1552   return UniqueName(absl::StrCat("pb_", prefix, "_", index_in_file_messages),
1553                     file, options);
1554 }
1555 
UsingImplicitWeakFields(const FileDescriptor * file,const Options & options)1556 bool UsingImplicitWeakFields(const FileDescriptor* file,
1557                              const Options& options) {
1558   return options.lite_implicit_weak_fields &&
1559          GetOptimizeFor(file, options) == FileOptions::LITE_RUNTIME;
1560 }
1561 
IsImplicitWeakField(const FieldDescriptor * field,const Options & options,MessageSCCAnalyzer * scc_analyzer)1562 bool IsImplicitWeakField(const FieldDescriptor* field, const Options& options,
1563                          MessageSCCAnalyzer* scc_analyzer) {
1564   return UsingImplicitWeakFields(field->file(), options) &&
1565          field->type() == FieldDescriptor::TYPE_MESSAGE &&
1566          !field->is_required() && !field->is_map() && !field->is_extension() &&
1567          !IsWellKnownMessage(field->message_type()->file()) &&
1568          field->message_type()->file()->name() !=
1569              "net/proto2/proto/descriptor.proto" &&
1570          // We do not support implicit weak fields between messages in the same
1571          // strongly-connected component.
1572          scc_analyzer->GetSCC(field->containing_type()) !=
1573              scc_analyzer->GetSCC(field->message_type());
1574 }
1575 
GetSCCAnalysis(const SCC * scc)1576 MessageAnalysis MessageSCCAnalyzer::GetSCCAnalysis(const SCC* scc) {
1577   auto it = analysis_cache_.find(scc);
1578   if (it != analysis_cache_.end()) return it->second;
1579 
1580   MessageAnalysis result;
1581   if (UsingImplicitWeakFields(scc->GetFile(), options_)) {
1582     result.contains_weak = true;
1583   }
1584   for (size_t i = 0; i < scc->descriptors.size(); ++i) {
1585     const Descriptor* descriptor = scc->descriptors[i];
1586     if (descriptor->extension_range_count() > 0) {
1587       result.contains_extension = true;
1588     }
1589     for (int j = 0; j < descriptor->field_count(); j++) {
1590       const FieldDescriptor* field = descriptor->field(j);
1591       if (field->is_required()) {
1592         result.contains_required = true;
1593       }
1594       if (field->options().weak()) {
1595         result.contains_weak = true;
1596       }
1597       switch (field->type()) {
1598         case FieldDescriptor::TYPE_STRING:
1599         case FieldDescriptor::TYPE_BYTES: {
1600           if (field->cpp_string_type() ==
1601               FieldDescriptor::CppStringType::kCord) {
1602             result.contains_cord = true;
1603           }
1604           break;
1605         }
1606         case FieldDescriptor::TYPE_GROUP:
1607         case FieldDescriptor::TYPE_MESSAGE: {
1608           const SCC* child = analyzer_.GetSCC(field->message_type());
1609           if (child != scc) {
1610             MessageAnalysis analysis = GetSCCAnalysis(child);
1611             result.contains_cord |= analysis.contains_cord;
1612             result.contains_extension |= analysis.contains_extension;
1613             if (!ShouldIgnoreRequiredFieldCheck(field, options_)) {
1614               result.contains_required |= analysis.contains_required;
1615             }
1616             result.contains_weak |= analysis.contains_weak;
1617           } else {
1618             // This field points back into the same SCC hence the messages
1619             // in the SCC are recursive. Note if SCC contains more than two
1620             // nodes it has to be recursive, however this test also works for
1621             // a single node that is recursive.
1622             result.is_recursive = true;
1623           }
1624           break;
1625         }
1626         default:
1627           break;
1628       }
1629     }
1630   }
1631   // We deliberately only insert the result here. After we contracted the SCC
1632   // in the graph, the graph should be a DAG. Hence we shouldn't need to mark
1633   // nodes visited as we can never return to them. By inserting them here
1634   // we will go in an infinite loop if the SCC is not correct.
1635   return analysis_cache_[scc] = std::move(result);
1636 }
1637 
ListAllFields(const Descriptor * d,std::vector<const FieldDescriptor * > * fields)1638 void ListAllFields(const Descriptor* d,
1639                    std::vector<const FieldDescriptor*>* fields) {
1640   // Collect sub messages
1641   for (int i = 0; i < d->nested_type_count(); i++) {
1642     ListAllFields(d->nested_type(i), fields);
1643   }
1644   // Collect message level extensions.
1645   for (int i = 0; i < d->extension_count(); i++) {
1646     fields->push_back(d->extension(i));
1647   }
1648   // Add types of fields necessary
1649   for (int i = 0; i < d->field_count(); i++) {
1650     fields->push_back(d->field(i));
1651   }
1652 }
1653 
ListAllFields(const FileDescriptor * d,std::vector<const FieldDescriptor * > * fields)1654 void ListAllFields(const FileDescriptor* d,
1655                    std::vector<const FieldDescriptor*>* fields) {
1656   // Collect file level message.
1657   for (int i = 0; i < d->message_type_count(); i++) {
1658     ListAllFields(d->message_type(i), fields);
1659   }
1660   // Collect message level extensions.
1661   for (int i = 0; i < d->extension_count(); i++) {
1662     fields->push_back(d->extension(i));
1663   }
1664 }
1665 
ListAllTypesForServices(const FileDescriptor * fd,std::vector<const Descriptor * > * types)1666 void ListAllTypesForServices(const FileDescriptor* fd,
1667                              std::vector<const Descriptor*>* types) {
1668   for (int i = 0; i < fd->service_count(); i++) {
1669     const ServiceDescriptor* sd = fd->service(i);
1670     for (int j = 0; j < sd->method_count(); j++) {
1671       const MethodDescriptor* method = sd->method(j);
1672       types->push_back(method->input_type());
1673       types->push_back(method->output_type());
1674     }
1675   }
1676 }
1677 
GetBootstrapBasename(const Options & options,absl::string_view basename,std::string * bootstrap_basename)1678 bool GetBootstrapBasename(const Options& options, absl::string_view basename,
1679                           std::string* bootstrap_basename) {
1680   if (options.opensource_runtime) {
1681     return false;
1682   }
1683 
1684   static const auto* bootstrap_mapping =
1685       // TODO Replace these with string_view once we remove
1686       // StringPiece.
1687       new absl::flat_hash_map<absl::string_view, std::string>{
1688           {"net/proto2/proto/descriptor",
1689            "third_party/protobuf/descriptor"},
1690           {"third_party/protobuf/cpp_features",
1691            "third_party/protobuf/cpp_features"},
1692           {"third_party/protobuf/compiler/plugin",
1693            "third_party/protobuf/compiler/plugin"},
1694           {"net/proto2/compiler/proto/profile",
1695            "net/proto2/compiler/proto/profile_bootstrap"},
1696       };
1697   auto iter = bootstrap_mapping->find(basename);
1698   if (iter == bootstrap_mapping->end()) {
1699     *bootstrap_basename = std::string(basename);
1700     return false;
1701   } else {
1702     *bootstrap_basename = iter->second;
1703     return true;
1704   }
1705 }
1706 
IsBootstrapProto(const Options & options,const FileDescriptor * file)1707 bool IsBootstrapProto(const Options& options, const FileDescriptor* file) {
1708   std::string my_name = StripProto(file->name());
1709   return GetBootstrapBasename(options, my_name, &my_name);
1710 }
1711 
MaybeBootstrap(const Options & options,GeneratorContext * generator_context,bool bootstrap_flag,std::string * basename)1712 bool MaybeBootstrap(const Options& options, GeneratorContext* generator_context,
1713                     bool bootstrap_flag, std::string* basename) {
1714   std::string bootstrap_basename;
1715   if (!GetBootstrapBasename(options, *basename, &bootstrap_basename)) {
1716     return false;
1717   }
1718 
1719   if (bootstrap_flag) {
1720     // Adjust basename, but don't abort code generation.
1721     *basename = bootstrap_basename;
1722     return false;
1723   }
1724 
1725   auto pb_h = absl::WrapUnique(
1726       generator_context->Open(absl::StrCat(*basename, ".pb.h")));
1727 
1728   io::Printer p(pb_h.get());
1729   p.Emit(
1730       {
1731           {"fwd_to", bootstrap_basename},
1732           {"file", FilenameIdentifier(*basename)},
1733           {"fwd_to_suffix", options.opensource_runtime ? "pb" : "proto"},
1734           {"swig_evil",
1735            [&] {
1736              if (options.opensource_runtime) {
1737                return;
1738              }
1739              p.Emit(R"(
1740                #ifdef SWIG
1741                %include "$fwd_to$.pb.h"
1742                #endif  // SWIG
1743              )");
1744            }},
1745       },
1746       R"(
1747           #ifndef PROTOBUF_INCLUDED_$file$_FORWARD_PB_H
1748           #define PROTOBUF_INCLUDED_$file$_FORWARD_PB_H
1749           #include "$fwd_to$.$fwd_to_suffix$.h"  // IWYU pragma: export
1750           #endif  // PROTOBUF_INCLUDED_$file$_FORWARD_PB_H
1751           $swig_evil$;
1752       )");
1753 
1754   auto proto_h = absl::WrapUnique(
1755       generator_context->Open(absl::StrCat(*basename, ".proto.h")));
1756   io::Printer(proto_h.get())
1757       .Emit(
1758           {
1759               {"fwd_to", bootstrap_basename},
1760               {"file", FilenameIdentifier(*basename)},
1761           },
1762           R"(
1763             #ifndef PROTOBUF_INCLUDED_$file$_FORWARD_PROTO_H
1764             #define PROTOBUF_INCLUDED_$file$_FORWARD_PROTO_H
1765             #include "$fwd_to$.proto.h"  // IWYU pragma: export
1766             #endif // PROTOBUF_INCLUDED_$file$_FORWARD_PROTO_H
1767           )");
1768 
1769   auto pb_cc = absl::WrapUnique(
1770       generator_context->Open(absl::StrCat(*basename, ".pb.cc")));
1771   io::Printer(pb_cc.get()).PrintRaw("\n");
1772 
1773   (void)absl::WrapUnique(
1774       generator_context->Open(absl::StrCat(*basename, ".pb.h.meta")));
1775 
1776   (void)absl::WrapUnique(
1777       generator_context->Open(absl::StrCat(*basename, ".proto.h.meta")));
1778 
1779   // Abort code generation.
1780   return true;
1781 }
1782 
HasExtensionFromFile(const Message & msg,const FileDescriptor * file,const Options & options,bool * has_opt_codesize_extension)1783 static bool HasExtensionFromFile(const Message& msg, const FileDescriptor* file,
1784                                  const Options& options,
1785                                  bool* has_opt_codesize_extension) {
1786   std::vector<const FieldDescriptor*> fields;
1787   auto reflection = msg.GetReflection();
1788   reflection->ListFields(msg, &fields);
1789   for (auto field : fields) {
1790     const auto* field_msg = field->message_type();
1791     if (field_msg == nullptr) {
1792       // It so happens that enums Is_Valid are still generated so enums work.
1793       // Only messages have potential problems.
1794       continue;
1795     }
1796     // If this option has an extension set AND that extension is defined in the
1797     // same file we have bootstrap problem.
1798     if (field->is_extension()) {
1799       const auto* msg_extension_file = field->message_type()->file();
1800       if (msg_extension_file == file) return true;
1801       if (has_opt_codesize_extension &&
1802           GetOptimizeFor(msg_extension_file, options) ==
1803               FileOptions::CODE_SIZE) {
1804         *has_opt_codesize_extension = true;
1805       }
1806     }
1807     // Recurse in this field to see if there is a problem in there
1808     if (field->is_repeated()) {
1809       for (int i = 0; i < reflection->FieldSize(msg, field); i++) {
1810         if (HasExtensionFromFile(reflection->GetRepeatedMessage(msg, field, i),
1811                                  file, options, has_opt_codesize_extension)) {
1812           return true;
1813         }
1814       }
1815     } else {
1816       if (HasExtensionFromFile(reflection->GetMessage(msg, field), file,
1817                                options, has_opt_codesize_extension)) {
1818         return true;
1819       }
1820     }
1821   }
1822   return false;
1823 }
1824 
HasBootstrapProblem(const FileDescriptor * file,const Options & options,bool * has_opt_codesize_extension)1825 static bool HasBootstrapProblem(const FileDescriptor* file,
1826                                 const Options& options,
1827                                 bool* has_opt_codesize_extension) {
1828   struct BootstrapGlobals {
1829     absl::Mutex mutex;
1830     absl::flat_hash_set<const FileDescriptor*> cached ABSL_GUARDED_BY(mutex);
1831     absl::flat_hash_set<const FileDescriptor*> non_cached
1832         ABSL_GUARDED_BY(mutex);
1833   };
1834   static auto& bootstrap_cache = *new BootstrapGlobals();
1835 
1836   absl::MutexLock lock(&bootstrap_cache.mutex);
1837   if (bootstrap_cache.cached.contains(file)) return true;
1838   if (bootstrap_cache.non_cached.contains(file)) return false;
1839 
1840   // In order to build the data structures for the reflective parse, it needs
1841   // to parse the serialized descriptor describing all the messages defined in
1842   // this file. Obviously this presents a bootstrap problem for descriptor
1843   // messages.
1844   if (file->name() == "net/proto2/proto/descriptor.proto" ||
1845       file->name() == "google/protobuf/descriptor.proto") {
1846     return true;
1847   }
1848   // Unfortunately we're not done yet. The descriptor option messages allow
1849   // for extensions. So we need to be able to parse these extensions in order
1850   // to parse the file descriptor for a file that has custom options. This is a
1851   // problem when these custom options extensions are defined in the same file.
1852   FileDescriptorProto linkedin_fd_proto;
1853   const DescriptorPool* pool = file->pool();
1854   const Descriptor* fd_proto_descriptor =
1855       pool->FindMessageTypeByName(linkedin_fd_proto.GetTypeName());
1856   // Not all pools have descriptor.proto in them. In these cases there for sure
1857   // are no custom options.
1858   if (fd_proto_descriptor == nullptr) return false;
1859 
1860   // It's easier to inspect file as a proto, because we can use reflection on
1861   // the proto to iterate over all content.
1862   file->CopyTo(&linkedin_fd_proto);
1863 
1864   // linkedin_fd_proto is a generated proto linked in the proto compiler. As
1865   // such it doesn't know the extensions that are potentially present in the
1866   // descriptor pool constructed from the protos that are being compiled. These
1867   // custom options are therefore in the unknown fields.
1868   // By building the corresponding FileDescriptorProto in the pool constructed
1869   // by the protos that are being compiled, ie. file's pool, the unknown fields
1870   // are converted to extensions.
1871   DynamicMessageFactory factory(pool);
1872   Message* fd_proto = factory.GetPrototype(fd_proto_descriptor)->New();
1873   fd_proto->ParseFromString(linkedin_fd_proto.SerializeAsString());
1874 
1875   bool res = HasExtensionFromFile(*fd_proto, file, options,
1876                                   has_opt_codesize_extension);
1877   if (res) {
1878     bootstrap_cache.cached.insert(file);
1879   } else {
1880     bootstrap_cache.non_cached.insert(file);
1881   }
1882   delete fd_proto;
1883   return res;
1884 }
1885 
GetOptimizeFor(const FileDescriptor * file,const Options & options,bool * has_opt_codesize_extension)1886 FileOptions_OptimizeMode GetOptimizeFor(const FileDescriptor* file,
1887                                         const Options& options,
1888                                         bool* has_opt_codesize_extension) {
1889   if (has_opt_codesize_extension) *has_opt_codesize_extension = false;
1890   switch (options.enforce_mode) {
1891     case EnforceOptimizeMode::kSpeed:
1892       return FileOptions::SPEED;
1893     case EnforceOptimizeMode::kLiteRuntime:
1894       return FileOptions::LITE_RUNTIME;
1895     case EnforceOptimizeMode::kCodeSize:
1896       if (file->options().optimize_for() == FileOptions::LITE_RUNTIME) {
1897         return FileOptions::LITE_RUNTIME;
1898       }
1899       if (HasBootstrapProblem(file, options, has_opt_codesize_extension)) {
1900         return FileOptions::SPEED;
1901       }
1902       return FileOptions::CODE_SIZE;
1903     case EnforceOptimizeMode::kNoEnforcement:
1904       if (file->options().optimize_for() == FileOptions::CODE_SIZE) {
1905         if (HasBootstrapProblem(file, options, has_opt_codesize_extension)) {
1906           ABSL_LOG(WARNING)
1907               << "Proto states optimize_for = CODE_SIZE, but we "
1908                  "cannot honor that because it contains custom option "
1909                  "extensions defined in the same proto.";
1910           return FileOptions::SPEED;
1911         }
1912       }
1913       return file->options().optimize_for();
1914   }
1915 
1916   ABSL_LOG(FATAL) << "Unknown optimization enforcement requested.";
1917   // The phony return below serves to silence a warning from GCC 8.
1918   return FileOptions::SPEED;
1919 }
1920 
HasMessageFieldOrExtension(const Descriptor * desc)1921 bool HasMessageFieldOrExtension(const Descriptor* desc) {
1922   if (desc->extension_range_count() > 0) return true;
1923   for (const auto* f : FieldRange(desc)) {
1924     if (f->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) return true;
1925   }
1926   return false;
1927 }
1928 
AnnotatedAccessors(const FieldDescriptor * field,absl::Span<const absl::string_view> prefixes,absl::optional<google::protobuf::io::AnnotationCollector::Semantic> semantic)1929 std::vector<io::Printer::Sub> AnnotatedAccessors(
1930     const FieldDescriptor* field, absl::Span<const absl::string_view> prefixes,
1931     absl::optional<google::protobuf::io::AnnotationCollector::Semantic> semantic) {
1932   auto field_name = FieldName(field);
1933 
1934   std::vector<io::Printer::Sub> vars;
1935   for (auto prefix : prefixes) {
1936     vars.push_back(io::Printer::Sub(absl::StrCat(prefix, "name"),
1937                                     absl::StrCat(prefix, field_name))
1938                        .AnnotatedAs({field, semantic}));
1939   }
1940 
1941   return vars;
1942 }
1943 
IsFileDescriptorProto(const FileDescriptor * file,const Options & options)1944 bool IsFileDescriptorProto(const FileDescriptor* file, const Options& options) {
1945   if (Namespace(file, options) !=
1946       absl::StrCat("::", ProtobufNamespace(options))) {
1947     return false;
1948   }
1949   for (int i = 0; i < file->message_type_count(); ++i) {
1950     if (file->message_type(i)->name() == "FileDescriptorProto") return true;
1951   }
1952   return false;
1953 }
1954 
ShouldGenerateClass(const Descriptor * descriptor,const Options & options)1955 bool ShouldGenerateClass(const Descriptor* descriptor, const Options& options) {
1956   return !IsMapEntryMessage(descriptor) ||
1957          HasDescriptorMethods(descriptor->file(), options);
1958 }
1959 
HasOnDeserializeTracker(const Descriptor * descriptor,const Options & options)1960 bool HasOnDeserializeTracker(const Descriptor* descriptor,
1961                              const Options& options) {
1962   return HasTracker(descriptor, options) &&
1963          !options.field_listener_options.forbidden_field_listener_events
1964               .contains("deserialize");
1965 }
1966 
1967 
NeedsPostLoopHandler(const Descriptor * descriptor,const Options & options)1968 bool NeedsPostLoopHandler(const Descriptor* descriptor,
1969                           const Options& options) {
1970   if (HasOnDeserializeTracker(descriptor, options)) {
1971     return true;
1972   }
1973   return false;
1974 }
1975 
1976 }  // namespace cpp
1977 }  // namespace compiler
1978 }  // namespace protobuf
1979 }  // namespace google
1980 
1981 #include "google/protobuf/port_undef.inc"
1982