1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 //     * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 //     * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 //     * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 // Author: kenton@google.com (Kenton Varda)
32 //  Based on original Protocol Buffers design by
33 //  Sanjay Ghemawat, Jeff Dean, and others.
34 
35 #include <google/protobuf/compiler/cpp/cpp_helpers.h>
36 
37 #include <cstdint>
38 #include <functional>
39 #include <limits>
40 #include <map>
41 #include <queue>
42 #include <unordered_set>
43 #include <vector>
44 
45 #include <google/protobuf/stubs/common.h>
46 #include <google/protobuf/stubs/logging.h>
47 #include <google/protobuf/descriptor.h>
48 #include <google/protobuf/compiler/cpp/cpp_names.h>
49 #include <google/protobuf/compiler/cpp/cpp_options.h>
50 #include <google/protobuf/descriptor.pb.h>
51 #include <google/protobuf/compiler/scc.h>
52 #include <google/protobuf/io/printer.h>
53 #include <google/protobuf/io/zero_copy_stream.h>
54 #include <google/protobuf/dynamic_message.h>
55 #include <google/protobuf/wire_format.h>
56 #include <google/protobuf/wire_format_lite.h>
57 #include <google/protobuf/stubs/strutil.h>
58 #include <google/protobuf/stubs/substitute.h>
59 #include <google/protobuf/stubs/hash.h>
60 
61 // Must be last.
62 #include <google/protobuf/port_def.inc>
63 
64 namespace google {
65 namespace protobuf {
66 namespace compiler {
67 namespace cpp {
68 
69 namespace {
70 
// Message name and .proto file path, presumably identifying the well-known
// google.protobuf.Any type — TODO confirm against the users of these
// constants, which are not visible in this part of the file.
static const char kAnyMessageName[] = "Any";
static const char kAnyProtoFile[] = "google/protobuf/any.proto";
73 
DotsToColons(const std::string & name)74 std::string DotsToColons(const std::string& name) {
75   return StringReplace(name, ".", "::", true);
76 }
77 
// Identifiers that the helpers in this file refuse to emit verbatim. The set
// built from this list (kKeywords) is consulted by ResolveKeyword(),
// FieldName(), EnumValueName() and SafeFunctionName(), each of which appends
// "_" when a name matches one of these entries.
static const char* const kKeywordList[] = {  //
    "NULL",
    "alignas",
    "alignof",
    "and",
    "and_eq",
    "asm",
    "auto",
    "bitand",
    "bitor",
    "bool",
    "break",
    "case",
    "catch",
    "char",
    "class",
    "compl",
    "const",
    "constexpr",
    "const_cast",
    "continue",
    "decltype",
    "default",
    "delete",
    "do",
    "double",
    "dynamic_cast",
    "else",
    "enum",
    "explicit",
    "export",
    "extern",
    "false",
    "float",
    "for",
    "friend",
    "goto",
    "if",
    "inline",
    "int",
    "long",
    "mutable",
    "namespace",
    "new",
    "noexcept",
    "not",
    "not_eq",
    "nullptr",
    "operator",
    "or",
    "or_eq",
    "private",
    "protected",
    "public",
    "register",
    "reinterpret_cast",
    "return",
    "short",
    "signed",
    "sizeof",
    "static",
    "static_assert",
    "static_cast",
    "struct",
    "switch",
    "template",
    "this",
    "thread_local",
    "throw",
    "true",
    "try",
    "typedef",
    "typeid",
    "typename",
    "union",
    "unsigned",
    "using",
    "virtual",
    "void",
    "volatile",
    "wchar_t",
    "while",
    "xor",
    "xor_eq"};
162 
MakeKeywordsMap()163 static std::unordered_set<std::string>* MakeKeywordsMap() {
164   auto* result = new std::unordered_set<std::string>();
165   for (const auto keyword : kKeywordList) {
166     result->emplace(keyword);
167   }
168   return result;
169 }
170 
// Set of every entry in kKeywordList. The set itself is heap-allocated by
// MakeKeywordsMap() and is never deleted; only this reference to it is kept.
static std::unordered_set<std::string>& kKeywords = *MakeKeywordsMap();
172 
// Returns `type` with "_t" appended (e.g. "int32" -> "int32_t"). `options`
// is accepted but not consulted.
std::string IntTypeName(const Options& options, const std::string& type) {
  std::string type_name = type;
  type_name += "_t";
  return type_name;
}
176 
SetIntVar(const Options & options,const std::string & type,std::map<std::string,std::string> * variables)177 void SetIntVar(const Options& options, const std::string& type,
178                std::map<std::string, std::string>* variables) {
179   (*variables)[type] = IntTypeName(options, type);
180 }
// Always reports false in this implementation; none of the arguments are
// inspected.
bool IsEagerlyVerifiedLazyImpl(const FieldDescriptor* field,
                               const Options& options,
                               MessageSCCAnalyzer* scc_analyzer) {
  static_cast<void>(field);
  static_cast<void>(options);
  static_cast<void>(scc_analyzer);
  return false;
}
186 
187 }  // namespace
188 
IsLazy(const FieldDescriptor * field,const Options & options,MessageSCCAnalyzer * scc_analyzer)189 bool IsLazy(const FieldDescriptor* field, const Options& options,
190             MessageSCCAnalyzer* scc_analyzer) {
191   return IsLazilyVerifiedLazy(field, options) ||
192          IsEagerlyVerifiedLazyImpl(field, options, scc_analyzer);
193 }
194 
SetCommonVars(const Options & options,std::map<std::string,std::string> * variables)195 void SetCommonVars(const Options& options,
196                    std::map<std::string, std::string>* variables) {
197   (*variables)["proto_ns"] = ProtobufNamespace(options);
198 
199   // Warning: there is some clever naming/splitting here to avoid extract script
200   // rewrites.  The names of these variables must not be things that the extract
201   // script will rewrite.  That's why we use "CHK" (for example) instead of
202   // "GOOGLE_CHECK".
203   if (options.opensource_runtime) {
204     (*variables)["GOOGLE_PROTOBUF"] = "GOOGLE_PROTOBUF";
205     (*variables)["CHK"] = "GOOGLE_CHECK";
206     (*variables)["DCHK"] = "GOOGLE_DCHECK";
207   } else {
208     // These values are things the extract script would rewrite if we did not
209     // split them.  It might not strictly matter since we don't generate google3
210     // code in open-source.  But it's good to prevent surprising things from
211     // happening.
212     (*variables)["GOOGLE_PROTOBUF"] =
213         "GOOGLE3"
214         "_PROTOBUF";
215     (*variables)["CHK"] =
216         "CH"
217         "ECK";
218     (*variables)["DCHK"] =
219         "DCH"
220         "ECK";
221   }
222 
223   SetIntVar(options, "int8", variables);
224   SetIntVar(options, "uint8", variables);
225   SetIntVar(options, "uint32", variables);
226   SetIntVar(options, "uint64", variables);
227   SetIntVar(options, "int32", variables);
228   SetIntVar(options, "int64", variables);
229   (*variables)["string"] = "std::string";
230 }
231 
// Registers the fixed set of substitution keys and the member-style names
// ("_..._") they expand to.
void SetCommonMessageDataVariables(
    std::map<std::string, std::string>* variables) {
  // {substitution key, value stored for it}
  static const char* const kMemberNames[][2] = {
      {"any_metadata", "_any_metadata_"},
      {"cached_size", "_cached_size_"},
      {"extensions", "_extensions_"},
      {"has_bits", "_has_bits_"},
      {"inlined_string_donated_array", "_inlined_string_donated_"},
      {"oneof_case", "_oneof_case_"},
      {"tracker", "_tracker_"},
      {"weak_field_map", "_weak_field_map_"},
  };
  for (const auto& entry : kMemberNames) {
    (*variables)[entry[0]] = entry[1];
  }
}
243 
SetUnknownFieldsVariable(const Descriptor * descriptor,const Options & options,std::map<std::string,std::string> * variables)244 void SetUnknownFieldsVariable(const Descriptor* descriptor,
245                               const Options& options,
246                               std::map<std::string, std::string>* variables) {
247   std::string proto_ns = ProtobufNamespace(options);
248   std::string unknown_fields_type;
249   if (UseUnknownFieldSet(descriptor->file(), options)) {
250     unknown_fields_type = "::" + proto_ns + "::UnknownFieldSet";
251     (*variables)["unknown_fields"] =
252         "_internal_metadata_.unknown_fields<" + unknown_fields_type + ">(" +
253         unknown_fields_type + "::default_instance)";
254   } else {
255     unknown_fields_type =
256         PrimitiveTypeName(options, FieldDescriptor::CPPTYPE_STRING);
257     (*variables)["unknown_fields"] = "_internal_metadata_.unknown_fields<" +
258                                      unknown_fields_type + ">(::" + proto_ns +
259                                      "::internal::GetEmptyString)";
260   }
261   (*variables)["unknown_fields_type"] = unknown_fields_type;
262   (*variables)["have_unknown_fields"] =
263       "_internal_metadata_.have_unknown_fields()";
264   (*variables)["mutable_unknown_fields"] =
265       "_internal_metadata_.mutable_unknown_fields<" + unknown_fields_type +
266       ">()";
267 }
268 
// Converts `input` to camel case.
//
// ASCII letters and digits are copied to the result; every other character
// (underscores included) is dropped and forces the next lowercase letter to
// be capitalized. A digit also capitalizes the letter that follows it.
// Uppercase letters are copied unchanged. `cap_next_letter` says whether the
// first lowercase letter encountered should be capitalized.
std::string UnderscoresToCamelCase(const std::string& input,
                                   bool cap_next_letter) {
  std::string result;
  result.reserve(input.size());
  // Note:  I distrust ctype.h due to locales.
  // Range-for avoids comparing a signed index against the unsigned size().
  for (const char c : input) {
    if ('a' <= c && c <= 'z') {
      result += cap_next_letter ? static_cast<char>(c + ('A' - 'a')) : c;
      cap_next_letter = false;
    } else if ('A' <= c && c <= 'Z') {
      // Capital letters are left as-is.
      result += c;
      cap_next_letter = false;
    } else if ('0' <= c && c <= '9') {
      result += c;
      cap_next_letter = true;
    } else {
      // Separator: emit nothing, capitalize whatever letter comes next.
      cap_next_letter = true;
    }
  }
  return result;
}
294 
// Separator comment lines, each terminated by a newline. The "thick" variant
// is drawn with '=' characters and the "thin" variant with '-'.
const char kThickSeparator[] =
    "// ===================================================================\n";
const char kThinSeparator[] =
    "// -------------------------------------------------------------------\n";
299 
CanInitializeByZeroing(const FieldDescriptor * field)300 bool CanInitializeByZeroing(const FieldDescriptor* field) {
301   if (field->is_repeated() || field->is_extension()) return false;
302   switch (field->cpp_type()) {
303     case FieldDescriptor::CPPTYPE_ENUM:
304       return field->default_value_enum()->number() == 0;
305     case FieldDescriptor::CPPTYPE_INT32:
306       return field->default_value_int32() == 0;
307     case FieldDescriptor::CPPTYPE_INT64:
308       return field->default_value_int64() == 0;
309     case FieldDescriptor::CPPTYPE_UINT32:
310       return field->default_value_uint32() == 0;
311     case FieldDescriptor::CPPTYPE_UINT64:
312       return field->default_value_uint64() == 0;
313     case FieldDescriptor::CPPTYPE_FLOAT:
314       return field->default_value_float() == 0;
315     case FieldDescriptor::CPPTYPE_DOUBLE:
316       return field->default_value_double() == 0;
317     case FieldDescriptor::CPPTYPE_BOOL:
318       return field->default_value_bool() == false;
319     default:
320       return false;
321   }
322 }
323 
ClassName(const Descriptor * descriptor)324 std::string ClassName(const Descriptor* descriptor) {
325   const Descriptor* parent = descriptor->containing_type();
326   std::string res;
327   if (parent) res += ClassName(parent) + "_";
328   res += descriptor->name();
329   if (IsMapEntryMessage(descriptor)) res += "_DoNotUse";
330   return ResolveKeyword(res);
331 }
332 
ClassName(const EnumDescriptor * enum_descriptor)333 std::string ClassName(const EnumDescriptor* enum_descriptor) {
334   if (enum_descriptor->containing_type() == nullptr) {
335     return ResolveKeyword(enum_descriptor->name());
336   } else {
337     return ClassName(enum_descriptor->containing_type()) + "_" +
338            enum_descriptor->name();
339   }
340 }
341 
QualifiedClassName(const Descriptor * d,const Options & options)342 std::string QualifiedClassName(const Descriptor* d, const Options& options) {
343   return QualifiedFileLevelSymbol(d->file(), ClassName(d), options);
344 }
345 
QualifiedClassName(const EnumDescriptor * d,const Options & options)346 std::string QualifiedClassName(const EnumDescriptor* d,
347                                const Options& options) {
348   return QualifiedFileLevelSymbol(d->file(), ClassName(d), options);
349 }
350 
QualifiedClassName(const Descriptor * d)351 std::string QualifiedClassName(const Descriptor* d) {
352   return QualifiedClassName(d, Options());
353 }
354 
QualifiedClassName(const EnumDescriptor * d)355 std::string QualifiedClassName(const EnumDescriptor* d) {
356   return QualifiedClassName(d, Options());
357 }
358 
ExtensionName(const FieldDescriptor * d)359 std::string ExtensionName(const FieldDescriptor* d) {
360   if (const Descriptor* scope = d->extension_scope())
361     return StrCat(ClassName(scope), "::", ResolveKeyword(d->name()));
362   return ResolveKeyword(d->name());
363 }
364 
QualifiedExtensionName(const FieldDescriptor * d,const Options & options)365 std::string QualifiedExtensionName(const FieldDescriptor* d,
366                                    const Options& options) {
367   GOOGLE_DCHECK(d->is_extension());
368   return QualifiedFileLevelSymbol(d->file(), ExtensionName(d), options);
369 }
370 
QualifiedExtensionName(const FieldDescriptor * d)371 std::string QualifiedExtensionName(const FieldDescriptor* d) {
372   return QualifiedExtensionName(d, Options());
373 }
374 
Namespace(const std::string & package)375 std::string Namespace(const std::string& package) {
376   if (package.empty()) return "";
377   return "::" + DotsToColons(package);
378 }
379 
Namespace(const FileDescriptor * d,const Options & options)380 std::string Namespace(const FileDescriptor* d, const Options& options) {
381   std::string ret = Namespace(d->package());
382   if (IsWellKnownMessage(d) && options.opensource_runtime) {
383     // Written with string concatenation to prevent rewriting of
384     // ::google::protobuf.
385     ret = StringReplace(ret,
386                         "::google::"
387                         "protobuf",
388                         "::PROTOBUF_NAMESPACE_ID", false);
389   }
390   return ret;
391 }
392 
Namespace(const Descriptor * d,const Options & options)393 std::string Namespace(const Descriptor* d, const Options& options) {
394   return Namespace(d->file(), options);
395 }
396 
Namespace(const FieldDescriptor * d,const Options & options)397 std::string Namespace(const FieldDescriptor* d, const Options& options) {
398   return Namespace(d->file(), options);
399 }
400 
Namespace(const EnumDescriptor * d,const Options & options)401 std::string Namespace(const EnumDescriptor* d, const Options& options) {
402   return Namespace(d->file(), options);
403 }
404 
DefaultInstanceType(const Descriptor * descriptor,const Options & options)405 std::string DefaultInstanceType(const Descriptor* descriptor,
406                                 const Options& options) {
407   return ClassName(descriptor) + "DefaultTypeInternal";
408 }
409 
DefaultInstanceName(const Descriptor * descriptor,const Options & options)410 std::string DefaultInstanceName(const Descriptor* descriptor,
411                                 const Options& options) {
412   return "_" + ClassName(descriptor, false) + "_default_instance_";
413 }
414 
DefaultInstancePtr(const Descriptor * descriptor,const Options & options)415 std::string DefaultInstancePtr(const Descriptor* descriptor,
416                                const Options& options) {
417   return DefaultInstanceName(descriptor, options) + "ptr_";
418 }
419 
QualifiedDefaultInstanceName(const Descriptor * descriptor,const Options & options)420 std::string QualifiedDefaultInstanceName(const Descriptor* descriptor,
421                                          const Options& options) {
422   return QualifiedFileLevelSymbol(
423       descriptor->file(), DefaultInstanceName(descriptor, options), options);
424 }
425 
QualifiedDefaultInstancePtr(const Descriptor * descriptor,const Options & options)426 std::string QualifiedDefaultInstancePtr(const Descriptor* descriptor,
427                                         const Options& options) {
428   return QualifiedDefaultInstanceName(descriptor, options) + "ptr_";
429 }
430 
DescriptorTableName(const FileDescriptor * file,const Options & options)431 std::string DescriptorTableName(const FileDescriptor* file,
432                                 const Options& options) {
433   return UniqueName("descriptor_table", file, options);
434 }
435 
FileDllExport(const FileDescriptor * file,const Options & options)436 std::string FileDllExport(const FileDescriptor* file, const Options& options) {
437   return UniqueName("PROTOBUF_INTERNAL_EXPORT", file, options);
438 }
439 
SuperClassName(const Descriptor * descriptor,const Options & options)440 std::string SuperClassName(const Descriptor* descriptor,
441                            const Options& options) {
442   if (!HasDescriptorMethods(descriptor->file(), options)) {
443     return "::" + ProtobufNamespace(options) + "::MessageLite";
444   }
445   auto simple_base = SimpleBaseClass(descriptor, options);
446   if (simple_base.empty()) {
447     return "::" + ProtobufNamespace(options) + "::Message";
448   }
449   return "::" + ProtobufNamespace(options) + "::internal::" + simple_base;
450 }
451 
ResolveKeyword(const std::string & name)452 std::string ResolveKeyword(const std::string& name) {
453   if (kKeywords.count(name) > 0) {
454     return name + "_";
455   }
456   return name;
457 }
458 
FieldName(const FieldDescriptor * field)459 std::string FieldName(const FieldDescriptor* field) {
460   std::string result = field->name();
461   LowerString(&result);
462   if (kKeywords.count(result) > 0) {
463     result.append("_");
464   }
465   return result;
466 }
467 
FieldMemberName(const FieldDescriptor * field)468 std::string FieldMemberName(const FieldDescriptor* field) {
469   if (field->real_containing_oneof() == nullptr) {
470     return StrCat(FieldName(field), "_");
471   }
472   return StrCat(field->containing_oneof()->name(), "_.", FieldName(field),
473                       "_");
474 }
475 
OneofCaseConstantName(const FieldDescriptor * field)476 std::string OneofCaseConstantName(const FieldDescriptor* field) {
477   GOOGLE_DCHECK(field->containing_oneof());
478   std::string field_name = UnderscoresToCamelCase(field->name(), true);
479   return "k" + field_name;
480 }
481 
QualifiedOneofCaseConstantName(const FieldDescriptor * field)482 std::string QualifiedOneofCaseConstantName(const FieldDescriptor* field) {
483   GOOGLE_DCHECK(field->containing_oneof());
484   const std::string qualification =
485       QualifiedClassName(field->containing_type());
486   return StrCat(qualification, "::", OneofCaseConstantName(field));
487 }
488 
EnumValueName(const EnumValueDescriptor * enum_value)489 std::string EnumValueName(const EnumValueDescriptor* enum_value) {
490   std::string result = enum_value->name();
491   if (kKeywords.count(result) > 0) {
492     result.append("_");
493   }
494   return result;
495 }
496 
EstimateAlignmentSize(const FieldDescriptor * field)497 int EstimateAlignmentSize(const FieldDescriptor* field) {
498   if (field == nullptr) return 0;
499   if (field->is_repeated()) return 8;
500   switch (field->cpp_type()) {
501     case FieldDescriptor::CPPTYPE_BOOL:
502       return 1;
503 
504     case FieldDescriptor::CPPTYPE_INT32:
505     case FieldDescriptor::CPPTYPE_UINT32:
506     case FieldDescriptor::CPPTYPE_ENUM:
507     case FieldDescriptor::CPPTYPE_FLOAT:
508       return 4;
509 
510     case FieldDescriptor::CPPTYPE_INT64:
511     case FieldDescriptor::CPPTYPE_UINT64:
512     case FieldDescriptor::CPPTYPE_DOUBLE:
513     case FieldDescriptor::CPPTYPE_STRING:
514     case FieldDescriptor::CPPTYPE_MESSAGE:
515       return 8;
516   }
517   GOOGLE_LOG(FATAL) << "Can't get here.";
518   return -1;  // Make compiler happy.
519 }
520 
FieldConstantName(const FieldDescriptor * field)521 std::string FieldConstantName(const FieldDescriptor* field) {
522   std::string field_name = UnderscoresToCamelCase(field->name(), true);
523   std::string result = "k" + field_name + "FieldNumber";
524 
525   if (!field->is_extension() &&
526       field->containing_type()->FindFieldByCamelcaseName(
527           field->camelcase_name()) != field) {
528     // This field's camelcase name is not unique.  As a hack, add the field
529     // number to the constant name.  This makes the constant rather useless,
530     // but what can we do?
531     result += "_" + StrCat(field->number());
532   }
533 
534   return result;
535 }
536 
FieldMessageTypeName(const FieldDescriptor * field,const Options & options)537 std::string FieldMessageTypeName(const FieldDescriptor* field,
538                                  const Options& options) {
539   // Note:  The Google-internal version of Protocol Buffers uses this function
540   //   as a hook point for hacks to support legacy code.
541   return QualifiedClassName(field->message_type(), options);
542 }
543 
StripProto(const std::string & filename)544 std::string StripProto(const std::string& filename) {
545   /*
546    * TODO(github/georgthegreat) remove this proxy method
547    * once Google's internal codebase will become ready
548    */
549   return compiler::StripProto(filename);
550 }
551 
PrimitiveTypeName(FieldDescriptor::CppType type)552 const char* PrimitiveTypeName(FieldDescriptor::CppType type) {
553   switch (type) {
554     case FieldDescriptor::CPPTYPE_INT32:
555       return "int32_t";
556     case FieldDescriptor::CPPTYPE_INT64:
557       return "int64_t";
558     case FieldDescriptor::CPPTYPE_UINT32:
559       return "uint32_t";
560     case FieldDescriptor::CPPTYPE_UINT64:
561       return "uint64_t";
562     case FieldDescriptor::CPPTYPE_DOUBLE:
563       return "double";
564     case FieldDescriptor::CPPTYPE_FLOAT:
565       return "float";
566     case FieldDescriptor::CPPTYPE_BOOL:
567       return "bool";
568     case FieldDescriptor::CPPTYPE_ENUM:
569       return "int";
570     case FieldDescriptor::CPPTYPE_STRING:
571       return "std::string";
572     case FieldDescriptor::CPPTYPE_MESSAGE:
573       return nullptr;
574 
575       // No default because we want the compiler to complain if any new
576       // CppTypes are added.
577   }
578 
579   GOOGLE_LOG(FATAL) << "Can't get here.";
580   return nullptr;
581 }
582 
PrimitiveTypeName(const Options & options,FieldDescriptor::CppType type)583 std::string PrimitiveTypeName(const Options& options,
584                               FieldDescriptor::CppType type) {
585   switch (type) {
586     case FieldDescriptor::CPPTYPE_INT32:
587       return IntTypeName(options, "int32");
588     case FieldDescriptor::CPPTYPE_INT64:
589       return IntTypeName(options, "int64");
590     case FieldDescriptor::CPPTYPE_UINT32:
591       return IntTypeName(options, "uint32");
592     case FieldDescriptor::CPPTYPE_UINT64:
593       return IntTypeName(options, "uint64");
594     case FieldDescriptor::CPPTYPE_DOUBLE:
595       return "double";
596     case FieldDescriptor::CPPTYPE_FLOAT:
597       return "float";
598     case FieldDescriptor::CPPTYPE_BOOL:
599       return "bool";
600     case FieldDescriptor::CPPTYPE_ENUM:
601       return "int";
602     case FieldDescriptor::CPPTYPE_STRING:
603       return "std::string";
604     case FieldDescriptor::CPPTYPE_MESSAGE:
605       return "";
606 
607       // No default because we want the compiler to complain if any new
608       // CppTypes are added.
609   }
610 
611   GOOGLE_LOG(FATAL) << "Can't get here.";
612   return "";
613 }
614 
DeclaredTypeMethodName(FieldDescriptor::Type type)615 const char* DeclaredTypeMethodName(FieldDescriptor::Type type) {
616   switch (type) {
617     case FieldDescriptor::TYPE_INT32:
618       return "Int32";
619     case FieldDescriptor::TYPE_INT64:
620       return "Int64";
621     case FieldDescriptor::TYPE_UINT32:
622       return "UInt32";
623     case FieldDescriptor::TYPE_UINT64:
624       return "UInt64";
625     case FieldDescriptor::TYPE_SINT32:
626       return "SInt32";
627     case FieldDescriptor::TYPE_SINT64:
628       return "SInt64";
629     case FieldDescriptor::TYPE_FIXED32:
630       return "Fixed32";
631     case FieldDescriptor::TYPE_FIXED64:
632       return "Fixed64";
633     case FieldDescriptor::TYPE_SFIXED32:
634       return "SFixed32";
635     case FieldDescriptor::TYPE_SFIXED64:
636       return "SFixed64";
637     case FieldDescriptor::TYPE_FLOAT:
638       return "Float";
639     case FieldDescriptor::TYPE_DOUBLE:
640       return "Double";
641 
642     case FieldDescriptor::TYPE_BOOL:
643       return "Bool";
644     case FieldDescriptor::TYPE_ENUM:
645       return "Enum";
646 
647     case FieldDescriptor::TYPE_STRING:
648       return "String";
649     case FieldDescriptor::TYPE_BYTES:
650       return "Bytes";
651     case FieldDescriptor::TYPE_GROUP:
652       return "Group";
653     case FieldDescriptor::TYPE_MESSAGE:
654       return "Message";
655 
656       // No default because we want the compiler to complain if any new
657       // types are added.
658   }
659   GOOGLE_LOG(FATAL) << "Can't get here.";
660   return "";
661 }
662 
Int32ToString(int number)663 std::string Int32ToString(int number) {
664   if (number == std::numeric_limits<int32_t>::min()) {
665     // This needs to be special-cased, see explanation here:
666     // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=52661
667     return StrCat(number + 1, " - 1");
668   } else {
669     return StrCat(number);
670   }
671 }
672 
Int64ToString(int64_t number)673 static std::string Int64ToString(int64_t number) {
674   if (number == std::numeric_limits<int64_t>::min()) {
675     // This needs to be special-cased, see explanation here:
676     // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=52661
677     return StrCat("int64_t{", number + 1, "} - 1");
678   }
679   return StrCat("int64_t{", number, "}");
680 }
681 
UInt64ToString(uint64_t number)682 static std::string UInt64ToString(uint64_t number) {
683   return StrCat("uint64_t{", number, "u}");
684 }
685 
DefaultValue(const FieldDescriptor * field)686 std::string DefaultValue(const FieldDescriptor* field) {
687   return DefaultValue(Options(), field);
688 }
689 
DefaultValue(const Options & options,const FieldDescriptor * field)690 std::string DefaultValue(const Options& options, const FieldDescriptor* field) {
691   switch (field->cpp_type()) {
692     case FieldDescriptor::CPPTYPE_INT32:
693       return Int32ToString(field->default_value_int32());
694     case FieldDescriptor::CPPTYPE_UINT32:
695       return StrCat(field->default_value_uint32()) + "u";
696     case FieldDescriptor::CPPTYPE_INT64:
697       return Int64ToString(field->default_value_int64());
698     case FieldDescriptor::CPPTYPE_UINT64:
699       return UInt64ToString(field->default_value_uint64());
700     case FieldDescriptor::CPPTYPE_DOUBLE: {
701       double value = field->default_value_double();
702       if (value == std::numeric_limits<double>::infinity()) {
703         return "std::numeric_limits<double>::infinity()";
704       } else if (value == -std::numeric_limits<double>::infinity()) {
705         return "-std::numeric_limits<double>::infinity()";
706       } else if (value != value) {
707         return "std::numeric_limits<double>::quiet_NaN()";
708       } else {
709         return SimpleDtoa(value);
710       }
711     }
712     case FieldDescriptor::CPPTYPE_FLOAT: {
713       float value = field->default_value_float();
714       if (value == std::numeric_limits<float>::infinity()) {
715         return "std::numeric_limits<float>::infinity()";
716       } else if (value == -std::numeric_limits<float>::infinity()) {
717         return "-std::numeric_limits<float>::infinity()";
718       } else if (value != value) {
719         return "std::numeric_limits<float>::quiet_NaN()";
720       } else {
721         std::string float_value = SimpleFtoa(value);
722         // If floating point value contains a period (.) or an exponent
723         // (either E or e), then append suffix 'f' to make it a float
724         // literal.
725         if (float_value.find_first_of(".eE") != std::string::npos) {
726           float_value.push_back('f');
727         }
728         return float_value;
729       }
730     }
731     case FieldDescriptor::CPPTYPE_BOOL:
732       return field->default_value_bool() ? "true" : "false";
733     case FieldDescriptor::CPPTYPE_ENUM:
734       // Lazy:  Generate a static_cast because we don't have a helper function
735       //   that constructs the full name of an enum value.
736       return strings::Substitute(
737           "static_cast< $0 >($1)", ClassName(field->enum_type(), true),
738           Int32ToString(field->default_value_enum()->number()));
739     case FieldDescriptor::CPPTYPE_STRING:
740       return "\"" +
741              EscapeTrigraphs(CEscape(field->default_value_string())) +
742              "\"";
743     case FieldDescriptor::CPPTYPE_MESSAGE:
744       return "*" + FieldMessageTypeName(field, options) +
745              "::internal_default_instance()";
746   }
747   // Can't actually get here; make compiler happy.  (We could add a default
748   // case above but then we wouldn't get the nice compiler warning when a
749   // new type is added.)
750   GOOGLE_LOG(FATAL) << "Can't get here.";
751   return "";
752 }
753 
754 // Convert a file name into a valid identifier.
FilenameIdentifier(const std::string & filename)755 std::string FilenameIdentifier(const std::string& filename) {
756   std::string result;
757   for (int i = 0; i < filename.size(); i++) {
758     if (ascii_isalnum(filename[i])) {
759       result.push_back(filename[i]);
760     } else {
761       // Not alphanumeric.  To avoid any possibility of name conflicts we
762       // use the hex code for the character.
763       StrAppend(&result, "_",
764                       strings::Hex(static_cast<uint8_t>(filename[i])));
765     }
766   }
767   return result;
768 }
769 
UniqueName(const std::string & name,const std::string & filename,const Options & options)770 std::string UniqueName(const std::string& name, const std::string& filename,
771                        const Options& options) {
772   return name + "_" + FilenameIdentifier(filename);
773 }
774 
775 // Return the qualified C++ name for a file level symbol.
QualifiedFileLevelSymbol(const FileDescriptor * file,const std::string & name,const Options & options)776 std::string QualifiedFileLevelSymbol(const FileDescriptor* file,
777                                      const std::string& name,
778                                      const Options& options) {
779   if (file->package().empty()) {
780     return StrCat("::", name);
781   }
782   return StrCat(Namespace(file, options), "::", name);
783 }
784 
785 // Escape C++ trigraphs by escaping question marks to \?
EscapeTrigraphs(const std::string & to_escape)786 std::string EscapeTrigraphs(const std::string& to_escape) {
787   return StringReplace(to_escape, "?", "\\?", true);
788 }
789 
790 // Escaped function name to eliminate naming conflict.
SafeFunctionName(const Descriptor * descriptor,const FieldDescriptor * field,const std::string & prefix)791 std::string SafeFunctionName(const Descriptor* descriptor,
792                              const FieldDescriptor* field,
793                              const std::string& prefix) {
794   // Do not use FieldName() since it will escape keywords.
795   std::string name = field->name();
796   LowerString(&name);
797   std::string function_name = prefix + name;
798   if (descriptor->FindFieldByName(function_name)) {
799     // Single underscore will also make it conflicting with the private data
800     // member. We use double underscore to escape function names.
801     function_name.append("__");
802   } else if (kKeywords.count(name) > 0) {
803     // If the field name is a keyword, we append the underscore back to keep it
804     // consistent with other function names.
805     function_name.append("_");
806   }
807   return function_name;
808 }
809 
// Always answers false; both arguments are ignored.
bool IsStringInlined(const FieldDescriptor* descriptor,
                     const Options& options) {
  static_cast<void>(descriptor);
  static_cast<void>(options);
  const bool inlined = false;
  return inlined;
}
816 
HasLazyFields(const Descriptor * descriptor,const Options & options,MessageSCCAnalyzer * scc_analyzer)817 static bool HasLazyFields(const Descriptor* descriptor, const Options& options,
818                           MessageSCCAnalyzer* scc_analyzer) {
819   for (int field_idx = 0; field_idx < descriptor->field_count(); field_idx++) {
820     if (IsLazy(descriptor->field(field_idx), options, scc_analyzer)) {
821       return true;
822     }
823   }
824   for (int idx = 0; idx < descriptor->extension_count(); idx++) {
825     if (IsLazy(descriptor->extension(idx), options, scc_analyzer)) {
826       return true;
827     }
828   }
829   for (int idx = 0; idx < descriptor->nested_type_count(); idx++) {
830     if (HasLazyFields(descriptor->nested_type(idx), options, scc_analyzer)) {
831       return true;
832     }
833   }
834   return false;
835 }
836 
837 // Does the given FileDescriptor use lazy fields?
HasLazyFields(const FileDescriptor * file,const Options & options,MessageSCCAnalyzer * scc_analyzer)838 bool HasLazyFields(const FileDescriptor* file, const Options& options,
839                    MessageSCCAnalyzer* scc_analyzer) {
840   for (int i = 0; i < file->message_type_count(); i++) {
841     const Descriptor* descriptor(file->message_type(i));
842     if (HasLazyFields(descriptor, options, scc_analyzer)) {
843       return true;
844     }
845   }
846   for (int field_idx = 0; field_idx < file->extension_count(); field_idx++) {
847     if (IsLazy(file->extension(field_idx), options, scc_analyzer)) {
848       return true;
849     }
850   }
851   return false;
852 }
853 
HasRepeatedFields(const Descriptor * descriptor)854 static bool HasRepeatedFields(const Descriptor* descriptor) {
855   for (int i = 0; i < descriptor->field_count(); ++i) {
856     if (descriptor->field(i)->label() == FieldDescriptor::LABEL_REPEATED) {
857       return true;
858     }
859   }
860   for (int i = 0; i < descriptor->nested_type_count(); ++i) {
861     if (HasRepeatedFields(descriptor->nested_type(i))) return true;
862   }
863   return false;
864 }
865 
HasRepeatedFields(const FileDescriptor * file)866 bool HasRepeatedFields(const FileDescriptor* file) {
867   for (int i = 0; i < file->message_type_count(); ++i) {
868     if (HasRepeatedFields(file->message_type(i))) return true;
869   }
870   return false;
871 }
872 
IsStringPieceField(const FieldDescriptor * field,const Options & options)873 static bool IsStringPieceField(const FieldDescriptor* field,
874                                const Options& options) {
875   return field->cpp_type() == FieldDescriptor::CPPTYPE_STRING &&
876          EffectiveStringCType(field, options) == FieldOptions::STRING_PIECE;
877 }
878 
HasStringPieceFields(const Descriptor * descriptor,const Options & options)879 static bool HasStringPieceFields(const Descriptor* descriptor,
880                                  const Options& options) {
881   for (int i = 0; i < descriptor->field_count(); ++i) {
882     if (IsStringPieceField(descriptor->field(i), options)) return true;
883   }
884   for (int i = 0; i < descriptor->nested_type_count(); ++i) {
885     if (HasStringPieceFields(descriptor->nested_type(i), options)) return true;
886   }
887   return false;
888 }
889 
HasStringPieceFields(const FileDescriptor * file,const Options & options)890 bool HasStringPieceFields(const FileDescriptor* file, const Options& options) {
891   for (int i = 0; i < file->message_type_count(); ++i) {
892     if (HasStringPieceFields(file->message_type(i), options)) return true;
893   }
894   return false;
895 }
896 
IsCordField(const FieldDescriptor * field,const Options & options)897 static bool IsCordField(const FieldDescriptor* field, const Options& options) {
898   return field->cpp_type() == FieldDescriptor::CPPTYPE_STRING &&
899          EffectiveStringCType(field, options) == FieldOptions::CORD;
900 }
901 
HasCordFields(const Descriptor * descriptor,const Options & options)902 static bool HasCordFields(const Descriptor* descriptor,
903                           const Options& options) {
904   for (int i = 0; i < descriptor->field_count(); ++i) {
905     if (IsCordField(descriptor->field(i), options)) return true;
906   }
907   for (int i = 0; i < descriptor->nested_type_count(); ++i) {
908     if (HasCordFields(descriptor->nested_type(i), options)) return true;
909   }
910   return false;
911 }
912 
HasCordFields(const FileDescriptor * file,const Options & options)913 bool HasCordFields(const FileDescriptor* file, const Options& options) {
914   for (int i = 0; i < file->message_type_count(); ++i) {
915     if (HasCordFields(file->message_type(i), options)) return true;
916   }
917   return false;
918 }
919 
HasExtensionsOrExtendableMessage(const Descriptor * descriptor)920 static bool HasExtensionsOrExtendableMessage(const Descriptor* descriptor) {
921   if (descriptor->extension_range_count() > 0) return true;
922   if (descriptor->extension_count() > 0) return true;
923   for (int i = 0; i < descriptor->nested_type_count(); ++i) {
924     if (HasExtensionsOrExtendableMessage(descriptor->nested_type(i))) {
925       return true;
926     }
927   }
928   return false;
929 }
930 
HasExtensionsOrExtendableMessage(const FileDescriptor * file)931 bool HasExtensionsOrExtendableMessage(const FileDescriptor* file) {
932   if (file->extension_count() > 0) return true;
933   for (int i = 0; i < file->message_type_count(); ++i) {
934     if (HasExtensionsOrExtendableMessage(file->message_type(i))) return true;
935   }
936   return false;
937 }
938 
HasMapFields(const Descriptor * descriptor)939 static bool HasMapFields(const Descriptor* descriptor) {
940   for (int i = 0; i < descriptor->field_count(); ++i) {
941     if (descriptor->field(i)->is_map()) {
942       return true;
943     }
944   }
945   for (int i = 0; i < descriptor->nested_type_count(); ++i) {
946     if (HasMapFields(descriptor->nested_type(i))) return true;
947   }
948   return false;
949 }
950 
HasMapFields(const FileDescriptor * file)951 bool HasMapFields(const FileDescriptor* file) {
952   for (int i = 0; i < file->message_type_count(); ++i) {
953     if (HasMapFields(file->message_type(i))) return true;
954   }
955   return false;
956 }
957 
HasEnumDefinitions(const Descriptor * message_type)958 static bool HasEnumDefinitions(const Descriptor* message_type) {
959   if (message_type->enum_type_count() > 0) return true;
960   for (int i = 0; i < message_type->nested_type_count(); ++i) {
961     if (HasEnumDefinitions(message_type->nested_type(i))) return true;
962   }
963   return false;
964 }
965 
HasEnumDefinitions(const FileDescriptor * file)966 bool HasEnumDefinitions(const FileDescriptor* file) {
967   if (file->enum_type_count() > 0) return true;
968   for (int i = 0; i < file->message_type_count(); ++i) {
969     if (HasEnumDefinitions(file->message_type(i))) return true;
970   }
971   return false;
972 }
973 
// Message-level overload.  Always answers false; every argument is ignored.
bool ShouldVerify(const Descriptor* descriptor, const Options& options,
                  MessageSCCAnalyzer* scc_analyzer) {
  static_cast<void>(descriptor);
  static_cast<void>(options);
  static_cast<void>(scc_analyzer);
  const bool verify = false;
  return verify;
}
981 
// File-level overload.  Always answers false; every argument is ignored.
bool ShouldVerify(const FileDescriptor* file, const Options& options,
                  MessageSCCAnalyzer* scc_analyzer) {
  static_cast<void>(file);
  static_cast<void>(options);
  static_cast<void>(scc_analyzer);
  const bool verify = false;
  return verify;
}
989 
IsStringOrMessage(const FieldDescriptor * field)990 bool IsStringOrMessage(const FieldDescriptor* field) {
991   switch (field->cpp_type()) {
992     case FieldDescriptor::CPPTYPE_INT32:
993     case FieldDescriptor::CPPTYPE_INT64:
994     case FieldDescriptor::CPPTYPE_UINT32:
995     case FieldDescriptor::CPPTYPE_UINT64:
996     case FieldDescriptor::CPPTYPE_DOUBLE:
997     case FieldDescriptor::CPPTYPE_FLOAT:
998     case FieldDescriptor::CPPTYPE_BOOL:
999     case FieldDescriptor::CPPTYPE_ENUM:
1000       return false;
1001     case FieldDescriptor::CPPTYPE_STRING:
1002     case FieldDescriptor::CPPTYPE_MESSAGE:
1003       return true;
1004   }
1005 
1006   GOOGLE_LOG(FATAL) << "Can't get here.";
1007   return false;
1008 }
1009 
EffectiveStringCType(const FieldDescriptor * field,const Options & options)1010 FieldOptions::CType EffectiveStringCType(const FieldDescriptor* field,
1011                                          const Options& options) {
1012   GOOGLE_DCHECK(field->cpp_type() == FieldDescriptor::CPPTYPE_STRING);
1013   if (options.opensource_runtime) {
1014     // Open-source protobuf release only supports STRING ctype.
1015     return FieldOptions::STRING;
1016   } else {
1017     // Google-internal supports all ctypes.
1018     return field->options().ctype();
1019   }
1020 }
1021 
IsAnyMessage(const FileDescriptor * descriptor,const Options & options)1022 bool IsAnyMessage(const FileDescriptor* descriptor, const Options& options) {
1023   return descriptor->name() == kAnyProtoFile;
1024 }
1025 
IsAnyMessage(const Descriptor * descriptor,const Options & options)1026 bool IsAnyMessage(const Descriptor* descriptor, const Options& options) {
1027   return descriptor->name() == kAnyMessageName &&
1028          IsAnyMessage(descriptor->file(), options);
1029 }
1030 
IsWellKnownMessage(const FileDescriptor * file)1031 bool IsWellKnownMessage(const FileDescriptor* file) {
1032   static const std::unordered_set<std::string> well_known_files{
1033       "google/protobuf/any.proto",
1034       "google/protobuf/api.proto",
1035       "google/protobuf/compiler/plugin.proto",
1036       "google/protobuf/descriptor.proto",
1037       "google/protobuf/duration.proto",
1038       "google/protobuf/empty.proto",
1039       "google/protobuf/field_mask.proto",
1040       "google/protobuf/source_context.proto",
1041       "google/protobuf/struct.proto",
1042       "google/protobuf/timestamp.proto",
1043       "google/protobuf/type.proto",
1044       "google/protobuf/wrappers.proto",
1045   };
1046   return well_known_files.find(file->name()) != well_known_files.end();
1047 }
1048 
// Always answers true; both arguments are ignored.
static bool FieldEnforceUtf8(const FieldDescriptor* field,
                             const Options& options) {
  static_cast<void>(field);
  static_cast<void>(options);
  const bool enforce = true;
  return enforce;
}
1053 
// Always answers true; both arguments are ignored.
static bool FileUtf8Verification(const FileDescriptor* file,
                                 const Options& options) {
  static_cast<void>(file);
  static_cast<void>(options);
  const bool verify = true;
  return verify;
}
1058 
1059 // Which level of UTF-8 enforcemant is placed on this file.
GetUtf8CheckMode(const FieldDescriptor * field,const Options & options)1060 Utf8CheckMode GetUtf8CheckMode(const FieldDescriptor* field,
1061                                const Options& options) {
1062   if (field->file()->syntax() == FileDescriptor::SYNTAX_PROTO3 &&
1063       FieldEnforceUtf8(field, options)) {
1064     return Utf8CheckMode::kStrict;
1065   } else if (GetOptimizeFor(field->file(), options) !=
1066                  FileOptions::LITE_RUNTIME &&
1067              FileUtf8Verification(field->file(), options)) {
1068     return Utf8CheckMode::kVerify;
1069   } else {
1070     return Utf8CheckMode::kNone;
1071   }
1072 }
1073 
GenerateUtf8CheckCode(const FieldDescriptor * field,const Options & options,bool for_parse,const char * parameters,const char * strict_function,const char * verify_function,const Formatter & format)1074 static void GenerateUtf8CheckCode(const FieldDescriptor* field,
1075                                   const Options& options, bool for_parse,
1076                                   const char* parameters,
1077                                   const char* strict_function,
1078                                   const char* verify_function,
1079                                   const Formatter& format) {
1080   switch (GetUtf8CheckMode(field, options)) {
1081     case Utf8CheckMode::kStrict: {
1082       if (for_parse) {
1083         format("DO_(");
1084       }
1085       format("::$proto_ns$::internal::WireFormatLite::$1$(\n", strict_function);
1086       format.Indent();
1087       format(parameters);
1088       if (for_parse) {
1089         format("::$proto_ns$::internal::WireFormatLite::PARSE,\n");
1090       } else {
1091         format("::$proto_ns$::internal::WireFormatLite::SERIALIZE,\n");
1092       }
1093       format("\"$1$\")", field->full_name());
1094       if (for_parse) {
1095         format(")");
1096       }
1097       format(";\n");
1098       format.Outdent();
1099       break;
1100     }
1101     case Utf8CheckMode::kVerify: {
1102       format("::$proto_ns$::internal::WireFormat::$1$(\n", verify_function);
1103       format.Indent();
1104       format(parameters);
1105       if (for_parse) {
1106         format("::$proto_ns$::internal::WireFormat::PARSE,\n");
1107       } else {
1108         format("::$proto_ns$::internal::WireFormat::SERIALIZE,\n");
1109       }
1110       format("\"$1$\");\n", field->full_name());
1111       format.Outdent();
1112       break;
1113     }
1114     case Utf8CheckMode::kNone:
1115       break;
1116   }
1117 }
1118 
GenerateUtf8CheckCodeForString(const FieldDescriptor * field,const Options & options,bool for_parse,const char * parameters,const Formatter & format)1119 void GenerateUtf8CheckCodeForString(const FieldDescriptor* field,
1120                                     const Options& options, bool for_parse,
1121                                     const char* parameters,
1122                                     const Formatter& format) {
1123   GenerateUtf8CheckCode(field, options, for_parse, parameters,
1124                         "VerifyUtf8String", "VerifyUTF8StringNamedField",
1125                         format);
1126 }
1127 
GenerateUtf8CheckCodeForCord(const FieldDescriptor * field,const Options & options,bool for_parse,const char * parameters,const Formatter & format)1128 void GenerateUtf8CheckCodeForCord(const FieldDescriptor* field,
1129                                   const Options& options, bool for_parse,
1130                                   const char* parameters,
1131                                   const Formatter& format) {
1132   GenerateUtf8CheckCode(field, options, for_parse, parameters, "VerifyUtf8Cord",
1133                         "VerifyUTF8CordNamedField", format);
1134 }
1135 
FlattenMessagesInFile(const FileDescriptor * file,std::vector<const Descriptor * > * result)1136 void FlattenMessagesInFile(const FileDescriptor* file,
1137                            std::vector<const Descriptor*>* result) {
1138   for (int i = 0; i < file->message_type_count(); i++) {
1139     ForEachMessage(file->message_type(i), [&](const Descriptor* descriptor) {
1140       result->push_back(descriptor);
1141     });
1142   }
1143 }
1144 
HasWeakFields(const Descriptor * descriptor,const Options & options)1145 bool HasWeakFields(const Descriptor* descriptor, const Options& options) {
1146   for (int i = 0; i < descriptor->field_count(); i++) {
1147     if (IsWeak(descriptor->field(i), options)) return true;
1148   }
1149   return false;
1150 }
1151 
HasWeakFields(const FileDescriptor * file,const Options & options)1152 bool HasWeakFields(const FileDescriptor* file, const Options& options) {
1153   for (int i = 0; i < file->message_type_count(); ++i) {
1154     if (HasWeakFields(file->message_type(i), options)) return true;
1155   }
1156   return false;
1157 }
1158 
UsingImplicitWeakFields(const FileDescriptor * file,const Options & options)1159 bool UsingImplicitWeakFields(const FileDescriptor* file,
1160                              const Options& options) {
1161   return options.lite_implicit_weak_fields &&
1162          GetOptimizeFor(file, options) == FileOptions::LITE_RUNTIME;
1163 }
1164 
IsImplicitWeakField(const FieldDescriptor * field,const Options & options,MessageSCCAnalyzer * scc_analyzer)1165 bool IsImplicitWeakField(const FieldDescriptor* field, const Options& options,
1166                          MessageSCCAnalyzer* scc_analyzer) {
1167   return UsingImplicitWeakFields(field->file(), options) &&
1168          field->type() == FieldDescriptor::TYPE_MESSAGE &&
1169          !field->is_required() && !field->is_map() && !field->is_extension() &&
1170          !IsWellKnownMessage(field->message_type()->file()) &&
1171          field->message_type()->file()->name() !=
1172              "net/proto2/proto/descriptor.proto" &&
1173          // We do not support implicit weak fields between messages in the same
1174          // strongly-connected component.
1175          scc_analyzer->GetSCC(field->containing_type()) !=
1176              scc_analyzer->GetSCC(field->message_type());
1177 }
1178 
GetSCCAnalysis(const SCC * scc)1179 MessageAnalysis MessageSCCAnalyzer::GetSCCAnalysis(const SCC* scc) {
1180   if (analysis_cache_.count(scc)) return analysis_cache_[scc];
1181   MessageAnalysis result;
1182   if (UsingImplicitWeakFields(scc->GetFile(), options_)) {
1183     result.contains_weak = true;
1184   }
1185   for (int i = 0; i < scc->descriptors.size(); i++) {
1186     const Descriptor* descriptor = scc->descriptors[i];
1187     if (descriptor->extension_range_count() > 0) {
1188       result.contains_extension = true;
1189     }
1190     for (int j = 0; j < descriptor->field_count(); j++) {
1191       const FieldDescriptor* field = descriptor->field(j);
1192       if (field->is_required()) {
1193         result.contains_required = true;
1194       }
1195       if (field->options().weak()) {
1196         result.contains_weak = true;
1197       }
1198       switch (field->type()) {
1199         case FieldDescriptor::TYPE_STRING:
1200         case FieldDescriptor::TYPE_BYTES: {
1201           if (field->options().ctype() == FieldOptions::CORD) {
1202             result.contains_cord = true;
1203           }
1204           break;
1205         }
1206         case FieldDescriptor::TYPE_GROUP:
1207         case FieldDescriptor::TYPE_MESSAGE: {
1208           const SCC* child = analyzer_.GetSCC(field->message_type());
1209           if (child != scc) {
1210             MessageAnalysis analysis = GetSCCAnalysis(child);
1211             result.contains_cord |= analysis.contains_cord;
1212             result.contains_extension |= analysis.contains_extension;
1213             if (!ShouldIgnoreRequiredFieldCheck(field, options_)) {
1214               result.contains_required |= analysis.contains_required;
1215             }
1216             result.contains_weak |= analysis.contains_weak;
1217           } else {
1218             // This field points back into the same SCC hence the messages
1219             // in the SCC are recursive. Note if SCC contains more than two
1220             // nodes it has to be recursive, however this test also works for
1221             // a single node that is recursive.
1222             result.is_recursive = true;
1223           }
1224           break;
1225         }
1226         default:
1227           break;
1228       }
1229     }
1230   }
1231   // We deliberately only insert the result here. After we contracted the SCC
1232   // in the graph, the graph should be a DAG. Hence we shouldn't need to mark
1233   // nodes visited as we can never return to them. By inserting them here
1234   // we will go in an infinite loop if the SCC is not correct.
1235   return analysis_cache_[scc] = result;
1236 }
1237 
ListAllFields(const Descriptor * d,std::vector<const FieldDescriptor * > * fields)1238 void ListAllFields(const Descriptor* d,
1239                    std::vector<const FieldDescriptor*>* fields) {
1240   // Collect sub messages
1241   for (int i = 0; i < d->nested_type_count(); i++) {
1242     ListAllFields(d->nested_type(i), fields);
1243   }
1244   // Collect message level extensions.
1245   for (int i = 0; i < d->extension_count(); i++) {
1246     fields->push_back(d->extension(i));
1247   }
1248   // Add types of fields necessary
1249   for (int i = 0; i < d->field_count(); i++) {
1250     fields->push_back(d->field(i));
1251   }
1252 }
1253 
ListAllFields(const FileDescriptor * d,std::vector<const FieldDescriptor * > * fields)1254 void ListAllFields(const FileDescriptor* d,
1255                    std::vector<const FieldDescriptor*>* fields) {
1256   // Collect file level message.
1257   for (int i = 0; i < d->message_type_count(); i++) {
1258     ListAllFields(d->message_type(i), fields);
1259   }
1260   // Collect message level extensions.
1261   for (int i = 0; i < d->extension_count(); i++) {
1262     fields->push_back(d->extension(i));
1263   }
1264 }
1265 
ListAllTypesForServices(const FileDescriptor * fd,std::vector<const Descriptor * > * types)1266 void ListAllTypesForServices(const FileDescriptor* fd,
1267                              std::vector<const Descriptor*>* types) {
1268   for (int i = 0; i < fd->service_count(); i++) {
1269     const ServiceDescriptor* sd = fd->service(i);
1270     for (int j = 0; j < sd->method_count(); j++) {
1271       const MethodDescriptor* method = sd->method(j);
1272       types->push_back(method->input_type());
1273       types->push_back(method->output_type());
1274     }
1275   }
1276 }
1277 
GetBootstrapBasename(const Options & options,const std::string & basename,std::string * bootstrap_basename)1278 bool GetBootstrapBasename(const Options& options, const std::string& basename,
1279                           std::string* bootstrap_basename) {
1280   if (options.opensource_runtime) {
1281     return false;
1282   }
1283 
1284   std::unordered_map<std::string, std::string> bootstrap_mapping{
1285       {"net/proto2/proto/descriptor",
1286        "third_party/protobuf/descriptor"},
1287       {"net/proto2/compiler/proto/plugin",
1288        "net/proto2/compiler/proto/plugin"},
1289       {"net/proto2/compiler/proto/profile",
1290        "net/proto2/compiler/proto/profile_bootstrap"},
1291   };
1292   auto iter = bootstrap_mapping.find(basename);
1293   if (iter == bootstrap_mapping.end()) {
1294     *bootstrap_basename = basename;
1295     return false;
1296   } else {
1297     *bootstrap_basename = iter->second;
1298     return true;
1299   }
1300 }
1301 
IsBootstrapProto(const Options & options,const FileDescriptor * file)1302 bool IsBootstrapProto(const Options& options, const FileDescriptor* file) {
1303   std::string my_name = StripProto(file->name());
1304   return GetBootstrapBasename(options, my_name, &my_name);
1305 }
1306 
MaybeBootstrap(const Options & options,GeneratorContext * generator_context,bool bootstrap_flag,std::string * basename)1307 bool MaybeBootstrap(const Options& options, GeneratorContext* generator_context,
1308                     bool bootstrap_flag, std::string* basename) {
1309   std::string bootstrap_basename;
1310   if (!GetBootstrapBasename(options, *basename, &bootstrap_basename)) {
1311     return false;
1312   }
1313 
1314   if (bootstrap_flag) {
1315     // Adjust basename, but don't abort code generation.
1316     *basename = bootstrap_basename;
1317     return false;
1318   } else {
1319     const std::string& forward_to_basename = bootstrap_basename;
1320 
1321     // Generate forwarding headers and empty .pb.cc.
1322     {
1323       std::unique_ptr<io::ZeroCopyOutputStream> output(
1324           generator_context->Open(*basename + ".pb.h"));
1325       io::Printer printer(output.get(), '$', nullptr);
1326       printer.Print(
1327           "#ifndef PROTOBUF_INCLUDED_$filename_identifier$_FORWARD_PB_H\n"
1328           "#define PROTOBUF_INCLUDED_$filename_identifier$_FORWARD_PB_H\n"
1329           "#include \"$forward_to_basename$.pb.h\"  // IWYU pragma: export\n"
1330           "#endif  // PROTOBUF_INCLUDED_$filename_identifier$_FORWARD_PB_H\n",
1331           "forward_to_basename", forward_to_basename, "filename_identifier",
1332           FilenameIdentifier(*basename));
1333 
1334       if (!options.opensource_runtime) {
1335         // HACK HACK HACK, tech debt from the deeps of proto1 and SWIG
1336         // protocoltype is SWIG'ed and we need to forward
1337         if (*basename == "net/proto/protocoltype") {
1338           printer.Print(
1339               "#ifdef SWIG\n"
1340               "%include \"$forward_to_basename$.pb.h\"\n"
1341               "#endif  // SWIG\n",
1342               "forward_to_basename", forward_to_basename);
1343         }
1344       }
1345     }
1346 
1347     {
1348       std::unique_ptr<io::ZeroCopyOutputStream> output(
1349           generator_context->Open(*basename + ".proto.h"));
1350       io::Printer printer(output.get(), '$', nullptr);
1351       printer.Print(
1352           "#ifndef PROTOBUF_INCLUDED_$filename_identifier$_FORWARD_PROTO_H\n"
1353           "#define PROTOBUF_INCLUDED_$filename_identifier$_FORWARD_PROTO_H\n"
1354           "#include \"$forward_to_basename$.proto.h\"  // IWYU pragma: "
1355           "export\n"
1356           "#endif  // "
1357           "PROTOBUF_INCLUDED_$filename_identifier$_FORWARD_PROTO_H\n",
1358           "forward_to_basename", forward_to_basename, "filename_identifier",
1359           FilenameIdentifier(*basename));
1360     }
1361 
1362     {
1363       std::unique_ptr<io::ZeroCopyOutputStream> output(
1364           generator_context->Open(*basename + ".pb.cc"));
1365       io::Printer printer(output.get(), '$', nullptr);
1366       printer.Print("\n");
1367     }
1368 
1369     {
1370       std::unique_ptr<io::ZeroCopyOutputStream> output(
1371           generator_context->Open(*basename + ".pb.h.meta"));
1372     }
1373 
1374     {
1375       std::unique_ptr<io::ZeroCopyOutputStream> output(
1376           generator_context->Open(*basename + ".proto.h.meta"));
1377     }
1378 
1379     // Abort code generation.
1380     return true;
1381   }
1382 }
1383 
HasExtensionFromFile(const Message & msg,const FileDescriptor * file,const Options & options,bool * has_opt_codesize_extension)1384 static bool HasExtensionFromFile(const Message& msg, const FileDescriptor* file,
1385                                  const Options& options,
1386                                  bool* has_opt_codesize_extension) {
1387   std::vector<const FieldDescriptor*> fields;
1388   auto reflection = msg.GetReflection();
1389   reflection->ListFields(msg, &fields);
1390   for (auto field : fields) {
1391     const auto* field_msg = field->message_type();
1392     if (field_msg == nullptr) {
1393       // It so happens that enums Is_Valid are still generated so enums work.
1394       // Only messages have potential problems.
1395       continue;
1396     }
1397     // If this option has an extension set AND that extension is defined in the
1398     // same file we have bootstrap problem.
1399     if (field->is_extension()) {
1400       const auto* msg_extension_file = field->message_type()->file();
1401       if (msg_extension_file == file) return true;
1402       if (has_opt_codesize_extension &&
1403           GetOptimizeFor(msg_extension_file, options) ==
1404               FileOptions::CODE_SIZE) {
1405         *has_opt_codesize_extension = true;
1406       }
1407     }
1408     // Recurse in this field to see if there is a problem in there
1409     if (field->is_repeated()) {
1410       for (int i = 0; i < reflection->FieldSize(msg, field); i++) {
1411         if (HasExtensionFromFile(reflection->GetRepeatedMessage(msg, field, i),
1412                                  file, options, has_opt_codesize_extension)) {
1413           return true;
1414         }
1415       }
1416     } else {
1417       if (HasExtensionFromFile(reflection->GetMessage(msg, field), file,
1418                                options, has_opt_codesize_extension)) {
1419         return true;
1420       }
1421     }
1422   }
1423   return false;
1424 }
1425 
HasBootstrapProblem(const FileDescriptor * file,const Options & options,bool * has_opt_codesize_extension)1426 static bool HasBootstrapProblem(const FileDescriptor* file,
1427                                 const Options& options,
1428                                 bool* has_opt_codesize_extension) {
1429   static auto& cache = *new std::unordered_map<const FileDescriptor*, bool>;
1430   auto it = cache.find(file);
1431   if (it != cache.end()) return it->second;
1432   // In order to build the data structures for the reflective parse, it needs
1433   // to parse the serialized descriptor describing all the messages defined in
1434   // this file. Obviously this presents a bootstrap problem for descriptor
1435   // messages.
1436   if (file->name() == "net/proto2/proto/descriptor.proto" ||
1437       file->name() == "google/protobuf/descriptor.proto") {
1438     return true;
1439   }
1440   // Unfortunately we're not done yet. The descriptor option messages allow
1441   // for extensions. So we need to be able to parse these extensions in order
1442   // to parse the file descriptor for a file that has custom options. This is a
1443   // problem when these custom options extensions are defined in the same file.
1444   FileDescriptorProto linkedin_fd_proto;
1445   const DescriptorPool* pool = file->pool();
1446   const Descriptor* fd_proto_descriptor =
1447       pool->FindMessageTypeByName(linkedin_fd_proto.GetTypeName());
1448   // Not all pools have descriptor.proto in them. In these cases there for sure
1449   // are no custom options.
1450   if (fd_proto_descriptor == nullptr) return false;
1451 
1452   // It's easier to inspect file as a proto, because we can use reflection on
1453   // the proto to iterate over all content.
1454   file->CopyTo(&linkedin_fd_proto);
1455 
1456   // linkedin_fd_proto is a generated proto linked in the proto compiler. As
1457   // such it doesn't know the extensions that are potentially present in the
1458   // descriptor pool constructed from the protos that are being compiled. These
1459   // custom options are therefore in the unknown fields.
1460   // By building the corresponding FileDescriptorProto in the pool constructed
1461   // by the protos that are being compiled, ie. file's pool, the unknown fields
1462   // are converted to extensions.
1463   DynamicMessageFactory factory(pool);
1464   Message* fd_proto = factory.GetPrototype(fd_proto_descriptor)->New();
1465   fd_proto->ParseFromString(linkedin_fd_proto.SerializeAsString());
1466 
1467   bool& res = cache[file];
1468   res = HasExtensionFromFile(*fd_proto, file, options,
1469                              has_opt_codesize_extension);
1470   delete fd_proto;
1471   return res;
1472 }
1473 
GetOptimizeFor(const FileDescriptor * file,const Options & options,bool * has_opt_codesize_extension)1474 FileOptions_OptimizeMode GetOptimizeFor(const FileDescriptor* file,
1475                                         const Options& options,
1476                                         bool* has_opt_codesize_extension) {
1477   if (has_opt_codesize_extension) *has_opt_codesize_extension = false;
1478   switch (options.enforce_mode) {
1479     case EnforceOptimizeMode::kSpeed:
1480       return FileOptions::SPEED;
1481     case EnforceOptimizeMode::kLiteRuntime:
1482       return FileOptions::LITE_RUNTIME;
1483     case EnforceOptimizeMode::kCodeSize:
1484       if (file->options().optimize_for() == FileOptions::LITE_RUNTIME) {
1485         return FileOptions::LITE_RUNTIME;
1486       }
1487       if (HasBootstrapProblem(file, options, has_opt_codesize_extension)) {
1488         return FileOptions::SPEED;
1489       }
1490       return FileOptions::CODE_SIZE;
1491     case EnforceOptimizeMode::kNoEnforcement:
1492       if (file->options().optimize_for() == FileOptions::CODE_SIZE) {
1493         if (HasBootstrapProblem(file, options, has_opt_codesize_extension)) {
1494           GOOGLE_LOG(WARNING) << "Proto states optimize_for = CODE_SIZE, but we "
1495                           "cannot honor that because it contains custom option "
1496                           "extensions defined in the same proto.";
1497           return FileOptions::SPEED;
1498         }
1499       }
1500       return file->options().optimize_for();
1501   }
1502 
1503   GOOGLE_LOG(FATAL) << "Unknown optimization enforcement requested.";
1504   // The phony return below serves to silence a warning from GCC 8.
1505   return FileOptions::SPEED;
1506 }
1507 
// Reports whether message-owned arenas are enabled for `desc`. This
// implementation ignores both arguments and always answers false.
bool EnableMessageOwnedArena(const Descriptor* desc, const Options& options) {
  // Neither argument is consulted; the casts only mark them as deliberately
  // unused.
  static_cast<void>(desc);
  static_cast<void>(options);
  return false;
}
1513 
1514 }  // namespace cpp
1515 }  // namespace compiler
1516 }  // namespace protobuf
1517 }  // namespace google
1518