• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 //     * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 //     * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 //     * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 // Author: kenton@google.com (Kenton Varda)
32 //  Based on original Protocol Buffers design by
33 //  Sanjay Ghemawat, Jeff Dean, and others.
34 
35 #include <google/protobuf/compiler/cpp/helpers.h>
36 
37 #include <cstdint>
38 #include <functional>
39 #include <limits>
40 #include <map>
41 #include <memory>
42 #include <queue>
43 #include <unordered_set>
44 #include <vector>
45 
46 #include <google/protobuf/stubs/common.h>
47 #include <google/protobuf/stubs/logging.h>
48 #include <google/protobuf/descriptor.h>
49 #include <google/protobuf/compiler/cpp/names.h>
50 #include <google/protobuf/compiler/cpp/options.h>
51 #include <google/protobuf/descriptor.pb.h>
52 #include <google/protobuf/compiler/scc.h>
53 #include <google/protobuf/io/printer.h>
54 #include <google/protobuf/io/zero_copy_stream.h>
55 #include <google/protobuf/dynamic_message.h>
56 #include <google/protobuf/wire_format.h>
57 #include <google/protobuf/wire_format_lite.h>
58 #include <google/protobuf/stubs/strutil.h>
59 #include <google/protobuf/stubs/substitute.h>
60 #include <google/protobuf/stubs/hash.h>
61 
62 // Must be last.
63 #include <google/protobuf/port_def.inc>
64 
65 namespace google {
66 namespace protobuf {
67 namespace compiler {
68 namespace cpp {
69 
70 namespace {
71 
// Short message name "Any"; presumably compared against descriptor names to
// recognize the well-known Any type (usage is not visible in this part of the
// file).
static const char kAnyMessageName[] = "Any";
// Path of the .proto file expected to define that message.
static const char kAnyProtoFile[] = "google/protobuf/any.proto";
74 
DotsToColons(const std::string & name)75 std::string DotsToColons(const std::string& name) {
76   return StringReplace(name, ".", "::", true);
77 }
78 
// Identifiers that must not appear verbatim as names in generated C++ code.
// A name matching one of these entries gets a trailing underscore appended
// (see ResolveKeyword(), FieldName() and EnumValueName()).
//
// "char16_t" and "char32_t" are C++11 keywords exactly like "wchar_t"; a
// proto element carrying one of those names would otherwise be emitted
// unescaped and the generated code could not compile.
//
// NOTE(review): the C++20 keywords (char8_t, concept, consteval, constinit,
// co_await, co_return, co_yield, requires) are intentionally not added here,
// because escaping them would rename symbols that compile today under C++17.
static const char* const kKeywordList[] = {  //
    "NULL",
    "alignas",
    "alignof",
    "and",
    "and_eq",
    "asm",
    "auto",
    "bitand",
    "bitor",
    "bool",
    "break",
    "case",
    "catch",
    "char",
    "char16_t",
    "char32_t",
    "class",
    "compl",
    "const",
    "constexpr",
    "const_cast",
    "continue",
    "decltype",
    "default",
    "delete",
    "do",
    "double",
    "dynamic_cast",
    "else",
    "enum",
    "explicit",
    "export",
    "extern",
    "false",
    "float",
    "for",
    "friend",
    "goto",
    "if",
    "inline",
    "int",
    "long",
    "mutable",
    "namespace",
    "new",
    "noexcept",
    "not",
    "not_eq",
    "nullptr",
    "operator",
    "or",
    "or_eq",
    "private",
    "protected",
    "public",
    "register",
    "reinterpret_cast",
    "return",
    "short",
    "signed",
    "sizeof",
    "static",
    "static_assert",
    "static_cast",
    "struct",
    "switch",
    "template",
    "this",
    "thread_local",
    "throw",
    "true",
    "try",
    "typedef",
    "typeid",
    "typename",
    "union",
    "unsigned",
    "using",
    "virtual",
    "void",
    "volatile",
    "wchar_t",
    "while",
    "xor",
    "xor_eq"};
163 
MakeKeywordsMap()164 static std::unordered_set<std::string>* MakeKeywordsMap() {
165   auto* result = new std::unordered_set<std::string>();
166   for (const auto keyword : kKeywordList) {
167     result->emplace(keyword);
168   }
169   return result;
170 }
171 
// Set of every entry in kKeywordList, created once when this translation
// unit's statics are initialized. It refers to the heap object returned by
// MakeKeywordsMap(); no matching delete appears in this part of the file.
static std::unordered_set<std::string>& kKeywords = *MakeKeywordsMap();
173 
// Spells a fixed-width integer type for generated code: "int32" becomes
// "::int32_t". `options` is accepted but not consulted.
std::string IntTypeName(const Options& /*options*/, const std::string& type) {
  std::string spelled = "::";
  spelled += type;
  spelled += "_t";
  return spelled;
}
177 
SetIntVar(const Options & options,const std::string & type,std::map<std::string,std::string> * variables)178 void SetIntVar(const Options& options, const std::string& type,
179                std::map<std::string, std::string>* variables) {
180   (*variables)[type] = IntTypeName(options, type);
181 }
182 
// Returns true if the message can potentially allocate memory for its field.
// This is used to determine if message-owned arena will be useful.
//
// NOTE: the implementation visible here is a stub that ignores `descriptor`
// and unconditionally returns false.
bool AllocExpected(const Descriptor* descriptor) {
  return false;
}
188 
// Describes different approaches to detect non-canonical int32 encoding. Only
// kNever or kAlways is eligible for *simple* verification methods.
//
// The enumerator order matters: VerifyInt32TypeToVerifyCustom() uses the
// offset from kCustom as an index into a parallel table.
enum class VerifyInt32Type {
  kCustom,  // Only check if field number matches.
  kNever,   // Do not check.
  kAlways,  // Always check.
};
196 
VerifyInt32TypeToVerifyCustom(VerifyInt32Type t)197 inline VerifySimpleType VerifyInt32TypeToVerifyCustom(VerifyInt32Type t) {
198   static VerifySimpleType kCustomTypes[] = {
199       VerifySimpleType::kCustom, VerifySimpleType::kCustomInt32Never,
200       VerifySimpleType::kCustomInt32Always};
201   return kCustomTypes[static_cast<int32_t>(t) -
202                       static_cast<int32_t>(VerifyInt32Type::kCustom)];
203 }
204 
205 }  // namespace
206 
IsLazy(const FieldDescriptor * field,const Options & options,MessageSCCAnalyzer * scc_analyzer)207 bool IsLazy(const FieldDescriptor* field, const Options& options,
208             MessageSCCAnalyzer* scc_analyzer) {
209   return IsLazilyVerifiedLazy(field, options) ||
210          IsEagerlyVerifiedLazy(field, options, scc_analyzer);
211 }
212 
// Returns true if "field" is a message field that is backed by LazyField per
// profile (go/pdlazy).
//
// NOTE: the implementation visible here is a stub; every argument is ignored
// and the result is always false.
inline bool IsEagerlyVerifiedLazyByProfile(const FieldDescriptor* field,
                                           const Options& options,
                                           MessageSCCAnalyzer* scc_analyzer) {
  return false;
}
220 
// Reports whether `field` is an eagerly verified lazy field. The
// implementation visible here ignores all arguments and always returns
// false.
bool IsEagerlyVerifiedLazy(const FieldDescriptor* field, const Options& options,
                           MessageSCCAnalyzer* scc_analyzer) {
  return false;
}
225 
// Reports whether `field` is a lazily verified lazy field. The
// implementation visible here ignores both arguments and always returns
// false.
bool IsLazilyVerifiedLazy(const FieldDescriptor* field,
                          const Options& options) {
  return false;
}
230 
SetCommonVars(const Options & options,std::map<std::string,std::string> * variables)231 void SetCommonVars(const Options& options,
232                    std::map<std::string, std::string>* variables) {
233   (*variables)["proto_ns"] = ProtobufNamespace(options);
234 
235   // Warning: there is some clever naming/splitting here to avoid extract script
236   // rewrites.  The names of these variables must not be things that the extract
237   // script will rewrite.  That's why we use "CHK" (for example) instead of
238   // "GOOGLE_CHECK".
239   if (options.opensource_runtime) {
240     (*variables)["GOOGLE_PROTOBUF"] = "GOOGLE_PROTOBUF";
241     (*variables)["CHK"] = "GOOGLE_CHECK";
242     (*variables)["DCHK"] = "GOOGLE_DCHECK";
243   } else {
244     // These values are things the extract script would rewrite if we did not
245     // split them.  It might not strictly matter since we don't generate google3
246     // code in open-source.  But it's good to prevent surprising things from
247     // happening.
248     (*variables)["GOOGLE_PROTOBUF"] =
249         "GOOGLE3"
250         "_PROTOBUF";
251     (*variables)["CHK"] =
252         "CH"
253         "ECK";
254     (*variables)["DCHK"] =
255         "DCH"
256         "ECK";
257   }
258 
259   SetIntVar(options, "int8", variables);
260   SetIntVar(options, "uint8", variables);
261   SetIntVar(options, "uint32", variables);
262   SetIntVar(options, "uint64", variables);
263   SetIntVar(options, "int32", variables);
264   SetIntVar(options, "int64", variables);
265   (*variables)["string"] = "std::string";
266 }
267 
SetCommonMessageDataVariables(const Descriptor * descriptor,std::map<std::string,std::string> * variables)268 void SetCommonMessageDataVariables(
269     const Descriptor* descriptor,
270     std::map<std::string, std::string>* variables) {
271   std::string prefix = IsMapEntryMessage(descriptor) ? "" : "_impl_.";
272   (*variables)["any_metadata"] = prefix + "_any_metadata_";
273   (*variables)["cached_size"] = prefix + "_cached_size_";
274   (*variables)["extensions"] = prefix + "_extensions_";
275   (*variables)["has_bits"] = prefix + "_has_bits_";
276   (*variables)["inlined_string_donated_array"] =
277       prefix + "_inlined_string_donated_";
278   (*variables)["oneof_case"] = prefix + "_oneof_case_";
279   (*variables)["tracker"] = "Impl_::_tracker_";
280   (*variables)["weak_field_map"] = prefix + "_weak_field_map_";
281   (*variables)["split"] = prefix + "_split_";
282   (*variables)["cached_split_ptr"] = "cached_split_ptr";
283 }
284 
SetUnknownFieldsVariable(const Descriptor * descriptor,const Options & options,std::map<std::string,std::string> * variables)285 void SetUnknownFieldsVariable(const Descriptor* descriptor,
286                               const Options& options,
287                               std::map<std::string, std::string>* variables) {
288   std::string proto_ns = ProtobufNamespace(options);
289   std::string unknown_fields_type;
290   if (UseUnknownFieldSet(descriptor->file(), options)) {
291     unknown_fields_type = "::" + proto_ns + "::UnknownFieldSet";
292     (*variables)["unknown_fields"] =
293         "_internal_metadata_.unknown_fields<" + unknown_fields_type + ">(" +
294         unknown_fields_type + "::default_instance)";
295   } else {
296     unknown_fields_type =
297         PrimitiveTypeName(options, FieldDescriptor::CPPTYPE_STRING);
298     (*variables)["unknown_fields"] = "_internal_metadata_.unknown_fields<" +
299                                      unknown_fields_type + ">(::" + proto_ns +
300                                      "::internal::GetEmptyString)";
301   }
302   (*variables)["unknown_fields_type"] = unknown_fields_type;
303   (*variables)["have_unknown_fields"] =
304       "_internal_metadata_.have_unknown_fields()";
305   (*variables)["mutable_unknown_fields"] =
306       "_internal_metadata_.mutable_unknown_fields<" + unknown_fields_type +
307       ">()";
308 }
309 
// Converts `input` to camel case.
//
// Letters and digits are copied to the result; every other character (such
// as '_') is dropped. A lower-case letter is upper-cased when it is the first
// character and `cap_next_letter` is true, or when it directly follows a
// digit or a dropped character. Upper-case letters are copied unchanged.
//
// Only the ASCII ranges a-z, A-Z and 0-9 are recognized.
std::string UnderscoresToCamelCase(const std::string& input,
                                   bool cap_next_letter) {
  std::string result;
  result.reserve(input.size());
  // Note:  I distrust ctype.h due to locales.
  // Iterating by value avoids comparing a signed index against size().
  for (const char c : input) {
    if ('a' <= c && c <= 'z') {
      result += cap_next_letter ? static_cast<char>(c + ('A' - 'a')) : c;
      cap_next_letter = false;
    } else if ('A' <= c && c <= 'Z') {
      // Capital letters are left as-is.
      result += c;
      cap_next_letter = false;
    } else {
      // Digits are kept, anything else is dropped; both capitalize the next
      // lower-case letter.
      if ('0' <= c && c <= '9') {
        result += c;
      }
      cap_next_letter = true;
    }
  }
  return result;
}
335 
// Comment line made of '=' characters, including the trailing newline.
const char kThickSeparator[] =
    "// ===================================================================\n";
// Comment line made of '-' characters, including the trailing newline.
const char kThinSeparator[] =
    "// -------------------------------------------------------------------\n";
340 
CanInitializeByZeroing(const FieldDescriptor * field)341 bool CanInitializeByZeroing(const FieldDescriptor* field) {
342   if (field->is_repeated() || field->is_extension()) return false;
343   switch (field->cpp_type()) {
344     case FieldDescriptor::CPPTYPE_ENUM:
345       return field->default_value_enum()->number() == 0;
346     case FieldDescriptor::CPPTYPE_INT32:
347       return field->default_value_int32() == 0;
348     case FieldDescriptor::CPPTYPE_INT64:
349       return field->default_value_int64() == 0;
350     case FieldDescriptor::CPPTYPE_UINT32:
351       return field->default_value_uint32() == 0;
352     case FieldDescriptor::CPPTYPE_UINT64:
353       return field->default_value_uint64() == 0;
354     case FieldDescriptor::CPPTYPE_FLOAT:
355       return field->default_value_float() == 0;
356     case FieldDescriptor::CPPTYPE_DOUBLE:
357       return field->default_value_double() == 0;
358     case FieldDescriptor::CPPTYPE_BOOL:
359       return field->default_value_bool() == false;
360     default:
361       return false;
362   }
363 }
364 
ClassName(const Descriptor * descriptor)365 std::string ClassName(const Descriptor* descriptor) {
366   const Descriptor* parent = descriptor->containing_type();
367   std::string res;
368   if (parent) res += ClassName(parent) + "_";
369   res += descriptor->name();
370   if (IsMapEntryMessage(descriptor)) res += "_DoNotUse";
371   return ResolveKeyword(res);
372 }
373 
ClassName(const EnumDescriptor * enum_descriptor)374 std::string ClassName(const EnumDescriptor* enum_descriptor) {
375   if (enum_descriptor->containing_type() == nullptr) {
376     return ResolveKeyword(enum_descriptor->name());
377   } else {
378     return ClassName(enum_descriptor->containing_type()) + "_" +
379            enum_descriptor->name();
380   }
381 }
382 
// Returns ClassName(d) qualified for d's file via QualifiedFileLevelSymbol(),
// honoring `options`.
std::string QualifiedClassName(const Descriptor* d, const Options& options) {
  return QualifiedFileLevelSymbol(d->file(), ClassName(d), options);
}
386 
// Enum counterpart of the message overload: ClassName(d) qualified for d's
// file via QualifiedFileLevelSymbol(), honoring `options`.
std::string QualifiedClassName(const EnumDescriptor* d,
                               const Options& options) {
  return QualifiedFileLevelSymbol(d->file(), ClassName(d), options);
}
391 
// Convenience overload that uses a default-constructed Options.
std::string QualifiedClassName(const Descriptor* d) {
  return QualifiedClassName(d, Options());
}
395 
// Convenience overload for enums that uses a default-constructed Options.
std::string QualifiedClassName(const EnumDescriptor* d) {
  return QualifiedClassName(d, Options());
}
399 
ExtensionName(const FieldDescriptor * d)400 std::string ExtensionName(const FieldDescriptor* d) {
401   if (const Descriptor* scope = d->extension_scope())
402     return StrCat(ClassName(scope), "::", ResolveKeyword(d->name()));
403   return ResolveKeyword(d->name());
404 }
405 
// Returns ExtensionName(d) qualified for d's file via
// QualifiedFileLevelSymbol(). `d` must be an extension; this is enforced by a
// GOOGLE_DCHECK.
std::string QualifiedExtensionName(const FieldDescriptor* d,
                                   const Options& options) {
  GOOGLE_DCHECK(d->is_extension());
  return QualifiedFileLevelSymbol(d->file(), ExtensionName(d), options);
}
411 
// Convenience overload that uses a default-constructed Options.
std::string QualifiedExtensionName(const FieldDescriptor* d) {
  return QualifiedExtensionName(d, Options());
}
415 
Namespace(const std::string & package)416 std::string Namespace(const std::string& package) {
417   if (package.empty()) return "";
418   return "::" + DotsToColons(package);
419 }
420 
Namespace(const FileDescriptor * d,const Options & options)421 std::string Namespace(const FileDescriptor* d, const Options& options) {
422   std::string ret = Namespace(d->package());
423   if (IsWellKnownMessage(d) && options.opensource_runtime) {
424     // Written with string concatenation to prevent rewriting of
425     // ::google::protobuf.
426     ret = StringReplace(ret,
427                         "::google::"
428                         "protobuf",
429                         "::PROTOBUF_NAMESPACE_ID", false);
430   }
431   return ret;
432 }
433 
// Namespace of the file that declares message `d`.
std::string Namespace(const Descriptor* d, const Options& options) {
  return Namespace(d->file(), options);
}
437 
// Namespace of the file that declares field `d`.
std::string Namespace(const FieldDescriptor* d, const Options& options) {
  return Namespace(d->file(), options);
}
441 
// Namespace of the file that declares enum `d`.
std::string Namespace(const EnumDescriptor* d, const Options& options) {
  return Namespace(d->file(), options);
}
445 
DefaultInstanceType(const Descriptor * descriptor,const Options &,bool split)446 std::string DefaultInstanceType(const Descriptor* descriptor,
447                                 const Options& /*options*/, bool split) {
448   return ClassName(descriptor) + (split ? "__Impl_Split" : "") +
449          "DefaultTypeInternal";
450 }
451 
DefaultInstanceName(const Descriptor * descriptor,const Options &,bool split)452 std::string DefaultInstanceName(const Descriptor* descriptor,
453                                 const Options& /*options*/, bool split) {
454   return "_" + ClassName(descriptor, false) + (split ? "__Impl_Split" : "") +
455          "_default_instance_";
456 }
457 
// Returns DefaultInstanceName(...) with "ptr_" appended.
std::string DefaultInstancePtr(const Descriptor* descriptor,
                               const Options& options, bool split) {
  return DefaultInstanceName(descriptor, options, split) + "ptr_";
}
462 
// Returns DefaultInstanceName(...) qualified for the descriptor's file via
// QualifiedFileLevelSymbol().
std::string QualifiedDefaultInstanceName(const Descriptor* descriptor,
                                         const Options& options, bool split) {
  return QualifiedFileLevelSymbol(
      descriptor->file(), DefaultInstanceName(descriptor, options, split),
      options);
}
469 
// Returns QualifiedDefaultInstanceName(...) with "ptr_" appended.
std::string QualifiedDefaultInstancePtr(const Descriptor* descriptor,
                                        const Options& options, bool split) {
  return QualifiedDefaultInstanceName(descriptor, options, split) + "ptr_";
}
474 
// Returns the per-file unique name built by UniqueName() from the
// "descriptor_table" stem.
std::string DescriptorTableName(const FileDescriptor* file,
                                const Options& options) {
  return UniqueName("descriptor_table", file, options);
}
479 
// Returns the per-file unique name built by UniqueName() from the
// "PROTOBUF_INTERNAL_EXPORT" stem.
std::string FileDllExport(const FileDescriptor* file, const Options& options) {
  return UniqueName("PROTOBUF_INTERNAL_EXPORT", file, options);
}
483 
SuperClassName(const Descriptor * descriptor,const Options & options)484 std::string SuperClassName(const Descriptor* descriptor,
485                            const Options& options) {
486   if (!HasDescriptorMethods(descriptor->file(), options)) {
487     return "::" + ProtobufNamespace(options) + "::MessageLite";
488   }
489   auto simple_base = SimpleBaseClass(descriptor, options);
490   if (simple_base.empty()) {
491     return "::" + ProtobufNamespace(options) + "::Message";
492   }
493   return "::" + ProtobufNamespace(options) + "::internal::" + simple_base;
494 }
495 
ResolveKeyword(const std::string & name)496 std::string ResolveKeyword(const std::string& name) {
497   if (kKeywords.count(name) > 0) {
498     return name + "_";
499   }
500   return name;
501 }
502 
FieldName(const FieldDescriptor * field)503 std::string FieldName(const FieldDescriptor* field) {
504   std::string result = field->name();
505   LowerString(&result);
506   if (kKeywords.count(result) > 0) {
507     result.append("_");
508   }
509   return result;
510 }
511 
FieldMemberName(const FieldDescriptor * field,bool split)512 std::string FieldMemberName(const FieldDescriptor* field, bool split) {
513   StringPiece prefix =
514       IsMapEntryMessage(field->containing_type()) ? "" : "_impl_.";
515   StringPiece split_prefix = split ? "_split_->" : "";
516   if (field->real_containing_oneof() == nullptr) {
517     return StrCat(prefix, split_prefix, FieldName(field), "_");
518   }
519   // Oneof fields are never split.
520   GOOGLE_CHECK(!split);
521   return StrCat(prefix, field->containing_oneof()->name(), "_.",
522                       FieldName(field), "_");
523 }
524 
OneofCaseConstantName(const FieldDescriptor * field)525 std::string OneofCaseConstantName(const FieldDescriptor* field) {
526   GOOGLE_DCHECK(field->containing_oneof());
527   std::string field_name = UnderscoresToCamelCase(field->name(), true);
528   return "k" + field_name;
529 }
530 
QualifiedOneofCaseConstantName(const FieldDescriptor * field)531 std::string QualifiedOneofCaseConstantName(const FieldDescriptor* field) {
532   GOOGLE_DCHECK(field->containing_oneof());
533   const std::string qualification =
534       QualifiedClassName(field->containing_type());
535   return StrCat(qualification, "::", OneofCaseConstantName(field));
536 }
537 
EnumValueName(const EnumValueDescriptor * enum_value)538 std::string EnumValueName(const EnumValueDescriptor* enum_value) {
539   std::string result = enum_value->name();
540   if (kKeywords.count(result) > 0) {
541     result.append("_");
542   }
543   return result;
544 }
545 
EstimateAlignmentSize(const FieldDescriptor * field)546 int EstimateAlignmentSize(const FieldDescriptor* field) {
547   if (field == nullptr) return 0;
548   if (field->is_repeated()) return 8;
549   switch (field->cpp_type()) {
550     case FieldDescriptor::CPPTYPE_BOOL:
551       return 1;
552 
553     case FieldDescriptor::CPPTYPE_INT32:
554     case FieldDescriptor::CPPTYPE_UINT32:
555     case FieldDescriptor::CPPTYPE_ENUM:
556     case FieldDescriptor::CPPTYPE_FLOAT:
557       return 4;
558 
559     case FieldDescriptor::CPPTYPE_INT64:
560     case FieldDescriptor::CPPTYPE_UINT64:
561     case FieldDescriptor::CPPTYPE_DOUBLE:
562     case FieldDescriptor::CPPTYPE_STRING:
563     case FieldDescriptor::CPPTYPE_MESSAGE:
564       return 8;
565   }
566   GOOGLE_LOG(FATAL) << "Can't get here.";
567   return -1;  // Make compiler happy.
568 }
569 
FieldConstantName(const FieldDescriptor * field)570 std::string FieldConstantName(const FieldDescriptor* field) {
571   std::string field_name = UnderscoresToCamelCase(field->name(), true);
572   std::string result = "k" + field_name + "FieldNumber";
573 
574   if (!field->is_extension() &&
575       field->containing_type()->FindFieldByCamelcaseName(
576           field->camelcase_name()) != field) {
577     // This field's camelcase name is not unique.  As a hack, add the field
578     // number to the constant name.  This makes the constant rather useless,
579     // but what can we do?
580     result += "_" + StrCat(field->number());
581   }
582 
583   return result;
584 }
585 
// Returns the qualified class name of the message type of `field`.
std::string FieldMessageTypeName(const FieldDescriptor* field,
                                 const Options& options) {
  // Note:  The Google-internal version of Protocol Buffers uses this function
  //   as a hook point for hacks to support legacy code.
  return QualifiedClassName(field->message_type(), options);
}
592 
// Forwards to compiler::StripProto(); kept only as a compatibility proxy
// (see the TODO below).
std::string StripProto(const std::string& filename) {
  /*
   * TODO(github/georgthegreat) remove this proxy method
   * once Google's internal codebase will become ready
   */
  return compiler::StripProto(filename);
}
600 
// Returns the C++ spelling of a primitive field type as a string literal.
// Enums map to "int" and strings to "std::string"; CPPTYPE_MESSAGE has no
// primitive spelling and yields nullptr. An out-of-range value logs a fatal
// error.
const char* PrimitiveTypeName(FieldDescriptor::CppType type) {
  switch (type) {
    case FieldDescriptor::CPPTYPE_INT32:
      return "::int32_t";
    case FieldDescriptor::CPPTYPE_INT64:
      return "::int64_t";
    case FieldDescriptor::CPPTYPE_UINT32:
      return "::uint32_t";
    case FieldDescriptor::CPPTYPE_UINT64:
      return "::uint64_t";
    case FieldDescriptor::CPPTYPE_DOUBLE:
      return "double";
    case FieldDescriptor::CPPTYPE_FLOAT:
      return "float";
    case FieldDescriptor::CPPTYPE_BOOL:
      return "bool";
    case FieldDescriptor::CPPTYPE_ENUM:
      return "int";
    case FieldDescriptor::CPPTYPE_STRING:
      return "std::string";
    case FieldDescriptor::CPPTYPE_MESSAGE:
      return nullptr;

      // No default because we want the compiler to complain if any new
      // CppTypes are added.
  }

  GOOGLE_LOG(FATAL) << "Can't get here.";
  return nullptr;
}
631 
// Options-aware variant of PrimitiveTypeName(). Integer types are spelled
// through IntTypeName(); CPPTYPE_MESSAGE yields an empty string rather than
// nullptr. An out-of-range value logs a fatal error.
std::string PrimitiveTypeName(const Options& options,
                              FieldDescriptor::CppType type) {
  switch (type) {
    case FieldDescriptor::CPPTYPE_INT32:
      return IntTypeName(options, "int32");
    case FieldDescriptor::CPPTYPE_INT64:
      return IntTypeName(options, "int64");
    case FieldDescriptor::CPPTYPE_UINT32:
      return IntTypeName(options, "uint32");
    case FieldDescriptor::CPPTYPE_UINT64:
      return IntTypeName(options, "uint64");
    case FieldDescriptor::CPPTYPE_DOUBLE:
      return "double";
    case FieldDescriptor::CPPTYPE_FLOAT:
      return "float";
    case FieldDescriptor::CPPTYPE_BOOL:
      return "bool";
    case FieldDescriptor::CPPTYPE_ENUM:
      return "int";
    case FieldDescriptor::CPPTYPE_STRING:
      return "std::string";
    case FieldDescriptor::CPPTYPE_MESSAGE:
      return "";

      // No default because we want the compiler to complain if any new
      // CppTypes are added.
  }

  GOOGLE_LOG(FATAL) << "Can't get here.";
  return "";
}
663 
// Returns the capitalized name fragment for a declared (wire-level) field
// type, e.g. "SFixed32" for TYPE_SFIXED32. Presumably used to build method
// names such as Read<Name>/Write<Name>; callers are not visible here. An
// out-of-range value logs a fatal error.
const char* DeclaredTypeMethodName(FieldDescriptor::Type type) {
  switch (type) {
    case FieldDescriptor::TYPE_INT32:
      return "Int32";
    case FieldDescriptor::TYPE_INT64:
      return "Int64";
    case FieldDescriptor::TYPE_UINT32:
      return "UInt32";
    case FieldDescriptor::TYPE_UINT64:
      return "UInt64";
    case FieldDescriptor::TYPE_SINT32:
      return "SInt32";
    case FieldDescriptor::TYPE_SINT64:
      return "SInt64";
    case FieldDescriptor::TYPE_FIXED32:
      return "Fixed32";
    case FieldDescriptor::TYPE_FIXED64:
      return "Fixed64";
    case FieldDescriptor::TYPE_SFIXED32:
      return "SFixed32";
    case FieldDescriptor::TYPE_SFIXED64:
      return "SFixed64";
    case FieldDescriptor::TYPE_FLOAT:
      return "Float";
    case FieldDescriptor::TYPE_DOUBLE:
      return "Double";

    case FieldDescriptor::TYPE_BOOL:
      return "Bool";
    case FieldDescriptor::TYPE_ENUM:
      return "Enum";

    case FieldDescriptor::TYPE_STRING:
      return "String";
    case FieldDescriptor::TYPE_BYTES:
      return "Bytes";
    case FieldDescriptor::TYPE_GROUP:
      return "Group";
    case FieldDescriptor::TYPE_MESSAGE:
      return "Message";

      // No default because we want the compiler to complain if any new
      // types are added.
  }
  GOOGLE_LOG(FATAL) << "Can't get here.";
  return "";
}
711 
Int32ToString(int number)712 std::string Int32ToString(int number) {
713   if (number == std::numeric_limits<int32_t>::min()) {
714     // This needs to be special-cased, see explanation here:
715     // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=52661
716     return StrCat(number + 1, " - 1");
717   } else {
718     return StrCat(number);
719   }
720 }
721 
Int64ToString(int64_t number)722 static std::string Int64ToString(int64_t number) {
723   if (number == std::numeric_limits<int64_t>::min()) {
724     // This needs to be special-cased, see explanation here:
725     // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=52661
726     return StrCat("::int64_t{", number + 1, "} - 1");
727   }
728   return StrCat("::int64_t{", number, "}");
729 }
730 
// Renders `number` as a brace-initialized "::uint64_t{<number>u}" expression.
static std::string UInt64ToString(uint64_t number) {
  return StrCat("::uint64_t{", number, "u}");
}
734 
// Convenience overload that uses a default-constructed Options.
std::string DefaultValue(const FieldDescriptor* field) {
  return DefaultValue(Options(), field);
}
738 
DefaultValue(const Options & options,const FieldDescriptor * field)739 std::string DefaultValue(const Options& options, const FieldDescriptor* field) {
740   switch (field->cpp_type()) {
741     case FieldDescriptor::CPPTYPE_INT32:
742       return Int32ToString(field->default_value_int32());
743     case FieldDescriptor::CPPTYPE_UINT32:
744       return StrCat(field->default_value_uint32()) + "u";
745     case FieldDescriptor::CPPTYPE_INT64:
746       return Int64ToString(field->default_value_int64());
747     case FieldDescriptor::CPPTYPE_UINT64:
748       return UInt64ToString(field->default_value_uint64());
749     case FieldDescriptor::CPPTYPE_DOUBLE: {
750       double value = field->default_value_double();
751       if (value == std::numeric_limits<double>::infinity()) {
752         return "std::numeric_limits<double>::infinity()";
753       } else if (value == -std::numeric_limits<double>::infinity()) {
754         return "-std::numeric_limits<double>::infinity()";
755       } else if (value != value) {
756         return "std::numeric_limits<double>::quiet_NaN()";
757       } else {
758         return SimpleDtoa(value);
759       }
760     }
761     case FieldDescriptor::CPPTYPE_FLOAT: {
762       float value = field->default_value_float();
763       if (value == std::numeric_limits<float>::infinity()) {
764         return "std::numeric_limits<float>::infinity()";
765       } else if (value == -std::numeric_limits<float>::infinity()) {
766         return "-std::numeric_limits<float>::infinity()";
767       } else if (value != value) {
768         return "std::numeric_limits<float>::quiet_NaN()";
769       } else {
770         std::string float_value = SimpleFtoa(value);
771         // If floating point value contains a period (.) or an exponent
772         // (either E or e), then append suffix 'f' to make it a float
773         // literal.
774         if (float_value.find_first_of(".eE") != std::string::npos) {
775           float_value.push_back('f');
776         }
777         return float_value;
778       }
779     }
780     case FieldDescriptor::CPPTYPE_BOOL:
781       return field->default_value_bool() ? "true" : "false";
782     case FieldDescriptor::CPPTYPE_ENUM:
783       // Lazy:  Generate a static_cast because we don't have a helper function
784       //   that constructs the full name of an enum value.
785       return strings::Substitute(
786           "static_cast< $0 >($1)", ClassName(field->enum_type(), true),
787           Int32ToString(field->default_value_enum()->number()));
788     case FieldDescriptor::CPPTYPE_STRING:
789       return "\"" +
790              EscapeTrigraphs(CEscape(field->default_value_string())) +
791              "\"";
792     case FieldDescriptor::CPPTYPE_MESSAGE:
793       return "*" + FieldMessageTypeName(field, options) +
794              "::internal_default_instance()";
795   }
796   // Can't actually get here; make compiler happy.  (We could add a default
797   // case above but then we wouldn't get the nice compiler warning when a
798   // new type is added.)
799   GOOGLE_LOG(FATAL) << "Can't get here.";
800   return "";
801 }
802 
803 // Convert a file name into a valid identifier.
FilenameIdentifier(const std::string & filename)804 std::string FilenameIdentifier(const std::string& filename) {
805   std::string result;
806   for (int i = 0; i < filename.size(); i++) {
807     if (ascii_isalnum(filename[i])) {
808       result.push_back(filename[i]);
809     } else {
810       // Not alphanumeric.  To avoid any possibility of name conflicts we
811       // use the hex code for the character.
812       StrAppend(&result, "_",
813                       strings::Hex(static_cast<uint8_t>(filename[i])));
814     }
815   }
816   return result;
817 }
818 
UniqueName(const std::string & name,const std::string & filename,const Options & options)819 std::string UniqueName(const std::string& name, const std::string& filename,
820                        const Options& options) {
821   return name + "_" + FilenameIdentifier(filename);
822 }
823 
824 // Return the qualified C++ name for a file level symbol.
QualifiedFileLevelSymbol(const FileDescriptor * file,const std::string & name,const Options & options)825 std::string QualifiedFileLevelSymbol(const FileDescriptor* file,
826                                      const std::string& name,
827                                      const Options& options) {
828   if (file->package().empty()) {
829     return StrCat("::", name);
830   }
831   return StrCat(Namespace(file, options), "::", name);
832 }
833 
834 // Escape C++ trigraphs by escaping question marks to \?
EscapeTrigraphs(const std::string & to_escape)835 std::string EscapeTrigraphs(const std::string& to_escape) {
836   return StringReplace(to_escape, "?", "\\?", true);
837 }
838 
839 // Escaped function name to eliminate naming conflict.
SafeFunctionName(const Descriptor * descriptor,const FieldDescriptor * field,const std::string & prefix)840 std::string SafeFunctionName(const Descriptor* descriptor,
841                              const FieldDescriptor* field,
842                              const std::string& prefix) {
843   // Do not use FieldName() since it will escape keywords.
844   std::string name = field->name();
845   LowerString(&name);
846   std::string function_name = prefix + name;
847   if (descriptor->FindFieldByName(function_name)) {
848     // Single underscore will also make it conflicting with the private data
849     // member. We use double underscore to escape function names.
850     function_name.append("__");
851   } else if (kKeywords.count(name) > 0) {
852     // If the field name is a keyword, we append the underscore back to keep it
853     // consistent with other function names.
854     function_name.append("_");
855   }
856   return function_name;
857 }
858 
// Always returns false; neither argument is consulted.
bool IsStringInlined(const FieldDescriptor* descriptor,
                     const Options& options) {
  static_cast<void>(descriptor);
  static_cast<void>(options);
  return false;
}
865 
HasLazyFields(const Descriptor * descriptor,const Options & options,MessageSCCAnalyzer * scc_analyzer)866 static bool HasLazyFields(const Descriptor* descriptor, const Options& options,
867                           MessageSCCAnalyzer* scc_analyzer) {
868   for (int field_idx = 0; field_idx < descriptor->field_count(); field_idx++) {
869     if (IsLazy(descriptor->field(field_idx), options, scc_analyzer)) {
870       return true;
871     }
872   }
873   for (int idx = 0; idx < descriptor->extension_count(); idx++) {
874     if (IsLazy(descriptor->extension(idx), options, scc_analyzer)) {
875       return true;
876     }
877   }
878   for (int idx = 0; idx < descriptor->nested_type_count(); idx++) {
879     if (HasLazyFields(descriptor->nested_type(idx), options, scc_analyzer)) {
880       return true;
881     }
882   }
883   return false;
884 }
885 
886 // Does the given FileDescriptor use lazy fields?
HasLazyFields(const FileDescriptor * file,const Options & options,MessageSCCAnalyzer * scc_analyzer)887 bool HasLazyFields(const FileDescriptor* file, const Options& options,
888                    MessageSCCAnalyzer* scc_analyzer) {
889   for (int i = 0; i < file->message_type_count(); i++) {
890     const Descriptor* descriptor(file->message_type(i));
891     if (HasLazyFields(descriptor, options, scc_analyzer)) {
892       return true;
893     }
894   }
895   for (int field_idx = 0; field_idx < file->extension_count(); field_idx++) {
896     if (IsLazy(file->extension(field_idx), options, scc_analyzer)) {
897       return true;
898     }
899   }
900   return false;
901 }
902 
ShouldSplit(const Descriptor *,const Options &)903 bool ShouldSplit(const Descriptor*, const Options&) { return false; }
ShouldSplit(const FieldDescriptor *,const Options &)904 bool ShouldSplit(const FieldDescriptor*, const Options&) { return false; }
905 
HasRepeatedFields(const Descriptor * descriptor)906 static bool HasRepeatedFields(const Descriptor* descriptor) {
907   for (int i = 0; i < descriptor->field_count(); ++i) {
908     if (descriptor->field(i)->label() == FieldDescriptor::LABEL_REPEATED) {
909       return true;
910     }
911   }
912   for (int i = 0; i < descriptor->nested_type_count(); ++i) {
913     if (HasRepeatedFields(descriptor->nested_type(i))) return true;
914   }
915   return false;
916 }
917 
HasRepeatedFields(const FileDescriptor * file)918 bool HasRepeatedFields(const FileDescriptor* file) {
919   for (int i = 0; i < file->message_type_count(); ++i) {
920     if (HasRepeatedFields(file->message_type(i))) return true;
921   }
922   return false;
923 }
924 
IsStringPieceField(const FieldDescriptor * field,const Options & options)925 static bool IsStringPieceField(const FieldDescriptor* field,
926                                const Options& options) {
927   return field->cpp_type() == FieldDescriptor::CPPTYPE_STRING &&
928          EffectiveStringCType(field, options) == FieldOptions::STRING_PIECE;
929 }
930 
HasStringPieceFields(const Descriptor * descriptor,const Options & options)931 static bool HasStringPieceFields(const Descriptor* descriptor,
932                                  const Options& options) {
933   for (int i = 0; i < descriptor->field_count(); ++i) {
934     if (IsStringPieceField(descriptor->field(i), options)) return true;
935   }
936   for (int i = 0; i < descriptor->nested_type_count(); ++i) {
937     if (HasStringPieceFields(descriptor->nested_type(i), options)) return true;
938   }
939   return false;
940 }
941 
HasStringPieceFields(const FileDescriptor * file,const Options & options)942 bool HasStringPieceFields(const FileDescriptor* file, const Options& options) {
943   for (int i = 0; i < file->message_type_count(); ++i) {
944     if (HasStringPieceFields(file->message_type(i), options)) return true;
945   }
946   return false;
947 }
948 
IsCordField(const FieldDescriptor * field,const Options & options)949 static bool IsCordField(const FieldDescriptor* field, const Options& options) {
950   return field->cpp_type() == FieldDescriptor::CPPTYPE_STRING &&
951          EffectiveStringCType(field, options) == FieldOptions::CORD;
952 }
953 
HasCordFields(const Descriptor * descriptor,const Options & options)954 static bool HasCordFields(const Descriptor* descriptor,
955                           const Options& options) {
956   for (int i = 0; i < descriptor->field_count(); ++i) {
957     if (IsCordField(descriptor->field(i), options)) return true;
958   }
959   for (int i = 0; i < descriptor->nested_type_count(); ++i) {
960     if (HasCordFields(descriptor->nested_type(i), options)) return true;
961   }
962   return false;
963 }
964 
HasCordFields(const FileDescriptor * file,const Options & options)965 bool HasCordFields(const FileDescriptor* file, const Options& options) {
966   for (int i = 0; i < file->message_type_count(); ++i) {
967     if (HasCordFields(file->message_type(i), options)) return true;
968   }
969   return false;
970 }
971 
HasExtensionsOrExtendableMessage(const Descriptor * descriptor)972 static bool HasExtensionsOrExtendableMessage(const Descriptor* descriptor) {
973   if (descriptor->extension_range_count() > 0) return true;
974   if (descriptor->extension_count() > 0) return true;
975   for (int i = 0; i < descriptor->nested_type_count(); ++i) {
976     if (HasExtensionsOrExtendableMessage(descriptor->nested_type(i))) {
977       return true;
978     }
979   }
980   return false;
981 }
982 
HasExtensionsOrExtendableMessage(const FileDescriptor * file)983 bool HasExtensionsOrExtendableMessage(const FileDescriptor* file) {
984   if (file->extension_count() > 0) return true;
985   for (int i = 0; i < file->message_type_count(); ++i) {
986     if (HasExtensionsOrExtendableMessage(file->message_type(i))) return true;
987   }
988   return false;
989 }
990 
HasMapFields(const Descriptor * descriptor)991 static bool HasMapFields(const Descriptor* descriptor) {
992   for (int i = 0; i < descriptor->field_count(); ++i) {
993     if (descriptor->field(i)->is_map()) {
994       return true;
995     }
996   }
997   for (int i = 0; i < descriptor->nested_type_count(); ++i) {
998     if (HasMapFields(descriptor->nested_type(i))) return true;
999   }
1000   return false;
1001 }
1002 
HasMapFields(const FileDescriptor * file)1003 bool HasMapFields(const FileDescriptor* file) {
1004   for (int i = 0; i < file->message_type_count(); ++i) {
1005     if (HasMapFields(file->message_type(i))) return true;
1006   }
1007   return false;
1008 }
1009 
HasEnumDefinitions(const Descriptor * message_type)1010 static bool HasEnumDefinitions(const Descriptor* message_type) {
1011   if (message_type->enum_type_count() > 0) return true;
1012   for (int i = 0; i < message_type->nested_type_count(); ++i) {
1013     if (HasEnumDefinitions(message_type->nested_type(i))) return true;
1014   }
1015   return false;
1016 }
1017 
HasEnumDefinitions(const FileDescriptor * file)1018 bool HasEnumDefinitions(const FileDescriptor* file) {
1019   if (file->enum_type_count() > 0) return true;
1020   for (int i = 0; i < file->message_type_count(); ++i) {
1021     if (HasEnumDefinitions(file->message_type(i))) return true;
1022   }
1023   return false;
1024 }
1025 
// Message overload: always returns false; none of the arguments is
// consulted.
bool ShouldVerify(const Descriptor* descriptor, const Options& options,
                  MessageSCCAnalyzer* scc_analyzer) {
  static_cast<void>(descriptor);
  static_cast<void>(options);
  static_cast<void>(scc_analyzer);
  return false;
}
1033 
// File overload: always returns false; none of the arguments is consulted.
bool ShouldVerify(const FileDescriptor* file, const Options& options,
                  MessageSCCAnalyzer* scc_analyzer) {
  static_cast<void>(file);
  static_cast<void>(options);
  static_cast<void>(scc_analyzer);
  return false;
}
1041 
IsUtf8String(const FieldDescriptor * field)1042 bool IsUtf8String(const FieldDescriptor* field) {
1043   return IsProto3(field->file()) &&
1044       field->type() == FieldDescriptor::TYPE_STRING;
1045 }
1046 
ShouldVerifySimple(const Descriptor * descriptor)1047 VerifySimpleType ShouldVerifySimple(const Descriptor* descriptor) {
1048   (void)descriptor;
1049   return VerifySimpleType::kCustom;
1050 }
1051 
IsStringOrMessage(const FieldDescriptor * field)1052 bool IsStringOrMessage(const FieldDescriptor* field) {
1053   switch (field->cpp_type()) {
1054     case FieldDescriptor::CPPTYPE_INT32:
1055     case FieldDescriptor::CPPTYPE_INT64:
1056     case FieldDescriptor::CPPTYPE_UINT32:
1057     case FieldDescriptor::CPPTYPE_UINT64:
1058     case FieldDescriptor::CPPTYPE_DOUBLE:
1059     case FieldDescriptor::CPPTYPE_FLOAT:
1060     case FieldDescriptor::CPPTYPE_BOOL:
1061     case FieldDescriptor::CPPTYPE_ENUM:
1062       return false;
1063     case FieldDescriptor::CPPTYPE_STRING:
1064     case FieldDescriptor::CPPTYPE_MESSAGE:
1065       return true;
1066   }
1067 
1068   GOOGLE_LOG(FATAL) << "Can't get here.";
1069   return false;
1070 }
1071 
EffectiveStringCType(const FieldDescriptor * field,const Options & options)1072 FieldOptions::CType EffectiveStringCType(const FieldDescriptor* field,
1073                                          const Options& options) {
1074   GOOGLE_DCHECK(field->cpp_type() == FieldDescriptor::CPPTYPE_STRING);
1075   if (options.opensource_runtime) {
1076     // Open-source protobuf release only supports STRING ctype.
1077     return FieldOptions::STRING;
1078   } else {
1079     // Google-internal supports all ctypes.
1080     return field->options().ctype();
1081   }
1082 }
1083 
IsAnyMessage(const FileDescriptor * descriptor,const Options & options)1084 bool IsAnyMessage(const FileDescriptor* descriptor, const Options& options) {
1085   return descriptor->name() == kAnyProtoFile;
1086 }
1087 
IsAnyMessage(const Descriptor * descriptor,const Options & options)1088 bool IsAnyMessage(const Descriptor* descriptor, const Options& options) {
1089   return descriptor->name() == kAnyMessageName &&
1090          IsAnyMessage(descriptor->file(), options);
1091 }
1092 
IsWellKnownMessage(const FileDescriptor * file)1093 bool IsWellKnownMessage(const FileDescriptor* file) {
1094   static const std::unordered_set<std::string> well_known_files{
1095       "google/protobuf/any.proto",
1096       "google/protobuf/api.proto",
1097       "google/protobuf/compiler/plugin.proto",
1098       "google/protobuf/descriptor.proto",
1099       "google/protobuf/duration.proto",
1100       "google/protobuf/empty.proto",
1101       "google/protobuf/field_mask.proto",
1102       "google/protobuf/source_context.proto",
1103       "google/protobuf/struct.proto",
1104       "google/protobuf/timestamp.proto",
1105       "google/protobuf/type.proto",
1106       "google/protobuf/wrappers.proto",
1107   };
1108   return well_known_files.find(file->name()) != well_known_files.end();
1109 }
1110 
// Always returns true; neither argument is consulted.
static bool FieldEnforceUtf8(const FieldDescriptor* field,
                             const Options& options) {
  (void)field;
  (void)options;
  return true;
}
1115 
// Always returns true; neither argument is consulted.
static bool FileUtf8Verification(const FileDescriptor* file,
                                 const Options& options) {
  (void)file;
  (void)options;
  return true;
}
1120 
1121 // Which level of UTF-8 enforcemant is placed on this file.
GetUtf8CheckMode(const FieldDescriptor * field,const Options & options)1122 Utf8CheckMode GetUtf8CheckMode(const FieldDescriptor* field,
1123                                const Options& options) {
1124   if (field->file()->syntax() == FileDescriptor::SYNTAX_PROTO3 &&
1125       FieldEnforceUtf8(field, options)) {
1126     return Utf8CheckMode::kStrict;
1127   } else if (GetOptimizeFor(field->file(), options) !=
1128                  FileOptions::LITE_RUNTIME &&
1129              FileUtf8Verification(field->file(), options)) {
1130     return Utf8CheckMode::kVerify;
1131   } else {
1132     return Utf8CheckMode::kNone;
1133   }
1134 }
1135 
GenerateUtf8CheckCode(const FieldDescriptor * field,const Options & options,bool for_parse,const char * parameters,const char * strict_function,const char * verify_function,const Formatter & format)1136 static void GenerateUtf8CheckCode(const FieldDescriptor* field,
1137                                   const Options& options, bool for_parse,
1138                                   const char* parameters,
1139                                   const char* strict_function,
1140                                   const char* verify_function,
1141                                   const Formatter& format) {
1142   switch (GetUtf8CheckMode(field, options)) {
1143     case Utf8CheckMode::kStrict: {
1144       if (for_parse) {
1145         format("DO_(");
1146       }
1147       format("::$proto_ns$::internal::WireFormatLite::$1$(\n", strict_function);
1148       format.Indent();
1149       format(parameters);
1150       if (for_parse) {
1151         format("::$proto_ns$::internal::WireFormatLite::PARSE,\n");
1152       } else {
1153         format("::$proto_ns$::internal::WireFormatLite::SERIALIZE,\n");
1154       }
1155       format("\"$1$\")", field->full_name());
1156       if (for_parse) {
1157         format(")");
1158       }
1159       format(";\n");
1160       format.Outdent();
1161       break;
1162     }
1163     case Utf8CheckMode::kVerify: {
1164       format("::$proto_ns$::internal::WireFormat::$1$(\n", verify_function);
1165       format.Indent();
1166       format(parameters);
1167       if (for_parse) {
1168         format("::$proto_ns$::internal::WireFormat::PARSE,\n");
1169       } else {
1170         format("::$proto_ns$::internal::WireFormat::SERIALIZE,\n");
1171       }
1172       format("\"$1$\");\n", field->full_name());
1173       format.Outdent();
1174       break;
1175     }
1176     case Utf8CheckMode::kNone:
1177       break;
1178   }
1179 }
1180 
GenerateUtf8CheckCodeForString(const FieldDescriptor * field,const Options & options,bool for_parse,const char * parameters,const Formatter & format)1181 void GenerateUtf8CheckCodeForString(const FieldDescriptor* field,
1182                                     const Options& options, bool for_parse,
1183                                     const char* parameters,
1184                                     const Formatter& format) {
1185   GenerateUtf8CheckCode(field, options, for_parse, parameters,
1186                         "VerifyUtf8String", "VerifyUTF8StringNamedField",
1187                         format);
1188 }
1189 
GenerateUtf8CheckCodeForCord(const FieldDescriptor * field,const Options & options,bool for_parse,const char * parameters,const Formatter & format)1190 void GenerateUtf8CheckCodeForCord(const FieldDescriptor* field,
1191                                   const Options& options, bool for_parse,
1192                                   const char* parameters,
1193                                   const Formatter& format) {
1194   GenerateUtf8CheckCode(field, options, for_parse, parameters, "VerifyUtf8Cord",
1195                         "VerifyUTF8CordNamedField", format);
1196 }
1197 
FlattenMessagesInFile(const FileDescriptor * file,std::vector<const Descriptor * > * result)1198 void FlattenMessagesInFile(const FileDescriptor* file,
1199                            std::vector<const Descriptor*>* result) {
1200   for (int i = 0; i < file->message_type_count(); i++) {
1201     ForEachMessage(file->message_type(i), [&](const Descriptor* descriptor) {
1202       result->push_back(descriptor);
1203     });
1204   }
1205 }
1206 
HasWeakFields(const Descriptor * descriptor,const Options & options)1207 bool HasWeakFields(const Descriptor* descriptor, const Options& options) {
1208   for (int i = 0; i < descriptor->field_count(); i++) {
1209     if (IsWeak(descriptor->field(i), options)) return true;
1210   }
1211   return false;
1212 }
1213 
HasWeakFields(const FileDescriptor * file,const Options & options)1214 bool HasWeakFields(const FileDescriptor* file, const Options& options) {
1215   for (int i = 0; i < file->message_type_count(); ++i) {
1216     if (HasWeakFields(file->message_type(i), options)) return true;
1217   }
1218   return false;
1219 }
1220 
UsingImplicitWeakFields(const FileDescriptor * file,const Options & options)1221 bool UsingImplicitWeakFields(const FileDescriptor* file,
1222                              const Options& options) {
1223   return options.lite_implicit_weak_fields &&
1224          GetOptimizeFor(file, options) == FileOptions::LITE_RUNTIME;
1225 }
1226 
IsImplicitWeakField(const FieldDescriptor * field,const Options & options,MessageSCCAnalyzer * scc_analyzer)1227 bool IsImplicitWeakField(const FieldDescriptor* field, const Options& options,
1228                          MessageSCCAnalyzer* scc_analyzer) {
1229   return UsingImplicitWeakFields(field->file(), options) &&
1230          field->type() == FieldDescriptor::TYPE_MESSAGE &&
1231          !field->is_required() && !field->is_map() && !field->is_extension() &&
1232          !IsWellKnownMessage(field->message_type()->file()) &&
1233          field->message_type()->file()->name() !=
1234              "net/proto2/proto/descriptor.proto" &&
1235          // We do not support implicit weak fields between messages in the same
1236          // strongly-connected component.
1237          scc_analyzer->GetSCC(field->containing_type()) !=
1238              scc_analyzer->GetSCC(field->message_type());
1239 }
1240 
GetSCCAnalysis(const SCC * scc)1241 MessageAnalysis MessageSCCAnalyzer::GetSCCAnalysis(const SCC* scc) {
1242   if (analysis_cache_.count(scc)) return analysis_cache_[scc];
1243   MessageAnalysis result;
1244   if (UsingImplicitWeakFields(scc->GetFile(), options_)) {
1245     result.contains_weak = true;
1246   }
1247   for (int i = 0; i < scc->descriptors.size(); i++) {
1248     const Descriptor* descriptor = scc->descriptors[i];
1249     if (descriptor->extension_range_count() > 0) {
1250       result.contains_extension = true;
1251     }
1252     for (int j = 0; j < descriptor->field_count(); j++) {
1253       const FieldDescriptor* field = descriptor->field(j);
1254       if (field->is_required()) {
1255         result.contains_required = true;
1256       }
1257       if (field->options().weak()) {
1258         result.contains_weak = true;
1259       }
1260       switch (field->type()) {
1261         case FieldDescriptor::TYPE_STRING:
1262         case FieldDescriptor::TYPE_BYTES: {
1263           if (field->options().ctype() == FieldOptions::CORD) {
1264             result.contains_cord = true;
1265           }
1266           break;
1267         }
1268         case FieldDescriptor::TYPE_GROUP:
1269         case FieldDescriptor::TYPE_MESSAGE: {
1270           const SCC* child = analyzer_.GetSCC(field->message_type());
1271           if (child != scc) {
1272             MessageAnalysis analysis = GetSCCAnalysis(child);
1273             result.contains_cord |= analysis.contains_cord;
1274             result.contains_extension |= analysis.contains_extension;
1275             if (!ShouldIgnoreRequiredFieldCheck(field, options_)) {
1276               result.contains_required |= analysis.contains_required;
1277             }
1278             result.contains_weak |= analysis.contains_weak;
1279           } else {
1280             // This field points back into the same SCC hence the messages
1281             // in the SCC are recursive. Note if SCC contains more than two
1282             // nodes it has to be recursive, however this test also works for
1283             // a single node that is recursive.
1284             result.is_recursive = true;
1285           }
1286           break;
1287         }
1288         default:
1289           break;
1290       }
1291     }
1292   }
1293   // We deliberately only insert the result here. After we contracted the SCC
1294   // in the graph, the graph should be a DAG. Hence we shouldn't need to mark
1295   // nodes visited as we can never return to them. By inserting them here
1296   // we will go in an infinite loop if the SCC is not correct.
1297   return analysis_cache_[scc] = result;
1298 }
1299 
ListAllFields(const Descriptor * d,std::vector<const FieldDescriptor * > * fields)1300 void ListAllFields(const Descriptor* d,
1301                    std::vector<const FieldDescriptor*>* fields) {
1302   // Collect sub messages
1303   for (int i = 0; i < d->nested_type_count(); i++) {
1304     ListAllFields(d->nested_type(i), fields);
1305   }
1306   // Collect message level extensions.
1307   for (int i = 0; i < d->extension_count(); i++) {
1308     fields->push_back(d->extension(i));
1309   }
1310   // Add types of fields necessary
1311   for (int i = 0; i < d->field_count(); i++) {
1312     fields->push_back(d->field(i));
1313   }
1314 }
1315 
ListAllFields(const FileDescriptor * d,std::vector<const FieldDescriptor * > * fields)1316 void ListAllFields(const FileDescriptor* d,
1317                    std::vector<const FieldDescriptor*>* fields) {
1318   // Collect file level message.
1319   for (int i = 0; i < d->message_type_count(); i++) {
1320     ListAllFields(d->message_type(i), fields);
1321   }
1322   // Collect message level extensions.
1323   for (int i = 0; i < d->extension_count(); i++) {
1324     fields->push_back(d->extension(i));
1325   }
1326 }
1327 
ListAllTypesForServices(const FileDescriptor * fd,std::vector<const Descriptor * > * types)1328 void ListAllTypesForServices(const FileDescriptor* fd,
1329                              std::vector<const Descriptor*>* types) {
1330   for (int i = 0; i < fd->service_count(); i++) {
1331     const ServiceDescriptor* sd = fd->service(i);
1332     for (int j = 0; j < sd->method_count(); j++) {
1333       const MethodDescriptor* method = sd->method(j);
1334       types->push_back(method->input_type());
1335       types->push_back(method->output_type());
1336     }
1337   }
1338 }
1339 
GetBootstrapBasename(const Options & options,const std::string & basename,std::string * bootstrap_basename)1340 bool GetBootstrapBasename(const Options& options, const std::string& basename,
1341                           std::string* bootstrap_basename) {
1342   if (options.opensource_runtime) {
1343     return false;
1344   }
1345 
1346   std::unordered_map<std::string, std::string> bootstrap_mapping{
1347       {"net/proto2/proto/descriptor",
1348        "third_party/protobuf/descriptor"},
1349       {"net/proto2/compiler/proto/plugin",
1350        "net/proto2/compiler/proto/plugin"},
1351       {"net/proto2/compiler/proto/profile",
1352        "net/proto2/compiler/proto/profile_bootstrap"},
1353   };
1354   auto iter = bootstrap_mapping.find(basename);
1355   if (iter == bootstrap_mapping.end()) {
1356     *bootstrap_basename = basename;
1357     return false;
1358   } else {
1359     *bootstrap_basename = iter->second;
1360     return true;
1361   }
1362 }
1363 
IsBootstrapProto(const Options & options,const FileDescriptor * file)1364 bool IsBootstrapProto(const Options& options, const FileDescriptor* file) {
1365   std::string my_name = StripProto(file->name());
1366   return GetBootstrapBasename(options, my_name, &my_name);
1367 }
1368 
MaybeBootstrap(const Options & options,GeneratorContext * generator_context,bool bootstrap_flag,std::string * basename)1369 bool MaybeBootstrap(const Options& options, GeneratorContext* generator_context,
1370                     bool bootstrap_flag, std::string* basename) {
1371   std::string bootstrap_basename;
1372   if (!GetBootstrapBasename(options, *basename, &bootstrap_basename)) {
1373     return false;
1374   }
1375 
1376   if (bootstrap_flag) {
1377     // Adjust basename, but don't abort code generation.
1378     *basename = bootstrap_basename;
1379     return false;
1380   } else {
1381     const std::string& forward_to_basename = bootstrap_basename;
1382 
1383     // Generate forwarding headers and empty .pb.cc.
1384     {
1385       std::unique_ptr<io::ZeroCopyOutputStream> output(
1386           generator_context->Open(*basename + ".pb.h"));
1387       io::Printer printer(output.get(), '$', nullptr);
1388       printer.Print(
1389           "#ifndef PROTOBUF_INCLUDED_$filename_identifier$_FORWARD_PB_H\n"
1390           "#define PROTOBUF_INCLUDED_$filename_identifier$_FORWARD_PB_H\n"
1391           "#include \"$forward_to_basename$.pb.h\"  // IWYU pragma: export\n"
1392           "#endif  // PROTOBUF_INCLUDED_$filename_identifier$_FORWARD_PB_H\n",
1393           "forward_to_basename", forward_to_basename, "filename_identifier",
1394           FilenameIdentifier(*basename));
1395 
1396       if (!options.opensource_runtime) {
1397         // HACK HACK HACK, tech debt from the deeps of proto1 and SWIG
1398         // protocoltype is SWIG'ed and we need to forward
1399         if (*basename == "net/proto/protocoltype") {
1400           printer.Print(
1401               "#ifdef SWIG\n"
1402               "%include \"$forward_to_basename$.pb.h\"\n"
1403               "#endif  // SWIG\n",
1404               "forward_to_basename", forward_to_basename);
1405         }
1406       }
1407     }
1408 
1409     {
1410       std::unique_ptr<io::ZeroCopyOutputStream> output(
1411           generator_context->Open(*basename + ".proto.h"));
1412       io::Printer printer(output.get(), '$', nullptr);
1413       printer.Print(
1414           "#ifndef PROTOBUF_INCLUDED_$filename_identifier$_FORWARD_PROTO_H\n"
1415           "#define PROTOBUF_INCLUDED_$filename_identifier$_FORWARD_PROTO_H\n"
1416           "#include \"$forward_to_basename$.proto.h\"  // IWYU pragma: "
1417           "export\n"
1418           "#endif  // "
1419           "PROTOBUF_INCLUDED_$filename_identifier$_FORWARD_PROTO_H\n",
1420           "forward_to_basename", forward_to_basename, "filename_identifier",
1421           FilenameIdentifier(*basename));
1422     }
1423 
1424     {
1425       std::unique_ptr<io::ZeroCopyOutputStream> output(
1426           generator_context->Open(*basename + ".pb.cc"));
1427       io::Printer printer(output.get(), '$', nullptr);
1428       printer.Print("\n");
1429     }
1430 
1431     {
1432       std::unique_ptr<io::ZeroCopyOutputStream> output(
1433           generator_context->Open(*basename + ".pb.h.meta"));
1434     }
1435 
1436     {
1437       std::unique_ptr<io::ZeroCopyOutputStream> output(
1438           generator_context->Open(*basename + ".proto.h.meta"));
1439     }
1440 
1441     // Abort code generation.
1442     return true;
1443   }
1444 }
1445 
HasExtensionFromFile(const Message & msg,const FileDescriptor * file,const Options & options,bool * has_opt_codesize_extension)1446 static bool HasExtensionFromFile(const Message& msg, const FileDescriptor* file,
1447                                  const Options& options,
1448                                  bool* has_opt_codesize_extension) {
1449   std::vector<const FieldDescriptor*> fields;
1450   auto reflection = msg.GetReflection();
1451   reflection->ListFields(msg, &fields);
1452   for (auto field : fields) {
1453     const auto* field_msg = field->message_type();
1454     if (field_msg == nullptr) {
1455       // It so happens that enums Is_Valid are still generated so enums work.
1456       // Only messages have potential problems.
1457       continue;
1458     }
1459     // If this option has an extension set AND that extension is defined in the
1460     // same file we have bootstrap problem.
1461     if (field->is_extension()) {
1462       const auto* msg_extension_file = field->message_type()->file();
1463       if (msg_extension_file == file) return true;
1464       if (has_opt_codesize_extension &&
1465           GetOptimizeFor(msg_extension_file, options) ==
1466               FileOptions::CODE_SIZE) {
1467         *has_opt_codesize_extension = true;
1468       }
1469     }
1470     // Recurse in this field to see if there is a problem in there
1471     if (field->is_repeated()) {
1472       for (int i = 0; i < reflection->FieldSize(msg, field); i++) {
1473         if (HasExtensionFromFile(reflection->GetRepeatedMessage(msg, field, i),
1474                                  file, options, has_opt_codesize_extension)) {
1475           return true;
1476         }
1477       }
1478     } else {
1479       if (HasExtensionFromFile(reflection->GetMessage(msg, field), file,
1480                                options, has_opt_codesize_extension)) {
1481         return true;
1482       }
1483     }
1484   }
1485   return false;
1486 }
1487 
HasBootstrapProblem(const FileDescriptor * file,const Options & options,bool * has_opt_codesize_extension)1488 static bool HasBootstrapProblem(const FileDescriptor* file,
1489                                 const Options& options,
1490                                 bool* has_opt_codesize_extension) {
1491   static auto& cache = *new std::unordered_map<const FileDescriptor*, bool>;
1492   auto it = cache.find(file);
1493   if (it != cache.end()) return it->second;
1494   // In order to build the data structures for the reflective parse, it needs
1495   // to parse the serialized descriptor describing all the messages defined in
1496   // this file. Obviously this presents a bootstrap problem for descriptor
1497   // messages.
1498   if (file->name() == "net/proto2/proto/descriptor.proto" ||
1499       file->name() == "google/protobuf/descriptor.proto") {
1500     return true;
1501   }
1502   // Unfortunately we're not done yet. The descriptor option messages allow
1503   // for extensions. So we need to be able to parse these extensions in order
1504   // to parse the file descriptor for a file that has custom options. This is a
1505   // problem when these custom options extensions are defined in the same file.
1506   FileDescriptorProto linkedin_fd_proto;
1507   const DescriptorPool* pool = file->pool();
1508   const Descriptor* fd_proto_descriptor =
1509       pool->FindMessageTypeByName(linkedin_fd_proto.GetTypeName());
1510   // Not all pools have descriptor.proto in them. In these cases there for sure
1511   // are no custom options.
1512   if (fd_proto_descriptor == nullptr) return false;
1513 
1514   // It's easier to inspect file as a proto, because we can use reflection on
1515   // the proto to iterate over all content.
1516   file->CopyTo(&linkedin_fd_proto);
1517 
1518   // linkedin_fd_proto is a generated proto linked in the proto compiler. As
1519   // such it doesn't know the extensions that are potentially present in the
1520   // descriptor pool constructed from the protos that are being compiled. These
1521   // custom options are therefore in the unknown fields.
1522   // By building the corresponding FileDescriptorProto in the pool constructed
1523   // by the protos that are being compiled, ie. file's pool, the unknown fields
1524   // are converted to extensions.
1525   DynamicMessageFactory factory(pool);
1526   Message* fd_proto = factory.GetPrototype(fd_proto_descriptor)->New();
1527   fd_proto->ParseFromString(linkedin_fd_proto.SerializeAsString());
1528 
1529   bool& res = cache[file];
1530   res = HasExtensionFromFile(*fd_proto, file, options,
1531                              has_opt_codesize_extension);
1532   delete fd_proto;
1533   return res;
1534 }
1535 
GetOptimizeFor(const FileDescriptor * file,const Options & options,bool * has_opt_codesize_extension)1536 FileOptions_OptimizeMode GetOptimizeFor(const FileDescriptor* file,
1537                                         const Options& options,
1538                                         bool* has_opt_codesize_extension) {
1539   if (has_opt_codesize_extension) *has_opt_codesize_extension = false;
1540   switch (options.enforce_mode) {
1541     case EnforceOptimizeMode::kSpeed:
1542       return FileOptions::SPEED;
1543     case EnforceOptimizeMode::kLiteRuntime:
1544       return FileOptions::LITE_RUNTIME;
1545     case EnforceOptimizeMode::kCodeSize:
1546       if (file->options().optimize_for() == FileOptions::LITE_RUNTIME) {
1547         return FileOptions::LITE_RUNTIME;
1548       }
1549       if (HasBootstrapProblem(file, options, has_opt_codesize_extension)) {
1550         return FileOptions::SPEED;
1551       }
1552       return FileOptions::CODE_SIZE;
1553     case EnforceOptimizeMode::kNoEnforcement:
1554       if (file->options().optimize_for() == FileOptions::CODE_SIZE) {
1555         if (HasBootstrapProblem(file, options, has_opt_codesize_extension)) {
1556           GOOGLE_LOG(WARNING) << "Proto states optimize_for = CODE_SIZE, but we "
1557                           "cannot honor that because it contains custom option "
1558                           "extensions defined in the same proto.";
1559           return FileOptions::SPEED;
1560         }
1561       }
1562       return file->options().optimize_for();
1563   }
1564 
1565   GOOGLE_LOG(FATAL) << "Unknown optimization enforcement requested.";
1566   // The phony return below serves to silence a warning from GCC 8.
1567   return FileOptions::SPEED;
1568 }
1569 
IsMessageOwnedArenaEligible(const Descriptor * desc,const Options & options)1570 inline bool IsMessageOwnedArenaEligible(const Descriptor* desc,
1571                                         const Options& options) {
1572   return GetOptimizeFor(desc->file(), options) != FileOptions::LITE_RUNTIME &&
1573          !options.bootstrap && !options.opensource_runtime &&
1574          AllocExpected(desc);
1575 }
1576 
// Always returns false; both arguments are accepted but ignored.
bool EnableMessageOwnedArena(const Descriptor* desc, const Options& options) {
  // Explicitly discard the parameters so they do not count as unused.
  static_cast<void>(desc);
  static_cast<void>(options);
  return false;
}
1582 
// Always returns false; both arguments are accepted but ignored.
bool EnableMessageOwnedArenaTrial(const Descriptor* desc,
                                  const Options& options) {
  // Explicitly discard the parameters, matching EnableMessageOwnedArena(), so
  // builds with unused-parameter warnings enabled stay clean.
  static_cast<void>(desc);
  static_cast<void>(options);
  return false;
}
1587 
HasMessageFieldOrExtension(const Descriptor * desc)1588 bool HasMessageFieldOrExtension(const Descriptor* desc) {
1589   if (desc->extension_range_count() > 0) return true;
1590   for (const auto* f : FieldRange(desc)) {
1591     if (f->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) return true;
1592   }
1593   return false;
1594 }
1595 
1596 }  // namespace cpp
1597 }  // namespace compiler
1598 }  // namespace protobuf
1599 }  // namespace google
1600