• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 //     * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 //     * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 //     * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 // Author: kenton@google.com (Kenton Varda)
32 //  Based on original Protocol Buffers design by
33 //  Sanjay Ghemawat, Jeff Dean, and others.
34 
35 #include <google/protobuf/compiler/cpp/cpp_helpers.h>
36 
37 #include <functional>
38 #include <limits>
39 #include <map>
40 #include <queue>
41 #include <unordered_set>
42 #include <vector>
43 
44 #include <google/protobuf/stubs/common.h>
45 #include <google/protobuf/stubs/logging.h>
46 #include <google/protobuf/compiler/cpp/cpp_options.h>
47 #include <google/protobuf/descriptor.pb.h>
48 #include <google/protobuf/descriptor.h>
49 #include <google/protobuf/compiler/scc.h>
50 #include <google/protobuf/io/printer.h>
51 #include <google/protobuf/io/zero_copy_stream.h>
52 #include <google/protobuf/dynamic_message.h>
53 #include <google/protobuf/wire_format.h>
54 #include <google/protobuf/wire_format_lite.h>
55 #include <google/protobuf/stubs/strutil.h>
56 #include <google/protobuf/stubs/substitute.h>
57 #include <google/protobuf/stubs/hash.h>
58 
59 #include <google/protobuf/port_def.inc>
60 
61 namespace google {
62 namespace protobuf {
63 namespace compiler {
64 namespace cpp {
65 
66 namespace {
67 
// Message name and .proto path literals for "Any".  File-local: they live in
// the unnamed namespace.
constexpr char kAnyMessageName[] = "Any";
constexpr char kAnyProtoFile[] = "google/protobuf/any.proto";
70 
DotsToColons(const std::string & name)71 std::string DotsToColons(const std::string& name) {
72   return StringReplace(name, ".", "::", true);
73 }
74 
// Identifiers that get a trailing underscore when used as generated names
// (see the kKeywords lookups in this file).  Entries are grouped by first
// letter.
// clang-format off
static const char* const kKeywordList[] = {
    "NULL",
    "alignas", "alignof", "and", "and_eq", "asm", "auto",
    "bitand", "bitor", "bool", "break",
    "case", "catch", "char", "class", "compl", "const", "constexpr",
    "const_cast", "continue",
    "decltype", "default", "delete", "do", "double", "dynamic_cast",
    "else", "enum", "explicit", "export", "extern",
    "false", "float", "for", "friend",
    "goto",
    "if", "inline", "int",
    "long",
    "mutable",
    "namespace", "new", "noexcept", "not", "not_eq", "nullptr",
    "operator", "or", "or_eq",
    "private", "protected", "public",
    "register", "reinterpret_cast", "return",
    "short", "signed", "sizeof", "static", "static_assert", "static_cast",
    "struct", "switch",
    "template", "this", "thread_local", "throw", "true", "try", "typedef",
    "typeid", "typename",
    "union", "unsigned", "using",
    "virtual", "void", "volatile",
    "wchar_t", "while",
    "xor", "xor_eq"};
// clang-format on
159 
MakeKeywordsMap()160 static std::unordered_set<std::string>* MakeKeywordsMap() {
161   auto* result = new std::unordered_set<std::string>();
162   for (const auto keyword : kKeywordList) {
163     result->emplace(keyword);
164   }
165   return result;
166 }
167 
168 static std::unordered_set<std::string>& kKeywords = *MakeKeywordsMap();
169 
170 // Encode [0..63] as 'A'-'Z', 'a'-'z', '0'-'9', '_'
Base63Char(int value)171 char Base63Char(int value) {
172   GOOGLE_CHECK_GE(value, 0);
173   if (value < 26) return 'A' + value;
174   value -= 26;
175   if (value < 26) return 'a' + value;
176   value -= 26;
177   if (value < 10) return '0' + value;
178   GOOGLE_CHECK_EQ(value, 10);
179   return '_';
180 }
181 
182 // Given a c identifier has 63 legal characters we can't implement base64
183 // encoding. So we return the k least significant "digits" in base 63.
184 template <typename I>
Base63(I n,int k)185 std::string Base63(I n, int k) {
186   std::string res;
187   while (k-- > 0) {
188     res += Base63Char(static_cast<int>(n % 63));
189     n /= 63;
190   }
191   return res;
192 }
193 
IntTypeName(const Options & options,const std::string & type)194 std::string IntTypeName(const Options& options, const std::string& type) {
195   if (options.opensource_runtime) {
196     return "::PROTOBUF_NAMESPACE_ID::" + type;
197   } else {
198     return "::" + type;
199   }
200 }
201 
SetIntVar(const Options & options,const std::string & type,std::map<std::string,std::string> * variables)202 void SetIntVar(const Options& options, const std::string& type,
203                std::map<std::string, std::string>* variables) {
204   (*variables)[type] = IntTypeName(options, type);
205 }
206 
HasInternalAccessors(const FieldOptions::CType ctype)207 bool HasInternalAccessors(const FieldOptions::CType ctype) {
208   return ctype == FieldOptions::STRING || ctype == FieldOptions::CORD;
209 }
210 
211 }  // namespace
212 
SetCommonVars(const Options & options,std::map<std::string,std::string> * variables)213 void SetCommonVars(const Options& options,
214                    std::map<std::string, std::string>* variables) {
215   (*variables)["proto_ns"] = ProtobufNamespace(options);
216 
217   // Warning: there is some clever naming/splitting here to avoid extract script
218   // rewrites.  The names of these variables must not be things that the extract
219   // script will rewrite.  That's why we use "CHK" (for example) instead of
220   // "GOOGLE_CHECK".
221   if (options.opensource_runtime) {
222     (*variables)["GOOGLE_PROTOBUF"] = "GOOGLE_PROTOBUF";
223     (*variables)["CHK"] = "GOOGLE_CHECK";
224     (*variables)["DCHK"] = "GOOGLE_DCHECK";
225   } else {
226     // These values are things the extract script would rewrite if we did not
227     // split them.  It might not strictly matter since we don't generate google3
228     // code in open-source.  But it's good to prevent surprising things from
229     // happening.
230     (*variables)["GOOGLE_PROTOBUF"] =
231         "GOOGLE3"
232         "_PROTOBUF";
233     (*variables)["CHK"] =
234         "CH"
235         "ECK";
236     (*variables)["DCHK"] =
237         "DCH"
238         "ECK";
239   }
240 
241   SetIntVar(options, "int8", variables);
242   SetIntVar(options, "uint8", variables);
243   SetIntVar(options, "uint32", variables);
244   SetIntVar(options, "uint64", variables);
245   SetIntVar(options, "int32", variables);
246   SetIntVar(options, "int64", variables);
247   (*variables)["string"] = "std::string";
248 }
249 
SetUnknkownFieldsVariable(const Descriptor * descriptor,const Options & options,std::map<std::string,std::string> * variables)250 void SetUnknkownFieldsVariable(const Descriptor* descriptor,
251                                const Options& options,
252                                std::map<std::string, std::string>* variables) {
253   std::string proto_ns = ProtobufNamespace(options);
254   std::string unknown_fields_type;
255   if (UseUnknownFieldSet(descriptor->file(), options)) {
256     unknown_fields_type = "::" + proto_ns + "::UnknownFieldSet";
257     (*variables)["unknown_fields"] =
258         "_internal_metadata_.unknown_fields<" + unknown_fields_type + ">(" +
259         unknown_fields_type + "::default_instance)";
260   } else {
261     unknown_fields_type =
262         PrimitiveTypeName(options, FieldDescriptor::CPPTYPE_STRING);
263     (*variables)["unknown_fields"] = "_internal_metadata_.unknown_fields<" +
264                                      unknown_fields_type + ">(::" + proto_ns +
265                                      "::internal::GetEmptyString)";
266   }
267   (*variables)["unknown_fields_type"] = unknown_fields_type;
268   (*variables)["have_unknown_fields"] =
269       "_internal_metadata_.have_unknown_fields()";
270   (*variables)["mutable_unknown_fields"] =
271       "_internal_metadata_.mutable_unknown_fields<" + unknown_fields_type +
272       ">()";
273 }
274 
// Converts an identifier to camel case.
//
// Lowercase ASCII letters are upper-cased when a capital is pending and
// copied otherwise; uppercase letters and digits are copied unchanged.  Any
// other character (e.g. '_') is dropped.  A capital is pending after a digit
// or a dropped character, and initially when `cap_next_letter` is true.
//
// Example: ("foo_bar", true) -> "FooBar"; ("foo1bar", false) -> "foo1Bar".
std::string UnderscoresToCamelCase(const std::string& input,
                                   bool cap_next_letter) {
  std::string result;
  result.reserve(input.size());
  // Note:  I distrust ctype.h due to locales.
  for (const char c : input) {
    if ('a' <= c && c <= 'z') {
      // The cast makes the int -> char conversion of the shifted letter
      // explicit.
      result += cap_next_letter ? static_cast<char>(c + ('A' - 'a')) : c;
      cap_next_letter = false;
    } else if ('A' <= c && c <= 'Z') {
      // Capital letters are left as-is.
      result += c;
      cap_next_letter = false;
    } else {
      // Digits are kept, everything else is dropped; both force the next
      // letter to be capitalized.
      if ('0' <= c && c <= '9') result += c;
      cap_next_letter = true;
    }
  }
  return result;
}
300 
// Separator lines: each is a complete C++ line comment ending in a newline.
// NOTE(review): presumably emitted into generated code between sections --
// confirm against callers.
const char kThickSeparator[] =
    "// ===================================================================\n";
const char kThinSeparator[] =
    "// -------------------------------------------------------------------\n";
305 
CanInitializeByZeroing(const FieldDescriptor * field)306 bool CanInitializeByZeroing(const FieldDescriptor* field) {
307   if (field->is_repeated() || field->is_extension()) return false;
308   switch (field->cpp_type()) {
309     case FieldDescriptor::CPPTYPE_ENUM:
310       return field->default_value_enum()->number() == 0;
311     case FieldDescriptor::CPPTYPE_INT32:
312       return field->default_value_int32() == 0;
313     case FieldDescriptor::CPPTYPE_INT64:
314       return field->default_value_int64() == 0;
315     case FieldDescriptor::CPPTYPE_UINT32:
316       return field->default_value_uint32() == 0;
317     case FieldDescriptor::CPPTYPE_UINT64:
318       return field->default_value_uint64() == 0;
319     case FieldDescriptor::CPPTYPE_FLOAT:
320       return field->default_value_float() == 0;
321     case FieldDescriptor::CPPTYPE_DOUBLE:
322       return field->default_value_double() == 0;
323     case FieldDescriptor::CPPTYPE_BOOL:
324       return field->default_value_bool() == false;
325     default:
326       return false;
327   }
328 }
329 
ClassName(const Descriptor * descriptor)330 std::string ClassName(const Descriptor* descriptor) {
331   const Descriptor* parent = descriptor->containing_type();
332   std::string res;
333   if (parent) res += ClassName(parent) + "_";
334   res += descriptor->name();
335   if (IsMapEntryMessage(descriptor)) res += "_DoNotUse";
336   return ResolveKeyword(res);
337 }
338 
ClassName(const EnumDescriptor * enum_descriptor)339 std::string ClassName(const EnumDescriptor* enum_descriptor) {
340   if (enum_descriptor->containing_type() == nullptr) {
341     return ResolveKeyword(enum_descriptor->name());
342   } else {
343     return ClassName(enum_descriptor->containing_type()) + "_" +
344            enum_descriptor->name();
345   }
346 }
347 
QualifiedClassName(const Descriptor * d,const Options & options)348 std::string QualifiedClassName(const Descriptor* d, const Options& options) {
349   return QualifiedFileLevelSymbol(d->file(), ClassName(d), options);
350 }
351 
QualifiedClassName(const EnumDescriptor * d,const Options & options)352 std::string QualifiedClassName(const EnumDescriptor* d,
353                                const Options& options) {
354   return QualifiedFileLevelSymbol(d->file(), ClassName(d), options);
355 }
356 
QualifiedClassName(const Descriptor * d)357 std::string QualifiedClassName(const Descriptor* d) {
358   return QualifiedClassName(d, Options());
359 }
360 
QualifiedClassName(const EnumDescriptor * d)361 std::string QualifiedClassName(const EnumDescriptor* d) {
362   return QualifiedClassName(d, Options());
363 }
364 
QualifiedExtensionName(const FieldDescriptor * d,const Options & options)365 std::string QualifiedExtensionName(const FieldDescriptor* d,
366                                    const Options& options) {
367   GOOGLE_DCHECK(d->is_extension());
368   return QualifiedFileLevelSymbol(d->file(), FieldName(d), options);
369 }
370 
QualifiedExtensionName(const FieldDescriptor * d)371 std::string QualifiedExtensionName(const FieldDescriptor* d) {
372   return QualifiedExtensionName(d, Options());
373 }
374 
Namespace(const std::string & package)375 std::string Namespace(const std::string& package) {
376   if (package.empty()) return "";
377   return "::" + DotsToColons(package);
378 }
379 
Namespace(const FileDescriptor * d,const Options & options)380 std::string Namespace(const FileDescriptor* d, const Options& options) {
381   std::string ret = Namespace(d->package());
382   if (IsWellKnownMessage(d) && options.opensource_runtime) {
383     // Written with string concatenation to prevent rewriting of
384     // ::google::protobuf.
385     ret = StringReplace(ret,
386                         "::google::"
387                         "protobuf",
388                         "PROTOBUF_NAMESPACE_ID", false);
389   }
390   return ret;
391 }
392 
Namespace(const Descriptor * d,const Options & options)393 std::string Namespace(const Descriptor* d, const Options& options) {
394   return Namespace(d->file(), options);
395 }
396 
Namespace(const FieldDescriptor * d,const Options & options)397 std::string Namespace(const FieldDescriptor* d, const Options& options) {
398   return Namespace(d->file(), options);
399 }
400 
Namespace(const EnumDescriptor * d,const Options & options)401 std::string Namespace(const EnumDescriptor* d, const Options& options) {
402   return Namespace(d->file(), options);
403 }
404 
DefaultInstanceType(const Descriptor * descriptor,const Options & options)405 std::string DefaultInstanceType(const Descriptor* descriptor,
406                                 const Options& options) {
407   return ClassName(descriptor) + "DefaultTypeInternal";
408 }
409 
DefaultInstanceName(const Descriptor * descriptor,const Options & options)410 std::string DefaultInstanceName(const Descriptor* descriptor,
411                                 const Options& options) {
412   return "_" + ClassName(descriptor, false) + "_default_instance_";
413 }
414 
DefaultInstancePtr(const Descriptor * descriptor,const Options & options)415 std::string DefaultInstancePtr(const Descriptor* descriptor,
416                                const Options& options) {
417   return DefaultInstanceName(descriptor, options) + "ptr_";
418 }
419 
QualifiedDefaultInstanceName(const Descriptor * descriptor,const Options & options)420 std::string QualifiedDefaultInstanceName(const Descriptor* descriptor,
421                                          const Options& options) {
422   return QualifiedFileLevelSymbol(
423       descriptor->file(), DefaultInstanceName(descriptor, options), options);
424 }
425 
QualifiedDefaultInstancePtr(const Descriptor * descriptor,const Options & options)426 std::string QualifiedDefaultInstancePtr(const Descriptor* descriptor,
427                                         const Options& options) {
428   return QualifiedDefaultInstanceName(descriptor, options) + "ptr_";
429 }
430 
DescriptorTableName(const FileDescriptor * file,const Options & options)431 std::string DescriptorTableName(const FileDescriptor* file,
432                                 const Options& options) {
433   return UniqueName("descriptor_table", file, options);
434 }
435 
FileDllExport(const FileDescriptor * file,const Options & options)436 std::string FileDllExport(const FileDescriptor* file, const Options& options) {
437   return UniqueName("PROTOBUF_INTERNAL_EXPORT", file, options);
438 }
439 
SuperClassName(const Descriptor * descriptor,const Options & options)440 std::string SuperClassName(const Descriptor* descriptor,
441                            const Options& options) {
442   return "::" + ProtobufNamespace(options) +
443          (HasDescriptorMethods(descriptor->file(), options) ? "::Message"
444                                                             : "::MessageLite");
445 }
446 
ResolveKeyword(const std::string & name)447 std::string ResolveKeyword(const std::string& name) {
448   if (kKeywords.count(name) > 0) {
449     return name + "_";
450   }
451   return name;
452 }
453 
FieldName(const FieldDescriptor * field)454 std::string FieldName(const FieldDescriptor* field) {
455   std::string result = field->name();
456   LowerString(&result);
457   if (kKeywords.count(result) > 0) {
458     result.append("_");
459   }
460   return result;
461 }
462 
EnumValueName(const EnumValueDescriptor * enum_value)463 std::string EnumValueName(const EnumValueDescriptor* enum_value) {
464   std::string result = enum_value->name();
465   if (kKeywords.count(result) > 0) {
466     result.append("_");
467   }
468   return result;
469 }
470 
EstimateAlignmentSize(const FieldDescriptor * field)471 int EstimateAlignmentSize(const FieldDescriptor* field) {
472   if (field == nullptr) return 0;
473   if (field->is_repeated()) return 8;
474   switch (field->cpp_type()) {
475     case FieldDescriptor::CPPTYPE_BOOL:
476       return 1;
477 
478     case FieldDescriptor::CPPTYPE_INT32:
479     case FieldDescriptor::CPPTYPE_UINT32:
480     case FieldDescriptor::CPPTYPE_ENUM:
481     case FieldDescriptor::CPPTYPE_FLOAT:
482       return 4;
483 
484     case FieldDescriptor::CPPTYPE_INT64:
485     case FieldDescriptor::CPPTYPE_UINT64:
486     case FieldDescriptor::CPPTYPE_DOUBLE:
487     case FieldDescriptor::CPPTYPE_STRING:
488     case FieldDescriptor::CPPTYPE_MESSAGE:
489       return 8;
490   }
491   GOOGLE_LOG(FATAL) << "Can't get here.";
492   return -1;  // Make compiler happy.
493 }
494 
FieldConstantName(const FieldDescriptor * field)495 std::string FieldConstantName(const FieldDescriptor* field) {
496   std::string field_name = UnderscoresToCamelCase(field->name(), true);
497   std::string result = "k" + field_name + "FieldNumber";
498 
499   if (!field->is_extension() &&
500       field->containing_type()->FindFieldByCamelcaseName(
501           field->camelcase_name()) != field) {
502     // This field's camelcase name is not unique.  As a hack, add the field
503     // number to the constant name.  This makes the constant rather useless,
504     // but what can we do?
505     result += "_" + StrCat(field->number());
506   }
507 
508   return result;
509 }
510 
FieldMessageTypeName(const FieldDescriptor * field,const Options & options)511 std::string FieldMessageTypeName(const FieldDescriptor* field,
512                                  const Options& options) {
513   // Note:  The Google-internal version of Protocol Buffers uses this function
514   //   as a hook point for hacks to support legacy code.
515   return QualifiedClassName(field->message_type(), options);
516 }
517 
StripProto(const std::string & filename)518 std::string StripProto(const std::string& filename) {
519   if (HasSuffixString(filename, ".protodevel")) {
520     return StripSuffixString(filename, ".protodevel");
521   } else {
522     return StripSuffixString(filename, ".proto");
523   }
524 }
525 
PrimitiveTypeName(FieldDescriptor::CppType type)526 const char* PrimitiveTypeName(FieldDescriptor::CppType type) {
527   switch (type) {
528     case FieldDescriptor::CPPTYPE_INT32:
529       return "::google::protobuf::int32";
530     case FieldDescriptor::CPPTYPE_INT64:
531       return "::google::protobuf::int64";
532     case FieldDescriptor::CPPTYPE_UINT32:
533       return "::google::protobuf::uint32";
534     case FieldDescriptor::CPPTYPE_UINT64:
535       return "::google::protobuf::uint64";
536     case FieldDescriptor::CPPTYPE_DOUBLE:
537       return "double";
538     case FieldDescriptor::CPPTYPE_FLOAT:
539       return "float";
540     case FieldDescriptor::CPPTYPE_BOOL:
541       return "bool";
542     case FieldDescriptor::CPPTYPE_ENUM:
543       return "int";
544     case FieldDescriptor::CPPTYPE_STRING:
545       return "std::string";
546     case FieldDescriptor::CPPTYPE_MESSAGE:
547       return nullptr;
548 
549       // No default because we want the compiler to complain if any new
550       // CppTypes are added.
551   }
552 
553   GOOGLE_LOG(FATAL) << "Can't get here.";
554   return nullptr;
555 }
556 
PrimitiveTypeName(const Options & options,FieldDescriptor::CppType type)557 std::string PrimitiveTypeName(const Options& options,
558                               FieldDescriptor::CppType type) {
559   switch (type) {
560     case FieldDescriptor::CPPTYPE_INT32:
561       return IntTypeName(options, "int32");
562     case FieldDescriptor::CPPTYPE_INT64:
563       return IntTypeName(options, "int64");
564     case FieldDescriptor::CPPTYPE_UINT32:
565       return IntTypeName(options, "uint32");
566     case FieldDescriptor::CPPTYPE_UINT64:
567       return IntTypeName(options, "uint64");
568     case FieldDescriptor::CPPTYPE_DOUBLE:
569       return "double";
570     case FieldDescriptor::CPPTYPE_FLOAT:
571       return "float";
572     case FieldDescriptor::CPPTYPE_BOOL:
573       return "bool";
574     case FieldDescriptor::CPPTYPE_ENUM:
575       return "int";
576     case FieldDescriptor::CPPTYPE_STRING:
577       return "std::string";
578     case FieldDescriptor::CPPTYPE_MESSAGE:
579       return "";
580 
581       // No default because we want the compiler to complain if any new
582       // CppTypes are added.
583   }
584 
585   GOOGLE_LOG(FATAL) << "Can't get here.";
586   return "";
587 }
588 
DeclaredTypeMethodName(FieldDescriptor::Type type)589 const char* DeclaredTypeMethodName(FieldDescriptor::Type type) {
590   switch (type) {
591     case FieldDescriptor::TYPE_INT32:
592       return "Int32";
593     case FieldDescriptor::TYPE_INT64:
594       return "Int64";
595     case FieldDescriptor::TYPE_UINT32:
596       return "UInt32";
597     case FieldDescriptor::TYPE_UINT64:
598       return "UInt64";
599     case FieldDescriptor::TYPE_SINT32:
600       return "SInt32";
601     case FieldDescriptor::TYPE_SINT64:
602       return "SInt64";
603     case FieldDescriptor::TYPE_FIXED32:
604       return "Fixed32";
605     case FieldDescriptor::TYPE_FIXED64:
606       return "Fixed64";
607     case FieldDescriptor::TYPE_SFIXED32:
608       return "SFixed32";
609     case FieldDescriptor::TYPE_SFIXED64:
610       return "SFixed64";
611     case FieldDescriptor::TYPE_FLOAT:
612       return "Float";
613     case FieldDescriptor::TYPE_DOUBLE:
614       return "Double";
615 
616     case FieldDescriptor::TYPE_BOOL:
617       return "Bool";
618     case FieldDescriptor::TYPE_ENUM:
619       return "Enum";
620 
621     case FieldDescriptor::TYPE_STRING:
622       return "String";
623     case FieldDescriptor::TYPE_BYTES:
624       return "Bytes";
625     case FieldDescriptor::TYPE_GROUP:
626       return "Group";
627     case FieldDescriptor::TYPE_MESSAGE:
628       return "Message";
629 
630       // No default because we want the compiler to complain if any new
631       // types are added.
632   }
633   GOOGLE_LOG(FATAL) << "Can't get here.";
634   return "";
635 }
636 
Int32ToString(int number)637 std::string Int32ToString(int number) {
638   if (number == kint32min) {
639     // This needs to be special-cased, see explanation here:
640     // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=52661
641     return StrCat(number + 1, " - 1");
642   } else {
643     return StrCat(number);
644   }
645 }
646 
Int64ToString(const std::string & macro_prefix,int64 number)647 std::string Int64ToString(const std::string& macro_prefix, int64 number) {
648   if (number == kint64min) {
649     // This needs to be special-cased, see explanation here:
650     // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=52661
651     return StrCat(macro_prefix, "_LONGLONG(", number + 1, ") - 1");
652   }
653   return StrCat(macro_prefix, "_LONGLONG(", number, ")");
654 }
655 
UInt64ToString(const std::string & macro_prefix,uint64 number)656 std::string UInt64ToString(const std::string& macro_prefix, uint64 number) {
657   return StrCat(macro_prefix, "_ULONGLONG(", number, ")");
658 }
659 
DefaultValue(const FieldDescriptor * field)660 std::string DefaultValue(const FieldDescriptor* field) {
661   switch (field->cpp_type()) {
662     case FieldDescriptor::CPPTYPE_INT64:
663       return Int64ToString("GG", field->default_value_int64());
664     case FieldDescriptor::CPPTYPE_UINT64:
665       return UInt64ToString("GG", field->default_value_uint64());
666     default:
667       return DefaultValue(Options(), field);
668   }
669 }
670 
DefaultValue(const Options & options,const FieldDescriptor * field)671 std::string DefaultValue(const Options& options, const FieldDescriptor* field) {
672   switch (field->cpp_type()) {
673     case FieldDescriptor::CPPTYPE_INT32:
674       return Int32ToString(field->default_value_int32());
675     case FieldDescriptor::CPPTYPE_UINT32:
676       return StrCat(field->default_value_uint32()) + "u";
677     case FieldDescriptor::CPPTYPE_INT64:
678       return Int64ToString("PROTOBUF", field->default_value_int64());
679     case FieldDescriptor::CPPTYPE_UINT64:
680       return UInt64ToString("PROTOBUF", field->default_value_uint64());
681     case FieldDescriptor::CPPTYPE_DOUBLE: {
682       double value = field->default_value_double();
683       if (value == std::numeric_limits<double>::infinity()) {
684         return "std::numeric_limits<double>::infinity()";
685       } else if (value == -std::numeric_limits<double>::infinity()) {
686         return "-std::numeric_limits<double>::infinity()";
687       } else if (value != value) {
688         return "std::numeric_limits<double>::quiet_NaN()";
689       } else {
690         return SimpleDtoa(value);
691       }
692     }
693     case FieldDescriptor::CPPTYPE_FLOAT: {
694       float value = field->default_value_float();
695       if (value == std::numeric_limits<float>::infinity()) {
696         return "std::numeric_limits<float>::infinity()";
697       } else if (value == -std::numeric_limits<float>::infinity()) {
698         return "-std::numeric_limits<float>::infinity()";
699       } else if (value != value) {
700         return "std::numeric_limits<float>::quiet_NaN()";
701       } else {
702         std::string float_value = SimpleFtoa(value);
703         // If floating point value contains a period (.) or an exponent
704         // (either E or e), then append suffix 'f' to make it a float
705         // literal.
706         if (float_value.find_first_of(".eE") != std::string::npos) {
707           float_value.push_back('f');
708         }
709         return float_value;
710       }
711     }
712     case FieldDescriptor::CPPTYPE_BOOL:
713       return field->default_value_bool() ? "true" : "false";
714     case FieldDescriptor::CPPTYPE_ENUM:
715       // Lazy:  Generate a static_cast because we don't have a helper function
716       //   that constructs the full name of an enum value.
717       return strings::Substitute(
718           "static_cast< $0 >($1)", ClassName(field->enum_type(), true),
719           Int32ToString(field->default_value_enum()->number()));
720     case FieldDescriptor::CPPTYPE_STRING:
721       return "\"" +
722              EscapeTrigraphs(CEscape(field->default_value_string())) +
723              "\"";
724     case FieldDescriptor::CPPTYPE_MESSAGE:
725       return "*" + FieldMessageTypeName(field, options) +
726              "::internal_default_instance()";
727   }
728   // Can't actually get here; make compiler happy.  (We could add a default
729   // case above but then we wouldn't get the nice compiler warning when a
730   // new type is added.)
731   GOOGLE_LOG(FATAL) << "Can't get here.";
732   return "";
733 }
734 
735 // Convert a file name into a valid identifier.
FilenameIdentifier(const std::string & filename)736 std::string FilenameIdentifier(const std::string& filename) {
737   std::string result;
738   for (int i = 0; i < filename.size(); i++) {
739     if (ascii_isalnum(filename[i])) {
740       result.push_back(filename[i]);
741     } else {
742       // Not alphanumeric.  To avoid any possibility of name conflicts we
743       // use the hex code for the character.
744       StrAppend(&result, "_", strings::Hex(static_cast<uint8>(filename[i])));
745     }
746   }
747   return result;
748 }
749 
UniqueName(const std::string & name,const std::string & filename,const Options & options)750 std::string UniqueName(const std::string& name, const std::string& filename,
751                        const Options& options) {
752   return name + "_" + FilenameIdentifier(filename);
753 }
754 
755 // Return the qualified C++ name for a file level symbol.
QualifiedFileLevelSymbol(const FileDescriptor * file,const std::string & name,const Options & options)756 std::string QualifiedFileLevelSymbol(const FileDescriptor* file,
757                                      const std::string& name,
758                                      const Options& options) {
759   if (file->package().empty()) {
760     return StrCat("::", name);
761   }
762   return StrCat(Namespace(file, options), "::", name);
763 }
764 
765 // Escape C++ trigraphs by escaping question marks to \?
EscapeTrigraphs(const std::string & to_escape)766 std::string EscapeTrigraphs(const std::string& to_escape) {
767   return StringReplace(to_escape, "?", "\\?", true);
768 }
769 
770 // Escaped function name to eliminate naming conflict.
SafeFunctionName(const Descriptor * descriptor,const FieldDescriptor * field,const std::string & prefix)771 std::string SafeFunctionName(const Descriptor* descriptor,
772                              const FieldDescriptor* field,
773                              const std::string& prefix) {
774   // Do not use FieldName() since it will escape keywords.
775   std::string name = field->name();
776   LowerString(&name);
777   std::string function_name = prefix + name;
778   if (descriptor->FindFieldByName(function_name)) {
779     // Single underscore will also make it conflicting with the private data
780     // member. We use double underscore to escape function names.
781     function_name.append("__");
782   } else if (kKeywords.count(name) > 0) {
783     // If the field name is a keyword, we append the underscore back to keep it
784     // consistent with other function names.
785     function_name.append("_");
786   }
787   return function_name;
788 }
789 
HasLazyFields(const Descriptor * descriptor,const Options & options)790 static bool HasLazyFields(const Descriptor* descriptor,
791                           const Options& options) {
792   for (int field_idx = 0; field_idx < descriptor->field_count(); field_idx++) {
793     if (IsLazy(descriptor->field(field_idx), options)) {
794       return true;
795     }
796   }
797   for (int idx = 0; idx < descriptor->extension_count(); idx++) {
798     if (IsLazy(descriptor->extension(idx), options)) {
799       return true;
800     }
801   }
802   for (int idx = 0; idx < descriptor->nested_type_count(); idx++) {
803     if (HasLazyFields(descriptor->nested_type(idx), options)) {
804       return true;
805     }
806   }
807   return false;
808 }
809 
810 // Does the given FileDescriptor use lazy fields?
HasLazyFields(const FileDescriptor * file,const Options & options)811 bool HasLazyFields(const FileDescriptor* file, const Options& options) {
812   for (int i = 0; i < file->message_type_count(); i++) {
813     const Descriptor* descriptor(file->message_type(i));
814     if (HasLazyFields(descriptor, options)) {
815       return true;
816     }
817   }
818   for (int field_idx = 0; field_idx < file->extension_count(); field_idx++) {
819     if (IsLazy(file->extension(field_idx), options)) {
820       return true;
821     }
822   }
823   return false;
824 }
825 
HasRepeatedFields(const Descriptor * descriptor)826 static bool HasRepeatedFields(const Descriptor* descriptor) {
827   for (int i = 0; i < descriptor->field_count(); ++i) {
828     if (descriptor->field(i)->label() == FieldDescriptor::LABEL_REPEATED) {
829       return true;
830     }
831   }
832   for (int i = 0; i < descriptor->nested_type_count(); ++i) {
833     if (HasRepeatedFields(descriptor->nested_type(i))) return true;
834   }
835   return false;
836 }
837 
HasRepeatedFields(const FileDescriptor * file)838 bool HasRepeatedFields(const FileDescriptor* file) {
839   for (int i = 0; i < file->message_type_count(); ++i) {
840     if (HasRepeatedFields(file->message_type(i))) return true;
841   }
842   return false;
843 }
844 
IsStringPieceField(const FieldDescriptor * field,const Options & options)845 static bool IsStringPieceField(const FieldDescriptor* field,
846                                const Options& options) {
847   return field->cpp_type() == FieldDescriptor::CPPTYPE_STRING &&
848          EffectiveStringCType(field, options) == FieldOptions::STRING_PIECE;
849 }
850 
HasStringPieceFields(const Descriptor * descriptor,const Options & options)851 static bool HasStringPieceFields(const Descriptor* descriptor,
852                                  const Options& options) {
853   for (int i = 0; i < descriptor->field_count(); ++i) {
854     if (IsStringPieceField(descriptor->field(i), options)) return true;
855   }
856   for (int i = 0; i < descriptor->nested_type_count(); ++i) {
857     if (HasStringPieceFields(descriptor->nested_type(i), options)) return true;
858   }
859   return false;
860 }
861 
HasStringPieceFields(const FileDescriptor * file,const Options & options)862 bool HasStringPieceFields(const FileDescriptor* file, const Options& options) {
863   for (int i = 0; i < file->message_type_count(); ++i) {
864     if (HasStringPieceFields(file->message_type(i), options)) return true;
865   }
866   return false;
867 }
868 
IsCordField(const FieldDescriptor * field,const Options & options)869 static bool IsCordField(const FieldDescriptor* field, const Options& options) {
870   return field->cpp_type() == FieldDescriptor::CPPTYPE_STRING &&
871          EffectiveStringCType(field, options) == FieldOptions::CORD;
872 }
873 
HasCordFields(const Descriptor * descriptor,const Options & options)874 static bool HasCordFields(const Descriptor* descriptor,
875                           const Options& options) {
876   for (int i = 0; i < descriptor->field_count(); ++i) {
877     if (IsCordField(descriptor->field(i), options)) return true;
878   }
879   for (int i = 0; i < descriptor->nested_type_count(); ++i) {
880     if (HasCordFields(descriptor->nested_type(i), options)) return true;
881   }
882   return false;
883 }
884 
HasCordFields(const FileDescriptor * file,const Options & options)885 bool HasCordFields(const FileDescriptor* file, const Options& options) {
886   for (int i = 0; i < file->message_type_count(); ++i) {
887     if (HasCordFields(file->message_type(i), options)) return true;
888   }
889   return false;
890 }
891 
HasExtensionsOrExtendableMessage(const Descriptor * descriptor)892 static bool HasExtensionsOrExtendableMessage(const Descriptor* descriptor) {
893   if (descriptor->extension_range_count() > 0) return true;
894   if (descriptor->extension_count() > 0) return true;
895   for (int i = 0; i < descriptor->nested_type_count(); ++i) {
896     if (HasExtensionsOrExtendableMessage(descriptor->nested_type(i))) {
897       return true;
898     }
899   }
900   return false;
901 }
902 
HasExtensionsOrExtendableMessage(const FileDescriptor * file)903 bool HasExtensionsOrExtendableMessage(const FileDescriptor* file) {
904   if (file->extension_count() > 0) return true;
905   for (int i = 0; i < file->message_type_count(); ++i) {
906     if (HasExtensionsOrExtendableMessage(file->message_type(i))) return true;
907   }
908   return false;
909 }
910 
HasMapFields(const Descriptor * descriptor)911 static bool HasMapFields(const Descriptor* descriptor) {
912   for (int i = 0; i < descriptor->field_count(); ++i) {
913     if (descriptor->field(i)->is_map()) {
914       return true;
915     }
916   }
917   for (int i = 0; i < descriptor->nested_type_count(); ++i) {
918     if (HasMapFields(descriptor->nested_type(i))) return true;
919   }
920   return false;
921 }
922 
HasMapFields(const FileDescriptor * file)923 bool HasMapFields(const FileDescriptor* file) {
924   for (int i = 0; i < file->message_type_count(); ++i) {
925     if (HasMapFields(file->message_type(i))) return true;
926   }
927   return false;
928 }
929 
HasEnumDefinitions(const Descriptor * message_type)930 static bool HasEnumDefinitions(const Descriptor* message_type) {
931   if (message_type->enum_type_count() > 0) return true;
932   for (int i = 0; i < message_type->nested_type_count(); ++i) {
933     if (HasEnumDefinitions(message_type->nested_type(i))) return true;
934   }
935   return false;
936 }
937 
HasEnumDefinitions(const FileDescriptor * file)938 bool HasEnumDefinitions(const FileDescriptor* file) {
939   if (file->enum_type_count() > 0) return true;
940   for (int i = 0; i < file->message_type_count(); ++i) {
941     if (HasEnumDefinitions(file->message_type(i))) return true;
942   }
943   return false;
944 }
945 
IsStringOrMessage(const FieldDescriptor * field)946 bool IsStringOrMessage(const FieldDescriptor* field) {
947   switch (field->cpp_type()) {
948     case FieldDescriptor::CPPTYPE_INT32:
949     case FieldDescriptor::CPPTYPE_INT64:
950     case FieldDescriptor::CPPTYPE_UINT32:
951     case FieldDescriptor::CPPTYPE_UINT64:
952     case FieldDescriptor::CPPTYPE_DOUBLE:
953     case FieldDescriptor::CPPTYPE_FLOAT:
954     case FieldDescriptor::CPPTYPE_BOOL:
955     case FieldDescriptor::CPPTYPE_ENUM:
956       return false;
957     case FieldDescriptor::CPPTYPE_STRING:
958     case FieldDescriptor::CPPTYPE_MESSAGE:
959       return true;
960   }
961 
962   GOOGLE_LOG(FATAL) << "Can't get here.";
963   return false;
964 }
965 
EffectiveStringCType(const FieldDescriptor * field,const Options & options)966 FieldOptions::CType EffectiveStringCType(const FieldDescriptor* field,
967                                          const Options& options) {
968   GOOGLE_DCHECK(field->cpp_type() == FieldDescriptor::CPPTYPE_STRING);
969   if (options.opensource_runtime) {
970     // Open-source protobuf release only supports STRING ctype.
971     return FieldOptions::STRING;
972   } else {
973     // Google-internal supports all ctypes.
974     return field->options().ctype();
975   }
976 }
977 
IsAnyMessage(const FileDescriptor * descriptor,const Options & options)978 bool IsAnyMessage(const FileDescriptor* descriptor, const Options& options) {
979   return descriptor->name() == kAnyProtoFile;
980 }
981 
IsAnyMessage(const Descriptor * descriptor,const Options & options)982 bool IsAnyMessage(const Descriptor* descriptor, const Options& options) {
983   return descriptor->name() == kAnyMessageName &&
984          IsAnyMessage(descriptor->file(), options);
985 }
986 
IsWellKnownMessage(const FileDescriptor * file)987 bool IsWellKnownMessage(const FileDescriptor* file) {
988   static const std::unordered_set<std::string> well_known_files{
989       "google/protobuf/any.proto",
990       "google/protobuf/api.proto",
991       "google/protobuf/compiler/plugin.proto",
992       "google/protobuf/descriptor.proto",
993       "google/protobuf/duration.proto",
994       "google/protobuf/empty.proto",
995       "google/protobuf/field_mask.proto",
996       "google/protobuf/source_context.proto",
997       "google/protobuf/struct.proto",
998       "google/protobuf/timestamp.proto",
999       "google/protobuf/type.proto",
1000       "google/protobuf/wrappers.proto",
1001   };
1002   return well_known_files.find(file->name()) != well_known_files.end();
1003 }
1004 
// Whether UTF-8 is enforced for a field. This implementation answers true
// unconditionally; neither argument is inspected.
static bool FieldEnforceUtf8(const FieldDescriptor* /*field*/,
                             const Options& /*options*/) {
  return true;
}
1009 
// Whether UTF-8 verification is enabled for a file. This implementation
// answers true unconditionally; neither argument is inspected.
static bool FileUtf8Verification(const FileDescriptor* /*file*/,
                                 const Options& /*options*/) {
  return true;
}
1014 
1015 // Which level of UTF-8 enforcemant is placed on this file.
GetUtf8CheckMode(const FieldDescriptor * field,const Options & options)1016 Utf8CheckMode GetUtf8CheckMode(const FieldDescriptor* field,
1017                                const Options& options) {
1018   if (field->file()->syntax() == FileDescriptor::SYNTAX_PROTO3 &&
1019       FieldEnforceUtf8(field, options)) {
1020     return STRICT;
1021   } else if (GetOptimizeFor(field->file(), options) !=
1022                  FileOptions::LITE_RUNTIME &&
1023              FileUtf8Verification(field->file(), options)) {
1024     return VERIFY;
1025   } else {
1026     return NONE;
1027   }
1028 }
1029 
GenerateUtf8CheckCode(const FieldDescriptor * field,const Options & options,bool for_parse,const char * parameters,const char * strict_function,const char * verify_function,const Formatter & format)1030 static void GenerateUtf8CheckCode(const FieldDescriptor* field,
1031                                   const Options& options, bool for_parse,
1032                                   const char* parameters,
1033                                   const char* strict_function,
1034                                   const char* verify_function,
1035                                   const Formatter& format) {
1036   switch (GetUtf8CheckMode(field, options)) {
1037     case STRICT: {
1038       if (for_parse) {
1039         format("DO_(");
1040       }
1041       format("::$proto_ns$::internal::WireFormatLite::$1$(\n", strict_function);
1042       format.Indent();
1043       format(parameters);
1044       if (for_parse) {
1045         format("::$proto_ns$::internal::WireFormatLite::PARSE,\n");
1046       } else {
1047         format("::$proto_ns$::internal::WireFormatLite::SERIALIZE,\n");
1048       }
1049       format("\"$1$\")", field->full_name());
1050       if (for_parse) {
1051         format(")");
1052       }
1053       format(";\n");
1054       format.Outdent();
1055       break;
1056     }
1057     case VERIFY: {
1058       format("::$proto_ns$::internal::WireFormat::$1$(\n", verify_function);
1059       format.Indent();
1060       format(parameters);
1061       if (for_parse) {
1062         format("::$proto_ns$::internal::WireFormat::PARSE,\n");
1063       } else {
1064         format("::$proto_ns$::internal::WireFormat::SERIALIZE,\n");
1065       }
1066       format("\"$1$\");\n", field->full_name());
1067       format.Outdent();
1068       break;
1069     }
1070     case NONE:
1071       break;
1072   }
1073 }
1074 
GenerateUtf8CheckCodeForString(const FieldDescriptor * field,const Options & options,bool for_parse,const char * parameters,const Formatter & format)1075 void GenerateUtf8CheckCodeForString(const FieldDescriptor* field,
1076                                     const Options& options, bool for_parse,
1077                                     const char* parameters,
1078                                     const Formatter& format) {
1079   GenerateUtf8CheckCode(field, options, for_parse, parameters,
1080                         "VerifyUtf8String", "VerifyUTF8StringNamedField",
1081                         format);
1082 }
1083 
GenerateUtf8CheckCodeForCord(const FieldDescriptor * field,const Options & options,bool for_parse,const char * parameters,const Formatter & format)1084 void GenerateUtf8CheckCodeForCord(const FieldDescriptor* field,
1085                                   const Options& options, bool for_parse,
1086                                   const char* parameters,
1087                                   const Formatter& format) {
1088   GenerateUtf8CheckCode(field, options, for_parse, parameters, "VerifyUtf8Cord",
1089                         "VerifyUTF8CordNamedField", format);
1090 }
1091 
1092 namespace {
1093 
Flatten(const Descriptor * descriptor,std::vector<const Descriptor * > * flatten)1094 void Flatten(const Descriptor* descriptor,
1095              std::vector<const Descriptor*>* flatten) {
1096   for (int i = 0; i < descriptor->nested_type_count(); i++)
1097     Flatten(descriptor->nested_type(i), flatten);
1098   flatten->push_back(descriptor);
1099 }
1100 
1101 }  // namespace
1102 
FlattenMessagesInFile(const FileDescriptor * file,std::vector<const Descriptor * > * result)1103 void FlattenMessagesInFile(const FileDescriptor* file,
1104                            std::vector<const Descriptor*>* result) {
1105   for (int i = 0; i < file->message_type_count(); i++) {
1106     Flatten(file->message_type(i), result);
1107   }
1108 }
1109 
HasWeakFields(const Descriptor * descriptor,const Options & options)1110 bool HasWeakFields(const Descriptor* descriptor, const Options& options) {
1111   for (int i = 0; i < descriptor->field_count(); i++) {
1112     if (IsWeak(descriptor->field(i), options)) return true;
1113   }
1114   return false;
1115 }
1116 
HasWeakFields(const FileDescriptor * file,const Options & options)1117 bool HasWeakFields(const FileDescriptor* file, const Options& options) {
1118   for (int i = 0; i < file->message_type_count(); ++i) {
1119     if (HasWeakFields(file->message_type(i), options)) return true;
1120   }
1121   return false;
1122 }
1123 
UsingImplicitWeakFields(const FileDescriptor * file,const Options & options)1124 bool UsingImplicitWeakFields(const FileDescriptor* file,
1125                              const Options& options) {
1126   return options.lite_implicit_weak_fields &&
1127          GetOptimizeFor(file, options) == FileOptions::LITE_RUNTIME;
1128 }
1129 
IsImplicitWeakField(const FieldDescriptor * field,const Options & options,MessageSCCAnalyzer * scc_analyzer)1130 bool IsImplicitWeakField(const FieldDescriptor* field, const Options& options,
1131                          MessageSCCAnalyzer* scc_analyzer) {
1132   return UsingImplicitWeakFields(field->file(), options) &&
1133          field->type() == FieldDescriptor::TYPE_MESSAGE &&
1134          !field->is_required() && !field->is_map() && !field->is_extension() &&
1135          !field->real_containing_oneof() &&
1136          !IsWellKnownMessage(field->message_type()->file()) &&
1137          field->message_type()->file()->name() !=
1138              "net/proto2/proto/descriptor.proto" &&
1139          // We do not support implicit weak fields between messages in the same
1140          // strongly-connected component.
1141          scc_analyzer->GetSCC(field->containing_type()) !=
1142              scc_analyzer->GetSCC(field->message_type());
1143 }
1144 
GetSCCAnalysis(const SCC * scc)1145 MessageAnalysis MessageSCCAnalyzer::GetSCCAnalysis(const SCC* scc) {
1146   if (analysis_cache_.count(scc)) return analysis_cache_[scc];
1147   MessageAnalysis result{};
1148   for (int i = 0; i < scc->descriptors.size(); i++) {
1149     const Descriptor* descriptor = scc->descriptors[i];
1150     if (descriptor->extension_range_count() > 0) {
1151       result.contains_extension = true;
1152       // Extensions are found by looking up default_instance and extension
1153       // number in a map. So you'd maybe expect here
1154       // result.constructor_requires_initialization = true;
1155       // However the extension registration mechanism already makes sure
1156       // the default will be initialized.
1157     }
1158     for (int i = 0; i < descriptor->field_count(); i++) {
1159       const FieldDescriptor* field = descriptor->field(i);
1160       if (field->is_required()) {
1161         result.contains_required = true;
1162       }
1163       switch (field->type()) {
1164         case FieldDescriptor::TYPE_STRING:
1165         case FieldDescriptor::TYPE_BYTES: {
1166           result.constructor_requires_initialization = true;
1167           if (field->options().ctype() == FieldOptions::CORD) {
1168             result.contains_cord = true;
1169           }
1170           break;
1171         }
1172         case FieldDescriptor::TYPE_GROUP:
1173         case FieldDescriptor::TYPE_MESSAGE: {
1174           result.constructor_requires_initialization = true;
1175           const SCC* child = analyzer_.GetSCC(field->message_type());
1176           if (child != scc) {
1177             MessageAnalysis analysis = GetSCCAnalysis(child);
1178             result.contains_cord |= analysis.contains_cord;
1179             result.contains_extension |= analysis.contains_extension;
1180             if (!ShouldIgnoreRequiredFieldCheck(field, options_)) {
1181               result.contains_required |= analysis.contains_required;
1182             }
1183           } else {
1184             // This field points back into the same SCC hence the messages
1185             // in the SCC are recursive. Note if SCC contains more than two
1186             // nodes it has to be recursive, however this test also works for
1187             // a single node that is recursive.
1188             result.is_recursive = true;
1189           }
1190           break;
1191         }
1192         default:
1193           break;
1194       }
1195     }
1196   }
1197   // We deliberately only insert the result here. After we contracted the SCC
1198   // in the graph, the graph should be a DAG. Hence we shouldn't need to mark
1199   // nodes visited as we can never return to them. By inserting them here
1200   // we will go in an infinite loop if the SCC is not correct.
1201   return analysis_cache_[scc] = result;
1202 }
1203 
ListAllFields(const Descriptor * d,std::vector<const FieldDescriptor * > * fields)1204 void ListAllFields(const Descriptor* d,
1205                    std::vector<const FieldDescriptor*>* fields) {
1206   // Collect sub messages
1207   for (int i = 0; i < d->nested_type_count(); i++) {
1208     ListAllFields(d->nested_type(i), fields);
1209   }
1210   // Collect message level extensions.
1211   for (int i = 0; i < d->extension_count(); i++) {
1212     fields->push_back(d->extension(i));
1213   }
1214   // Add types of fields necessary
1215   for (int i = 0; i < d->field_count(); i++) {
1216     fields->push_back(d->field(i));
1217   }
1218 }
1219 
ListAllFields(const FileDescriptor * d,std::vector<const FieldDescriptor * > * fields)1220 void ListAllFields(const FileDescriptor* d,
1221                    std::vector<const FieldDescriptor*>* fields) {
1222   // Collect file level message.
1223   for (int i = 0; i < d->message_type_count(); i++) {
1224     ListAllFields(d->message_type(i), fields);
1225   }
1226   // Collect message level extensions.
1227   for (int i = 0; i < d->extension_count(); i++) {
1228     fields->push_back(d->extension(i));
1229   }
1230 }
1231 
ListAllTypesForServices(const FileDescriptor * fd,std::vector<const Descriptor * > * types)1232 void ListAllTypesForServices(const FileDescriptor* fd,
1233                              std::vector<const Descriptor*>* types) {
1234   for (int i = 0; i < fd->service_count(); i++) {
1235     const ServiceDescriptor* sd = fd->service(i);
1236     for (int j = 0; j < sd->method_count(); j++) {
1237       const MethodDescriptor* method = sd->method(j);
1238       types->push_back(method->input_type());
1239       types->push_back(method->output_type());
1240     }
1241   }
1242 }
1243 
GetBootstrapBasename(const Options & options,const std::string & basename,std::string * bootstrap_basename)1244 bool GetBootstrapBasename(const Options& options, const std::string& basename,
1245                           std::string* bootstrap_basename) {
1246   if (options.opensource_runtime) {
1247     return false;
1248   }
1249 
1250   std::unordered_map<std::string, std::string> bootstrap_mapping{
1251       {"net/proto2/proto/descriptor",
1252        "net/proto2/internal/descriptor"},
1253       {"net/proto2/compiler/proto/plugin",
1254        "net/proto2/compiler/proto/plugin"},
1255       {"net/proto2/compiler/proto/profile",
1256        "net/proto2/compiler/proto/profile_bootstrap"},
1257   };
1258   auto iter = bootstrap_mapping.find(basename);
1259   if (iter == bootstrap_mapping.end()) {
1260     *bootstrap_basename = basename;
1261     return false;
1262   } else {
1263     *bootstrap_basename = iter->second;
1264     return true;
1265   }
1266 }
1267 
IsBootstrapProto(const Options & options,const FileDescriptor * file)1268 bool IsBootstrapProto(const Options& options, const FileDescriptor* file) {
1269   std::string my_name = StripProto(file->name());
1270   return GetBootstrapBasename(options, my_name, &my_name);
1271 }
1272 
MaybeBootstrap(const Options & options,GeneratorContext * generator_context,bool bootstrap_flag,std::string * basename)1273 bool MaybeBootstrap(const Options& options, GeneratorContext* generator_context,
1274                     bool bootstrap_flag, std::string* basename) {
1275   std::string bootstrap_basename;
1276   if (!GetBootstrapBasename(options, *basename, &bootstrap_basename)) {
1277     return false;
1278   }
1279 
1280   if (bootstrap_flag) {
1281     // Adjust basename, but don't abort code generation.
1282     *basename = bootstrap_basename;
1283     return false;
1284   } else {
1285     std::string forward_to_basename = bootstrap_basename;
1286 
1287     // Generate forwarding headers and empty .pb.cc.
1288     {
1289       std::unique_ptr<io::ZeroCopyOutputStream> output(
1290           generator_context->Open(*basename + ".pb.h"));
1291       io::Printer printer(output.get(), '$', nullptr);
1292       printer.Print(
1293           "#ifndef PROTOBUF_INCLUDED_$filename_identifier$_FORWARD_PB_H\n"
1294           "#define PROTOBUF_INCLUDED_$filename_identifier$_FORWARD_PB_H\n"
1295           "#include \"$forward_to_basename$.pb.h\"  // IWYU pragma: export\n"
1296           "#endif  // PROTOBUF_INCLUDED_$filename_identifier$_FORWARD_PB_H\n",
1297           "forward_to_basename", forward_to_basename, "filename_identifier",
1298           FilenameIdentifier(*basename));
1299 
1300       if (!options.opensource_runtime) {
1301         // HACK HACK HACK, tech debt from the deeps of proto1 and SWIG
1302         // protocoltype is SWIG'ed and we need to forward
1303         if (*basename == "net/proto/protocoltype") {
1304           printer.Print(
1305               "#ifdef SWIG\n"
1306               "%include \"$forward_to_basename$.pb.h\"\n"
1307               "#endif  // SWIG\n",
1308               "forward_to_basename", forward_to_basename);
1309         }
1310       }
1311     }
1312 
1313     {
1314       std::unique_ptr<io::ZeroCopyOutputStream> output(
1315           generator_context->Open(*basename + ".proto.h"));
1316       io::Printer printer(output.get(), '$', nullptr);
1317       printer.Print(
1318           "#ifndef PROTOBUF_INCLUDED_$filename_identifier$_FORWARD_PROTO_H\n"
1319           "#define PROTOBUF_INCLUDED_$filename_identifier$_FORWARD_PROTO_H\n"
1320           "#include \"$forward_to_basename$.proto.h\"  // IWYU pragma: "
1321           "export\n"
1322           "#endif  // "
1323           "PROTOBUF_INCLUDED_$filename_identifier$_FORWARD_PROTO_H\n",
1324           "forward_to_basename", forward_to_basename, "filename_identifier",
1325           FilenameIdentifier(*basename));
1326     }
1327 
1328     {
1329       std::unique_ptr<io::ZeroCopyOutputStream> output(
1330           generator_context->Open(*basename + ".pb.cc"));
1331       io::Printer printer(output.get(), '$', nullptr);
1332       printer.Print("\n");
1333     }
1334 
1335     {
1336       std::unique_ptr<io::ZeroCopyOutputStream> output(
1337           generator_context->Open(*basename + ".pb.h.meta"));
1338     }
1339 
1340     {
1341       std::unique_ptr<io::ZeroCopyOutputStream> output(
1342           generator_context->Open(*basename + ".proto.h.meta"));
1343     }
1344 
1345     // Abort code generation.
1346     return true;
1347   }
1348 }
1349 
1350 class ParseLoopGenerator {
1351  public:
ParseLoopGenerator(int num_hasbits,const Options & options,MessageSCCAnalyzer * scc_analyzer,io::Printer * printer)1352   ParseLoopGenerator(int num_hasbits, const Options& options,
1353                      MessageSCCAnalyzer* scc_analyzer, io::Printer* printer)
1354       : scc_analyzer_(scc_analyzer),
1355         options_(options),
1356         format_(printer),
1357         num_hasbits_(num_hasbits) {}
1358 
GenerateParserLoop(const Descriptor * descriptor)1359   void GenerateParserLoop(const Descriptor* descriptor) {
1360     format_.Set("classname", ClassName(descriptor));
1361     format_.Set("p_ns", "::" + ProtobufNamespace(options_));
1362     format_.Set("pi_ns",
1363                 StrCat("::", ProtobufNamespace(options_), "::internal"));
1364     format_.Set("GOOGLE_PROTOBUF", MacroPrefix(options_));
1365     std::map<std::string, std::string> vars;
1366     SetCommonVars(options_, &vars);
1367     SetUnknkownFieldsVariable(descriptor, options_, &vars);
1368     format_.AddMap(vars);
1369 
1370     std::vector<const FieldDescriptor*> ordered_fields;
1371     for (auto field : FieldRange(descriptor)) {
1372       if (!IsFieldStripped(field, options_)) {
1373         ordered_fields.push_back(field);
1374       }
1375     }
1376     std::sort(ordered_fields.begin(), ordered_fields.end(),
1377               [](const FieldDescriptor* a, const FieldDescriptor* b) {
1378                 return a->number() < b->number();
1379               });
1380 
1381     format_(
1382         "const char* $classname$::_InternalParse(const char* ptr, "
1383         "$pi_ns$::ParseContext* ctx) {\n"
1384         "#define CHK_(x) if (PROTOBUF_PREDICT_FALSE(!(x))) goto failure\n");
1385     format_.Indent();
1386     int hasbits_size = 0;
1387     if (num_hasbits_ > 0) {
1388       hasbits_size = (num_hasbits_ + 31) / 32;
1389     }
1390     // For now only optimize small hasbits.
1391     if (hasbits_size != 1) hasbits_size = 0;
1392     if (hasbits_size) {
1393       format_("_Internal::HasBits has_bits{};\n");
1394       format_.Set("has_bits", "has_bits");
1395     } else {
1396       format_.Set("has_bits", "_has_bits_");
1397     }
1398 
1399     GenerateParseLoop(descriptor, ordered_fields);
1400     format_.Outdent();
1401     format_("success:\n");
1402     if (hasbits_size) format_("  _has_bits_.Or(has_bits);\n");
1403 
1404     format_(
1405         "  return ptr;\n"
1406         "failure:\n"
1407         "  ptr = nullptr;\n"
1408         "  goto success;\n"
1409         "#undef CHK_\n"
1410         "}\n");
1411   }
1412 
1413  private:
1414   MessageSCCAnalyzer* scc_analyzer_;
1415   const Options& options_;
1416   Formatter format_;
1417   int num_hasbits_;
1418 
1419   using WireFormat = internal::WireFormat;
1420   using WireFormatLite = internal::WireFormatLite;
1421 
GenerateArenaString(const FieldDescriptor * field)1422   void GenerateArenaString(const FieldDescriptor* field) {
1423     if (HasHasbit(field)) {
1424       format_("_Internal::set_has_$1$(&$has_bits$);\n", FieldName(field));
1425     }
1426     std::string default_string =
1427         field->default_value_string().empty()
1428             ? "::" + ProtobufNamespace(options_) +
1429                   "::internal::GetEmptyStringAlreadyInited()"
1430             : QualifiedClassName(field->containing_type(), options_) +
1431                   "::" + MakeDefaultName(field) + ".get()";
1432     format_(
1433         "if (arena != nullptr) {\n"
1434         "  ptr = ctx->ReadArenaString(ptr, &$1$_, arena);\n"
1435         "} else {\n"
1436         "  ptr = "
1437         "$pi_ns$::InlineGreedyStringParser($1$_.MutableNoArenaNoDefault(&$2$"
1438         "), ptr, ctx);"
1439         "\n}\n"
1440         "const std::string* str = &$1$_.Get(); (void)str;\n",
1441         FieldName(field), default_string);
1442   }
1443 
GenerateStrings(const FieldDescriptor * field,bool check_utf8)1444   void GenerateStrings(const FieldDescriptor* field, bool check_utf8) {
1445     FieldOptions::CType ctype = FieldOptions::STRING;
1446     if (!options_.opensource_runtime) {
1447       // Open source doesn't support other ctypes;
1448       ctype = field->options().ctype();
1449     }
1450     if (!field->is_repeated() && !options_.opensource_runtime &&
1451         GetOptimizeFor(field->file(), options_) != FileOptions::LITE_RUNTIME &&
1452         // For now only use arena string for strings with empty defaults.
1453         field->default_value_string().empty() &&
1454         !field->real_containing_oneof() && ctype == FieldOptions::STRING) {
1455       GenerateArenaString(field);
1456     } else {
1457       std::string name;
1458       switch (ctype) {
1459         case FieldOptions::STRING:
1460           name = "GreedyStringParser";
1461           break;
1462         case FieldOptions::CORD:
1463           name = "CordParser";
1464           break;
1465         case FieldOptions::STRING_PIECE:
1466           name = "StringPieceParser";
1467           break;
1468       }
1469       format_(
1470           "auto str = $1$$2$_$3$();\n"
1471           "ptr = $pi_ns$::Inline$4$(str, ptr, ctx);\n",
1472           HasInternalAccessors(ctype) ? "_internal_" : "",
1473           field->is_repeated() && !field->is_packable() ? "add" : "mutable",
1474           FieldName(field), name);
1475     }
1476     if (!check_utf8) return;  // return if this is a bytes field
1477     auto level = GetUtf8CheckMode(field, options_);
1478     switch (level) {
1479       case NONE:
1480         return;
1481       case VERIFY:
1482         format_("#ifndef NDEBUG\n");
1483         break;
1484       case STRICT:
1485         format_("CHK_(");
1486         break;
1487     }
1488     std::string field_name;
1489     field_name = "nullptr";
1490     if (HasDescriptorMethods(field->file(), options_)) {
1491       field_name = StrCat("\"", field->full_name(), "\"");
1492     }
1493     format_("$pi_ns$::VerifyUTF8(str, $1$)", field_name);
1494     switch (level) {
1495       case NONE:
1496         return;
1497       case VERIFY:
1498         format_(
1499             ";\n"
1500             "#endif  // !NDEBUG\n");
1501         break;
1502       case STRICT:
1503         format_(");\n");
1504         break;
1505     }
1506   }
1507 
GenerateLengthDelim(const FieldDescriptor * field)1508   void GenerateLengthDelim(const FieldDescriptor* field) {
1509     if (field->is_packable()) {
1510       std::string enum_validator;
1511       if (field->type() == FieldDescriptor::TYPE_ENUM &&
1512           !HasPreservingUnknownEnumSemantics(field)) {
1513         enum_validator =
1514             StrCat(", ", QualifiedClassName(field->enum_type(), options_),
1515                          "_IsValid, &_internal_metadata_, ", field->number());
1516         format_(
1517             "ptr = "
1518             "$pi_ns$::Packed$1$Parser<$unknown_fields_type$>(_internal_mutable_"
1519             "$2$(), ptr, "
1520             "ctx$3$);\n",
1521             DeclaredTypeMethodName(field->type()), FieldName(field),
1522             enum_validator);
1523       } else {
1524         format_(
1525             "ptr = $pi_ns$::Packed$1$Parser(_internal_mutable_$2$(), ptr, "
1526             "ctx$3$);\n",
1527             DeclaredTypeMethodName(field->type()), FieldName(field),
1528             enum_validator);
1529       }
1530     } else {
1531       auto field_type = field->type();
1532       switch (field_type) {
1533         case FieldDescriptor::TYPE_STRING:
1534           GenerateStrings(field, true /* utf8 */);
1535           break;
1536         case FieldDescriptor::TYPE_BYTES:
1537           GenerateStrings(field, false /* utf8 */);
1538           break;
1539         case FieldDescriptor::TYPE_MESSAGE: {
1540           if (field->is_map()) {
1541             const FieldDescriptor* val =
1542                 field->message_type()->FindFieldByName("value");
1543             GOOGLE_CHECK(val);
1544             if (val->type() == FieldDescriptor::TYPE_ENUM &&
1545                 !HasPreservingUnknownEnumSemantics(field)) {
1546               format_(
1547                   "auto object = "
1548                   "::$proto_ns$::internal::InitEnumParseWrapper<$unknown_"
1549                   "fields_type$>("
1550                   "&$1$_, $2$_IsValid, $3$, &_internal_metadata_);\n"
1551                   "ptr = ctx->ParseMessage(&object, ptr);\n",
1552                   FieldName(field), QualifiedClassName(val->enum_type()),
1553                   field->number());
1554             } else {
1555               format_("ptr = ctx->ParseMessage(&$1$_, ptr);\n",
1556                       FieldName(field));
1557             }
1558           } else if (IsLazy(field, options_)) {
1559             if (field->real_containing_oneof()) {
1560               format_(
1561                   "if (!_internal_has_$1$()) {\n"
1562                   "  clear_$2$();\n"
1563                   "  $2$_.$1$_ = ::$proto_ns$::Arena::CreateMessage<\n"
1564                   "      $pi_ns$::LazyField>(GetArena());\n"
1565                   "  set_has_$1$();\n"
1566                   "}\n"
1567                   "ptr = ctx->ParseMessage($2$_.$1$_, ptr);\n",
1568                   FieldName(field), field->containing_oneof()->name());
1569             } else if (HasHasbit(field)) {
1570               format_(
1571                   "_Internal::set_has_$1$(&$has_bits$);\n"
1572                   "ptr = ctx->ParseMessage(&$1$_, ptr);\n",
1573                   FieldName(field));
1574             } else {
1575               format_("ptr = ctx->ParseMessage(&$1$_, ptr);\n",
1576                       FieldName(field));
1577             }
1578           } else if (IsImplicitWeakField(field, options_, scc_analyzer_)) {
1579             if (!field->is_repeated()) {
1580               format_(
1581                   "ptr = ctx->ParseMessage(_Internal::mutable_$1$(this), "
1582                   "ptr);\n",
1583                   FieldName(field));
1584             } else {
1585               format_(
1586                   "ptr = ctx->ParseMessage($1$_.AddWeak(reinterpret_cast<const "
1587                   "::$proto_ns$::MessageLite*>($2$::_$3$_default_instance_ptr_)"
1588                   "), ptr);\n",
1589                   FieldName(field), Namespace(field->message_type(), options_),
1590                   ClassName(field->message_type()));
1591             }
1592           } else if (IsWeak(field, options_)) {
1593             format_(
1594                 "{\n"
1595                 "  auto* default_ = &reinterpret_cast<const Message&>($1$);\n"
1596                 "  ptr = ctx->ParseMessage(_weak_field_map_.MutableMessage($2$,"
1597                 " default_), ptr);\n"
1598                 "}\n",
1599                 QualifiedDefaultInstanceName(field->message_type(), options_),
1600                 field->number());
1601           } else {
1602             format_("ptr = ctx->ParseMessage(_internal_$1$_$2$(), ptr);\n",
1603                     field->is_repeated() ? "add" : "mutable", FieldName(field));
1604           }
1605           break;
1606         }
1607         default:
1608           GOOGLE_LOG(FATAL) << "Illegal combination for length delimited wiretype "
1609                      << " filed type is " << field->type();
1610       }
1611     }
1612   }
1613 
1614   // Convert a 1 or 2 byte varint into the equivalent value upon a direct load.
SmallVarintValue(uint32 x)1615   static uint32 SmallVarintValue(uint32 x) {
1616     GOOGLE_DCHECK(x < 128 * 128);
1617     if (x >= 128) x += (x & 0xFF80) + 128;
1618     return x;
1619   }
1620 
ShouldRepeat(const FieldDescriptor * descriptor,internal::WireFormatLite::WireType wiretype)1621   static bool ShouldRepeat(const FieldDescriptor* descriptor,
1622                            internal::WireFormatLite::WireType wiretype) {
1623     constexpr int kMaxTwoByteFieldNumber = 16 * 128;
1624     return descriptor->number() < kMaxTwoByteFieldNumber &&
1625            descriptor->is_repeated() &&
1626            (!descriptor->is_packable() ||
1627             wiretype != internal::WireFormatLite::WIRETYPE_LENGTH_DELIMITED);
1628   }
1629 
GenerateFieldBody(internal::WireFormatLite::WireType wiretype,const FieldDescriptor * field)1630   void GenerateFieldBody(internal::WireFormatLite::WireType wiretype,
1631                          const FieldDescriptor* field) {
1632     uint32 tag = WireFormatLite::MakeTag(field->number(), wiretype);
1633     switch (wiretype) {
1634       case WireFormatLite::WIRETYPE_VARINT: {
1635         std::string type = PrimitiveTypeName(options_, field->cpp_type());
1636         std::string prefix = field->is_repeated() ? "add" : "set";
1637         if (field->type() == FieldDescriptor::TYPE_ENUM) {
1638           format_(
1639               "$uint64$ val = $pi_ns$::ReadVarint64(&ptr);\n"
1640               "CHK_(ptr);\n");
1641           if (!HasPreservingUnknownEnumSemantics(field)) {
1642             format_("if (PROTOBUF_PREDICT_TRUE($1$_IsValid(val))) {\n",
1643                     QualifiedClassName(field->enum_type(), options_));
1644             format_.Indent();
1645           }
1646           format_("_internal_$1$_$2$(static_cast<$3$>(val));\n", prefix,
1647                   FieldName(field),
1648                   QualifiedClassName(field->enum_type(), options_));
1649           if (!HasPreservingUnknownEnumSemantics(field)) {
1650             format_.Outdent();
1651             format_(
1652                 "} else {\n"
1653                 "  $pi_ns$::WriteVarint($1$, val, mutable_unknown_fields());\n"
1654                 "}\n",
1655                 field->number());
1656           }
1657         } else {
1658           std::string size = (field->type() == FieldDescriptor::TYPE_SINT32 ||
1659                               field->type() == FieldDescriptor::TYPE_UINT32)
1660                                  ? "32"
1661                                  : "64";
1662           std::string zigzag;
1663           if ((field->type() == FieldDescriptor::TYPE_SINT32 ||
1664                field->type() == FieldDescriptor::TYPE_SINT64)) {
1665             zigzag = "ZigZag";
1666           }
1667           if (field->is_repeated() || field->real_containing_oneof()) {
1668             std::string prefix = field->is_repeated() ? "add" : "set";
1669             format_(
1670                 "_internal_$1$_$2$($pi_ns$::ReadVarint$3$$4$(&ptr));\n"
1671                 "CHK_(ptr);\n",
1672                 prefix, FieldName(field), zigzag, size);
1673           } else {
1674             if (HasHasbit(field)) {
1675               format_("_Internal::set_has_$1$(&$has_bits$);\n",
1676                       FieldName(field));
1677             }
1678             format_(
1679                 "$1$_ = $pi_ns$::ReadVarint$2$$3$(&ptr);\n"
1680                 "CHK_(ptr);\n",
1681                 FieldName(field), zigzag, size);
1682           }
1683         }
1684         break;
1685       }
1686       case WireFormatLite::WIRETYPE_FIXED32:
1687       case WireFormatLite::WIRETYPE_FIXED64: {
1688         std::string type = PrimitiveTypeName(options_, field->cpp_type());
1689         if (field->is_repeated() || field->real_containing_oneof()) {
1690           std::string prefix = field->is_repeated() ? "add" : "set";
1691           format_(
1692               "_internal_$1$_$2$($pi_ns$::UnalignedLoad<$3$>(ptr));\n"
1693               "ptr += sizeof($3$);\n",
1694               prefix, FieldName(field), type);
1695         } else {
1696           if (HasHasbit(field)) {
1697             format_("_Internal::set_has_$1$(&$has_bits$);\n", FieldName(field));
1698           }
1699           format_(
1700               "$1$_ = $pi_ns$::UnalignedLoad<$2$>(ptr);\n"
1701               "ptr += sizeof($2$);\n",
1702               FieldName(field), type);
1703         }
1704         break;
1705       }
1706       case WireFormatLite::WIRETYPE_LENGTH_DELIMITED: {
1707         GenerateLengthDelim(field);
1708         format_("CHK_(ptr);\n");
1709         break;
1710       }
1711       case WireFormatLite::WIRETYPE_START_GROUP: {
1712         format_(
1713             "ptr = ctx->ParseGroup(_internal_$1$_$2$(), ptr, $3$);\n"
1714             "CHK_(ptr);\n",
1715             field->is_repeated() ? "add" : "mutable", FieldName(field), tag);
1716         break;
1717       }
1718       case WireFormatLite::WIRETYPE_END_GROUP: {
1719         GOOGLE_LOG(FATAL) << "Can't have end group field\n";
1720         break;
1721       }
1722     }  // switch (wire_type)
1723   }
1724 
1725   // Returns the tag for this field and in case of repeated packable fields,
1726   // sets a fallback tag in fallback_tag_ptr.
ExpectedTag(const FieldDescriptor * field,uint32 * fallback_tag_ptr)1727   static uint32 ExpectedTag(const FieldDescriptor* field,
1728                             uint32* fallback_tag_ptr) {
1729     uint32 expected_tag;
1730     if (field->is_packable()) {
1731       auto expected_wiretype = WireFormat::WireTypeForFieldType(field->type());
1732       expected_tag =
1733           WireFormatLite::MakeTag(field->number(), expected_wiretype);
1734       GOOGLE_CHECK(expected_wiretype != WireFormatLite::WIRETYPE_LENGTH_DELIMITED);
1735       auto fallback_wiretype = WireFormatLite::WIRETYPE_LENGTH_DELIMITED;
1736       uint32 fallback_tag =
1737           WireFormatLite::MakeTag(field->number(), fallback_wiretype);
1738 
1739       if (field->is_packed()) std::swap(expected_tag, fallback_tag);
1740       *fallback_tag_ptr = fallback_tag;
1741     } else {
1742       auto expected_wiretype = WireFormat::WireTypeForField(field);
1743       expected_tag =
1744           WireFormatLite::MakeTag(field->number(), expected_wiretype);
1745     }
1746     return expected_tag;
1747   }
1748 
GenerateParseLoop(const Descriptor * descriptor,const std::vector<const FieldDescriptor * > & ordered_fields)1749   void GenerateParseLoop(
1750       const Descriptor* descriptor,
1751       const std::vector<const FieldDescriptor*>& ordered_fields) {
1752     format_(
1753         "while (!ctx->Done(&ptr)) {\n"
1754         "  $uint32$ tag;\n"
1755         "  ptr = $pi_ns$::ReadTag(ptr, &tag);\n"
1756         "  CHK_(ptr);\n");
1757     if (!ordered_fields.empty()) format_("  switch (tag >> 3) {\n");
1758 
1759     format_.Indent();
1760     format_.Indent();
1761 
1762     for (const auto* field : ordered_fields) {
1763       PrintFieldComment(format_, field);
1764       format_("case $1$:\n", field->number());
1765       format_.Indent();
1766       uint32 fallback_tag = 0;
1767       uint32 expected_tag = ExpectedTag(field, &fallback_tag);
1768       format_(
1769           "if (PROTOBUF_PREDICT_TRUE(static_cast<$uint8$>(tag) == $1$)) {\n",
1770           expected_tag & 0xFF);
1771       format_.Indent();
1772       auto wiretype = WireFormatLite::GetTagWireType(expected_tag);
1773       uint32 tag = WireFormatLite::MakeTag(field->number(), wiretype);
1774       int tag_size = io::CodedOutputStream::VarintSize32(tag);
1775       bool is_repeat = ShouldRepeat(field, wiretype);
1776       if (is_repeat) {
1777         format_(
1778             "ptr -= $1$;\n"
1779             "do {\n"
1780             "  ptr += $1$;\n",
1781             tag_size);
1782         format_.Indent();
1783       }
1784       GenerateFieldBody(wiretype, field);
1785       if (is_repeat) {
1786         format_.Outdent();
1787         format_(
1788             "  if (!ctx->DataAvailable(ptr)) break;\n"
1789             "} while ($pi_ns$::ExpectTag<$1$>(ptr));\n",
1790             tag);
1791       }
1792       format_.Outdent();
1793       if (fallback_tag) {
1794         format_("} else if (static_cast<$uint8$>(tag) == $1$) {\n",
1795                 fallback_tag & 0xFF);
1796         format_.Indent();
1797         GenerateFieldBody(WireFormatLite::GetTagWireType(fallback_tag), field);
1798         format_.Outdent();
1799       }
1800       format_.Outdent();
1801       format_(
1802           "  } else goto handle_unusual;\n"
1803           "  continue;\n");
1804     }  // for loop over ordered fields
1805 
1806     // Default case
1807     if (!ordered_fields.empty()) format_("default: {\n");
1808     if (!ordered_fields.empty()) format_("handle_unusual:\n");
1809     format_(
1810         "  if ((tag & 7) == 4 || tag == 0) {\n"
1811         "    ctx->SetLastTag(tag);\n"
1812         "    goto success;\n"
1813         "  }\n");
1814     if (IsMapEntryMessage(descriptor)) {
1815       format_("  continue;\n");
1816     } else {
1817       if (descriptor->extension_range_count() > 0) {
1818         format_("if (");
1819         for (int i = 0; i < descriptor->extension_range_count(); i++) {
1820           const Descriptor::ExtensionRange* range =
1821               descriptor->extension_range(i);
1822           if (i > 0) format_(" ||\n    ");
1823 
1824           uint32 start_tag = WireFormatLite::MakeTag(
1825               range->start, static_cast<WireFormatLite::WireType>(0));
1826           uint32 end_tag = WireFormatLite::MakeTag(
1827               range->end, static_cast<WireFormatLite::WireType>(0));
1828 
1829           if (range->end > FieldDescriptor::kMaxNumber) {
1830             format_("($1$u <= tag)", start_tag);
1831           } else {
1832             format_("($1$u <= tag && tag < $2$u)", start_tag, end_tag);
1833           }
1834         }
1835         format_(") {\n");
1836         format_(
1837             "  ptr = _extensions_.ParseField(tag, ptr,\n"
1838             "      internal_default_instance(), &_internal_metadata_, ctx);\n"
1839             "  CHK_(ptr != nullptr);\n"
1840             "  continue;\n"
1841             "}\n");
1842       }
1843       format_(
1844           "  ptr = UnknownFieldParse(tag,\n"
1845           "      _internal_metadata_.mutable_unknown_fields<$unknown_"
1846           "fields_type$>(),\n"
1847           "      ptr, ctx);\n"
1848           "  CHK_(ptr != nullptr);\n"
1849           "  continue;\n");
1850     }
1851     if (!ordered_fields.empty()) format_("}\n");  // default case
1852     format_.Outdent();
1853     format_.Outdent();
1854     if (!ordered_fields.empty()) format_("  }  // switch\n");
1855     format_("}  // while\n");
1856   }
1857 };
1858 
GenerateParserLoop(const Descriptor * descriptor,int num_hasbits,const Options & options,MessageSCCAnalyzer * scc_analyzer,io::Printer * printer)1859 void GenerateParserLoop(const Descriptor* descriptor, int num_hasbits,
1860                         const Options& options,
1861                         MessageSCCAnalyzer* scc_analyzer,
1862                         io::Printer* printer) {
1863   ParseLoopGenerator generator(num_hasbits, options, scc_analyzer, printer);
1864   generator.GenerateParserLoop(descriptor);
1865 }
1866 
HasExtensionFromFile(const Message & msg,const FileDescriptor * file,const Options & options,bool * has_opt_codesize_extension)1867 static bool HasExtensionFromFile(const Message& msg, const FileDescriptor* file,
1868                                  const Options& options,
1869                                  bool* has_opt_codesize_extension) {
1870   std::vector<const FieldDescriptor*> fields;
1871   auto reflection = msg.GetReflection();
1872   reflection->ListFields(msg, &fields);
1873   for (auto field : fields) {
1874     const auto* field_msg = field->message_type();
1875     if (field_msg == nullptr) {
1876       // It so happens that enums Is_Valid are still generated so enums work.
1877       // Only messages have potential problems.
1878       continue;
1879     }
1880     // If this option has an extension set AND that extension is defined in the
1881     // same file we have bootstrap problem.
1882     if (field->is_extension()) {
1883       const auto* msg_extension_file = field->message_type()->file();
1884       if (msg_extension_file == file) return true;
1885       if (has_opt_codesize_extension &&
1886           GetOptimizeFor(msg_extension_file, options) ==
1887               FileOptions::CODE_SIZE) {
1888         *has_opt_codesize_extension = true;
1889       }
1890     }
1891     // Recurse in this field to see if there is a problem in there
1892     if (field->is_repeated()) {
1893       for (int i = 0; i < reflection->FieldSize(msg, field); i++) {
1894         if (HasExtensionFromFile(reflection->GetRepeatedMessage(msg, field, i),
1895                                  file, options, has_opt_codesize_extension)) {
1896           return true;
1897         }
1898       }
1899     } else {
1900       if (HasExtensionFromFile(reflection->GetMessage(msg, field), file,
1901                                options, has_opt_codesize_extension)) {
1902         return true;
1903       }
1904     }
1905   }
1906   return false;
1907 }
1908 
HasBootstrapProblem(const FileDescriptor * file,const Options & options,bool * has_opt_codesize_extension)1909 static bool HasBootstrapProblem(const FileDescriptor* file,
1910                                 const Options& options,
1911                                 bool* has_opt_codesize_extension) {
1912   static auto& cache = *new std::unordered_map<const FileDescriptor*, bool>;
1913   auto it = cache.find(file);
1914   if (it != cache.end()) return it->second;
1915   // In order to build the data structures for the reflective parse, it needs
1916   // to parse the serialized descriptor describing all the messages defined in
1917   // this file. Obviously this presents a bootstrap problem for descriptor
1918   // messages.
1919   if (file->name() == "net/proto2/proto/descriptor.proto" ||
1920       file->name() == "google/protobuf/descriptor.proto") {
1921     return true;
1922   }
1923   // Unfortunately we're not done yet. The descriptor option messages allow
1924   // for extensions. So we need to be able to parse these extensions in order
1925   // to parse the file descriptor for a file that has custom options. This is a
1926   // problem when these custom options extensions are defined in the same file.
1927   FileDescriptorProto linkedin_fd_proto;
1928   const DescriptorPool* pool = file->pool();
1929   const Descriptor* fd_proto_descriptor =
1930       pool->FindMessageTypeByName(linkedin_fd_proto.GetTypeName());
1931   // Not all pools have descriptor.proto in them. In these cases there for sure
1932   // are no custom options.
1933   if (fd_proto_descriptor == nullptr) return false;
1934 
1935   // It's easier to inspect file as a proto, because we can use reflection on
1936   // the proto to iterate over all content.
1937   file->CopyTo(&linkedin_fd_proto);
1938 
1939   // linkedin_fd_proto is a generated proto linked in the proto compiler. As
1940   // such it doesn't know the extensions that are potentially present in the
1941   // descriptor pool constructed from the protos that are being compiled. These
1942   // custom options are therefore in the unknown fields.
1943   // By building the corresponding FileDescriptorProto in the pool constructed
1944   // by the protos that are being compiled, ie. file's pool, the unknown fields
1945   // are converted to extensions.
1946   DynamicMessageFactory factory(pool);
1947   Message* fd_proto = factory.GetPrototype(fd_proto_descriptor)->New();
1948   fd_proto->ParseFromString(linkedin_fd_proto.SerializeAsString());
1949 
1950   bool& res = cache[file];
1951   res = HasExtensionFromFile(*fd_proto, file, options,
1952                              has_opt_codesize_extension);
1953   delete fd_proto;
1954   return res;
1955 }
1956 
GetOptimizeFor(const FileDescriptor * file,const Options & options,bool * has_opt_codesize_extension)1957 FileOptions_OptimizeMode GetOptimizeFor(const FileDescriptor* file,
1958                                         const Options& options,
1959                                         bool* has_opt_codesize_extension) {
1960   if (has_opt_codesize_extension) *has_opt_codesize_extension = false;
1961   switch (options.enforce_mode) {
1962     case EnforceOptimizeMode::kSpeed:
1963       return FileOptions::SPEED;
1964     case EnforceOptimizeMode::kLiteRuntime:
1965       return FileOptions::LITE_RUNTIME;
1966     case EnforceOptimizeMode::kCodeSize:
1967       if (file->options().optimize_for() == FileOptions::LITE_RUNTIME) {
1968         return FileOptions::LITE_RUNTIME;
1969       }
1970       if (HasBootstrapProblem(file, options, has_opt_codesize_extension)) {
1971         return FileOptions::SPEED;
1972       }
1973       return FileOptions::CODE_SIZE;
1974     case EnforceOptimizeMode::kNoEnforcement:
1975       if (file->options().optimize_for() == FileOptions::CODE_SIZE) {
1976         if (HasBootstrapProblem(file, options, has_opt_codesize_extension)) {
1977           GOOGLE_LOG(WARNING) << "Proto states optimize_for = CODE_SIZE, but we "
1978                           "cannot honor that because it contains custom option "
1979                           "extensions defined in the same proto.";
1980           return FileOptions::SPEED;
1981         }
1982       }
1983       return file->options().optimize_for();
1984   }
1985 
1986   GOOGLE_LOG(FATAL) << "Unknown optimization enforcement requested.";
1987   // The phony return below serves to silence a warning from GCC 8.
1988   return FileOptions::SPEED;
1989 }
1990 
1991 }  // namespace cpp
1992 }  // namespace compiler
1993 }  // namespace protobuf
1994 }  // namespace google
1995