• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 //     * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 //     * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 //     * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 // Author: kenton@google.com (Kenton Varda)
32 //  Based on original Protocol Buffers design by
33 //  Sanjay Ghemawat, Jeff Dean, and others.
34 
35 #include <google/protobuf/compiler/cpp/cpp_helpers.h>
36 
#include <cmath>
#include <functional>
#include <limits>
#include <map>
#include <queue>
#include <unordered_set>
#include <vector>
43 
44 #include <google/protobuf/stubs/common.h>
45 #include <google/protobuf/stubs/logging.h>
46 #include <google/protobuf/compiler/cpp/cpp_options.h>
47 #include <google/protobuf/descriptor.pb.h>
48 #include <google/protobuf/descriptor.h>
49 #include <google/protobuf/compiler/scc.h>
50 #include <google/protobuf/io/printer.h>
51 #include <google/protobuf/io/zero_copy_stream.h>
52 #include <google/protobuf/dynamic_message.h>
53 #include <google/protobuf/wire_format.h>
54 #include <google/protobuf/wire_format_lite.h>
55 #include <google/protobuf/stubs/strutil.h>
56 #include <google/protobuf/stubs/substitute.h>
57 #include <google/protobuf/stubs/hash.h>
58 
59 #include <google/protobuf/port_def.inc>
60 
61 namespace google {
62 namespace protobuf {
63 namespace compiler {
64 namespace cpp {
65 
66 namespace {
67 
// Message name and .proto path used to refer to the Any type.
constexpr char kAnyMessageName[] = "Any";
constexpr char kAnyProtoFile[] = "google/protobuf/any.proto";
70 
DotsToColons(const std::string & name)71 std::string DotsToColons(const std::string& name) {
72   return StringReplace(name, ".", "::", true);
73 }
74 
// Identifiers that must not be emitted verbatim in generated code: "NULL"
// followed by C++ keywords and alternative operator spellings. Entries are
// kept in their established order.
static const char* const kKeywordList[] = {
    "NULL",
    // a - c
    "alignas", "alignof", "and", "and_eq", "asm", "auto",
    "bitand", "bitor", "bool", "break",
    "case", "catch", "char", "class", "compl", "const", "constexpr",
    "const_cast", "continue",
    // d - n
    "decltype", "default", "delete", "do", "double", "dynamic_cast",
    "else", "enum", "explicit", "export", "extern",
    "false", "float", "for", "friend",
    "goto",
    "if", "inline", "int",
    "long",
    "mutable",
    "namespace", "new", "noexcept", "not", "not_eq", "nullptr",
    // o - s
    "operator", "or", "or_eq",
    "private", "protected", "public",
    "register", "reinterpret_cast", "return",
    "short", "signed", "sizeof", "static", "static_assert", "static_cast",
    "struct", "switch",
    // t - x
    "template", "this", "thread_local", "throw", "true", "try", "typedef",
    "typeid", "typename",
    "union", "unsigned", "using",
    "virtual", "void", "volatile",
    "wchar_t", "while",
    "xor", "xor_eq"};
159 
MakeKeywordsMap()160 static std::unordered_set<std::string>* MakeKeywordsMap() {
161   auto* result = new std::unordered_set<std::string>();
162   for (const auto keyword : kKeywordList) {
163     result->emplace(keyword);
164   }
165   return result;
166 }
167 
168 static std::unordered_set<std::string>& kKeywords = *MakeKeywordsMap();
169 
170 // Encode [0..63] as 'A'-'Z', 'a'-'z', '0'-'9', '_'
Base63Char(int value)171 char Base63Char(int value) {
172   GOOGLE_CHECK_GE(value, 0);
173   if (value < 26) return 'A' + value;
174   value -= 26;
175   if (value < 26) return 'a' + value;
176   value -= 26;
177   if (value < 10) return '0' + value;
178   GOOGLE_CHECK_EQ(value, 10);
179   return '_';
180 }
181 
182 // Given a c identifier has 63 legal characters we can't implement base64
183 // encoding. So we return the k least significant "digits" in base 63.
184 template <typename I>
Base63(I n,int k)185 std::string Base63(I n, int k) {
186   std::string res;
187   while (k-- > 0) {
188     res += Base63Char(static_cast<int>(n % 63));
189     n /= 63;
190   }
191   return res;
192 }
193 
IntTypeName(const Options & options,const std::string & type)194 std::string IntTypeName(const Options& options, const std::string& type) {
195   if (options.opensource_runtime) {
196     return "::PROTOBUF_NAMESPACE_ID::" + type;
197   } else {
198     return "::" + type;
199   }
200 }
201 
SetIntVar(const Options & options,const std::string & type,std::map<std::string,std::string> * variables)202 void SetIntVar(const Options& options, const std::string& type,
203                std::map<std::string, std::string>* variables) {
204   (*variables)[type] = IntTypeName(options, type);
205 }
206 
HasInternalAccessors(const FieldOptions::CType ctype)207 bool HasInternalAccessors(const FieldOptions::CType ctype) {
208   return ctype == FieldOptions::STRING || ctype == FieldOptions::CORD;
209 }
210 
211 }  // namespace
212 
SetCommonVars(const Options & options,std::map<std::string,std::string> * variables)213 void SetCommonVars(const Options& options,
214                    std::map<std::string, std::string>* variables) {
215   (*variables)["proto_ns"] = ProtobufNamespace(options);
216 
217   // Warning: there is some clever naming/splitting here to avoid extract script
218   // rewrites.  The names of these variables must not be things that the extract
219   // script will rewrite.  That's why we use "CHK" (for example) instead of
220   // "GOOGLE_CHECK".
221   if (options.opensource_runtime) {
222     (*variables)["GOOGLE_PROTOBUF"] = "GOOGLE_PROTOBUF";
223     (*variables)["CHK"] = "GOOGLE_CHECK";
224     (*variables)["DCHK"] = "GOOGLE_DCHECK";
225   } else {
226     // These values are things the extract script would rewrite if we did not
227     // split them.  It might not strictly matter since we don't generate google3
228     // code in open-source.  But it's good to prevent surprising things from
229     // happening.
230     (*variables)["GOOGLE_PROTOBUF"] =
231         "GOOGLE3"
232         "_PROTOBUF";
233     (*variables)["CHK"] =
234         "CH"
235         "ECK";
236     (*variables)["DCHK"] =
237         "DCH"
238         "ECK";
239   }
240 
241   SetIntVar(options, "int8", variables);
242   SetIntVar(options, "uint8", variables);
243   SetIntVar(options, "uint32", variables);
244   SetIntVar(options, "uint64", variables);
245   SetIntVar(options, "int32", variables);
246   SetIntVar(options, "int64", variables);
247   (*variables)["string"] = "std::string";
248 }
249 
SetUnknkownFieldsVariable(const Descriptor * descriptor,const Options & options,std::map<std::string,std::string> * variables)250 void SetUnknkownFieldsVariable(const Descriptor* descriptor,
251                                const Options& options,
252                                std::map<std::string, std::string>* variables) {
253   std::string proto_ns = ProtobufNamespace(options);
254   std::string unknown_fields_type;
255   if (UseUnknownFieldSet(descriptor->file(), options)) {
256     unknown_fields_type = "::" + proto_ns + "::UnknownFieldSet";
257     (*variables)["unknown_fields"] =
258         "_internal_metadata_.unknown_fields<" + unknown_fields_type + ">(" +
259         unknown_fields_type + "::default_instance)";
260   } else {
261     unknown_fields_type =
262         PrimitiveTypeName(options, FieldDescriptor::CPPTYPE_STRING);
263     (*variables)["unknown_fields"] = "_internal_metadata_.unknown_fields<" +
264                                      unknown_fields_type + ">(::" + proto_ns +
265                                      "::internal::GetEmptyString)";
266   }
267   (*variables)["unknown_fields_type"] = unknown_fields_type;
268   (*variables)["have_unknown_fields"] =
269       "_internal_metadata_.have_unknown_fields()";
270   (*variables)["mutable_unknown_fields"] =
271       "_internal_metadata_.mutable_unknown_fields<" + unknown_fields_type +
272       ">()";
273 }
274 
// Converts an identifier to camel case.
//
// ASCII letters and digits are copied to the result; every other character
// (e.g. '_') is dropped. A lowercase letter is upper-cased when it is the
// first character and `cap_next_letter` is true, or when it directly follows
// a digit or a dropped character. Uppercase letters are copied unchanged.
//
// Args:
//   input: identifier to convert.
//   cap_next_letter: whether the first letter should be capitalized.
// Returns the converted identifier.
std::string UnderscoresToCamelCase(const std::string& input,
                                   bool cap_next_letter) {
  std::string result;
  result.reserve(input.size());
  // Note:  ctype.h is avoided on purpose because of locales.
  for (const char c : input) {
    if ('a' <= c && c <= 'z') {
      // Explicit cast: the arithmetic is done in int and must narrow to char.
      result += cap_next_letter ? static_cast<char>(c + ('A' - 'a')) : c;
      cap_next_letter = false;
    } else if ('A' <= c && c <= 'Z') {
      // Capital letters are left as-is.
      result += c;
      cap_next_letter = false;
    } else if ('0' <= c && c <= '9') {
      result += c;
      cap_next_letter = true;
    } else {
      // Separator: drop it and capitalize whatever letter comes next.
      cap_next_letter = true;
    }
  }
  return result;
}
300 
// Comment lines made of '=' and '-' characters respectively, each terminated
// by a newline.
// NOTE(review): presumably emitted into generated code as section dividers
// and declared in the matching header; confirm against callers.
const char kThickSeparator[] =
    "// ===================================================================\n";
const char kThinSeparator[] =
    "// -------------------------------------------------------------------\n";
305 
CanInitializeByZeroing(const FieldDescriptor * field)306 bool CanInitializeByZeroing(const FieldDescriptor* field) {
307   if (field->is_repeated() || field->is_extension()) return false;
308   switch (field->cpp_type()) {
309     case FieldDescriptor::CPPTYPE_ENUM:
310       return field->default_value_enum()->number() == 0;
311     case FieldDescriptor::CPPTYPE_INT32:
312       return field->default_value_int32() == 0;
313     case FieldDescriptor::CPPTYPE_INT64:
314       return field->default_value_int64() == 0;
315     case FieldDescriptor::CPPTYPE_UINT32:
316       return field->default_value_uint32() == 0;
317     case FieldDescriptor::CPPTYPE_UINT64:
318       return field->default_value_uint64() == 0;
319     case FieldDescriptor::CPPTYPE_FLOAT:
320       return field->default_value_float() == 0;
321     case FieldDescriptor::CPPTYPE_DOUBLE:
322       return field->default_value_double() == 0;
323     case FieldDescriptor::CPPTYPE_BOOL:
324       return field->default_value_bool() == false;
325     default:
326       return false;
327   }
328 }
329 
ClassName(const Descriptor * descriptor)330 std::string ClassName(const Descriptor* descriptor) {
331   const Descriptor* parent = descriptor->containing_type();
332   std::string res;
333   if (parent) res += ClassName(parent) + "_";
334   res += descriptor->name();
335   if (IsMapEntryMessage(descriptor)) res += "_DoNotUse";
336   return ResolveKeyword(res);
337 }
338 
ClassName(const EnumDescriptor * enum_descriptor)339 std::string ClassName(const EnumDescriptor* enum_descriptor) {
340   if (enum_descriptor->containing_type() == nullptr) {
341     return ResolveKeyword(enum_descriptor->name());
342   } else {
343     return ClassName(enum_descriptor->containing_type()) + "_" +
344            enum_descriptor->name();
345   }
346 }
347 
QualifiedClassName(const Descriptor * d,const Options & options)348 std::string QualifiedClassName(const Descriptor* d, const Options& options) {
349   return QualifiedFileLevelSymbol(d->file(), ClassName(d), options);
350 }
351 
QualifiedClassName(const EnumDescriptor * d,const Options & options)352 std::string QualifiedClassName(const EnumDescriptor* d,
353                                const Options& options) {
354   return QualifiedFileLevelSymbol(d->file(), ClassName(d), options);
355 }
356 
QualifiedClassName(const Descriptor * d)357 std::string QualifiedClassName(const Descriptor* d) {
358   return QualifiedClassName(d, Options());
359 }
360 
QualifiedClassName(const EnumDescriptor * d)361 std::string QualifiedClassName(const EnumDescriptor* d) {
362   return QualifiedClassName(d, Options());
363 }
364 
QualifiedExtensionName(const FieldDescriptor * d,const Options & options)365 std::string QualifiedExtensionName(const FieldDescriptor* d,
366                                    const Options& options) {
367   GOOGLE_DCHECK(d->is_extension());
368   return QualifiedFileLevelSymbol(d->file(), FieldName(d), options);
369 }
370 
QualifiedExtensionName(const FieldDescriptor * d)371 std::string QualifiedExtensionName(const FieldDescriptor* d) {
372   return QualifiedExtensionName(d, Options());
373 }
374 
Namespace(const std::string & package)375 std::string Namespace(const std::string& package) {
376   if (package.empty()) return "";
377   return "::" + DotsToColons(package);
378 }
379 
Namespace(const FileDescriptor * d,const Options & options)380 std::string Namespace(const FileDescriptor* d, const Options& options) {
381   std::string ret = Namespace(d->package());
382   if (IsWellKnownMessage(d) && options.opensource_runtime) {
383     // Written with string concatenation to prevent rewriting of
384     // ::google::protobuf.
385     ret = StringReplace(ret,
386                         "::google::"
387                         "protobuf",
388                         "PROTOBUF_NAMESPACE_ID", false);
389   }
390   return ret;
391 }
392 
Namespace(const Descriptor * d,const Options & options)393 std::string Namespace(const Descriptor* d, const Options& options) {
394   return Namespace(d->file(), options);
395 }
396 
Namespace(const FieldDescriptor * d,const Options & options)397 std::string Namespace(const FieldDescriptor* d, const Options& options) {
398   return Namespace(d->file(), options);
399 }
400 
Namespace(const EnumDescriptor * d,const Options & options)401 std::string Namespace(const EnumDescriptor* d, const Options& options) {
402   return Namespace(d->file(), options);
403 }
404 
DefaultInstanceType(const Descriptor * descriptor,const Options & options)405 std::string DefaultInstanceType(const Descriptor* descriptor,
406                                 const Options& options) {
407   return ClassName(descriptor) + "DefaultTypeInternal";
408 }
409 
DefaultInstanceName(const Descriptor * descriptor,const Options & options)410 std::string DefaultInstanceName(const Descriptor* descriptor,
411                                 const Options& options) {
412   return "_" + ClassName(descriptor, false) + "_default_instance_";
413 }
414 
DefaultInstancePtr(const Descriptor * descriptor,const Options & options)415 std::string DefaultInstancePtr(const Descriptor* descriptor,
416                                const Options& options) {
417   return DefaultInstanceName(descriptor, options) + "ptr_";
418 }
419 
QualifiedDefaultInstanceName(const Descriptor * descriptor,const Options & options)420 std::string QualifiedDefaultInstanceName(const Descriptor* descriptor,
421                                          const Options& options) {
422   return QualifiedFileLevelSymbol(
423       descriptor->file(), DefaultInstanceName(descriptor, options), options);
424 }
425 
QualifiedDefaultInstancePtr(const Descriptor * descriptor,const Options & options)426 std::string QualifiedDefaultInstancePtr(const Descriptor* descriptor,
427                                         const Options& options) {
428   return QualifiedDefaultInstanceName(descriptor, options) + "ptr_";
429 }
430 
DescriptorTableName(const FileDescriptor * file,const Options & options)431 std::string DescriptorTableName(const FileDescriptor* file,
432                                 const Options& options) {
433   return UniqueName("descriptor_table", file, options);
434 }
435 
FileDllExport(const FileDescriptor * file,const Options & options)436 std::string FileDllExport(const FileDescriptor* file, const Options& options) {
437   return UniqueName("PROTOBUF_INTERNAL_EXPORT", file, options);
438 }
439 
SuperClassName(const Descriptor * descriptor,const Options & options)440 std::string SuperClassName(const Descriptor* descriptor,
441                            const Options& options) {
442   return "::" + ProtobufNamespace(options) +
443          (HasDescriptorMethods(descriptor->file(), options) ? "::Message"
444                                                             : "::MessageLite");
445 }
446 
ResolveKeyword(const std::string & name)447 std::string ResolveKeyword(const std::string& name) {
448   if (kKeywords.count(name) > 0) {
449     return name + "_";
450   }
451   return name;
452 }
453 
FieldName(const FieldDescriptor * field)454 std::string FieldName(const FieldDescriptor* field) {
455   std::string result = field->name();
456   LowerString(&result);
457   if (kKeywords.count(result) > 0) {
458     result.append("_");
459   }
460   return result;
461 }
462 
EnumValueName(const EnumValueDescriptor * enum_value)463 std::string EnumValueName(const EnumValueDescriptor* enum_value) {
464   std::string result = enum_value->name();
465   if (kKeywords.count(result) > 0) {
466     result.append("_");
467   }
468   return result;
469 }
470 
EstimateAlignmentSize(const FieldDescriptor * field)471 int EstimateAlignmentSize(const FieldDescriptor* field) {
472   if (field == nullptr) return 0;
473   if (field->is_repeated()) return 8;
474   switch (field->cpp_type()) {
475     case FieldDescriptor::CPPTYPE_BOOL:
476       return 1;
477 
478     case FieldDescriptor::CPPTYPE_INT32:
479     case FieldDescriptor::CPPTYPE_UINT32:
480     case FieldDescriptor::CPPTYPE_ENUM:
481     case FieldDescriptor::CPPTYPE_FLOAT:
482       return 4;
483 
484     case FieldDescriptor::CPPTYPE_INT64:
485     case FieldDescriptor::CPPTYPE_UINT64:
486     case FieldDescriptor::CPPTYPE_DOUBLE:
487     case FieldDescriptor::CPPTYPE_STRING:
488     case FieldDescriptor::CPPTYPE_MESSAGE:
489       return 8;
490   }
491   GOOGLE_LOG(FATAL) << "Can't get here.";
492   return -1;  // Make compiler happy.
493 }
494 
FieldConstantName(const FieldDescriptor * field)495 std::string FieldConstantName(const FieldDescriptor* field) {
496   std::string field_name = UnderscoresToCamelCase(field->name(), true);
497   std::string result = "k" + field_name + "FieldNumber";
498 
499   if (!field->is_extension() &&
500       field->containing_type()->FindFieldByCamelcaseName(
501           field->camelcase_name()) != field) {
502     // This field's camelcase name is not unique.  As a hack, add the field
503     // number to the constant name.  This makes the constant rather useless,
504     // but what can we do?
505     result += "_" + StrCat(field->number());
506   }
507 
508   return result;
509 }
510 
FieldMessageTypeName(const FieldDescriptor * field,const Options & options)511 std::string FieldMessageTypeName(const FieldDescriptor* field,
512                                  const Options& options) {
513   // Note:  The Google-internal version of Protocol Buffers uses this function
514   //   as a hook point for hacks to support legacy code.
515   return QualifiedClassName(field->message_type(), options);
516 }
517 
StripProto(const std::string & filename)518 std::string StripProto(const std::string& filename) {
519   if (HasSuffixString(filename, ".protodevel")) {
520     return StripSuffixString(filename, ".protodevel");
521   } else {
522     return StripSuffixString(filename, ".proto");
523   }
524 }
525 
PrimitiveTypeName(FieldDescriptor::CppType type)526 const char* PrimitiveTypeName(FieldDescriptor::CppType type) {
527   switch (type) {
528     case FieldDescriptor::CPPTYPE_INT32:
529       return "::google::protobuf::int32";
530     case FieldDescriptor::CPPTYPE_INT64:
531       return "::google::protobuf::int64";
532     case FieldDescriptor::CPPTYPE_UINT32:
533       return "::google::protobuf::uint32";
534     case FieldDescriptor::CPPTYPE_UINT64:
535       return "::google::protobuf::uint64";
536     case FieldDescriptor::CPPTYPE_DOUBLE:
537       return "double";
538     case FieldDescriptor::CPPTYPE_FLOAT:
539       return "float";
540     case FieldDescriptor::CPPTYPE_BOOL:
541       return "bool";
542     case FieldDescriptor::CPPTYPE_ENUM:
543       return "int";
544     case FieldDescriptor::CPPTYPE_STRING:
545       return "std::string";
546     case FieldDescriptor::CPPTYPE_MESSAGE:
547       return nullptr;
548 
549       // No default because we want the compiler to complain if any new
550       // CppTypes are added.
551   }
552 
553   GOOGLE_LOG(FATAL) << "Can't get here.";
554   return nullptr;
555 }
556 
PrimitiveTypeName(const Options & options,FieldDescriptor::CppType type)557 std::string PrimitiveTypeName(const Options& options,
558                               FieldDescriptor::CppType type) {
559   switch (type) {
560     case FieldDescriptor::CPPTYPE_INT32:
561       return IntTypeName(options, "int32");
562     case FieldDescriptor::CPPTYPE_INT64:
563       return IntTypeName(options, "int64");
564     case FieldDescriptor::CPPTYPE_UINT32:
565       return IntTypeName(options, "uint32");
566     case FieldDescriptor::CPPTYPE_UINT64:
567       return IntTypeName(options, "uint64");
568     case FieldDescriptor::CPPTYPE_DOUBLE:
569       return "double";
570     case FieldDescriptor::CPPTYPE_FLOAT:
571       return "float";
572     case FieldDescriptor::CPPTYPE_BOOL:
573       return "bool";
574     case FieldDescriptor::CPPTYPE_ENUM:
575       return "int";
576     case FieldDescriptor::CPPTYPE_STRING:
577       return "std::string";
578     case FieldDescriptor::CPPTYPE_MESSAGE:
579       return "";
580 
581       // No default because we want the compiler to complain if any new
582       // CppTypes are added.
583   }
584 
585   GOOGLE_LOG(FATAL) << "Can't get here.";
586   return "";
587 }
588 
DeclaredTypeMethodName(FieldDescriptor::Type type)589 const char* DeclaredTypeMethodName(FieldDescriptor::Type type) {
590   switch (type) {
591     case FieldDescriptor::TYPE_INT32:
592       return "Int32";
593     case FieldDescriptor::TYPE_INT64:
594       return "Int64";
595     case FieldDescriptor::TYPE_UINT32:
596       return "UInt32";
597     case FieldDescriptor::TYPE_UINT64:
598       return "UInt64";
599     case FieldDescriptor::TYPE_SINT32:
600       return "SInt32";
601     case FieldDescriptor::TYPE_SINT64:
602       return "SInt64";
603     case FieldDescriptor::TYPE_FIXED32:
604       return "Fixed32";
605     case FieldDescriptor::TYPE_FIXED64:
606       return "Fixed64";
607     case FieldDescriptor::TYPE_SFIXED32:
608       return "SFixed32";
609     case FieldDescriptor::TYPE_SFIXED64:
610       return "SFixed64";
611     case FieldDescriptor::TYPE_FLOAT:
612       return "Float";
613     case FieldDescriptor::TYPE_DOUBLE:
614       return "Double";
615 
616     case FieldDescriptor::TYPE_BOOL:
617       return "Bool";
618     case FieldDescriptor::TYPE_ENUM:
619       return "Enum";
620 
621     case FieldDescriptor::TYPE_STRING:
622       return "String";
623     case FieldDescriptor::TYPE_BYTES:
624       return "Bytes";
625     case FieldDescriptor::TYPE_GROUP:
626       return "Group";
627     case FieldDescriptor::TYPE_MESSAGE:
628       return "Message";
629 
630       // No default because we want the compiler to complain if any new
631       // types are added.
632   }
633   GOOGLE_LOG(FATAL) << "Can't get here.";
634   return "";
635 }
636 
Int32ToString(int number)637 std::string Int32ToString(int number) {
638   if (number == kint32min) {
639     // This needs to be special-cased, see explanation here:
640     // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=52661
641     return StrCat(number + 1, " - 1");
642   } else {
643     return StrCat(number);
644   }
645 }
646 
Int64ToString(const std::string & macro_prefix,int64 number)647 std::string Int64ToString(const std::string& macro_prefix, int64 number) {
648   if (number == kint64min) {
649     // This needs to be special-cased, see explanation here:
650     // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=52661
651     return StrCat(macro_prefix, "_LONGLONG(", number + 1, ") - 1");
652   }
653   return StrCat(macro_prefix, "_LONGLONG(", number, ")");
654 }
655 
UInt64ToString(const std::string & macro_prefix,uint64 number)656 std::string UInt64ToString(const std::string& macro_prefix, uint64 number) {
657   return StrCat(macro_prefix, "_ULONGLONG(", number, ")");
658 }
659 
DefaultValue(const FieldDescriptor * field)660 std::string DefaultValue(const FieldDescriptor* field) {
661   switch (field->cpp_type()) {
662     case FieldDescriptor::CPPTYPE_INT64:
663       return Int64ToString("GG", field->default_value_int64());
664     case FieldDescriptor::CPPTYPE_UINT64:
665       return UInt64ToString("GG", field->default_value_uint64());
666     default:
667       return DefaultValue(Options(), field);
668   }
669 }
670 
DefaultValue(const Options & options,const FieldDescriptor * field)671 std::string DefaultValue(const Options& options, const FieldDescriptor* field) {
672   switch (field->cpp_type()) {
673     case FieldDescriptor::CPPTYPE_INT32:
674       return Int32ToString(field->default_value_int32());
675     case FieldDescriptor::CPPTYPE_UINT32:
676       return StrCat(field->default_value_uint32()) + "u";
677     case FieldDescriptor::CPPTYPE_INT64:
678       return Int64ToString("PROTOBUF", field->default_value_int64());
679     case FieldDescriptor::CPPTYPE_UINT64:
680       return UInt64ToString("PROTOBUF", field->default_value_uint64());
681     case FieldDescriptor::CPPTYPE_DOUBLE: {
682       double value = field->default_value_double();
683       if (value == std::numeric_limits<double>::infinity()) {
684         return "std::numeric_limits<double>::infinity()";
685       } else if (value == -std::numeric_limits<double>::infinity()) {
686         return "-std::numeric_limits<double>::infinity()";
687       } else if (value != value) {
688         return "std::numeric_limits<double>::quiet_NaN()";
689       } else {
690         return SimpleDtoa(value);
691       }
692     }
693     case FieldDescriptor::CPPTYPE_FLOAT: {
694       float value = field->default_value_float();
695       if (value == std::numeric_limits<float>::infinity()) {
696         return "std::numeric_limits<float>::infinity()";
697       } else if (value == -std::numeric_limits<float>::infinity()) {
698         return "-std::numeric_limits<float>::infinity()";
699       } else if (value != value) {
700         return "std::numeric_limits<float>::quiet_NaN()";
701       } else {
702         std::string float_value = SimpleFtoa(value);
703         // If floating point value contains a period (.) or an exponent
704         // (either E or e), then append suffix 'f' to make it a float
705         // literal.
706         if (float_value.find_first_of(".eE") != std::string::npos) {
707           float_value.push_back('f');
708         }
709         return float_value;
710       }
711     }
712     case FieldDescriptor::CPPTYPE_BOOL:
713       return field->default_value_bool() ? "true" : "false";
714     case FieldDescriptor::CPPTYPE_ENUM:
715       // Lazy:  Generate a static_cast because we don't have a helper function
716       //   that constructs the full name of an enum value.
717       return strings::Substitute(
718           "static_cast< $0 >($1)", ClassName(field->enum_type(), true),
719           Int32ToString(field->default_value_enum()->number()));
720     case FieldDescriptor::CPPTYPE_STRING:
721       return "\"" +
722              EscapeTrigraphs(CEscape(field->default_value_string())) +
723              "\"";
724     case FieldDescriptor::CPPTYPE_MESSAGE:
725       return "*" + FieldMessageTypeName(field, options) +
726              "::internal_default_instance()";
727   }
728   // Can't actually get here; make compiler happy.  (We could add a default
729   // case above but then we wouldn't get the nice compiler warning when a
730   // new type is added.)
731   GOOGLE_LOG(FATAL) << "Can't get here.";
732   return "";
733 }
734 
735 // Convert a file name into a valid identifier.
FilenameIdentifier(const std::string & filename)736 std::string FilenameIdentifier(const std::string& filename) {
737   std::string result;
738   for (int i = 0; i < filename.size(); i++) {
739     if (ascii_isalnum(filename[i])) {
740       result.push_back(filename[i]);
741     } else {
742       // Not alphanumeric.  To avoid any possibility of name conflicts we
743       // use the hex code for the character.
744       StrAppend(&result, "_", strings::Hex(static_cast<uint8>(filename[i])));
745     }
746   }
747   return result;
748 }
749 
UniqueName(const std::string & name,const std::string & filename,const Options & options)750 std::string UniqueName(const std::string& name, const std::string& filename,
751                        const Options& options) {
752   return name + "_" + FilenameIdentifier(filename);
753 }
754 
755 // Return the qualified C++ name for a file level symbol.
QualifiedFileLevelSymbol(const FileDescriptor * file,const std::string & name,const Options & options)756 std::string QualifiedFileLevelSymbol(const FileDescriptor* file,
757                                      const std::string& name,
758                                      const Options& options) {
759   if (file->package().empty()) {
760     return StrCat("::", name);
761   }
762   return StrCat(Namespace(file, options), "::", name);
763 }
764 
765 // Escape C++ trigraphs by escaping question marks to \?
EscapeTrigraphs(const std::string & to_escape)766 std::string EscapeTrigraphs(const std::string& to_escape) {
767   return StringReplace(to_escape, "?", "\\?", true);
768 }
769 
770 // Escaped function name to eliminate naming conflict.
SafeFunctionName(const Descriptor * descriptor,const FieldDescriptor * field,const std::string & prefix)771 std::string SafeFunctionName(const Descriptor* descriptor,
772                              const FieldDescriptor* field,
773                              const std::string& prefix) {
774   // Do not use FieldName() since it will escape keywords.
775   std::string name = field->name();
776   LowerString(&name);
777   std::string function_name = prefix + name;
778   if (descriptor->FindFieldByName(function_name)) {
779     // Single underscore will also make it conflicting with the private data
780     // member. We use double underscore to escape function names.
781     function_name.append("__");
782   } else if (kKeywords.count(name) > 0) {
783     // If the field name is a keyword, we append the underscore back to keep it
784     // consistent with other function names.
785     function_name.append("_");
786   }
787   return function_name;
788 }
789 
IsStringInlined(const FieldDescriptor * descriptor,const Options & options)790 bool IsStringInlined(const FieldDescriptor* descriptor,
791                      const Options& options) {
792   if (options.opensource_runtime) return false;
793 
794   // TODO(ckennelly): Handle inlining for any.proto.
795   if (IsAnyMessage(descriptor->containing_type(), options)) return false;
796   if (descriptor->containing_type()->options().map_entry()) return false;
797 
798   // We rely on has bits to distinguish field presence for release_$name$.  When
799   // there is no hasbit, we cannot use the address of the string instance when
800   // the field has been inlined.
801   if (!HasHasbit(descriptor)) return false;
802 
803   if (options.access_info_map) {
804     if (descriptor->is_required()) return true;
805   }
806   return false;
807 }
808 
HasLazyFields(const Descriptor * descriptor,const Options & options)809 static bool HasLazyFields(const Descriptor* descriptor,
810                           const Options& options) {
811   for (int field_idx = 0; field_idx < descriptor->field_count(); field_idx++) {
812     if (IsLazy(descriptor->field(field_idx), options)) {
813       return true;
814     }
815   }
816   for (int idx = 0; idx < descriptor->extension_count(); idx++) {
817     if (IsLazy(descriptor->extension(idx), options)) {
818       return true;
819     }
820   }
821   for (int idx = 0; idx < descriptor->nested_type_count(); idx++) {
822     if (HasLazyFields(descriptor->nested_type(idx), options)) {
823       return true;
824     }
825   }
826   return false;
827 }
828 
829 // Does the given FileDescriptor use lazy fields?
HasLazyFields(const FileDescriptor * file,const Options & options)830 bool HasLazyFields(const FileDescriptor* file, const Options& options) {
831   for (int i = 0; i < file->message_type_count(); i++) {
832     const Descriptor* descriptor(file->message_type(i));
833     if (HasLazyFields(descriptor, options)) {
834       return true;
835     }
836   }
837   for (int field_idx = 0; field_idx < file->extension_count(); field_idx++) {
838     if (IsLazy(file->extension(field_idx), options)) {
839       return true;
840     }
841   }
842   return false;
843 }
844 
HasRepeatedFields(const Descriptor * descriptor)845 static bool HasRepeatedFields(const Descriptor* descriptor) {
846   for (int i = 0; i < descriptor->field_count(); ++i) {
847     if (descriptor->field(i)->label() == FieldDescriptor::LABEL_REPEATED) {
848       return true;
849     }
850   }
851   for (int i = 0; i < descriptor->nested_type_count(); ++i) {
852     if (HasRepeatedFields(descriptor->nested_type(i))) return true;
853   }
854   return false;
855 }
856 
HasRepeatedFields(const FileDescriptor * file)857 bool HasRepeatedFields(const FileDescriptor* file) {
858   for (int i = 0; i < file->message_type_count(); ++i) {
859     if (HasRepeatedFields(file->message_type(i))) return true;
860   }
861   return false;
862 }
863 
IsStringPieceField(const FieldDescriptor * field,const Options & options)864 static bool IsStringPieceField(const FieldDescriptor* field,
865                                const Options& options) {
866   return field->cpp_type() == FieldDescriptor::CPPTYPE_STRING &&
867          EffectiveStringCType(field, options) == FieldOptions::STRING_PIECE;
868 }
869 
HasStringPieceFields(const Descriptor * descriptor,const Options & options)870 static bool HasStringPieceFields(const Descriptor* descriptor,
871                                  const Options& options) {
872   for (int i = 0; i < descriptor->field_count(); ++i) {
873     if (IsStringPieceField(descriptor->field(i), options)) return true;
874   }
875   for (int i = 0; i < descriptor->nested_type_count(); ++i) {
876     if (HasStringPieceFields(descriptor->nested_type(i), options)) return true;
877   }
878   return false;
879 }
880 
HasStringPieceFields(const FileDescriptor * file,const Options & options)881 bool HasStringPieceFields(const FileDescriptor* file, const Options& options) {
882   for (int i = 0; i < file->message_type_count(); ++i) {
883     if (HasStringPieceFields(file->message_type(i), options)) return true;
884   }
885   return false;
886 }
887 
IsCordField(const FieldDescriptor * field,const Options & options)888 static bool IsCordField(const FieldDescriptor* field, const Options& options) {
889   return field->cpp_type() == FieldDescriptor::CPPTYPE_STRING &&
890          EffectiveStringCType(field, options) == FieldOptions::CORD;
891 }
892 
HasCordFields(const Descriptor * descriptor,const Options & options)893 static bool HasCordFields(const Descriptor* descriptor,
894                           const Options& options) {
895   for (int i = 0; i < descriptor->field_count(); ++i) {
896     if (IsCordField(descriptor->field(i), options)) return true;
897   }
898   for (int i = 0; i < descriptor->nested_type_count(); ++i) {
899     if (HasCordFields(descriptor->nested_type(i), options)) return true;
900   }
901   return false;
902 }
903 
HasCordFields(const FileDescriptor * file,const Options & options)904 bool HasCordFields(const FileDescriptor* file, const Options& options) {
905   for (int i = 0; i < file->message_type_count(); ++i) {
906     if (HasCordFields(file->message_type(i), options)) return true;
907   }
908   return false;
909 }
910 
HasExtensionsOrExtendableMessage(const Descriptor * descriptor)911 static bool HasExtensionsOrExtendableMessage(const Descriptor* descriptor) {
912   if (descriptor->extension_range_count() > 0) return true;
913   if (descriptor->extension_count() > 0) return true;
914   for (int i = 0; i < descriptor->nested_type_count(); ++i) {
915     if (HasExtensionsOrExtendableMessage(descriptor->nested_type(i))) {
916       return true;
917     }
918   }
919   return false;
920 }
921 
HasExtensionsOrExtendableMessage(const FileDescriptor * file)922 bool HasExtensionsOrExtendableMessage(const FileDescriptor* file) {
923   if (file->extension_count() > 0) return true;
924   for (int i = 0; i < file->message_type_count(); ++i) {
925     if (HasExtensionsOrExtendableMessage(file->message_type(i))) return true;
926   }
927   return false;
928 }
929 
HasMapFields(const Descriptor * descriptor)930 static bool HasMapFields(const Descriptor* descriptor) {
931   for (int i = 0; i < descriptor->field_count(); ++i) {
932     if (descriptor->field(i)->is_map()) {
933       return true;
934     }
935   }
936   for (int i = 0; i < descriptor->nested_type_count(); ++i) {
937     if (HasMapFields(descriptor->nested_type(i))) return true;
938   }
939   return false;
940 }
941 
HasMapFields(const FileDescriptor * file)942 bool HasMapFields(const FileDescriptor* file) {
943   for (int i = 0; i < file->message_type_count(); ++i) {
944     if (HasMapFields(file->message_type(i))) return true;
945   }
946   return false;
947 }
948 
HasEnumDefinitions(const Descriptor * message_type)949 static bool HasEnumDefinitions(const Descriptor* message_type) {
950   if (message_type->enum_type_count() > 0) return true;
951   for (int i = 0; i < message_type->nested_type_count(); ++i) {
952     if (HasEnumDefinitions(message_type->nested_type(i))) return true;
953   }
954   return false;
955 }
956 
HasEnumDefinitions(const FileDescriptor * file)957 bool HasEnumDefinitions(const FileDescriptor* file) {
958   if (file->enum_type_count() > 0) return true;
959   for (int i = 0; i < file->message_type_count(); ++i) {
960     if (HasEnumDefinitions(file->message_type(i))) return true;
961   }
962   return false;
963 }
964 
IsStringOrMessage(const FieldDescriptor * field)965 bool IsStringOrMessage(const FieldDescriptor* field) {
966   switch (field->cpp_type()) {
967     case FieldDescriptor::CPPTYPE_INT32:
968     case FieldDescriptor::CPPTYPE_INT64:
969     case FieldDescriptor::CPPTYPE_UINT32:
970     case FieldDescriptor::CPPTYPE_UINT64:
971     case FieldDescriptor::CPPTYPE_DOUBLE:
972     case FieldDescriptor::CPPTYPE_FLOAT:
973     case FieldDescriptor::CPPTYPE_BOOL:
974     case FieldDescriptor::CPPTYPE_ENUM:
975       return false;
976     case FieldDescriptor::CPPTYPE_STRING:
977     case FieldDescriptor::CPPTYPE_MESSAGE:
978       return true;
979   }
980 
981   GOOGLE_LOG(FATAL) << "Can't get here.";
982   return false;
983 }
984 
EffectiveStringCType(const FieldDescriptor * field,const Options & options)985 FieldOptions::CType EffectiveStringCType(const FieldDescriptor* field,
986                                          const Options& options) {
987   GOOGLE_DCHECK(field->cpp_type() == FieldDescriptor::CPPTYPE_STRING);
988   if (options.opensource_runtime) {
989     // Open-source protobuf release only supports STRING ctype.
990     return FieldOptions::STRING;
991   } else {
992     // Google-internal supports all ctypes.
993     return field->options().ctype();
994   }
995 }
996 
IsAnyMessage(const FileDescriptor * descriptor,const Options & options)997 bool IsAnyMessage(const FileDescriptor* descriptor, const Options& options) {
998   return descriptor->name() == kAnyProtoFile;
999 }
1000 
IsAnyMessage(const Descriptor * descriptor,const Options & options)1001 bool IsAnyMessage(const Descriptor* descriptor, const Options& options) {
1002   return descriptor->name() == kAnyMessageName &&
1003          IsAnyMessage(descriptor->file(), options);
1004 }
1005 
IsWellKnownMessage(const FileDescriptor * file)1006 bool IsWellKnownMessage(const FileDescriptor* file) {
1007   static const std::unordered_set<std::string> well_known_files{
1008       "google/protobuf/any.proto",
1009       "google/protobuf/api.proto",
1010       "google/protobuf/compiler/plugin.proto",
1011       "google/protobuf/descriptor.proto",
1012       "google/protobuf/duration.proto",
1013       "google/protobuf/empty.proto",
1014       "google/protobuf/field_mask.proto",
1015       "google/protobuf/source_context.proto",
1016       "google/protobuf/struct.proto",
1017       "google/protobuf/timestamp.proto",
1018       "google/protobuf/type.proto",
1019       "google/protobuf/wrappers.proto",
1020   };
1021   return well_known_files.find(file->name()) != well_known_files.end();
1022 }
1023 
// Field-level switch for UTF-8 enforcement. In this build it is
// unconditionally on; neither |field| nor |options| is consulted.
static bool FieldEnforceUtf8(const FieldDescriptor* field,
                             const Options& options) {
  const bool enforce = true;
  return enforce;
}
1028 
// File-level switch for UTF-8 verification. In this build it is
// unconditionally on; neither |file| nor |options| is consulted.
static bool FileUtf8Verification(const FileDescriptor* file,
                                 const Options& options) {
  const bool verify = true;
  return verify;
}
1033 
1034 // Which level of UTF-8 enforcemant is placed on this file.
GetUtf8CheckMode(const FieldDescriptor * field,const Options & options)1035 Utf8CheckMode GetUtf8CheckMode(const FieldDescriptor* field,
1036                                const Options& options) {
1037   if (field->file()->syntax() == FileDescriptor::SYNTAX_PROTO3 &&
1038       FieldEnforceUtf8(field, options)) {
1039     return STRICT;
1040   } else if (GetOptimizeFor(field->file(), options) !=
1041                  FileOptions::LITE_RUNTIME &&
1042              FileUtf8Verification(field->file(), options)) {
1043     return VERIFY;
1044   } else {
1045     return NONE;
1046   }
1047 }
1048 
GenerateUtf8CheckCode(const FieldDescriptor * field,const Options & options,bool for_parse,const char * parameters,const char * strict_function,const char * verify_function,const Formatter & format)1049 static void GenerateUtf8CheckCode(const FieldDescriptor* field,
1050                                   const Options& options, bool for_parse,
1051                                   const char* parameters,
1052                                   const char* strict_function,
1053                                   const char* verify_function,
1054                                   const Formatter& format) {
1055   switch (GetUtf8CheckMode(field, options)) {
1056     case STRICT: {
1057       if (for_parse) {
1058         format("DO_(");
1059       }
1060       format("::$proto_ns$::internal::WireFormatLite::$1$(\n", strict_function);
1061       format.Indent();
1062       format(parameters);
1063       if (for_parse) {
1064         format("::$proto_ns$::internal::WireFormatLite::PARSE,\n");
1065       } else {
1066         format("::$proto_ns$::internal::WireFormatLite::SERIALIZE,\n");
1067       }
1068       format("\"$1$\")", field->full_name());
1069       if (for_parse) {
1070         format(")");
1071       }
1072       format(";\n");
1073       format.Outdent();
1074       break;
1075     }
1076     case VERIFY: {
1077       format("::$proto_ns$::internal::WireFormat::$1$(\n", verify_function);
1078       format.Indent();
1079       format(parameters);
1080       if (for_parse) {
1081         format("::$proto_ns$::internal::WireFormat::PARSE,\n");
1082       } else {
1083         format("::$proto_ns$::internal::WireFormat::SERIALIZE,\n");
1084       }
1085       format("\"$1$\");\n", field->full_name());
1086       format.Outdent();
1087       break;
1088     }
1089     case NONE:
1090       break;
1091   }
1092 }
1093 
GenerateUtf8CheckCodeForString(const FieldDescriptor * field,const Options & options,bool for_parse,const char * parameters,const Formatter & format)1094 void GenerateUtf8CheckCodeForString(const FieldDescriptor* field,
1095                                     const Options& options, bool for_parse,
1096                                     const char* parameters,
1097                                     const Formatter& format) {
1098   GenerateUtf8CheckCode(field, options, for_parse, parameters,
1099                         "VerifyUtf8String", "VerifyUTF8StringNamedField",
1100                         format);
1101 }
1102 
GenerateUtf8CheckCodeForCord(const FieldDescriptor * field,const Options & options,bool for_parse,const char * parameters,const Formatter & format)1103 void GenerateUtf8CheckCodeForCord(const FieldDescriptor* field,
1104                                   const Options& options, bool for_parse,
1105                                   const char* parameters,
1106                                   const Formatter& format) {
1107   GenerateUtf8CheckCode(field, options, for_parse, parameters, "VerifyUtf8Cord",
1108                         "VerifyUTF8CordNamedField", format);
1109 }
1110 
1111 namespace {
1112 
Flatten(const Descriptor * descriptor,std::vector<const Descriptor * > * flatten)1113 void Flatten(const Descriptor* descriptor,
1114              std::vector<const Descriptor*>* flatten) {
1115   for (int i = 0; i < descriptor->nested_type_count(); i++)
1116     Flatten(descriptor->nested_type(i), flatten);
1117   flatten->push_back(descriptor);
1118 }
1119 
1120 }  // namespace
1121 
FlattenMessagesInFile(const FileDescriptor * file,std::vector<const Descriptor * > * result)1122 void FlattenMessagesInFile(const FileDescriptor* file,
1123                            std::vector<const Descriptor*>* result) {
1124   for (int i = 0; i < file->message_type_count(); i++) {
1125     Flatten(file->message_type(i), result);
1126   }
1127 }
1128 
HasWeakFields(const Descriptor * descriptor,const Options & options)1129 bool HasWeakFields(const Descriptor* descriptor, const Options& options) {
1130   for (int i = 0; i < descriptor->field_count(); i++) {
1131     if (IsWeak(descriptor->field(i), options)) return true;
1132   }
1133   return false;
1134 }
1135 
HasWeakFields(const FileDescriptor * file,const Options & options)1136 bool HasWeakFields(const FileDescriptor* file, const Options& options) {
1137   for (int i = 0; i < file->message_type_count(); ++i) {
1138     if (HasWeakFields(file->message_type(i), options)) return true;
1139   }
1140   return false;
1141 }
1142 
UsingImplicitWeakFields(const FileDescriptor * file,const Options & options)1143 bool UsingImplicitWeakFields(const FileDescriptor* file,
1144                              const Options& options) {
1145   return options.lite_implicit_weak_fields &&
1146          GetOptimizeFor(file, options) == FileOptions::LITE_RUNTIME;
1147 }
1148 
IsImplicitWeakField(const FieldDescriptor * field,const Options & options,MessageSCCAnalyzer * scc_analyzer)1149 bool IsImplicitWeakField(const FieldDescriptor* field, const Options& options,
1150                          MessageSCCAnalyzer* scc_analyzer) {
1151   return UsingImplicitWeakFields(field->file(), options) &&
1152          field->type() == FieldDescriptor::TYPE_MESSAGE &&
1153          !field->is_required() && !field->is_map() && !field->is_extension() &&
1154          !field->real_containing_oneof() &&
1155          !IsWellKnownMessage(field->message_type()->file()) &&
1156          field->message_type()->file()->name() !=
1157              "net/proto2/proto/descriptor.proto" &&
1158          // We do not support implicit weak fields between messages in the same
1159          // strongly-connected component.
1160          scc_analyzer->GetSCC(field->containing_type()) !=
1161              scc_analyzer->GetSCC(field->message_type());
1162 }
1163 
GetSCCAnalysis(const SCC * scc)1164 MessageAnalysis MessageSCCAnalyzer::GetSCCAnalysis(const SCC* scc) {
1165   if (analysis_cache_.count(scc)) return analysis_cache_[scc];
1166   MessageAnalysis result{};
1167   for (int i = 0; i < scc->descriptors.size(); i++) {
1168     const Descriptor* descriptor = scc->descriptors[i];
1169     if (descriptor->extension_range_count() > 0) {
1170       result.contains_extension = true;
1171       // Extensions are found by looking up default_instance and extension
1172       // number in a map. So you'd maybe expect here
1173       // result.constructor_requires_initialization = true;
1174       // However the extension registration mechanism already makes sure
1175       // the default will be initialized.
1176     }
1177     for (int i = 0; i < descriptor->field_count(); i++) {
1178       const FieldDescriptor* field = descriptor->field(i);
1179       if (field->is_required()) {
1180         result.contains_required = true;
1181       }
1182       switch (field->type()) {
1183         case FieldDescriptor::TYPE_STRING:
1184         case FieldDescriptor::TYPE_BYTES: {
1185           result.constructor_requires_initialization = true;
1186           if (field->options().ctype() == FieldOptions::CORD) {
1187             result.contains_cord = true;
1188           }
1189           break;
1190         }
1191         case FieldDescriptor::TYPE_GROUP:
1192         case FieldDescriptor::TYPE_MESSAGE: {
1193           result.constructor_requires_initialization = true;
1194           const SCC* child = analyzer_.GetSCC(field->message_type());
1195           if (child != scc) {
1196             MessageAnalysis analysis = GetSCCAnalysis(child);
1197             result.contains_cord |= analysis.contains_cord;
1198             result.contains_extension |= analysis.contains_extension;
1199             if (!ShouldIgnoreRequiredFieldCheck(field, options_)) {
1200               result.contains_required |= analysis.contains_required;
1201             }
1202           } else {
1203             // This field points back into the same SCC hence the messages
1204             // in the SCC are recursive. Note if SCC contains more than two
1205             // nodes it has to be recursive, however this test also works for
1206             // a single node that is recursive.
1207             result.is_recursive = true;
1208           }
1209           break;
1210         }
1211         default:
1212           break;
1213       }
1214     }
1215   }
1216   // We deliberately only insert the result here. After we contracted the SCC
1217   // in the graph, the graph should be a DAG. Hence we shouldn't need to mark
1218   // nodes visited as we can never return to them. By inserting them here
1219   // we will go in an infinite loop if the SCC is not correct.
1220   return analysis_cache_[scc] = result;
1221 }
1222 
ListAllFields(const Descriptor * d,std::vector<const FieldDescriptor * > * fields)1223 void ListAllFields(const Descriptor* d,
1224                    std::vector<const FieldDescriptor*>* fields) {
1225   // Collect sub messages
1226   for (int i = 0; i < d->nested_type_count(); i++) {
1227     ListAllFields(d->nested_type(i), fields);
1228   }
1229   // Collect message level extensions.
1230   for (int i = 0; i < d->extension_count(); i++) {
1231     fields->push_back(d->extension(i));
1232   }
1233   // Add types of fields necessary
1234   for (int i = 0; i < d->field_count(); i++) {
1235     fields->push_back(d->field(i));
1236   }
1237 }
1238 
ListAllFields(const FileDescriptor * d,std::vector<const FieldDescriptor * > * fields)1239 void ListAllFields(const FileDescriptor* d,
1240                    std::vector<const FieldDescriptor*>* fields) {
1241   // Collect file level message.
1242   for (int i = 0; i < d->message_type_count(); i++) {
1243     ListAllFields(d->message_type(i), fields);
1244   }
1245   // Collect message level extensions.
1246   for (int i = 0; i < d->extension_count(); i++) {
1247     fields->push_back(d->extension(i));
1248   }
1249 }
1250 
ListAllTypesForServices(const FileDescriptor * fd,std::vector<const Descriptor * > * types)1251 void ListAllTypesForServices(const FileDescriptor* fd,
1252                              std::vector<const Descriptor*>* types) {
1253   for (int i = 0; i < fd->service_count(); i++) {
1254     const ServiceDescriptor* sd = fd->service(i);
1255     for (int j = 0; j < sd->method_count(); j++) {
1256       const MethodDescriptor* method = sd->method(j);
1257       types->push_back(method->input_type());
1258       types->push_back(method->output_type());
1259     }
1260   }
1261 }
1262 
GetBootstrapBasename(const Options & options,const std::string & basename,std::string * bootstrap_basename)1263 bool GetBootstrapBasename(const Options& options, const std::string& basename,
1264                           std::string* bootstrap_basename) {
1265   if (options.opensource_runtime) {
1266     return false;
1267   }
1268 
1269   std::unordered_map<std::string, std::string> bootstrap_mapping{
1270       {"net/proto2/proto/descriptor",
1271        "net/proto2/internal/descriptor"},
1272       {"net/proto2/compiler/proto/plugin",
1273        "net/proto2/compiler/proto/plugin"},
1274       {"net/proto2/compiler/proto/profile",
1275        "net/proto2/compiler/proto/profile_bootstrap"},
1276   };
1277   auto iter = bootstrap_mapping.find(basename);
1278   if (iter == bootstrap_mapping.end()) {
1279     *bootstrap_basename = basename;
1280     return false;
1281   } else {
1282     *bootstrap_basename = iter->second;
1283     return true;
1284   }
1285 }
1286 
IsBootstrapProto(const Options & options,const FileDescriptor * file)1287 bool IsBootstrapProto(const Options& options, const FileDescriptor* file) {
1288   std::string my_name = StripProto(file->name());
1289   return GetBootstrapBasename(options, my_name, &my_name);
1290 }
1291 
MaybeBootstrap(const Options & options,GeneratorContext * generator_context,bool bootstrap_flag,std::string * basename)1292 bool MaybeBootstrap(const Options& options, GeneratorContext* generator_context,
1293                     bool bootstrap_flag, std::string* basename) {
1294   std::string bootstrap_basename;
1295   if (!GetBootstrapBasename(options, *basename, &bootstrap_basename)) {
1296     return false;
1297   }
1298 
1299   if (bootstrap_flag) {
1300     // Adjust basename, but don't abort code generation.
1301     *basename = bootstrap_basename;
1302     return false;
1303   } else {
1304     std::string forward_to_basename = bootstrap_basename;
1305 
1306     // Generate forwarding headers and empty .pb.cc.
1307     {
1308       std::unique_ptr<io::ZeroCopyOutputStream> output(
1309           generator_context->Open(*basename + ".pb.h"));
1310       io::Printer printer(output.get(), '$', nullptr);
1311       printer.Print(
1312           "#ifndef PROTOBUF_INCLUDED_$filename_identifier$_FORWARD_PB_H\n"
1313           "#define PROTOBUF_INCLUDED_$filename_identifier$_FORWARD_PB_H\n"
1314           "#include \"$forward_to_basename$.pb.h\"  // IWYU pragma: export\n"
1315           "#endif  // PROTOBUF_INCLUDED_$filename_identifier$_FORWARD_PB_H\n",
1316           "forward_to_basename", forward_to_basename, "filename_identifier",
1317           FilenameIdentifier(*basename));
1318 
1319       if (!options.opensource_runtime) {
1320         // HACK HACK HACK, tech debt from the deeps of proto1 and SWIG
1321         // protocoltype is SWIG'ed and we need to forward
1322         if (*basename == "net/proto/protocoltype") {
1323           printer.Print(
1324               "#ifdef SWIG\n"
1325               "%include \"$forward_to_basename$.pb.h\"\n"
1326               "#endif  // SWIG\n",
1327               "forward_to_basename", forward_to_basename);
1328         }
1329       }
1330     }
1331 
1332     {
1333       std::unique_ptr<io::ZeroCopyOutputStream> output(
1334           generator_context->Open(*basename + ".proto.h"));
1335       io::Printer printer(output.get(), '$', nullptr);
1336       printer.Print(
1337           "#ifndef PROTOBUF_INCLUDED_$filename_identifier$_FORWARD_PROTO_H\n"
1338           "#define PROTOBUF_INCLUDED_$filename_identifier$_FORWARD_PROTO_H\n"
1339           "#include \"$forward_to_basename$.proto.h\"  // IWYU pragma: "
1340           "export\n"
1341           "#endif  // "
1342           "PROTOBUF_INCLUDED_$filename_identifier$_FORWARD_PROTO_H\n",
1343           "forward_to_basename", forward_to_basename, "filename_identifier",
1344           FilenameIdentifier(*basename));
1345     }
1346 
1347     {
1348       std::unique_ptr<io::ZeroCopyOutputStream> output(
1349           generator_context->Open(*basename + ".pb.cc"));
1350       io::Printer printer(output.get(), '$', nullptr);
1351       printer.Print("\n");
1352     }
1353 
1354     {
1355       std::unique_ptr<io::ZeroCopyOutputStream> output(
1356           generator_context->Open(*basename + ".pb.h.meta"));
1357     }
1358 
1359     {
1360       std::unique_ptr<io::ZeroCopyOutputStream> output(
1361           generator_context->Open(*basename + ".proto.h.meta"));
1362     }
1363 
1364     // Abort code generation.
1365     return true;
1366   }
1367 }
1368 
1369 class ParseLoopGenerator {
1370  public:
ParseLoopGenerator(int num_hasbits,const Options & options,MessageSCCAnalyzer * scc_analyzer,io::Printer * printer)1371   ParseLoopGenerator(int num_hasbits, const Options& options,
1372                      MessageSCCAnalyzer* scc_analyzer, io::Printer* printer)
1373       : scc_analyzer_(scc_analyzer),
1374         options_(options),
1375         format_(printer),
1376         num_hasbits_(num_hasbits) {}
1377 
GenerateParserLoop(const Descriptor * descriptor)1378   void GenerateParserLoop(const Descriptor* descriptor) {
1379     format_.Set("classname", ClassName(descriptor));
1380     format_.Set("p_ns", "::" + ProtobufNamespace(options_));
1381     format_.Set("pi_ns",
1382                 StrCat("::", ProtobufNamespace(options_), "::internal"));
1383     format_.Set("GOOGLE_PROTOBUF", MacroPrefix(options_));
1384     std::map<std::string, std::string> vars;
1385     SetCommonVars(options_, &vars);
1386     SetUnknkownFieldsVariable(descriptor, options_, &vars);
1387     format_.AddMap(vars);
1388 
1389     std::vector<const FieldDescriptor*> ordered_fields;
1390     for (auto field : FieldRange(descriptor)) {
1391       if (IsFieldUsed(field, options_)) {
1392         ordered_fields.push_back(field);
1393       }
1394     }
1395     std::sort(ordered_fields.begin(), ordered_fields.end(),
1396               [](const FieldDescriptor* a, const FieldDescriptor* b) {
1397                 return a->number() < b->number();
1398               });
1399 
1400     format_(
1401         "const char* $classname$::_InternalParse(const char* ptr, "
1402         "$pi_ns$::ParseContext* ctx) {\n"
1403         "#define CHK_(x) if (PROTOBUF_PREDICT_FALSE(!(x))) goto failure\n");
1404     format_.Indent();
1405     int hasbits_size = 0;
1406     if (num_hasbits_ > 0) {
1407       hasbits_size = (num_hasbits_ + 31) / 32;
1408     }
1409     // For now only optimize small hasbits.
1410     if (hasbits_size != 1) hasbits_size = 0;
1411     if (hasbits_size) {
1412       format_("_Internal::HasBits has_bits{};\n");
1413       format_.Set("has_bits", "has_bits");
1414     } else {
1415       format_.Set("has_bits", "_has_bits_");
1416     }
1417 
1418     if (descriptor->file()->options().cc_enable_arenas()) {
1419       format_("$p_ns$::Arena* arena = GetArena(); (void)arena;\n");
1420     }
1421     GenerateParseLoop(descriptor, ordered_fields);
1422     format_.Outdent();
1423     format_("success:\n");
1424     if (hasbits_size) format_("  _has_bits_.Or(has_bits);\n");
1425 
1426     format_(
1427         "  return ptr;\n"
1428         "failure:\n"
1429         "  ptr = nullptr;\n"
1430         "  goto success;\n"
1431         "#undef CHK_\n"
1432         "}\n");
1433   }
1434 
1435  private:
1436   MessageSCCAnalyzer* scc_analyzer_;
1437   const Options& options_;
1438   Formatter format_;
1439   int num_hasbits_;
1440 
1441   using WireFormat = internal::WireFormat;
1442   using WireFormatLite = internal::WireFormatLite;
1443 
GenerateArenaString(const FieldDescriptor * field)1444   void GenerateArenaString(const FieldDescriptor* field) {
1445     if (HasHasbit(field)) {
1446       format_("_Internal::set_has_$1$(&$has_bits$);\n", FieldName(field));
1447     }
1448     std::string default_string =
1449         field->default_value_string().empty()
1450             ? "::" + ProtobufNamespace(options_) +
1451                   "::internal::GetEmptyStringAlreadyInited()"
1452             : QualifiedClassName(field->containing_type(), options_) +
1453                   "::" + MakeDefaultName(field) + ".get()";
1454     format_(
1455         "if (arena != nullptr) {\n"
1456         "  ptr = ctx->ReadArenaString(ptr, &$1$_, arena);\n"
1457         "} else {\n"
1458         "  ptr = "
1459         "$pi_ns$::InlineGreedyStringParser($1$_.MutableNoArenaNoDefault(&$2$"
1460         "), ptr, ctx);"
1461         "\n}\n"
1462         "const std::string* str = &$1$_.Get(); (void)str;\n",
1463         FieldName(field), default_string);
1464   }
1465 
GenerateStrings(const FieldDescriptor * field,bool check_utf8)1466   void GenerateStrings(const FieldDescriptor* field, bool check_utf8) {
1467     FieldOptions::CType ctype = FieldOptions::STRING;
1468     if (!options_.opensource_runtime) {
1469       // Open source doesn't support other ctypes;
1470       ctype = field->options().ctype();
1471     }
1472     if (field->file()->options().cc_enable_arenas() && !field->is_repeated() &&
1473         !options_.opensource_runtime &&
1474         GetOptimizeFor(field->file(), options_) != FileOptions::LITE_RUNTIME &&
1475         // For now only use arena string for strings with empty defaults.
1476         field->default_value_string().empty() &&
1477         !IsStringInlined(field, options_) && !field->real_containing_oneof() &&
1478         ctype == FieldOptions::STRING) {
1479       GenerateArenaString(field);
1480     } else {
1481       std::string name;
1482       switch (ctype) {
1483         case FieldOptions::STRING:
1484           name = "GreedyStringParser";
1485           break;
1486         case FieldOptions::CORD:
1487           name = "CordParser";
1488           break;
1489         case FieldOptions::STRING_PIECE:
1490           name = "StringPieceParser";
1491           break;
1492       }
1493       format_(
1494           "auto str = $1$$2$_$3$();\n"
1495           "ptr = $pi_ns$::Inline$4$(str, ptr, ctx);\n",
1496           HasInternalAccessors(ctype) ? "_internal_" : "",
1497           field->is_repeated() && !field->is_packable() ? "add" : "mutable",
1498           FieldName(field), name);
1499     }
1500     if (!check_utf8) return;  // return if this is a bytes field
1501     auto level = GetUtf8CheckMode(field, options_);
1502     switch (level) {
1503       case NONE:
1504         return;
1505       case VERIFY:
1506         format_("#ifndef NDEBUG\n");
1507         break;
1508       case STRICT:
1509         format_("CHK_(");
1510         break;
1511     }
1512     std::string field_name;
1513     field_name = "nullptr";
1514     if (HasDescriptorMethods(field->file(), options_)) {
1515       field_name = StrCat("\"", field->full_name(), "\"");
1516     }
1517     format_("$pi_ns$::VerifyUTF8(str, $1$)", field_name);
1518     switch (level) {
1519       case NONE:
1520         return;
1521       case VERIFY:
1522         format_(
1523             ";\n"
1524             "#endif  // !NDEBUG\n");
1525         break;
1526       case STRICT:
1527         format_(");\n");
1528         break;
1529     }
1530   }
1531 
GenerateLengthDelim(const FieldDescriptor * field)1532   void GenerateLengthDelim(const FieldDescriptor* field) {
1533     if (field->is_packable()) {
1534       std::string enum_validator;
1535       if (field->type() == FieldDescriptor::TYPE_ENUM &&
1536           !HasPreservingUnknownEnumSemantics(field)) {
1537         enum_validator =
1538             StrCat(", ", QualifiedClassName(field->enum_type(), options_),
1539                          "_IsValid, &_internal_metadata_, ", field->number());
1540         format_(
1541             "ptr = "
1542             "$pi_ns$::Packed$1$Parser<$unknown_fields_type$>(_internal_mutable_"
1543             "$2$(), ptr, "
1544             "ctx$3$);\n",
1545             DeclaredTypeMethodName(field->type()), FieldName(field),
1546             enum_validator);
1547       } else {
1548         format_(
1549             "ptr = $pi_ns$::Packed$1$Parser(_internal_mutable_$2$(), ptr, "
1550             "ctx$3$);\n",
1551             DeclaredTypeMethodName(field->type()), FieldName(field),
1552             enum_validator);
1553       }
1554     } else {
1555       auto field_type = field->type();
1556       switch (field_type) {
1557         case FieldDescriptor::TYPE_STRING:
1558           GenerateStrings(field, true /* utf8 */);
1559           break;
1560         case FieldDescriptor::TYPE_BYTES:
1561           GenerateStrings(field, false /* utf8 */);
1562           break;
1563         case FieldDescriptor::TYPE_MESSAGE: {
1564           if (field->is_map()) {
1565             const FieldDescriptor* val =
1566                 field->message_type()->FindFieldByName("value");
1567             GOOGLE_CHECK(val);
1568             if (val->type() == FieldDescriptor::TYPE_ENUM &&
1569                 !HasPreservingUnknownEnumSemantics(field)) {
1570               format_(
1571                   "auto object = "
1572                   "::$proto_ns$::internal::InitEnumParseWrapper<$unknown_"
1573                   "fields_type$>("
1574                   "&$1$_, $2$_IsValid, $3$, &_internal_metadata_);\n"
1575                   "ptr = ctx->ParseMessage(&object, ptr);\n",
1576                   FieldName(field), QualifiedClassName(val->enum_type()),
1577                   field->number());
1578             } else {
1579               format_("ptr = ctx->ParseMessage(&$1$_, ptr);\n",
1580                       FieldName(field));
1581             }
1582           } else if (IsLazy(field, options_)) {
1583             if (field->real_containing_oneof()) {
1584               format_(
1585                   "if (!_internal_has_$1$()) {\n"
1586                   "  clear_$2$();\n"
1587                   "  $2$_.$1$_ = ::$proto_ns$::Arena::CreateMessage<\n"
1588                   "      $pi_ns$::LazyField>(GetArena());\n"
1589                   "  set_has_$1$();\n"
1590                   "}\n"
1591                   "ptr = ctx->ParseMessage($2$_.$1$_, ptr);\n",
1592                   FieldName(field), field->containing_oneof()->name());
1593             } else if (HasHasbit(field)) {
1594               format_(
1595                   "_Internal::set_has_$1$(&$has_bits$);\n"
1596                   "ptr = ctx->ParseMessage(&$1$_, ptr);\n",
1597                   FieldName(field));
1598             } else {
1599               format_("ptr = ctx->ParseMessage(&$1$_, ptr);\n",
1600                       FieldName(field));
1601             }
1602           } else if (IsImplicitWeakField(field, options_, scc_analyzer_)) {
1603             if (!field->is_repeated()) {
1604               format_(
1605                   "ptr = ctx->ParseMessage(_Internal::mutable_$1$(this), "
1606                   "ptr);\n",
1607                   FieldName(field));
1608             } else {
1609               format_(
1610                   "ptr = ctx->ParseMessage($1$_.AddWeak(reinterpret_cast<const "
1611                   "::$proto_ns$::MessageLite*>($2$::_$3$_default_instance_ptr_)"
1612                   "), ptr);\n",
1613                   FieldName(field), Namespace(field->message_type(), options_),
1614                   ClassName(field->message_type()));
1615             }
1616           } else if (IsWeak(field, options_)) {
1617             format_(
1618                 "ptr = ctx->ParseMessage(_weak_field_map_.MutableMessage($1$,"
1619                 " _$classname$_default_instance_.$2$_), ptr);\n",
1620                 field->number(), FieldName(field));
1621           } else {
1622             format_("ptr = ctx->ParseMessage(_internal_$1$_$2$(), ptr);\n",
1623                     field->is_repeated() ? "add" : "mutable", FieldName(field));
1624           }
1625           break;
1626         }
1627         default:
1628           GOOGLE_LOG(FATAL) << "Illegal combination for length delimited wiretype "
1629                      << " filed type is " << field->type();
1630       }
1631     }
1632   }
1633 
1634   // Convert a 1 or 2 byte varint into the equivalent value upon a direct load.
SmallVarintValue(uint32 x)1635   static uint32 SmallVarintValue(uint32 x) {
1636     GOOGLE_DCHECK(x < 128 * 128);
1637     if (x >= 128) x += (x & 0xFF80) + 128;
1638     return x;
1639   }
1640 
ShouldRepeat(const FieldDescriptor * descriptor,internal::WireFormatLite::WireType wiretype)1641   static bool ShouldRepeat(const FieldDescriptor* descriptor,
1642                            internal::WireFormatLite::WireType wiretype) {
1643     constexpr int kMaxTwoByteFieldNumber = 16 * 128;
1644     return descriptor->number() < kMaxTwoByteFieldNumber &&
1645            descriptor->is_repeated() &&
1646            (!descriptor->is_packable() ||
1647             wiretype != internal::WireFormatLite::WIRETYPE_LENGTH_DELIMITED);
1648   }
1649 
GenerateFieldBody(internal::WireFormatLite::WireType wiretype,const FieldDescriptor * field)1650   void GenerateFieldBody(internal::WireFormatLite::WireType wiretype,
1651                          const FieldDescriptor* field) {
1652     uint32 tag = WireFormatLite::MakeTag(field->number(), wiretype);
1653     switch (wiretype) {
1654       case WireFormatLite::WIRETYPE_VARINT: {
1655         std::string type = PrimitiveTypeName(options_, field->cpp_type());
1656         std::string prefix = field->is_repeated() ? "add" : "set";
1657         if (field->type() == FieldDescriptor::TYPE_ENUM) {
1658           format_(
1659               "$uint64$ val = $pi_ns$::ReadVarint64(&ptr);\n"
1660               "CHK_(ptr);\n");
1661           if (!HasPreservingUnknownEnumSemantics(field)) {
1662             format_("if (PROTOBUF_PREDICT_TRUE($1$_IsValid(val))) {\n",
1663                     QualifiedClassName(field->enum_type(), options_));
1664             format_.Indent();
1665           }
1666           format_("_internal_$1$_$2$(static_cast<$3$>(val));\n", prefix,
1667                   FieldName(field),
1668                   QualifiedClassName(field->enum_type(), options_));
1669           if (!HasPreservingUnknownEnumSemantics(field)) {
1670             format_.Outdent();
1671             format_(
1672                 "} else {\n"
1673                 "  $pi_ns$::WriteVarint($1$, val, mutable_unknown_fields());\n"
1674                 "}\n",
1675                 field->number());
1676           }
1677         } else {
1678           std::string size = (field->type() == FieldDescriptor::TYPE_SINT32 ||
1679                               field->type() == FieldDescriptor::TYPE_UINT32)
1680                                  ? "32"
1681                                  : "64";
1682           std::string zigzag;
1683           if ((field->type() == FieldDescriptor::TYPE_SINT32 ||
1684                field->type() == FieldDescriptor::TYPE_SINT64)) {
1685             zigzag = "ZigZag";
1686           }
1687           if (field->is_repeated() || field->real_containing_oneof()) {
1688             std::string prefix = field->is_repeated() ? "add" : "set";
1689             format_(
1690                 "_internal_$1$_$2$($pi_ns$::ReadVarint$3$$4$(&ptr));\n"
1691                 "CHK_(ptr);\n",
1692                 prefix, FieldName(field), zigzag, size);
1693           } else {
1694             if (HasHasbit(field)) {
1695               format_("_Internal::set_has_$1$(&$has_bits$);\n",
1696                       FieldName(field));
1697             }
1698             format_(
1699                 "$1$_ = $pi_ns$::ReadVarint$2$$3$(&ptr);\n"
1700                 "CHK_(ptr);\n",
1701                 FieldName(field), zigzag, size);
1702           }
1703         }
1704         break;
1705       }
1706       case WireFormatLite::WIRETYPE_FIXED32:
1707       case WireFormatLite::WIRETYPE_FIXED64: {
1708         std::string type = PrimitiveTypeName(options_, field->cpp_type());
1709         if (field->is_repeated() || field->real_containing_oneof()) {
1710           std::string prefix = field->is_repeated() ? "add" : "set";
1711           format_(
1712               "_internal_$1$_$2$($pi_ns$::UnalignedLoad<$3$>(ptr));\n"
1713               "ptr += sizeof($3$);\n",
1714               prefix, FieldName(field), type);
1715         } else {
1716           if (HasHasbit(field)) {
1717             format_("_Internal::set_has_$1$(&$has_bits$);\n", FieldName(field));
1718           }
1719           format_(
1720               "$1$_ = $pi_ns$::UnalignedLoad<$2$>(ptr);\n"
1721               "ptr += sizeof($2$);\n",
1722               FieldName(field), type);
1723         }
1724         break;
1725       }
1726       case WireFormatLite::WIRETYPE_LENGTH_DELIMITED: {
1727         GenerateLengthDelim(field);
1728         format_("CHK_(ptr);\n");
1729         break;
1730       }
1731       case WireFormatLite::WIRETYPE_START_GROUP: {
1732         format_(
1733             "ptr = ctx->ParseGroup(_internal_$1$_$2$(), ptr, $3$);\n"
1734             "CHK_(ptr);\n",
1735             field->is_repeated() ? "add" : "mutable", FieldName(field), tag);
1736         break;
1737       }
1738       case WireFormatLite::WIRETYPE_END_GROUP: {
1739         GOOGLE_LOG(FATAL) << "Can't have end group field\n";
1740         break;
1741       }
1742     }  // switch (wire_type)
1743   }
1744 
1745   // Returns the tag for this field and in case of repeated packable fields,
1746   // sets a fallback tag in fallback_tag_ptr.
ExpectedTag(const FieldDescriptor * field,uint32 * fallback_tag_ptr)1747   static uint32 ExpectedTag(const FieldDescriptor* field,
1748                             uint32* fallback_tag_ptr) {
1749     uint32 expected_tag;
1750     if (field->is_packable()) {
1751       auto expected_wiretype = WireFormat::WireTypeForFieldType(field->type());
1752       expected_tag =
1753           WireFormatLite::MakeTag(field->number(), expected_wiretype);
1754       GOOGLE_CHECK(expected_wiretype != WireFormatLite::WIRETYPE_LENGTH_DELIMITED);
1755       auto fallback_wiretype = WireFormatLite::WIRETYPE_LENGTH_DELIMITED;
1756       uint32 fallback_tag =
1757           WireFormatLite::MakeTag(field->number(), fallback_wiretype);
1758 
1759       if (field->is_packed()) std::swap(expected_tag, fallback_tag);
1760       *fallback_tag_ptr = fallback_tag;
1761     } else {
1762       auto expected_wiretype = WireFormat::WireTypeForField(field);
1763       expected_tag =
1764           WireFormatLite::MakeTag(field->number(), expected_wiretype);
1765     }
1766     return expected_tag;
1767   }
1768 
  // Emits the main `while (!ctx->Done(&ptr))` parse loop for `descriptor`.
  //
  //   descriptor      the message type whose parser is being generated.
  //   ordered_fields  the fields to emit `case` labels for, in the order in
  //                   which the cases are written.
  //
  // Shape of the generated code:
  //   * Each iteration reads a tag and, if there are any fields, switches on
  //     the field number (tag >> 3).
  //   * Each case compares the low byte of the tag against the expected tag
  //     (and, for packable fields, against the fallback tag) and runs the
  //     body produced by GenerateFieldBody(); any other tag jumps to
  //     `handle_unusual`.
  //   * `handle_unusual` stops parsing on an end-group tag or a zero tag,
  //     and otherwise either skips the field (map entries), hands it to the
  //     extension set (tags inside an extension range) or stores it through
  //     UnknownFieldParse.
  void GenerateParseLoop(
      const Descriptor* descriptor,
      const std::vector<const FieldDescriptor*>& ordered_fields) {
    format_(
        "while (!ctx->Done(&ptr)) {\n"
        "  $uint32$ tag;\n"
        "  ptr = $pi_ns$::ReadTag(ptr, &tag);\n"
        "  CHK_(ptr);\n");
    // The switch is only emitted when there is at least one case to put in it.
    if (!ordered_fields.empty()) format_("  switch (tag >> 3) {\n");

    // Two levels: one for the while body, one for the switch body.
    format_.Indent();
    format_.Indent();

    for (const auto* field : ordered_fields) {
      PrintFieldComment(format_, field);
      format_("case $1$:\n", field->number());
      format_.Indent();
      // fallback_tag stays 0 unless the field is packable.
      uint32 fallback_tag = 0;
      uint32 expected_tag = ExpectedTag(field, &fallback_tag);
      // Only the low byte of the tag is compared in the generated code; the
      // field number has already been matched by the enclosing switch.
      format_(
          "if (PROTOBUF_PREDICT_TRUE(static_cast<$uint8$>(tag) == $1$)) {\n",
          expected_tag & 0xFF);
      format_.Indent();
      auto wiretype = WireFormatLite::GetTagWireType(expected_tag);
      uint32 tag = WireFormatLite::MakeTag(field->number(), wiretype);
      int tag_size = io::CodedOutputStream::VarintSize32(tag);
      bool is_repeat = ShouldRepeat(field, wiretype);
      if (is_repeat) {
        // The generated do/while advances past the tag at the top of every
        // iteration; the initial `ptr -= size` cancels that for the first
        // iteration, whose tag was already consumed by ReadTag.
        format_(
            "ptr -= $1$;\n"
            "do {\n"
            "  ptr += $1$;\n",
            tag_size);
        format_.Indent();
      }
      GenerateFieldBody(wiretype, field);
      if (is_repeat) {
        format_.Outdent();
        // Keep looping while more data is available and the same tag follows.
        format_(
            "  if (!ctx->DataAvailable(ptr)) break;\n"
            "} while ($pi_ns$::ExpectTag<$1$>(ptr));\n",
            tag);
      }
      format_.Outdent();
      if (fallback_tag) {
        // Packable field: also accept the alternative (packed/unpacked)
        // encoding.
        format_("} else if (static_cast<$uint8$>(tag) == $1$) {\n",
                fallback_tag & 0xFF);
        format_.Indent();
        GenerateFieldBody(WireFormatLite::GetTagWireType(fallback_tag), field);
        format_.Outdent();
      }
      format_.Outdent();
      format_(
          "  } else goto handle_unusual;\n"
          "  continue;\n");
    }  // for loop over ordered fields

    // Default case
    if (!ordered_fields.empty()) format_("default: {\n");
    if (!ordered_fields.empty()) format_("handle_unusual:\n");
    // An end-group wire type (4) or a zero tag ends parsing of this message.
    format_(
        "  if ((tag & 7) == 4 || tag == 0) {\n"
        "    ctx->SetLastTag(tag);\n"
        "    goto success;\n"
        "  }\n");
    if (IsMapEntryMessage(descriptor)) {
      // Map entries emit no unknown-field handling; the loop just continues.
      format_("  continue;\n");
    } else {
      if (descriptor->extension_range_count() > 0) {
        // Build one `if` whose condition is the disjunction of all extension
        // ranges, expressed as tag ranges with wire type 0.
        format_("if (");
        for (int i = 0; i < descriptor->extension_range_count(); i++) {
          const Descriptor::ExtensionRange* range =
              descriptor->extension_range(i);
          if (i > 0) format_(" ||\n    ");

          uint32 start_tag = WireFormatLite::MakeTag(
              range->start, static_cast<WireFormatLite::WireType>(0));
          uint32 end_tag = WireFormatLite::MakeTag(
              range->end, static_cast<WireFormatLite::WireType>(0));

          // A range reaching past the maximum field number needs no upper
          // bound check.
          if (range->end > FieldDescriptor::kMaxNumber) {
            format_("($1$u <= tag)", start_tag);
          } else {
            format_("($1$u <= tag && tag < $2$u)", start_tag, end_tag);
          }
        }
        format_(") {\n");
        format_(
            "  ptr = _extensions_.ParseField(tag, ptr,\n"
            "      internal_default_instance(), &_internal_metadata_, ctx);\n"
            "  CHK_(ptr != nullptr);\n"
            "  continue;\n"
            "}\n");
      }
      // Everything else is handed to UnknownFieldParse.
      format_(
          "  ptr = UnknownFieldParse(tag,\n"
          "      _internal_metadata_.mutable_unknown_fields<$unknown_"
          "fields_type$>(),\n"
          "      ptr, ctx);\n"
          "  CHK_(ptr != nullptr);\n"
          "  continue;\n");
    }
    if (!ordered_fields.empty()) format_("}\n");  // default case
    format_.Outdent();
    format_.Outdent();
    if (!ordered_fields.empty()) format_("  }  // switch\n");
    format_("}  // while\n");
  }
1877 };
1878 
GenerateParserLoop(const Descriptor * descriptor,int num_hasbits,const Options & options,MessageSCCAnalyzer * scc_analyzer,io::Printer * printer)1879 void GenerateParserLoop(const Descriptor* descriptor, int num_hasbits,
1880                         const Options& options,
1881                         MessageSCCAnalyzer* scc_analyzer,
1882                         io::Printer* printer) {
1883   ParseLoopGenerator generator(num_hasbits, options, scc_analyzer, printer);
1884   generator.GenerateParserLoop(descriptor);
1885 }
1886 
HasExtensionFromFile(const Message & msg,const FileDescriptor * file,const Options & options,bool * has_opt_codesize_extension)1887 static bool HasExtensionFromFile(const Message& msg, const FileDescriptor* file,
1888                                  const Options& options,
1889                                  bool* has_opt_codesize_extension) {
1890   std::vector<const FieldDescriptor*> fields;
1891   auto reflection = msg.GetReflection();
1892   reflection->ListFields(msg, &fields);
1893   for (auto field : fields) {
1894     const auto* field_msg = field->message_type();
1895     if (field_msg == nullptr) {
1896       // It so happens that enums Is_Valid are still generated so enums work.
1897       // Only messages have potential problems.
1898       continue;
1899     }
1900     // If this option has an extension set AND that extension is defined in the
1901     // same file we have bootstrap problem.
1902     if (field->is_extension()) {
1903       const auto* msg_extension_file = field->message_type()->file();
1904       if (msg_extension_file == file) return true;
1905       if (has_opt_codesize_extension &&
1906           GetOptimizeFor(msg_extension_file, options) ==
1907               FileOptions::CODE_SIZE) {
1908         *has_opt_codesize_extension = true;
1909       }
1910     }
1911     // Recurse in this field to see if there is a problem in there
1912     if (field->is_repeated()) {
1913       for (int i = 0; i < reflection->FieldSize(msg, field); i++) {
1914         if (HasExtensionFromFile(reflection->GetRepeatedMessage(msg, field, i),
1915                                  file, options, has_opt_codesize_extension)) {
1916           return true;
1917         }
1918       }
1919     } else {
1920       if (HasExtensionFromFile(reflection->GetMessage(msg, field), file,
1921                                options, has_opt_codesize_extension)) {
1922         return true;
1923       }
1924     }
1925   }
1926   return false;
1927 }
1928 
HasBootstrapProblem(const FileDescriptor * file,const Options & options,bool * has_opt_codesize_extension)1929 static bool HasBootstrapProblem(const FileDescriptor* file,
1930                                 const Options& options,
1931                                 bool* has_opt_codesize_extension) {
1932   static auto& cache = *new std::unordered_map<const FileDescriptor*, bool>;
1933   auto it = cache.find(file);
1934   if (it != cache.end()) return it->second;
1935   // In order to build the data structures for the reflective parse, it needs
1936   // to parse the serialized descriptor describing all the messages defined in
1937   // this file. Obviously this presents a bootstrap problem for descriptor
1938   // messages.
1939   if (file->name() == "net/proto2/proto/descriptor.proto" ||
1940       file->name() == "google/protobuf/descriptor.proto") {
1941     return true;
1942   }
1943   // Unfortunately we're not done yet. The descriptor option messages allow
1944   // for extensions. So we need to be able to parse these extensions in order
1945   // to parse the file descriptor for a file that has custom options. This is a
1946   // problem when these custom options extensions are defined in the same file.
1947   FileDescriptorProto linkedin_fd_proto;
1948   const DescriptorPool* pool = file->pool();
1949   const Descriptor* fd_proto_descriptor =
1950       pool->FindMessageTypeByName(linkedin_fd_proto.GetTypeName());
1951   // Not all pools have descriptor.proto in them. In these cases there for sure
1952   // are no custom options.
1953   if (fd_proto_descriptor == nullptr) return false;
1954 
1955   // It's easier to inspect file as a proto, because we can use reflection on
1956   // the proto to iterate over all content.
1957   file->CopyTo(&linkedin_fd_proto);
1958 
1959   // linkedin_fd_proto is a generated proto linked in the proto compiler. As
1960   // such it doesn't know the extensions that are potentially present in the
1961   // descriptor pool constructed from the protos that are being compiled. These
1962   // custom options are therefore in the unknown fields.
1963   // By building the corresponding FileDescriptorProto in the pool constructed
1964   // by the protos that are being compiled, ie. file's pool, the unknown fields
1965   // are converted to extensions.
1966   DynamicMessageFactory factory(pool);
1967   Message* fd_proto = factory.GetPrototype(fd_proto_descriptor)->New();
1968   fd_proto->ParseFromString(linkedin_fd_proto.SerializeAsString());
1969 
1970   bool& res = cache[file];
1971   res = HasExtensionFromFile(*fd_proto, file, options,
1972                              has_opt_codesize_extension);
1973   delete fd_proto;
1974   return res;
1975 }
1976 
GetOptimizeFor(const FileDescriptor * file,const Options & options,bool * has_opt_codesize_extension)1977 FileOptions_OptimizeMode GetOptimizeFor(const FileDescriptor* file,
1978                                         const Options& options,
1979                                         bool* has_opt_codesize_extension) {
1980   if (has_opt_codesize_extension) *has_opt_codesize_extension = false;
1981   switch (options.enforce_mode) {
1982     case EnforceOptimizeMode::kSpeed:
1983       return FileOptions::SPEED;
1984     case EnforceOptimizeMode::kLiteRuntime:
1985       return FileOptions::LITE_RUNTIME;
1986     case EnforceOptimizeMode::kCodeSize:
1987       if (file->options().optimize_for() == FileOptions::LITE_RUNTIME) {
1988         return FileOptions::LITE_RUNTIME;
1989       }
1990       if (HasBootstrapProblem(file, options, has_opt_codesize_extension)) {
1991         return FileOptions::SPEED;
1992       }
1993       return FileOptions::CODE_SIZE;
1994     case EnforceOptimizeMode::kNoEnforcement:
1995       if (file->options().optimize_for() == FileOptions::CODE_SIZE) {
1996         if (HasBootstrapProblem(file, options, has_opt_codesize_extension)) {
1997           GOOGLE_LOG(WARNING) << "Proto states optimize_for = CODE_SIZE, but we "
1998                           "cannot honor that because it contains custom option "
1999                           "extensions defined in the same proto.";
2000           return FileOptions::SPEED;
2001         }
2002       }
2003       return file->options().optimize_for();
2004   }
2005 
2006   GOOGLE_LOG(FATAL) << "Unknown optimization enforcement requested.";
2007   // The phony return below serves to silence a warning from GCC 8.
2008   return FileOptions::SPEED;
2009 }
2010 
2011 }  // namespace cpp
2012 }  // namespace compiler
2013 }  // namespace protobuf
2014 }  // namespace google
2015