• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 //     * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 //     * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 //     * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 // Author: kenton@google.com (Kenton Varda)
32 //  Based on original Protocol Buffers design by
33 //  Sanjay Ghemawat, Jeff Dean, and others.
34 
35 #include <google/protobuf/compiler/cpp/cpp_helpers.h>
36 
37 #include <functional>
38 #include <limits>
39 #include <map>
40 #include <queue>
41 #include <unordered_set>
42 #include <vector>
43 
44 #include <google/protobuf/stubs/common.h>
45 #include <google/protobuf/stubs/logging.h>
46 #include <google/protobuf/descriptor.h>
47 
48 #include <google/protobuf/compiler/scc.h>
49 #include <google/protobuf/io/printer.h>
50 #include <google/protobuf/io/zero_copy_stream.h>
51 #include <google/protobuf/wire_format.h>
52 #include <google/protobuf/wire_format_lite.h>
53 #include <google/protobuf/stubs/strutil.h>
54 #include <google/protobuf/stubs/substitute.h>
55 
56 
57 #include <google/protobuf/stubs/hash.h>
58 
59 #include <google/protobuf/port_def.inc>
60 
61 namespace google {
62 namespace protobuf {
63 namespace compiler {
64 namespace cpp {
65 
66 namespace {
67 
// Literal "Any"; presumably the short name of the well-known Any message
// (its use is outside this part of the file).
constexpr char kAnyMessageName[] = "Any";
// Literal path "google/protobuf/any.proto"; presumably the file that declares
// the Any message (its use is outside this part of the file).
constexpr char kAnyProtoFile[] = "google/protobuf/any.proto";
70 
DotsToColons(const std::string & name)71 std::string DotsToColons(const std::string& name) {
72   return StringReplace(name, ".", "::", true);
73 }
74 
// Identifiers that must not appear verbatim in generated C++ code.  Names
// found in this list get a trailing underscore appended by the helpers in
// this file (see ResolveKeyword(), FieldName(), EnumValueName()).
//
// Besides "NULL" the list holds the C++ keywords and alternative operator
// spellings, including the C++11 character types (char16_t, char32_t) and the
// keywords introduced by C++20 (char8_t, concept, consteval, constinit,
// co_await, co_return, co_yield, requires) so that generated code also
// compiles under newer language standards.
static const char* const kKeywordList[] = {  //
    "NULL",
    "alignas",
    "alignof",
    "and",
    "and_eq",
    "asm",
    "auto",
    "bitand",
    "bitor",
    "bool",
    "break",
    "case",
    "catch",
    "char",
    "char16_t",
    "char32_t",
    "char8_t",
    "class",
    "co_await",
    "co_return",
    "co_yield",
    "compl",
    "concept",
    "const",
    "consteval",
    "constexpr",
    "constinit",
    "const_cast",
    "continue",
    "decltype",
    "default",
    "delete",
    "do",
    "double",
    "dynamic_cast",
    "else",
    "enum",
    "explicit",
    "export",
    "extern",
    "false",
    "float",
    "for",
    "friend",
    "goto",
    "if",
    "inline",
    "int",
    "long",
    "mutable",
    "namespace",
    "new",
    "noexcept",
    "not",
    "not_eq",
    "nullptr",
    "operator",
    "or",
    "or_eq",
    "private",
    "protected",
    "public",
    "register",
    "reinterpret_cast",
    "requires",
    "return",
    "short",
    "signed",
    "sizeof",
    "static",
    "static_assert",
    "static_cast",
    "struct",
    "switch",
    "template",
    "this",
    "thread_local",
    "throw",
    "true",
    "try",
    "typedef",
    "typeid",
    "typename",
    "union",
    "unsigned",
    "using",
    "virtual",
    "void",
    "volatile",
    "wchar_t",
    "while",
    "xor",
    "xor_eq"};
159 
MakeKeywordsMap()160 static std::unordered_set<std::string>* MakeKeywordsMap() {
161   auto* result = new std::unordered_set<std::string>();
162   for (const auto keyword : kKeywordList) {
163     result->emplace(keyword);
164   }
165   return result;
166 }
167 
168 static std::unordered_set<std::string>& kKeywords = *MakeKeywordsMap();
169 
170 // Encode [0..63] as 'A'-'Z', 'a'-'z', '0'-'9', '_'
Base63Char(int value)171 char Base63Char(int value) {
172   GOOGLE_CHECK_GE(value, 0);
173   if (value < 26) return 'A' + value;
174   value -= 26;
175   if (value < 26) return 'a' + value;
176   value -= 26;
177   if (value < 10) return '0' + value;
178   GOOGLE_CHECK_EQ(value, 10);
179   return '_';
180 }
181 
182 // Given a c identifier has 63 legal characters we can't implement base64
183 // encoding. So we return the k least significant "digits" in base 63.
184 template <typename I>
Base63(I n,int k)185 std::string Base63(I n, int k) {
186   std::string res;
187   while (k-- > 0) {
188     res += Base63Char(static_cast<int>(n % 63));
189     n /= 63;
190   }
191   return res;
192 }
193 
IntTypeName(const Options & options,const std::string & type)194 std::string IntTypeName(const Options& options, const std::string& type) {
195   if (options.opensource_runtime) {
196     return "::PROTOBUF_NAMESPACE_ID::" + type;
197   } else {
198     return "::" + type;
199   }
200 }
201 
SetIntVar(const Options & options,const std::string & type,std::map<std::string,std::string> * variables)202 void SetIntVar(const Options& options, const std::string& type,
203                std::map<std::string, std::string>* variables) {
204   (*variables)[type] = IntTypeName(options, type);
205 }
206 
207 }  // namespace
208 
SetCommonVars(const Options & options,std::map<std::string,std::string> * variables)209 void SetCommonVars(const Options& options,
210                    std::map<std::string, std::string>* variables) {
211   (*variables)["proto_ns"] = ProtobufNamespace(options);
212 
213   // Warning: there is some clever naming/splitting here to avoid extract script
214   // rewrites.  The names of these variables must not be things that the extract
215   // script will rewrite.  That's why we use "CHK" (for example) instead of
216   // "GOOGLE_CHECK".
217   if (options.opensource_runtime) {
218     (*variables)["GOOGLE_PROTOBUF"] = "GOOGLE_PROTOBUF";
219     (*variables)["CHK"] = "GOOGLE_CHECK";
220     (*variables)["DCHK"] = "GOOGLE_DCHECK";
221   } else {
222     // These values are things the extract script would rewrite if we did not
223     // split them.  It might not strictly matter since we don't generate google3
224     // code in open-source.  But it's good to prevent surprising things from
225     // happening.
226     (*variables)["GOOGLE_PROTOBUF"] =
227         "GOOGLE3"
228         "_PROTOBUF";
229     (*variables)["CHK"] =
230         "CH"
231         "ECK";
232     (*variables)["DCHK"] =
233         "DCH"
234         "ECK";
235   }
236 
237   SetIntVar(options, "int8", variables);
238   SetIntVar(options, "uint8", variables);
239   SetIntVar(options, "uint32", variables);
240   SetIntVar(options, "uint64", variables);
241   SetIntVar(options, "int32", variables);
242   SetIntVar(options, "int64", variables);
243   (*variables)["string"] = "std::string";
244 }
245 
// Converts `input` to camel case.
//
// ASCII letters and digits are copied to the result; every other character
// (such as '_') is dropped.  A lower-case letter is upper-cased when it is
// the first character and `cap_next_letter` is true, or when it directly
// follows a digit or a dropped character.  Upper-case letters are copied
// unchanged.
std::string UnderscoresToCamelCase(const std::string& input,
                                   bool cap_next_letter) {
  std::string result;
  result.reserve(input.size());
  // Note: ctype.h is avoided on purpose because its behavior depends on the
  // locale.
  for (const char c : input) {
    if ('a' <= c && c <= 'z') {
      result += cap_next_letter ? static_cast<char>(c + ('A' - 'a')) : c;
      cap_next_letter = false;
    } else if ('A' <= c && c <= 'Z') {
      // Capital letters are left as-is.
      result += c;
      cap_next_letter = false;
    } else if ('0' <= c && c <= '9') {
      // Digits are kept and force the next letter to be capitalized.
      result += c;
      cap_next_letter = true;
    } else {
      // Separator: dropped, but the next letter is capitalized.
      cap_next_letter = true;
    }
  }
  return result;
}
271 
// Comment line made of '=' characters, terminated by a newline.
const char kThickSeparator[] =
    "// ===================================================================\n";
// Comment line made of '-' characters, terminated by a newline.
const char kThinSeparator[] =
    "// -------------------------------------------------------------------\n";
276 
CanInitializeByZeroing(const FieldDescriptor * field)277 bool CanInitializeByZeroing(const FieldDescriptor* field) {
278   if (field->is_repeated() || field->is_extension()) return false;
279   switch (field->cpp_type()) {
280     case FieldDescriptor::CPPTYPE_ENUM:
281       return field->default_value_enum()->number() == 0;
282     case FieldDescriptor::CPPTYPE_INT32:
283       return field->default_value_int32() == 0;
284     case FieldDescriptor::CPPTYPE_INT64:
285       return field->default_value_int64() == 0;
286     case FieldDescriptor::CPPTYPE_UINT32:
287       return field->default_value_uint32() == 0;
288     case FieldDescriptor::CPPTYPE_UINT64:
289       return field->default_value_uint64() == 0;
290     case FieldDescriptor::CPPTYPE_FLOAT:
291       return field->default_value_float() == 0;
292     case FieldDescriptor::CPPTYPE_DOUBLE:
293       return field->default_value_double() == 0;
294     case FieldDescriptor::CPPTYPE_BOOL:
295       return field->default_value_bool() == false;
296     default:
297       return false;
298   }
299 }
300 
ClassName(const Descriptor * descriptor)301 std::string ClassName(const Descriptor* descriptor) {
302   const Descriptor* parent = descriptor->containing_type();
303   std::string res;
304   if (parent) res += ClassName(parent) + "_";
305   res += descriptor->name();
306   if (IsMapEntryMessage(descriptor)) res += "_DoNotUse";
307   return ResolveKeyword(res);
308 }
309 
ClassName(const EnumDescriptor * enum_descriptor)310 std::string ClassName(const EnumDescriptor* enum_descriptor) {
311   if (enum_descriptor->containing_type() == nullptr) {
312     return ResolveKeyword(enum_descriptor->name());
313   } else {
314     return ClassName(enum_descriptor->containing_type()) + "_" +
315            enum_descriptor->name();
316   }
317 }
318 
QualifiedClassName(const Descriptor * d,const Options & options)319 std::string QualifiedClassName(const Descriptor* d, const Options& options) {
320   return QualifiedFileLevelSymbol(d->file(), ClassName(d), options);
321 }
322 
QualifiedClassName(const EnumDescriptor * d,const Options & options)323 std::string QualifiedClassName(const EnumDescriptor* d,
324                                const Options& options) {
325   return QualifiedFileLevelSymbol(d->file(), ClassName(d), options);
326 }
327 
QualifiedClassName(const Descriptor * d)328 std::string QualifiedClassName(const Descriptor* d) {
329   return QualifiedClassName(d, Options());
330 }
331 
QualifiedClassName(const EnumDescriptor * d)332 std::string QualifiedClassName(const EnumDescriptor* d) {
333   return QualifiedClassName(d, Options());
334 }
335 
Namespace(const std::string & package)336 std::string Namespace(const std::string& package) {
337   if (package.empty()) return "";
338   return "::" + DotsToColons(package);
339 }
340 
Namespace(const FileDescriptor * d,const Options & options)341 std::string Namespace(const FileDescriptor* d, const Options& options) {
342   std::string ret = Namespace(d->package());
343   if (IsWellKnownMessage(d) && options.opensource_runtime) {
344     // Written with string concatenation to prevent rewriting of
345     // ::google::protobuf.
346     ret = StringReplace(ret,
347                         "::google::"
348                         "protobuf",
349                         "PROTOBUF_NAMESPACE_ID", false);
350   }
351   return ret;
352 }
353 
Namespace(const Descriptor * d,const Options & options)354 std::string Namespace(const Descriptor* d, const Options& options) {
355   return Namespace(d->file(), options);
356 }
357 
Namespace(const FieldDescriptor * d,const Options & options)358 std::string Namespace(const FieldDescriptor* d, const Options& options) {
359   return Namespace(d->file(), options);
360 }
361 
Namespace(const EnumDescriptor * d,const Options & options)362 std::string Namespace(const EnumDescriptor* d, const Options& options) {
363   return Namespace(d->file(), options);
364 }
365 
DefaultInstanceType(const Descriptor * descriptor,const Options & options)366 std::string DefaultInstanceType(const Descriptor* descriptor,
367                                 const Options& options) {
368   return ClassName(descriptor) + "DefaultTypeInternal";
369 }
370 
DefaultInstanceName(const Descriptor * descriptor,const Options & options)371 std::string DefaultInstanceName(const Descriptor* descriptor,
372                                 const Options& options) {
373   return "_" + ClassName(descriptor, false) + "_default_instance_";
374 }
375 
QualifiedDefaultInstanceName(const Descriptor * descriptor,const Options & options)376 std::string QualifiedDefaultInstanceName(const Descriptor* descriptor,
377                                          const Options& options) {
378   return QualifiedFileLevelSymbol(
379       descriptor->file(), DefaultInstanceName(descriptor, options), options);
380 }
381 
DescriptorTableName(const FileDescriptor * file,const Options & options)382 std::string DescriptorTableName(const FileDescriptor* file,
383                                 const Options& options) {
384   return UniqueName("descriptor_table", file, options);
385 }
386 
FileDllExport(const FileDescriptor * file,const Options & options)387 std::string FileDllExport(const FileDescriptor* file, const Options& options) {
388   return UniqueName("PROTOBUF_INTERNAL_EXPORT", file, options);
389 }
390 
ReferenceFunctionName(const Descriptor * descriptor,const Options & options)391 std::string ReferenceFunctionName(const Descriptor* descriptor,
392                                   const Options& options) {
393   return QualifiedClassName(descriptor, options) + "_ReferenceStrong";
394 }
395 
SuperClassName(const Descriptor * descriptor,const Options & options)396 std::string SuperClassName(const Descriptor* descriptor,
397                            const Options& options) {
398   return "::" + ProtobufNamespace(options) +
399          (HasDescriptorMethods(descriptor->file(), options) ? "::Message"
400                                                             : "::MessageLite");
401 }
402 
ResolveKeyword(const string & name)403 std::string ResolveKeyword(const string& name) {
404   if (kKeywords.count(name) > 0) {
405     return name + "_";
406   }
407   return name;
408 }
409 
FieldName(const FieldDescriptor * field)410 std::string FieldName(const FieldDescriptor* field) {
411   std::string result = field->name();
412   LowerString(&result);
413   if (kKeywords.count(result) > 0) {
414     result.append("_");
415   }
416   return result;
417 }
418 
EnumValueName(const EnumValueDescriptor * enum_value)419 std::string EnumValueName(const EnumValueDescriptor* enum_value) {
420   std::string result = enum_value->name();
421   if (kKeywords.count(result) > 0) {
422     result.append("_");
423   }
424   return result;
425 }
426 
EstimateAlignmentSize(const FieldDescriptor * field)427 int EstimateAlignmentSize(const FieldDescriptor* field) {
428   if (field == nullptr) return 0;
429   if (field->is_repeated()) return 8;
430   switch (field->cpp_type()) {
431     case FieldDescriptor::CPPTYPE_BOOL:
432       return 1;
433 
434     case FieldDescriptor::CPPTYPE_INT32:
435     case FieldDescriptor::CPPTYPE_UINT32:
436     case FieldDescriptor::CPPTYPE_ENUM:
437     case FieldDescriptor::CPPTYPE_FLOAT:
438       return 4;
439 
440     case FieldDescriptor::CPPTYPE_INT64:
441     case FieldDescriptor::CPPTYPE_UINT64:
442     case FieldDescriptor::CPPTYPE_DOUBLE:
443     case FieldDescriptor::CPPTYPE_STRING:
444     case FieldDescriptor::CPPTYPE_MESSAGE:
445       return 8;
446   }
447   GOOGLE_LOG(FATAL) << "Can't get here.";
448   return -1;  // Make compiler happy.
449 }
450 
FieldConstantName(const FieldDescriptor * field)451 std::string FieldConstantName(const FieldDescriptor* field) {
452   std::string field_name = UnderscoresToCamelCase(field->name(), true);
453   std::string result = "k" + field_name + "FieldNumber";
454 
455   if (!field->is_extension() &&
456       field->containing_type()->FindFieldByCamelcaseName(
457           field->camelcase_name()) != field) {
458     // This field's camelcase name is not unique.  As a hack, add the field
459     // number to the constant name.  This makes the constant rather useless,
460     // but what can we do?
461     result += "_" + StrCat(field->number());
462   }
463 
464   return result;
465 }
466 
FieldMessageTypeName(const FieldDescriptor * field,const Options & options)467 std::string FieldMessageTypeName(const FieldDescriptor* field,
468                                  const Options& options) {
469   // Note:  The Google-internal version of Protocol Buffers uses this function
470   //   as a hook point for hacks to support legacy code.
471   return QualifiedClassName(field->message_type(), options);
472 }
473 
StripProto(const std::string & filename)474 std::string StripProto(const std::string& filename) {
475   if (HasSuffixString(filename, ".protodevel")) {
476     return StripSuffixString(filename, ".protodevel");
477   } else {
478     return StripSuffixString(filename, ".proto");
479   }
480 }
481 
PrimitiveTypeName(FieldDescriptor::CppType type)482 const char* PrimitiveTypeName(FieldDescriptor::CppType type) {
483   switch (type) {
484     case FieldDescriptor::CPPTYPE_INT32:
485       return "::google::protobuf::int32";
486     case FieldDescriptor::CPPTYPE_INT64:
487       return "::google::protobuf::int64";
488     case FieldDescriptor::CPPTYPE_UINT32:
489       return "::google::protobuf::uint32";
490     case FieldDescriptor::CPPTYPE_UINT64:
491       return "::google::protobuf::uint64";
492     case FieldDescriptor::CPPTYPE_DOUBLE:
493       return "double";
494     case FieldDescriptor::CPPTYPE_FLOAT:
495       return "float";
496     case FieldDescriptor::CPPTYPE_BOOL:
497       return "bool";
498     case FieldDescriptor::CPPTYPE_ENUM:
499       return "int";
500     case FieldDescriptor::CPPTYPE_STRING:
501       return "std::string";
502     case FieldDescriptor::CPPTYPE_MESSAGE:
503       return nullptr;
504 
505       // No default because we want the compiler to complain if any new
506       // CppTypes are added.
507   }
508 
509   GOOGLE_LOG(FATAL) << "Can't get here.";
510   return nullptr;
511 }
512 
PrimitiveTypeName(const Options & options,FieldDescriptor::CppType type)513 std::string PrimitiveTypeName(const Options& options,
514                               FieldDescriptor::CppType type) {
515   switch (type) {
516     case FieldDescriptor::CPPTYPE_INT32:
517       return IntTypeName(options, "int32");
518     case FieldDescriptor::CPPTYPE_INT64:
519       return IntTypeName(options, "int64");
520     case FieldDescriptor::CPPTYPE_UINT32:
521       return IntTypeName(options, "uint32");
522     case FieldDescriptor::CPPTYPE_UINT64:
523       return IntTypeName(options, "uint64");
524     case FieldDescriptor::CPPTYPE_DOUBLE:
525       return "double";
526     case FieldDescriptor::CPPTYPE_FLOAT:
527       return "float";
528     case FieldDescriptor::CPPTYPE_BOOL:
529       return "bool";
530     case FieldDescriptor::CPPTYPE_ENUM:
531       return "int";
532     case FieldDescriptor::CPPTYPE_STRING:
533       return "std::string";
534     case FieldDescriptor::CPPTYPE_MESSAGE:
535       return "";
536 
537       // No default because we want the compiler to complain if any new
538       // CppTypes are added.
539   }
540 
541   GOOGLE_LOG(FATAL) << "Can't get here.";
542   return "";
543 }
544 
DeclaredTypeMethodName(FieldDescriptor::Type type)545 const char* DeclaredTypeMethodName(FieldDescriptor::Type type) {
546   switch (type) {
547     case FieldDescriptor::TYPE_INT32:
548       return "Int32";
549     case FieldDescriptor::TYPE_INT64:
550       return "Int64";
551     case FieldDescriptor::TYPE_UINT32:
552       return "UInt32";
553     case FieldDescriptor::TYPE_UINT64:
554       return "UInt64";
555     case FieldDescriptor::TYPE_SINT32:
556       return "SInt32";
557     case FieldDescriptor::TYPE_SINT64:
558       return "SInt64";
559     case FieldDescriptor::TYPE_FIXED32:
560       return "Fixed32";
561     case FieldDescriptor::TYPE_FIXED64:
562       return "Fixed64";
563     case FieldDescriptor::TYPE_SFIXED32:
564       return "SFixed32";
565     case FieldDescriptor::TYPE_SFIXED64:
566       return "SFixed64";
567     case FieldDescriptor::TYPE_FLOAT:
568       return "Float";
569     case FieldDescriptor::TYPE_DOUBLE:
570       return "Double";
571 
572     case FieldDescriptor::TYPE_BOOL:
573       return "Bool";
574     case FieldDescriptor::TYPE_ENUM:
575       return "Enum";
576 
577     case FieldDescriptor::TYPE_STRING:
578       return "String";
579     case FieldDescriptor::TYPE_BYTES:
580       return "Bytes";
581     case FieldDescriptor::TYPE_GROUP:
582       return "Group";
583     case FieldDescriptor::TYPE_MESSAGE:
584       return "Message";
585 
586       // No default because we want the compiler to complain if any new
587       // types are added.
588   }
589   GOOGLE_LOG(FATAL) << "Can't get here.";
590   return "";
591 }
592 
Int32ToString(int number)593 std::string Int32ToString(int number) {
594   if (number == kint32min) {
595     // This needs to be special-cased, see explanation here:
596     // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=52661
597     return StrCat(number + 1, " - 1");
598   } else {
599     return StrCat(number);
600   }
601 }
602 
Int64ToString(const std::string & macro_prefix,int64 number)603 std::string Int64ToString(const std::string& macro_prefix, int64 number) {
604   if (number == kint64min) {
605     // This needs to be special-cased, see explanation here:
606     // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=52661
607     return StrCat(macro_prefix, "_LONGLONG(", number + 1, ") - 1");
608   }
609   return StrCat(macro_prefix, "_LONGLONG(", number, ")");
610 }
611 
UInt64ToString(const std::string & macro_prefix,uint64 number)612 std::string UInt64ToString(const std::string& macro_prefix, uint64 number) {
613   return StrCat(macro_prefix, "_ULONGLONG(", number, ")");
614 }
615 
DefaultValue(const FieldDescriptor * field)616 std::string DefaultValue(const FieldDescriptor* field) {
617   switch (field->cpp_type()) {
618     case FieldDescriptor::CPPTYPE_INT64:
619       return Int64ToString("GG", field->default_value_int64());
620     case FieldDescriptor::CPPTYPE_UINT64:
621       return UInt64ToString("GG", field->default_value_uint64());
622     default:
623       return DefaultValue(Options(), field);
624   }
625 }
626 
DefaultValue(const Options & options,const FieldDescriptor * field)627 std::string DefaultValue(const Options& options, const FieldDescriptor* field) {
628   switch (field->cpp_type()) {
629     case FieldDescriptor::CPPTYPE_INT32:
630       return Int32ToString(field->default_value_int32());
631     case FieldDescriptor::CPPTYPE_UINT32:
632       return StrCat(field->default_value_uint32()) + "u";
633     case FieldDescriptor::CPPTYPE_INT64:
634       return Int64ToString("PROTOBUF", field->default_value_int64());
635     case FieldDescriptor::CPPTYPE_UINT64:
636       return UInt64ToString("PROTOBUF", field->default_value_uint64());
637     case FieldDescriptor::CPPTYPE_DOUBLE: {
638       double value = field->default_value_double();
639       if (value == std::numeric_limits<double>::infinity()) {
640         return "std::numeric_limits<double>::infinity()";
641       } else if (value == -std::numeric_limits<double>::infinity()) {
642         return "-std::numeric_limits<double>::infinity()";
643       } else if (value != value) {
644         return "std::numeric_limits<double>::quiet_NaN()";
645       } else {
646         return SimpleDtoa(value);
647       }
648     }
649     case FieldDescriptor::CPPTYPE_FLOAT: {
650       float value = field->default_value_float();
651       if (value == std::numeric_limits<float>::infinity()) {
652         return "std::numeric_limits<float>::infinity()";
653       } else if (value == -std::numeric_limits<float>::infinity()) {
654         return "-std::numeric_limits<float>::infinity()";
655       } else if (value != value) {
656         return "std::numeric_limits<float>::quiet_NaN()";
657       } else {
658         std::string float_value = SimpleFtoa(value);
659         // If floating point value contains a period (.) or an exponent
660         // (either E or e), then append suffix 'f' to make it a float
661         // literal.
662         if (float_value.find_first_of(".eE") != string::npos) {
663           float_value.push_back('f');
664         }
665         return float_value;
666       }
667     }
668     case FieldDescriptor::CPPTYPE_BOOL:
669       return field->default_value_bool() ? "true" : "false";
670     case FieldDescriptor::CPPTYPE_ENUM:
671       // Lazy:  Generate a static_cast because we don't have a helper function
672       //   that constructs the full name of an enum value.
673       return strings::Substitute(
674           "static_cast< $0 >($1)", ClassName(field->enum_type(), true),
675           Int32ToString(field->default_value_enum()->number()));
676     case FieldDescriptor::CPPTYPE_STRING:
677       return "\"" +
678              EscapeTrigraphs(CEscape(field->default_value_string())) +
679              "\"";
680     case FieldDescriptor::CPPTYPE_MESSAGE:
681       return "*" + FieldMessageTypeName(field, options) +
682              "::internal_default_instance()";
683   }
684   // Can't actually get here; make compiler happy.  (We could add a default
685   // case above but then we wouldn't get the nice compiler warning when a
686   // new type is added.)
687   GOOGLE_LOG(FATAL) << "Can't get here.";
688   return "";
689 }
690 
691 // Convert a file name into a valid identifier.
FilenameIdentifier(const std::string & filename)692 std::string FilenameIdentifier(const std::string& filename) {
693   std::string result;
694   for (int i = 0; i < filename.size(); i++) {
695     if (ascii_isalnum(filename[i])) {
696       result.push_back(filename[i]);
697     } else {
698       // Not alphanumeric.  To avoid any possibility of name conflicts we
699       // use the hex code for the character.
700       StrAppend(&result, "_", strings::Hex(static_cast<uint8>(filename[i])));
701     }
702   }
703   return result;
704 }
705 
UniqueName(const std::string & name,const std::string & filename,const Options & options)706 string UniqueName(const std::string& name, const std::string& filename,
707                   const Options& options) {
708   return name + "_" + FilenameIdentifier(filename);
709 }
710 
711 // Return the qualified C++ name for a file level symbol.
QualifiedFileLevelSymbol(const FileDescriptor * file,const std::string & name,const Options & options)712 std::string QualifiedFileLevelSymbol(const FileDescriptor* file,
713                                      const std::string& name,
714                                      const Options& options) {
715   if (file->package().empty()) {
716     return StrCat("::", name);
717   }
718   return StrCat(Namespace(file, options), "::", name);
719 }
720 
721 // Escape C++ trigraphs by escaping question marks to \?
EscapeTrigraphs(const std::string & to_escape)722 std::string EscapeTrigraphs(const std::string& to_escape) {
723   return StringReplace(to_escape, "?", "\\?", true);
724 }
725 
726 // Escaped function name to eliminate naming conflict.
SafeFunctionName(const Descriptor * descriptor,const FieldDescriptor * field,const std::string & prefix)727 std::string SafeFunctionName(const Descriptor* descriptor,
728                              const FieldDescriptor* field,
729                              const std::string& prefix) {
730   // Do not use FieldName() since it will escape keywords.
731   std::string name = field->name();
732   LowerString(&name);
733   std::string function_name = prefix + name;
734   if (descriptor->FindFieldByName(function_name)) {
735     // Single underscore will also make it conflicting with the private data
736     // member. We use double underscore to escape function names.
737     function_name.append("__");
738   } else if (kKeywords.count(name) > 0) {
739     // If the field name is a keyword, we append the underscore back to keep it
740     // consistent with other function names.
741     function_name.append("_");
742   }
743   return function_name;
744 }
745 
IsStringInlined(const FieldDescriptor * descriptor,const Options & options)746 bool IsStringInlined(const FieldDescriptor* descriptor,
747                      const Options& options) {
748   if (options.opensource_runtime) return false;
749 
750   // TODO(ckennelly): Handle inlining for any.proto.
751   if (IsAnyMessage(descriptor->containing_type(), options)) return false;
752   if (descriptor->containing_type()->options().map_entry()) return false;
753 
754   // Limit to proto2, as we rely on has bits to distinguish field presence for
755   // release_$name$.  On proto3, we cannot use the address of the string
756   // instance when the field has been inlined.
757   if (!HasFieldPresence(descriptor->file())) return false;
758 
759   if (options.access_info_map) {
760     if (descriptor->is_required()) return true;
761   }
762   return false;
763 }
764 
HasLazyFields(const Descriptor * descriptor,const Options & options)765 static bool HasLazyFields(const Descriptor* descriptor,
766                           const Options& options) {
767   for (int field_idx = 0; field_idx < descriptor->field_count(); field_idx++) {
768     if (IsLazy(descriptor->field(field_idx), options)) {
769       return true;
770     }
771   }
772   for (int idx = 0; idx < descriptor->extension_count(); idx++) {
773     if (IsLazy(descriptor->extension(idx), options)) {
774       return true;
775     }
776   }
777   for (int idx = 0; idx < descriptor->nested_type_count(); idx++) {
778     if (HasLazyFields(descriptor->nested_type(idx), options)) {
779       return true;
780     }
781   }
782   return false;
783 }
784 
785 // Does the given FileDescriptor use lazy fields?
HasLazyFields(const FileDescriptor * file,const Options & options)786 bool HasLazyFields(const FileDescriptor* file, const Options& options) {
787   for (int i = 0; i < file->message_type_count(); i++) {
788     const Descriptor* descriptor(file->message_type(i));
789     if (HasLazyFields(descriptor, options)) {
790       return true;
791     }
792   }
793   for (int field_idx = 0; field_idx < file->extension_count(); field_idx++) {
794     if (IsLazy(file->extension(field_idx), options)) {
795       return true;
796     }
797   }
798   return false;
799 }
800 
HasRepeatedFields(const Descriptor * descriptor)801 static bool HasRepeatedFields(const Descriptor* descriptor) {
802   for (int i = 0; i < descriptor->field_count(); ++i) {
803     if (descriptor->field(i)->label() == FieldDescriptor::LABEL_REPEATED) {
804       return true;
805     }
806   }
807   for (int i = 0; i < descriptor->nested_type_count(); ++i) {
808     if (HasRepeatedFields(descriptor->nested_type(i))) return true;
809   }
810   return false;
811 }
812 
HasRepeatedFields(const FileDescriptor * file)813 bool HasRepeatedFields(const FileDescriptor* file) {
814   for (int i = 0; i < file->message_type_count(); ++i) {
815     if (HasRepeatedFields(file->message_type(i))) return true;
816   }
817   return false;
818 }
819 
IsStringPieceField(const FieldDescriptor * field,const Options & options)820 static bool IsStringPieceField(const FieldDescriptor* field,
821                                const Options& options) {
822   return field->cpp_type() == FieldDescriptor::CPPTYPE_STRING &&
823          EffectiveStringCType(field, options) == FieldOptions::STRING_PIECE;
824 }
825 
HasStringPieceFields(const Descriptor * descriptor,const Options & options)826 static bool HasStringPieceFields(const Descriptor* descriptor,
827                                  const Options& options) {
828   for (int i = 0; i < descriptor->field_count(); ++i) {
829     if (IsStringPieceField(descriptor->field(i), options)) return true;
830   }
831   for (int i = 0; i < descriptor->nested_type_count(); ++i) {
832     if (HasStringPieceFields(descriptor->nested_type(i), options)) return true;
833   }
834   return false;
835 }
836 
HasStringPieceFields(const FileDescriptor * file,const Options & options)837 bool HasStringPieceFields(const FileDescriptor* file, const Options& options) {
838   for (int i = 0; i < file->message_type_count(); ++i) {
839     if (HasStringPieceFields(file->message_type(i), options)) return true;
840   }
841   return false;
842 }
843 
IsCordField(const FieldDescriptor * field,const Options & options)844 static bool IsCordField(const FieldDescriptor* field, const Options& options) {
845   return field->cpp_type() == FieldDescriptor::CPPTYPE_STRING &&
846          EffectiveStringCType(field, options) == FieldOptions::CORD;
847 }
848 
HasCordFields(const Descriptor * descriptor,const Options & options)849 static bool HasCordFields(const Descriptor* descriptor,
850                           const Options& options) {
851   for (int i = 0; i < descriptor->field_count(); ++i) {
852     if (IsCordField(descriptor->field(i), options)) return true;
853   }
854   for (int i = 0; i < descriptor->nested_type_count(); ++i) {
855     if (HasCordFields(descriptor->nested_type(i), options)) return true;
856   }
857   return false;
858 }
859 
HasCordFields(const FileDescriptor * file,const Options & options)860 bool HasCordFields(const FileDescriptor* file, const Options& options) {
861   for (int i = 0; i < file->message_type_count(); ++i) {
862     if (HasCordFields(file->message_type(i), options)) return true;
863   }
864   return false;
865 }
866 
HasExtensionsOrExtendableMessage(const Descriptor * descriptor)867 static bool HasExtensionsOrExtendableMessage(const Descriptor* descriptor) {
868   if (descriptor->extension_range_count() > 0) return true;
869   if (descriptor->extension_count() > 0) return true;
870   for (int i = 0; i < descriptor->nested_type_count(); ++i) {
871     if (HasExtensionsOrExtendableMessage(descriptor->nested_type(i))) {
872       return true;
873     }
874   }
875   return false;
876 }
877 
HasExtensionsOrExtendableMessage(const FileDescriptor * file)878 bool HasExtensionsOrExtendableMessage(const FileDescriptor* file) {
879   if (file->extension_count() > 0) return true;
880   for (int i = 0; i < file->message_type_count(); ++i) {
881     if (HasExtensionsOrExtendableMessage(file->message_type(i))) return true;
882   }
883   return false;
884 }
885 
HasMapFields(const Descriptor * descriptor)886 static bool HasMapFields(const Descriptor* descriptor) {
887   for (int i = 0; i < descriptor->field_count(); ++i) {
888     if (descriptor->field(i)->is_map()) {
889       return true;
890     }
891   }
892   for (int i = 0; i < descriptor->nested_type_count(); ++i) {
893     if (HasMapFields(descriptor->nested_type(i))) return true;
894   }
895   return false;
896 }
897 
HasMapFields(const FileDescriptor * file)898 bool HasMapFields(const FileDescriptor* file) {
899   for (int i = 0; i < file->message_type_count(); ++i) {
900     if (HasMapFields(file->message_type(i))) return true;
901   }
902   return false;
903 }
904 
HasEnumDefinitions(const Descriptor * message_type)905 static bool HasEnumDefinitions(const Descriptor* message_type) {
906   if (message_type->enum_type_count() > 0) return true;
907   for (int i = 0; i < message_type->nested_type_count(); ++i) {
908     if (HasEnumDefinitions(message_type->nested_type(i))) return true;
909   }
910   return false;
911 }
912 
HasEnumDefinitions(const FileDescriptor * file)913 bool HasEnumDefinitions(const FileDescriptor* file) {
914   if (file->enum_type_count() > 0) return true;
915   for (int i = 0; i < file->message_type_count(); ++i) {
916     if (HasEnumDefinitions(file->message_type(i))) return true;
917   }
918   return false;
919 }
920 
IsStringOrMessage(const FieldDescriptor * field)921 bool IsStringOrMessage(const FieldDescriptor* field) {
922   switch (field->cpp_type()) {
923     case FieldDescriptor::CPPTYPE_INT32:
924     case FieldDescriptor::CPPTYPE_INT64:
925     case FieldDescriptor::CPPTYPE_UINT32:
926     case FieldDescriptor::CPPTYPE_UINT64:
927     case FieldDescriptor::CPPTYPE_DOUBLE:
928     case FieldDescriptor::CPPTYPE_FLOAT:
929     case FieldDescriptor::CPPTYPE_BOOL:
930     case FieldDescriptor::CPPTYPE_ENUM:
931       return false;
932     case FieldDescriptor::CPPTYPE_STRING:
933     case FieldDescriptor::CPPTYPE_MESSAGE:
934       return true;
935   }
936 
937   GOOGLE_LOG(FATAL) << "Can't get here.";
938   return false;
939 }
940 
EffectiveStringCType(const FieldDescriptor * field,const Options & options)941 FieldOptions::CType EffectiveStringCType(const FieldDescriptor* field,
942                                          const Options& options) {
943   GOOGLE_DCHECK(field->cpp_type() == FieldDescriptor::CPPTYPE_STRING);
944   if (options.opensource_runtime) {
945     // Open-source protobuf release only supports STRING ctype.
946     return FieldOptions::STRING;
947   } else {
948     // Google-internal supports all ctypes.
949     return field->options().ctype();
950   }
951 }
952 
IsAnyMessage(const FileDescriptor * descriptor,const Options & options)953 bool IsAnyMessage(const FileDescriptor* descriptor, const Options& options) {
954   return descriptor->name() == kAnyProtoFile;
955 }
956 
IsAnyMessage(const Descriptor * descriptor,const Options & options)957 bool IsAnyMessage(const Descriptor* descriptor, const Options& options) {
958   return descriptor->name() == kAnyMessageName &&
959          IsAnyMessage(descriptor->file(), options);
960 }
961 
IsWellKnownMessage(const FileDescriptor * file)962 bool IsWellKnownMessage(const FileDescriptor* file) {
963   static const std::unordered_set<std::string> well_known_files{
964       "google/protobuf/any.proto",
965       "google/protobuf/api.proto",
966       "google/protobuf/compiler/plugin.proto",
967       "google/protobuf/descriptor.proto",
968       "google/protobuf/duration.proto",
969       "google/protobuf/empty.proto",
970       "google/protobuf/field_mask.proto",
971       "google/protobuf/source_context.proto",
972       "google/protobuf/struct.proto",
973       "google/protobuf/timestamp.proto",
974       "google/protobuf/type.proto",
975       "google/protobuf/wrappers.proto",
976   };
977   return well_known_files.find(file->name()) != well_known_files.end();
978 }
979 
// How strictly string fields are checked for valid UTF-8.
enum Utf8CheckMode {
  // Parsing will fail if non UTF-8 data is in string fields.
  STRICT = 0,
  // Only log an error but parsing will succeed.
  VERIFY = 1,
  // No UTF-8 check.
  NONE = 2,
};
985 
// Reports whether UTF-8 enforcement applies to |field|. This implementation
// answers true unconditionally; neither |field| nor |options| is consulted.
static bool FieldEnforceUtf8(const FieldDescriptor* field,
                             const Options& options) {
  return true;
}
990 
// Reports whether UTF-8 verification applies to |file|. This implementation
// answers true unconditionally; neither |file| nor |options| is consulted.
static bool FileUtf8Verification(const FileDescriptor* file,
                                 const Options& options) {
  return true;
}
995 
996 // Which level of UTF-8 enforcemant is placed on this file.
GetUtf8CheckMode(const FieldDescriptor * field,const Options & options)997 static Utf8CheckMode GetUtf8CheckMode(const FieldDescriptor* field,
998                                       const Options& options) {
999   if (field->file()->syntax() == FileDescriptor::SYNTAX_PROTO3 &&
1000       FieldEnforceUtf8(field, options)) {
1001     return STRICT;
1002   } else if (GetOptimizeFor(field->file(), options) !=
1003                  FileOptions::LITE_RUNTIME &&
1004              FileUtf8Verification(field->file(), options)) {
1005     return VERIFY;
1006   } else {
1007     return NONE;
1008   }
1009 }
1010 
GetUtf8Suffix(const FieldDescriptor * field,const Options & options)1011 std::string GetUtf8Suffix(const FieldDescriptor* field,
1012                           const Options& options) {
1013   switch (GetUtf8CheckMode(field, options)) {
1014     case STRICT:
1015       return "UTF8";
1016     case VERIFY:
1017       return "UTF8Verify";
1018     case NONE:
1019     default:  // Some build configs warn on missing return without default.
1020       return "";
1021   }
1022 }
1023 
GenerateUtf8CheckCode(const FieldDescriptor * field,const Options & options,bool for_parse,const char * parameters,const char * strict_function,const char * verify_function,const Formatter & format)1024 static void GenerateUtf8CheckCode(const FieldDescriptor* field,
1025                                   const Options& options, bool for_parse,
1026                                   const char* parameters,
1027                                   const char* strict_function,
1028                                   const char* verify_function,
1029                                   const Formatter& format) {
1030   switch (GetUtf8CheckMode(field, options)) {
1031     case STRICT: {
1032       if (for_parse) {
1033         format("DO_(");
1034       }
1035       format("::$proto_ns$::internal::WireFormatLite::$1$(\n", strict_function);
1036       format.Indent();
1037       format(parameters);
1038       if (for_parse) {
1039         format("::$proto_ns$::internal::WireFormatLite::PARSE,\n");
1040       } else {
1041         format("::$proto_ns$::internal::WireFormatLite::SERIALIZE,\n");
1042       }
1043       format("\"$1$\")", field->full_name());
1044       if (for_parse) {
1045         format(")");
1046       }
1047       format(";\n");
1048       format.Outdent();
1049       break;
1050     }
1051     case VERIFY: {
1052       format("::$proto_ns$::internal::WireFormat::$1$(\n", verify_function);
1053       format.Indent();
1054       format(parameters);
1055       if (for_parse) {
1056         format("::$proto_ns$::internal::WireFormat::PARSE,\n");
1057       } else {
1058         format("::$proto_ns$::internal::WireFormat::SERIALIZE,\n");
1059       }
1060       format("\"$1$\");\n", field->full_name());
1061       format.Outdent();
1062       break;
1063     }
1064     case NONE:
1065       break;
1066   }
1067 }
1068 
// Forwards to GenerateUtf8CheckCode(), selecting "VerifyUtf8String" as the
// strict-mode function name and "VerifyUTF8StringNamedField" as the
// verify-mode function name. All other arguments are passed through as is.
void GenerateUtf8CheckCodeForString(const FieldDescriptor* field,
                                    const Options& options, bool for_parse,
                                    const char* parameters,
                                    const Formatter& format) {
  GenerateUtf8CheckCode(field, options, for_parse, parameters,
                        "VerifyUtf8String", "VerifyUTF8StringNamedField",
                        format);
}
1077 
// Forwards to GenerateUtf8CheckCode(), selecting "VerifyUtf8Cord" as the
// strict-mode function name and "VerifyUTF8CordNamedField" as the verify-mode
// function name. All other arguments are passed through as is.
void GenerateUtf8CheckCodeForCord(const FieldDescriptor* field,
                                  const Options& options, bool for_parse,
                                  const char* parameters,
                                  const Formatter& format) {
  GenerateUtf8CheckCode(field, options, for_parse, parameters, "VerifyUtf8Cord",
                        "VerifyUTF8CordNamedField", format);
}
1085 
1086 namespace {
1087 
Flatten(const Descriptor * descriptor,std::vector<const Descriptor * > * flatten)1088 void Flatten(const Descriptor* descriptor,
1089              std::vector<const Descriptor*>* flatten) {
1090   for (int i = 0; i < descriptor->nested_type_count(); i++)
1091     Flatten(descriptor->nested_type(i), flatten);
1092   flatten->push_back(descriptor);
1093 }
1094 
1095 }  // namespace
1096 
FlattenMessagesInFile(const FileDescriptor * file,std::vector<const Descriptor * > * result)1097 void FlattenMessagesInFile(const FileDescriptor* file,
1098                            std::vector<const Descriptor*>* result) {
1099   for (int i = 0; i < file->message_type_count(); i++) {
1100     Flatten(file->message_type(i), result);
1101   }
1102 }
1103 
HasWeakFields(const Descriptor * descriptor,const Options & options)1104 bool HasWeakFields(const Descriptor* descriptor, const Options& options) {
1105   for (int i = 0; i < descriptor->field_count(); i++) {
1106     if (IsWeak(descriptor->field(i), options)) return true;
1107   }
1108   return false;
1109 }
1110 
HasWeakFields(const FileDescriptor * file,const Options & options)1111 bool HasWeakFields(const FileDescriptor* file, const Options& options) {
1112   for (int i = 0; i < file->message_type_count(); ++i) {
1113     if (HasWeakFields(file->message_type(i), options)) return true;
1114   }
1115   return false;
1116 }
1117 
UsingImplicitWeakFields(const FileDescriptor * file,const Options & options)1118 bool UsingImplicitWeakFields(const FileDescriptor* file,
1119                              const Options& options) {
1120   return options.lite_implicit_weak_fields &&
1121          GetOptimizeFor(file, options) == FileOptions::LITE_RUNTIME;
1122 }
1123 
IsImplicitWeakField(const FieldDescriptor * field,const Options & options,MessageSCCAnalyzer * scc_analyzer)1124 bool IsImplicitWeakField(const FieldDescriptor* field, const Options& options,
1125                          MessageSCCAnalyzer* scc_analyzer) {
1126   return UsingImplicitWeakFields(field->file(), options) &&
1127          field->type() == FieldDescriptor::TYPE_MESSAGE &&
1128          !field->is_required() && !field->is_map() &&
1129          field->containing_oneof() == nullptr &&
1130          !IsWellKnownMessage(field->message_type()->file()) &&
1131          field->message_type()->file()->name() !=
1132              "net/proto2/proto/descriptor.proto" &&
1133          // We do not support implicit weak fields between messages in the same
1134          // strongly-connected component.
1135          scc_analyzer->GetSCC(field->containing_type()) !=
1136              scc_analyzer->GetSCC(field->message_type());
1137 }
1138 
GetSCCAnalysis(const SCC * scc)1139 MessageAnalysis MessageSCCAnalyzer::GetSCCAnalysis(const SCC* scc) {
1140   if (analysis_cache_.count(scc)) return analysis_cache_[scc];
1141   MessageAnalysis result{};
1142   for (int i = 0; i < scc->descriptors.size(); i++) {
1143     const Descriptor* descriptor = scc->descriptors[i];
1144     if (descriptor->extension_range_count() > 0) {
1145       result.contains_extension = true;
1146       // Extensions are found by looking up default_instance and extension
1147       // number in a map. So you'd maybe expect here
1148       // result.constructor_requires_initialization = true;
1149       // However the extension registration mechanism already makes sure
1150       // the default will be initialized.
1151     }
1152     for (int i = 0; i < descriptor->field_count(); i++) {
1153       const FieldDescriptor* field = descriptor->field(i);
1154       if (field->is_required()) {
1155         result.contains_required = true;
1156       }
1157       switch (field->type()) {
1158         case FieldDescriptor::TYPE_STRING:
1159         case FieldDescriptor::TYPE_BYTES: {
1160           result.constructor_requires_initialization = true;
1161           if (field->options().ctype() == FieldOptions::CORD) {
1162             result.contains_cord = true;
1163           }
1164           break;
1165         }
1166         case FieldDescriptor::TYPE_GROUP:
1167         case FieldDescriptor::TYPE_MESSAGE: {
1168           result.constructor_requires_initialization = true;
1169           const SCC* child = analyzer_.GetSCC(field->message_type());
1170           if (child != scc) {
1171             MessageAnalysis analysis = GetSCCAnalysis(child);
1172             result.contains_cord |= analysis.contains_cord;
1173             result.contains_extension |= analysis.contains_extension;
1174             if (!ShouldIgnoreRequiredFieldCheck(field, options_)) {
1175               result.contains_required |= analysis.contains_required;
1176             }
1177           } else {
1178             // This field points back into the same SCC hence the messages
1179             // in the SCC are recursive. Note if SCC contains more than two
1180             // nodes it has to be recursive, however this test also works for
1181             // a single node that is recursive.
1182             result.is_recursive = true;
1183           }
1184           break;
1185         }
1186         default:
1187           break;
1188       }
1189     }
1190   }
1191   // We deliberately only insert the result here. After we contracted the SCC
1192   // in the graph, the graph should be a DAG. Hence we shouldn't need to mark
1193   // nodes visited as we can never return to them. By inserting them here
1194   // we will go in an infinite loop if the SCC is not correct.
1195   return analysis_cache_[scc] = result;
1196 }
1197 
ListAllFields(const Descriptor * d,std::vector<const FieldDescriptor * > * fields)1198 void ListAllFields(const Descriptor* d,
1199                    std::vector<const FieldDescriptor*>* fields) {
1200   // Collect sub messages
1201   for (int i = 0; i < d->nested_type_count(); i++) {
1202     ListAllFields(d->nested_type(i), fields);
1203   }
1204   // Collect message level extensions.
1205   for (int i = 0; i < d->extension_count(); i++) {
1206     fields->push_back(d->extension(i));
1207   }
1208   // Add types of fields necessary
1209   for (int i = 0; i < d->field_count(); i++) {
1210     fields->push_back(d->field(i));
1211   }
1212 }
1213 
ListAllFields(const FileDescriptor * d,std::vector<const FieldDescriptor * > * fields)1214 void ListAllFields(const FileDescriptor* d,
1215                    std::vector<const FieldDescriptor*>* fields) {
1216   // Collect file level message.
1217   for (int i = 0; i < d->message_type_count(); i++) {
1218     ListAllFields(d->message_type(i), fields);
1219   }
1220   // Collect message level extensions.
1221   for (int i = 0; i < d->extension_count(); i++) {
1222     fields->push_back(d->extension(i));
1223   }
1224 }
1225 
ListAllTypesForServices(const FileDescriptor * fd,std::vector<const Descriptor * > * types)1226 void ListAllTypesForServices(const FileDescriptor* fd,
1227                              std::vector<const Descriptor*>* types) {
1228   for (int i = 0; i < fd->service_count(); i++) {
1229     const ServiceDescriptor* sd = fd->service(i);
1230     for (int j = 0; j < sd->method_count(); j++) {
1231       const MethodDescriptor* method = sd->method(j);
1232       types->push_back(method->input_type());
1233       types->push_back(method->output_type());
1234     }
1235   }
1236 }
1237 
GetBootstrapBasename(const Options & options,const std::string & basename,std::string * bootstrap_basename)1238 bool GetBootstrapBasename(const Options& options, const std::string& basename,
1239                           std::string* bootstrap_basename) {
1240   if (options.opensource_runtime) {
1241     return false;
1242   }
1243 
1244   std::unordered_map<std::string, std::string> bootstrap_mapping{
1245       {"net/proto2/proto/descriptor",
1246        "net/proto2/internal/descriptor"},
1247       {"net/proto2/compiler/proto/plugin",
1248        "net/proto2/compiler/proto/plugin"},
1249       {"net/proto2/compiler/proto/profile",
1250        "net/proto2/compiler/proto/profile_bootstrap"},
1251   };
1252   auto iter = bootstrap_mapping.find(basename);
1253   if (iter == bootstrap_mapping.end()) {
1254     *bootstrap_basename = basename;
1255     return false;
1256   } else {
1257     *bootstrap_basename = iter->second;
1258     return true;
1259   }
1260 }
1261 
IsBootstrapProto(const Options & options,const FileDescriptor * file)1262 bool IsBootstrapProto(const Options& options, const FileDescriptor* file) {
1263   std::string my_name = StripProto(file->name());
1264   return GetBootstrapBasename(options, my_name, &my_name);
1265 }
1266 
MaybeBootstrap(const Options & options,GeneratorContext * generator_context,bool bootstrap_flag,std::string * basename)1267 bool MaybeBootstrap(const Options& options, GeneratorContext* generator_context,
1268                     bool bootstrap_flag, std::string* basename) {
1269   std::string bootstrap_basename;
1270   if (!GetBootstrapBasename(options, *basename, &bootstrap_basename)) {
1271     return false;
1272   }
1273 
1274   if (bootstrap_flag) {
1275     // Adjust basename, but don't abort code generation.
1276     *basename = bootstrap_basename;
1277     return false;
1278   } else {
1279     std::string forward_to_basename = bootstrap_basename;
1280 
1281     // Generate forwarding headers and empty .pb.cc.
1282     {
1283       std::unique_ptr<io::ZeroCopyOutputStream> output(
1284           generator_context->Open(*basename + ".pb.h"));
1285       io::Printer printer(output.get(), '$', nullptr);
1286       printer.Print(
1287           "#ifndef PROTOBUF_INCLUDED_$filename_identifier$_FORWARD_PB_H\n"
1288           "#define PROTOBUF_INCLUDED_$filename_identifier$_FORWARD_PB_H\n"
1289           "#include \"$forward_to_basename$.pb.h\"  // IWYU pragma: export\n"
1290           "#endif  // PROTOBUF_INCLUDED_$filename_identifier$_FORWARD_PB_H\n",
1291           "forward_to_basename", forward_to_basename, "filename_identifier",
1292           FilenameIdentifier(*basename));
1293 
1294       if (!options.opensource_runtime) {
1295         // HACK HACK HACK, tech debt from the deeps of proto1 and SWIG
1296         // protocoltype is SWIG'ed and we need to forward
1297         if (*basename == "net/proto/protocoltype") {
1298           printer.Print(
1299               "#ifdef SWIG\n"
1300               "%include \"$forward_to_basename$.pb.h\"\n"
1301               "#endif  // SWIG\n",
1302               "forward_to_basename", forward_to_basename);
1303         }
1304       }
1305     }
1306 
1307     {
1308       std::unique_ptr<io::ZeroCopyOutputStream> output(
1309           generator_context->Open(*basename + ".proto.h"));
1310       io::Printer printer(output.get(), '$', nullptr);
1311       printer.Print(
1312           "#ifndef PROTOBUF_INCLUDED_$filename_identifier$_FORWARD_PROTO_H\n"
1313           "#define PROTOBUF_INCLUDED_$filename_identifier$_FORWARD_PROTO_H\n"
1314           "#include \"$forward_to_basename$.proto.h\"  // IWYU pragma: "
1315           "export\n"
1316           "#endif  // "
1317           "PROTOBUF_INCLUDED_$filename_identifier$_FORWARD_PROTO_H\n",
1318           "forward_to_basename", forward_to_basename, "filename_identifier",
1319           FilenameIdentifier(*basename));
1320     }
1321 
1322     {
1323       std::unique_ptr<io::ZeroCopyOutputStream> output(
1324           generator_context->Open(*basename + ".pb.cc"));
1325       io::Printer printer(output.get(), '$', nullptr);
1326       printer.Print("\n");
1327     }
1328 
1329     {
1330       std::unique_ptr<io::ZeroCopyOutputStream> output(
1331           generator_context->Open(*basename + ".pb.h.meta"));
1332     }
1333 
1334     {
1335       std::unique_ptr<io::ZeroCopyOutputStream> output(
1336           generator_context->Open(*basename + ".proto.h.meta"));
1337     }
1338 
1339     // Abort code generation.
1340     return true;
1341   }
1342 }
1343 
1344 class ParseLoopGenerator {
1345  public:
  // Records the has-bit count and the SCC analyzer pointer, binds |options|
  // to the reference member options_ (so |options| has to stay alive for as
  // long as this object is used), and constructs format_ from |printer|.
  ParseLoopGenerator(int num_hasbits, const Options& options,
                     MessageSCCAnalyzer* scc_analyzer, io::Printer* printer)
      : scc_analyzer_(scc_analyzer),
        options_(options),
        format_(printer),
        num_hasbits_(num_hasbits) {}
1352 
GenerateParserLoop(const Descriptor * descriptor)1353   void GenerateParserLoop(const Descriptor* descriptor) {
1354     format_.Set("classname", ClassName(descriptor));
1355     format_.Set("p_ns", "::" + ProtobufNamespace(options_));
1356     format_.Set("pi_ns",
1357                 StrCat("::", ProtobufNamespace(options_), "::internal"));
1358     format_.Set("GOOGLE_PROTOBUF", MacroPrefix(options_));
1359     std::map<std::string, std::string> vars;
1360     SetCommonVars(options_, &vars);
1361     format_.AddMap(vars);
1362 
1363     std::vector<const FieldDescriptor*> ordered_fields;
1364     for (auto field : FieldRange(descriptor)) {
1365       ordered_fields.push_back(field);
1366     }
1367     std::sort(ordered_fields.begin(), ordered_fields.end(),
1368               [](const FieldDescriptor* a, const FieldDescriptor* b) {
1369                 return a->number() < b->number();
1370               });
1371 
1372     format_(
1373         "const char* $classname$::_InternalParse(const char* ptr, "
1374         "$pi_ns$::ParseContext* ctx) {\n"
1375         "#define CHK_(x) if (PROTOBUF_PREDICT_FALSE(!(x))) goto failure\n");
1376     format_.Indent();
1377     int hasbits_size = 0;
1378     if (HasFieldPresence(descriptor->file())) {
1379       hasbits_size = (num_hasbits_ + 31) / 32;
1380     }
1381     // For now only optimize small hasbits.
1382     if (hasbits_size != 1) hasbits_size = 0;
1383     if (hasbits_size) {
1384       format_("_Internal::HasBits has_bits{};\n");
1385       format_.Set("has_bits", "has_bits");
1386     } else {
1387       format_.Set("has_bits", "_has_bits_");
1388     }
1389 
1390     if (descriptor->file()->options().cc_enable_arenas()) {
1391       format_("$p_ns$::Arena* arena = GetArenaNoVirtual(); (void)arena;\n");
1392     }
1393     GenerateParseLoop(descriptor, ordered_fields);
1394     format_.Outdent();
1395     format_("success:\n");
1396     if (hasbits_size) format_("  _has_bits_.Or(has_bits);\n");
1397     format_(
1398         "  return ptr;\n"
1399         "failure:\n"
1400         "  ptr = nullptr;\n"
1401         "  goto success;\n"
1402         "#undef CHK_\n"
1403         "}\n");
1404   }
1405 
1406  private:
1407   MessageSCCAnalyzer* scc_analyzer_;
1408   const Options& options_;
1409   Formatter format_;
1410   int num_hasbits_;
1411 
1412   using WireFormat = internal::WireFormat;
1413   using WireFormatLite = internal::WireFormatLite;
1414 
GenerateArenaString(const FieldDescriptor * field,const std::string & utf8,std::string field_name)1415   void GenerateArenaString(const FieldDescriptor* field,
1416                            const std::string& utf8, std::string field_name) {
1417     if (!field_name.empty()) {
1418       format_("static const char kFieldName[] = $1$;\n",
1419               field_name.substr(2));  // remove ", "
1420       field_name = ", kFieldName";
1421     }
1422     if (HasFieldPresence(field->file())) {
1423       format_("_Internal::set_has_$1$(&$has_bits$);\n", FieldName(field));
1424     }
1425     string default_string =
1426         field->default_value_string().empty()
1427             ? "::" + ProtobufNamespace(options_) +
1428                   "::internal::GetEmptyStringAlreadyInited()"
1429             : QualifiedClassName(field->containing_type(), options_) +
1430                   "::" + MakeDefaultName(field) + ".get()";
1431     format_(
1432         "if (arena != nullptr) {\n"
1433         "  ptr = $pi_ns$::InlineCopyIntoArenaString$1$(&$2$_, ptr, ctx, "
1434         "  arena$3$);\n"
1435         "} else {\n"
1436         "  ptr = "
1437         "$pi_ns$::InlineGreedyStringParser$1$($2$_.MutableNoArenaNoDefault(&$4$"
1438         "), ptr, ctx$3$);"
1439         "\n}\n",
1440         utf8, FieldName(field), field_name, default_string);
1441   }
1442 
GenerateStrings(const FieldDescriptor * field,bool check_utf8)1443   void GenerateStrings(const FieldDescriptor* field, bool check_utf8) {
1444     std::string utf8;
1445     std::string field_name;
1446     if (check_utf8) {
1447       utf8 = GetUtf8Suffix(field, options_);
1448       if (!utf8.empty()) {
1449         field_name = ", nullptr";
1450         if (HasDescriptorMethods(field->file(), options_)) {
1451           field_name = StrCat(", \"", field->full_name(), "\"");
1452         }
1453       }
1454     }
1455     FieldOptions::CType ctype = FieldOptions::STRING;
1456     if (!options_.opensource_runtime) {
1457       // Open source doesn't support other ctypes;
1458       ctype = field->options().ctype();
1459     }
1460     if (field->file()->options().cc_enable_arenas() && !field->is_repeated() &&
1461         !options_.opensource_runtime &&
1462         GetOptimizeFor(field->file(), options_) != FileOptions::LITE_RUNTIME &&
1463         // For now only use arena string for strings with empty defaults.
1464         field->default_value_string().empty() &&
1465         !IsStringInlined(field, options_) &&
1466         field->containing_oneof() == nullptr && ctype == FieldOptions::STRING) {
1467       GenerateArenaString(field, utf8, field_name);
1468       return;
1469     }
1470     std::string name;
1471     switch (ctype) {
1472       case FieldOptions::STRING:
1473         name = "GreedyStringParser" + utf8;
1474         break;
1475       case FieldOptions::CORD:
1476         name = "CordParser" + utf8;
1477         break;
1478       case FieldOptions::STRING_PIECE:
1479         name = "StringPieceParser" + utf8;
1480         break;
1481     }
1482     format_("ptr = $pi_ns$::Inline$1$($2$_$3$(), ptr, ctx$4$);\n", name,
1483             field->is_repeated() && !field->is_packable() ? "add" : "mutable",
1484             FieldName(field), field_name);
1485   }
1486 
GenerateLengthDelim(const FieldDescriptor * field)1487   void GenerateLengthDelim(const FieldDescriptor* field) {
1488     if (field->is_packable()) {
1489       std::string enum_validator;
1490       if (field->type() == FieldDescriptor::TYPE_ENUM &&
1491           !HasPreservingUnknownEnumSemantics(field)) {
1492         enum_validator =
1493             StrCat(", ", QualifiedClassName(field->enum_type(), options_),
1494                          "_IsValid, &_internal_metadata_, ", field->number());
1495       }
1496       format_("ptr = $pi_ns$::Packed$1$Parser(mutable_$2$(), ptr, ctx$3$);\n",
1497               DeclaredTypeMethodName(field->type()), FieldName(field),
1498               enum_validator);
1499     } else {
1500       auto field_type = field->type();
1501       switch (field_type) {
1502         case FieldDescriptor::TYPE_STRING:
1503           GenerateStrings(field, true /* utf8 */);
1504           break;
1505         case FieldDescriptor::TYPE_BYTES:
1506           GenerateStrings(field, false /* utf8 */);
1507           break;
1508         case FieldDescriptor::TYPE_MESSAGE: {
1509           if (field->is_map()) {
1510             const FieldDescriptor* val =
1511                 field->message_type()->FindFieldByName("value");
1512             GOOGLE_CHECK(val);
1513             if (HasFieldPresence(field->file()) &&
1514                 val->type() == FieldDescriptor::TYPE_ENUM) {
1515               format_(
1516                   "auto object = ::$proto_ns$::internal::InitEnumParseWrapper("
1517                   "&$1$_, $2$_IsValid, $3$, &_internal_metadata_);\n"
1518                   "ptr = ctx->ParseMessage(&object, ptr);\n",
1519                   FieldName(field), QualifiedClassName(val->enum_type()),
1520                   field->number());
1521             } else {
1522               format_("ptr = ctx->ParseMessage(&$1$_, ptr);\n",
1523                       FieldName(field));
1524             }
1525           } else if (IsLazy(field, options_)) {
1526             if (field->containing_oneof() != nullptr) {
1527               format_(
1528                   "if (!has_$1$()) {\n"
1529                   "  clear_$1$();\n"
1530                   "  $2$_.$1$_ = ::$proto_ns$::Arena::CreateMessage<\n"
1531                   "      $pi_ns$::LazyField>("
1532                   "GetArenaNoVirtual());\n"
1533                   "  set_has_$1$();\n"
1534                   "}\n"
1535                   "ptr = ctx->ParseMessage($2$_.$1$_, ptr);\n",
1536                   FieldName(field), field->containing_oneof()->name());
1537             } else if (HasFieldPresence(field->file())) {
1538               format_(
1539                   "_Internal::set_has_$1$(&$has_bits$);\n"
1540                   "ptr = ctx->ParseMessage(&$1$_, ptr);\n",
1541                   FieldName(field));
1542             } else {
1543               format_("ptr = ctx->ParseMessage(&$1$_, ptr);\n",
1544                       FieldName(field));
1545             }
1546           } else if (IsImplicitWeakField(field, options_, scc_analyzer_)) {
1547             if (!field->is_repeated()) {
1548               format_(
1549                   "ptr = ctx->ParseMessage(_Internal::mutable_$1$(this), "
1550                   "ptr);\n",
1551                   FieldName(field));
1552             } else {
1553               format_(
1554                   "ptr = ctx->ParseMessage("
1555                   "CastToBase(&$1$_)->AddWeak(reinterpret_cast<const "
1556                   "::$proto_ns$::MessageLite*>(&$2$::_$3$_default_instance_)), "
1557                   "ptr);\n",
1558                   FieldName(field), Namespace(field->message_type(), options_),
1559                   ClassName(field->message_type()));
1560             }
1561           } else if (IsWeak(field, options_)) {
1562             format_(
1563                 "ptr = ctx->ParseMessage(_weak_field_map_.MutableMessage($1$,"
1564                 " _$classname$_default_instance_.$2$_), ptr);\n",
1565                 field->number(), FieldName(field));
1566           } else {
1567             format_("ptr = ctx->ParseMessage($1$_$2$(), ptr);\n",
1568                     field->is_repeated() ? "add" : "mutable", FieldName(field));
1569           }
1570           break;
1571         }
1572         default:
1573           GOOGLE_LOG(FATAL) << "Illegal combination for length delimited wiretype "
1574                      << " filed type is " << field->type();
1575       }
1576     }
1577   }
1578 
1579   // Convert a 1 or 2 byte varint into the equivalent value upon a direct load.
SmallVarintValue(uint32 x)1580   static uint32 SmallVarintValue(uint32 x) {
1581     GOOGLE_DCHECK(x < 128 * 128);
1582     if (x >= 128) x += (x & 0xFF80) + 128;
1583     return x;
1584   }
1585 
ShouldRepeat(const FieldDescriptor * descriptor,internal::WireFormatLite::WireType wiretype)1586   static bool ShouldRepeat(const FieldDescriptor* descriptor,
1587                            internal::WireFormatLite::WireType wiretype) {
1588     constexpr int kMaxTwoByteFieldNumber = 16 * 128;
1589     return descriptor->number() < kMaxTwoByteFieldNumber &&
1590            descriptor->is_repeated() &&
1591            (!descriptor->is_packable() ||
1592             wiretype != internal::WireFormatLite::WIRETYPE_LENGTH_DELIMITED);
1593   }
1594 
GenerateFieldBody(internal::WireFormatLite::WireType wiretype,const FieldDescriptor * field)1595   void GenerateFieldBody(internal::WireFormatLite::WireType wiretype,
1596                          const FieldDescriptor* field) {
1597     uint32 tag = WireFormatLite::MakeTag(field->number(), wiretype);
1598     switch (wiretype) {
1599       case WireFormatLite::WIRETYPE_VARINT: {
1600         std::string type = PrimitiveTypeName(options_, field->cpp_type());
1601         std::string prefix = field->is_repeated() ? "add" : "set";
1602         if (field->type() == FieldDescriptor::TYPE_ENUM) {
1603           format_(
1604               "$uint64$ val = $pi_ns$::ReadVarint(&ptr);\n"
1605               "CHK_(ptr);\n");
1606           if (!HasPreservingUnknownEnumSemantics(field)) {
1607             format_("if (PROTOBUF_PREDICT_TRUE($1$_IsValid(val))) {\n",
1608                     QualifiedClassName(field->enum_type(), options_));
1609             format_.Indent();
1610           }
1611           format_("$1$_$2$(static_cast<$3$>(val));\n", prefix, FieldName(field),
1612                   QualifiedClassName(field->enum_type(), options_));
1613           if (!HasPreservingUnknownEnumSemantics(field)) {
1614             format_.Outdent();
1615             format_(
1616                 "} else {\n"
1617                 "  $pi_ns$::WriteVarint($1$, val, mutable_unknown_fields());\n"
1618                 "}\n",
1619                 field->number());
1620           }
1621         } else {
1622           int size = field->type() == FieldDescriptor::TYPE_SINT32 ? 32 : 64;
1623           std::string zigzag;
1624           if ((field->type() == FieldDescriptor::TYPE_SINT32 ||
1625                field->type() == FieldDescriptor::TYPE_SINT64)) {
1626             zigzag = StrCat("ZigZag", size);
1627           }
1628           if (field->is_repeated() || field->containing_oneof()) {
1629             string prefix = field->is_repeated() ? "add" : "set";
1630             format_(
1631                 "$1$_$2$($pi_ns$::ReadVarint$3$(&ptr));\n"
1632                 "CHK_(ptr);\n",
1633                 prefix, FieldName(field), zigzag);
1634           } else {
1635             if (HasFieldPresence(field->file())) {
1636               format_("_Internal::set_has_$1$(&$has_bits$);\n",
1637                       FieldName(field));
1638             }
1639             format_(
1640                 "$1$_ = $pi_ns$::ReadVarint$2$(&ptr);\n"
1641                 "CHK_(ptr);\n",
1642                 FieldName(field), zigzag);
1643           }
1644         }
1645         break;
1646       }
1647       case WireFormatLite::WIRETYPE_FIXED32:
1648       case WireFormatLite::WIRETYPE_FIXED64: {
1649         std::string type = PrimitiveTypeName(options_, field->cpp_type());
1650         if (field->is_repeated() || field->containing_oneof()) {
1651           string prefix = field->is_repeated() ? "add" : "set";
1652           format_(
1653               "$1$_$2$($pi_ns$::UnalignedLoad<$3$>(ptr));\n"
1654               "ptr += sizeof($3$);\n",
1655               prefix, FieldName(field), type);
1656         } else {
1657           if (HasFieldPresence(field->file())) {
1658             format_("_Internal::set_has_$1$(&$has_bits$);\n", FieldName(field));
1659           }
1660           format_(
1661               "$1$_ = $pi_ns$::UnalignedLoad<$2$>(ptr);\n"
1662               "ptr += sizeof($2$);\n",
1663               FieldName(field), type);
1664         }
1665         break;
1666       }
1667       case WireFormatLite::WIRETYPE_LENGTH_DELIMITED: {
1668         GenerateLengthDelim(field);
1669         format_("CHK_(ptr);\n");
1670         break;
1671       }
1672       case WireFormatLite::WIRETYPE_START_GROUP: {
1673         format_(
1674             "ptr = ctx->ParseGroup($1$_$2$(), ptr, $3$);\n"
1675             "CHK_(ptr);\n",
1676             field->is_repeated() ? "add" : "mutable", FieldName(field), tag);
1677         break;
1678       }
1679       case WireFormatLite::WIRETYPE_END_GROUP: {
1680         GOOGLE_LOG(FATAL) << "Can't have end group field\n";
1681         break;
1682       }
1683     }  // switch (wire_type)
1684   }
1685 
1686   // Returns the tag for this field and in case of repeated packable fields,
1687   // sets a fallback tag in fallback_tag_ptr.
ExpectedTag(const FieldDescriptor * field,uint32 * fallback_tag_ptr)1688   static uint32 ExpectedTag(const FieldDescriptor* field,
1689                             uint32* fallback_tag_ptr) {
1690     uint32 expected_tag;
1691     if (field->is_packable()) {
1692       auto expected_wiretype = WireFormat::WireTypeForFieldType(field->type());
1693       expected_tag =
1694           WireFormatLite::MakeTag(field->number(), expected_wiretype);
1695       GOOGLE_CHECK(expected_wiretype != WireFormatLite::WIRETYPE_LENGTH_DELIMITED);
1696       auto fallback_wiretype = WireFormatLite::WIRETYPE_LENGTH_DELIMITED;
1697       uint32 fallback_tag =
1698           WireFormatLite::MakeTag(field->number(), fallback_wiretype);
1699 
1700       if (field->is_packed()) std::swap(expected_tag, fallback_tag);
1701       *fallback_tag_ptr = fallback_tag;
1702     } else {
1703       auto expected_wiretype = WireFormat::WireTypeForField(field);
1704       expected_tag =
1705           WireFormatLite::MakeTag(field->number(), expected_wiretype);
1706     }
1707     return expected_tag;
1708   }
1709 
GenerateParseLoop(const Descriptor * descriptor,const std::vector<const FieldDescriptor * > & ordered_fields)1710   void GenerateParseLoop(
1711       const Descriptor* descriptor,
1712       const std::vector<const FieldDescriptor*>& ordered_fields) {
1713     format_(
1714         "while (!ctx->Done(&ptr)) {\n"
1715         "  $uint32$ tag;\n"
1716         "  ptr = $pi_ns$::ReadTag(ptr, &tag);\n"
1717         "  CHK_(ptr);\n"
1718         "  switch (tag >> 3) {\n");
1719 
1720     format_.Indent();
1721     format_.Indent();
1722 
1723     for (const auto* field : ordered_fields) {
1724       // Print the field's (or oneof's) proto-syntax definition as a comment.
1725       // We don't want to print group bodies so we cut off after the first
1726       // line.
1727       std::string def;
1728       {
1729         DebugStringOptions options;
1730         options.elide_group_body = true;
1731         options.elide_oneof_body = true;
1732         def = field->DebugStringWithOptions(options);
1733         def = def.substr(0, def.find_first_of('\n'));
1734       }
1735       format_(
1736           "// $1$\n"
1737           "case $2$:\n",
1738           def, field->number());
1739       format_.Indent();
1740       uint32 fallback_tag = 0;
1741       uint32 expected_tag = ExpectedTag(field, &fallback_tag);
1742       format_(
1743           "if (PROTOBUF_PREDICT_TRUE(static_cast<$uint8$>(tag) == $1$)) {\n",
1744           expected_tag & 0xFF);
1745       format_.Indent();
1746       auto wiretype = WireFormatLite::GetTagWireType(expected_tag);
1747       uint32 tag = WireFormatLite::MakeTag(field->number(), wiretype);
1748       int tag_size = io::CodedOutputStream::VarintSize32(tag);
1749       bool is_repeat = ShouldRepeat(field, wiretype);
1750       if (is_repeat) {
1751         format_(
1752             "ptr -= $1$;\n"
1753             "do {\n"
1754             "  ptr += $1$;\n",
1755             tag_size);
1756         format_.Indent();
1757       }
1758       GenerateFieldBody(wiretype, field);
1759       if (is_repeat) {
1760         string type = tag_size == 2 ? "uint16" : "uint8";
1761         format_.Outdent();
1762         format_(
1763             "  if (!ctx->DataAvailable(ptr)) break;\n"
1764             "} while ($pi_ns$::UnalignedLoad<$1$>(ptr) == $2$);\n",
1765             IntTypeName(options_, type), SmallVarintValue(tag));
1766       }
1767       format_.Outdent();
1768       if (fallback_tag) {
1769         format_("} else if (static_cast<$uint8$>(tag) == $1$) {\n",
1770                 fallback_tag & 0xFF);
1771         format_.Indent();
1772         GenerateFieldBody(WireFormatLite::GetTagWireType(fallback_tag), field);
1773         format_.Outdent();
1774       }
1775       format_.Outdent();
1776       format_(
1777           "  } else goto handle_unusual;\n"
1778           "  continue;\n");
1779     }  // for loop over ordered fields
1780 
1781     // Default case
1782     format_("default: {\n");
1783     if (!ordered_fields.empty()) format_("handle_unusual:\n");
1784     format_(
1785         "  if ((tag & 7) == 4 || tag == 0) {\n"
1786         "    ctx->SetLastTag(tag);\n"
1787         "    goto success;\n"
1788         "  }\n");
1789     if (IsMapEntryMessage(descriptor)) {
1790       format_("  continue;\n");
1791     } else {
1792       if (descriptor->extension_range_count() > 0) {
1793         format_("if (");
1794         for (int i = 0; i < descriptor->extension_range_count(); i++) {
1795           const Descriptor::ExtensionRange* range =
1796               descriptor->extension_range(i);
1797           if (i > 0) format_(" ||\n    ");
1798 
1799           uint32 start_tag = WireFormatLite::MakeTag(
1800               range->start, static_cast<WireFormatLite::WireType>(0));
1801           uint32 end_tag = WireFormatLite::MakeTag(
1802               range->end, static_cast<WireFormatLite::WireType>(0));
1803 
1804           if (range->end > FieldDescriptor::kMaxNumber) {
1805             format_("($1$u <= tag)", start_tag);
1806           } else {
1807             format_("($1$u <= tag && tag < $2$u)", start_tag, end_tag);
1808           }
1809         }
1810         format_(") {\n");
1811         format_(
1812             "  ptr = _extensions_.ParseField(tag, ptr,\n"
1813             "      internal_default_instance(), &_internal_metadata_, ctx);\n"
1814             "  CHK_(ptr != nullptr);\n"
1815             "  continue;\n"
1816             "}\n");
1817       }
1818       format_(
1819           "  ptr = UnknownFieldParse(tag, &_internal_metadata_, ptr, ctx);\n"
1820           "  CHK_(ptr != nullptr);\n"
1821           "  continue;\n");
1822     }
1823     format_("}\n");  // default case
1824     format_.Outdent();
1825     format_.Outdent();
1826     format_(
1827         "  }  // switch\n"
1828         "}  // while\n");
1829   }
1830 };
1831 
GenerateParserLoop(const Descriptor * descriptor,int num_hasbits,const Options & options,MessageSCCAnalyzer * scc_analyzer,io::Printer * printer)1832 void GenerateParserLoop(const Descriptor* descriptor, int num_hasbits,
1833                         const Options& options,
1834                         MessageSCCAnalyzer* scc_analyzer,
1835                         io::Printer* printer) {
1836   ParseLoopGenerator generator(num_hasbits, options, scc_analyzer, printer);
1837   generator.GenerateParserLoop(descriptor);
1838 }
1839 
1840 }  // namespace cpp
1841 }  // namespace compiler
1842 }  // namespace protobuf
1843 }  // namespace google
1844