1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc. All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 // * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 // * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 // * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31 // Author: kenton@google.com (Kenton Varda)
32 // Based on original Protocol Buffers design by
33 // Sanjay Ghemawat, Jeff Dean, and others.
34
35 #include <google/protobuf/compiler/cpp/cpp_helpers.h>
36
37 #include <functional>
38 #include <limits>
39 #include <map>
40 #include <queue>
41 #include <unordered_set>
42 #include <vector>
43
44 #include <google/protobuf/stubs/common.h>
45 #include <google/protobuf/stubs/logging.h>
46 #include <google/protobuf/compiler/cpp/cpp_options.h>
47 #include <google/protobuf/descriptor.pb.h>
48 #include <google/protobuf/descriptor.h>
49 #include <google/protobuf/compiler/scc.h>
50 #include <google/protobuf/io/printer.h>
51 #include <google/protobuf/io/zero_copy_stream.h>
52 #include <google/protobuf/dynamic_message.h>
53 #include <google/protobuf/wire_format.h>
54 #include <google/protobuf/wire_format_lite.h>
55 #include <google/protobuf/stubs/strutil.h>
56 #include <google/protobuf/stubs/substitute.h>
57 #include <google/protobuf/stubs/hash.h>
58
59 #include <google/protobuf/port_def.inc>
60
61 namespace google {
62 namespace protobuf {
63 namespace compiler {
64 namespace cpp {
65
66 namespace {
67
// Simple message name and .proto path identifying the well-known Any type.
// NOTE(review): presumably consumed by the IsAnyMessage() helpers defined
// elsewhere in this file -- confirm against their definitions.
static const char kAnyMessageName[] = "Any";
static const char kAnyProtoFile[] = "google/protobuf/any.proto";
70
DotsToColons(const std::string & name)71 std::string DotsToColons(const std::string& name) {
72 return StringReplace(name, ".", "::", true);
73 }
74
// Identifiers that must not be emitted verbatim as generated names: the C++
// keywords and alternative operator tokens, plus the NULL macro. Names found
// here get a trailing underscore (see ResolveKeyword/FieldName/EnumValueName).
//
// The list includes char16_t and char32_t, which are keywords since C++11.
// NOTE(review): C++20 keywords (char8_t, concept, requires, co_await, ...) are
// not listed; adding them would rename accessors of existing fields, so that
// decision is left to the maintainers.
static const char* const kKeywordList[] = {  //
    "NULL",
    "alignas",
    "alignof",
    "and",
    "and_eq",
    "asm",
    "auto",
    "bitand",
    "bitor",
    "bool",
    "break",
    "case",
    "catch",
    "char",
    "char16_t",
    "char32_t",
    "class",
    "compl",
    "const",
    "constexpr",
    "const_cast",
    "continue",
    "decltype",
    "default",
    "delete",
    "do",
    "double",
    "dynamic_cast",
    "else",
    "enum",
    "explicit",
    "export",
    "extern",
    "false",
    "float",
    "for",
    "friend",
    "goto",
    "if",
    "inline",
    "int",
    "long",
    "mutable",
    "namespace",
    "new",
    "noexcept",
    "not",
    "not_eq",
    "nullptr",
    "operator",
    "or",
    "or_eq",
    "private",
    "protected",
    "public",
    "register",
    "reinterpret_cast",
    "return",
    "short",
    "signed",
    "sizeof",
    "static",
    "static_assert",
    "static_cast",
    "struct",
    "switch",
    "template",
    "this",
    "thread_local",
    "throw",
    "true",
    "try",
    "typedef",
    "typeid",
    "typename",
    "union",
    "unsigned",
    "using",
    "virtual",
    "void",
    "volatile",
    "wchar_t",
    "while",
    "xor",
    "xor_eq"};
159
MakeKeywordsMap()160 static std::unordered_set<std::string>* MakeKeywordsMap() {
161 auto* result = new std::unordered_set<std::string>();
162 for (const auto keyword : kKeywordList) {
163 result->emplace(keyword);
164 }
165 return result;
166 }
167
// Lookup set built once from kKeywordList; names found in it get a trailing
// underscore (see ResolveKeyword). It is a reference to the heap object
// returned by MakeKeywordsMap(); no matching delete is visible here.
// NOTE(review): presumably leaked on purpose so it is never destroyed during
// static teardown -- confirm.
static std::unordered_set<std::string>& kKeywords = *MakeKeywordsMap();
169
170 // Encode [0..63] as 'A'-'Z', 'a'-'z', '0'-'9', '_'
Base63Char(int value)171 char Base63Char(int value) {
172 GOOGLE_CHECK_GE(value, 0);
173 if (value < 26) return 'A' + value;
174 value -= 26;
175 if (value < 26) return 'a' + value;
176 value -= 26;
177 if (value < 10) return '0' + value;
178 GOOGLE_CHECK_EQ(value, 10);
179 return '_';
180 }
181
182 // Given a c identifier has 63 legal characters we can't implement base64
183 // encoding. So we return the k least significant "digits" in base 63.
184 template <typename I>
Base63(I n,int k)185 std::string Base63(I n, int k) {
186 std::string res;
187 while (k-- > 0) {
188 res += Base63Char(static_cast<int>(n % 63));
189 n /= 63;
190 }
191 return res;
192 }
193
IntTypeName(const Options & options,const std::string & type)194 std::string IntTypeName(const Options& options, const std::string& type) {
195 if (options.opensource_runtime) {
196 return "::PROTOBUF_NAMESPACE_ID::" + type;
197 } else {
198 return "::" + type;
199 }
200 }
201
SetIntVar(const Options & options,const std::string & type,std::map<std::string,std::string> * variables)202 void SetIntVar(const Options& options, const std::string& type,
203 std::map<std::string, std::string>* variables) {
204 (*variables)[type] = IntTypeName(options, type);
205 }
206
HasInternalAccessors(const FieldOptions::CType ctype)207 bool HasInternalAccessors(const FieldOptions::CType ctype) {
208 return ctype == FieldOptions::STRING || ctype == FieldOptions::CORD;
209 }
210
211 } // namespace
212
SetCommonVars(const Options & options,std::map<std::string,std::string> * variables)213 void SetCommonVars(const Options& options,
214 std::map<std::string, std::string>* variables) {
215 (*variables)["proto_ns"] = ProtobufNamespace(options);
216
217 // Warning: there is some clever naming/splitting here to avoid extract script
218 // rewrites. The names of these variables must not be things that the extract
219 // script will rewrite. That's why we use "CHK" (for example) instead of
220 // "GOOGLE_CHECK".
221 if (options.opensource_runtime) {
222 (*variables)["GOOGLE_PROTOBUF"] = "GOOGLE_PROTOBUF";
223 (*variables)["CHK"] = "GOOGLE_CHECK";
224 (*variables)["DCHK"] = "GOOGLE_DCHECK";
225 } else {
226 // These values are things the extract script would rewrite if we did not
227 // split them. It might not strictly matter since we don't generate google3
228 // code in open-source. But it's good to prevent surprising things from
229 // happening.
230 (*variables)["GOOGLE_PROTOBUF"] =
231 "GOOGLE3"
232 "_PROTOBUF";
233 (*variables)["CHK"] =
234 "CH"
235 "ECK";
236 (*variables)["DCHK"] =
237 "DCH"
238 "ECK";
239 }
240
241 SetIntVar(options, "int8", variables);
242 SetIntVar(options, "uint8", variables);
243 SetIntVar(options, "uint32", variables);
244 SetIntVar(options, "uint64", variables);
245 SetIntVar(options, "int32", variables);
246 SetIntVar(options, "int64", variables);
247 (*variables)["string"] = "std::string";
248 }
249
SetUnknkownFieldsVariable(const Descriptor * descriptor,const Options & options,std::map<std::string,std::string> * variables)250 void SetUnknkownFieldsVariable(const Descriptor* descriptor,
251 const Options& options,
252 std::map<std::string, std::string>* variables) {
253 std::string proto_ns = ProtobufNamespace(options);
254 std::string unknown_fields_type;
255 if (UseUnknownFieldSet(descriptor->file(), options)) {
256 unknown_fields_type = "::" + proto_ns + "::UnknownFieldSet";
257 (*variables)["unknown_fields"] =
258 "_internal_metadata_.unknown_fields<" + unknown_fields_type + ">(" +
259 unknown_fields_type + "::default_instance)";
260 } else {
261 unknown_fields_type =
262 PrimitiveTypeName(options, FieldDescriptor::CPPTYPE_STRING);
263 (*variables)["unknown_fields"] = "_internal_metadata_.unknown_fields<" +
264 unknown_fields_type + ">(::" + proto_ns +
265 "::internal::GetEmptyString)";
266 }
267 (*variables)["unknown_fields_type"] = unknown_fields_type;
268 (*variables)["have_unknown_fields"] =
269 "_internal_metadata_.have_unknown_fields()";
270 (*variables)["mutable_unknown_fields"] =
271 "_internal_metadata_.mutable_unknown_fields<" + unknown_fields_type +
272 ">()";
273 }
274
// Converts an identifier such as "foo_bar" to camel case.
//
// Letters and digits are copied to the output; every other character is
// dropped and causes the next lowercase letter to be capitalized. A digit is
// kept and also capitalizes the following lowercase letter. Uppercase letters
// are copied unchanged.
//
// `cap_next_letter` selects whether the first lowercase letter is capitalized
// ("FooBar") or not ("fooBar").
std::string UnderscoresToCamelCase(const std::string& input,
                                   bool cap_next_letter) {
  std::string result;
  // The output is never longer than the input.
  result.reserve(input.size());
  // Note: ctype.h is avoided on purpose because its behavior depends on the
  // locale.
  for (const char c : input) {
    if ('a' <= c && c <= 'z') {
      result += cap_next_letter ? static_cast<char>(c + ('A' - 'a')) : c;
      cap_next_letter = false;
    } else if ('A' <= c && c <= 'Z') {
      // Capital letters are left as-is.
      result += c;
      cap_next_letter = false;
    } else {
      // Digits are kept, separators are dropped; both capitalize what follows.
      if ('0' <= c && c <= '9') result += c;
      cap_next_letter = true;
    }
  }
  return result;
}
300
// Newline-terminated C++ comment lines made of '=' (thick) and '-' (thin).
// NOTE(review): presumably printed into generated sources as section
// dividers -- the users are outside this chunk.
const char kThickSeparator[] =
    "// ===================================================================\n";
const char kThinSeparator[] =
    "// -------------------------------------------------------------------\n";
305
CanInitializeByZeroing(const FieldDescriptor * field)306 bool CanInitializeByZeroing(const FieldDescriptor* field) {
307 if (field->is_repeated() || field->is_extension()) return false;
308 switch (field->cpp_type()) {
309 case FieldDescriptor::CPPTYPE_ENUM:
310 return field->default_value_enum()->number() == 0;
311 case FieldDescriptor::CPPTYPE_INT32:
312 return field->default_value_int32() == 0;
313 case FieldDescriptor::CPPTYPE_INT64:
314 return field->default_value_int64() == 0;
315 case FieldDescriptor::CPPTYPE_UINT32:
316 return field->default_value_uint32() == 0;
317 case FieldDescriptor::CPPTYPE_UINT64:
318 return field->default_value_uint64() == 0;
319 case FieldDescriptor::CPPTYPE_FLOAT:
320 return field->default_value_float() == 0;
321 case FieldDescriptor::CPPTYPE_DOUBLE:
322 return field->default_value_double() == 0;
323 case FieldDescriptor::CPPTYPE_BOOL:
324 return field->default_value_bool() == false;
325 default:
326 return false;
327 }
328 }
329
ClassName(const Descriptor * descriptor)330 std::string ClassName(const Descriptor* descriptor) {
331 const Descriptor* parent = descriptor->containing_type();
332 std::string res;
333 if (parent) res += ClassName(parent) + "_";
334 res += descriptor->name();
335 if (IsMapEntryMessage(descriptor)) res += "_DoNotUse";
336 return ResolveKeyword(res);
337 }
338
ClassName(const EnumDescriptor * enum_descriptor)339 std::string ClassName(const EnumDescriptor* enum_descriptor) {
340 if (enum_descriptor->containing_type() == nullptr) {
341 return ResolveKeyword(enum_descriptor->name());
342 } else {
343 return ClassName(enum_descriptor->containing_type()) + "_" +
344 enum_descriptor->name();
345 }
346 }
347
QualifiedClassName(const Descriptor * d,const Options & options)348 std::string QualifiedClassName(const Descriptor* d, const Options& options) {
349 return QualifiedFileLevelSymbol(d->file(), ClassName(d), options);
350 }
351
QualifiedClassName(const EnumDescriptor * d,const Options & options)352 std::string QualifiedClassName(const EnumDescriptor* d,
353 const Options& options) {
354 return QualifiedFileLevelSymbol(d->file(), ClassName(d), options);
355 }
356
QualifiedClassName(const Descriptor * d)357 std::string QualifiedClassName(const Descriptor* d) {
358 return QualifiedClassName(d, Options());
359 }
360
QualifiedClassName(const EnumDescriptor * d)361 std::string QualifiedClassName(const EnumDescriptor* d) {
362 return QualifiedClassName(d, Options());
363 }
364
QualifiedExtensionName(const FieldDescriptor * d,const Options & options)365 std::string QualifiedExtensionName(const FieldDescriptor* d,
366 const Options& options) {
367 GOOGLE_DCHECK(d->is_extension());
368 return QualifiedFileLevelSymbol(d->file(), FieldName(d), options);
369 }
370
QualifiedExtensionName(const FieldDescriptor * d)371 std::string QualifiedExtensionName(const FieldDescriptor* d) {
372 return QualifiedExtensionName(d, Options());
373 }
374
Namespace(const std::string & package)375 std::string Namespace(const std::string& package) {
376 if (package.empty()) return "";
377 return "::" + DotsToColons(package);
378 }
379
Namespace(const FileDescriptor * d,const Options & options)380 std::string Namespace(const FileDescriptor* d, const Options& options) {
381 std::string ret = Namespace(d->package());
382 if (IsWellKnownMessage(d) && options.opensource_runtime) {
383 // Written with string concatenation to prevent rewriting of
384 // ::google::protobuf.
385 ret = StringReplace(ret,
386 "::google::"
387 "protobuf",
388 "PROTOBUF_NAMESPACE_ID", false);
389 }
390 return ret;
391 }
392
Namespace(const Descriptor * d,const Options & options)393 std::string Namespace(const Descriptor* d, const Options& options) {
394 return Namespace(d->file(), options);
395 }
396
Namespace(const FieldDescriptor * d,const Options & options)397 std::string Namespace(const FieldDescriptor* d, const Options& options) {
398 return Namespace(d->file(), options);
399 }
400
Namespace(const EnumDescriptor * d,const Options & options)401 std::string Namespace(const EnumDescriptor* d, const Options& options) {
402 return Namespace(d->file(), options);
403 }
404
DefaultInstanceType(const Descriptor * descriptor,const Options & options)405 std::string DefaultInstanceType(const Descriptor* descriptor,
406 const Options& options) {
407 return ClassName(descriptor) + "DefaultTypeInternal";
408 }
409
DefaultInstanceName(const Descriptor * descriptor,const Options & options)410 std::string DefaultInstanceName(const Descriptor* descriptor,
411 const Options& options) {
412 return "_" + ClassName(descriptor, false) + "_default_instance_";
413 }
414
DefaultInstancePtr(const Descriptor * descriptor,const Options & options)415 std::string DefaultInstancePtr(const Descriptor* descriptor,
416 const Options& options) {
417 return DefaultInstanceName(descriptor, options) + "ptr_";
418 }
419
QualifiedDefaultInstanceName(const Descriptor * descriptor,const Options & options)420 std::string QualifiedDefaultInstanceName(const Descriptor* descriptor,
421 const Options& options) {
422 return QualifiedFileLevelSymbol(
423 descriptor->file(), DefaultInstanceName(descriptor, options), options);
424 }
425
QualifiedDefaultInstancePtr(const Descriptor * descriptor,const Options & options)426 std::string QualifiedDefaultInstancePtr(const Descriptor* descriptor,
427 const Options& options) {
428 return QualifiedDefaultInstanceName(descriptor, options) + "ptr_";
429 }
430
DescriptorTableName(const FileDescriptor * file,const Options & options)431 std::string DescriptorTableName(const FileDescriptor* file,
432 const Options& options) {
433 return UniqueName("descriptor_table", file, options);
434 }
435
FileDllExport(const FileDescriptor * file,const Options & options)436 std::string FileDllExport(const FileDescriptor* file, const Options& options) {
437 return UniqueName("PROTOBUF_INTERNAL_EXPORT", file, options);
438 }
439
SuperClassName(const Descriptor * descriptor,const Options & options)440 std::string SuperClassName(const Descriptor* descriptor,
441 const Options& options) {
442 return "::" + ProtobufNamespace(options) +
443 (HasDescriptorMethods(descriptor->file(), options) ? "::Message"
444 : "::MessageLite");
445 }
446
ResolveKeyword(const std::string & name)447 std::string ResolveKeyword(const std::string& name) {
448 if (kKeywords.count(name) > 0) {
449 return name + "_";
450 }
451 return name;
452 }
453
FieldName(const FieldDescriptor * field)454 std::string FieldName(const FieldDescriptor* field) {
455 std::string result = field->name();
456 LowerString(&result);
457 if (kKeywords.count(result) > 0) {
458 result.append("_");
459 }
460 return result;
461 }
462
EnumValueName(const EnumValueDescriptor * enum_value)463 std::string EnumValueName(const EnumValueDescriptor* enum_value) {
464 std::string result = enum_value->name();
465 if (kKeywords.count(result) > 0) {
466 result.append("_");
467 }
468 return result;
469 }
470
EstimateAlignmentSize(const FieldDescriptor * field)471 int EstimateAlignmentSize(const FieldDescriptor* field) {
472 if (field == nullptr) return 0;
473 if (field->is_repeated()) return 8;
474 switch (field->cpp_type()) {
475 case FieldDescriptor::CPPTYPE_BOOL:
476 return 1;
477
478 case FieldDescriptor::CPPTYPE_INT32:
479 case FieldDescriptor::CPPTYPE_UINT32:
480 case FieldDescriptor::CPPTYPE_ENUM:
481 case FieldDescriptor::CPPTYPE_FLOAT:
482 return 4;
483
484 case FieldDescriptor::CPPTYPE_INT64:
485 case FieldDescriptor::CPPTYPE_UINT64:
486 case FieldDescriptor::CPPTYPE_DOUBLE:
487 case FieldDescriptor::CPPTYPE_STRING:
488 case FieldDescriptor::CPPTYPE_MESSAGE:
489 return 8;
490 }
491 GOOGLE_LOG(FATAL) << "Can't get here.";
492 return -1; // Make compiler happy.
493 }
494
FieldConstantName(const FieldDescriptor * field)495 std::string FieldConstantName(const FieldDescriptor* field) {
496 std::string field_name = UnderscoresToCamelCase(field->name(), true);
497 std::string result = "k" + field_name + "FieldNumber";
498
499 if (!field->is_extension() &&
500 field->containing_type()->FindFieldByCamelcaseName(
501 field->camelcase_name()) != field) {
502 // This field's camelcase name is not unique. As a hack, add the field
503 // number to the constant name. This makes the constant rather useless,
504 // but what can we do?
505 result += "_" + StrCat(field->number());
506 }
507
508 return result;
509 }
510
FieldMessageTypeName(const FieldDescriptor * field,const Options & options)511 std::string FieldMessageTypeName(const FieldDescriptor* field,
512 const Options& options) {
513 // Note: The Google-internal version of Protocol Buffers uses this function
514 // as a hook point for hacks to support legacy code.
515 return QualifiedClassName(field->message_type(), options);
516 }
517
StripProto(const std::string & filename)518 std::string StripProto(const std::string& filename) {
519 if (HasSuffixString(filename, ".protodevel")) {
520 return StripSuffixString(filename, ".protodevel");
521 } else {
522 return StripSuffixString(filename, ".proto");
523 }
524 }
525
PrimitiveTypeName(FieldDescriptor::CppType type)526 const char* PrimitiveTypeName(FieldDescriptor::CppType type) {
527 switch (type) {
528 case FieldDescriptor::CPPTYPE_INT32:
529 return "::google::protobuf::int32";
530 case FieldDescriptor::CPPTYPE_INT64:
531 return "::google::protobuf::int64";
532 case FieldDescriptor::CPPTYPE_UINT32:
533 return "::google::protobuf::uint32";
534 case FieldDescriptor::CPPTYPE_UINT64:
535 return "::google::protobuf::uint64";
536 case FieldDescriptor::CPPTYPE_DOUBLE:
537 return "double";
538 case FieldDescriptor::CPPTYPE_FLOAT:
539 return "float";
540 case FieldDescriptor::CPPTYPE_BOOL:
541 return "bool";
542 case FieldDescriptor::CPPTYPE_ENUM:
543 return "int";
544 case FieldDescriptor::CPPTYPE_STRING:
545 return "std::string";
546 case FieldDescriptor::CPPTYPE_MESSAGE:
547 return nullptr;
548
549 // No default because we want the compiler to complain if any new
550 // CppTypes are added.
551 }
552
553 GOOGLE_LOG(FATAL) << "Can't get here.";
554 return nullptr;
555 }
556
PrimitiveTypeName(const Options & options,FieldDescriptor::CppType type)557 std::string PrimitiveTypeName(const Options& options,
558 FieldDescriptor::CppType type) {
559 switch (type) {
560 case FieldDescriptor::CPPTYPE_INT32:
561 return IntTypeName(options, "int32");
562 case FieldDescriptor::CPPTYPE_INT64:
563 return IntTypeName(options, "int64");
564 case FieldDescriptor::CPPTYPE_UINT32:
565 return IntTypeName(options, "uint32");
566 case FieldDescriptor::CPPTYPE_UINT64:
567 return IntTypeName(options, "uint64");
568 case FieldDescriptor::CPPTYPE_DOUBLE:
569 return "double";
570 case FieldDescriptor::CPPTYPE_FLOAT:
571 return "float";
572 case FieldDescriptor::CPPTYPE_BOOL:
573 return "bool";
574 case FieldDescriptor::CPPTYPE_ENUM:
575 return "int";
576 case FieldDescriptor::CPPTYPE_STRING:
577 return "std::string";
578 case FieldDescriptor::CPPTYPE_MESSAGE:
579 return "";
580
581 // No default because we want the compiler to complain if any new
582 // CppTypes are added.
583 }
584
585 GOOGLE_LOG(FATAL) << "Can't get here.";
586 return "";
587 }
588
DeclaredTypeMethodName(FieldDescriptor::Type type)589 const char* DeclaredTypeMethodName(FieldDescriptor::Type type) {
590 switch (type) {
591 case FieldDescriptor::TYPE_INT32:
592 return "Int32";
593 case FieldDescriptor::TYPE_INT64:
594 return "Int64";
595 case FieldDescriptor::TYPE_UINT32:
596 return "UInt32";
597 case FieldDescriptor::TYPE_UINT64:
598 return "UInt64";
599 case FieldDescriptor::TYPE_SINT32:
600 return "SInt32";
601 case FieldDescriptor::TYPE_SINT64:
602 return "SInt64";
603 case FieldDescriptor::TYPE_FIXED32:
604 return "Fixed32";
605 case FieldDescriptor::TYPE_FIXED64:
606 return "Fixed64";
607 case FieldDescriptor::TYPE_SFIXED32:
608 return "SFixed32";
609 case FieldDescriptor::TYPE_SFIXED64:
610 return "SFixed64";
611 case FieldDescriptor::TYPE_FLOAT:
612 return "Float";
613 case FieldDescriptor::TYPE_DOUBLE:
614 return "Double";
615
616 case FieldDescriptor::TYPE_BOOL:
617 return "Bool";
618 case FieldDescriptor::TYPE_ENUM:
619 return "Enum";
620
621 case FieldDescriptor::TYPE_STRING:
622 return "String";
623 case FieldDescriptor::TYPE_BYTES:
624 return "Bytes";
625 case FieldDescriptor::TYPE_GROUP:
626 return "Group";
627 case FieldDescriptor::TYPE_MESSAGE:
628 return "Message";
629
630 // No default because we want the compiler to complain if any new
631 // types are added.
632 }
633 GOOGLE_LOG(FATAL) << "Can't get here.";
634 return "";
635 }
636
Int32ToString(int number)637 std::string Int32ToString(int number) {
638 if (number == kint32min) {
639 // This needs to be special-cased, see explanation here:
640 // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=52661
641 return StrCat(number + 1, " - 1");
642 } else {
643 return StrCat(number);
644 }
645 }
646
Int64ToString(const std::string & macro_prefix,int64 number)647 std::string Int64ToString(const std::string& macro_prefix, int64 number) {
648 if (number == kint64min) {
649 // This needs to be special-cased, see explanation here:
650 // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=52661
651 return StrCat(macro_prefix, "_LONGLONG(", number + 1, ") - 1");
652 }
653 return StrCat(macro_prefix, "_LONGLONG(", number, ")");
654 }
655
UInt64ToString(const std::string & macro_prefix,uint64 number)656 std::string UInt64ToString(const std::string& macro_prefix, uint64 number) {
657 return StrCat(macro_prefix, "_ULONGLONG(", number, ")");
658 }
659
DefaultValue(const FieldDescriptor * field)660 std::string DefaultValue(const FieldDescriptor* field) {
661 switch (field->cpp_type()) {
662 case FieldDescriptor::CPPTYPE_INT64:
663 return Int64ToString("GG", field->default_value_int64());
664 case FieldDescriptor::CPPTYPE_UINT64:
665 return UInt64ToString("GG", field->default_value_uint64());
666 default:
667 return DefaultValue(Options(), field);
668 }
669 }
670
DefaultValue(const Options & options,const FieldDescriptor * field)671 std::string DefaultValue(const Options& options, const FieldDescriptor* field) {
672 switch (field->cpp_type()) {
673 case FieldDescriptor::CPPTYPE_INT32:
674 return Int32ToString(field->default_value_int32());
675 case FieldDescriptor::CPPTYPE_UINT32:
676 return StrCat(field->default_value_uint32()) + "u";
677 case FieldDescriptor::CPPTYPE_INT64:
678 return Int64ToString("PROTOBUF", field->default_value_int64());
679 case FieldDescriptor::CPPTYPE_UINT64:
680 return UInt64ToString("PROTOBUF", field->default_value_uint64());
681 case FieldDescriptor::CPPTYPE_DOUBLE: {
682 double value = field->default_value_double();
683 if (value == std::numeric_limits<double>::infinity()) {
684 return "std::numeric_limits<double>::infinity()";
685 } else if (value == -std::numeric_limits<double>::infinity()) {
686 return "-std::numeric_limits<double>::infinity()";
687 } else if (value != value) {
688 return "std::numeric_limits<double>::quiet_NaN()";
689 } else {
690 return SimpleDtoa(value);
691 }
692 }
693 case FieldDescriptor::CPPTYPE_FLOAT: {
694 float value = field->default_value_float();
695 if (value == std::numeric_limits<float>::infinity()) {
696 return "std::numeric_limits<float>::infinity()";
697 } else if (value == -std::numeric_limits<float>::infinity()) {
698 return "-std::numeric_limits<float>::infinity()";
699 } else if (value != value) {
700 return "std::numeric_limits<float>::quiet_NaN()";
701 } else {
702 std::string float_value = SimpleFtoa(value);
703 // If floating point value contains a period (.) or an exponent
704 // (either E or e), then append suffix 'f' to make it a float
705 // literal.
706 if (float_value.find_first_of(".eE") != std::string::npos) {
707 float_value.push_back('f');
708 }
709 return float_value;
710 }
711 }
712 case FieldDescriptor::CPPTYPE_BOOL:
713 return field->default_value_bool() ? "true" : "false";
714 case FieldDescriptor::CPPTYPE_ENUM:
715 // Lazy: Generate a static_cast because we don't have a helper function
716 // that constructs the full name of an enum value.
717 return strings::Substitute(
718 "static_cast< $0 >($1)", ClassName(field->enum_type(), true),
719 Int32ToString(field->default_value_enum()->number()));
720 case FieldDescriptor::CPPTYPE_STRING:
721 return "\"" +
722 EscapeTrigraphs(CEscape(field->default_value_string())) +
723 "\"";
724 case FieldDescriptor::CPPTYPE_MESSAGE:
725 return "*" + FieldMessageTypeName(field, options) +
726 "::internal_default_instance()";
727 }
728 // Can't actually get here; make compiler happy. (We could add a default
729 // case above but then we wouldn't get the nice compiler warning when a
730 // new type is added.)
731 GOOGLE_LOG(FATAL) << "Can't get here.";
732 return "";
733 }
734
735 // Convert a file name into a valid identifier.
FilenameIdentifier(const std::string & filename)736 std::string FilenameIdentifier(const std::string& filename) {
737 std::string result;
738 for (int i = 0; i < filename.size(); i++) {
739 if (ascii_isalnum(filename[i])) {
740 result.push_back(filename[i]);
741 } else {
742 // Not alphanumeric. To avoid any possibility of name conflicts we
743 // use the hex code for the character.
744 StrAppend(&result, "_", strings::Hex(static_cast<uint8>(filename[i])));
745 }
746 }
747 return result;
748 }
749
UniqueName(const std::string & name,const std::string & filename,const Options & options)750 std::string UniqueName(const std::string& name, const std::string& filename,
751 const Options& options) {
752 return name + "_" + FilenameIdentifier(filename);
753 }
754
755 // Return the qualified C++ name for a file level symbol.
QualifiedFileLevelSymbol(const FileDescriptor * file,const std::string & name,const Options & options)756 std::string QualifiedFileLevelSymbol(const FileDescriptor* file,
757 const std::string& name,
758 const Options& options) {
759 if (file->package().empty()) {
760 return StrCat("::", name);
761 }
762 return StrCat(Namespace(file, options), "::", name);
763 }
764
765 // Escape C++ trigraphs by escaping question marks to \?
EscapeTrigraphs(const std::string & to_escape)766 std::string EscapeTrigraphs(const std::string& to_escape) {
767 return StringReplace(to_escape, "?", "\\?", true);
768 }
769
770 // Escaped function name to eliminate naming conflict.
SafeFunctionName(const Descriptor * descriptor,const FieldDescriptor * field,const std::string & prefix)771 std::string SafeFunctionName(const Descriptor* descriptor,
772 const FieldDescriptor* field,
773 const std::string& prefix) {
774 // Do not use FieldName() since it will escape keywords.
775 std::string name = field->name();
776 LowerString(&name);
777 std::string function_name = prefix + name;
778 if (descriptor->FindFieldByName(function_name)) {
779 // Single underscore will also make it conflicting with the private data
780 // member. We use double underscore to escape function names.
781 function_name.append("__");
782 } else if (kKeywords.count(name) > 0) {
783 // If the field name is a keyword, we append the underscore back to keep it
784 // consistent with other function names.
785 function_name.append("_");
786 }
787 return function_name;
788 }
789
IsStringInlined(const FieldDescriptor * descriptor,const Options & options)790 bool IsStringInlined(const FieldDescriptor* descriptor,
791 const Options& options) {
792 if (options.opensource_runtime) return false;
793
794 // TODO(ckennelly): Handle inlining for any.proto.
795 if (IsAnyMessage(descriptor->containing_type(), options)) return false;
796 if (descriptor->containing_type()->options().map_entry()) return false;
797
798 // We rely on has bits to distinguish field presence for release_$name$. When
799 // there is no hasbit, we cannot use the address of the string instance when
800 // the field has been inlined.
801 if (!HasHasbit(descriptor)) return false;
802
803 if (options.access_info_map) {
804 if (descriptor->is_required()) return true;
805 }
806 return false;
807 }
808
HasLazyFields(const Descriptor * descriptor,const Options & options)809 static bool HasLazyFields(const Descriptor* descriptor,
810 const Options& options) {
811 for (int field_idx = 0; field_idx < descriptor->field_count(); field_idx++) {
812 if (IsLazy(descriptor->field(field_idx), options)) {
813 return true;
814 }
815 }
816 for (int idx = 0; idx < descriptor->extension_count(); idx++) {
817 if (IsLazy(descriptor->extension(idx), options)) {
818 return true;
819 }
820 }
821 for (int idx = 0; idx < descriptor->nested_type_count(); idx++) {
822 if (HasLazyFields(descriptor->nested_type(idx), options)) {
823 return true;
824 }
825 }
826 return false;
827 }
828
829 // Does the given FileDescriptor use lazy fields?
HasLazyFields(const FileDescriptor * file,const Options & options)830 bool HasLazyFields(const FileDescriptor* file, const Options& options) {
831 for (int i = 0; i < file->message_type_count(); i++) {
832 const Descriptor* descriptor(file->message_type(i));
833 if (HasLazyFields(descriptor, options)) {
834 return true;
835 }
836 }
837 for (int field_idx = 0; field_idx < file->extension_count(); field_idx++) {
838 if (IsLazy(file->extension(field_idx), options)) {
839 return true;
840 }
841 }
842 return false;
843 }
844
HasRepeatedFields(const Descriptor * descriptor)845 static bool HasRepeatedFields(const Descriptor* descriptor) {
846 for (int i = 0; i < descriptor->field_count(); ++i) {
847 if (descriptor->field(i)->label() == FieldDescriptor::LABEL_REPEATED) {
848 return true;
849 }
850 }
851 for (int i = 0; i < descriptor->nested_type_count(); ++i) {
852 if (HasRepeatedFields(descriptor->nested_type(i))) return true;
853 }
854 return false;
855 }
856
HasRepeatedFields(const FileDescriptor * file)857 bool HasRepeatedFields(const FileDescriptor* file) {
858 for (int i = 0; i < file->message_type_count(); ++i) {
859 if (HasRepeatedFields(file->message_type(i))) return true;
860 }
861 return false;
862 }
863
IsStringPieceField(const FieldDescriptor * field,const Options & options)864 static bool IsStringPieceField(const FieldDescriptor* field,
865 const Options& options) {
866 return field->cpp_type() == FieldDescriptor::CPPTYPE_STRING &&
867 EffectiveStringCType(field, options) == FieldOptions::STRING_PIECE;
868 }
869
HasStringPieceFields(const Descriptor * descriptor,const Options & options)870 static bool HasStringPieceFields(const Descriptor* descriptor,
871 const Options& options) {
872 for (int i = 0; i < descriptor->field_count(); ++i) {
873 if (IsStringPieceField(descriptor->field(i), options)) return true;
874 }
875 for (int i = 0; i < descriptor->nested_type_count(); ++i) {
876 if (HasStringPieceFields(descriptor->nested_type(i), options)) return true;
877 }
878 return false;
879 }
880
HasStringPieceFields(const FileDescriptor * file,const Options & options)881 bool HasStringPieceFields(const FileDescriptor* file, const Options& options) {
882 for (int i = 0; i < file->message_type_count(); ++i) {
883 if (HasStringPieceFields(file->message_type(i), options)) return true;
884 }
885 return false;
886 }
887
IsCordField(const FieldDescriptor * field,const Options & options)888 static bool IsCordField(const FieldDescriptor* field, const Options& options) {
889 return field->cpp_type() == FieldDescriptor::CPPTYPE_STRING &&
890 EffectiveStringCType(field, options) == FieldOptions::CORD;
891 }
892
HasCordFields(const Descriptor * descriptor,const Options & options)893 static bool HasCordFields(const Descriptor* descriptor,
894 const Options& options) {
895 for (int i = 0; i < descriptor->field_count(); ++i) {
896 if (IsCordField(descriptor->field(i), options)) return true;
897 }
898 for (int i = 0; i < descriptor->nested_type_count(); ++i) {
899 if (HasCordFields(descriptor->nested_type(i), options)) return true;
900 }
901 return false;
902 }
903
HasCordFields(const FileDescriptor * file,const Options & options)904 bool HasCordFields(const FileDescriptor* file, const Options& options) {
905 for (int i = 0; i < file->message_type_count(); ++i) {
906 if (HasCordFields(file->message_type(i), options)) return true;
907 }
908 return false;
909 }
910
HasExtensionsOrExtendableMessage(const Descriptor * descriptor)911 static bool HasExtensionsOrExtendableMessage(const Descriptor* descriptor) {
912 if (descriptor->extension_range_count() > 0) return true;
913 if (descriptor->extension_count() > 0) return true;
914 for (int i = 0; i < descriptor->nested_type_count(); ++i) {
915 if (HasExtensionsOrExtendableMessage(descriptor->nested_type(i))) {
916 return true;
917 }
918 }
919 return false;
920 }
921
HasExtensionsOrExtendableMessage(const FileDescriptor * file)922 bool HasExtensionsOrExtendableMessage(const FileDescriptor* file) {
923 if (file->extension_count() > 0) return true;
924 for (int i = 0; i < file->message_type_count(); ++i) {
925 if (HasExtensionsOrExtendableMessage(file->message_type(i))) return true;
926 }
927 return false;
928 }
929
HasMapFields(const Descriptor * descriptor)930 static bool HasMapFields(const Descriptor* descriptor) {
931 for (int i = 0; i < descriptor->field_count(); ++i) {
932 if (descriptor->field(i)->is_map()) {
933 return true;
934 }
935 }
936 for (int i = 0; i < descriptor->nested_type_count(); ++i) {
937 if (HasMapFields(descriptor->nested_type(i))) return true;
938 }
939 return false;
940 }
941
HasMapFields(const FileDescriptor * file)942 bool HasMapFields(const FileDescriptor* file) {
943 for (int i = 0; i < file->message_type_count(); ++i) {
944 if (HasMapFields(file->message_type(i))) return true;
945 }
946 return false;
947 }
948
HasEnumDefinitions(const Descriptor * message_type)949 static bool HasEnumDefinitions(const Descriptor* message_type) {
950 if (message_type->enum_type_count() > 0) return true;
951 for (int i = 0; i < message_type->nested_type_count(); ++i) {
952 if (HasEnumDefinitions(message_type->nested_type(i))) return true;
953 }
954 return false;
955 }
956
HasEnumDefinitions(const FileDescriptor * file)957 bool HasEnumDefinitions(const FileDescriptor* file) {
958 if (file->enum_type_count() > 0) return true;
959 for (int i = 0; i < file->message_type_count(); ++i) {
960 if (HasEnumDefinitions(file->message_type(i))) return true;
961 }
962 return false;
963 }
964
IsStringOrMessage(const FieldDescriptor * field)965 bool IsStringOrMessage(const FieldDescriptor* field) {
966 switch (field->cpp_type()) {
967 case FieldDescriptor::CPPTYPE_INT32:
968 case FieldDescriptor::CPPTYPE_INT64:
969 case FieldDescriptor::CPPTYPE_UINT32:
970 case FieldDescriptor::CPPTYPE_UINT64:
971 case FieldDescriptor::CPPTYPE_DOUBLE:
972 case FieldDescriptor::CPPTYPE_FLOAT:
973 case FieldDescriptor::CPPTYPE_BOOL:
974 case FieldDescriptor::CPPTYPE_ENUM:
975 return false;
976 case FieldDescriptor::CPPTYPE_STRING:
977 case FieldDescriptor::CPPTYPE_MESSAGE:
978 return true;
979 }
980
981 GOOGLE_LOG(FATAL) << "Can't get here.";
982 return false;
983 }
984
EffectiveStringCType(const FieldDescriptor * field,const Options & options)985 FieldOptions::CType EffectiveStringCType(const FieldDescriptor* field,
986 const Options& options) {
987 GOOGLE_DCHECK(field->cpp_type() == FieldDescriptor::CPPTYPE_STRING);
988 if (options.opensource_runtime) {
989 // Open-source protobuf release only supports STRING ctype.
990 return FieldOptions::STRING;
991 } else {
992 // Google-internal supports all ctypes.
993 return field->options().ctype();
994 }
995 }
996
IsAnyMessage(const FileDescriptor * descriptor,const Options & options)997 bool IsAnyMessage(const FileDescriptor* descriptor, const Options& options) {
998 return descriptor->name() == kAnyProtoFile;
999 }
1000
IsAnyMessage(const Descriptor * descriptor,const Options & options)1001 bool IsAnyMessage(const Descriptor* descriptor, const Options& options) {
1002 return descriptor->name() == kAnyMessageName &&
1003 IsAnyMessage(descriptor->file(), options);
1004 }
1005
IsWellKnownMessage(const FileDescriptor * file)1006 bool IsWellKnownMessage(const FileDescriptor* file) {
1007 static const std::unordered_set<std::string> well_known_files{
1008 "google/protobuf/any.proto",
1009 "google/protobuf/api.proto",
1010 "google/protobuf/compiler/plugin.proto",
1011 "google/protobuf/descriptor.proto",
1012 "google/protobuf/duration.proto",
1013 "google/protobuf/empty.proto",
1014 "google/protobuf/field_mask.proto",
1015 "google/protobuf/source_context.proto",
1016 "google/protobuf/struct.proto",
1017 "google/protobuf/timestamp.proto",
1018 "google/protobuf/type.proto",
1019 "google/protobuf/wrappers.proto",
1020 };
1021 return well_known_files.find(file->name()) != well_known_files.end();
1022 }
1023
// Whether UTF-8 validity is enforced for `field`. Neither argument is
// consulted here: the answer is unconditionally true.
static bool FieldEnforceUtf8(const FieldDescriptor* field,
                             const Options& options) {
  static_cast<void>(field);
  static_cast<void>(options);
  return true;
}
1028
// Whether UTF-8 verification is enabled for `file`. Neither argument is
// consulted here: the answer is unconditionally true.
static bool FileUtf8Verification(const FileDescriptor* file,
                                 const Options& options) {
  static_cast<void>(file);
  static_cast<void>(options);
  return true;
}
1033
1034 // Which level of UTF-8 enforcemant is placed on this file.
GetUtf8CheckMode(const FieldDescriptor * field,const Options & options)1035 Utf8CheckMode GetUtf8CheckMode(const FieldDescriptor* field,
1036 const Options& options) {
1037 if (field->file()->syntax() == FileDescriptor::SYNTAX_PROTO3 &&
1038 FieldEnforceUtf8(field, options)) {
1039 return STRICT;
1040 } else if (GetOptimizeFor(field->file(), options) !=
1041 FileOptions::LITE_RUNTIME &&
1042 FileUtf8Verification(field->file(), options)) {
1043 return VERIFY;
1044 } else {
1045 return NONE;
1046 }
1047 }
1048
GenerateUtf8CheckCode(const FieldDescriptor * field,const Options & options,bool for_parse,const char * parameters,const char * strict_function,const char * verify_function,const Formatter & format)1049 static void GenerateUtf8CheckCode(const FieldDescriptor* field,
1050 const Options& options, bool for_parse,
1051 const char* parameters,
1052 const char* strict_function,
1053 const char* verify_function,
1054 const Formatter& format) {
1055 switch (GetUtf8CheckMode(field, options)) {
1056 case STRICT: {
1057 if (for_parse) {
1058 format("DO_(");
1059 }
1060 format("::$proto_ns$::internal::WireFormatLite::$1$(\n", strict_function);
1061 format.Indent();
1062 format(parameters);
1063 if (for_parse) {
1064 format("::$proto_ns$::internal::WireFormatLite::PARSE,\n");
1065 } else {
1066 format("::$proto_ns$::internal::WireFormatLite::SERIALIZE,\n");
1067 }
1068 format("\"$1$\")", field->full_name());
1069 if (for_parse) {
1070 format(")");
1071 }
1072 format(";\n");
1073 format.Outdent();
1074 break;
1075 }
1076 case VERIFY: {
1077 format("::$proto_ns$::internal::WireFormat::$1$(\n", verify_function);
1078 format.Indent();
1079 format(parameters);
1080 if (for_parse) {
1081 format("::$proto_ns$::internal::WireFormat::PARSE,\n");
1082 } else {
1083 format("::$proto_ns$::internal::WireFormat::SERIALIZE,\n");
1084 }
1085 format("\"$1$\");\n", field->full_name());
1086 format.Outdent();
1087 break;
1088 }
1089 case NONE:
1090 break;
1091 }
1092 }
1093
GenerateUtf8CheckCodeForString(const FieldDescriptor * field,const Options & options,bool for_parse,const char * parameters,const Formatter & format)1094 void GenerateUtf8CheckCodeForString(const FieldDescriptor* field,
1095 const Options& options, bool for_parse,
1096 const char* parameters,
1097 const Formatter& format) {
1098 GenerateUtf8CheckCode(field, options, for_parse, parameters,
1099 "VerifyUtf8String", "VerifyUTF8StringNamedField",
1100 format);
1101 }
1102
GenerateUtf8CheckCodeForCord(const FieldDescriptor * field,const Options & options,bool for_parse,const char * parameters,const Formatter & format)1103 void GenerateUtf8CheckCodeForCord(const FieldDescriptor* field,
1104 const Options& options, bool for_parse,
1105 const char* parameters,
1106 const Formatter& format) {
1107 GenerateUtf8CheckCode(field, options, for_parse, parameters, "VerifyUtf8Cord",
1108 "VerifyUTF8CordNamedField", format);
1109 }
1110
1111 namespace {
1112
Flatten(const Descriptor * descriptor,std::vector<const Descriptor * > * flatten)1113 void Flatten(const Descriptor* descriptor,
1114 std::vector<const Descriptor*>* flatten) {
1115 for (int i = 0; i < descriptor->nested_type_count(); i++)
1116 Flatten(descriptor->nested_type(i), flatten);
1117 flatten->push_back(descriptor);
1118 }
1119
1120 } // namespace
1121
FlattenMessagesInFile(const FileDescriptor * file,std::vector<const Descriptor * > * result)1122 void FlattenMessagesInFile(const FileDescriptor* file,
1123 std::vector<const Descriptor*>* result) {
1124 for (int i = 0; i < file->message_type_count(); i++) {
1125 Flatten(file->message_type(i), result);
1126 }
1127 }
1128
HasWeakFields(const Descriptor * descriptor,const Options & options)1129 bool HasWeakFields(const Descriptor* descriptor, const Options& options) {
1130 for (int i = 0; i < descriptor->field_count(); i++) {
1131 if (IsWeak(descriptor->field(i), options)) return true;
1132 }
1133 return false;
1134 }
1135
HasWeakFields(const FileDescriptor * file,const Options & options)1136 bool HasWeakFields(const FileDescriptor* file, const Options& options) {
1137 for (int i = 0; i < file->message_type_count(); ++i) {
1138 if (HasWeakFields(file->message_type(i), options)) return true;
1139 }
1140 return false;
1141 }
1142
UsingImplicitWeakFields(const FileDescriptor * file,const Options & options)1143 bool UsingImplicitWeakFields(const FileDescriptor* file,
1144 const Options& options) {
1145 return options.lite_implicit_weak_fields &&
1146 GetOptimizeFor(file, options) == FileOptions::LITE_RUNTIME;
1147 }
1148
IsImplicitWeakField(const FieldDescriptor * field,const Options & options,MessageSCCAnalyzer * scc_analyzer)1149 bool IsImplicitWeakField(const FieldDescriptor* field, const Options& options,
1150 MessageSCCAnalyzer* scc_analyzer) {
1151 return UsingImplicitWeakFields(field->file(), options) &&
1152 field->type() == FieldDescriptor::TYPE_MESSAGE &&
1153 !field->is_required() && !field->is_map() && !field->is_extension() &&
1154 !field->real_containing_oneof() &&
1155 !IsWellKnownMessage(field->message_type()->file()) &&
1156 field->message_type()->file()->name() !=
1157 "net/proto2/proto/descriptor.proto" &&
1158 // We do not support implicit weak fields between messages in the same
1159 // strongly-connected component.
1160 scc_analyzer->GetSCC(field->containing_type()) !=
1161 scc_analyzer->GetSCC(field->message_type());
1162 }
1163
GetSCCAnalysis(const SCC * scc)1164 MessageAnalysis MessageSCCAnalyzer::GetSCCAnalysis(const SCC* scc) {
1165 if (analysis_cache_.count(scc)) return analysis_cache_[scc];
1166 MessageAnalysis result{};
1167 for (int i = 0; i < scc->descriptors.size(); i++) {
1168 const Descriptor* descriptor = scc->descriptors[i];
1169 if (descriptor->extension_range_count() > 0) {
1170 result.contains_extension = true;
1171 // Extensions are found by looking up default_instance and extension
1172 // number in a map. So you'd maybe expect here
1173 // result.constructor_requires_initialization = true;
1174 // However the extension registration mechanism already makes sure
1175 // the default will be initialized.
1176 }
1177 for (int i = 0; i < descriptor->field_count(); i++) {
1178 const FieldDescriptor* field = descriptor->field(i);
1179 if (field->is_required()) {
1180 result.contains_required = true;
1181 }
1182 switch (field->type()) {
1183 case FieldDescriptor::TYPE_STRING:
1184 case FieldDescriptor::TYPE_BYTES: {
1185 result.constructor_requires_initialization = true;
1186 if (field->options().ctype() == FieldOptions::CORD) {
1187 result.contains_cord = true;
1188 }
1189 break;
1190 }
1191 case FieldDescriptor::TYPE_GROUP:
1192 case FieldDescriptor::TYPE_MESSAGE: {
1193 result.constructor_requires_initialization = true;
1194 const SCC* child = analyzer_.GetSCC(field->message_type());
1195 if (child != scc) {
1196 MessageAnalysis analysis = GetSCCAnalysis(child);
1197 result.contains_cord |= analysis.contains_cord;
1198 result.contains_extension |= analysis.contains_extension;
1199 if (!ShouldIgnoreRequiredFieldCheck(field, options_)) {
1200 result.contains_required |= analysis.contains_required;
1201 }
1202 } else {
1203 // This field points back into the same SCC hence the messages
1204 // in the SCC are recursive. Note if SCC contains more than two
1205 // nodes it has to be recursive, however this test also works for
1206 // a single node that is recursive.
1207 result.is_recursive = true;
1208 }
1209 break;
1210 }
1211 default:
1212 break;
1213 }
1214 }
1215 }
1216 // We deliberately only insert the result here. After we contracted the SCC
1217 // in the graph, the graph should be a DAG. Hence we shouldn't need to mark
1218 // nodes visited as we can never return to them. By inserting them here
1219 // we will go in an infinite loop if the SCC is not correct.
1220 return analysis_cache_[scc] = result;
1221 }
1222
ListAllFields(const Descriptor * d,std::vector<const FieldDescriptor * > * fields)1223 void ListAllFields(const Descriptor* d,
1224 std::vector<const FieldDescriptor*>* fields) {
1225 // Collect sub messages
1226 for (int i = 0; i < d->nested_type_count(); i++) {
1227 ListAllFields(d->nested_type(i), fields);
1228 }
1229 // Collect message level extensions.
1230 for (int i = 0; i < d->extension_count(); i++) {
1231 fields->push_back(d->extension(i));
1232 }
1233 // Add types of fields necessary
1234 for (int i = 0; i < d->field_count(); i++) {
1235 fields->push_back(d->field(i));
1236 }
1237 }
1238
ListAllFields(const FileDescriptor * d,std::vector<const FieldDescriptor * > * fields)1239 void ListAllFields(const FileDescriptor* d,
1240 std::vector<const FieldDescriptor*>* fields) {
1241 // Collect file level message.
1242 for (int i = 0; i < d->message_type_count(); i++) {
1243 ListAllFields(d->message_type(i), fields);
1244 }
1245 // Collect message level extensions.
1246 for (int i = 0; i < d->extension_count(); i++) {
1247 fields->push_back(d->extension(i));
1248 }
1249 }
1250
ListAllTypesForServices(const FileDescriptor * fd,std::vector<const Descriptor * > * types)1251 void ListAllTypesForServices(const FileDescriptor* fd,
1252 std::vector<const Descriptor*>* types) {
1253 for (int i = 0; i < fd->service_count(); i++) {
1254 const ServiceDescriptor* sd = fd->service(i);
1255 for (int j = 0; j < sd->method_count(); j++) {
1256 const MethodDescriptor* method = sd->method(j);
1257 types->push_back(method->input_type());
1258 types->push_back(method->output_type());
1259 }
1260 }
1261 }
1262
GetBootstrapBasename(const Options & options,const std::string & basename,std::string * bootstrap_basename)1263 bool GetBootstrapBasename(const Options& options, const std::string& basename,
1264 std::string* bootstrap_basename) {
1265 if (options.opensource_runtime) {
1266 return false;
1267 }
1268
1269 std::unordered_map<std::string, std::string> bootstrap_mapping{
1270 {"net/proto2/proto/descriptor",
1271 "net/proto2/internal/descriptor"},
1272 {"net/proto2/compiler/proto/plugin",
1273 "net/proto2/compiler/proto/plugin"},
1274 {"net/proto2/compiler/proto/profile",
1275 "net/proto2/compiler/proto/profile_bootstrap"},
1276 };
1277 auto iter = bootstrap_mapping.find(basename);
1278 if (iter == bootstrap_mapping.end()) {
1279 *bootstrap_basename = basename;
1280 return false;
1281 } else {
1282 *bootstrap_basename = iter->second;
1283 return true;
1284 }
1285 }
1286
IsBootstrapProto(const Options & options,const FileDescriptor * file)1287 bool IsBootstrapProto(const Options& options, const FileDescriptor* file) {
1288 std::string my_name = StripProto(file->name());
1289 return GetBootstrapBasename(options, my_name, &my_name);
1290 }
1291
// Handles code generation for bootstrap protos.
//
// Returns false when normal code generation should continue: either
// `*basename` is not a bootstrap proto (nothing is changed), or
// `bootstrap_flag` is set, in which case `*basename` is rewritten to the
// bootstrap base name first.
//
// Returns true when `*basename` is a bootstrap proto and `bootstrap_flag` is
// not set. In that case forwarding stubs are written through
// `generator_context` under the original base name: a .pb.h and a .proto.h
// that include the bootstrap headers, a .pb.cc containing a single newline,
// and .pb.h.meta / .proto.h.meta files that are opened but not written to.
// The caller is then expected to stop generating code for this file.
bool MaybeBootstrap(const Options& options, GeneratorContext* generator_context,
                    bool bootstrap_flag, std::string* basename) {
  std::string bootstrap_basename;
  if (!GetBootstrapBasename(options, *basename, &bootstrap_basename)) {
    return false;
  }

  if (bootstrap_flag) {
    // Adjust basename, but don't abort code generation.
    *basename = bootstrap_basename;
    return false;
  } else {
    std::string forward_to_basename = bootstrap_basename;

    // Generate forwarding headers and empty .pb.cc.
    // Each output lives in its own scope so the stream is destroyed as soon
    // as that file has been written.
    {
      std::unique_ptr<io::ZeroCopyOutputStream> output(
          generator_context->Open(*basename + ".pb.h"));
      io::Printer printer(output.get(), '$', nullptr);
      printer.Print(
          "#ifndef PROTOBUF_INCLUDED_$filename_identifier$_FORWARD_PB_H\n"
          "#define PROTOBUF_INCLUDED_$filename_identifier$_FORWARD_PB_H\n"
          "#include \"$forward_to_basename$.pb.h\" // IWYU pragma: export\n"
          "#endif // PROTOBUF_INCLUDED_$filename_identifier$_FORWARD_PB_H\n",
          "forward_to_basename", forward_to_basename, "filename_identifier",
          FilenameIdentifier(*basename));

      if (!options.opensource_runtime) {
        // HACK HACK HACK, tech debt from the deeps of proto1 and SWIG
        // protocoltype is SWIG'ed and we need to forward
        if (*basename == "net/proto/protocoltype") {
          printer.Print(
              "#ifdef SWIG\n"
              "%include \"$forward_to_basename$.pb.h\"\n"
              "#endif // SWIG\n",
              "forward_to_basename", forward_to_basename);
        }
      }
    }

    // Forwarding .proto.h header.
    {
      std::unique_ptr<io::ZeroCopyOutputStream> output(
          generator_context->Open(*basename + ".proto.h"));
      io::Printer printer(output.get(), '$', nullptr);
      printer.Print(
          "#ifndef PROTOBUF_INCLUDED_$filename_identifier$_FORWARD_PROTO_H\n"
          "#define PROTOBUF_INCLUDED_$filename_identifier$_FORWARD_PROTO_H\n"
          "#include \"$forward_to_basename$.proto.h\" // IWYU pragma: "
          "export\n"
          "#endif // "
          "PROTOBUF_INCLUDED_$filename_identifier$_FORWARD_PROTO_H\n",
          "forward_to_basename", forward_to_basename, "filename_identifier",
          FilenameIdentifier(*basename));
    }

    // .pb.cc containing nothing but a newline.
    {
      std::unique_ptr<io::ZeroCopyOutputStream> output(
          generator_context->Open(*basename + ".pb.cc"));
      io::Printer printer(output.get(), '$', nullptr);
      printer.Print("\n");
    }

    // The two .meta outputs are opened but nothing is written to them.
    {
      std::unique_ptr<io::ZeroCopyOutputStream> output(
          generator_context->Open(*basename + ".pb.h.meta"));
    }

    {
      std::unique_ptr<io::ZeroCopyOutputStream> output(
          generator_context->Open(*basename + ".proto.h.meta"));
    }

    // Abort code generation.
    return true;
  }
}
1368
1369 class ParseLoopGenerator {
1370 public:
// Creates a generator that writes through `printer`.
//   num_hasbits  - number of has-bits of the message to be parsed.
//   options      - generator options; stored by reference, so the object must
//                  outlive this generator.
//   scc_analyzer - stored as a pointer and used later for implicit-weak-field
//                  decisions.
ParseLoopGenerator(int num_hasbits, const Options& options,
                   MessageSCCAnalyzer* scc_analyzer, io::Printer* printer)
    : scc_analyzer_(scc_analyzer),
      options_(options),
      format_(printer),
      num_hasbits_(num_hasbits) {}
1377
// Emits the complete definition of `<classname>::_InternalParse` for the
// message `descriptor`: the function header and a local CHK_ macro, optional
// has-bits and arena locals, the parse loop itself (GenerateParseLoop), and
// the shared success/failure epilogue.
void GenerateParserLoop(const Descriptor* descriptor) {
  // Register the substitution variables used by the emitted text.
  format_.Set("classname", ClassName(descriptor));
  format_.Set("p_ns", "::" + ProtobufNamespace(options_));
  format_.Set("pi_ns",
              StrCat("::", ProtobufNamespace(options_), "::internal"));
  format_.Set("GOOGLE_PROTOBUF", MacroPrefix(options_));
  std::map<std::string, std::string> vars;
  SetCommonVars(options_, &vars);
  SetUnknkownFieldsVariable(descriptor, options_, &vars);
  format_.AddMap(vars);

  // Collect the fields that are in use, ordered by ascending field number.
  std::vector<const FieldDescriptor*> ordered_fields;
  for (auto field : FieldRange(descriptor)) {
    if (IsFieldUsed(field, options_)) {
      ordered_fields.push_back(field);
    }
  }
  std::sort(ordered_fields.begin(), ordered_fields.end(),
            [](const FieldDescriptor* a, const FieldDescriptor* b) {
              return a->number() < b->number();
            });

  format_(
      "const char* $classname$::_InternalParse(const char* ptr, "
      "$pi_ns$::ParseContext* ctx) {\n"
      "#define CHK_(x) if (PROTOBUF_PREDICT_FALSE(!(x))) goto failure\n");
  format_.Indent();
  // Number of 32-bit words needed to hold all has-bits.
  int hasbits_size = 0;
  if (num_hasbits_ > 0) {
    hasbits_size = (num_hasbits_ + 31) / 32;
  }
  // For now only optimize small hasbits.
  // That is: a local `has_bits` copy is used only when exactly one word is
  // needed; otherwise the emitted code refers to the `_has_bits_` member.
  if (hasbits_size != 1) hasbits_size = 0;
  if (hasbits_size) {
    format_("_Internal::HasBits has_bits{};\n");
    format_.Set("has_bits", "has_bits");
  } else {
    format_.Set("has_bits", "_has_bits_");
  }

  // Emit the `arena` local only for files that enable arenas.
  if (descriptor->file()->options().cc_enable_arenas()) {
    format_("$p_ns$::Arena* arena = GetArena(); (void)arena;\n");
  }
  GenerateParseLoop(descriptor, ordered_fields);
  format_.Outdent();
  format_("success:\n");
  // Merge the local has-bits copy into the member before returning.
  if (hasbits_size) format_(" _has_bits_.Or(has_bits);\n");

  // The failure label nulls `ptr` and jumps to `success`, so both paths share
  // the same return sequence.
  format_(
      " return ptr;\n"
      "failure:\n"
      " ptr = nullptr;\n"
      " goto success;\n"
      "#undef CHK_\n"
      "}\n");
}
1434
private:
// Analyzer passed to IsImplicitWeakField when generating message fields.
MessageSCCAnalyzer* scc_analyzer_;
// Generator options; held by reference, not copied.
const Options& options_;
// Formatter wrapping the printer that all emitted text goes through.
Formatter format_;
// Number of has-bits of the message; decides whether a local has-bits copy
// is used in the generated parser.
int num_hasbits_;

// Short names for the wire-format helper classes used below.
using WireFormat = internal::WireFormat;
using WireFormatLite = internal::WireFormatLite;
1443
GenerateArenaString(const FieldDescriptor * field)1444 void GenerateArenaString(const FieldDescriptor* field) {
1445 if (HasHasbit(field)) {
1446 format_("_Internal::set_has_$1$(&$has_bits$);\n", FieldName(field));
1447 }
1448 std::string default_string =
1449 field->default_value_string().empty()
1450 ? "::" + ProtobufNamespace(options_) +
1451 "::internal::GetEmptyStringAlreadyInited()"
1452 : QualifiedClassName(field->containing_type(), options_) +
1453 "::" + MakeDefaultName(field) + ".get()";
1454 format_(
1455 "if (arena != nullptr) {\n"
1456 " ptr = ctx->ReadArenaString(ptr, &$1$_, arena);\n"
1457 "} else {\n"
1458 " ptr = "
1459 "$pi_ns$::InlineGreedyStringParser($1$_.MutableNoArenaNoDefault(&$2$"
1460 "), ptr, ctx);"
1461 "\n}\n"
1462 "const std::string* str = &$1$_.Get(); (void)str;\n",
1463 FieldName(field), default_string);
1464 }
1465
// Emits the parse code for a string or bytes field and, when `check_utf8` is
// true, the UTF-8 validation that follows it.
//
// The emitted code always declares a local named `str` referring to the
// parsed value; the validation call emitted afterwards operates on it.
void GenerateStrings(const FieldDescriptor* field, bool check_utf8) {
  FieldOptions::CType ctype = FieldOptions::STRING;
  if (!options_.opensource_runtime) {
    // Open source doesn't support other ctypes;
    ctype = field->options().ctype();
  }
  // The arena-string path is taken only when every condition below holds;
  // everything else goes through one of the Inline*Parser helpers.
  if (field->file()->options().cc_enable_arenas() && !field->is_repeated() &&
      !options_.opensource_runtime &&
      GetOptimizeFor(field->file(), options_) != FileOptions::LITE_RUNTIME &&
      // For now only use arena string for strings with empty defaults.
      field->default_value_string().empty() &&
      !IsStringInlined(field, options_) && !field->real_containing_oneof() &&
      ctype == FieldOptions::STRING) {
    GenerateArenaString(field);
  } else {
    // Pick the parser helper that matches the field's ctype.
    std::string name;
    switch (ctype) {
      case FieldOptions::STRING:
        name = "GreedyStringParser";
        break;
      case FieldOptions::CORD:
        name = "CordParser";
        break;
      case FieldOptions::STRING_PIECE:
        name = "StringPieceParser";
        break;
    }
    // $1$: "_internal_" accessor prefix when the ctype has internal
    // accessors; $2$: "add" for repeated non-packable fields, else "mutable".
    format_(
        "auto str = $1$$2$_$3$();\n"
        "ptr = $pi_ns$::Inline$4$(str, ptr, ctx);\n",
        HasInternalAccessors(ctype) ? "_internal_" : "",
        field->is_repeated() && !field->is_packable() ? "add" : "mutable",
        FieldName(field), name);
  }
  if (!check_utf8) return;  // return if this is a bytes field
  auto level = GetUtf8CheckMode(field, options_);
  // Opening part of the check: VERIFY wraps it in a debug-only #ifndef,
  // STRICT wraps it in CHK_( ... ).
  switch (level) {
    case NONE:
      return;
    case VERIFY:
      format_("#ifndef NDEBUG\n");
      break;
    case STRICT:
      format_("CHK_(");
      break;
  }
  // The field name is passed to VerifyUTF8 only when the file has descriptor
  // methods; otherwise nullptr is passed.
  std::string field_name;
  field_name = "nullptr";
  if (HasDescriptorMethods(field->file(), options_)) {
    field_name = StrCat("\"", field->full_name(), "\"");
  }
  format_("$pi_ns$::VerifyUTF8(str, $1$)", field_name);
  // Closing part; must mirror the opening switch above. The NONE case cannot
  // be reached here because the first switch already returned for it.
  switch (level) {
    case NONE:
      return;
    case VERIFY:
      format_(
          ";\n"
          "#endif // !NDEBUG\n");
      break;
    case STRICT:
      format_(");\n");
      break;
  }
}
1531
// Emits the statement(s) that parse the payload of a length-delimited wire
// value for `field`. Packable fields get a Packed<Type>Parser call; otherwise
// the field must be a string, bytes or message field (any other type is a
// fatal error). The emitted code assigns the new position to `ptr`.
void GenerateLengthDelim(const FieldDescriptor* field) {
  if (field->is_packable()) {
    // Extra arguments for the packed parser; stays empty unless the field is
    // an enum whose unknown values must be validated.
    std::string enum_validator;
    if (field->type() == FieldDescriptor::TYPE_ENUM &&
        !HasPreservingUnknownEnumSemantics(field)) {
      enum_validator =
          StrCat(", ", QualifiedClassName(field->enum_type(), options_),
                 "_IsValid, &_internal_metadata_, ", field->number());
      format_(
          "ptr = "
          "$pi_ns$::Packed$1$Parser<$unknown_fields_type$>(_internal_mutable_"
          "$2$(), ptr, "
          "ctx$3$);\n",
          DeclaredTypeMethodName(field->type()), FieldName(field),
          enum_validator);
    } else {
      format_(
          "ptr = $pi_ns$::Packed$1$Parser(_internal_mutable_$2$(), ptr, "
          "ctx$3$);\n",
          DeclaredTypeMethodName(field->type()), FieldName(field),
          enum_validator);
    }
  } else {
    auto field_type = field->type();
    switch (field_type) {
      case FieldDescriptor::TYPE_STRING:
        GenerateStrings(field, true /* utf8 */);
        break;
      case FieldDescriptor::TYPE_BYTES:
        GenerateStrings(field, false /* utf8 */);
        break;
      case FieldDescriptor::TYPE_MESSAGE: {
        // Message fields come in several storage flavours, tested in this
        // order: map, lazy, implicit weak, weak, plain.
        if (field->is_map()) {
          const FieldDescriptor* val =
              field->message_type()->FindFieldByName("value");
          GOOGLE_CHECK(val);
          // Map with enum values that need validation: parse through an
          // enum-validating wrapper object.
          // NOTE(review): the unknown-enum-semantics check is made on `field`
          // (the map field), not on `val` — confirm this is intended.
          if (val->type() == FieldDescriptor::TYPE_ENUM &&
              !HasPreservingUnknownEnumSemantics(field)) {
            format_(
                "auto object = "
                "::$proto_ns$::internal::InitEnumParseWrapper<$unknown_"
                "fields_type$>("
                "&$1$_, $2$_IsValid, $3$, &_internal_metadata_);\n"
                "ptr = ctx->ParseMessage(&object, ptr);\n",
                FieldName(field), QualifiedClassName(val->enum_type()),
                field->number());
          } else {
            format_("ptr = ctx->ParseMessage(&$1$_, ptr);\n",
                    FieldName(field));
          }
        } else if (IsLazy(field, options_)) {
          if (field->real_containing_oneof()) {
            // Lazy field inside a oneof: the emitted code creates the
            // LazyField object first if this member is not the active one.
            format_(
                "if (!_internal_has_$1$()) {\n"
                " clear_$2$();\n"
                " $2$_.$1$_ = ::$proto_ns$::Arena::CreateMessage<\n"
                " $pi_ns$::LazyField>(GetArena());\n"
                " set_has_$1$();\n"
                "}\n"
                "ptr = ctx->ParseMessage($2$_.$1$_, ptr);\n",
                FieldName(field), field->containing_oneof()->name());
          } else if (HasHasbit(field)) {
            format_(
                "_Internal::set_has_$1$(&$has_bits$);\n"
                "ptr = ctx->ParseMessage(&$1$_, ptr);\n",
                FieldName(field));
          } else {
            format_("ptr = ctx->ParseMessage(&$1$_, ptr);\n",
                    FieldName(field));
          }
        } else if (IsImplicitWeakField(field, options_, scc_analyzer_)) {
          if (!field->is_repeated()) {
            format_(
                "ptr = ctx->ParseMessage(_Internal::mutable_$1$(this), "
                "ptr);\n",
                FieldName(field));
          } else {
            // Repeated implicit weak field: the emitted code adds an element
            // through AddWeak, passing the default-instance pointer of the
            // element type.
            format_(
                "ptr = ctx->ParseMessage($1$_.AddWeak(reinterpret_cast<const "
                "::$proto_ns$::MessageLite*>($2$::_$3$_default_instance_ptr_)"
                "), ptr);\n",
                FieldName(field), Namespace(field->message_type(), options_),
                ClassName(field->message_type()));
          }
        } else if (IsWeak(field, options_)) {
          format_(
              "ptr = ctx->ParseMessage(_weak_field_map_.MutableMessage($1$,"
              " _$classname$_default_instance_.$2$_), ptr);\n",
              field->number(), FieldName(field));
        } else {
          // Plain message field: add an element for repeated fields, obtain
          // the mutable sub-message otherwise.
          format_("ptr = ctx->ParseMessage(_internal_$1$_$2$(), ptr);\n",
                  field->is_repeated() ? "add" : "mutable", FieldName(field));
        }
        break;
      }
      default:
        // No other non-packable field type is handled for the
        // length-delimited wire type.
        GOOGLE_LOG(FATAL) << "Illegal combination for length delimited wiretype "
                   << " filed type is " << field->type();
    }
  }
}
1633
1634 // Convert a 1 or 2 byte varint into the equivalent value upon a direct load.
SmallVarintValue(uint32 x)1635 static uint32 SmallVarintValue(uint32 x) {
1636 GOOGLE_DCHECK(x < 128 * 128);
1637 if (x >= 128) x += (x & 0xFF80) + 128;
1638 return x;
1639 }
1640
ShouldRepeat(const FieldDescriptor * descriptor,internal::WireFormatLite::WireType wiretype)1641 static bool ShouldRepeat(const FieldDescriptor* descriptor,
1642 internal::WireFormatLite::WireType wiretype) {
1643 constexpr int kMaxTwoByteFieldNumber = 16 * 128;
1644 return descriptor->number() < kMaxTwoByteFieldNumber &&
1645 descriptor->is_repeated() &&
1646 (!descriptor->is_packable() ||
1647 wiretype != internal::WireFormatLite::WIRETYPE_LENGTH_DELIMITED);
1648 }
1649
GenerateFieldBody(internal::WireFormatLite::WireType wiretype,const FieldDescriptor * field)1650 void GenerateFieldBody(internal::WireFormatLite::WireType wiretype,
1651 const FieldDescriptor* field) {
1652 uint32 tag = WireFormatLite::MakeTag(field->number(), wiretype);
1653 switch (wiretype) {
1654 case WireFormatLite::WIRETYPE_VARINT: {
1655 std::string type = PrimitiveTypeName(options_, field->cpp_type());
1656 std::string prefix = field->is_repeated() ? "add" : "set";
1657 if (field->type() == FieldDescriptor::TYPE_ENUM) {
1658 format_(
1659 "$uint64$ val = $pi_ns$::ReadVarint64(&ptr);\n"
1660 "CHK_(ptr);\n");
1661 if (!HasPreservingUnknownEnumSemantics(field)) {
1662 format_("if (PROTOBUF_PREDICT_TRUE($1$_IsValid(val))) {\n",
1663 QualifiedClassName(field->enum_type(), options_));
1664 format_.Indent();
1665 }
1666 format_("_internal_$1$_$2$(static_cast<$3$>(val));\n", prefix,
1667 FieldName(field),
1668 QualifiedClassName(field->enum_type(), options_));
1669 if (!HasPreservingUnknownEnumSemantics(field)) {
1670 format_.Outdent();
1671 format_(
1672 "} else {\n"
1673 " $pi_ns$::WriteVarint($1$, val, mutable_unknown_fields());\n"
1674 "}\n",
1675 field->number());
1676 }
1677 } else {
1678 std::string size = (field->type() == FieldDescriptor::TYPE_SINT32 ||
1679 field->type() == FieldDescriptor::TYPE_UINT32)
1680 ? "32"
1681 : "64";
1682 std::string zigzag;
1683 if ((field->type() == FieldDescriptor::TYPE_SINT32 ||
1684 field->type() == FieldDescriptor::TYPE_SINT64)) {
1685 zigzag = "ZigZag";
1686 }
1687 if (field->is_repeated() || field->real_containing_oneof()) {
1688 std::string prefix = field->is_repeated() ? "add" : "set";
1689 format_(
1690 "_internal_$1$_$2$($pi_ns$::ReadVarint$3$$4$(&ptr));\n"
1691 "CHK_(ptr);\n",
1692 prefix, FieldName(field), zigzag, size);
1693 } else {
1694 if (HasHasbit(field)) {
1695 format_("_Internal::set_has_$1$(&$has_bits$);\n",
1696 FieldName(field));
1697 }
1698 format_(
1699 "$1$_ = $pi_ns$::ReadVarint$2$$3$(&ptr);\n"
1700 "CHK_(ptr);\n",
1701 FieldName(field), zigzag, size);
1702 }
1703 }
1704 break;
1705 }
1706 case WireFormatLite::WIRETYPE_FIXED32:
1707 case WireFormatLite::WIRETYPE_FIXED64: {
1708 std::string type = PrimitiveTypeName(options_, field->cpp_type());
1709 if (field->is_repeated() || field->real_containing_oneof()) {
1710 std::string prefix = field->is_repeated() ? "add" : "set";
1711 format_(
1712 "_internal_$1$_$2$($pi_ns$::UnalignedLoad<$3$>(ptr));\n"
1713 "ptr += sizeof($3$);\n",
1714 prefix, FieldName(field), type);
1715 } else {
1716 if (HasHasbit(field)) {
1717 format_("_Internal::set_has_$1$(&$has_bits$);\n", FieldName(field));
1718 }
1719 format_(
1720 "$1$_ = $pi_ns$::UnalignedLoad<$2$>(ptr);\n"
1721 "ptr += sizeof($2$);\n",
1722 FieldName(field), type);
1723 }
1724 break;
1725 }
1726 case WireFormatLite::WIRETYPE_LENGTH_DELIMITED: {
1727 GenerateLengthDelim(field);
1728 format_("CHK_(ptr);\n");
1729 break;
1730 }
1731 case WireFormatLite::WIRETYPE_START_GROUP: {
1732 format_(
1733 "ptr = ctx->ParseGroup(_internal_$1$_$2$(), ptr, $3$);\n"
1734 "CHK_(ptr);\n",
1735 field->is_repeated() ? "add" : "mutable", FieldName(field), tag);
1736 break;
1737 }
1738 case WireFormatLite::WIRETYPE_END_GROUP: {
1739 GOOGLE_LOG(FATAL) << "Can't have end group field\n";
1740 break;
1741 }
1742 } // switch (wire_type)
1743 }
1744
1745 // Returns the tag for this field and in case of repeated packable fields,
1746 // sets a fallback tag in fallback_tag_ptr.
ExpectedTag(const FieldDescriptor * field,uint32 * fallback_tag_ptr)1747 static uint32 ExpectedTag(const FieldDescriptor* field,
1748 uint32* fallback_tag_ptr) {
1749 uint32 expected_tag;
1750 if (field->is_packable()) {
1751 auto expected_wiretype = WireFormat::WireTypeForFieldType(field->type());
1752 expected_tag =
1753 WireFormatLite::MakeTag(field->number(), expected_wiretype);
1754 GOOGLE_CHECK(expected_wiretype != WireFormatLite::WIRETYPE_LENGTH_DELIMITED);
1755 auto fallback_wiretype = WireFormatLite::WIRETYPE_LENGTH_DELIMITED;
1756 uint32 fallback_tag =
1757 WireFormatLite::MakeTag(field->number(), fallback_wiretype);
1758
1759 if (field->is_packed()) std::swap(expected_tag, fallback_tag);
1760 *fallback_tag_ptr = fallback_tag;
1761 } else {
1762 auto expected_wiretype = WireFormat::WireTypeForField(field);
1763 expected_tag =
1764 WireFormatLite::MakeTag(field->number(), expected_wiretype);
1765 }
1766 return expected_tag;
1767 }
1768
GenerateParseLoop(const Descriptor * descriptor,const std::vector<const FieldDescriptor * > & ordered_fields)1769 void GenerateParseLoop(
1770 const Descriptor* descriptor,
1771 const std::vector<const FieldDescriptor*>& ordered_fields) {
1772 format_(
1773 "while (!ctx->Done(&ptr)) {\n"
1774 " $uint32$ tag;\n"
1775 " ptr = $pi_ns$::ReadTag(ptr, &tag);\n"
1776 " CHK_(ptr);\n");
1777 if (!ordered_fields.empty()) format_(" switch (tag >> 3) {\n");
1778
1779 format_.Indent();
1780 format_.Indent();
1781
1782 for (const auto* field : ordered_fields) {
1783 PrintFieldComment(format_, field);
1784 format_("case $1$:\n", field->number());
1785 format_.Indent();
1786 uint32 fallback_tag = 0;
1787 uint32 expected_tag = ExpectedTag(field, &fallback_tag);
1788 format_(
1789 "if (PROTOBUF_PREDICT_TRUE(static_cast<$uint8$>(tag) == $1$)) {\n",
1790 expected_tag & 0xFF);
1791 format_.Indent();
1792 auto wiretype = WireFormatLite::GetTagWireType(expected_tag);
1793 uint32 tag = WireFormatLite::MakeTag(field->number(), wiretype);
1794 int tag_size = io::CodedOutputStream::VarintSize32(tag);
1795 bool is_repeat = ShouldRepeat(field, wiretype);
1796 if (is_repeat) {
1797 format_(
1798 "ptr -= $1$;\n"
1799 "do {\n"
1800 " ptr += $1$;\n",
1801 tag_size);
1802 format_.Indent();
1803 }
1804 GenerateFieldBody(wiretype, field);
1805 if (is_repeat) {
1806 format_.Outdent();
1807 format_(
1808 " if (!ctx->DataAvailable(ptr)) break;\n"
1809 "} while ($pi_ns$::ExpectTag<$1$>(ptr));\n",
1810 tag);
1811 }
1812 format_.Outdent();
1813 if (fallback_tag) {
1814 format_("} else if (static_cast<$uint8$>(tag) == $1$) {\n",
1815 fallback_tag & 0xFF);
1816 format_.Indent();
1817 GenerateFieldBody(WireFormatLite::GetTagWireType(fallback_tag), field);
1818 format_.Outdent();
1819 }
1820 format_.Outdent();
1821 format_(
1822 " } else goto handle_unusual;\n"
1823 " continue;\n");
1824 } // for loop over ordered fields
1825
1826 // Default case
1827 if (!ordered_fields.empty()) format_("default: {\n");
1828 if (!ordered_fields.empty()) format_("handle_unusual:\n");
1829 format_(
1830 " if ((tag & 7) == 4 || tag == 0) {\n"
1831 " ctx->SetLastTag(tag);\n"
1832 " goto success;\n"
1833 " }\n");
1834 if (IsMapEntryMessage(descriptor)) {
1835 format_(" continue;\n");
1836 } else {
1837 if (descriptor->extension_range_count() > 0) {
1838 format_("if (");
1839 for (int i = 0; i < descriptor->extension_range_count(); i++) {
1840 const Descriptor::ExtensionRange* range =
1841 descriptor->extension_range(i);
1842 if (i > 0) format_(" ||\n ");
1843
1844 uint32 start_tag = WireFormatLite::MakeTag(
1845 range->start, static_cast<WireFormatLite::WireType>(0));
1846 uint32 end_tag = WireFormatLite::MakeTag(
1847 range->end, static_cast<WireFormatLite::WireType>(0));
1848
1849 if (range->end > FieldDescriptor::kMaxNumber) {
1850 format_("($1$u <= tag)", start_tag);
1851 } else {
1852 format_("($1$u <= tag && tag < $2$u)", start_tag, end_tag);
1853 }
1854 }
1855 format_(") {\n");
1856 format_(
1857 " ptr = _extensions_.ParseField(tag, ptr,\n"
1858 " internal_default_instance(), &_internal_metadata_, ctx);\n"
1859 " CHK_(ptr != nullptr);\n"
1860 " continue;\n"
1861 "}\n");
1862 }
1863 format_(
1864 " ptr = UnknownFieldParse(tag,\n"
1865 " _internal_metadata_.mutable_unknown_fields<$unknown_"
1866 "fields_type$>(),\n"
1867 " ptr, ctx);\n"
1868 " CHK_(ptr != nullptr);\n"
1869 " continue;\n");
1870 }
1871 if (!ordered_fields.empty()) format_("}\n"); // default case
1872 format_.Outdent();
1873 format_.Outdent();
1874 if (!ordered_fields.empty()) format_(" } // switch\n");
1875 format_("} // while\n");
1876 }
1877 };
1878
GenerateParserLoop(const Descriptor * descriptor,int num_hasbits,const Options & options,MessageSCCAnalyzer * scc_analyzer,io::Printer * printer)1879 void GenerateParserLoop(const Descriptor* descriptor, int num_hasbits,
1880 const Options& options,
1881 MessageSCCAnalyzer* scc_analyzer,
1882 io::Printer* printer) {
1883 ParseLoopGenerator generator(num_hasbits, options, scc_analyzer, printer);
1884 generator.GenerateParserLoop(descriptor);
1885 }
1886
HasExtensionFromFile(const Message & msg,const FileDescriptor * file,const Options & options,bool * has_opt_codesize_extension)1887 static bool HasExtensionFromFile(const Message& msg, const FileDescriptor* file,
1888 const Options& options,
1889 bool* has_opt_codesize_extension) {
1890 std::vector<const FieldDescriptor*> fields;
1891 auto reflection = msg.GetReflection();
1892 reflection->ListFields(msg, &fields);
1893 for (auto field : fields) {
1894 const auto* field_msg = field->message_type();
1895 if (field_msg == nullptr) {
1896 // It so happens that enums Is_Valid are still generated so enums work.
1897 // Only messages have potential problems.
1898 continue;
1899 }
1900 // If this option has an extension set AND that extension is defined in the
1901 // same file we have bootstrap problem.
1902 if (field->is_extension()) {
1903 const auto* msg_extension_file = field->message_type()->file();
1904 if (msg_extension_file == file) return true;
1905 if (has_opt_codesize_extension &&
1906 GetOptimizeFor(msg_extension_file, options) ==
1907 FileOptions::CODE_SIZE) {
1908 *has_opt_codesize_extension = true;
1909 }
1910 }
1911 // Recurse in this field to see if there is a problem in there
1912 if (field->is_repeated()) {
1913 for (int i = 0; i < reflection->FieldSize(msg, field); i++) {
1914 if (HasExtensionFromFile(reflection->GetRepeatedMessage(msg, field, i),
1915 file, options, has_opt_codesize_extension)) {
1916 return true;
1917 }
1918 }
1919 } else {
1920 if (HasExtensionFromFile(reflection->GetMessage(msg, field), file,
1921 options, has_opt_codesize_extension)) {
1922 return true;
1923 }
1924 }
1925 }
1926 return false;
1927 }
1928
HasBootstrapProblem(const FileDescriptor * file,const Options & options,bool * has_opt_codesize_extension)1929 static bool HasBootstrapProblem(const FileDescriptor* file,
1930 const Options& options,
1931 bool* has_opt_codesize_extension) {
1932 static auto& cache = *new std::unordered_map<const FileDescriptor*, bool>;
1933 auto it = cache.find(file);
1934 if (it != cache.end()) return it->second;
1935 // In order to build the data structures for the reflective parse, it needs
1936 // to parse the serialized descriptor describing all the messages defined in
1937 // this file. Obviously this presents a bootstrap problem for descriptor
1938 // messages.
1939 if (file->name() == "net/proto2/proto/descriptor.proto" ||
1940 file->name() == "google/protobuf/descriptor.proto") {
1941 return true;
1942 }
1943 // Unfortunately we're not done yet. The descriptor option messages allow
1944 // for extensions. So we need to be able to parse these extensions in order
1945 // to parse the file descriptor for a file that has custom options. This is a
1946 // problem when these custom options extensions are defined in the same file.
1947 FileDescriptorProto linkedin_fd_proto;
1948 const DescriptorPool* pool = file->pool();
1949 const Descriptor* fd_proto_descriptor =
1950 pool->FindMessageTypeByName(linkedin_fd_proto.GetTypeName());
1951 // Not all pools have descriptor.proto in them. In these cases there for sure
1952 // are no custom options.
1953 if (fd_proto_descriptor == nullptr) return false;
1954
1955 // It's easier to inspect file as a proto, because we can use reflection on
1956 // the proto to iterate over all content.
1957 file->CopyTo(&linkedin_fd_proto);
1958
1959 // linkedin_fd_proto is a generated proto linked in the proto compiler. As
1960 // such it doesn't know the extensions that are potentially present in the
1961 // descriptor pool constructed from the protos that are being compiled. These
1962 // custom options are therefore in the unknown fields.
1963 // By building the corresponding FileDescriptorProto in the pool constructed
1964 // by the protos that are being compiled, ie. file's pool, the unknown fields
1965 // are converted to extensions.
1966 DynamicMessageFactory factory(pool);
1967 Message* fd_proto = factory.GetPrototype(fd_proto_descriptor)->New();
1968 fd_proto->ParseFromString(linkedin_fd_proto.SerializeAsString());
1969
1970 bool& res = cache[file];
1971 res = HasExtensionFromFile(*fd_proto, file, options,
1972 has_opt_codesize_extension);
1973 delete fd_proto;
1974 return res;
1975 }
1976
GetOptimizeFor(const FileDescriptor * file,const Options & options,bool * has_opt_codesize_extension)1977 FileOptions_OptimizeMode GetOptimizeFor(const FileDescriptor* file,
1978 const Options& options,
1979 bool* has_opt_codesize_extension) {
1980 if (has_opt_codesize_extension) *has_opt_codesize_extension = false;
1981 switch (options.enforce_mode) {
1982 case EnforceOptimizeMode::kSpeed:
1983 return FileOptions::SPEED;
1984 case EnforceOptimizeMode::kLiteRuntime:
1985 return FileOptions::LITE_RUNTIME;
1986 case EnforceOptimizeMode::kCodeSize:
1987 if (file->options().optimize_for() == FileOptions::LITE_RUNTIME) {
1988 return FileOptions::LITE_RUNTIME;
1989 }
1990 if (HasBootstrapProblem(file, options, has_opt_codesize_extension)) {
1991 return FileOptions::SPEED;
1992 }
1993 return FileOptions::CODE_SIZE;
1994 case EnforceOptimizeMode::kNoEnforcement:
1995 if (file->options().optimize_for() == FileOptions::CODE_SIZE) {
1996 if (HasBootstrapProblem(file, options, has_opt_codesize_extension)) {
1997 GOOGLE_LOG(WARNING) << "Proto states optimize_for = CODE_SIZE, but we "
1998 "cannot honor that because it contains custom option "
1999 "extensions defined in the same proto.";
2000 return FileOptions::SPEED;
2001 }
2002 }
2003 return file->options().optimize_for();
2004 }
2005
2006 GOOGLE_LOG(FATAL) << "Unknown optimization enforcement requested.";
2007 // The phony return below serves to silence a warning from GCC 8.
2008 return FileOptions::SPEED;
2009 }
2010
2011 } // namespace cpp
2012 } // namespace compiler
2013 } // namespace protobuf
2014 } // namespace google
2015