1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc. All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 // * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 // * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 // * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31 // Author: kenton@google.com (Kenton Varda)
32 // Based on original Protocol Buffers design by
33 // Sanjay Ghemawat, Jeff Dean, and others.
34
35 #include <google/protobuf/compiler/cpp/cpp_helpers.h>
36
37 #include <functional>
38 #include <limits>
39 #include <map>
40 #include <queue>
41 #include <unordered_set>
42 #include <vector>
43
44 #include <google/protobuf/stubs/common.h>
45 #include <google/protobuf/stubs/logging.h>
46 #include <google/protobuf/compiler/cpp/cpp_options.h>
47 #include <google/protobuf/descriptor.pb.h>
48 #include <google/protobuf/descriptor.h>
49 #include <google/protobuf/compiler/scc.h>
50 #include <google/protobuf/io/printer.h>
51 #include <google/protobuf/io/zero_copy_stream.h>
52 #include <google/protobuf/dynamic_message.h>
53 #include <google/protobuf/wire_format.h>
54 #include <google/protobuf/wire_format_lite.h>
55 #include <google/protobuf/stubs/strutil.h>
56 #include <google/protobuf/stubs/substitute.h>
57 #include <google/protobuf/stubs/hash.h>
58
59 #include <google/protobuf/port_def.inc>
60
61 namespace google {
62 namespace protobuf {
63 namespace compiler {
64 namespace cpp {
65
66 namespace {
67
// Name "Any" and the .proto path "google/protobuf/any.proto".
// NOTE(review): presumably used to recognize the well-known
// google.protobuf.Any message; the uses are outside this part of the file.
static const char kAnyMessageName[] = "Any";
static const char kAnyProtoFile[] = "google/protobuf/any.proto";
70
DotsToColons(const std::string & name)71 std::string DotsToColons(const std::string& name) {
72 return StringReplace(name, ".", "::", true);
73 }
74
// Identifiers that must not be emitted verbatim in generated C++ code: the
// C++ keywords and alternative operator tokens, plus the NULL macro. Names
// found in this list get an underscore appended by the helpers below.
// Includes the C++11 character types char16_t and char32_t, which are
// keywords just like wchar_t.
static const char* const kKeywordList[] = {  //
    "NULL",
    "alignas",
    "alignof",
    "and",
    "and_eq",
    "asm",
    "auto",
    "bitand",
    "bitor",
    "bool",
    "break",
    "case",
    "catch",
    "char",
    "char16_t",
    "char32_t",
    "class",
    "compl",
    "const",
    "constexpr",
    "const_cast",
    "continue",
    "decltype",
    "default",
    "delete",
    "do",
    "double",
    "dynamic_cast",
    "else",
    "enum",
    "explicit",
    "export",
    "extern",
    "false",
    "float",
    "for",
    "friend",
    "goto",
    "if",
    "inline",
    "int",
    "long",
    "mutable",
    "namespace",
    "new",
    "noexcept",
    "not",
    "not_eq",
    "nullptr",
    "operator",
    "or",
    "or_eq",
    "private",
    "protected",
    "public",
    "register",
    "reinterpret_cast",
    "return",
    "short",
    "signed",
    "sizeof",
    "static",
    "static_assert",
    "static_cast",
    "struct",
    "switch",
    "template",
    "this",
    "thread_local",
    "throw",
    "true",
    "try",
    "typedef",
    "typeid",
    "typename",
    "union",
    "unsigned",
    "using",
    "virtual",
    "void",
    "volatile",
    "wchar_t",
    "while",
    "xor",
    "xor_eq"};
159
MakeKeywordsMap()160 static std::unordered_set<std::string>* MakeKeywordsMap() {
161 auto* result = new std::unordered_set<std::string>();
162 for (const auto keyword : kKeywordList) {
163 result->emplace(keyword);
164 }
165 return result;
166 }
167
// Lookup set of the identifiers in kKeywordList, built once when this
// translation unit's statics are initialized. The set is allocated with new
// inside MakeKeywordsMap(); no matching delete appears in this part of the
// file.
static std::unordered_set<std::string>& kKeywords = *MakeKeywordsMap();
169
170 // Encode [0..63] as 'A'-'Z', 'a'-'z', '0'-'9', '_'
Base63Char(int value)171 char Base63Char(int value) {
172 GOOGLE_CHECK_GE(value, 0);
173 if (value < 26) return 'A' + value;
174 value -= 26;
175 if (value < 26) return 'a' + value;
176 value -= 26;
177 if (value < 10) return '0' + value;
178 GOOGLE_CHECK_EQ(value, 10);
179 return '_';
180 }
181
182 // Given a c identifier has 63 legal characters we can't implement base64
183 // encoding. So we return the k least significant "digits" in base 63.
184 template <typename I>
Base63(I n,int k)185 std::string Base63(I n, int k) {
186 std::string res;
187 while (k-- > 0) {
188 res += Base63Char(static_cast<int>(n % 63));
189 n /= 63;
190 }
191 return res;
192 }
193
IntTypeName(const Options & options,const std::string & type)194 std::string IntTypeName(const Options& options, const std::string& type) {
195 if (options.opensource_runtime) {
196 return "::PROTOBUF_NAMESPACE_ID::" + type;
197 } else {
198 return "::" + type;
199 }
200 }
201
SetIntVar(const Options & options,const std::string & type,std::map<std::string,std::string> * variables)202 void SetIntVar(const Options& options, const std::string& type,
203 std::map<std::string, std::string>* variables) {
204 (*variables)[type] = IntTypeName(options, type);
205 }
206
HasInternalAccessors(const FieldOptions::CType ctype)207 bool HasInternalAccessors(const FieldOptions::CType ctype) {
208 return ctype == FieldOptions::STRING || ctype == FieldOptions::CORD;
209 }
210
211 } // namespace
212
SetCommonVars(const Options & options,std::map<std::string,std::string> * variables)213 void SetCommonVars(const Options& options,
214 std::map<std::string, std::string>* variables) {
215 (*variables)["proto_ns"] = ProtobufNamespace(options);
216
217 // Warning: there is some clever naming/splitting here to avoid extract script
218 // rewrites. The names of these variables must not be things that the extract
219 // script will rewrite. That's why we use "CHK" (for example) instead of
220 // "GOOGLE_CHECK".
221 if (options.opensource_runtime) {
222 (*variables)["GOOGLE_PROTOBUF"] = "GOOGLE_PROTOBUF";
223 (*variables)["CHK"] = "GOOGLE_CHECK";
224 (*variables)["DCHK"] = "GOOGLE_DCHECK";
225 } else {
226 // These values are things the extract script would rewrite if we did not
227 // split them. It might not strictly matter since we don't generate google3
228 // code in open-source. But it's good to prevent surprising things from
229 // happening.
230 (*variables)["GOOGLE_PROTOBUF"] =
231 "GOOGLE3"
232 "_PROTOBUF";
233 (*variables)["CHK"] =
234 "CH"
235 "ECK";
236 (*variables)["DCHK"] =
237 "DCH"
238 "ECK";
239 }
240
241 SetIntVar(options, "int8", variables);
242 SetIntVar(options, "uint8", variables);
243 SetIntVar(options, "uint32", variables);
244 SetIntVar(options, "uint64", variables);
245 SetIntVar(options, "int32", variables);
246 SetIntVar(options, "int64", variables);
247 (*variables)["string"] = "std::string";
248 }
249
SetUnknkownFieldsVariable(const Descriptor * descriptor,const Options & options,std::map<std::string,std::string> * variables)250 void SetUnknkownFieldsVariable(const Descriptor* descriptor,
251 const Options& options,
252 std::map<std::string, std::string>* variables) {
253 std::string proto_ns = ProtobufNamespace(options);
254 std::string unknown_fields_type;
255 if (UseUnknownFieldSet(descriptor->file(), options)) {
256 unknown_fields_type = "::" + proto_ns + "::UnknownFieldSet";
257 (*variables)["unknown_fields"] =
258 "_internal_metadata_.unknown_fields<" + unknown_fields_type + ">(" +
259 unknown_fields_type + "::default_instance)";
260 } else {
261 unknown_fields_type =
262 PrimitiveTypeName(options, FieldDescriptor::CPPTYPE_STRING);
263 (*variables)["unknown_fields"] = "_internal_metadata_.unknown_fields<" +
264 unknown_fields_type + ">(::" + proto_ns +
265 "::internal::GetEmptyString)";
266 }
267 (*variables)["unknown_fields_type"] = unknown_fields_type;
268 (*variables)["have_unknown_fields"] =
269 "_internal_metadata_.have_unknown_fields()";
270 (*variables)["mutable_unknown_fields"] =
271 "_internal_metadata_.mutable_unknown_fields<" + unknown_fields_type +
272 ">()";
273 }
274
// Converts |input| to camel case.
//
// ASCII letters and digits are copied to the result; every other character
// (such as '_') is dropped. A lowercase letter is upper-cased when it is the
// first letter seen and |cap_next_letter| is true, or when it directly
// follows a digit or a dropped character. Uppercase letters are copied
// unchanged.
std::string UnderscoresToCamelCase(const std::string& input,
                                   bool cap_next_letter) {
  std::string result;
  result.reserve(input.size());
  // Note: ctype.h is deliberately avoided because its results depend on the
  // locale.
  for (const char c : input) {
    if ('a' <= c && c <= 'z') {
      result += cap_next_letter ? static_cast<char>(c + ('A' - 'a')) : c;
      cap_next_letter = false;
    } else if ('A' <= c && c <= 'Z') {
      // Capital letters are left as-is.
      result += c;
      cap_next_letter = false;
    } else if ('0' <= c && c <= '9') {
      // Digits are kept, and the letter after a digit is capitalized.
      result += c;
      cap_next_letter = true;
    } else {
      // Separator: drop it and capitalize whatever letter comes next.
      cap_next_letter = true;
    }
  }
  return result;
}
300
// Comment lines made of '=' and '-' characters respectively, each ending in a
// newline.
// NOTE(review): presumably emitted into generated files as visual section
// dividers; the uses are outside this part of the file.
const char kThickSeparator[] =
    "// ===================================================================\n";
const char kThinSeparator[] =
    "// -------------------------------------------------------------------\n";
305
CanInitializeByZeroing(const FieldDescriptor * field)306 bool CanInitializeByZeroing(const FieldDescriptor* field) {
307 if (field->is_repeated() || field->is_extension()) return false;
308 switch (field->cpp_type()) {
309 case FieldDescriptor::CPPTYPE_ENUM:
310 return field->default_value_enum()->number() == 0;
311 case FieldDescriptor::CPPTYPE_INT32:
312 return field->default_value_int32() == 0;
313 case FieldDescriptor::CPPTYPE_INT64:
314 return field->default_value_int64() == 0;
315 case FieldDescriptor::CPPTYPE_UINT32:
316 return field->default_value_uint32() == 0;
317 case FieldDescriptor::CPPTYPE_UINT64:
318 return field->default_value_uint64() == 0;
319 case FieldDescriptor::CPPTYPE_FLOAT:
320 return field->default_value_float() == 0;
321 case FieldDescriptor::CPPTYPE_DOUBLE:
322 return field->default_value_double() == 0;
323 case FieldDescriptor::CPPTYPE_BOOL:
324 return field->default_value_bool() == false;
325 default:
326 return false;
327 }
328 }
329
ClassName(const Descriptor * descriptor)330 std::string ClassName(const Descriptor* descriptor) {
331 const Descriptor* parent = descriptor->containing_type();
332 std::string res;
333 if (parent) res += ClassName(parent) + "_";
334 res += descriptor->name();
335 if (IsMapEntryMessage(descriptor)) res += "_DoNotUse";
336 return ResolveKeyword(res);
337 }
338
ClassName(const EnumDescriptor * enum_descriptor)339 std::string ClassName(const EnumDescriptor* enum_descriptor) {
340 if (enum_descriptor->containing_type() == nullptr) {
341 return ResolveKeyword(enum_descriptor->name());
342 } else {
343 return ClassName(enum_descriptor->containing_type()) + "_" +
344 enum_descriptor->name();
345 }
346 }
347
QualifiedClassName(const Descriptor * d,const Options & options)348 std::string QualifiedClassName(const Descriptor* d, const Options& options) {
349 return QualifiedFileLevelSymbol(d->file(), ClassName(d), options);
350 }
351
QualifiedClassName(const EnumDescriptor * d,const Options & options)352 std::string QualifiedClassName(const EnumDescriptor* d,
353 const Options& options) {
354 return QualifiedFileLevelSymbol(d->file(), ClassName(d), options);
355 }
356
QualifiedClassName(const Descriptor * d)357 std::string QualifiedClassName(const Descriptor* d) {
358 return QualifiedClassName(d, Options());
359 }
360
QualifiedClassName(const EnumDescriptor * d)361 std::string QualifiedClassName(const EnumDescriptor* d) {
362 return QualifiedClassName(d, Options());
363 }
364
QualifiedExtensionName(const FieldDescriptor * d,const Options & options)365 std::string QualifiedExtensionName(const FieldDescriptor* d,
366 const Options& options) {
367 GOOGLE_DCHECK(d->is_extension());
368 return QualifiedFileLevelSymbol(d->file(), FieldName(d), options);
369 }
370
QualifiedExtensionName(const FieldDescriptor * d)371 std::string QualifiedExtensionName(const FieldDescriptor* d) {
372 return QualifiedExtensionName(d, Options());
373 }
374
Namespace(const std::string & package)375 std::string Namespace(const std::string& package) {
376 if (package.empty()) return "";
377 return "::" + DotsToColons(package);
378 }
379
Namespace(const FileDescriptor * d,const Options & options)380 std::string Namespace(const FileDescriptor* d, const Options& options) {
381 std::string ret = Namespace(d->package());
382 if (IsWellKnownMessage(d) && options.opensource_runtime) {
383 // Written with string concatenation to prevent rewriting of
384 // ::google::protobuf.
385 ret = StringReplace(ret,
386 "::google::"
387 "protobuf",
388 "PROTOBUF_NAMESPACE_ID", false);
389 }
390 return ret;
391 }
392
Namespace(const Descriptor * d,const Options & options)393 std::string Namespace(const Descriptor* d, const Options& options) {
394 return Namespace(d->file(), options);
395 }
396
Namespace(const FieldDescriptor * d,const Options & options)397 std::string Namespace(const FieldDescriptor* d, const Options& options) {
398 return Namespace(d->file(), options);
399 }
400
Namespace(const EnumDescriptor * d,const Options & options)401 std::string Namespace(const EnumDescriptor* d, const Options& options) {
402 return Namespace(d->file(), options);
403 }
404
DefaultInstanceType(const Descriptor * descriptor,const Options & options)405 std::string DefaultInstanceType(const Descriptor* descriptor,
406 const Options& options) {
407 return ClassName(descriptor) + "DefaultTypeInternal";
408 }
409
DefaultInstanceName(const Descriptor * descriptor,const Options & options)410 std::string DefaultInstanceName(const Descriptor* descriptor,
411 const Options& options) {
412 return "_" + ClassName(descriptor, false) + "_default_instance_";
413 }
414
DefaultInstancePtr(const Descriptor * descriptor,const Options & options)415 std::string DefaultInstancePtr(const Descriptor* descriptor,
416 const Options& options) {
417 return DefaultInstanceName(descriptor, options) + "ptr_";
418 }
419
QualifiedDefaultInstanceName(const Descriptor * descriptor,const Options & options)420 std::string QualifiedDefaultInstanceName(const Descriptor* descriptor,
421 const Options& options) {
422 return QualifiedFileLevelSymbol(
423 descriptor->file(), DefaultInstanceName(descriptor, options), options);
424 }
425
QualifiedDefaultInstancePtr(const Descriptor * descriptor,const Options & options)426 std::string QualifiedDefaultInstancePtr(const Descriptor* descriptor,
427 const Options& options) {
428 return QualifiedDefaultInstanceName(descriptor, options) + "ptr_";
429 }
430
DescriptorTableName(const FileDescriptor * file,const Options & options)431 std::string DescriptorTableName(const FileDescriptor* file,
432 const Options& options) {
433 return UniqueName("descriptor_table", file, options);
434 }
435
FileDllExport(const FileDescriptor * file,const Options & options)436 std::string FileDllExport(const FileDescriptor* file, const Options& options) {
437 return UniqueName("PROTOBUF_INTERNAL_EXPORT", file, options);
438 }
439
SuperClassName(const Descriptor * descriptor,const Options & options)440 std::string SuperClassName(const Descriptor* descriptor,
441 const Options& options) {
442 return "::" + ProtobufNamespace(options) +
443 (HasDescriptorMethods(descriptor->file(), options) ? "::Message"
444 : "::MessageLite");
445 }
446
ResolveKeyword(const std::string & name)447 std::string ResolveKeyword(const std::string& name) {
448 if (kKeywords.count(name) > 0) {
449 return name + "_";
450 }
451 return name;
452 }
453
FieldName(const FieldDescriptor * field)454 std::string FieldName(const FieldDescriptor* field) {
455 std::string result = field->name();
456 LowerString(&result);
457 if (kKeywords.count(result) > 0) {
458 result.append("_");
459 }
460 return result;
461 }
462
EnumValueName(const EnumValueDescriptor * enum_value)463 std::string EnumValueName(const EnumValueDescriptor* enum_value) {
464 std::string result = enum_value->name();
465 if (kKeywords.count(result) > 0) {
466 result.append("_");
467 }
468 return result;
469 }
470
EstimateAlignmentSize(const FieldDescriptor * field)471 int EstimateAlignmentSize(const FieldDescriptor* field) {
472 if (field == nullptr) return 0;
473 if (field->is_repeated()) return 8;
474 switch (field->cpp_type()) {
475 case FieldDescriptor::CPPTYPE_BOOL:
476 return 1;
477
478 case FieldDescriptor::CPPTYPE_INT32:
479 case FieldDescriptor::CPPTYPE_UINT32:
480 case FieldDescriptor::CPPTYPE_ENUM:
481 case FieldDescriptor::CPPTYPE_FLOAT:
482 return 4;
483
484 case FieldDescriptor::CPPTYPE_INT64:
485 case FieldDescriptor::CPPTYPE_UINT64:
486 case FieldDescriptor::CPPTYPE_DOUBLE:
487 case FieldDescriptor::CPPTYPE_STRING:
488 case FieldDescriptor::CPPTYPE_MESSAGE:
489 return 8;
490 }
491 GOOGLE_LOG(FATAL) << "Can't get here.";
492 return -1; // Make compiler happy.
493 }
494
FieldConstantName(const FieldDescriptor * field)495 std::string FieldConstantName(const FieldDescriptor* field) {
496 std::string field_name = UnderscoresToCamelCase(field->name(), true);
497 std::string result = "k" + field_name + "FieldNumber";
498
499 if (!field->is_extension() &&
500 field->containing_type()->FindFieldByCamelcaseName(
501 field->camelcase_name()) != field) {
502 // This field's camelcase name is not unique. As a hack, add the field
503 // number to the constant name. This makes the constant rather useless,
504 // but what can we do?
505 result += "_" + StrCat(field->number());
506 }
507
508 return result;
509 }
510
FieldMessageTypeName(const FieldDescriptor * field,const Options & options)511 std::string FieldMessageTypeName(const FieldDescriptor* field,
512 const Options& options) {
513 // Note: The Google-internal version of Protocol Buffers uses this function
514 // as a hook point for hacks to support legacy code.
515 return QualifiedClassName(field->message_type(), options);
516 }
517
StripProto(const std::string & filename)518 std::string StripProto(const std::string& filename) {
519 if (HasSuffixString(filename, ".protodevel")) {
520 return StripSuffixString(filename, ".protodevel");
521 } else {
522 return StripSuffixString(filename, ".proto");
523 }
524 }
525
PrimitiveTypeName(FieldDescriptor::CppType type)526 const char* PrimitiveTypeName(FieldDescriptor::CppType type) {
527 switch (type) {
528 case FieldDescriptor::CPPTYPE_INT32:
529 return "::google::protobuf::int32";
530 case FieldDescriptor::CPPTYPE_INT64:
531 return "::google::protobuf::int64";
532 case FieldDescriptor::CPPTYPE_UINT32:
533 return "::google::protobuf::uint32";
534 case FieldDescriptor::CPPTYPE_UINT64:
535 return "::google::protobuf::uint64";
536 case FieldDescriptor::CPPTYPE_DOUBLE:
537 return "double";
538 case FieldDescriptor::CPPTYPE_FLOAT:
539 return "float";
540 case FieldDescriptor::CPPTYPE_BOOL:
541 return "bool";
542 case FieldDescriptor::CPPTYPE_ENUM:
543 return "int";
544 case FieldDescriptor::CPPTYPE_STRING:
545 return "std::string";
546 case FieldDescriptor::CPPTYPE_MESSAGE:
547 return nullptr;
548
549 // No default because we want the compiler to complain if any new
550 // CppTypes are added.
551 }
552
553 GOOGLE_LOG(FATAL) << "Can't get here.";
554 return nullptr;
555 }
556
PrimitiveTypeName(const Options & options,FieldDescriptor::CppType type)557 std::string PrimitiveTypeName(const Options& options,
558 FieldDescriptor::CppType type) {
559 switch (type) {
560 case FieldDescriptor::CPPTYPE_INT32:
561 return IntTypeName(options, "int32");
562 case FieldDescriptor::CPPTYPE_INT64:
563 return IntTypeName(options, "int64");
564 case FieldDescriptor::CPPTYPE_UINT32:
565 return IntTypeName(options, "uint32");
566 case FieldDescriptor::CPPTYPE_UINT64:
567 return IntTypeName(options, "uint64");
568 case FieldDescriptor::CPPTYPE_DOUBLE:
569 return "double";
570 case FieldDescriptor::CPPTYPE_FLOAT:
571 return "float";
572 case FieldDescriptor::CPPTYPE_BOOL:
573 return "bool";
574 case FieldDescriptor::CPPTYPE_ENUM:
575 return "int";
576 case FieldDescriptor::CPPTYPE_STRING:
577 return "std::string";
578 case FieldDescriptor::CPPTYPE_MESSAGE:
579 return "";
580
581 // No default because we want the compiler to complain if any new
582 // CppTypes are added.
583 }
584
585 GOOGLE_LOG(FATAL) << "Can't get here.";
586 return "";
587 }
588
DeclaredTypeMethodName(FieldDescriptor::Type type)589 const char* DeclaredTypeMethodName(FieldDescriptor::Type type) {
590 switch (type) {
591 case FieldDescriptor::TYPE_INT32:
592 return "Int32";
593 case FieldDescriptor::TYPE_INT64:
594 return "Int64";
595 case FieldDescriptor::TYPE_UINT32:
596 return "UInt32";
597 case FieldDescriptor::TYPE_UINT64:
598 return "UInt64";
599 case FieldDescriptor::TYPE_SINT32:
600 return "SInt32";
601 case FieldDescriptor::TYPE_SINT64:
602 return "SInt64";
603 case FieldDescriptor::TYPE_FIXED32:
604 return "Fixed32";
605 case FieldDescriptor::TYPE_FIXED64:
606 return "Fixed64";
607 case FieldDescriptor::TYPE_SFIXED32:
608 return "SFixed32";
609 case FieldDescriptor::TYPE_SFIXED64:
610 return "SFixed64";
611 case FieldDescriptor::TYPE_FLOAT:
612 return "Float";
613 case FieldDescriptor::TYPE_DOUBLE:
614 return "Double";
615
616 case FieldDescriptor::TYPE_BOOL:
617 return "Bool";
618 case FieldDescriptor::TYPE_ENUM:
619 return "Enum";
620
621 case FieldDescriptor::TYPE_STRING:
622 return "String";
623 case FieldDescriptor::TYPE_BYTES:
624 return "Bytes";
625 case FieldDescriptor::TYPE_GROUP:
626 return "Group";
627 case FieldDescriptor::TYPE_MESSAGE:
628 return "Message";
629
630 // No default because we want the compiler to complain if any new
631 // types are added.
632 }
633 GOOGLE_LOG(FATAL) << "Can't get here.";
634 return "";
635 }
636
Int32ToString(int number)637 std::string Int32ToString(int number) {
638 if (number == kint32min) {
639 // This needs to be special-cased, see explanation here:
640 // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=52661
641 return StrCat(number + 1, " - 1");
642 } else {
643 return StrCat(number);
644 }
645 }
646
Int64ToString(const std::string & macro_prefix,int64 number)647 std::string Int64ToString(const std::string& macro_prefix, int64 number) {
648 if (number == kint64min) {
649 // This needs to be special-cased, see explanation here:
650 // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=52661
651 return StrCat(macro_prefix, "_LONGLONG(", number + 1, ") - 1");
652 }
653 return StrCat(macro_prefix, "_LONGLONG(", number, ")");
654 }
655
UInt64ToString(const std::string & macro_prefix,uint64 number)656 std::string UInt64ToString(const std::string& macro_prefix, uint64 number) {
657 return StrCat(macro_prefix, "_ULONGLONG(", number, ")");
658 }
659
DefaultValue(const FieldDescriptor * field)660 std::string DefaultValue(const FieldDescriptor* field) {
661 switch (field->cpp_type()) {
662 case FieldDescriptor::CPPTYPE_INT64:
663 return Int64ToString("GG", field->default_value_int64());
664 case FieldDescriptor::CPPTYPE_UINT64:
665 return UInt64ToString("GG", field->default_value_uint64());
666 default:
667 return DefaultValue(Options(), field);
668 }
669 }
670
DefaultValue(const Options & options,const FieldDescriptor * field)671 std::string DefaultValue(const Options& options, const FieldDescriptor* field) {
672 switch (field->cpp_type()) {
673 case FieldDescriptor::CPPTYPE_INT32:
674 return Int32ToString(field->default_value_int32());
675 case FieldDescriptor::CPPTYPE_UINT32:
676 return StrCat(field->default_value_uint32()) + "u";
677 case FieldDescriptor::CPPTYPE_INT64:
678 return Int64ToString("PROTOBUF", field->default_value_int64());
679 case FieldDescriptor::CPPTYPE_UINT64:
680 return UInt64ToString("PROTOBUF", field->default_value_uint64());
681 case FieldDescriptor::CPPTYPE_DOUBLE: {
682 double value = field->default_value_double();
683 if (value == std::numeric_limits<double>::infinity()) {
684 return "std::numeric_limits<double>::infinity()";
685 } else if (value == -std::numeric_limits<double>::infinity()) {
686 return "-std::numeric_limits<double>::infinity()";
687 } else if (value != value) {
688 return "std::numeric_limits<double>::quiet_NaN()";
689 } else {
690 return SimpleDtoa(value);
691 }
692 }
693 case FieldDescriptor::CPPTYPE_FLOAT: {
694 float value = field->default_value_float();
695 if (value == std::numeric_limits<float>::infinity()) {
696 return "std::numeric_limits<float>::infinity()";
697 } else if (value == -std::numeric_limits<float>::infinity()) {
698 return "-std::numeric_limits<float>::infinity()";
699 } else if (value != value) {
700 return "std::numeric_limits<float>::quiet_NaN()";
701 } else {
702 std::string float_value = SimpleFtoa(value);
703 // If floating point value contains a period (.) or an exponent
704 // (either E or e), then append suffix 'f' to make it a float
705 // literal.
706 if (float_value.find_first_of(".eE") != std::string::npos) {
707 float_value.push_back('f');
708 }
709 return float_value;
710 }
711 }
712 case FieldDescriptor::CPPTYPE_BOOL:
713 return field->default_value_bool() ? "true" : "false";
714 case FieldDescriptor::CPPTYPE_ENUM:
715 // Lazy: Generate a static_cast because we don't have a helper function
716 // that constructs the full name of an enum value.
717 return strings::Substitute(
718 "static_cast< $0 >($1)", ClassName(field->enum_type(), true),
719 Int32ToString(field->default_value_enum()->number()));
720 case FieldDescriptor::CPPTYPE_STRING:
721 return "\"" +
722 EscapeTrigraphs(CEscape(field->default_value_string())) +
723 "\"";
724 case FieldDescriptor::CPPTYPE_MESSAGE:
725 return "*" + FieldMessageTypeName(field, options) +
726 "::internal_default_instance()";
727 }
728 // Can't actually get here; make compiler happy. (We could add a default
729 // case above but then we wouldn't get the nice compiler warning when a
730 // new type is added.)
731 GOOGLE_LOG(FATAL) << "Can't get here.";
732 return "";
733 }
734
735 // Convert a file name into a valid identifier.
FilenameIdentifier(const std::string & filename)736 std::string FilenameIdentifier(const std::string& filename) {
737 std::string result;
738 for (int i = 0; i < filename.size(); i++) {
739 if (ascii_isalnum(filename[i])) {
740 result.push_back(filename[i]);
741 } else {
742 // Not alphanumeric. To avoid any possibility of name conflicts we
743 // use the hex code for the character.
744 StrAppend(&result, "_", strings::Hex(static_cast<uint8>(filename[i])));
745 }
746 }
747 return result;
748 }
749
UniqueName(const std::string & name,const std::string & filename,const Options & options)750 std::string UniqueName(const std::string& name, const std::string& filename,
751 const Options& options) {
752 return name + "_" + FilenameIdentifier(filename);
753 }
754
755 // Return the qualified C++ name for a file level symbol.
QualifiedFileLevelSymbol(const FileDescriptor * file,const std::string & name,const Options & options)756 std::string QualifiedFileLevelSymbol(const FileDescriptor* file,
757 const std::string& name,
758 const Options& options) {
759 if (file->package().empty()) {
760 return StrCat("::", name);
761 }
762 return StrCat(Namespace(file, options), "::", name);
763 }
764
765 // Escape C++ trigraphs by escaping question marks to \?
EscapeTrigraphs(const std::string & to_escape)766 std::string EscapeTrigraphs(const std::string& to_escape) {
767 return StringReplace(to_escape, "?", "\\?", true);
768 }
769
770 // Escaped function name to eliminate naming conflict.
SafeFunctionName(const Descriptor * descriptor,const FieldDescriptor * field,const std::string & prefix)771 std::string SafeFunctionName(const Descriptor* descriptor,
772 const FieldDescriptor* field,
773 const std::string& prefix) {
774 // Do not use FieldName() since it will escape keywords.
775 std::string name = field->name();
776 LowerString(&name);
777 std::string function_name = prefix + name;
778 if (descriptor->FindFieldByName(function_name)) {
779 // Single underscore will also make it conflicting with the private data
780 // member. We use double underscore to escape function names.
781 function_name.append("__");
782 } else if (kKeywords.count(name) > 0) {
783 // If the field name is a keyword, we append the underscore back to keep it
784 // consistent with other function names.
785 function_name.append("_");
786 }
787 return function_name;
788 }
789
HasLazyFields(const Descriptor * descriptor,const Options & options)790 static bool HasLazyFields(const Descriptor* descriptor,
791 const Options& options) {
792 for (int field_idx = 0; field_idx < descriptor->field_count(); field_idx++) {
793 if (IsLazy(descriptor->field(field_idx), options)) {
794 return true;
795 }
796 }
797 for (int idx = 0; idx < descriptor->extension_count(); idx++) {
798 if (IsLazy(descriptor->extension(idx), options)) {
799 return true;
800 }
801 }
802 for (int idx = 0; idx < descriptor->nested_type_count(); idx++) {
803 if (HasLazyFields(descriptor->nested_type(idx), options)) {
804 return true;
805 }
806 }
807 return false;
808 }
809
810 // Does the given FileDescriptor use lazy fields?
HasLazyFields(const FileDescriptor * file,const Options & options)811 bool HasLazyFields(const FileDescriptor* file, const Options& options) {
812 for (int i = 0; i < file->message_type_count(); i++) {
813 const Descriptor* descriptor(file->message_type(i));
814 if (HasLazyFields(descriptor, options)) {
815 return true;
816 }
817 }
818 for (int field_idx = 0; field_idx < file->extension_count(); field_idx++) {
819 if (IsLazy(file->extension(field_idx), options)) {
820 return true;
821 }
822 }
823 return false;
824 }
825
HasRepeatedFields(const Descriptor * descriptor)826 static bool HasRepeatedFields(const Descriptor* descriptor) {
827 for (int i = 0; i < descriptor->field_count(); ++i) {
828 if (descriptor->field(i)->label() == FieldDescriptor::LABEL_REPEATED) {
829 return true;
830 }
831 }
832 for (int i = 0; i < descriptor->nested_type_count(); ++i) {
833 if (HasRepeatedFields(descriptor->nested_type(i))) return true;
834 }
835 return false;
836 }
837
HasRepeatedFields(const FileDescriptor * file)838 bool HasRepeatedFields(const FileDescriptor* file) {
839 for (int i = 0; i < file->message_type_count(); ++i) {
840 if (HasRepeatedFields(file->message_type(i))) return true;
841 }
842 return false;
843 }
844
IsStringPieceField(const FieldDescriptor * field,const Options & options)845 static bool IsStringPieceField(const FieldDescriptor* field,
846 const Options& options) {
847 return field->cpp_type() == FieldDescriptor::CPPTYPE_STRING &&
848 EffectiveStringCType(field, options) == FieldOptions::STRING_PIECE;
849 }
850
HasStringPieceFields(const Descriptor * descriptor,const Options & options)851 static bool HasStringPieceFields(const Descriptor* descriptor,
852 const Options& options) {
853 for (int i = 0; i < descriptor->field_count(); ++i) {
854 if (IsStringPieceField(descriptor->field(i), options)) return true;
855 }
856 for (int i = 0; i < descriptor->nested_type_count(); ++i) {
857 if (HasStringPieceFields(descriptor->nested_type(i), options)) return true;
858 }
859 return false;
860 }
861
HasStringPieceFields(const FileDescriptor * file,const Options & options)862 bool HasStringPieceFields(const FileDescriptor* file, const Options& options) {
863 for (int i = 0; i < file->message_type_count(); ++i) {
864 if (HasStringPieceFields(file->message_type(i), options)) return true;
865 }
866 return false;
867 }
868
IsCordField(const FieldDescriptor * field,const Options & options)869 static bool IsCordField(const FieldDescriptor* field, const Options& options) {
870 return field->cpp_type() == FieldDescriptor::CPPTYPE_STRING &&
871 EffectiveStringCType(field, options) == FieldOptions::CORD;
872 }
873
HasCordFields(const Descriptor * descriptor,const Options & options)874 static bool HasCordFields(const Descriptor* descriptor,
875 const Options& options) {
876 for (int i = 0; i < descriptor->field_count(); ++i) {
877 if (IsCordField(descriptor->field(i), options)) return true;
878 }
879 for (int i = 0; i < descriptor->nested_type_count(); ++i) {
880 if (HasCordFields(descriptor->nested_type(i), options)) return true;
881 }
882 return false;
883 }
884
HasCordFields(const FileDescriptor * file,const Options & options)885 bool HasCordFields(const FileDescriptor* file, const Options& options) {
886 for (int i = 0; i < file->message_type_count(); ++i) {
887 if (HasCordFields(file->message_type(i), options)) return true;
888 }
889 return false;
890 }
891
HasExtensionsOrExtendableMessage(const Descriptor * descriptor)892 static bool HasExtensionsOrExtendableMessage(const Descriptor* descriptor) {
893 if (descriptor->extension_range_count() > 0) return true;
894 if (descriptor->extension_count() > 0) return true;
895 for (int i = 0; i < descriptor->nested_type_count(); ++i) {
896 if (HasExtensionsOrExtendableMessage(descriptor->nested_type(i))) {
897 return true;
898 }
899 }
900 return false;
901 }
902
HasExtensionsOrExtendableMessage(const FileDescriptor * file)903 bool HasExtensionsOrExtendableMessage(const FileDescriptor* file) {
904 if (file->extension_count() > 0) return true;
905 for (int i = 0; i < file->message_type_count(); ++i) {
906 if (HasExtensionsOrExtendableMessage(file->message_type(i))) return true;
907 }
908 return false;
909 }
910
HasMapFields(const Descriptor * descriptor)911 static bool HasMapFields(const Descriptor* descriptor) {
912 for (int i = 0; i < descriptor->field_count(); ++i) {
913 if (descriptor->field(i)->is_map()) {
914 return true;
915 }
916 }
917 for (int i = 0; i < descriptor->nested_type_count(); ++i) {
918 if (HasMapFields(descriptor->nested_type(i))) return true;
919 }
920 return false;
921 }
922
HasMapFields(const FileDescriptor * file)923 bool HasMapFields(const FileDescriptor* file) {
924 for (int i = 0; i < file->message_type_count(); ++i) {
925 if (HasMapFields(file->message_type(i))) return true;
926 }
927 return false;
928 }
929
HasEnumDefinitions(const Descriptor * message_type)930 static bool HasEnumDefinitions(const Descriptor* message_type) {
931 if (message_type->enum_type_count() > 0) return true;
932 for (int i = 0; i < message_type->nested_type_count(); ++i) {
933 if (HasEnumDefinitions(message_type->nested_type(i))) return true;
934 }
935 return false;
936 }
937
HasEnumDefinitions(const FileDescriptor * file)938 bool HasEnumDefinitions(const FileDescriptor* file) {
939 if (file->enum_type_count() > 0) return true;
940 for (int i = 0; i < file->message_type_count(); ++i) {
941 if (HasEnumDefinitions(file->message_type(i))) return true;
942 }
943 return false;
944 }
945
IsStringOrMessage(const FieldDescriptor * field)946 bool IsStringOrMessage(const FieldDescriptor* field) {
947 switch (field->cpp_type()) {
948 case FieldDescriptor::CPPTYPE_INT32:
949 case FieldDescriptor::CPPTYPE_INT64:
950 case FieldDescriptor::CPPTYPE_UINT32:
951 case FieldDescriptor::CPPTYPE_UINT64:
952 case FieldDescriptor::CPPTYPE_DOUBLE:
953 case FieldDescriptor::CPPTYPE_FLOAT:
954 case FieldDescriptor::CPPTYPE_BOOL:
955 case FieldDescriptor::CPPTYPE_ENUM:
956 return false;
957 case FieldDescriptor::CPPTYPE_STRING:
958 case FieldDescriptor::CPPTYPE_MESSAGE:
959 return true;
960 }
961
962 GOOGLE_LOG(FATAL) << "Can't get here.";
963 return false;
964 }
965
EffectiveStringCType(const FieldDescriptor * field,const Options & options)966 FieldOptions::CType EffectiveStringCType(const FieldDescriptor* field,
967 const Options& options) {
968 GOOGLE_DCHECK(field->cpp_type() == FieldDescriptor::CPPTYPE_STRING);
969 if (options.opensource_runtime) {
970 // Open-source protobuf release only supports STRING ctype.
971 return FieldOptions::STRING;
972 } else {
973 // Google-internal supports all ctypes.
974 return field->options().ctype();
975 }
976 }
977
IsAnyMessage(const FileDescriptor * descriptor,const Options & options)978 bool IsAnyMessage(const FileDescriptor* descriptor, const Options& options) {
979 return descriptor->name() == kAnyProtoFile;
980 }
981
IsAnyMessage(const Descriptor * descriptor,const Options & options)982 bool IsAnyMessage(const Descriptor* descriptor, const Options& options) {
983 return descriptor->name() == kAnyMessageName &&
984 IsAnyMessage(descriptor->file(), options);
985 }
986
IsWellKnownMessage(const FileDescriptor * file)987 bool IsWellKnownMessage(const FileDescriptor* file) {
988 static const std::unordered_set<std::string> well_known_files{
989 "google/protobuf/any.proto",
990 "google/protobuf/api.proto",
991 "google/protobuf/compiler/plugin.proto",
992 "google/protobuf/descriptor.proto",
993 "google/protobuf/duration.proto",
994 "google/protobuf/empty.proto",
995 "google/protobuf/field_mask.proto",
996 "google/protobuf/source_context.proto",
997 "google/protobuf/struct.proto",
998 "google/protobuf/timestamp.proto",
999 "google/protobuf/type.proto",
1000 "google/protobuf/wrappers.proto",
1001 };
1002 return well_known_files.find(file->name()) != well_known_files.end();
1003 }
1004
// Reports whether UTF-8 enforcement applies to `field`. As written this is
// unconditionally true; neither argument is consulted.
static bool FieldEnforceUtf8(const FieldDescriptor* field,
                             const Options& options) {
  static_cast<void>(field);
  static_cast<void>(options);
  return true;
}
1009
// Reports whether UTF-8 verification applies to `file`. As written this is
// unconditionally true; neither argument is consulted.
static bool FileUtf8Verification(const FileDescriptor* file,
                                 const Options& options) {
  static_cast<void>(file);
  static_cast<void>(options);
  return true;
}
1014
1015 // Which level of UTF-8 enforcemant is placed on this file.
GetUtf8CheckMode(const FieldDescriptor * field,const Options & options)1016 Utf8CheckMode GetUtf8CheckMode(const FieldDescriptor* field,
1017 const Options& options) {
1018 if (field->file()->syntax() == FileDescriptor::SYNTAX_PROTO3 &&
1019 FieldEnforceUtf8(field, options)) {
1020 return STRICT;
1021 } else if (GetOptimizeFor(field->file(), options) !=
1022 FileOptions::LITE_RUNTIME &&
1023 FileUtf8Verification(field->file(), options)) {
1024 return VERIFY;
1025 } else {
1026 return NONE;
1027 }
1028 }
1029
// Emits, through `format`, a call that checks the string data of `field` for
// valid UTF-8. The shape of the call depends on GetUtf8CheckMode():
//   STRICT - ::$proto_ns$::internal::WireFormatLite::<strict_function>(...);
//            when `for_parse` is true the whole call is wrapped in DO_( ... ).
//   VERIFY - ::$proto_ns$::internal::WireFormat::<verify_function>(...).
//   NONE   - nothing is emitted.
// In both emitting modes the argument list is: the text of `parameters`
// (passed to `format` as a format string), then the PARSE or SERIALIZE
// constant chosen by `for_parse`, then the field's full name as a quoted
// string literal.
static void GenerateUtf8CheckCode(const FieldDescriptor* field,
                                  const Options& options, bool for_parse,
                                  const char* parameters,
                                  const char* strict_function,
                                  const char* verify_function,
                                  const Formatter& format) {
  switch (GetUtf8CheckMode(field, options)) {
    case STRICT: {
      // Opening of the DO_( wrapper; closed after the call below.
      if (for_parse) {
        format("DO_(");
      }
      format("::$proto_ns$::internal::WireFormatLite::$1$(\n", strict_function);
      format.Indent();
      format(parameters);
      if (for_parse) {
        format("::$proto_ns$::internal::WireFormatLite::PARSE,\n");
      } else {
        format("::$proto_ns$::internal::WireFormatLite::SERIALIZE,\n");
      }
      format("\"$1$\")", field->full_name());
      // Closing parenthesis of the DO_( wrapper.
      if (for_parse) {
        format(")");
      }
      format(";\n");
      format.Outdent();
      break;
    }
    case VERIFY: {
      format("::$proto_ns$::internal::WireFormat::$1$(\n", verify_function);
      format.Indent();
      format(parameters);
      if (for_parse) {
        format("::$proto_ns$::internal::WireFormat::PARSE,\n");
      } else {
        format("::$proto_ns$::internal::WireFormat::SERIALIZE,\n");
      }
      format("\"$1$\");\n", field->full_name());
      format.Outdent();
      break;
    }
    case NONE:
      break;
  }
}
1074
GenerateUtf8CheckCodeForString(const FieldDescriptor * field,const Options & options,bool for_parse,const char * parameters,const Formatter & format)1075 void GenerateUtf8CheckCodeForString(const FieldDescriptor* field,
1076 const Options& options, bool for_parse,
1077 const char* parameters,
1078 const Formatter& format) {
1079 GenerateUtf8CheckCode(field, options, for_parse, parameters,
1080 "VerifyUtf8String", "VerifyUTF8StringNamedField",
1081 format);
1082 }
1083
GenerateUtf8CheckCodeForCord(const FieldDescriptor * field,const Options & options,bool for_parse,const char * parameters,const Formatter & format)1084 void GenerateUtf8CheckCodeForCord(const FieldDescriptor* field,
1085 const Options& options, bool for_parse,
1086 const char* parameters,
1087 const Formatter& format) {
1088 GenerateUtf8CheckCode(field, options, for_parse, parameters, "VerifyUtf8Cord",
1089 "VerifyUTF8CordNamedField", format);
1090 }
1091
1092 namespace {
1093
Flatten(const Descriptor * descriptor,std::vector<const Descriptor * > * flatten)1094 void Flatten(const Descriptor* descriptor,
1095 std::vector<const Descriptor*>* flatten) {
1096 for (int i = 0; i < descriptor->nested_type_count(); i++)
1097 Flatten(descriptor->nested_type(i), flatten);
1098 flatten->push_back(descriptor);
1099 }
1100
1101 } // namespace
1102
FlattenMessagesInFile(const FileDescriptor * file,std::vector<const Descriptor * > * result)1103 void FlattenMessagesInFile(const FileDescriptor* file,
1104 std::vector<const Descriptor*>* result) {
1105 for (int i = 0; i < file->message_type_count(); i++) {
1106 Flatten(file->message_type(i), result);
1107 }
1108 }
1109
HasWeakFields(const Descriptor * descriptor,const Options & options)1110 bool HasWeakFields(const Descriptor* descriptor, const Options& options) {
1111 for (int i = 0; i < descriptor->field_count(); i++) {
1112 if (IsWeak(descriptor->field(i), options)) return true;
1113 }
1114 return false;
1115 }
1116
HasWeakFields(const FileDescriptor * file,const Options & options)1117 bool HasWeakFields(const FileDescriptor* file, const Options& options) {
1118 for (int i = 0; i < file->message_type_count(); ++i) {
1119 if (HasWeakFields(file->message_type(i), options)) return true;
1120 }
1121 return false;
1122 }
1123
UsingImplicitWeakFields(const FileDescriptor * file,const Options & options)1124 bool UsingImplicitWeakFields(const FileDescriptor* file,
1125 const Options& options) {
1126 return options.lite_implicit_weak_fields &&
1127 GetOptimizeFor(file, options) == FileOptions::LITE_RUNTIME;
1128 }
1129
IsImplicitWeakField(const FieldDescriptor * field,const Options & options,MessageSCCAnalyzer * scc_analyzer)1130 bool IsImplicitWeakField(const FieldDescriptor* field, const Options& options,
1131 MessageSCCAnalyzer* scc_analyzer) {
1132 return UsingImplicitWeakFields(field->file(), options) &&
1133 field->type() == FieldDescriptor::TYPE_MESSAGE &&
1134 !field->is_required() && !field->is_map() && !field->is_extension() &&
1135 !field->real_containing_oneof() &&
1136 !IsWellKnownMessage(field->message_type()->file()) &&
1137 field->message_type()->file()->name() !=
1138 "net/proto2/proto/descriptor.proto" &&
1139 // We do not support implicit weak fields between messages in the same
1140 // strongly-connected component.
1141 scc_analyzer->GetSCC(field->containing_type()) !=
1142 scc_analyzer->GetSCC(field->message_type());
1143 }
1144
GetSCCAnalysis(const SCC * scc)1145 MessageAnalysis MessageSCCAnalyzer::GetSCCAnalysis(const SCC* scc) {
1146 if (analysis_cache_.count(scc)) return analysis_cache_[scc];
1147 MessageAnalysis result{};
1148 for (int i = 0; i < scc->descriptors.size(); i++) {
1149 const Descriptor* descriptor = scc->descriptors[i];
1150 if (descriptor->extension_range_count() > 0) {
1151 result.contains_extension = true;
1152 // Extensions are found by looking up default_instance and extension
1153 // number in a map. So you'd maybe expect here
1154 // result.constructor_requires_initialization = true;
1155 // However the extension registration mechanism already makes sure
1156 // the default will be initialized.
1157 }
1158 for (int i = 0; i < descriptor->field_count(); i++) {
1159 const FieldDescriptor* field = descriptor->field(i);
1160 if (field->is_required()) {
1161 result.contains_required = true;
1162 }
1163 switch (field->type()) {
1164 case FieldDescriptor::TYPE_STRING:
1165 case FieldDescriptor::TYPE_BYTES: {
1166 result.constructor_requires_initialization = true;
1167 if (field->options().ctype() == FieldOptions::CORD) {
1168 result.contains_cord = true;
1169 }
1170 break;
1171 }
1172 case FieldDescriptor::TYPE_GROUP:
1173 case FieldDescriptor::TYPE_MESSAGE: {
1174 result.constructor_requires_initialization = true;
1175 const SCC* child = analyzer_.GetSCC(field->message_type());
1176 if (child != scc) {
1177 MessageAnalysis analysis = GetSCCAnalysis(child);
1178 result.contains_cord |= analysis.contains_cord;
1179 result.contains_extension |= analysis.contains_extension;
1180 if (!ShouldIgnoreRequiredFieldCheck(field, options_)) {
1181 result.contains_required |= analysis.contains_required;
1182 }
1183 } else {
1184 // This field points back into the same SCC hence the messages
1185 // in the SCC are recursive. Note if SCC contains more than two
1186 // nodes it has to be recursive, however this test also works for
1187 // a single node that is recursive.
1188 result.is_recursive = true;
1189 }
1190 break;
1191 }
1192 default:
1193 break;
1194 }
1195 }
1196 }
1197 // We deliberately only insert the result here. After we contracted the SCC
1198 // in the graph, the graph should be a DAG. Hence we shouldn't need to mark
1199 // nodes visited as we can never return to them. By inserting them here
1200 // we will go in an infinite loop if the SCC is not correct.
1201 return analysis_cache_[scc] = result;
1202 }
1203
ListAllFields(const Descriptor * d,std::vector<const FieldDescriptor * > * fields)1204 void ListAllFields(const Descriptor* d,
1205 std::vector<const FieldDescriptor*>* fields) {
1206 // Collect sub messages
1207 for (int i = 0; i < d->nested_type_count(); i++) {
1208 ListAllFields(d->nested_type(i), fields);
1209 }
1210 // Collect message level extensions.
1211 for (int i = 0; i < d->extension_count(); i++) {
1212 fields->push_back(d->extension(i));
1213 }
1214 // Add types of fields necessary
1215 for (int i = 0; i < d->field_count(); i++) {
1216 fields->push_back(d->field(i));
1217 }
1218 }
1219
ListAllFields(const FileDescriptor * d,std::vector<const FieldDescriptor * > * fields)1220 void ListAllFields(const FileDescriptor* d,
1221 std::vector<const FieldDescriptor*>* fields) {
1222 // Collect file level message.
1223 for (int i = 0; i < d->message_type_count(); i++) {
1224 ListAllFields(d->message_type(i), fields);
1225 }
1226 // Collect message level extensions.
1227 for (int i = 0; i < d->extension_count(); i++) {
1228 fields->push_back(d->extension(i));
1229 }
1230 }
1231
ListAllTypesForServices(const FileDescriptor * fd,std::vector<const Descriptor * > * types)1232 void ListAllTypesForServices(const FileDescriptor* fd,
1233 std::vector<const Descriptor*>* types) {
1234 for (int i = 0; i < fd->service_count(); i++) {
1235 const ServiceDescriptor* sd = fd->service(i);
1236 for (int j = 0; j < sd->method_count(); j++) {
1237 const MethodDescriptor* method = sd->method(j);
1238 types->push_back(method->input_type());
1239 types->push_back(method->output_type());
1240 }
1241 }
1242 }
1243
GetBootstrapBasename(const Options & options,const std::string & basename,std::string * bootstrap_basename)1244 bool GetBootstrapBasename(const Options& options, const std::string& basename,
1245 std::string* bootstrap_basename) {
1246 if (options.opensource_runtime) {
1247 return false;
1248 }
1249
1250 std::unordered_map<std::string, std::string> bootstrap_mapping{
1251 {"net/proto2/proto/descriptor",
1252 "net/proto2/internal/descriptor"},
1253 {"net/proto2/compiler/proto/plugin",
1254 "net/proto2/compiler/proto/plugin"},
1255 {"net/proto2/compiler/proto/profile",
1256 "net/proto2/compiler/proto/profile_bootstrap"},
1257 };
1258 auto iter = bootstrap_mapping.find(basename);
1259 if (iter == bootstrap_mapping.end()) {
1260 *bootstrap_basename = basename;
1261 return false;
1262 } else {
1263 *bootstrap_basename = iter->second;
1264 return true;
1265 }
1266 }
1267
IsBootstrapProto(const Options & options,const FileDescriptor * file)1268 bool IsBootstrapProto(const Options& options, const FileDescriptor* file) {
1269 std::string my_name = StripProto(file->name());
1270 return GetBootstrapBasename(options, my_name, &my_name);
1271 }
1272
// Handles code generation for files that have a bootstrap mapping.
//
// Returns false (generation should proceed) when GetBootstrapBasename()
// reports no mapping for *basename; *basename is left unchanged. When a
// mapping exists:
//   - with `bootstrap_flag` set, *basename is replaced by the mapped name and
//     false is returned, so generation proceeds under the new name;
//   - otherwise forwarding files are written through `generator_context`
//     under the original *basename and true is returned, telling the caller
//     to abort normal generation. The files are, in order: a ".pb.h" and a
//     ".proto.h" that each include the mapped header, a ".pb.cc" containing
//     a single newline, and ".pb.h.meta" / ".proto.h.meta" files that are
//     opened but not written to.
bool MaybeBootstrap(const Options& options, GeneratorContext* generator_context,
                    bool bootstrap_flag, std::string* basename) {
  std::string bootstrap_basename;
  if (!GetBootstrapBasename(options, *basename, &bootstrap_basename)) {
    return false;
  }

  if (bootstrap_flag) {
    // Adjust basename, but don't abort code generation.
    *basename = bootstrap_basename;
    return false;
  } else {
    std::string forward_to_basename = bootstrap_basename;

    // Generate forwarding headers and empty .pb.cc.
    {
      std::unique_ptr<io::ZeroCopyOutputStream> output(
          generator_context->Open(*basename + ".pb.h"));
      io::Printer printer(output.get(), '$', nullptr);
      printer.Print(
          "#ifndef PROTOBUF_INCLUDED_$filename_identifier$_FORWARD_PB_H\n"
          "#define PROTOBUF_INCLUDED_$filename_identifier$_FORWARD_PB_H\n"
          "#include \"$forward_to_basename$.pb.h\" // IWYU pragma: export\n"
          "#endif // PROTOBUF_INCLUDED_$filename_identifier$_FORWARD_PB_H\n",
          "forward_to_basename", forward_to_basename, "filename_identifier",
          FilenameIdentifier(*basename));

      // NOTE(review): GetBootstrapBasename(), as it appears in this file,
      // returns false whenever opensource_runtime is set, so this condition
      // looks like it always holds at this point.
      if (!options.opensource_runtime) {
        // HACK HACK HACK, tech debt from the deeps of proto1 and SWIG
        // protocoltype is SWIG'ed and we need to forward
        if (*basename == "net/proto/protocoltype") {
          printer.Print(
              "#ifdef SWIG\n"
              "%include \"$forward_to_basename$.pb.h\"\n"
              "#endif // SWIG\n",
              "forward_to_basename", forward_to_basename);
        }
      }
    }

    // Forwarding ".proto.h" header.
    {
      std::unique_ptr<io::ZeroCopyOutputStream> output(
          generator_context->Open(*basename + ".proto.h"));
      io::Printer printer(output.get(), '$', nullptr);
      printer.Print(
          "#ifndef PROTOBUF_INCLUDED_$filename_identifier$_FORWARD_PROTO_H\n"
          "#define PROTOBUF_INCLUDED_$filename_identifier$_FORWARD_PROTO_H\n"
          "#include \"$forward_to_basename$.proto.h\" // IWYU pragma: "
          "export\n"
          "#endif // "
          "PROTOBUF_INCLUDED_$filename_identifier$_FORWARD_PROTO_H\n",
          "forward_to_basename", forward_to_basename, "filename_identifier",
          FilenameIdentifier(*basename));
    }

    // ".pb.cc" whose only content is a newline.
    {
      std::unique_ptr<io::ZeroCopyOutputStream> output(
          generator_context->Open(*basename + ".pb.cc"));
      io::Printer printer(output.get(), '$', nullptr);
      printer.Print("\n");
    }

    // The two .meta outputs are opened and immediately released; nothing is
    // written to them here.
    {
      std::unique_ptr<io::ZeroCopyOutputStream> output(
          generator_context->Open(*basename + ".pb.h.meta"));
    }

    {
      std::unique_ptr<io::ZeroCopyOutputStream> output(
          generator_context->Open(*basename + ".proto.h.meta"));
    }

    // Abort code generation.
    return true;
  }
}
1349
1350 class ParseLoopGenerator {
1351 public:
// Stores the has-bit count, generator options and SCC analyzer used by the
// generation methods, and constructs the formatter from `printer`.
// `options` is kept by reference (options_ is a `const Options&` member), so
// the referenced object must outlive this generator.
ParseLoopGenerator(int num_hasbits, const Options& options,
                   MessageSCCAnalyzer* scc_analyzer, io::Printer* printer)
    : scc_analyzer_(scc_analyzer),
      options_(options),
      format_(printer),
      num_hasbits_(num_hasbits) {}
1358
// Emits the complete definition of `$classname$::_InternalParse` for
// `descriptor`.
//
// Steps:
//   1. Registers the formatter variables used by the emitted text
//      (classname, p_ns, pi_ns, GOOGLE_PROTOBUF, plus the common and
//      unknown-fields variables).
//   2. Collects the fields for which IsFieldStripped() is false and sorts
//      them by ascending field number.
//   3. Emits the function header and the CHK_ macro, then the loop body via
//      GenerateParseLoop(), then the success/failure epilogue.
//
// Has-bit handling: when num_hasbits_ is between 1 and 32 (exactly one
// 32-bit word), the emitted code accumulates has-bits in a local `has_bits`
// and ORs it into `_has_bits_` at the `success:` label; in every other case
// the emitted code refers to `_has_bits_` directly.
void GenerateParserLoop(const Descriptor* descriptor) {
  format_.Set("classname", ClassName(descriptor));
  format_.Set("p_ns", "::" + ProtobufNamespace(options_));
  format_.Set("pi_ns",
              StrCat("::", ProtobufNamespace(options_), "::internal"));
  format_.Set("GOOGLE_PROTOBUF", MacroPrefix(options_));
  std::map<std::string, std::string> vars;
  SetCommonVars(options_, &vars);
  SetUnknkownFieldsVariable(descriptor, options_, &vars);
  format_.AddMap(vars);

  std::vector<const FieldDescriptor*> ordered_fields;
  for (auto field : FieldRange(descriptor)) {
    if (!IsFieldStripped(field, options_)) {
      ordered_fields.push_back(field);
    }
  }
  std::sort(ordered_fields.begin(), ordered_fields.end(),
            [](const FieldDescriptor* a, const FieldDescriptor* b) {
              return a->number() < b->number();
            });

  format_(
      "const char* $classname$::_InternalParse(const char* ptr, "
      "$pi_ns$::ParseContext* ctx) {\n"
      "#define CHK_(x) if (PROTOBUF_PREDICT_FALSE(!(x))) goto failure\n");
  format_.Indent();
  // Number of 32-bit words needed to hold num_hasbits_ bits.
  int hasbits_size = 0;
  if (num_hasbits_ > 0) {
    hasbits_size = (num_hasbits_ + 31) / 32;
  }
  // For now only optimize small hasbits.
  if (hasbits_size != 1) hasbits_size = 0;
  if (hasbits_size) {
    format_("_Internal::HasBits has_bits{};\n");
    format_.Set("has_bits", "has_bits");
  } else {
    format_.Set("has_bits", "_has_bits_");
  }

  GenerateParseLoop(descriptor, ordered_fields);
  format_.Outdent();
  format_("success:\n");
  if (hasbits_size) format_("  _has_bits_.Or(has_bits);\n");

  // In the emitted code the failure path nulls `ptr` and jumps back to
  // `success:`, so both paths share the same exit.
  format_(
      "  return ptr;\n"
      "failure:\n"
      "  ptr = nullptr;\n"
      "  goto success;\n"
      "#undef CHK_\n"
      "}\n");
}
1412
1413 private:
1414 MessageSCCAnalyzer* scc_analyzer_;
1415 const Options& options_;
1416 Formatter format_;
1417 int num_hasbits_;
1418
1419 using WireFormat = internal::WireFormat;
1420 using WireFormatLite = internal::WireFormatLite;
1421
GenerateArenaString(const FieldDescriptor * field)1422 void GenerateArenaString(const FieldDescriptor* field) {
1423 if (HasHasbit(field)) {
1424 format_("_Internal::set_has_$1$(&$has_bits$);\n", FieldName(field));
1425 }
1426 std::string default_string =
1427 field->default_value_string().empty()
1428 ? "::" + ProtobufNamespace(options_) +
1429 "::internal::GetEmptyStringAlreadyInited()"
1430 : QualifiedClassName(field->containing_type(), options_) +
1431 "::" + MakeDefaultName(field) + ".get()";
1432 format_(
1433 "if (arena != nullptr) {\n"
1434 " ptr = ctx->ReadArenaString(ptr, &$1$_, arena);\n"
1435 "} else {\n"
1436 " ptr = "
1437 "$pi_ns$::InlineGreedyStringParser($1$_.MutableNoArenaNoDefault(&$2$"
1438 "), ptr, ctx);"
1439 "\n}\n"
1440 "const std::string* str = &$1$_.Get(); (void)str;\n",
1441 FieldName(field), default_string);
1442 }
1443
// Emits parse code for a string or bytes field, optionally followed by a
// UTF-8 check.
//
// Parsing: GenerateArenaString() is used for a non-repeated STRING-ctype
// field with an empty default that is not in a real oneof, when the runtime
// is not open-source and the file is not LITE_RUNTIME. In every other case
// the emitted code obtains `str` from the field's add/mutable accessor and
// calls the Inline<...>Parser matching the ctype.
//
// UTF-8 check (only when `check_utf8` is true): depends on
// GetUtf8CheckMode() -- NONE emits nothing, VERIFY wraps the VerifyUTF8 call
// in `#ifndef NDEBUG`, STRICT wraps it in CHK_( ... ).
void GenerateStrings(const FieldDescriptor* field, bool check_utf8) {
  FieldOptions::CType ctype = FieldOptions::STRING;
  if (!options_.opensource_runtime) {
    // Open source doesn't support other ctypes;
    ctype = field->options().ctype();
  }
  if (!field->is_repeated() && !options_.opensource_runtime &&
      GetOptimizeFor(field->file(), options_) != FileOptions::LITE_RUNTIME &&
      // For now only use arena string for strings with empty defaults.
      field->default_value_string().empty() &&
      !field->real_containing_oneof() && ctype == FieldOptions::STRING) {
    GenerateArenaString(field);
  } else {
    // Suffix of the $pi_ns$::Inline<name> parser to call.
    std::string name;
    switch (ctype) {
      case FieldOptions::STRING:
        name = "GreedyStringParser";
        break;
      case FieldOptions::CORD:
        name = "CordParser";
        break;
      case FieldOptions::STRING_PIECE:
        name = "StringPieceParser";
        break;
    }
    format_(
        "auto str = $1$$2$_$3$();\n"
        "ptr = $pi_ns$::Inline$4$(str, ptr, ctx);\n",
        HasInternalAccessors(ctype) ? "_internal_" : "",
        field->is_repeated() && !field->is_packable() ? "add" : "mutable",
        FieldName(field), name);
  }
  if (!check_utf8) return;  // return if this is a bytes field
  auto level = GetUtf8CheckMode(field, options_);
  // First switch: emit whatever must precede the VerifyUTF8 call.
  switch (level) {
    case NONE:
      return;
    case VERIFY:
      format_("#ifndef NDEBUG\n");
      break;
    case STRICT:
      format_("CHK_(");
      break;
  }
  // The field name is passed to VerifyUTF8 only when descriptor methods are
  // available for the file; otherwise nullptr is passed.
  std::string field_name;
  field_name = "nullptr";
  if (HasDescriptorMethods(field->file(), options_)) {
    field_name = StrCat("\"", field->full_name(), "\"");
  }
  format_("$pi_ns$::VerifyUTF8(str, $1$)", field_name);
  // Second switch: emit the matching terminator. The NONE case cannot be
  // reached here because the first switch already returned for it.
  switch (level) {
    case NONE:
      return;
    case VERIFY:
      format_(
          ";\n"
          "#endif  // !NDEBUG\n");
      break;
    case STRICT:
      format_(");\n");
      break;
  }
}
1507
GenerateLengthDelim(const FieldDescriptor * field)1508 void GenerateLengthDelim(const FieldDescriptor* field) {
1509 if (field->is_packable()) {
1510 std::string enum_validator;
1511 if (field->type() == FieldDescriptor::TYPE_ENUM &&
1512 !HasPreservingUnknownEnumSemantics(field)) {
1513 enum_validator =
1514 StrCat(", ", QualifiedClassName(field->enum_type(), options_),
1515 "_IsValid, &_internal_metadata_, ", field->number());
1516 format_(
1517 "ptr = "
1518 "$pi_ns$::Packed$1$Parser<$unknown_fields_type$>(_internal_mutable_"
1519 "$2$(), ptr, "
1520 "ctx$3$);\n",
1521 DeclaredTypeMethodName(field->type()), FieldName(field),
1522 enum_validator);
1523 } else {
1524 format_(
1525 "ptr = $pi_ns$::Packed$1$Parser(_internal_mutable_$2$(), ptr, "
1526 "ctx$3$);\n",
1527 DeclaredTypeMethodName(field->type()), FieldName(field),
1528 enum_validator);
1529 }
1530 } else {
1531 auto field_type = field->type();
1532 switch (field_type) {
1533 case FieldDescriptor::TYPE_STRING:
1534 GenerateStrings(field, true /* utf8 */);
1535 break;
1536 case FieldDescriptor::TYPE_BYTES:
1537 GenerateStrings(field, false /* utf8 */);
1538 break;
1539 case FieldDescriptor::TYPE_MESSAGE: {
1540 if (field->is_map()) {
1541 const FieldDescriptor* val =
1542 field->message_type()->FindFieldByName("value");
1543 GOOGLE_CHECK(val);
1544 if (val->type() == FieldDescriptor::TYPE_ENUM &&
1545 !HasPreservingUnknownEnumSemantics(field)) {
1546 format_(
1547 "auto object = "
1548 "::$proto_ns$::internal::InitEnumParseWrapper<$unknown_"
1549 "fields_type$>("
1550 "&$1$_, $2$_IsValid, $3$, &_internal_metadata_);\n"
1551 "ptr = ctx->ParseMessage(&object, ptr);\n",
1552 FieldName(field), QualifiedClassName(val->enum_type()),
1553 field->number());
1554 } else {
1555 format_("ptr = ctx->ParseMessage(&$1$_, ptr);\n",
1556 FieldName(field));
1557 }
1558 } else if (IsLazy(field, options_)) {
1559 if (field->real_containing_oneof()) {
1560 format_(
1561 "if (!_internal_has_$1$()) {\n"
1562 " clear_$2$();\n"
1563 " $2$_.$1$_ = ::$proto_ns$::Arena::CreateMessage<\n"
1564 " $pi_ns$::LazyField>(GetArena());\n"
1565 " set_has_$1$();\n"
1566 "}\n"
1567 "ptr = ctx->ParseMessage($2$_.$1$_, ptr);\n",
1568 FieldName(field), field->containing_oneof()->name());
1569 } else if (HasHasbit(field)) {
1570 format_(
1571 "_Internal::set_has_$1$(&$has_bits$);\n"
1572 "ptr = ctx->ParseMessage(&$1$_, ptr);\n",
1573 FieldName(field));
1574 } else {
1575 format_("ptr = ctx->ParseMessage(&$1$_, ptr);\n",
1576 FieldName(field));
1577 }
1578 } else if (IsImplicitWeakField(field, options_, scc_analyzer_)) {
1579 if (!field->is_repeated()) {
1580 format_(
1581 "ptr = ctx->ParseMessage(_Internal::mutable_$1$(this), "
1582 "ptr);\n",
1583 FieldName(field));
1584 } else {
1585 format_(
1586 "ptr = ctx->ParseMessage($1$_.AddWeak(reinterpret_cast<const "
1587 "::$proto_ns$::MessageLite*>($2$::_$3$_default_instance_ptr_)"
1588 "), ptr);\n",
1589 FieldName(field), Namespace(field->message_type(), options_),
1590 ClassName(field->message_type()));
1591 }
1592 } else if (IsWeak(field, options_)) {
1593 format_(
1594 "{\n"
1595 " auto* default_ = &reinterpret_cast<const Message&>($1$);\n"
1596 " ptr = ctx->ParseMessage(_weak_field_map_.MutableMessage($2$,"
1597 " default_), ptr);\n"
1598 "}\n",
1599 QualifiedDefaultInstanceName(field->message_type(), options_),
1600 field->number());
1601 } else {
1602 format_("ptr = ctx->ParseMessage(_internal_$1$_$2$(), ptr);\n",
1603 field->is_repeated() ? "add" : "mutable", FieldName(field));
1604 }
1605 break;
1606 }
1607 default:
1608 GOOGLE_LOG(FATAL) << "Illegal combination for length delimited wiretype "
1609 << " filed type is " << field->type();
1610 }
1611 }
1612 }
1613
1614 // Convert a 1 or 2 byte varint into the equivalent value upon a direct load.
SmallVarintValue(uint32 x)1615 static uint32 SmallVarintValue(uint32 x) {
1616 GOOGLE_DCHECK(x < 128 * 128);
1617 if (x >= 128) x += (x & 0xFF80) + 128;
1618 return x;
1619 }
1620
ShouldRepeat(const FieldDescriptor * descriptor,internal::WireFormatLite::WireType wiretype)1621 static bool ShouldRepeat(const FieldDescriptor* descriptor,
1622 internal::WireFormatLite::WireType wiretype) {
1623 constexpr int kMaxTwoByteFieldNumber = 16 * 128;
1624 return descriptor->number() < kMaxTwoByteFieldNumber &&
1625 descriptor->is_repeated() &&
1626 (!descriptor->is_packable() ||
1627 wiretype != internal::WireFormatLite::WIRETYPE_LENGTH_DELIMITED);
1628 }
1629
GenerateFieldBody(internal::WireFormatLite::WireType wiretype,const FieldDescriptor * field)1630 void GenerateFieldBody(internal::WireFormatLite::WireType wiretype,
1631 const FieldDescriptor* field) {
1632 uint32 tag = WireFormatLite::MakeTag(field->number(), wiretype);
1633 switch (wiretype) {
1634 case WireFormatLite::WIRETYPE_VARINT: {
1635 std::string type = PrimitiveTypeName(options_, field->cpp_type());
1636 std::string prefix = field->is_repeated() ? "add" : "set";
1637 if (field->type() == FieldDescriptor::TYPE_ENUM) {
1638 format_(
1639 "$uint64$ val = $pi_ns$::ReadVarint64(&ptr);\n"
1640 "CHK_(ptr);\n");
1641 if (!HasPreservingUnknownEnumSemantics(field)) {
1642 format_("if (PROTOBUF_PREDICT_TRUE($1$_IsValid(val))) {\n",
1643 QualifiedClassName(field->enum_type(), options_));
1644 format_.Indent();
1645 }
1646 format_("_internal_$1$_$2$(static_cast<$3$>(val));\n", prefix,
1647 FieldName(field),
1648 QualifiedClassName(field->enum_type(), options_));
1649 if (!HasPreservingUnknownEnumSemantics(field)) {
1650 format_.Outdent();
1651 format_(
1652 "} else {\n"
1653 " $pi_ns$::WriteVarint($1$, val, mutable_unknown_fields());\n"
1654 "}\n",
1655 field->number());
1656 }
1657 } else {
1658 std::string size = (field->type() == FieldDescriptor::TYPE_SINT32 ||
1659 field->type() == FieldDescriptor::TYPE_UINT32)
1660 ? "32"
1661 : "64";
1662 std::string zigzag;
1663 if ((field->type() == FieldDescriptor::TYPE_SINT32 ||
1664 field->type() == FieldDescriptor::TYPE_SINT64)) {
1665 zigzag = "ZigZag";
1666 }
1667 if (field->is_repeated() || field->real_containing_oneof()) {
1668 std::string prefix = field->is_repeated() ? "add" : "set";
1669 format_(
1670 "_internal_$1$_$2$($pi_ns$::ReadVarint$3$$4$(&ptr));\n"
1671 "CHK_(ptr);\n",
1672 prefix, FieldName(field), zigzag, size);
1673 } else {
1674 if (HasHasbit(field)) {
1675 format_("_Internal::set_has_$1$(&$has_bits$);\n",
1676 FieldName(field));
1677 }
1678 format_(
1679 "$1$_ = $pi_ns$::ReadVarint$2$$3$(&ptr);\n"
1680 "CHK_(ptr);\n",
1681 FieldName(field), zigzag, size);
1682 }
1683 }
1684 break;
1685 }
1686 case WireFormatLite::WIRETYPE_FIXED32:
1687 case WireFormatLite::WIRETYPE_FIXED64: {
1688 std::string type = PrimitiveTypeName(options_, field->cpp_type());
1689 if (field->is_repeated() || field->real_containing_oneof()) {
1690 std::string prefix = field->is_repeated() ? "add" : "set";
1691 format_(
1692 "_internal_$1$_$2$($pi_ns$::UnalignedLoad<$3$>(ptr));\n"
1693 "ptr += sizeof($3$);\n",
1694 prefix, FieldName(field), type);
1695 } else {
1696 if (HasHasbit(field)) {
1697 format_("_Internal::set_has_$1$(&$has_bits$);\n", FieldName(field));
1698 }
1699 format_(
1700 "$1$_ = $pi_ns$::UnalignedLoad<$2$>(ptr);\n"
1701 "ptr += sizeof($2$);\n",
1702 FieldName(field), type);
1703 }
1704 break;
1705 }
1706 case WireFormatLite::WIRETYPE_LENGTH_DELIMITED: {
1707 GenerateLengthDelim(field);
1708 format_("CHK_(ptr);\n");
1709 break;
1710 }
1711 case WireFormatLite::WIRETYPE_START_GROUP: {
1712 format_(
1713 "ptr = ctx->ParseGroup(_internal_$1$_$2$(), ptr, $3$);\n"
1714 "CHK_(ptr);\n",
1715 field->is_repeated() ? "add" : "mutable", FieldName(field), tag);
1716 break;
1717 }
1718 case WireFormatLite::WIRETYPE_END_GROUP: {
1719 GOOGLE_LOG(FATAL) << "Can't have end group field\n";
1720 break;
1721 }
1722 } // switch (wire_type)
1723 }
1724
1725 // Returns the tag for this field and in case of repeated packable fields,
1726 // sets a fallback tag in fallback_tag_ptr.
ExpectedTag(const FieldDescriptor * field,uint32 * fallback_tag_ptr)1727 static uint32 ExpectedTag(const FieldDescriptor* field,
1728 uint32* fallback_tag_ptr) {
1729 uint32 expected_tag;
1730 if (field->is_packable()) {
1731 auto expected_wiretype = WireFormat::WireTypeForFieldType(field->type());
1732 expected_tag =
1733 WireFormatLite::MakeTag(field->number(), expected_wiretype);
1734 GOOGLE_CHECK(expected_wiretype != WireFormatLite::WIRETYPE_LENGTH_DELIMITED);
1735 auto fallback_wiretype = WireFormatLite::WIRETYPE_LENGTH_DELIMITED;
1736 uint32 fallback_tag =
1737 WireFormatLite::MakeTag(field->number(), fallback_wiretype);
1738
1739 if (field->is_packed()) std::swap(expected_tag, fallback_tag);
1740 *fallback_tag_ptr = fallback_tag;
1741 } else {
1742 auto expected_wiretype = WireFormat::WireTypeForField(field);
1743 expected_tag =
1744 WireFormatLite::MakeTag(field->number(), expected_wiretype);
1745 }
1746 return expected_tag;
1747 }
1748
GenerateParseLoop(const Descriptor * descriptor,const std::vector<const FieldDescriptor * > & ordered_fields)1749 void GenerateParseLoop(
1750 const Descriptor* descriptor,
1751 const std::vector<const FieldDescriptor*>& ordered_fields) {
1752 format_(
1753 "while (!ctx->Done(&ptr)) {\n"
1754 " $uint32$ tag;\n"
1755 " ptr = $pi_ns$::ReadTag(ptr, &tag);\n"
1756 " CHK_(ptr);\n");
1757 if (!ordered_fields.empty()) format_(" switch (tag >> 3) {\n");
1758
1759 format_.Indent();
1760 format_.Indent();
1761
1762 for (const auto* field : ordered_fields) {
1763 PrintFieldComment(format_, field);
1764 format_("case $1$:\n", field->number());
1765 format_.Indent();
1766 uint32 fallback_tag = 0;
1767 uint32 expected_tag = ExpectedTag(field, &fallback_tag);
1768 format_(
1769 "if (PROTOBUF_PREDICT_TRUE(static_cast<$uint8$>(tag) == $1$)) {\n",
1770 expected_tag & 0xFF);
1771 format_.Indent();
1772 auto wiretype = WireFormatLite::GetTagWireType(expected_tag);
1773 uint32 tag = WireFormatLite::MakeTag(field->number(), wiretype);
1774 int tag_size = io::CodedOutputStream::VarintSize32(tag);
1775 bool is_repeat = ShouldRepeat(field, wiretype);
1776 if (is_repeat) {
1777 format_(
1778 "ptr -= $1$;\n"
1779 "do {\n"
1780 " ptr += $1$;\n",
1781 tag_size);
1782 format_.Indent();
1783 }
1784 GenerateFieldBody(wiretype, field);
1785 if (is_repeat) {
1786 format_.Outdent();
1787 format_(
1788 " if (!ctx->DataAvailable(ptr)) break;\n"
1789 "} while ($pi_ns$::ExpectTag<$1$>(ptr));\n",
1790 tag);
1791 }
1792 format_.Outdent();
1793 if (fallback_tag) {
1794 format_("} else if (static_cast<$uint8$>(tag) == $1$) {\n",
1795 fallback_tag & 0xFF);
1796 format_.Indent();
1797 GenerateFieldBody(WireFormatLite::GetTagWireType(fallback_tag), field);
1798 format_.Outdent();
1799 }
1800 format_.Outdent();
1801 format_(
1802 " } else goto handle_unusual;\n"
1803 " continue;\n");
1804 } // for loop over ordered fields
1805
1806 // Default case
1807 if (!ordered_fields.empty()) format_("default: {\n");
1808 if (!ordered_fields.empty()) format_("handle_unusual:\n");
1809 format_(
1810 " if ((tag & 7) == 4 || tag == 0) {\n"
1811 " ctx->SetLastTag(tag);\n"
1812 " goto success;\n"
1813 " }\n");
1814 if (IsMapEntryMessage(descriptor)) {
1815 format_(" continue;\n");
1816 } else {
1817 if (descriptor->extension_range_count() > 0) {
1818 format_("if (");
1819 for (int i = 0; i < descriptor->extension_range_count(); i++) {
1820 const Descriptor::ExtensionRange* range =
1821 descriptor->extension_range(i);
1822 if (i > 0) format_(" ||\n ");
1823
1824 uint32 start_tag = WireFormatLite::MakeTag(
1825 range->start, static_cast<WireFormatLite::WireType>(0));
1826 uint32 end_tag = WireFormatLite::MakeTag(
1827 range->end, static_cast<WireFormatLite::WireType>(0));
1828
1829 if (range->end > FieldDescriptor::kMaxNumber) {
1830 format_("($1$u <= tag)", start_tag);
1831 } else {
1832 format_("($1$u <= tag && tag < $2$u)", start_tag, end_tag);
1833 }
1834 }
1835 format_(") {\n");
1836 format_(
1837 " ptr = _extensions_.ParseField(tag, ptr,\n"
1838 " internal_default_instance(), &_internal_metadata_, ctx);\n"
1839 " CHK_(ptr != nullptr);\n"
1840 " continue;\n"
1841 "}\n");
1842 }
1843 format_(
1844 " ptr = UnknownFieldParse(tag,\n"
1845 " _internal_metadata_.mutable_unknown_fields<$unknown_"
1846 "fields_type$>(),\n"
1847 " ptr, ctx);\n"
1848 " CHK_(ptr != nullptr);\n"
1849 " continue;\n");
1850 }
1851 if (!ordered_fields.empty()) format_("}\n"); // default case
1852 format_.Outdent();
1853 format_.Outdent();
1854 if (!ordered_fields.empty()) format_(" } // switch\n");
1855 format_("} // while\n");
1856 }
1857 };
1858
GenerateParserLoop(const Descriptor * descriptor,int num_hasbits,const Options & options,MessageSCCAnalyzer * scc_analyzer,io::Printer * printer)1859 void GenerateParserLoop(const Descriptor* descriptor, int num_hasbits,
1860 const Options& options,
1861 MessageSCCAnalyzer* scc_analyzer,
1862 io::Printer* printer) {
1863 ParseLoopGenerator generator(num_hasbits, options, scc_analyzer, printer);
1864 generator.GenerateParserLoop(descriptor);
1865 }
1866
HasExtensionFromFile(const Message & msg,const FileDescriptor * file,const Options & options,bool * has_opt_codesize_extension)1867 static bool HasExtensionFromFile(const Message& msg, const FileDescriptor* file,
1868 const Options& options,
1869 bool* has_opt_codesize_extension) {
1870 std::vector<const FieldDescriptor*> fields;
1871 auto reflection = msg.GetReflection();
1872 reflection->ListFields(msg, &fields);
1873 for (auto field : fields) {
1874 const auto* field_msg = field->message_type();
1875 if (field_msg == nullptr) {
1876 // It so happens that enums Is_Valid are still generated so enums work.
1877 // Only messages have potential problems.
1878 continue;
1879 }
1880 // If this option has an extension set AND that extension is defined in the
1881 // same file we have bootstrap problem.
1882 if (field->is_extension()) {
1883 const auto* msg_extension_file = field->message_type()->file();
1884 if (msg_extension_file == file) return true;
1885 if (has_opt_codesize_extension &&
1886 GetOptimizeFor(msg_extension_file, options) ==
1887 FileOptions::CODE_SIZE) {
1888 *has_opt_codesize_extension = true;
1889 }
1890 }
1891 // Recurse in this field to see if there is a problem in there
1892 if (field->is_repeated()) {
1893 for (int i = 0; i < reflection->FieldSize(msg, field); i++) {
1894 if (HasExtensionFromFile(reflection->GetRepeatedMessage(msg, field, i),
1895 file, options, has_opt_codesize_extension)) {
1896 return true;
1897 }
1898 }
1899 } else {
1900 if (HasExtensionFromFile(reflection->GetMessage(msg, field), file,
1901 options, has_opt_codesize_extension)) {
1902 return true;
1903 }
1904 }
1905 }
1906 return false;
1907 }
1908
HasBootstrapProblem(const FileDescriptor * file,const Options & options,bool * has_opt_codesize_extension)1909 static bool HasBootstrapProblem(const FileDescriptor* file,
1910 const Options& options,
1911 bool* has_opt_codesize_extension) {
1912 static auto& cache = *new std::unordered_map<const FileDescriptor*, bool>;
1913 auto it = cache.find(file);
1914 if (it != cache.end()) return it->second;
1915 // In order to build the data structures for the reflective parse, it needs
1916 // to parse the serialized descriptor describing all the messages defined in
1917 // this file. Obviously this presents a bootstrap problem for descriptor
1918 // messages.
1919 if (file->name() == "net/proto2/proto/descriptor.proto" ||
1920 file->name() == "google/protobuf/descriptor.proto") {
1921 return true;
1922 }
1923 // Unfortunately we're not done yet. The descriptor option messages allow
1924 // for extensions. So we need to be able to parse these extensions in order
1925 // to parse the file descriptor for a file that has custom options. This is a
1926 // problem when these custom options extensions are defined in the same file.
1927 FileDescriptorProto linkedin_fd_proto;
1928 const DescriptorPool* pool = file->pool();
1929 const Descriptor* fd_proto_descriptor =
1930 pool->FindMessageTypeByName(linkedin_fd_proto.GetTypeName());
1931 // Not all pools have descriptor.proto in them. In these cases there for sure
1932 // are no custom options.
1933 if (fd_proto_descriptor == nullptr) return false;
1934
1935 // It's easier to inspect file as a proto, because we can use reflection on
1936 // the proto to iterate over all content.
1937 file->CopyTo(&linkedin_fd_proto);
1938
1939 // linkedin_fd_proto is a generated proto linked in the proto compiler. As
1940 // such it doesn't know the extensions that are potentially present in the
1941 // descriptor pool constructed from the protos that are being compiled. These
1942 // custom options are therefore in the unknown fields.
1943 // By building the corresponding FileDescriptorProto in the pool constructed
1944 // by the protos that are being compiled, ie. file's pool, the unknown fields
1945 // are converted to extensions.
1946 DynamicMessageFactory factory(pool);
1947 Message* fd_proto = factory.GetPrototype(fd_proto_descriptor)->New();
1948 fd_proto->ParseFromString(linkedin_fd_proto.SerializeAsString());
1949
1950 bool& res = cache[file];
1951 res = HasExtensionFromFile(*fd_proto, file, options,
1952 has_opt_codesize_extension);
1953 delete fd_proto;
1954 return res;
1955 }
1956
GetOptimizeFor(const FileDescriptor * file,const Options & options,bool * has_opt_codesize_extension)1957 FileOptions_OptimizeMode GetOptimizeFor(const FileDescriptor* file,
1958 const Options& options,
1959 bool* has_opt_codesize_extension) {
1960 if (has_opt_codesize_extension) *has_opt_codesize_extension = false;
1961 switch (options.enforce_mode) {
1962 case EnforceOptimizeMode::kSpeed:
1963 return FileOptions::SPEED;
1964 case EnforceOptimizeMode::kLiteRuntime:
1965 return FileOptions::LITE_RUNTIME;
1966 case EnforceOptimizeMode::kCodeSize:
1967 if (file->options().optimize_for() == FileOptions::LITE_RUNTIME) {
1968 return FileOptions::LITE_RUNTIME;
1969 }
1970 if (HasBootstrapProblem(file, options, has_opt_codesize_extension)) {
1971 return FileOptions::SPEED;
1972 }
1973 return FileOptions::CODE_SIZE;
1974 case EnforceOptimizeMode::kNoEnforcement:
1975 if (file->options().optimize_for() == FileOptions::CODE_SIZE) {
1976 if (HasBootstrapProblem(file, options, has_opt_codesize_extension)) {
1977 GOOGLE_LOG(WARNING) << "Proto states optimize_for = CODE_SIZE, but we "
1978 "cannot honor that because it contains custom option "
1979 "extensions defined in the same proto.";
1980 return FileOptions::SPEED;
1981 }
1982 }
1983 return file->options().optimize_for();
1984 }
1985
1986 GOOGLE_LOG(FATAL) << "Unknown optimization enforcement requested.";
1987 // The phony return below serves to silence a warning from GCC 8.
1988 return FileOptions::SPEED;
1989 }
1990
1991 } // namespace cpp
1992 } // namespace compiler
1993 } // namespace protobuf
1994 } // namespace google
1995