1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc. All rights reserved.
3 //
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file or at
6 // https://developers.google.com/open-source/licenses/bsd
7
8 // Author: robinson@google.com (Will Robinson)
9 //
10 // This module outputs pure-Python protocol message classes that will
11 // largely be constructed at runtime via the metaclass in reflection.py.
12 // In other words, our job is basically to output a Python equivalent
13 // of the C++ *Descriptor objects, and fix up all circular references
14 // within these objects.
15 //
16 // Note that the runtime performance of protocol message classes created in
17 // this way is expected to be lousy. The plan is to create an alternate
18 // generator that outputs a Python/C extension module that lets
19 // performance-minded Python code leverage the fast C++ implementation
20 // directly.
21
22 #include "google/protobuf/compiler/python/generator.h"
23
24 #include <cstddef>
25 #include <limits>
26 #include <memory>
27 #include <string>
28 #include <utility>
29 #include <vector>
30
31 #include "absl/container/flat_hash_map.h"
32 #include "absl/container/flat_hash_set.h"
33 #include "absl/log/absl_check.h"
34 #include "absl/log/absl_log.h"
35 #include "absl/memory/memory.h"
36 #include "absl/strings/ascii.h"
37 #include "absl/strings/escaping.h"
38 #include "absl/strings/str_cat.h"
39 #include "absl/strings/str_format.h"
40 #include "absl/strings/str_join.h"
41 #include "absl/strings/str_replace.h"
42 #include "absl/strings/string_view.h"
43 #include "absl/strings/strip.h"
44 #include "absl/strings/substitute.h"
45 #include "google/protobuf/compiler/code_generator.h"
46 #include "google/protobuf/compiler/python/helpers.h"
47 #include "google/protobuf/compiler/python/pyi_generator.h"
48 #include "google/protobuf/compiler/retention.h"
49 #include "google/protobuf/compiler/versions.h"
50 #include "google/protobuf/descriptor.h"
51 #include "google/protobuf/descriptor.pb.h"
52 #include "google/protobuf/descriptor_visitor.h"
53 #include "google/protobuf/dynamic_message.h"
54 #include "google/protobuf/io/printer.h"
55 #include "google/protobuf/io/strtod.h"
56 #include "google/protobuf/io/zero_copy_stream.h"
57 #include "google/protobuf/message.h"
58
59 namespace google {
60 namespace protobuf {
61 namespace compiler {
62 namespace python {
63
64 namespace {
65 // Returns the alias we assign to the module of the given .proto filename
66 // when importing. See testPackageInitializationImport in
67 // third_party/py/google/protobuf/internal/reflection_test.py
68 // to see why we need the alias.
ModuleAlias(absl::string_view filename)69 std::string ModuleAlias(absl::string_view filename) {
70 std::string module_name = ModuleName(filename);
71 // We can't have dots in the module name, so we replace each with _dot_.
72 // But that could lead to a collision between a.b and a_dot_b, so we also
73 // duplicate each underscore.
74 absl::StrReplaceAll({{"_", "__"}}, &module_name);
75 absl::StrReplaceAll({{".", "_dot_"}}, &module_name);
76 return module_name;
77 }
78
// Class attribute under which the Python descriptor.Descriptor instance of a
// generated class is stored.
// Must stay consistent with the _DESCRIPTOR_KEY constant
// in proto2/public/reflection.py.
constexpr char kDescriptorKey[] = "DESCRIPTOR";

// Module-path prefix that is stripped from module names when the generator is
// not targeting the open-source runtime.
constexpr char kThirdPartyPrefix[] = "google3.third_party.py.";
86
87 // Returns a Python literal giving the default value for a field.
88 // If the field specifies no explicit default value, we'll return
89 // the default default value for the field type (zero for numbers,
90 // empty string for strings, empty list for repeated fields, and
91 // None for non-repeated, composite fields).
92 //
93 // TODO: Unify with code from
94 // //compiler/cpp/internal/primitive_field.cc
95 // //compiler/cpp/internal/enum_field.cc
96 // //compiler/cpp/internal/string_field.cc
StringifyDefaultValue(const FieldDescriptor & field)97 std::string StringifyDefaultValue(const FieldDescriptor& field) {
98 if (field.is_repeated()) {
99 return "[]";
100 }
101
102 switch (field.cpp_type()) {
103 case FieldDescriptor::CPPTYPE_INT32:
104 return absl::StrCat(field.default_value_int32());
105 case FieldDescriptor::CPPTYPE_UINT32:
106 return absl::StrCat(field.default_value_uint32());
107 case FieldDescriptor::CPPTYPE_INT64:
108 return absl::StrCat(field.default_value_int64());
109 case FieldDescriptor::CPPTYPE_UINT64:
110 return absl::StrCat(field.default_value_uint64());
111 case FieldDescriptor::CPPTYPE_DOUBLE: {
112 double value = field.default_value_double();
113 if (value == std::numeric_limits<double>::infinity()) {
114 // Python pre-2.6 on Windows does not parse "inf" correctly. However,
115 // a numeric literal that is too big for a double will become infinity.
116 return "1e10000";
117 } else if (value == -std::numeric_limits<double>::infinity()) {
118 // See above.
119 return "-1e10000";
120 } else if (value != value) {
121 // infinity * 0 = nan
122 return "(1e10000 * 0)";
123 } else {
124 return absl::StrCat("float(", io::SimpleDtoa(value), ")");
125 }
126 }
127 case FieldDescriptor::CPPTYPE_FLOAT: {
128 float value = field.default_value_float();
129 if (value == std::numeric_limits<float>::infinity()) {
130 // Python pre-2.6 on Windows does not parse "inf" correctly. However,
131 // a numeric literal that is too big for a double will become infinity.
132 return "1e10000";
133 } else if (value == -std::numeric_limits<float>::infinity()) {
134 // See above.
135 return "-1e10000";
136 } else if (value != value) {
137 // infinity - infinity = nan
138 return "(1e10000 * 0)";
139 } else {
140 return absl::StrCat("float(", io::SimpleFtoa(value), ")");
141 }
142 }
143 case FieldDescriptor::CPPTYPE_BOOL:
144 return field.default_value_bool() ? "True" : "False";
145 case FieldDescriptor::CPPTYPE_ENUM:
146 return absl::StrCat(field.default_value_enum()->number());
147 case FieldDescriptor::CPPTYPE_STRING:
148 return absl::StrCat("b\"", absl::CEscape(field.default_value_string()),
149 (field.type() != FieldDescriptor::TYPE_STRING
150 ? "\""
151 : "\".decode('utf-8')"));
152 case FieldDescriptor::CPPTYPE_MESSAGE:
153 return "None";
154 }
155 // (We could add a default case above but then we wouldn't get the nice
156 // compiler warning when a new type is added.)
157 ABSL_LOG(FATAL) << "Not reached.";
158 return "";
159 }
160
161 // Returns a CEscaped string of serialized_options.
OptionsValue(absl::string_view serialized_options)162 std::string OptionsValue(absl::string_view serialized_options) {
163 if (serialized_options.empty()) {
164 return "None";
165 } else {
166 return absl::StrCat("b'", absl::CEscape(serialized_options), "'");
167 }
168 }
169
GetLegacySyntaxName(Edition edition)170 std::string GetLegacySyntaxName(Edition edition) {
171 switch (edition) {
172 case Edition::EDITION_PROTO2:
173 return "proto2";
174 case Edition::EDITION_PROTO3:
175 return "proto3";
176 default:
177 return "editions";
178 }
179 }
180
181 } // namespace
182
Generator()183 Generator::Generator() : file_(nullptr) {}
184
~Generator()185 Generator::~Generator() {}
186
ParseParameter(absl::string_view parameter,std::string * error) const187 GeneratorOptions Generator::ParseParameter(absl::string_view parameter,
188 std::string* error) const {
189 GeneratorOptions options;
190
191 std::vector<std::pair<std::string, std::string> > option_pairs;
192 ParseGeneratorParameter(parameter, &option_pairs);
193
194 for (const std::pair<std::string, std::string>& option : option_pairs) {
195 if (!opensource_runtime_ && option.first == "bootstrap") {
196 options.bootstrap = true;
197 } else if (option.first == "pyi_out") {
198 options.generate_pyi = true;
199 } else if (option.first == "annotate_code") {
200 options.annotate_pyi = true;
201 } else if (option.first == "experimental_strip_nonfunctional_codegen") {
202 options.strip_nonfunctional_codegen = true;
203 } else {
204 *error = absl::StrCat("Unknown generator option: ", option.first);
205 }
206 }
207 return options;
208 }
209
Generate(const FileDescriptor * file,const std::string & parameter,GeneratorContext * context,std::string * error) const210 bool Generator::Generate(const FileDescriptor* file,
211 const std::string& parameter,
212 GeneratorContext* context, std::string* error) const {
213 // -----------------------------------------------------------------
214 GeneratorOptions options = ParseParameter(parameter, error);
215 if (!error->empty()) return false;
216
217 // Generate pyi typing information
218 if (options.generate_pyi) {
219 python::PyiGenerator pyi_generator;
220 std::vector<std::string> pyi_options;
221 if (options.annotate_pyi) {
222 pyi_options.push_back("annotate_code");
223 }
224 if (options.strip_nonfunctional_codegen) {
225 pyi_options.push_back("experimental_strip_nonfunctional_codegen");
226 }
227 if (!pyi_generator.Generate(file, absl::StrJoin(pyi_options, ","), context,
228 error)) {
229 return false;
230 }
231 }
232
233 // Completely serialize all Generate() calls on this instance. The
234 // thread-safety constraints of the CodeGenerator interface aren't clear so
235 // just be as conservative as possible. It's easier to relax this later if
236 // we need to, but I doubt it will be an issue.
237 // TODO: The proper thing to do would be to allocate any state on
238 // the stack and use that, so that the Generator class itself does not need
239 // to have any mutable members. Then it is implicitly thread-safe.
240 absl::MutexLock lock(&mutex_);
241 file_ = file;
242
243 std::string filename = GetFileName(file, ".py");
244
245 proto_ = StripSourceRetentionOptions(*file_);
246 proto_.SerializeToString(&file_descriptor_serialized_);
247
248 if (!opensource_runtime_ && GeneratingDescriptorProto()) {
249 std::string bootstrap_filename =
250 "net/proto2/python/internal/descriptor_pb2.py";
251 if (options.bootstrap) {
252 filename = bootstrap_filename;
253 } else {
254 std::unique_ptr<io::ZeroCopyOutputStream> output(context->Open(filename));
255 io::Printer printer(output.get(), '$');
256 printer.Print(
257 "from google3.net.google.protobuf.python.internal import "
258 "descriptor_pb2\n"
259 "\n");
260
261 // For static checkers, we need to explicitly assign to the symbols we
262 // publicly export.
263 for (int i = 0; i < file_->message_type_count(); i++) {
264 const Descriptor* message = file_->message_type(i);
265 printer.Print("$name$ = descriptor_pb2.$name$\n", "name",
266 message->name());
267 }
268
269 // Sadly some clients access our internal variables (starting with "_").
270 // To support them, we iterate over *all* symbols to expose even the
271 // private ones. Statically type-checked code should (especially) never
272 // use these, so we don't worry about making them available to pytype
273 // checks.
274 printer.Print(
275 "\n"
276 "globals().update(descriptor_pb2.__dict__)\n"
277 "\n");
278
279 printer.Print(
280 "# @@protoc_insertion_point(module_scope)\n"
281 "\n");
282 return true;
283 }
284 }
285
286 std::unique_ptr<io::ZeroCopyOutputStream> output(context->Open(filename));
287 ABSL_CHECK(output.get());
288 io::Printer printer(output.get(), '$');
289 printer_ = &printer;
290
291 PrintTopBoilerplate();
292 PrintImports();
293 PrintFileDescriptor();
294 printer_->Print("_globals = globals()\n");
295 if (GeneratingDescriptorProto()) {
296 printer_->Print("if not _descriptor._USE_C_DESCRIPTORS:\n");
297 printer_->Indent();
298 // Create enums before message descriptors
299 PrintAllEnumsInFile();
300 PrintMessageDescriptors();
301 FixForeignFieldsInDescriptors();
302 PrintResolvedFeatures();
303 printer_->Outdent();
304 printer_->Print("else:\n");
305 printer_->Indent();
306 }
307 // Find the message descriptors first and then use the message
308 // descriptor to find enums.
309 printer_->Print(
310 "_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)\n");
311 if (GeneratingDescriptorProto()) {
312 printer_->Outdent();
313 }
314 std::string module_name = ModuleName(file->name());
315 if (!opensource_runtime_) {
316 module_name =
317 std::string(absl::StripPrefix(module_name, kThirdPartyPrefix));
318 }
319 printer_->Print(
320 "_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, '$module_name$', "
321 "_globals)\n",
322 "module_name", module_name);
323 printer.Print("if not _descriptor._USE_C_DESCRIPTORS:\n");
324 printer_->Indent();
325
326 // Descriptor options may have custom extensions. These custom options
327 // can only be successfully parsed after we register corresponding
328 // extensions. Therefore we parse all options again here to recognize
329 // custom options that may be unknown when we define the descriptors.
330 // This does not apply to services because they are not used by extensions.
331 FixAllDescriptorOptions();
332
333 // Set serialized_start and serialized_end.
334 SetSerializedPbInterval(proto_);
335
336 printer_->Outdent();
337 if (HasGenericServices(file)) {
338 printer_->Print(
339 "_builder.BuildServices(DESCRIPTOR, '$module_name$', _globals)\n",
340 "module_name", module_name);
341 }
342
343 printer.Print("# @@protoc_insertion_point(module_scope)\n");
344
345 return !printer.failed();
346 }
347
348 // file output by this generator.
PrintTopBoilerplate() const349 void Generator::PrintTopBoilerplate() const {
350 // TODO: Allow parameterization of Python version?
351 printer_->Print(
352 "# -*- coding: utf-8 -*-\n"
353 "# Generated by the protocol buffer compiler. DO NOT EDIT!\n"
354 "# NO CHECKED-IN PROTOBUF "
355 // Intentional line breaker
356 "GENCODE\n"
357 "# source: $filename$\n",
358 "filename", file_->name());
359 if (opensource_runtime_) {
360 printer_->Print("# Protobuf Python Version: $protobuf_python_version$\n",
361 "protobuf_python_version", PROTOBUF_PYTHON_VERSION_STRING);
362 }
363 printer_->Print("\"\"\"Generated protocol buffer code.\"\"\"\n");
364 if (!opensource_runtime_) {
365 // This import is needed so that compatibility proto1 compiler output
366 // inserted at protoc_insertion_point can refer to other protos like
367 // google3.a.b.c. Code generated by proto2 compiler doesn't do it, and
368 // instead uses aliases assigned when importing modules.
369 printer_->Print("import google3\n");
370 }
371 bool runtime_version_disabled = false;
372 printer_->Print(
373 "from google.protobuf import descriptor as _descriptor\n"
374 "from google.protobuf import descriptor_pool as _descriptor_pool\n"
375 "$runtime_version_import$"
376 "from google.protobuf import symbol_database as _symbol_database\n"
377 "from google.protobuf.internal import builder as _builder\n",
378 "runtime_version_import",
379 runtime_version_disabled ? ""
380 : "from google.protobuf import runtime_version "
381 "as _runtime_version\n");
382 if (!runtime_version_disabled) {
383 const auto& version = GetProtobufPythonVersion(opensource_runtime_);
384 printer_->Print(
385 "_runtime_version.ValidateProtobufRuntimeVersion(\n"
386 " $domain$,\n"
387 " $major$,\n"
388 " $minor$,\n"
389 " $patch$,\n"
390 " '$suffix$',\n"
391 " '$location$'\n"
392 ")\n",
393 "domain",
394 opensource_runtime_ ? "_runtime_version.Domain.PUBLIC"
395 : "_runtime_version.Domain.GOOGLE_INTERNAL",
396 "major", absl::StrCat(version.major()), "minor",
397 absl::StrCat(version.minor()), "patch", absl::StrCat(version.patch()),
398 "suffix", version.suffix(), "location", file_->name());
399 }
400 printer_->Print("# @@protoc_insertion_point(imports)\n\n");
401 printer_->Print("_sym_db = _symbol_database.Default()\n");
402 printer_->Print("\n\n");
403 }
404
405 // Prints Python imports for all modules imported by |file|.
PrintImports() const406 void Generator::PrintImports() const {
407 bool has_importlib = false;
408 for (int i = 0; i < file_->dependency_count(); ++i) {
409 absl::string_view filename = file_->dependency(i)->name();
410
411 std::string module_name = ModuleName(filename);
412 std::string module_alias = ModuleAlias(filename);
413 if (!opensource_runtime_) {
414 module_name =
415 std::string(absl::StripPrefix(module_name, kThirdPartyPrefix));
416 }
417 if (ContainsPythonKeyword(module_name)) {
418 // If the module path contains a Python keyword, we have to quote the
419 // module name and import it using importlib. Otherwise the usual kind of
420 // import statement would result in a syntax error from the presence of
421 // the keyword.
422 if (has_importlib == false) {
423 printer_->Print("import importlib\n");
424 has_importlib = true;
425 }
426 printer_->Print("$alias$ = importlib.import_module('$name$')\n", "alias",
427 module_alias, "name", module_name);
428 } else {
429 size_t last_dot_pos = module_name.rfind('.');
430 std::string import_statement;
431 if (last_dot_pos == std::string::npos) {
432 // NOTE: this is not tested as it would require a protocol buffer
433 // outside of any package, and I don't think that is easily achievable.
434 import_statement = absl::StrCat("import ", module_name);
435 } else {
436 import_statement =
437 absl::StrCat("from ", module_name.substr(0, last_dot_pos),
438 " import ", module_name.substr(last_dot_pos + 1));
439 }
440 printer_->Print("$statement$ as $alias$\n", "statement", import_statement,
441 "alias", module_alias);
442 }
443
444 CopyPublicDependenciesAliases(module_alias, file_->dependency(i));
445 }
446 printer_->Print("\n");
447
448 // Print public imports.
449 for (int i = 0; i < file_->public_dependency_count(); ++i) {
450 std::string module_name = ModuleName(file_->public_dependency(i)->name());
451 if (!opensource_runtime_) {
452 module_name =
453 std::string(absl::StripPrefix(module_name, kThirdPartyPrefix));
454 }
455 printer_->Print("from $module$ import *\n", "module", module_name);
456 }
457 printer_->Print("\n");
458 }
459
460 template <typename DescriptorT>
GetResolvedFeatures(const DescriptorT & descriptor) const461 std::string Generator::GetResolvedFeatures(
462 const DescriptorT& descriptor) const {
463 if (!GeneratingDescriptorProto()) {
464 // Everything but descriptor.proto can handle proper feature resolution.
465 return "None";
466 }
467
468 // Load the resolved features from our pool.
469 const Descriptor* feature_set =
470 file_->FindMessageTypeByName(FeatureSet::GetDescriptor()->name());
471 ABSL_CHECK(feature_set != nullptr)
472 << "Malformed descriptor.proto doesn't contain "
473 << FeatureSet::GetDescriptor()->full_name();
474 auto message_factory = absl::make_unique<DynamicMessageFactory>();
475 auto features =
476 absl::WrapUnique(message_factory->GetPrototype(feature_set)->New());
477 features->ParseFromString(
478 GetResolvedSourceFeatures(descriptor).SerializeAsString());
479
480 // Collect all of the resolved features.
481 std::vector<std::string> feature_args;
482 const Reflection* reflection = features->GetReflection();
483 std::vector<const FieldDescriptor*> fields;
484 reflection->ListFields(*features, &fields);
485 for (const auto* field : fields) {
486 // Assume these are all enums. If we add non-enum global features or any
487 // python-specific features, we will need to come back and improve this
488 // logic.
489 ABSL_CHECK(field->enum_type() != nullptr)
490 << "Unexpected non-enum field found!";
491 if (field->options().retention() == FieldOptions::RETENTION_SOURCE) {
492 // Skip any source-retention features.
493 continue;
494 }
495 const EnumDescriptor* enm = field->enum_type();
496 const EnumValueDescriptor* value =
497 enm->FindValueByNumber(reflection->GetEnumValue(*features, field));
498
499 feature_args.emplace_back(absl::StrCat(
500 field->name(), "=",
501 absl::StrFormat("%s.values_by_name[\"%s\"].number",
502 ModuleLevelDescriptorName(*enm), value->name())));
503 }
504 return absl::StrCat("_ResolvedFeatures(", absl::StrJoin(feature_args, ","),
505 ")");
506 }
507
PrintResolvedFeatures() const508 void Generator::PrintResolvedFeatures() const {
509 // Since features are used during the descriptor build, it's impossible to do
510 // feature resolution at the normal point for descriptor.proto. Instead, we do
511 // feature resolution here in the generator, and embed a custom object on all
512 // of the generated descriptors. This object should act like any other
513 // FeatureSet message on normal descriptors, but will never have to be
514 // resolved by the python runtime.
515 ABSL_CHECK(GeneratingDescriptorProto());
516 printer_->Emit({{"resolved_features", GetResolvedFeatures(*file_)},
517 {"descriptor_name", kDescriptorKey}},
518 R"py(
519 class _ResolvedFeatures:
520 def __init__(self, features = None, **kwargs):
521 if features:
522 for k, v in features.FIELDS.items():
523 setattr(self, k, getattr(features, k))
524 else:
525 for k, v in kwargs.items():
526 setattr(self, k, v)
527 $descriptor_name$._features = $resolved_features$
528 )py");
529
530 #define MAKE_NESTED(desc, CPP_FIELD, PY_FIELD) \
531 [&] { \
532 for (int i = 0; i < desc.CPP_FIELD##_count(); ++i) { \
533 printer_->Emit( \
534 {{"resolved_subfeatures", GetResolvedFeatures(*desc.CPP_FIELD(i))}, \
535 {"index", absl::StrCat(i)}, \
536 {"field", PY_FIELD}}, \
537 "$descriptor_name$.$field$[$index$]._features = " \
538 "$resolved_subfeatures$\n"); \
539 } \
540 }
541
542 google::protobuf::internal::VisitDescriptors(*file_, [&](const Descriptor& msg) {
543 printer_->Emit(
544 {{"resolved_features", GetResolvedFeatures(msg)},
545 {"descriptor_name", ModuleLevelDescriptorName(msg)},
546 {"field_features", MAKE_NESTED(msg, field, "fields")},
547 {"oneof_features", MAKE_NESTED(msg, oneof_decl, "oneofs")},
548 {"ext_features", MAKE_NESTED(msg, extension, "extensions")}},
549 R"py(
550 $descriptor_name$._features = $resolved_features$
551 $field_features$
552 $oneof_features$
553 $ext_features$
554 )py");
555 });
556 google::protobuf::internal::VisitDescriptors(*file_, [&](const EnumDescriptor& enm) {
557 printer_->Emit({{"resolved_features", GetResolvedFeatures(enm)},
558 {"descriptor_name", ModuleLevelDescriptorName(enm)},
559 {"value_features", MAKE_NESTED(enm, value, "values")}},
560 R"py(
561 $descriptor_name$._features = $resolved_features$
562 $value_features$
563 )py");
564 });
565 #undef MAKE_NESTED
566 }
567
568 // Prints the single file descriptor for this file.
PrintFileDescriptor() const569 void Generator::PrintFileDescriptor() const {
570 absl::flat_hash_map<absl::string_view, std::string> m;
571 m["descriptor_name"] = kDescriptorKey;
572 m["name"] = file_->name();
573 m["package"] = file_->package();
574 m["syntax"] = GetLegacySyntaxName(GetEdition(*file_));
575 m["edition"] = Edition_Name(GetEdition(*file_));
576 m["options"] = OptionsValue(proto_.options().SerializeAsString());
577 m["serialized_descriptor"] = absl::CHexEscape(file_descriptor_serialized_);
578 if (GeneratingDescriptorProto()) {
579 printer_->Print("if not _descriptor._USE_C_DESCRIPTORS:\n");
580 printer_->Indent();
581 // Pure python's AddSerializedFile() depend on the generated
582 // descriptor_pb2.py thus we can not use AddSerializedFile() when
583 // generated descriptor.proto for pure python.
584 const char file_descriptor_template[] =
585 "$descriptor_name$ = _descriptor.FileDescriptor(\n"
586 " name='$name$',\n"
587 " package='$package$',\n"
588 " syntax='$syntax$',\n"
589 " edition='$edition$',\n"
590 " serialized_options=$options$,\n"
591 " create_key=_descriptor._internal_create_key,\n";
592 printer_->Print(m, file_descriptor_template);
593 printer_->Indent();
594 printer_->Print("serialized_pb=b'$value$'\n", "value",
595 absl::CHexEscape(file_descriptor_serialized_));
596 if (file_->dependency_count() != 0) {
597 printer_->Print(",\ndependencies=[");
598 for (int i = 0; i < file_->dependency_count(); ++i) {
599 std::string module_alias = ModuleAlias(file_->dependency(i)->name());
600 printer_->Print("$module_alias$.DESCRIPTOR,", "module_alias",
601 module_alias);
602 }
603 printer_->Print("]");
604 }
605 if (file_->public_dependency_count() > 0) {
606 printer_->Print(",\npublic_dependencies=[");
607 for (int i = 0; i < file_->public_dependency_count(); ++i) {
608 std::string module_alias =
609 ModuleAlias(file_->public_dependency(i)->name());
610 printer_->Print("$module_alias$.DESCRIPTOR,", "module_alias",
611 module_alias);
612 }
613 printer_->Print("]");
614 }
615
616 // TODO: Also print options and fix the message_type, enum_type,
617 // service and extension later in the generation.
618
619 printer_->Outdent();
620 printer_->Print(")\n");
621
622 printer_->Outdent();
623 printer_->Print("else:\n");
624 printer_->Indent();
625 }
626 printer_->Print(m,
627 "$descriptor_name$ = "
628 "_descriptor_pool.Default().AddSerializedFile(b'$serialized_"
629 "descriptor$')\n");
630 if (GeneratingDescriptorProto()) {
631 printer_->Outdent();
632 }
633 printer_->Print("\n");
634 }
635
636 // Prints all enums contained in all message types in |file|.
PrintAllEnumsInFile() const637 void Generator::PrintAllEnumsInFile() const {
638 for (int i = 0; i < file_->enum_type_count(); ++i) {
639 PrintEnum(*file_->enum_type(i), proto_.enum_type(i));
640 }
641 for (int i = 0; i < file_->message_type_count(); ++i) {
642 PrintNestedEnums(*file_->message_type(i), proto_.message_type(i));
643 }
644 }
645
646 // Prints a Python statement assigning the appropriate module-level
647 // enum name to a Python EnumDescriptor object equivalent to
648 // enum_descriptor.
PrintEnum(const EnumDescriptor & enum_descriptor,const EnumDescriptorProto & proto) const649 void Generator::PrintEnum(const EnumDescriptor& enum_descriptor,
650 const EnumDescriptorProto& proto) const {
651 absl::flat_hash_map<absl::string_view, std::string> m;
652 std::string module_level_descriptor_name =
653 ModuleLevelDescriptorName(enum_descriptor);
654 m["descriptor_name"] = module_level_descriptor_name;
655 m["name"] = enum_descriptor.name();
656 m["full_name"] = enum_descriptor.full_name();
657 m["file"] = kDescriptorKey;
658 const char enum_descriptor_template[] =
659 "$descriptor_name$ = _descriptor.EnumDescriptor(\n"
660 " name='$name$',\n"
661 " full_name='$full_name$',\n"
662 " filename=None,\n"
663 " file=$file$,\n"
664 " create_key=_descriptor._internal_create_key,\n"
665 " values=[\n";
666 std::string options_string;
667 proto.options().SerializeToString(&options_string);
668 printer_->Print(m, enum_descriptor_template);
669 printer_->Indent();
670 printer_->Indent();
671
672 for (int i = 0; i < enum_descriptor.value_count(); ++i) {
673 PrintEnumValueDescriptor(*enum_descriptor.value(i), proto.value(i));
674 printer_->Print(",\n");
675 }
676
677 printer_->Outdent();
678 printer_->Print("],\n");
679 printer_->Print("containing_type=None,\n");
680 printer_->Print("serialized_options=$options_value$,\n", "options_value",
681 OptionsValue(options_string));
682 EnumDescriptorProto edp;
683 printer_->Outdent();
684 printer_->Print(")\n");
685 printer_->Print("_sym_db.RegisterEnumDescriptor($name$)\n", "name",
686 module_level_descriptor_name);
687 printer_->Print("\n");
688 }
689
690 // Recursively prints enums in nested types within descriptor, then
691 // prints enums contained at the top level in descriptor.
PrintNestedEnums(const Descriptor & descriptor,const DescriptorProto & proto) const692 void Generator::PrintNestedEnums(const Descriptor& descriptor,
693 const DescriptorProto& proto) const {
694 for (int i = 0; i < descriptor.nested_type_count(); ++i) {
695 PrintNestedEnums(*descriptor.nested_type(i), proto.nested_type(i));
696 }
697
698 for (int i = 0; i < descriptor.enum_type_count(); ++i) {
699 PrintEnum(*descriptor.enum_type(i), proto.enum_type(i));
700 }
701 }
702
703 // Prints Python equivalents of all Descriptors in |file|.
PrintMessageDescriptors() const704 void Generator::PrintMessageDescriptors() const {
705 for (int i = 0; i < file_->message_type_count(); ++i) {
706 PrintDescriptor(*file_->message_type(i), proto_.message_type(i));
707 printer_->Print("\n");
708 }
709 }
710
PrintServiceDescriptors() const711 void Generator::PrintServiceDescriptors() const {
712 for (int i = 0; i < file_->service_count(); ++i) {
713 PrintServiceDescriptor(*file_->service(i));
714 }
715 }
716
PrintServices() const717 void Generator::PrintServices() const {
718 for (int i = 0; i < file_->service_count(); ++i) {
719 PrintServiceClass(*file_->service(i));
720 PrintServiceStub(*file_->service(i));
721 printer_->Print("\n");
722 }
723 }
724
PrintServiceDescriptor(const ServiceDescriptor & descriptor) const725 void Generator::PrintServiceDescriptor(
726 const ServiceDescriptor& descriptor) const {
727 absl::flat_hash_map<absl::string_view, std::string> m;
728 m["service_name"] = ModuleLevelServiceDescriptorName(descriptor);
729 m["name"] = descriptor.name();
730 m["file"] = kDescriptorKey;
731 printer_->Print(m, "$service_name$ = $file$.services_by_name['$name$']\n");
732 }
733
PrintDescriptorKeyAndModuleName(const ServiceDescriptor & descriptor) const734 void Generator::PrintDescriptorKeyAndModuleName(
735 const ServiceDescriptor& descriptor) const {
736 std::string name = ModuleLevelServiceDescriptorName(descriptor);
737 printer_->Print("$descriptor_key$ = $descriptor_name$,\n", "descriptor_key",
738 kDescriptorKey, "descriptor_name", name);
739 std::string module_name = ModuleName(file_->name());
740 if (!opensource_runtime_) {
741 module_name =
742 std::string(absl::StripPrefix(module_name, kThirdPartyPrefix));
743 }
744 printer_->Print("__module__ = '$module_name$'\n", "module_name", module_name);
745 }
746
PrintServiceClass(const ServiceDescriptor & descriptor) const747 void Generator::PrintServiceClass(const ServiceDescriptor& descriptor) const {
748 // Print the service.
749 printer_->Print(
750 "$class_name$ = service_reflection.GeneratedServiceType("
751 "'$class_name$', (_service.Service,), dict(\n",
752 "class_name", descriptor.name());
753 printer_->Indent();
754 Generator::PrintDescriptorKeyAndModuleName(descriptor);
755 printer_->Print("))\n\n");
756 printer_->Outdent();
757 }
758
PrintServiceStub(const ServiceDescriptor & descriptor) const759 void Generator::PrintServiceStub(const ServiceDescriptor& descriptor) const {
760 // Print the service stub.
761 printer_->Print(
762 "$class_name$_Stub = "
763 "service_reflection.GeneratedServiceStubType("
764 "'$class_name$_Stub', ($class_name$,), dict(\n",
765 "class_name", descriptor.name());
766 printer_->Indent();
767 Generator::PrintDescriptorKeyAndModuleName(descriptor);
768 printer_->Print("))\n\n");
769 printer_->Outdent();
770 }
771
772 // Prints statement assigning ModuleLevelDescriptorName(message_descriptor)
773 // to a Python Descriptor object for message_descriptor.
774 //
775 // Mutually recursive with PrintNestedDescriptors().
PrintDescriptor(const Descriptor & message_descriptor,const DescriptorProto & proto) const776 void Generator::PrintDescriptor(const Descriptor& message_descriptor,
777 const DescriptorProto& proto) const {
778 absl::flat_hash_map<absl::string_view, std::string> m;
779 m["name"] = message_descriptor.name();
780 m["full_name"] = message_descriptor.full_name();
781 m["file"] = kDescriptorKey;
782
783 PrintNestedDescriptors(message_descriptor, proto);
784
785 printer_->Print("\n");
786 printer_->Print("$descriptor_name$ = _descriptor.Descriptor(\n",
787 "descriptor_name",
788 ModuleLevelDescriptorName(message_descriptor));
789 printer_->Indent();
790 const char required_function_arguments[] =
791 "name='$name$',\n"
792 "full_name='$full_name$',\n"
793 "filename=None,\n"
794 "file=$file$,\n"
795 "containing_type=None,\n"
796 "create_key=_descriptor._internal_create_key,\n";
797 printer_->Print(m, required_function_arguments);
798 PrintFieldsInDescriptor(message_descriptor, proto);
799 PrintExtensionsInDescriptor(message_descriptor, proto);
800
801 // Nested types
802 printer_->Print("nested_types=[");
803 for (int i = 0; i < message_descriptor.nested_type_count(); ++i) {
804 const std::string nested_name =
805 ModuleLevelDescriptorName(*message_descriptor.nested_type(i));
806 printer_->Print("$name$, ", "name", nested_name);
807 }
808 printer_->Print("],\n");
809
810 // Enum types
811 printer_->Print("enum_types=[\n");
812 printer_->Indent();
813 for (int i = 0; i < message_descriptor.enum_type_count(); ++i) {
814 const std::string descriptor_name =
815 ModuleLevelDescriptorName(*message_descriptor.enum_type(i));
816 printer_->Print(descriptor_name.c_str());
817 printer_->Print(",\n");
818 }
819 printer_->Outdent();
820 printer_->Print("],\n");
821 std::string options_string;
822 proto.options().SerializeToString(&options_string);
823 printer_->Print(
824 "serialized_options=$options_value$,\n"
825 "is_extendable=$extendable$",
826 "options_value", OptionsValue(options_string), "extendable",
827 message_descriptor.extension_range_count() > 0 ? "True" : "False");
828 printer_->Print(",\n");
829
830 // Extension ranges
831 printer_->Print("extension_ranges=[");
832 for (int i = 0; i < message_descriptor.extension_range_count(); ++i) {
833 const Descriptor::ExtensionRange* range =
834 message_descriptor.extension_range(i);
835 printer_->Print("($start$, $end$), ", "start",
836 absl::StrCat(range->start_number()), "end",
837 absl::StrCat(range->end_number()));
838 }
839 printer_->Print("],\n");
840 printer_->Print("oneofs=[\n");
841 printer_->Indent();
842 for (int i = 0; i < message_descriptor.oneof_decl_count(); ++i) {
843 const OneofDescriptor* desc = message_descriptor.oneof_decl(i);
844 m.clear();
845 m["name"] = desc->name();
846 m["full_name"] = desc->full_name();
847 m["index"] = absl::StrCat(desc->index());
848 options_string =
849 OptionsValue(proto.oneof_decl(i).options().SerializeAsString());
850 if (options_string == "None") {
851 m["serialized_options"] = "";
852 } else {
853 m["serialized_options"] =
854 absl::StrCat(", serialized_options=", options_string);
855 }
856 printer_->Print(m,
857 "_descriptor.OneofDescriptor(\n"
858 " name='$name$', full_name='$full_name$',\n"
859 " index=$index$, containing_type=None,\n"
860 " create_key=_descriptor._internal_create_key,\n"
861 "fields=[]$serialized_options$),\n");
862 }
863 printer_->Outdent();
864 printer_->Print("],\n");
865
866 printer_->Outdent();
867 printer_->Print(")\n");
868 }
869
870 // Prints Python Descriptor objects for all nested types contained in
871 // message_descriptor.
872 //
873 // Mutually recursive with PrintDescriptor().
PrintNestedDescriptors(const Descriptor & containing_descriptor,const DescriptorProto & proto) const874 void Generator::PrintNestedDescriptors(const Descriptor& containing_descriptor,
875 const DescriptorProto& proto) const {
876 for (int i = 0; i < containing_descriptor.nested_type_count(); ++i) {
877 PrintDescriptor(*containing_descriptor.nested_type(i),
878 proto.nested_type(i));
879 }
880 }
881
882 // Prints all messages in |file|.
PrintMessages() const883 void Generator::PrintMessages() const {
884 for (int i = 0; i < file_->message_type_count(); ++i) {
885 std::vector<std::string> to_register;
886 PrintMessage(*file_->message_type(i), "", &to_register, false);
887 for (int j = 0; j < to_register.size(); ++j) {
888 printer_->Print("_sym_db.RegisterMessage($name$)\n", "name",
889 ResolveKeyword(to_register[j]));
890 }
891 printer_->Print("\n");
892 }
893 }
894
895 // Prints a Python class for the given message descriptor. We defer to the
896 // metaclass to do almost all of the work of actually creating a useful class.
897 // The purpose of this function and its many helper functions above is merely
898 // to output a Python version of the descriptors, which the metaclass in
899 // reflection.py will use to construct the meat of the class itself.
900 //
901 // Mutually recursive with PrintNestedMessages().
902 // Collect nested message names to_register for the symbol_database.
PrintMessage(const Descriptor & message_descriptor,absl::string_view prefix,std::vector<std::string> * to_register,bool is_nested) const903 void Generator::PrintMessage(const Descriptor& message_descriptor,
904 absl::string_view prefix,
905 std::vector<std::string>* to_register,
906 bool is_nested) const {
907 std::string qualified_name;
908 if (is_nested) {
909 if (IsPythonKeyword(message_descriptor.name())) {
910 qualified_name = absl::StrCat("getattr(", prefix, ", '",
911 message_descriptor.name(), "')");
912 } else {
913 qualified_name = absl::StrCat(prefix, ".", message_descriptor.name());
914 }
915 printer_->Print(
916 "'$name$' : _reflection.GeneratedProtocolMessageType('$name$', "
917 "(_message.Message,), {\n",
918 "name", message_descriptor.name());
919 } else {
920 qualified_name = ResolveKeyword(message_descriptor.name());
921 printer_->Print(
922 "$qualified_name$ = _reflection.GeneratedProtocolMessageType('$name$', "
923 "(_message.Message,), {\n",
924 "qualified_name", qualified_name, "name", message_descriptor.name());
925 }
926 printer_->Indent();
927
928 to_register->push_back(qualified_name);
929
930 PrintNestedMessages(message_descriptor, qualified_name, to_register);
931 absl::flat_hash_map<absl::string_view, std::string> m;
932 m["descriptor_key"] = kDescriptorKey;
933 m["descriptor_name"] = ModuleLevelDescriptorName(message_descriptor);
934 printer_->Print(m, "'$descriptor_key$' : $descriptor_name$,\n");
935 std::string module_name = ModuleName(file_->name());
936 if (!opensource_runtime_) {
937 module_name =
938 std::string(absl::StripPrefix(module_name, kThirdPartyPrefix));
939 }
940 printer_->Print("'__module__' : '$module_name$'\n", "module_name",
941 module_name);
942 printer_->Print("# @@protoc_insertion_point(class_scope:$full_name$)\n",
943 "full_name", message_descriptor.full_name());
944 printer_->Print("})\n");
945 printer_->Outdent();
946 }
947
948 // Prints all nested messages within |containing_descriptor|.
949 // Mutually recursive with PrintMessage().
PrintNestedMessages(const Descriptor & containing_descriptor,absl::string_view prefix,std::vector<std::string> * to_register) const950 void Generator::PrintNestedMessages(
951 const Descriptor& containing_descriptor, absl::string_view prefix,
952 std::vector<std::string>* to_register) const {
953 for (int i = 0; i < containing_descriptor.nested_type_count(); ++i) {
954 printer_->Print("\n");
955 PrintMessage(*containing_descriptor.nested_type(i), prefix, to_register,
956 true);
957 printer_->Print(",\n");
958 }
959 }
960
961 // Recursively fixes foreign fields in all nested types in |descriptor|, then
962 // sets the message_type and enum_type of all message and enum fields to point
963 // to their respective descriptors.
964 // Args:
965 // descriptor: descriptor to print fields for.
966 // containing_descriptor: if descriptor is a nested type, this is its
967 // containing type, or NULL if this is a root/top-level type.
FixForeignFieldsInDescriptor(const Descriptor & descriptor,const Descriptor * containing_descriptor) const968 void Generator::FixForeignFieldsInDescriptor(
969 const Descriptor& descriptor,
970 const Descriptor* containing_descriptor) const {
971 for (int i = 0; i < descriptor.nested_type_count(); ++i) {
972 FixForeignFieldsInDescriptor(*descriptor.nested_type(i), &descriptor);
973 }
974
975 for (int i = 0; i < descriptor.field_count(); ++i) {
976 const FieldDescriptor& field_descriptor = *descriptor.field(i);
977 FixForeignFieldsInField(&descriptor, field_descriptor, "fields_by_name");
978 }
979
980 FixContainingTypeInDescriptor(descriptor, containing_descriptor);
981 for (int i = 0; i < descriptor.enum_type_count(); ++i) {
982 const EnumDescriptor& enum_descriptor = *descriptor.enum_type(i);
983 FixContainingTypeInDescriptor(enum_descriptor, &descriptor);
984 }
985 for (int i = 0; i < descriptor.oneof_decl_count(); ++i) {
986 absl::flat_hash_map<absl::string_view, std::string> m;
987 const OneofDescriptor* oneof = descriptor.oneof_decl(i);
988 m["descriptor_name"] = ModuleLevelDescriptorName(descriptor);
989 m["oneof_name"] = oneof->name();
990 for (int j = 0; j < oneof->field_count(); ++j) {
991 m["field_name"] = oneof->field(j)->name();
992 printer_->Print(
993 m,
994 "$descriptor_name$.oneofs_by_name['$oneof_name$'].fields.append(\n"
995 " $descriptor_name$.fields_by_name['$field_name$'])\n");
996 printer_->Print(
997 m,
998 "$descriptor_name$.fields_by_name['$field_name$'].containing_oneof = "
999 "$descriptor_name$.oneofs_by_name['$oneof_name$']\n");
1000 }
1001 }
1002 }
1003
AddMessageToFileDescriptor(const Descriptor & descriptor) const1004 void Generator::AddMessageToFileDescriptor(const Descriptor& descriptor) const {
1005 absl::flat_hash_map<absl::string_view, std::string> m;
1006 m["descriptor_name"] = kDescriptorKey;
1007 m["message_name"] = descriptor.name();
1008 m["message_descriptor_name"] = ModuleLevelDescriptorName(descriptor);
1009 const char file_descriptor_template[] =
1010 "$descriptor_name$.message_types_by_name['$message_name$'] = "
1011 "$message_descriptor_name$\n";
1012 printer_->Print(m, file_descriptor_template);
1013 }
1014
AddServiceToFileDescriptor(const ServiceDescriptor & descriptor) const1015 void Generator::AddServiceToFileDescriptor(
1016 const ServiceDescriptor& descriptor) const {
1017 absl::flat_hash_map<absl::string_view, std::string> m;
1018 m["descriptor_name"] = kDescriptorKey;
1019 m["service_name"] = descriptor.name();
1020 m["service_descriptor_name"] = ModuleLevelServiceDescriptorName(descriptor);
1021 const char file_descriptor_template[] =
1022 "$descriptor_name$.services_by_name['$service_name$'] = "
1023 "$service_descriptor_name$\n";
1024 printer_->Print(m, file_descriptor_template);
1025 }
1026
AddEnumToFileDescriptor(const EnumDescriptor & descriptor) const1027 void Generator::AddEnumToFileDescriptor(
1028 const EnumDescriptor& descriptor) const {
1029 absl::flat_hash_map<absl::string_view, std::string> m;
1030 m["descriptor_name"] = kDescriptorKey;
1031 m["enum_name"] = descriptor.name();
1032 m["enum_descriptor_name"] = ModuleLevelDescriptorName(descriptor);
1033 const char file_descriptor_template[] =
1034 "$descriptor_name$.enum_types_by_name['$enum_name$'] = "
1035 "$enum_descriptor_name$\n";
1036 printer_->Print(m, file_descriptor_template);
1037 }
1038
AddExtensionToFileDescriptor(const FieldDescriptor & descriptor) const1039 void Generator::AddExtensionToFileDescriptor(
1040 const FieldDescriptor& descriptor) const {
1041 absl::flat_hash_map<absl::string_view, std::string> m;
1042 m["descriptor_name"] = kDescriptorKey;
1043 m["field_name"] = descriptor.name();
1044 m["resolved_name"] = ResolveKeyword(descriptor.name());
1045 const char file_descriptor_template[] =
1046 "$descriptor_name$.extensions_by_name['$field_name$'] = "
1047 "$resolved_name$\n";
1048 printer_->Print(m, file_descriptor_template);
1049 }
1050
1051 // Sets any necessary message_type and enum_type attributes
1052 // for the Python version of |field|.
1053 //
1054 // containing_type may be NULL, in which case this is a module-level field.
1055 //
1056 // python_dict_name is the name of the Python dict where we should
1057 // look the field up in the containing type. (e.g., fields_by_name
1058 // or extensions_by_name). We ignore python_dict_name if containing_type
1059 // is NULL.
FixForeignFieldsInField(const Descriptor * containing_type,const FieldDescriptor & field,absl::string_view python_dict_name) const1060 void Generator::FixForeignFieldsInField(
1061 const Descriptor* containing_type, const FieldDescriptor& field,
1062 absl::string_view python_dict_name) const {
1063 const std::string field_referencing_expression =
1064 FieldReferencingExpression(containing_type, field, python_dict_name);
1065 absl::flat_hash_map<absl::string_view, std::string> m;
1066 m["field_ref"] = field_referencing_expression;
1067 const Descriptor* foreign_message_type = field.message_type();
1068 if (foreign_message_type) {
1069 m["foreign_type"] = ModuleLevelDescriptorName(*foreign_message_type);
1070 printer_->Print(m, "$field_ref$.message_type = $foreign_type$\n");
1071 }
1072 const EnumDescriptor* enum_type = field.enum_type();
1073 if (enum_type) {
1074 m["enum_type"] = ModuleLevelDescriptorName(*enum_type);
1075 printer_->Print(m, "$field_ref$.enum_type = $enum_type$\n");
1076 }
1077 }
1078
1079 // Returns the module-level expression for the given FieldDescriptor.
1080 // Only works for fields in the .proto file this Generator is generating for.
1081 //
1082 // containing_type may be NULL, in which case this is a module-level field.
1083 //
1084 // python_dict_name is the name of the Python dict where we should
1085 // look the field up in the containing type. (e.g., fields_by_name
1086 // or extensions_by_name). We ignore python_dict_name if containing_type
1087 // is NULL.
FieldReferencingExpression(const Descriptor * containing_type,const FieldDescriptor & field,absl::string_view python_dict_name) const1088 std::string Generator::FieldReferencingExpression(
1089 const Descriptor* containing_type, const FieldDescriptor& field,
1090 absl::string_view python_dict_name) const {
1091 // We should only ever be looking up fields in the current file.
1092 // The only things we refer to from other files are message descriptors.
1093 ABSL_CHECK_EQ(field.file(), file_)
1094 << field.file()->name() << " vs. " << file_->name();
1095 if (!containing_type) {
1096 return ResolveKeyword(field.name());
1097 }
1098 return absl::Substitute("$0.$1['$2']",
1099 ModuleLevelDescriptorName(*containing_type),
1100 python_dict_name, field.name());
1101 }
1102
1103 // Prints containing_type for nested descriptors or enum descriptors.
1104 template <typename DescriptorT>
FixContainingTypeInDescriptor(const DescriptorT & descriptor,const Descriptor * containing_descriptor) const1105 void Generator::FixContainingTypeInDescriptor(
1106 const DescriptorT& descriptor,
1107 const Descriptor* containing_descriptor) const {
1108 if (containing_descriptor != nullptr) {
1109 const std::string nested_name = ModuleLevelDescriptorName(descriptor);
1110 const std::string parent_name =
1111 ModuleLevelDescriptorName(*containing_descriptor);
1112 printer_->Print("$nested_name$.containing_type = $parent_name$\n",
1113 "nested_name", nested_name, "parent_name", parent_name);
1114 }
1115 }
1116
1117 // Prints statements setting the message_type and enum_type fields in the
1118 // Python descriptor objects we've already output in the file. We must
1119 // do this in a separate step due to circular references (otherwise, we'd
1120 // just set everything in the initial assignment statements).
FixForeignFieldsInDescriptors() const1121 void Generator::FixForeignFieldsInDescriptors() const {
1122 for (int i = 0; i < file_->message_type_count(); ++i) {
1123 FixForeignFieldsInDescriptor(*file_->message_type(i), nullptr);
1124 }
1125 for (int i = 0; i < file_->message_type_count(); ++i) {
1126 AddMessageToFileDescriptor(*file_->message_type(i));
1127 }
1128 for (int i = 0; i < file_->enum_type_count(); ++i) {
1129 AddEnumToFileDescriptor(*file_->enum_type(i));
1130 }
1131 for (int i = 0; i < file_->extension_count(); ++i) {
1132 AddExtensionToFileDescriptor(*file_->extension(i));
1133 }
1134
1135 // TODO: Move this register to PrintFileDescriptor() when
1136 // FieldDescriptor.file is added in generated file.
1137 printer_->Print("_sym_db.RegisterFileDescriptor($name$)\n", "name",
1138 kDescriptorKey);
1139 printer_->Print("\n");
1140 }
1141
1142 // Returns a Python expression that instantiates a Python EnumValueDescriptor
1143 // object for the given C++ descriptor.
PrintEnumValueDescriptor(const EnumValueDescriptor & descriptor,const EnumValueDescriptorProto & proto) const1144 void Generator::PrintEnumValueDescriptor(
1145 const EnumValueDescriptor& descriptor,
1146 const EnumValueDescriptorProto& proto) const {
1147 // TODO: Fix up EnumValueDescriptor "type" fields.
1148 // More circular references. ::sigh::
1149 std::string options_string;
1150 proto.options().SerializeToString(&options_string);
1151 absl::flat_hash_map<absl::string_view, std::string> m;
1152 m["name"] = descriptor.name();
1153 m["index"] = absl::StrCat(descriptor.index());
1154 m["number"] = absl::StrCat(descriptor.number());
1155 m["options"] = OptionsValue(options_string);
1156 printer_->Print(m,
1157 "_descriptor.EnumValueDescriptor(\n"
1158 " name='$name$', index=$index$, number=$number$,\n"
1159 " serialized_options=$options$,\n"
1160 " type=None,\n"
1161 " create_key=_descriptor._internal_create_key)");
1162 }
1163
1164 // Prints an expression for a Python FieldDescriptor for |field|.
PrintFieldDescriptor(const FieldDescriptor & field,const FieldDescriptorProto & proto) const1165 void Generator::PrintFieldDescriptor(const FieldDescriptor& field,
1166 const FieldDescriptorProto& proto) const {
1167 std::string options_string;
1168 proto.options().SerializeToString(&options_string);
1169 absl::flat_hash_map<absl::string_view, std::string> m;
1170 m["name"] = field.name();
1171 m["full_name"] = field.full_name();
1172 m["index"] = absl::StrCat(field.index());
1173 m["number"] = absl::StrCat(field.number());
1174 m["type"] = absl::StrCat(field.type());
1175 m["cpp_type"] = absl::StrCat(field.cpp_type());
1176 m["label"] = absl::StrCat(field.label());
1177 m["has_default_value"] = field.has_default_value() ? "True" : "False";
1178 m["default_value"] = StringifyDefaultValue(field);
1179 m["is_extension"] = field.is_extension() ? "True" : "False";
1180 m["serialized_options"] = OptionsValue(options_string);
1181 m["json_name"] = field.has_json_name()
1182 ? absl::StrCat(", json_name='", field.json_name(), "'")
1183 : "";
1184 // We always set message_type and enum_type to None at this point, and then
1185 // these fields in correctly after all referenced descriptors have been
1186 // defined and/or imported (see FixForeignFieldsInDescriptors()).
1187 const char field_descriptor_decl[] =
1188 "_descriptor.FieldDescriptor(\n"
1189 " name='$name$', full_name='$full_name$', index=$index$,\n"
1190 " number=$number$, type=$type$, cpp_type=$cpp_type$, label=$label$,\n"
1191 " has_default_value=$has_default_value$, "
1192 "default_value=$default_value$,\n"
1193 " message_type=None, enum_type=None, containing_type=None,\n"
1194 " is_extension=$is_extension$, extension_scope=None,\n"
1195 " serialized_options=$serialized_options$$json_name$, file=DESCRIPTOR,"
1196 " create_key=_descriptor._internal_create_key)";
1197 printer_->Print(m, field_descriptor_decl);
1198 }
1199
1200 // Helper for Print{Fields,Extensions}InDescriptor().
PrintFieldDescriptorsInDescriptor(const Descriptor & message_descriptor,const DescriptorProto & proto,bool is_extension,absl::string_view list_variable_name) const1201 void Generator::PrintFieldDescriptorsInDescriptor(
1202 const Descriptor& message_descriptor, const DescriptorProto& proto,
1203 bool is_extension, absl::string_view list_variable_name) const {
1204 printer_->Print("$list$=[\n", "list", list_variable_name);
1205 printer_->Indent();
1206 int count = is_extension ? message_descriptor.extension_count()
1207 : message_descriptor.field_count();
1208 for (int i = 0; i < count; ++i) {
1209 PrintFieldDescriptor(is_extension ? *message_descriptor.extension(i)
1210 : *message_descriptor.field(i),
1211 is_extension ? proto.extension(i) : proto.field(i));
1212 printer_->Print(",\n");
1213 }
1214 printer_->Outdent();
1215 printer_->Print("],\n");
1216 }
1217
1218 // Prints a statement assigning "fields" to a list of Python FieldDescriptors,
1219 // one for each field present in message_descriptor.
PrintFieldsInDescriptor(const Descriptor & message_descriptor,const DescriptorProto & proto) const1220 void Generator::PrintFieldsInDescriptor(const Descriptor& message_descriptor,
1221 const DescriptorProto& proto) const {
1222 const bool is_extension = false;
1223 PrintFieldDescriptorsInDescriptor(message_descriptor, proto, is_extension,
1224 "fields");
1225 }
1226
1227 // Prints a statement assigning "extensions" to a list of Python
1228 // FieldDescriptors, one for each extension present in message_descriptor.
PrintExtensionsInDescriptor(const Descriptor & message_descriptor,const DescriptorProto & proto) const1229 void Generator::PrintExtensionsInDescriptor(
1230 const Descriptor& message_descriptor, const DescriptorProto& proto) const {
1231 const bool is_extension = true;
1232 PrintFieldDescriptorsInDescriptor(message_descriptor, proto, is_extension,
1233 "extensions");
1234 }
1235
GeneratingDescriptorProto() const1236 bool Generator::GeneratingDescriptorProto() const {
1237 return file_->name() == "net/proto2/proto/descriptor.proto" ||
1238 file_->name() == "google/protobuf/descriptor.proto";
1239 }
1240
1241 // Returns the unique Python module-level identifier given to a descriptor.
1242 // This name is module-qualified iff the given descriptor describes an
1243 // entity that doesn't come from the current file.
1244 template <typename DescriptorT>
ModuleLevelDescriptorName(const DescriptorT & descriptor) const1245 std::string Generator::ModuleLevelDescriptorName(
1246 const DescriptorT& descriptor) const {
1247 // FIXME(robinson):
1248 // We currently don't worry about collisions with underscores in the type
1249 // names, so these would collide in nasty ways if found in the same file:
1250 // OuterProto.ProtoA.ProtoB
1251 // OuterProto_ProtoA.ProtoB # Underscore instead of period.
1252 // As would these:
1253 // OuterProto.ProtoA_.ProtoB
1254 // OuterProto.ProtoA._ProtoB # Leading vs. trailing underscore.
1255 // (Contrived, but certainly possible).
1256 //
1257 // The C++ implementation doesn't guard against this either. Leaving
1258 // it for now...
1259 std::string name = NamePrefixedWithNestedTypes(descriptor, "_");
1260 absl::AsciiStrToUpper(&name);
1261 // Module-private for now. Easy to make public later; almost impossible
1262 // to make private later.
1263 name = absl::StrCat("_", name);
1264 // We now have the name relative to its own module. Also qualify with
1265 // the module name iff this descriptor is from a different .proto file.
1266 if (descriptor.file() != file_) {
1267 name = absl::StrCat(ModuleAlias(descriptor.file()->name()), ".", name);
1268 }
1269 return name;
1270 }
1271
1272 // Returns the name of the message class itself, not the descriptor.
1273 // Like ModuleLevelDescriptorName(), module-qualifies the name iff
1274 // the given descriptor describes an entity that doesn't come from
1275 // the current file.
ModuleLevelMessageName(const Descriptor & descriptor) const1276 std::string Generator::ModuleLevelMessageName(
1277 const Descriptor& descriptor) const {
1278 std::string name = NamePrefixedWithNestedTypes(descriptor, ".");
1279 if (descriptor.file() != file_) {
1280 name = absl::StrCat(ModuleAlias(descriptor.file()->name()), ".", name);
1281 }
1282 return name;
1283 }
1284
1285 // Returns the unique Python module-level identifier given to a service
1286 // descriptor.
ModuleLevelServiceDescriptorName(const ServiceDescriptor & descriptor) const1287 std::string Generator::ModuleLevelServiceDescriptorName(
1288 const ServiceDescriptor& descriptor) const {
1289 std::string name = absl::StrCat("_", descriptor.name());
1290 absl::AsciiStrToUpper(&name);
1291 if (descriptor.file() != file_) {
1292 name = absl::StrCat(ModuleAlias(descriptor.file()->name()), ".", name);
1293 }
1294 return name;
1295 }
1296
1297 // Prints descriptor offsets _serialized_start and _serialized_end.
1298 // Args:
1299 // descriptor_proto: The descriptor proto to have a serialized reference.
1300 // Example printer output:
1301 // _globals['_MYMESSAGE']._serialized_start=47
1302 // _globals['_MYMESSAGE']._serialized_end=76
1303 template <typename DescriptorProtoT>
PrintSerializedPbInterval(const DescriptorProtoT & descriptor_proto,absl::string_view name) const1304 void Generator::PrintSerializedPbInterval(
1305 const DescriptorProtoT& descriptor_proto, absl::string_view name) const {
1306 std::string sp;
1307 descriptor_proto.SerializeToString(&sp);
1308 size_t offset = file_descriptor_serialized_.find(sp);
1309 ABSL_CHECK_GE(offset, 0);
1310
1311 printer_->Print(
1312 "_globals['$name$']._serialized_start=$serialized_start$\n"
1313 "_globals['$name$']._serialized_end=$serialized_end$\n",
1314 "name", name, "serialized_start", absl::StrCat(offset), "serialized_end",
1315 absl::StrCat(offset + sp.size()));
1316 }
1317
1318 template <typename DescriptorT>
PrintDescriptorOptionsFixingCode(const DescriptorT & descriptor,const typename DescriptorT::Proto & proto,absl::string_view descriptor_str) const1319 bool Generator::PrintDescriptorOptionsFixingCode(
1320 const DescriptorT& descriptor, const typename DescriptorT::Proto& proto,
1321 absl::string_view descriptor_str) const {
1322 std::string options = OptionsValue(proto.options().SerializeAsString());
1323
1324 // Reset the _options to None thus DescriptorBase.GetOptions() can
1325 // parse _options again after extensions are registered.
1326 size_t dot_pos = descriptor_str.find('.');
1327 std::string descriptor_name;
1328 if (dot_pos == std::string::npos) {
1329 descriptor_name = absl::StrCat("_globals['", descriptor_str, "']");
1330 } else {
1331 descriptor_name =
1332 absl::StrCat("_globals['", descriptor_str.substr(0, dot_pos), "']",
1333 descriptor_str.substr(dot_pos));
1334 }
1335
1336 if (options == "None") {
1337 return false;
1338 }
1339
1340 printer_->Print(
1341 "$descriptor_name$._loaded_options = None\n"
1342 "$descriptor_name$._serialized_options = $serialized_value$\n",
1343 "descriptor_name", descriptor_name, "serialized_value", options);
1344 return true;
1345 }
1346
1347 // Generates the start and end offsets for each entity in the serialized file
1348 // descriptor. The file argument must exactly match what was serialized into
1349 // file_descriptor_serialized_, and should already have had any
1350 // source-retention options stripped out. This is important because we need an
1351 // exact byte-for-byte match so that we can successfully find the correct
1352 // offsets in the serialized descriptors.
SetSerializedPbInterval(const FileDescriptorProto & file) const1353 void Generator::SetSerializedPbInterval(const FileDescriptorProto& file) const {
1354 // Top level enums.
1355 for (int i = 0; i < file_->enum_type_count(); ++i) {
1356 const EnumDescriptor& descriptor = *file_->enum_type(i);
1357 PrintSerializedPbInterval(file.enum_type(i),
1358 ModuleLevelDescriptorName(descriptor));
1359 }
1360
1361 // Messages.
1362 for (int i = 0; i < file_->message_type_count(); ++i) {
1363 SetMessagePbInterval(file.message_type(i), *file_->message_type(i));
1364 }
1365
1366 // Services.
1367 for (int i = 0; i < file_->service_count(); ++i) {
1368 const ServiceDescriptor& service = *file_->service(i);
1369 PrintSerializedPbInterval(file.service(i),
1370 ModuleLevelServiceDescriptorName(service));
1371 }
1372 }
1373
SetMessagePbInterval(const DescriptorProto & message_proto,const Descriptor & descriptor) const1374 void Generator::SetMessagePbInterval(const DescriptorProto& message_proto,
1375 const Descriptor& descriptor) const {
1376 PrintSerializedPbInterval(message_proto,
1377 ModuleLevelDescriptorName(descriptor));
1378
1379 // Nested messages.
1380 for (int i = 0; i < descriptor.nested_type_count(); ++i) {
1381 SetMessagePbInterval(message_proto.nested_type(i),
1382 *descriptor.nested_type(i));
1383 }
1384
1385 for (int i = 0; i < descriptor.enum_type_count(); ++i) {
1386 const EnumDescriptor& enum_des = *descriptor.enum_type(i);
1387 PrintSerializedPbInterval(message_proto.enum_type(i),
1388 ModuleLevelDescriptorName(enum_des));
1389 }
1390 }
1391
1392 // Prints expressions that set the options field of all descriptors.
FixAllDescriptorOptions() const1393 void Generator::FixAllDescriptorOptions() const {
1394 // Prints an expression that sets the file descriptor's options.
1395 if (!PrintDescriptorOptionsFixingCode(*file_, proto_, kDescriptorKey)) {
1396 printer_->Print("DESCRIPTOR._loaded_options = None\n");
1397 }
1398 // Prints expressions that set the options for all top level enums.
1399 for (int i = 0; i < file_->enum_type_count(); ++i) {
1400 FixOptionsForEnum(*file_->enum_type(i), proto_.enum_type(i));
1401 }
1402 // Prints expressions that set the options for all top level extensions.
1403 for (int i = 0; i < file_->extension_count(); ++i) {
1404 FixOptionsForField(*file_->extension(i), proto_.extension(i));
1405 }
1406 // Prints expressions that set the options for all messages, nested enums,
1407 // nested extensions and message fields.
1408 for (int i = 0; i < file_->message_type_count(); ++i) {
1409 FixOptionsForMessage(*file_->message_type(i), proto_.message_type(i));
1410 }
1411
1412 for (int i = 0; i < file_->service_count(); ++i) {
1413 FixOptionsForService(*file_->service(i), proto_.service(i));
1414 }
1415 }
1416
FixOptionsForOneof(const OneofDescriptor & oneof,const OneofDescriptorProto & proto) const1417 void Generator::FixOptionsForOneof(const OneofDescriptor& oneof,
1418 const OneofDescriptorProto& proto) const {
1419 std::string oneof_name = absl::Substitute(
1420 "$0.$1['$2']", ModuleLevelDescriptorName(*oneof.containing_type()),
1421 "oneofs_by_name", oneof.name());
1422 PrintDescriptorOptionsFixingCode(oneof, proto, oneof_name);
1423 }
1424
1425 // Prints expressions that set the options for an enum descriptor and its
1426 // value descriptors.
FixOptionsForEnum(const EnumDescriptor & enum_descriptor,const EnumDescriptorProto & proto) const1427 void Generator::FixOptionsForEnum(const EnumDescriptor& enum_descriptor,
1428 const EnumDescriptorProto& proto) const {
1429 std::string descriptor_name = ModuleLevelDescriptorName(enum_descriptor);
1430 PrintDescriptorOptionsFixingCode(enum_descriptor, proto, descriptor_name);
1431 for (int i = 0; i < enum_descriptor.value_count(); ++i) {
1432 const EnumValueDescriptor& value_descriptor = *enum_descriptor.value(i);
1433 PrintDescriptorOptionsFixingCode(
1434 value_descriptor, proto.value(i),
1435 absl::StrFormat("%s.values_by_name[\"%s\"]", descriptor_name.c_str(),
1436 value_descriptor.name()));
1437 }
1438 }
1439
1440 // Prints expressions that set the options for an service descriptor and its
1441 // value descriptors.
FixOptionsForService(const ServiceDescriptor & service_descriptor,const ServiceDescriptorProto & proto) const1442 void Generator::FixOptionsForService(
1443 const ServiceDescriptor& service_descriptor,
1444 const ServiceDescriptorProto& proto) const {
1445 std::string descriptor_name =
1446 ModuleLevelServiceDescriptorName(service_descriptor);
1447 PrintDescriptorOptionsFixingCode(service_descriptor, proto, descriptor_name);
1448
1449 for (int i = 0; i < service_descriptor.method_count(); ++i) {
1450 const MethodDescriptor* method = service_descriptor.method(i);
1451 std::string method_name = absl::StrCat(
1452 descriptor_name, ".methods_by_name['", method->name(), "']");
1453 PrintDescriptorOptionsFixingCode(*method, proto.method(i), method_name);
1454 }
1455 }
1456
1457 // Prints expressions that set the options for field descriptors (including
1458 // extensions).
FixOptionsForField(const FieldDescriptor & field,const FieldDescriptorProto & proto) const1459 void Generator::FixOptionsForField(const FieldDescriptor& field,
1460 const FieldDescriptorProto& proto) const {
1461 std::string field_name;
1462 if (field.is_extension()) {
1463 if (field.extension_scope() == nullptr) {
1464 // Top level extensions.
1465 field_name = field.name();
1466 } else {
1467 field_name = FieldReferencingExpression(field.extension_scope(), field,
1468 "extensions_by_name");
1469 }
1470 } else {
1471 field_name = FieldReferencingExpression(field.containing_type(), field,
1472 "fields_by_name");
1473 }
1474 PrintDescriptorOptionsFixingCode(field, proto, field_name);
1475 }
1476
1477 // Prints expressions that set the options for a message and all its inner
1478 // types (nested messages, nested enums, extensions, fields).
FixOptionsForMessage(const Descriptor & descriptor,const DescriptorProto & proto) const1479 void Generator::FixOptionsForMessage(const Descriptor& descriptor,
1480 const DescriptorProto& proto) const {
1481 // Nested messages.
1482 for (int i = 0; i < descriptor.nested_type_count(); ++i) {
1483 FixOptionsForMessage(*descriptor.nested_type(i), proto.nested_type(i));
1484 }
1485 // Oneofs.
1486 for (int i = 0; i < descriptor.oneof_decl_count(); ++i) {
1487 FixOptionsForOneof(*descriptor.oneof_decl(i), proto.oneof_decl(i));
1488 }
1489 // Enums.
1490 for (int i = 0; i < descriptor.enum_type_count(); ++i) {
1491 FixOptionsForEnum(*descriptor.enum_type(i), proto.enum_type(i));
1492 }
1493 // Fields.
1494 for (int i = 0; i < descriptor.field_count(); ++i) {
1495 const FieldDescriptor& field = *descriptor.field(i);
1496 FixOptionsForField(field, proto.field(i));
1497 }
1498 // Extensions.
1499 for (int i = 0; i < descriptor.extension_count(); ++i) {
1500 const FieldDescriptor& field = *descriptor.extension(i);
1501 FixOptionsForField(field, proto.extension(i));
1502 }
1503 // Message option for this message.
1504 PrintDescriptorOptionsFixingCode(descriptor, proto,
1505 ModuleLevelDescriptorName(descriptor));
1506 }
1507
1508 // If a dependency forwards other files through public dependencies, let's
1509 // copy over the corresponding module aliases.
CopyPublicDependenciesAliases(absl::string_view copy_from,const FileDescriptor * file) const1510 void Generator::CopyPublicDependenciesAliases(
1511 absl::string_view copy_from, const FileDescriptor* file) const {
1512 for (int i = 0; i < file->public_dependency_count(); ++i) {
1513 std::string module_name = ModuleName(file->public_dependency(i)->name());
1514 std::string module_alias = ModuleAlias(file->public_dependency(i)->name());
1515 // There's no module alias in the dependent file if it was generated by
1516 // an old protoc (less than 3.0.0-alpha-1). Use module name in this
1517 // situation.
1518 printer_->Print(
1519 "try:\n"
1520 " $alias$ = $copy_from$.$alias$\n"
1521 "except AttributeError:\n"
1522 " $alias$ = $copy_from$.$module$\n",
1523 "alias", module_alias, "module", module_name, "copy_from", copy_from);
1524 CopyPublicDependenciesAliases(copy_from, file->public_dependency(i));
1525 }
1526 }
1527
1528 } // namespace python
1529 } // namespace compiler
1530 } // namespace protobuf
1531 } // namespace google
1532