• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 //
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file or at
6 // https://developers.google.com/open-source/licenses/bsd
7 
8 // Author: robinson@google.com (Will Robinson)
9 //
10 // This module outputs pure-Python protocol message classes that will
11 // largely be constructed at runtime via the metaclass in reflection.py.
12 // In other words, our job is basically to output a Python equivalent
13 // of the C++ *Descriptor objects, and fix up all circular references
14 // within these objects.
15 //
16 // Note that the runtime performance of protocol message classes created in
17 // this way is expected to be lousy.  The plan is to create an alternate
18 // generator that outputs a Python/C extension module that lets
19 // performance-minded Python code leverage the fast C++ implementation
20 // directly.
21 
22 #include "google/protobuf/compiler/python/generator.h"
23 
24 #include <cstddef>
25 #include <limits>
26 #include <memory>
27 #include <string>
28 #include <utility>
29 #include <vector>
30 
31 #include "absl/container/flat_hash_map.h"
32 #include "absl/container/flat_hash_set.h"
33 #include "absl/log/absl_check.h"
34 #include "absl/log/absl_log.h"
35 #include "absl/memory/memory.h"
36 #include "absl/strings/ascii.h"
37 #include "absl/strings/escaping.h"
38 #include "absl/strings/str_cat.h"
39 #include "absl/strings/str_format.h"
40 #include "absl/strings/str_join.h"
41 #include "absl/strings/str_replace.h"
42 #include "absl/strings/string_view.h"
43 #include "absl/strings/strip.h"
44 #include "absl/strings/substitute.h"
45 #include "google/protobuf/compiler/code_generator.h"
46 #include "google/protobuf/compiler/python/helpers.h"
47 #include "google/protobuf/compiler/python/pyi_generator.h"
48 #include "google/protobuf/compiler/retention.h"
49 #include "google/protobuf/compiler/versions.h"
50 #include "google/protobuf/descriptor.h"
51 #include "google/protobuf/descriptor.pb.h"
52 #include "google/protobuf/descriptor_visitor.h"
53 #include "google/protobuf/dynamic_message.h"
54 #include "google/protobuf/io/printer.h"
55 #include "google/protobuf/io/strtod.h"
56 #include "google/protobuf/io/zero_copy_stream.h"
57 #include "google/protobuf/message.h"
58 
59 namespace google {
60 namespace protobuf {
61 namespace compiler {
62 namespace python {
63 
64 namespace {
65 // Returns the alias we assign to the module of the given .proto filename
66 // when importing. See testPackageInitializationImport in
67 // third_party/py/google/protobuf/internal/reflection_test.py
68 // to see why we need the alias.
ModuleAlias(absl::string_view filename)69 std::string ModuleAlias(absl::string_view filename) {
70   std::string module_name = ModuleName(filename);
71   // We can't have dots in the module name, so we replace each with _dot_.
72   // But that could lead to a collision between a.b and a_dot_b, so we also
73   // duplicate each underscore.
74   absl::StrReplaceAll({{"_", "__"}}, &module_name);
75   absl::StrReplaceAll({{".", "_dot_"}}, &module_name);
76   return module_name;
77 }
78 
// Class attribute under which the generated Python class keeps its
// descriptor.Descriptor instance. Must stay consistent with the
// _DESCRIPTOR_KEY constant in proto2/public/reflection.py.
constexpr char kDescriptorKey[] = "DESCRIPTOR";

// Module-name prefix that is stripped from module names whenever the
// generator is not targeting the open-source runtime.
constexpr char kThirdPartyPrefix[] = "google3.third_party.py.";
86 
87 // Returns a Python literal giving the default value for a field.
88 // If the field specifies no explicit default value, we'll return
89 // the default default value for the field type (zero for numbers,
90 // empty string for strings, empty list for repeated fields, and
91 // None for non-repeated, composite fields).
92 //
93 // TODO: Unify with code from
94 // //compiler/cpp/internal/primitive_field.cc
95 // //compiler/cpp/internal/enum_field.cc
96 // //compiler/cpp/internal/string_field.cc
StringifyDefaultValue(const FieldDescriptor & field)97 std::string StringifyDefaultValue(const FieldDescriptor& field) {
98   if (field.is_repeated()) {
99     return "[]";
100   }
101 
102   switch (field.cpp_type()) {
103     case FieldDescriptor::CPPTYPE_INT32:
104       return absl::StrCat(field.default_value_int32());
105     case FieldDescriptor::CPPTYPE_UINT32:
106       return absl::StrCat(field.default_value_uint32());
107     case FieldDescriptor::CPPTYPE_INT64:
108       return absl::StrCat(field.default_value_int64());
109     case FieldDescriptor::CPPTYPE_UINT64:
110       return absl::StrCat(field.default_value_uint64());
111     case FieldDescriptor::CPPTYPE_DOUBLE: {
112       double value = field.default_value_double();
113       if (value == std::numeric_limits<double>::infinity()) {
114         // Python pre-2.6 on Windows does not parse "inf" correctly.  However,
115         // a numeric literal that is too big for a double will become infinity.
116         return "1e10000";
117       } else if (value == -std::numeric_limits<double>::infinity()) {
118         // See above.
119         return "-1e10000";
120       } else if (value != value) {
121         // infinity * 0 = nan
122         return "(1e10000 * 0)";
123       } else {
124         return absl::StrCat("float(", io::SimpleDtoa(value), ")");
125       }
126     }
127     case FieldDescriptor::CPPTYPE_FLOAT: {
128       float value = field.default_value_float();
129       if (value == std::numeric_limits<float>::infinity()) {
130         // Python pre-2.6 on Windows does not parse "inf" correctly.  However,
131         // a numeric literal that is too big for a double will become infinity.
132         return "1e10000";
133       } else if (value == -std::numeric_limits<float>::infinity()) {
134         // See above.
135         return "-1e10000";
136       } else if (value != value) {
137         // infinity - infinity = nan
138         return "(1e10000 * 0)";
139       } else {
140         return absl::StrCat("float(", io::SimpleFtoa(value), ")");
141       }
142     }
143     case FieldDescriptor::CPPTYPE_BOOL:
144       return field.default_value_bool() ? "True" : "False";
145     case FieldDescriptor::CPPTYPE_ENUM:
146       return absl::StrCat(field.default_value_enum()->number());
147     case FieldDescriptor::CPPTYPE_STRING:
148       return absl::StrCat("b\"", absl::CEscape(field.default_value_string()),
149                           (field.type() != FieldDescriptor::TYPE_STRING
150                                ? "\""
151                                : "\".decode('utf-8')"));
152     case FieldDescriptor::CPPTYPE_MESSAGE:
153       return "None";
154   }
155   // (We could add a default case above but then we wouldn't get the nice
156   // compiler warning when a new type is added.)
157   ABSL_LOG(FATAL) << "Not reached.";
158   return "";
159 }
160 
161 // Returns a CEscaped string of serialized_options.
OptionsValue(absl::string_view serialized_options)162 std::string OptionsValue(absl::string_view serialized_options) {
163   if (serialized_options.empty()) {
164     return "None";
165   } else {
166     return absl::StrCat("b'", absl::CEscape(serialized_options), "'");
167   }
168 }
169 
GetLegacySyntaxName(Edition edition)170 std::string GetLegacySyntaxName(Edition edition) {
171   switch (edition) {
172     case Edition::EDITION_PROTO2:
173       return "proto2";
174     case Edition::EDITION_PROTO3:
175       return "proto3";
176     default:
177       return "editions";
178   }
179 }
180 
181 }  // namespace
182 
Generator()183 Generator::Generator() : file_(nullptr) {}
184 
~Generator()185 Generator::~Generator() {}
186 
ParseParameter(absl::string_view parameter,std::string * error) const187 GeneratorOptions Generator::ParseParameter(absl::string_view parameter,
188                                            std::string* error) const {
189   GeneratorOptions options;
190 
191   std::vector<std::pair<std::string, std::string> > option_pairs;
192   ParseGeneratorParameter(parameter, &option_pairs);
193 
194   for (const std::pair<std::string, std::string>& option : option_pairs) {
195     if (!opensource_runtime_ && option.first == "bootstrap") {
196       options.bootstrap = true;
197     } else if (option.first == "pyi_out") {
198       options.generate_pyi = true;
199     } else if (option.first == "annotate_code") {
200       options.annotate_pyi = true;
201     } else if (option.first == "experimental_strip_nonfunctional_codegen") {
202       options.strip_nonfunctional_codegen = true;
203     } else {
204       *error = absl::StrCat("Unknown generator option: ", option.first);
205     }
206   }
207   return options;
208 }
209 
Generate(const FileDescriptor * file,const std::string & parameter,GeneratorContext * context,std::string * error) const210 bool Generator::Generate(const FileDescriptor* file,
211                          const std::string& parameter,
212                          GeneratorContext* context, std::string* error) const {
213   // -----------------------------------------------------------------
214   GeneratorOptions options = ParseParameter(parameter, error);
215   if (!error->empty()) return false;
216 
217   // Generate pyi typing information
218   if (options.generate_pyi) {
219     python::PyiGenerator pyi_generator;
220     std::vector<std::string> pyi_options;
221     if (options.annotate_pyi) {
222       pyi_options.push_back("annotate_code");
223     }
224     if (options.strip_nonfunctional_codegen) {
225       pyi_options.push_back("experimental_strip_nonfunctional_codegen");
226     }
227     if (!pyi_generator.Generate(file, absl::StrJoin(pyi_options, ","), context,
228                                 error)) {
229       return false;
230     }
231   }
232 
233   // Completely serialize all Generate() calls on this instance.  The
234   // thread-safety constraints of the CodeGenerator interface aren't clear so
235   // just be as conservative as possible.  It's easier to relax this later if
236   // we need to, but I doubt it will be an issue.
237   // TODO:  The proper thing to do would be to allocate any state on
238   //   the stack and use that, so that the Generator class itself does not need
239   //   to have any mutable members.  Then it is implicitly thread-safe.
240   absl::MutexLock lock(&mutex_);
241   file_ = file;
242 
243   std::string filename = GetFileName(file, ".py");
244 
245   proto_ = StripSourceRetentionOptions(*file_);
246   proto_.SerializeToString(&file_descriptor_serialized_);
247 
248   if (!opensource_runtime_ && GeneratingDescriptorProto()) {
249     std::string bootstrap_filename =
250         "net/proto2/python/internal/descriptor_pb2.py";
251     if (options.bootstrap) {
252       filename = bootstrap_filename;
253     } else {
254       std::unique_ptr<io::ZeroCopyOutputStream> output(context->Open(filename));
255       io::Printer printer(output.get(), '$');
256       printer.Print(
257           "from google3.net.google.protobuf.python.internal import "
258           "descriptor_pb2\n"
259           "\n");
260 
261       // For static checkers, we need to explicitly assign to the symbols we
262       // publicly export.
263       for (int i = 0; i < file_->message_type_count(); i++) {
264         const Descriptor* message = file_->message_type(i);
265         printer.Print("$name$ = descriptor_pb2.$name$\n", "name",
266                       message->name());
267       }
268 
269       // Sadly some clients access our internal variables (starting with "_").
270       // To support them, we iterate over *all* symbols to expose even the
271       // private ones.  Statically type-checked code should (especially) never
272       // use these, so we don't worry about making them available to pytype
273       // checks.
274       printer.Print(
275           "\n"
276           "globals().update(descriptor_pb2.__dict__)\n"
277           "\n");
278 
279       printer.Print(
280           "# @@protoc_insertion_point(module_scope)\n"
281           "\n");
282       return true;
283     }
284   }
285 
286   std::unique_ptr<io::ZeroCopyOutputStream> output(context->Open(filename));
287   ABSL_CHECK(output.get());
288   io::Printer printer(output.get(), '$');
289   printer_ = &printer;
290 
291   PrintTopBoilerplate();
292   PrintImports();
293   PrintFileDescriptor();
294   printer_->Print("_globals = globals()\n");
295   if (GeneratingDescriptorProto()) {
296     printer_->Print("if not _descriptor._USE_C_DESCRIPTORS:\n");
297     printer_->Indent();
298     // Create enums before message descriptors
299     PrintAllEnumsInFile();
300     PrintMessageDescriptors();
301     FixForeignFieldsInDescriptors();
302     PrintResolvedFeatures();
303     printer_->Outdent();
304     printer_->Print("else:\n");
305     printer_->Indent();
306   }
307   // Find the message descriptors first and then use the message
308   // descriptor to find enums.
309   printer_->Print(
310       "_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)\n");
311   if (GeneratingDescriptorProto()) {
312     printer_->Outdent();
313   }
314   std::string module_name = ModuleName(file->name());
315   if (!opensource_runtime_) {
316     module_name =
317         std::string(absl::StripPrefix(module_name, kThirdPartyPrefix));
318   }
319   printer_->Print(
320       "_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, '$module_name$', "
321       "_globals)\n",
322       "module_name", module_name);
323   printer.Print("if not _descriptor._USE_C_DESCRIPTORS:\n");
324   printer_->Indent();
325 
326   // Descriptor options may have custom extensions. These custom options
327   // can only be successfully parsed after we register corresponding
328   // extensions. Therefore we parse all options again here to recognize
329   // custom options that may be unknown when we define the descriptors.
330   // This does not apply to services because they are not used by extensions.
331   FixAllDescriptorOptions();
332 
333   // Set serialized_start and serialized_end.
334   SetSerializedPbInterval(proto_);
335 
336   printer_->Outdent();
337   if (HasGenericServices(file)) {
338     printer_->Print(
339         "_builder.BuildServices(DESCRIPTOR, '$module_name$', _globals)\n",
340         "module_name", module_name);
341   }
342 
343   printer.Print("# @@protoc_insertion_point(module_scope)\n");
344 
345   return !printer.failed();
346 }
347 
348 // file output by this generator.
PrintTopBoilerplate() const349 void Generator::PrintTopBoilerplate() const {
350   // TODO: Allow parameterization of Python version?
351   printer_->Print(
352       "# -*- coding: utf-8 -*-\n"
353       "# Generated by the protocol buffer compiler.  DO NOT EDIT!\n"
354       "# NO CHECKED-IN PROTOBUF "
355       // Intentional line breaker
356       "GENCODE\n"
357       "# source: $filename$\n",
358       "filename", file_->name());
359   if (opensource_runtime_) {
360     printer_->Print("# Protobuf Python Version: $protobuf_python_version$\n",
361                     "protobuf_python_version", PROTOBUF_PYTHON_VERSION_STRING);
362   }
363   printer_->Print("\"\"\"Generated protocol buffer code.\"\"\"\n");
364   if (!opensource_runtime_) {
365     // This import is needed so that compatibility proto1 compiler output
366     // inserted at protoc_insertion_point can refer to other protos like
367     // google3.a.b.c. Code generated by proto2 compiler doesn't do it, and
368     // instead uses aliases assigned when importing modules.
369     printer_->Print("import google3\n");
370   }
371   bool runtime_version_disabled = false;
372   printer_->Print(
373       "from google.protobuf import descriptor as _descriptor\n"
374       "from google.protobuf import descriptor_pool as _descriptor_pool\n"
375       "$runtime_version_import$"
376       "from google.protobuf import symbol_database as _symbol_database\n"
377       "from google.protobuf.internal import builder as _builder\n",
378       "runtime_version_import",
379       runtime_version_disabled ? ""
380                                : "from google.protobuf import runtime_version "
381                                  "as _runtime_version\n");
382   if (!runtime_version_disabled) {
383     const auto& version = GetProtobufPythonVersion(opensource_runtime_);
384     printer_->Print(
385         "_runtime_version.ValidateProtobufRuntimeVersion(\n"
386         "    $domain$,\n"
387         "    $major$,\n"
388         "    $minor$,\n"
389         "    $patch$,\n"
390         "    '$suffix$',\n"
391         "    '$location$'\n"
392         ")\n",
393         "domain",
394         opensource_runtime_ ? "_runtime_version.Domain.PUBLIC"
395                             : "_runtime_version.Domain.GOOGLE_INTERNAL",
396         "major", absl::StrCat(version.major()), "minor",
397         absl::StrCat(version.minor()), "patch", absl::StrCat(version.patch()),
398         "suffix", version.suffix(), "location", file_->name());
399   }
400   printer_->Print("# @@protoc_insertion_point(imports)\n\n");
401   printer_->Print("_sym_db = _symbol_database.Default()\n");
402   printer_->Print("\n\n");
403 }
404 
405 // Prints Python imports for all modules imported by |file|.
PrintImports() const406 void Generator::PrintImports() const {
407   bool has_importlib = false;
408   for (int i = 0; i < file_->dependency_count(); ++i) {
409     absl::string_view filename = file_->dependency(i)->name();
410 
411     std::string module_name = ModuleName(filename);
412     std::string module_alias = ModuleAlias(filename);
413     if (!opensource_runtime_) {
414       module_name =
415           std::string(absl::StripPrefix(module_name, kThirdPartyPrefix));
416     }
417     if (ContainsPythonKeyword(module_name)) {
418       // If the module path contains a Python keyword, we have to quote the
419       // module name and import it using importlib. Otherwise the usual kind of
420       // import statement would result in a syntax error from the presence of
421       // the keyword.
422       if (has_importlib == false) {
423         printer_->Print("import importlib\n");
424         has_importlib = true;
425       }
426       printer_->Print("$alias$ = importlib.import_module('$name$')\n", "alias",
427                       module_alias, "name", module_name);
428     } else {
429       size_t last_dot_pos = module_name.rfind('.');
430       std::string import_statement;
431       if (last_dot_pos == std::string::npos) {
432         // NOTE: this is not tested as it would require a protocol buffer
433         // outside of any package, and I don't think that is easily achievable.
434         import_statement = absl::StrCat("import ", module_name);
435       } else {
436         import_statement =
437             absl::StrCat("from ", module_name.substr(0, last_dot_pos),
438                          " import ", module_name.substr(last_dot_pos + 1));
439       }
440       printer_->Print("$statement$ as $alias$\n", "statement", import_statement,
441                       "alias", module_alias);
442     }
443 
444     CopyPublicDependenciesAliases(module_alias, file_->dependency(i));
445   }
446   printer_->Print("\n");
447 
448   // Print public imports.
449   for (int i = 0; i < file_->public_dependency_count(); ++i) {
450     std::string module_name = ModuleName(file_->public_dependency(i)->name());
451     if (!opensource_runtime_) {
452       module_name =
453           std::string(absl::StripPrefix(module_name, kThirdPartyPrefix));
454     }
455     printer_->Print("from $module$ import *\n", "module", module_name);
456   }
457   printer_->Print("\n");
458 }
459 
460 template <typename DescriptorT>
GetResolvedFeatures(const DescriptorT & descriptor) const461 std::string Generator::GetResolvedFeatures(
462     const DescriptorT& descriptor) const {
463   if (!GeneratingDescriptorProto()) {
464     // Everything but descriptor.proto can handle proper feature resolution.
465     return "None";
466   }
467 
468   // Load the resolved features from our pool.
469   const Descriptor* feature_set =
470       file_->FindMessageTypeByName(FeatureSet::GetDescriptor()->name());
471   ABSL_CHECK(feature_set != nullptr)
472       << "Malformed descriptor.proto doesn't contain "
473       << FeatureSet::GetDescriptor()->full_name();
474   auto message_factory = absl::make_unique<DynamicMessageFactory>();
475   auto features =
476       absl::WrapUnique(message_factory->GetPrototype(feature_set)->New());
477   features->ParseFromString(
478       GetResolvedSourceFeatures(descriptor).SerializeAsString());
479 
480   // Collect all of the resolved features.
481   std::vector<std::string> feature_args;
482   const Reflection* reflection = features->GetReflection();
483   std::vector<const FieldDescriptor*> fields;
484   reflection->ListFields(*features, &fields);
485   for (const auto* field : fields) {
486     // Assume these are all enums.  If we add non-enum global features or any
487     // python-specific features, we will need to come back and improve this
488     // logic.
489     ABSL_CHECK(field->enum_type() != nullptr)
490         << "Unexpected non-enum field found!";
491     if (field->options().retention() == FieldOptions::RETENTION_SOURCE) {
492       // Skip any source-retention features.
493       continue;
494     }
495     const EnumDescriptor* enm = field->enum_type();
496     const EnumValueDescriptor* value =
497         enm->FindValueByNumber(reflection->GetEnumValue(*features, field));
498 
499     feature_args.emplace_back(absl::StrCat(
500         field->name(), "=",
501         absl::StrFormat("%s.values_by_name[\"%s\"].number",
502                         ModuleLevelDescriptorName(*enm), value->name())));
503   }
504   return absl::StrCat("_ResolvedFeatures(", absl::StrJoin(feature_args, ","),
505                       ")");
506 }
507 
PrintResolvedFeatures() const508 void Generator::PrintResolvedFeatures() const {
509   // Since features are used during the descriptor build, it's impossible to do
510   // feature resolution at the normal point for descriptor.proto. Instead, we do
511   // feature resolution here in the generator, and embed a custom object on all
512   // of the generated descriptors.  This object should act like any other
513   // FeatureSet message on normal descriptors, but will never have to be
514   // resolved by the python runtime.
515   ABSL_CHECK(GeneratingDescriptorProto());
516   printer_->Emit({{"resolved_features", GetResolvedFeatures(*file_)},
517                   {"descriptor_name", kDescriptorKey}},
518                  R"py(
519                   class _ResolvedFeatures:
520                     def __init__(self, features = None, **kwargs):
521                       if features:
522                         for k, v in features.FIELDS.items():
523                           setattr(self, k, getattr(features, k))
524                       else:
525                         for k, v in kwargs.items():
526                           setattr(self, k, v)
527                   $descriptor_name$._features = $resolved_features$
528                 )py");
529 
530 #define MAKE_NESTED(desc, CPP_FIELD, PY_FIELD)                                \
531   [&] {                                                                       \
532     for (int i = 0; i < desc.CPP_FIELD##_count(); ++i) {                      \
533       printer_->Emit(                                                         \
534           {{"resolved_subfeatures", GetResolvedFeatures(*desc.CPP_FIELD(i))}, \
535            {"index", absl::StrCat(i)},                                        \
536            {"field", PY_FIELD}},                                              \
537           "$descriptor_name$.$field$[$index$]._features = "                   \
538           "$resolved_subfeatures$\n");                                        \
539     }                                                                         \
540   }
541 
542   google::protobuf::internal::VisitDescriptors(*file_, [&](const Descriptor& msg) {
543     printer_->Emit(
544         {{"resolved_features", GetResolvedFeatures(msg)},
545          {"descriptor_name", ModuleLevelDescriptorName(msg)},
546          {"field_features", MAKE_NESTED(msg, field, "fields")},
547          {"oneof_features", MAKE_NESTED(msg, oneof_decl, "oneofs")},
548          {"ext_features", MAKE_NESTED(msg, extension, "extensions")}},
549         R"py(
550           $descriptor_name$._features = $resolved_features$
551           $field_features$
552           $oneof_features$
553           $ext_features$
554         )py");
555   });
556   google::protobuf::internal::VisitDescriptors(*file_, [&](const EnumDescriptor& enm) {
557     printer_->Emit({{"resolved_features", GetResolvedFeatures(enm)},
558                     {"descriptor_name", ModuleLevelDescriptorName(enm)},
559                     {"value_features", MAKE_NESTED(enm, value, "values")}},
560                    R"py(
561                     $descriptor_name$._features = $resolved_features$
562                     $value_features$
563                   )py");
564   });
565 #undef MAKE_NESTED
566 }
567 
568 // Prints the single file descriptor for this file.
PrintFileDescriptor() const569 void Generator::PrintFileDescriptor() const {
570   absl::flat_hash_map<absl::string_view, std::string> m;
571   m["descriptor_name"] = kDescriptorKey;
572   m["name"] = file_->name();
573   m["package"] = file_->package();
574   m["syntax"] = GetLegacySyntaxName(GetEdition(*file_));
575   m["edition"] = Edition_Name(GetEdition(*file_));
576   m["options"] = OptionsValue(proto_.options().SerializeAsString());
577   m["serialized_descriptor"] = absl::CHexEscape(file_descriptor_serialized_);
578   if (GeneratingDescriptorProto()) {
579     printer_->Print("if not _descriptor._USE_C_DESCRIPTORS:\n");
580     printer_->Indent();
581     // Pure python's AddSerializedFile() depend on the generated
582     // descriptor_pb2.py thus we can not use AddSerializedFile() when
583     // generated descriptor.proto for pure python.
584     const char file_descriptor_template[] =
585         "$descriptor_name$ = _descriptor.FileDescriptor(\n"
586         "  name='$name$',\n"
587         "  package='$package$',\n"
588         "  syntax='$syntax$',\n"
589         "  edition='$edition$',\n"
590         "  serialized_options=$options$,\n"
591         "  create_key=_descriptor._internal_create_key,\n";
592     printer_->Print(m, file_descriptor_template);
593     printer_->Indent();
594     printer_->Print("serialized_pb=b'$value$'\n", "value",
595                     absl::CHexEscape(file_descriptor_serialized_));
596     if (file_->dependency_count() != 0) {
597       printer_->Print(",\ndependencies=[");
598       for (int i = 0; i < file_->dependency_count(); ++i) {
599         std::string module_alias = ModuleAlias(file_->dependency(i)->name());
600         printer_->Print("$module_alias$.DESCRIPTOR,", "module_alias",
601                         module_alias);
602       }
603       printer_->Print("]");
604     }
605     if (file_->public_dependency_count() > 0) {
606       printer_->Print(",\npublic_dependencies=[");
607       for (int i = 0; i < file_->public_dependency_count(); ++i) {
608         std::string module_alias =
609             ModuleAlias(file_->public_dependency(i)->name());
610         printer_->Print("$module_alias$.DESCRIPTOR,", "module_alias",
611                         module_alias);
612       }
613       printer_->Print("]");
614     }
615 
616     // TODO: Also print options and fix the message_type, enum_type,
617     //             service and extension later in the generation.
618 
619     printer_->Outdent();
620     printer_->Print(")\n");
621 
622     printer_->Outdent();
623     printer_->Print("else:\n");
624     printer_->Indent();
625   }
626   printer_->Print(m,
627                   "$descriptor_name$ = "
628                   "_descriptor_pool.Default().AddSerializedFile(b'$serialized_"
629                   "descriptor$')\n");
630   if (GeneratingDescriptorProto()) {
631     printer_->Outdent();
632   }
633   printer_->Print("\n");
634 }
635 
636 // Prints all enums contained in all message types in |file|.
PrintAllEnumsInFile() const637 void Generator::PrintAllEnumsInFile() const {
638   for (int i = 0; i < file_->enum_type_count(); ++i) {
639     PrintEnum(*file_->enum_type(i), proto_.enum_type(i));
640   }
641   for (int i = 0; i < file_->message_type_count(); ++i) {
642     PrintNestedEnums(*file_->message_type(i), proto_.message_type(i));
643   }
644 }
645 
646 // Prints a Python statement assigning the appropriate module-level
647 // enum name to a Python EnumDescriptor object equivalent to
648 // enum_descriptor.
PrintEnum(const EnumDescriptor & enum_descriptor,const EnumDescriptorProto & proto) const649 void Generator::PrintEnum(const EnumDescriptor& enum_descriptor,
650                           const EnumDescriptorProto& proto) const {
651   absl::flat_hash_map<absl::string_view, std::string> m;
652   std::string module_level_descriptor_name =
653       ModuleLevelDescriptorName(enum_descriptor);
654   m["descriptor_name"] = module_level_descriptor_name;
655   m["name"] = enum_descriptor.name();
656   m["full_name"] = enum_descriptor.full_name();
657   m["file"] = kDescriptorKey;
658   const char enum_descriptor_template[] =
659       "$descriptor_name$ = _descriptor.EnumDescriptor(\n"
660       "  name='$name$',\n"
661       "  full_name='$full_name$',\n"
662       "  filename=None,\n"
663       "  file=$file$,\n"
664       "  create_key=_descriptor._internal_create_key,\n"
665       "  values=[\n";
666   std::string options_string;
667   proto.options().SerializeToString(&options_string);
668   printer_->Print(m, enum_descriptor_template);
669   printer_->Indent();
670   printer_->Indent();
671 
672   for (int i = 0; i < enum_descriptor.value_count(); ++i) {
673     PrintEnumValueDescriptor(*enum_descriptor.value(i), proto.value(i));
674     printer_->Print(",\n");
675   }
676 
677   printer_->Outdent();
678   printer_->Print("],\n");
679   printer_->Print("containing_type=None,\n");
680   printer_->Print("serialized_options=$options_value$,\n", "options_value",
681                   OptionsValue(options_string));
682   EnumDescriptorProto edp;
683   printer_->Outdent();
684   printer_->Print(")\n");
685   printer_->Print("_sym_db.RegisterEnumDescriptor($name$)\n", "name",
686                   module_level_descriptor_name);
687   printer_->Print("\n");
688 }
689 
690 // Recursively prints enums in nested types within descriptor, then
691 // prints enums contained at the top level in descriptor.
PrintNestedEnums(const Descriptor & descriptor,const DescriptorProto & proto) const692 void Generator::PrintNestedEnums(const Descriptor& descriptor,
693                                  const DescriptorProto& proto) const {
694   for (int i = 0; i < descriptor.nested_type_count(); ++i) {
695     PrintNestedEnums(*descriptor.nested_type(i), proto.nested_type(i));
696   }
697 
698   for (int i = 0; i < descriptor.enum_type_count(); ++i) {
699     PrintEnum(*descriptor.enum_type(i), proto.enum_type(i));
700   }
701 }
702 
703 // Prints Python equivalents of all Descriptors in |file|.
PrintMessageDescriptors() const704 void Generator::PrintMessageDescriptors() const {
705   for (int i = 0; i < file_->message_type_count(); ++i) {
706     PrintDescriptor(*file_->message_type(i), proto_.message_type(i));
707     printer_->Print("\n");
708   }
709 }
710 
PrintServiceDescriptors() const711 void Generator::PrintServiceDescriptors() const {
712   for (int i = 0; i < file_->service_count(); ++i) {
713     PrintServiceDescriptor(*file_->service(i));
714   }
715 }
716 
PrintServices() const717 void Generator::PrintServices() const {
718   for (int i = 0; i < file_->service_count(); ++i) {
719     PrintServiceClass(*file_->service(i));
720     PrintServiceStub(*file_->service(i));
721     printer_->Print("\n");
722   }
723 }
724 
PrintServiceDescriptor(const ServiceDescriptor & descriptor) const725 void Generator::PrintServiceDescriptor(
726     const ServiceDescriptor& descriptor) const {
727   absl::flat_hash_map<absl::string_view, std::string> m;
728   m["service_name"] = ModuleLevelServiceDescriptorName(descriptor);
729   m["name"] = descriptor.name();
730   m["file"] = kDescriptorKey;
731   printer_->Print(m, "$service_name$ = $file$.services_by_name['$name$']\n");
732 }
733 
PrintDescriptorKeyAndModuleName(const ServiceDescriptor & descriptor) const734 void Generator::PrintDescriptorKeyAndModuleName(
735     const ServiceDescriptor& descriptor) const {
736   std::string name = ModuleLevelServiceDescriptorName(descriptor);
737   printer_->Print("$descriptor_key$ = $descriptor_name$,\n", "descriptor_key",
738                   kDescriptorKey, "descriptor_name", name);
739   std::string module_name = ModuleName(file_->name());
740   if (!opensource_runtime_) {
741     module_name =
742         std::string(absl::StripPrefix(module_name, kThirdPartyPrefix));
743   }
744   printer_->Print("__module__ = '$module_name$'\n", "module_name", module_name);
745 }
746 
PrintServiceClass(const ServiceDescriptor & descriptor) const747 void Generator::PrintServiceClass(const ServiceDescriptor& descriptor) const {
748   // Print the service.
749   printer_->Print(
750       "$class_name$ = service_reflection.GeneratedServiceType("
751       "'$class_name$', (_service.Service,), dict(\n",
752       "class_name", descriptor.name());
753   printer_->Indent();
754   Generator::PrintDescriptorKeyAndModuleName(descriptor);
755   printer_->Print("))\n\n");
756   printer_->Outdent();
757 }
758 
PrintServiceStub(const ServiceDescriptor & descriptor) const759 void Generator::PrintServiceStub(const ServiceDescriptor& descriptor) const {
760   // Print the service stub.
761   printer_->Print(
762       "$class_name$_Stub = "
763       "service_reflection.GeneratedServiceStubType("
764       "'$class_name$_Stub', ($class_name$,), dict(\n",
765       "class_name", descriptor.name());
766   printer_->Indent();
767   Generator::PrintDescriptorKeyAndModuleName(descriptor);
768   printer_->Print("))\n\n");
769   printer_->Outdent();
770 }
771 
772 // Prints statement assigning ModuleLevelDescriptorName(message_descriptor)
773 // to a Python Descriptor object for message_descriptor.
774 //
775 // Mutually recursive with PrintNestedDescriptors().
PrintDescriptor(const Descriptor & message_descriptor,const DescriptorProto & proto) const776 void Generator::PrintDescriptor(const Descriptor& message_descriptor,
777                                 const DescriptorProto& proto) const {
778   absl::flat_hash_map<absl::string_view, std::string> m;
779   m["name"] = message_descriptor.name();
780   m["full_name"] = message_descriptor.full_name();
781   m["file"] = kDescriptorKey;
782 
783   PrintNestedDescriptors(message_descriptor, proto);
784 
785   printer_->Print("\n");
786   printer_->Print("$descriptor_name$ = _descriptor.Descriptor(\n",
787                   "descriptor_name",
788                   ModuleLevelDescriptorName(message_descriptor));
789   printer_->Indent();
790   const char required_function_arguments[] =
791       "name='$name$',\n"
792       "full_name='$full_name$',\n"
793       "filename=None,\n"
794       "file=$file$,\n"
795       "containing_type=None,\n"
796       "create_key=_descriptor._internal_create_key,\n";
797   printer_->Print(m, required_function_arguments);
798   PrintFieldsInDescriptor(message_descriptor, proto);
799   PrintExtensionsInDescriptor(message_descriptor, proto);
800 
801   // Nested types
802   printer_->Print("nested_types=[");
803   for (int i = 0; i < message_descriptor.nested_type_count(); ++i) {
804     const std::string nested_name =
805         ModuleLevelDescriptorName(*message_descriptor.nested_type(i));
806     printer_->Print("$name$, ", "name", nested_name);
807   }
808   printer_->Print("],\n");
809 
810   // Enum types
811   printer_->Print("enum_types=[\n");
812   printer_->Indent();
813   for (int i = 0; i < message_descriptor.enum_type_count(); ++i) {
814     const std::string descriptor_name =
815         ModuleLevelDescriptorName(*message_descriptor.enum_type(i));
816     printer_->Print(descriptor_name.c_str());
817     printer_->Print(",\n");
818   }
819   printer_->Outdent();
820   printer_->Print("],\n");
821   std::string options_string;
822   proto.options().SerializeToString(&options_string);
823   printer_->Print(
824       "serialized_options=$options_value$,\n"
825       "is_extendable=$extendable$",
826       "options_value", OptionsValue(options_string), "extendable",
827       message_descriptor.extension_range_count() > 0 ? "True" : "False");
828   printer_->Print(",\n");
829 
830   // Extension ranges
831   printer_->Print("extension_ranges=[");
832   for (int i = 0; i < message_descriptor.extension_range_count(); ++i) {
833     const Descriptor::ExtensionRange* range =
834         message_descriptor.extension_range(i);
835     printer_->Print("($start$, $end$), ", "start",
836                     absl::StrCat(range->start_number()), "end",
837                     absl::StrCat(range->end_number()));
838   }
839   printer_->Print("],\n");
840   printer_->Print("oneofs=[\n");
841   printer_->Indent();
842   for (int i = 0; i < message_descriptor.oneof_decl_count(); ++i) {
843     const OneofDescriptor* desc = message_descriptor.oneof_decl(i);
844     m.clear();
845     m["name"] = desc->name();
846     m["full_name"] = desc->full_name();
847     m["index"] = absl::StrCat(desc->index());
848     options_string =
849         OptionsValue(proto.oneof_decl(i).options().SerializeAsString());
850     if (options_string == "None") {
851       m["serialized_options"] = "";
852     } else {
853       m["serialized_options"] =
854           absl::StrCat(", serialized_options=", options_string);
855     }
856     printer_->Print(m,
857                     "_descriptor.OneofDescriptor(\n"
858                     "  name='$name$', full_name='$full_name$',\n"
859                     "  index=$index$, containing_type=None,\n"
860                     "  create_key=_descriptor._internal_create_key,\n"
861                     "fields=[]$serialized_options$),\n");
862   }
863   printer_->Outdent();
864   printer_->Print("],\n");
865 
866   printer_->Outdent();
867   printer_->Print(")\n");
868 }
869 
870 // Prints Python Descriptor objects for all nested types contained in
871 // message_descriptor.
872 //
873 // Mutually recursive with PrintDescriptor().
PrintNestedDescriptors(const Descriptor & containing_descriptor,const DescriptorProto & proto) const874 void Generator::PrintNestedDescriptors(const Descriptor& containing_descriptor,
875                                        const DescriptorProto& proto) const {
876   for (int i = 0; i < containing_descriptor.nested_type_count(); ++i) {
877     PrintDescriptor(*containing_descriptor.nested_type(i),
878                     proto.nested_type(i));
879   }
880 }
881 
882 // Prints all messages in |file|.
PrintMessages() const883 void Generator::PrintMessages() const {
884   for (int i = 0; i < file_->message_type_count(); ++i) {
885     std::vector<std::string> to_register;
886     PrintMessage(*file_->message_type(i), "", &to_register, false);
887     for (int j = 0; j < to_register.size(); ++j) {
888       printer_->Print("_sym_db.RegisterMessage($name$)\n", "name",
889                       ResolveKeyword(to_register[j]));
890     }
891     printer_->Print("\n");
892   }
893 }
894 
895 // Prints a Python class for the given message descriptor.  We defer to the
896 // metaclass to do almost all of the work of actually creating a useful class.
897 // The purpose of this function and its many helper functions above is merely
898 // to output a Python version of the descriptors, which the metaclass in
899 // reflection.py will use to construct the meat of the class itself.
900 //
901 // Mutually recursive with PrintNestedMessages().
902 // Collect nested message names to_register for the symbol_database.
PrintMessage(const Descriptor & message_descriptor,absl::string_view prefix,std::vector<std::string> * to_register,bool is_nested) const903 void Generator::PrintMessage(const Descriptor& message_descriptor,
904                              absl::string_view prefix,
905                              std::vector<std::string>* to_register,
906                              bool is_nested) const {
907   std::string qualified_name;
908   if (is_nested) {
909     if (IsPythonKeyword(message_descriptor.name())) {
910       qualified_name = absl::StrCat("getattr(", prefix, ", '",
911                                     message_descriptor.name(), "')");
912     } else {
913       qualified_name = absl::StrCat(prefix, ".", message_descriptor.name());
914     }
915     printer_->Print(
916         "'$name$' : _reflection.GeneratedProtocolMessageType('$name$', "
917         "(_message.Message,), {\n",
918         "name", message_descriptor.name());
919   } else {
920     qualified_name = ResolveKeyword(message_descriptor.name());
921     printer_->Print(
922         "$qualified_name$ = _reflection.GeneratedProtocolMessageType('$name$', "
923         "(_message.Message,), {\n",
924         "qualified_name", qualified_name, "name", message_descriptor.name());
925   }
926   printer_->Indent();
927 
928   to_register->push_back(qualified_name);
929 
930   PrintNestedMessages(message_descriptor, qualified_name, to_register);
931   absl::flat_hash_map<absl::string_view, std::string> m;
932   m["descriptor_key"] = kDescriptorKey;
933   m["descriptor_name"] = ModuleLevelDescriptorName(message_descriptor);
934   printer_->Print(m, "'$descriptor_key$' : $descriptor_name$,\n");
935   std::string module_name = ModuleName(file_->name());
936   if (!opensource_runtime_) {
937     module_name =
938         std::string(absl::StripPrefix(module_name, kThirdPartyPrefix));
939   }
940   printer_->Print("'__module__' : '$module_name$'\n", "module_name",
941                   module_name);
942   printer_->Print("# @@protoc_insertion_point(class_scope:$full_name$)\n",
943                   "full_name", message_descriptor.full_name());
944   printer_->Print("})\n");
945   printer_->Outdent();
946 }
947 
948 // Prints all nested messages within |containing_descriptor|.
949 // Mutually recursive with PrintMessage().
PrintNestedMessages(const Descriptor & containing_descriptor,absl::string_view prefix,std::vector<std::string> * to_register) const950 void Generator::PrintNestedMessages(
951     const Descriptor& containing_descriptor, absl::string_view prefix,
952     std::vector<std::string>* to_register) const {
953   for (int i = 0; i < containing_descriptor.nested_type_count(); ++i) {
954     printer_->Print("\n");
955     PrintMessage(*containing_descriptor.nested_type(i), prefix, to_register,
956                  true);
957     printer_->Print(",\n");
958   }
959 }
960 
961 // Recursively fixes foreign fields in all nested types in |descriptor|, then
962 // sets the message_type and enum_type of all message and enum fields to point
963 // to their respective descriptors.
964 // Args:
965 //   descriptor: descriptor to print fields for.
966 //   containing_descriptor: if descriptor is a nested type, this is its
967 //       containing type, or NULL if this is a root/top-level type.
FixForeignFieldsInDescriptor(const Descriptor & descriptor,const Descriptor * containing_descriptor) const968 void Generator::FixForeignFieldsInDescriptor(
969     const Descriptor& descriptor,
970     const Descriptor* containing_descriptor) const {
971   for (int i = 0; i < descriptor.nested_type_count(); ++i) {
972     FixForeignFieldsInDescriptor(*descriptor.nested_type(i), &descriptor);
973   }
974 
975   for (int i = 0; i < descriptor.field_count(); ++i) {
976     const FieldDescriptor& field_descriptor = *descriptor.field(i);
977     FixForeignFieldsInField(&descriptor, field_descriptor, "fields_by_name");
978   }
979 
980   FixContainingTypeInDescriptor(descriptor, containing_descriptor);
981   for (int i = 0; i < descriptor.enum_type_count(); ++i) {
982     const EnumDescriptor& enum_descriptor = *descriptor.enum_type(i);
983     FixContainingTypeInDescriptor(enum_descriptor, &descriptor);
984   }
985   for (int i = 0; i < descriptor.oneof_decl_count(); ++i) {
986     absl::flat_hash_map<absl::string_view, std::string> m;
987     const OneofDescriptor* oneof = descriptor.oneof_decl(i);
988     m["descriptor_name"] = ModuleLevelDescriptorName(descriptor);
989     m["oneof_name"] = oneof->name();
990     for (int j = 0; j < oneof->field_count(); ++j) {
991       m["field_name"] = oneof->field(j)->name();
992       printer_->Print(
993           m,
994           "$descriptor_name$.oneofs_by_name['$oneof_name$'].fields.append(\n"
995           "  $descriptor_name$.fields_by_name['$field_name$'])\n");
996       printer_->Print(
997           m,
998           "$descriptor_name$.fields_by_name['$field_name$'].containing_oneof = "
999           "$descriptor_name$.oneofs_by_name['$oneof_name$']\n");
1000     }
1001   }
1002 }
1003 
AddMessageToFileDescriptor(const Descriptor & descriptor) const1004 void Generator::AddMessageToFileDescriptor(const Descriptor& descriptor) const {
1005   absl::flat_hash_map<absl::string_view, std::string> m;
1006   m["descriptor_name"] = kDescriptorKey;
1007   m["message_name"] = descriptor.name();
1008   m["message_descriptor_name"] = ModuleLevelDescriptorName(descriptor);
1009   const char file_descriptor_template[] =
1010       "$descriptor_name$.message_types_by_name['$message_name$'] = "
1011       "$message_descriptor_name$\n";
1012   printer_->Print(m, file_descriptor_template);
1013 }
1014 
AddServiceToFileDescriptor(const ServiceDescriptor & descriptor) const1015 void Generator::AddServiceToFileDescriptor(
1016     const ServiceDescriptor& descriptor) const {
1017   absl::flat_hash_map<absl::string_view, std::string> m;
1018   m["descriptor_name"] = kDescriptorKey;
1019   m["service_name"] = descriptor.name();
1020   m["service_descriptor_name"] = ModuleLevelServiceDescriptorName(descriptor);
1021   const char file_descriptor_template[] =
1022       "$descriptor_name$.services_by_name['$service_name$'] = "
1023       "$service_descriptor_name$\n";
1024   printer_->Print(m, file_descriptor_template);
1025 }
1026 
AddEnumToFileDescriptor(const EnumDescriptor & descriptor) const1027 void Generator::AddEnumToFileDescriptor(
1028     const EnumDescriptor& descriptor) const {
1029   absl::flat_hash_map<absl::string_view, std::string> m;
1030   m["descriptor_name"] = kDescriptorKey;
1031   m["enum_name"] = descriptor.name();
1032   m["enum_descriptor_name"] = ModuleLevelDescriptorName(descriptor);
1033   const char file_descriptor_template[] =
1034       "$descriptor_name$.enum_types_by_name['$enum_name$'] = "
1035       "$enum_descriptor_name$\n";
1036   printer_->Print(m, file_descriptor_template);
1037 }
1038 
AddExtensionToFileDescriptor(const FieldDescriptor & descriptor) const1039 void Generator::AddExtensionToFileDescriptor(
1040     const FieldDescriptor& descriptor) const {
1041   absl::flat_hash_map<absl::string_view, std::string> m;
1042   m["descriptor_name"] = kDescriptorKey;
1043   m["field_name"] = descriptor.name();
1044   m["resolved_name"] = ResolveKeyword(descriptor.name());
1045   const char file_descriptor_template[] =
1046       "$descriptor_name$.extensions_by_name['$field_name$'] = "
1047       "$resolved_name$\n";
1048   printer_->Print(m, file_descriptor_template);
1049 }
1050 
1051 // Sets any necessary message_type and enum_type attributes
1052 // for the Python version of |field|.
1053 //
1054 // containing_type may be NULL, in which case this is a module-level field.
1055 //
1056 // python_dict_name is the name of the Python dict where we should
1057 // look the field up in the containing type.  (e.g., fields_by_name
1058 // or extensions_by_name).  We ignore python_dict_name if containing_type
1059 // is NULL.
FixForeignFieldsInField(const Descriptor * containing_type,const FieldDescriptor & field,absl::string_view python_dict_name) const1060 void Generator::FixForeignFieldsInField(
1061     const Descriptor* containing_type, const FieldDescriptor& field,
1062     absl::string_view python_dict_name) const {
1063   const std::string field_referencing_expression =
1064       FieldReferencingExpression(containing_type, field, python_dict_name);
1065   absl::flat_hash_map<absl::string_view, std::string> m;
1066   m["field_ref"] = field_referencing_expression;
1067   const Descriptor* foreign_message_type = field.message_type();
1068   if (foreign_message_type) {
1069     m["foreign_type"] = ModuleLevelDescriptorName(*foreign_message_type);
1070     printer_->Print(m, "$field_ref$.message_type = $foreign_type$\n");
1071   }
1072   const EnumDescriptor* enum_type = field.enum_type();
1073   if (enum_type) {
1074     m["enum_type"] = ModuleLevelDescriptorName(*enum_type);
1075     printer_->Print(m, "$field_ref$.enum_type = $enum_type$\n");
1076   }
1077 }
1078 
1079 // Returns the module-level expression for the given FieldDescriptor.
1080 // Only works for fields in the .proto file this Generator is generating for.
1081 //
1082 // containing_type may be NULL, in which case this is a module-level field.
1083 //
1084 // python_dict_name is the name of the Python dict where we should
1085 // look the field up in the containing type.  (e.g., fields_by_name
1086 // or extensions_by_name).  We ignore python_dict_name if containing_type
1087 // is NULL.
FieldReferencingExpression(const Descriptor * containing_type,const FieldDescriptor & field,absl::string_view python_dict_name) const1088 std::string Generator::FieldReferencingExpression(
1089     const Descriptor* containing_type, const FieldDescriptor& field,
1090     absl::string_view python_dict_name) const {
1091   // We should only ever be looking up fields in the current file.
1092   // The only things we refer to from other files are message descriptors.
1093   ABSL_CHECK_EQ(field.file(), file_)
1094       << field.file()->name() << " vs. " << file_->name();
1095   if (!containing_type) {
1096     return ResolveKeyword(field.name());
1097   }
1098   return absl::Substitute("$0.$1['$2']",
1099                           ModuleLevelDescriptorName(*containing_type),
1100                           python_dict_name, field.name());
1101 }
1102 
1103 // Prints containing_type for nested descriptors or enum descriptors.
1104 template <typename DescriptorT>
FixContainingTypeInDescriptor(const DescriptorT & descriptor,const Descriptor * containing_descriptor) const1105 void Generator::FixContainingTypeInDescriptor(
1106     const DescriptorT& descriptor,
1107     const Descriptor* containing_descriptor) const {
1108   if (containing_descriptor != nullptr) {
1109     const std::string nested_name = ModuleLevelDescriptorName(descriptor);
1110     const std::string parent_name =
1111         ModuleLevelDescriptorName(*containing_descriptor);
1112     printer_->Print("$nested_name$.containing_type = $parent_name$\n",
1113                     "nested_name", nested_name, "parent_name", parent_name);
1114   }
1115 }
1116 
1117 // Prints statements setting the message_type and enum_type fields in the
1118 // Python descriptor objects we've already output in the file.  We must
1119 // do this in a separate step due to circular references (otherwise, we'd
1120 // just set everything in the initial assignment statements).
FixForeignFieldsInDescriptors() const1121 void Generator::FixForeignFieldsInDescriptors() const {
1122   for (int i = 0; i < file_->message_type_count(); ++i) {
1123     FixForeignFieldsInDescriptor(*file_->message_type(i), nullptr);
1124   }
1125   for (int i = 0; i < file_->message_type_count(); ++i) {
1126     AddMessageToFileDescriptor(*file_->message_type(i));
1127   }
1128   for (int i = 0; i < file_->enum_type_count(); ++i) {
1129     AddEnumToFileDescriptor(*file_->enum_type(i));
1130   }
1131   for (int i = 0; i < file_->extension_count(); ++i) {
1132     AddExtensionToFileDescriptor(*file_->extension(i));
1133   }
1134 
1135   // TODO: Move this register to PrintFileDescriptor() when
1136   // FieldDescriptor.file is added in generated file.
1137   printer_->Print("_sym_db.RegisterFileDescriptor($name$)\n", "name",
1138                   kDescriptorKey);
1139   printer_->Print("\n");
1140 }
1141 
1142 // Returns a Python expression that instantiates a Python EnumValueDescriptor
1143 // object for the given C++ descriptor.
PrintEnumValueDescriptor(const EnumValueDescriptor & descriptor,const EnumValueDescriptorProto & proto) const1144 void Generator::PrintEnumValueDescriptor(
1145     const EnumValueDescriptor& descriptor,
1146     const EnumValueDescriptorProto& proto) const {
1147   // TODO: Fix up EnumValueDescriptor "type" fields.
1148   // More circular references.  ::sigh::
1149   std::string options_string;
1150   proto.options().SerializeToString(&options_string);
1151   absl::flat_hash_map<absl::string_view, std::string> m;
1152   m["name"] = descriptor.name();
1153   m["index"] = absl::StrCat(descriptor.index());
1154   m["number"] = absl::StrCat(descriptor.number());
1155   m["options"] = OptionsValue(options_string);
1156   printer_->Print(m,
1157                   "_descriptor.EnumValueDescriptor(\n"
1158                   "  name='$name$', index=$index$, number=$number$,\n"
1159                   "  serialized_options=$options$,\n"
1160                   "  type=None,\n"
1161                   "  create_key=_descriptor._internal_create_key)");
1162 }
1163 
1164 // Prints an expression for a Python FieldDescriptor for |field|.
PrintFieldDescriptor(const FieldDescriptor & field,const FieldDescriptorProto & proto) const1165 void Generator::PrintFieldDescriptor(const FieldDescriptor& field,
1166                                      const FieldDescriptorProto& proto) const {
1167   std::string options_string;
1168   proto.options().SerializeToString(&options_string);
1169   absl::flat_hash_map<absl::string_view, std::string> m;
1170   m["name"] = field.name();
1171   m["full_name"] = field.full_name();
1172   m["index"] = absl::StrCat(field.index());
1173   m["number"] = absl::StrCat(field.number());
1174   m["type"] = absl::StrCat(field.type());
1175   m["cpp_type"] = absl::StrCat(field.cpp_type());
1176   m["label"] = absl::StrCat(field.label());
1177   m["has_default_value"] = field.has_default_value() ? "True" : "False";
1178   m["default_value"] = StringifyDefaultValue(field);
1179   m["is_extension"] = field.is_extension() ? "True" : "False";
1180   m["serialized_options"] = OptionsValue(options_string);
1181   m["json_name"] = field.has_json_name()
1182                        ? absl::StrCat(", json_name='", field.json_name(), "'")
1183                        : "";
1184   // We always set message_type and enum_type to None at this point, and then
1185   // these fields in correctly after all referenced descriptors have been
1186   // defined and/or imported (see FixForeignFieldsInDescriptors()).
1187   const char field_descriptor_decl[] =
1188       "_descriptor.FieldDescriptor(\n"
1189       "  name='$name$', full_name='$full_name$', index=$index$,\n"
1190       "  number=$number$, type=$type$, cpp_type=$cpp_type$, label=$label$,\n"
1191       "  has_default_value=$has_default_value$, "
1192       "default_value=$default_value$,\n"
1193       "  message_type=None, enum_type=None, containing_type=None,\n"
1194       "  is_extension=$is_extension$, extension_scope=None,\n"
1195       "  serialized_options=$serialized_options$$json_name$, file=DESCRIPTOR,"
1196       "  create_key=_descriptor._internal_create_key)";
1197   printer_->Print(m, field_descriptor_decl);
1198 }
1199 
1200 // Helper for Print{Fields,Extensions}InDescriptor().
PrintFieldDescriptorsInDescriptor(const Descriptor & message_descriptor,const DescriptorProto & proto,bool is_extension,absl::string_view list_variable_name) const1201 void Generator::PrintFieldDescriptorsInDescriptor(
1202     const Descriptor& message_descriptor, const DescriptorProto& proto,
1203     bool is_extension, absl::string_view list_variable_name) const {
1204   printer_->Print("$list$=[\n", "list", list_variable_name);
1205   printer_->Indent();
1206   int count = is_extension ? message_descriptor.extension_count()
1207                            : message_descriptor.field_count();
1208   for (int i = 0; i < count; ++i) {
1209     PrintFieldDescriptor(is_extension ? *message_descriptor.extension(i)
1210                                       : *message_descriptor.field(i),
1211                          is_extension ? proto.extension(i) : proto.field(i));
1212     printer_->Print(",\n");
1213   }
1214   printer_->Outdent();
1215   printer_->Print("],\n");
1216 }
1217 
1218 // Prints a statement assigning "fields" to a list of Python FieldDescriptors,
1219 // one for each field present in message_descriptor.
PrintFieldsInDescriptor(const Descriptor & message_descriptor,const DescriptorProto & proto) const1220 void Generator::PrintFieldsInDescriptor(const Descriptor& message_descriptor,
1221                                         const DescriptorProto& proto) const {
1222   const bool is_extension = false;
1223   PrintFieldDescriptorsInDescriptor(message_descriptor, proto, is_extension,
1224                                     "fields");
1225 }
1226 
1227 // Prints a statement assigning "extensions" to a list of Python
1228 // FieldDescriptors, one for each extension present in message_descriptor.
PrintExtensionsInDescriptor(const Descriptor & message_descriptor,const DescriptorProto & proto) const1229 void Generator::PrintExtensionsInDescriptor(
1230     const Descriptor& message_descriptor, const DescriptorProto& proto) const {
1231   const bool is_extension = true;
1232   PrintFieldDescriptorsInDescriptor(message_descriptor, proto, is_extension,
1233                                     "extensions");
1234 }
1235 
GeneratingDescriptorProto() const1236 bool Generator::GeneratingDescriptorProto() const {
1237   return file_->name() == "net/proto2/proto/descriptor.proto" ||
1238          file_->name() == "google/protobuf/descriptor.proto";
1239 }
1240 
1241 // Returns the unique Python module-level identifier given to a descriptor.
1242 // This name is module-qualified iff the given descriptor describes an
1243 // entity that doesn't come from the current file.
1244 template <typename DescriptorT>
ModuleLevelDescriptorName(const DescriptorT & descriptor) const1245 std::string Generator::ModuleLevelDescriptorName(
1246     const DescriptorT& descriptor) const {
1247   // FIXME(robinson):
1248   // We currently don't worry about collisions with underscores in the type
1249   // names, so these would collide in nasty ways if found in the same file:
1250   //   OuterProto.ProtoA.ProtoB
1251   //   OuterProto_ProtoA.ProtoB  # Underscore instead of period.
1252   // As would these:
1253   //   OuterProto.ProtoA_.ProtoB
1254   //   OuterProto.ProtoA._ProtoB  # Leading vs. trailing underscore.
1255   // (Contrived, but certainly possible).
1256   //
1257   // The C++ implementation doesn't guard against this either.  Leaving
1258   // it for now...
1259   std::string name = NamePrefixedWithNestedTypes(descriptor, "_");
1260   absl::AsciiStrToUpper(&name);
1261   // Module-private for now.  Easy to make public later; almost impossible
1262   // to make private later.
1263   name = absl::StrCat("_", name);
1264   // We now have the name relative to its own module.  Also qualify with
1265   // the module name iff this descriptor is from a different .proto file.
1266   if (descriptor.file() != file_) {
1267     name = absl::StrCat(ModuleAlias(descriptor.file()->name()), ".", name);
1268   }
1269   return name;
1270 }
1271 
1272 // Returns the name of the message class itself, not the descriptor.
1273 // Like ModuleLevelDescriptorName(), module-qualifies the name iff
1274 // the given descriptor describes an entity that doesn't come from
1275 // the current file.
ModuleLevelMessageName(const Descriptor & descriptor) const1276 std::string Generator::ModuleLevelMessageName(
1277     const Descriptor& descriptor) const {
1278   std::string name = NamePrefixedWithNestedTypes(descriptor, ".");
1279   if (descriptor.file() != file_) {
1280     name = absl::StrCat(ModuleAlias(descriptor.file()->name()), ".", name);
1281   }
1282   return name;
1283 }
1284 
1285 // Returns the unique Python module-level identifier given to a service
1286 // descriptor.
ModuleLevelServiceDescriptorName(const ServiceDescriptor & descriptor) const1287 std::string Generator::ModuleLevelServiceDescriptorName(
1288     const ServiceDescriptor& descriptor) const {
1289   std::string name = absl::StrCat("_", descriptor.name());
1290   absl::AsciiStrToUpper(&name);
1291   if (descriptor.file() != file_) {
1292     name = absl::StrCat(ModuleAlias(descriptor.file()->name()), ".", name);
1293   }
1294   return name;
1295 }
1296 
1297 // Prints descriptor offsets _serialized_start and _serialized_end.
1298 // Args:
1299 //   descriptor_proto: The descriptor proto to have a serialized reference.
1300 // Example printer output:
1301 // _globals['_MYMESSAGE']._serialized_start=47
1302 // _globals['_MYMESSAGE']._serialized_end=76
1303 template <typename DescriptorProtoT>
PrintSerializedPbInterval(const DescriptorProtoT & descriptor_proto,absl::string_view name) const1304 void Generator::PrintSerializedPbInterval(
1305     const DescriptorProtoT& descriptor_proto, absl::string_view name) const {
1306   std::string sp;
1307   descriptor_proto.SerializeToString(&sp);
1308   size_t offset = file_descriptor_serialized_.find(sp);
1309   ABSL_CHECK_GE(offset, 0);
1310 
1311   printer_->Print(
1312       "_globals['$name$']._serialized_start=$serialized_start$\n"
1313       "_globals['$name$']._serialized_end=$serialized_end$\n",
1314       "name", name, "serialized_start", absl::StrCat(offset), "serialized_end",
1315       absl::StrCat(offset + sp.size()));
1316 }
1317 
1318 template <typename DescriptorT>
PrintDescriptorOptionsFixingCode(const DescriptorT & descriptor,const typename DescriptorT::Proto & proto,absl::string_view descriptor_str) const1319 bool Generator::PrintDescriptorOptionsFixingCode(
1320     const DescriptorT& descriptor, const typename DescriptorT::Proto& proto,
1321     absl::string_view descriptor_str) const {
1322   std::string options = OptionsValue(proto.options().SerializeAsString());
1323 
1324   // Reset the _options to None thus DescriptorBase.GetOptions() can
1325   // parse _options again after extensions are registered.
1326   size_t dot_pos = descriptor_str.find('.');
1327   std::string descriptor_name;
1328   if (dot_pos == std::string::npos) {
1329     descriptor_name = absl::StrCat("_globals['", descriptor_str, "']");
1330   } else {
1331     descriptor_name =
1332         absl::StrCat("_globals['", descriptor_str.substr(0, dot_pos), "']",
1333                      descriptor_str.substr(dot_pos));
1334   }
1335 
1336   if (options == "None") {
1337     return false;
1338   }
1339 
1340   printer_->Print(
1341       "$descriptor_name$._loaded_options = None\n"
1342       "$descriptor_name$._serialized_options = $serialized_value$\n",
1343       "descriptor_name", descriptor_name, "serialized_value", options);
1344   return true;
1345 }
1346 
1347 // Generates the start and end offsets for each entity in the serialized file
1348 // descriptor. The file argument must exactly match what was serialized into
1349 // file_descriptor_serialized_, and should already have had any
1350 // source-retention options stripped out. This is important because we need an
1351 // exact byte-for-byte match so that we can successfully find the correct
1352 // offsets in the serialized descriptors.
SetSerializedPbInterval(const FileDescriptorProto & file) const1353 void Generator::SetSerializedPbInterval(const FileDescriptorProto& file) const {
1354   // Top level enums.
1355   for (int i = 0; i < file_->enum_type_count(); ++i) {
1356     const EnumDescriptor& descriptor = *file_->enum_type(i);
1357     PrintSerializedPbInterval(file.enum_type(i),
1358                               ModuleLevelDescriptorName(descriptor));
1359   }
1360 
1361   // Messages.
1362   for (int i = 0; i < file_->message_type_count(); ++i) {
1363     SetMessagePbInterval(file.message_type(i), *file_->message_type(i));
1364   }
1365 
1366   // Services.
1367   for (int i = 0; i < file_->service_count(); ++i) {
1368     const ServiceDescriptor& service = *file_->service(i);
1369     PrintSerializedPbInterval(file.service(i),
1370                               ModuleLevelServiceDescriptorName(service));
1371   }
1372 }
1373 
SetMessagePbInterval(const DescriptorProto & message_proto,const Descriptor & descriptor) const1374 void Generator::SetMessagePbInterval(const DescriptorProto& message_proto,
1375                                      const Descriptor& descriptor) const {
1376   PrintSerializedPbInterval(message_proto,
1377                             ModuleLevelDescriptorName(descriptor));
1378 
1379   // Nested messages.
1380   for (int i = 0; i < descriptor.nested_type_count(); ++i) {
1381     SetMessagePbInterval(message_proto.nested_type(i),
1382                          *descriptor.nested_type(i));
1383   }
1384 
1385   for (int i = 0; i < descriptor.enum_type_count(); ++i) {
1386     const EnumDescriptor& enum_des = *descriptor.enum_type(i);
1387     PrintSerializedPbInterval(message_proto.enum_type(i),
1388                               ModuleLevelDescriptorName(enum_des));
1389   }
1390 }
1391 
1392 // Prints expressions that set the options field of all descriptors.
FixAllDescriptorOptions() const1393 void Generator::FixAllDescriptorOptions() const {
1394   // Prints an expression that sets the file descriptor's options.
1395   if (!PrintDescriptorOptionsFixingCode(*file_, proto_, kDescriptorKey)) {
1396     printer_->Print("DESCRIPTOR._loaded_options = None\n");
1397   }
1398   // Prints expressions that set the options for all top level enums.
1399   for (int i = 0; i < file_->enum_type_count(); ++i) {
1400     FixOptionsForEnum(*file_->enum_type(i), proto_.enum_type(i));
1401   }
1402   // Prints expressions that set the options for all top level extensions.
1403   for (int i = 0; i < file_->extension_count(); ++i) {
1404     FixOptionsForField(*file_->extension(i), proto_.extension(i));
1405   }
1406   // Prints expressions that set the options for all messages, nested enums,
1407   // nested extensions and message fields.
1408   for (int i = 0; i < file_->message_type_count(); ++i) {
1409     FixOptionsForMessage(*file_->message_type(i), proto_.message_type(i));
1410   }
1411 
1412   for (int i = 0; i < file_->service_count(); ++i) {
1413     FixOptionsForService(*file_->service(i), proto_.service(i));
1414   }
1415 }
1416 
FixOptionsForOneof(const OneofDescriptor & oneof,const OneofDescriptorProto & proto) const1417 void Generator::FixOptionsForOneof(const OneofDescriptor& oneof,
1418                                    const OneofDescriptorProto& proto) const {
1419   std::string oneof_name = absl::Substitute(
1420       "$0.$1['$2']", ModuleLevelDescriptorName(*oneof.containing_type()),
1421       "oneofs_by_name", oneof.name());
1422   PrintDescriptorOptionsFixingCode(oneof, proto, oneof_name);
1423 }
1424 
1425 // Prints expressions that set the options for an enum descriptor and its
1426 // value descriptors.
FixOptionsForEnum(const EnumDescriptor & enum_descriptor,const EnumDescriptorProto & proto) const1427 void Generator::FixOptionsForEnum(const EnumDescriptor& enum_descriptor,
1428                                   const EnumDescriptorProto& proto) const {
1429   std::string descriptor_name = ModuleLevelDescriptorName(enum_descriptor);
1430   PrintDescriptorOptionsFixingCode(enum_descriptor, proto, descriptor_name);
1431   for (int i = 0; i < enum_descriptor.value_count(); ++i) {
1432     const EnumValueDescriptor& value_descriptor = *enum_descriptor.value(i);
1433     PrintDescriptorOptionsFixingCode(
1434         value_descriptor, proto.value(i),
1435         absl::StrFormat("%s.values_by_name[\"%s\"]", descriptor_name.c_str(),
1436                         value_descriptor.name()));
1437   }
1438 }
1439 
1440 // Prints expressions that set the options for an service descriptor and its
1441 // value descriptors.
FixOptionsForService(const ServiceDescriptor & service_descriptor,const ServiceDescriptorProto & proto) const1442 void Generator::FixOptionsForService(
1443     const ServiceDescriptor& service_descriptor,
1444     const ServiceDescriptorProto& proto) const {
1445   std::string descriptor_name =
1446       ModuleLevelServiceDescriptorName(service_descriptor);
1447   PrintDescriptorOptionsFixingCode(service_descriptor, proto, descriptor_name);
1448 
1449   for (int i = 0; i < service_descriptor.method_count(); ++i) {
1450     const MethodDescriptor* method = service_descriptor.method(i);
1451     std::string method_name = absl::StrCat(
1452         descriptor_name, ".methods_by_name['", method->name(), "']");
1453     PrintDescriptorOptionsFixingCode(*method, proto.method(i), method_name);
1454   }
1455 }
1456 
1457 // Prints expressions that set the options for field descriptors (including
1458 // extensions).
FixOptionsForField(const FieldDescriptor & field,const FieldDescriptorProto & proto) const1459 void Generator::FixOptionsForField(const FieldDescriptor& field,
1460                                    const FieldDescriptorProto& proto) const {
1461   std::string field_name;
1462   if (field.is_extension()) {
1463     if (field.extension_scope() == nullptr) {
1464       // Top level extensions.
1465       field_name = field.name();
1466     } else {
1467       field_name = FieldReferencingExpression(field.extension_scope(), field,
1468                                               "extensions_by_name");
1469     }
1470   } else {
1471     field_name = FieldReferencingExpression(field.containing_type(), field,
1472                                             "fields_by_name");
1473   }
1474   PrintDescriptorOptionsFixingCode(field, proto, field_name);
1475 }
1476 
1477 // Prints expressions that set the options for a message and all its inner
1478 // types (nested messages, nested enums, extensions, fields).
FixOptionsForMessage(const Descriptor & descriptor,const DescriptorProto & proto) const1479 void Generator::FixOptionsForMessage(const Descriptor& descriptor,
1480                                      const DescriptorProto& proto) const {
1481   // Nested messages.
1482   for (int i = 0; i < descriptor.nested_type_count(); ++i) {
1483     FixOptionsForMessage(*descriptor.nested_type(i), proto.nested_type(i));
1484   }
1485   // Oneofs.
1486   for (int i = 0; i < descriptor.oneof_decl_count(); ++i) {
1487     FixOptionsForOneof(*descriptor.oneof_decl(i), proto.oneof_decl(i));
1488   }
1489   // Enums.
1490   for (int i = 0; i < descriptor.enum_type_count(); ++i) {
1491     FixOptionsForEnum(*descriptor.enum_type(i), proto.enum_type(i));
1492   }
1493   // Fields.
1494   for (int i = 0; i < descriptor.field_count(); ++i) {
1495     const FieldDescriptor& field = *descriptor.field(i);
1496     FixOptionsForField(field, proto.field(i));
1497   }
1498   // Extensions.
1499   for (int i = 0; i < descriptor.extension_count(); ++i) {
1500     const FieldDescriptor& field = *descriptor.extension(i);
1501     FixOptionsForField(field, proto.extension(i));
1502   }
1503   // Message option for this message.
1504   PrintDescriptorOptionsFixingCode(descriptor, proto,
1505                                    ModuleLevelDescriptorName(descriptor));
1506 }
1507 
1508 // If a dependency forwards other files through public dependencies, let's
1509 // copy over the corresponding module aliases.
CopyPublicDependenciesAliases(absl::string_view copy_from,const FileDescriptor * file) const1510 void Generator::CopyPublicDependenciesAliases(
1511     absl::string_view copy_from, const FileDescriptor* file) const {
1512   for (int i = 0; i < file->public_dependency_count(); ++i) {
1513     std::string module_name = ModuleName(file->public_dependency(i)->name());
1514     std::string module_alias = ModuleAlias(file->public_dependency(i)->name());
1515     // There's no module alias in the dependent file if it was generated by
1516     // an old protoc (less than 3.0.0-alpha-1). Use module name in this
1517     // situation.
1518     printer_->Print(
1519         "try:\n"
1520         "  $alias$ = $copy_from$.$alias$\n"
1521         "except AttributeError:\n"
1522         "  $alias$ = $copy_from$.$module$\n",
1523         "alias", module_alias, "module", module_name, "copy_from", copy_from);
1524     CopyPublicDependenciesAliases(copy_from, file->public_dependency(i));
1525   }
1526 }
1527 
1528 }  // namespace python
1529 }  // namespace compiler
1530 }  // namespace protobuf
1531 }  // namespace google
1532