1 // Protocol Buffers - Google's data interchange format 2 // Copyright 2008 Google Inc. All rights reserved. 3 // 4 // Use of this source code is governed by a BSD-style 5 // license that can be found in the LICENSE file or at 6 // https://developers.google.com/open-source/licenses/bsd 7 8 // Author: kenton@google.com (Kenton Varda) 9 // Based on original Protocol Buffers design by 10 // Sanjay Ghemawat, Jeff Dean, and others. 11 // 12 // Implements the Protocol Compiler front-end such that it may be reused by 13 // custom compilers written to support other languages. 14 15 #ifndef GOOGLE_PROTOBUF_COMPILER_COMMAND_LINE_INTERFACE_H__ 16 #define GOOGLE_PROTOBUF_COMPILER_COMMAND_LINE_INTERFACE_H__ 17 18 #include <cstdint> 19 #include <functional> 20 #include <memory> 21 #include <string> 22 #include <utility> 23 #include <vector> 24 25 #include "absl/container/btree_map.h" 26 #include "absl/container/flat_hash_map.h" 27 #include "absl/container/flat_hash_set.h" 28 #include "absl/strings/string_view.h" 29 #include "google/protobuf/descriptor.pb.h" 30 #include "google/protobuf/descriptor_database.h" 31 #include "google/protobuf/port.h" 32 33 // Must be included last. 34 #include "google/protobuf/port_def.inc" 35 36 namespace google { 37 namespace protobuf { 38 39 class Descriptor; // descriptor.h 40 class DescriptorDatabase; // descriptor_database.h 41 class DescriptorPool; // descriptor.h 42 class FileDescriptor; // descriptor.h 43 class FileDescriptorSet; // descriptor.h 44 class FileDescriptorProto; // descriptor.pb.h 45 template <typename T> 46 class RepeatedPtrField; // repeated_field.h 47 class SimpleDescriptorDatabase; // descriptor_database.h 48 49 namespace compiler { 50 51 class CodeGenerator; // code_generator.h 52 class GeneratorContext; // code_generator.h 53 class DiskSourceTree; // importer.h 54 55 struct TransitiveDependencyOptions { 56 bool include_json_name = false; 57 bool include_source_code_info = false; 58 bool retain_options = false; 59 }; 60 61 // This class implements the command-line interface to the protocol compiler. 62 // It is designed to make it very easy to create a custom protocol compiler 63 // supporting the languages of your choice. For example, if you wanted to 64 // create a custom protocol compiler binary which includes both the regular 65 // C++ support plus support for your own custom output "Foo", you would 66 // write a class "FooGenerator" which implements the CodeGenerator interface, 67 // then write a main() procedure like this: 68 // 69 // int main(int argc, char* argv[]) { 70 // google::protobuf::compiler::CommandLineInterface cli; 71 // 72 // // Support generation of C++ source and headers. 73 // google::protobuf::compiler::cpp::CppGenerator cpp_generator; 74 // cli.RegisterGenerator("--cpp_out", &cpp_generator, 75 // "Generate C++ source and header."); 76 // 77 // // Support generation of Foo code. 78 // FooGenerator foo_generator; 79 // cli.RegisterGenerator("--foo_out", &foo_generator, 80 // "Generate Foo file."); 81 // 82 // return cli.Run(argc, argv); 83 // } 84 // 85 // The compiler is invoked with syntax like: 86 // protoc --cpp_out=outdir --foo_out=outdir --proto_path=src src/foo.proto 87 // 88 // The .proto file to compile can be specified on the command line using either 89 // its physical file path, or a virtual path relative to a directory specified 90 // in --proto_path. For example, for src/foo.proto, the following two protoc 91 // invocations work the same way: 92 // 1. protoc --proto_path=src src/foo.proto (physical file path) 93 // 2. protoc --proto_path=src foo.proto (virtual path relative to src) 94 // 95 // If a file path can be interpreted both as a physical file path and as a 96 // relative virtual path, the physical file path takes precedence. 97 // 98 // For a full description of the command-line syntax, invoke it with --help. 99 class PROTOC_EXPORT CommandLineInterface { 100 public: 101 static const char* const kPathSeparator; 102 103 CommandLineInterface(); 104 CommandLineInterface(const CommandLineInterface&) = delete; 105 CommandLineInterface& operator=(const CommandLineInterface&) = delete; 106 ~CommandLineInterface(); 107 108 // Register a code generator for a language. 109 // 110 // Parameters: 111 // * flag_name: The command-line flag used to specify an output file of 112 // this type. The name must start with a '-'. If the name is longer 113 // than one letter, it must start with two '-'s. 114 // * generator: The CodeGenerator which will be called to generate files 115 // of this type. 116 // * help_text: Text describing this flag in the --help output. 117 // 118 // Some generators accept extra parameters. You can specify this parameter 119 // on the command-line by placing it before the output directory, separated 120 // by a colon: 121 // protoc --foo_out=enable_bar:outdir 122 // The text before the colon is passed to CodeGenerator::Generate() as the 123 // "parameter". 124 void RegisterGenerator(const std::string& flag_name, CodeGenerator* generator, 125 const std::string& help_text); 126 127 // Register a code generator for a language. 128 // Besides flag_name you can specify another option_flag_name that could be 129 // used to pass extra parameters to the registered code generator. 130 // Suppose you have registered a generator by calling: 131 // command_line_interface.RegisterGenerator("--foo_out", "--foo_opt", ...) 132 // Then you could invoke the compiler with a command like: 133 // protoc --foo_out=enable_bar:outdir --foo_opt=enable_baz 134 // This will pass "enable_bar,enable_baz" as the parameter to the generator. 135 void RegisterGenerator(const std::string& flag_name, 136 const std::string& option_flag_name, 137 CodeGenerator* generator, 138 const std::string& help_text); 139 140 // Enables "plugins". In this mode, if a command-line flag ends with "_out" 141 // but does not match any registered generator, the compiler will attempt to 142 // find a "plugin" to implement the generator. Plugins are just executables. 143 // They should live somewhere in the PATH. 144 // 145 // The compiler determines the executable name to search for by concatenating 146 // exe_name_prefix with the unrecognized flag name, removing "_out". So, for 147 // example, if exe_name_prefix is "protoc-" and you pass the flag --foo_out, 148 // the compiler will try to run the program "protoc-gen-foo". 149 // 150 // The plugin program should implement the following usage: 151 // plugin [--out=OUTDIR] [--parameter=PARAMETER] PROTO_FILES < DESCRIPTORS 152 // --out indicates the output directory (as passed to the --foo_out 153 // parameter); if omitted, the current directory should be used. --parameter 154 // gives the generator parameter, if any was provided (see below). The 155 // PROTO_FILES list the .proto files which were given on the compiler 156 // command-line; these are the files for which the plugin is expected to 157 // generate output code. Finally, DESCRIPTORS is an encoded FileDescriptorSet 158 // (as defined in descriptor.proto). This is piped to the plugin's stdin. 159 // The set will include descriptors for all the files listed in PROTO_FILES as 160 // well as all files that they import. The plugin MUST NOT attempt to read 161 // the PROTO_FILES directly -- it must use the FileDescriptorSet. 162 // 163 // The plugin should generate whatever files are necessary, as code generators 164 // normally do. It should write the names of all files it generates to 165 // stdout. The names should be relative to the output directory, NOT absolute 166 // names or relative to the current directory. If any errors occur, error 167 // messages should be written to stderr. If an error is fatal, the plugin 168 // should exit with a non-zero exit code. 169 // 170 // Plugins can have generator parameters similar to normal built-in 171 // generators. Extra generator parameters can be passed in via a matching 172 // "_opt" parameter. For example: 173 // protoc --plug_out=enable_bar:outdir --plug_opt=enable_baz 174 // This will pass "enable_bar,enable_baz" as the parameter to the plugin. 175 // 176 void AllowPlugins(const std::string& exe_name_prefix); 177 178 // Run the Protocol Compiler with the given command-line parameters. 179 // Returns the error code which should be returned by main(). 180 // 181 // It may not be safe to call Run() in a multi-threaded environment because 182 // it calls strerror(). I'm not sure why you'd want to do this anyway. 183 int Run(int argc, const char* const argv[]); 184 185 // DEPRECATED. Calling this method has no effect. Protocol compiler now 186 // always try to find the .proto file relative to the current directory 187 // first and if the file is not found, it will then treat the input path 188 // as a virtual path. SetInputsAreProtoPathRelative(bool)189 void SetInputsAreProtoPathRelative(bool /* enable */) {} 190 191 // Provides some text which will be printed when the --version flag is 192 // used. The version of libprotoc will also be printed on the next line 193 // after this text. SetVersionInfo(const std::string & text)194 void SetVersionInfo(const std::string& text) { version_info_ = text; } 195 196 197 // Configure protoc to act as if we're in opensource. set_opensource_runtime(bool opensource)198 void set_opensource_runtime(bool opensource) { 199 opensource_runtime_ = opensource; 200 } 201 202 private: 203 // ----------------------------------------------------------------- 204 205 class ErrorPrinter; 206 class GeneratorContextImpl; 207 class MemoryOutputStream; 208 using GeneratorContextMap = 209 absl::flat_hash_map<std::string, std::unique_ptr<GeneratorContextImpl>>; 210 211 // Clear state from previous Run(). 212 void Clear(); 213 214 // Remaps the proto file so that it is relative to one of the directories 215 // in proto_path_. Returns false if an error occurred. 216 bool MakeProtoProtoPathRelative(DiskSourceTree* source_tree, 217 std::string* proto, 218 DescriptorDatabase* fallback_database); 219 220 // Remaps each file in input_files_ so that it is relative to one of the 221 // directories in proto_path_. Returns false if an error occurred. 222 bool MakeInputsBeProtoPathRelative(DiskSourceTree* source_tree, 223 DescriptorDatabase* fallback_database); 224 225 // Fails if these files use proto3 optional and the code generator doesn't 226 // support it. This is a permanent check. 227 bool EnforceProto3OptionalSupport( 228 const std::string& codegen_name, uint64_t supported_features, 229 const std::vector<const FileDescriptor*>& parsed_files) const; 230 231 bool EnforceEditionsSupport( 232 const std::string& codegen_name, uint64_t supported_features, 233 Edition minimum_edition, Edition maximum_edition, 234 const std::vector<const FileDescriptor*>& parsed_files) const; 235 236 237 // Return status for ParseArguments() and InterpretArgument(). 238 enum ParseArgumentStatus { 239 PARSE_ARGUMENT_DONE_AND_CONTINUE, 240 PARSE_ARGUMENT_DONE_AND_EXIT, 241 PARSE_ARGUMENT_FAIL 242 }; 243 244 // Parse all command-line arguments. 245 ParseArgumentStatus ParseArguments(int argc, const char* const argv[]); 246 247 // Read an argument file and append the file's content to the list of 248 // arguments. Return false if the file cannot be read. 249 bool ExpandArgumentFile(const char* file, 250 std::vector<std::string>* arguments); 251 252 // Parses a command-line argument into a name/value pair. Returns 253 // true if the next argument in the argv should be used as the value, 254 // false otherwise. 255 // 256 // Examples: 257 // "-Isrc/protos" -> 258 // name = "-I", value = "src/protos" 259 // "--cpp_out=src/foo.pb2.cc" -> 260 // name = "--cpp_out", value = "src/foo.pb2.cc" 261 // "foo.proto" -> 262 // name = "", value = "foo.proto" 263 bool ParseArgument(const char* arg, std::string* name, std::string* value); 264 265 // Interprets arguments parsed with ParseArgument. 266 ParseArgumentStatus InterpretArgument(const std::string& name, 267 const std::string& value); 268 269 // Print the --help text to stderr. 270 void PrintHelpText(); 271 272 // Loads proto_path_ into the provided source_tree. 273 bool InitializeDiskSourceTree(DiskSourceTree* source_tree, 274 DescriptorDatabase* fallback_database); 275 276 // Verify that all the input files exist in the given database. 277 bool VerifyInputFilesInDescriptors(DescriptorDatabase* fallback_database); 278 279 // Parses input_files_ into parsed_files 280 bool ParseInputFiles(DescriptorPool* descriptor_pool, 281 DiskSourceTree* source_tree, 282 std::vector<const FileDescriptor*>* parsed_files); 283 284 bool SetupFeatureResolution(DescriptorPool& pool); 285 286 // Generate the given output file from the given input. 287 struct OutputDirective; // see below 288 bool GenerateOutput(const std::vector<const FileDescriptor*>& parsed_files, 289 const OutputDirective& output_directive, 290 GeneratorContext* generator_context); 291 bool GeneratePluginOutput( 292 const std::vector<const FileDescriptor*>& parsed_files, 293 const std::string& plugin_name, const std::string& parameter, 294 GeneratorContext* generator_context, std::string* error); 295 296 // Implements --encode and --decode. 297 bool EncodeOrDecode(const DescriptorPool* pool); 298 299 // Implements the --descriptor_set_out option. 300 bool WriteDescriptorSet( 301 const std::vector<const FileDescriptor*>& parsed_files); 302 303 // Implements the --edition_defaults_out option. 304 bool WriteEditionDefaults(const DescriptorPool& pool); 305 306 // Implements the --dependency_out option 307 bool GenerateDependencyManifestFile( 308 const std::vector<const FileDescriptor*>& parsed_files, 309 const GeneratorContextMap& output_directories, 310 DiskSourceTree* source_tree); 311 312 // Implements the --print_free_field_numbers. This function prints free field 313 // numbers into stdout for the message and it's nested message types in 314 // post-order, i.e. nested types first. Printed range are left-right 315 // inclusive, i.e. [a, b]. 316 // 317 // Groups: 318 // For historical reasons, groups are considered to share the same 319 // field number space with the parent message, thus it will not print free 320 // field numbers for groups. The field numbers used in the groups are 321 // excluded in the free field numbers of the parent message. 322 // 323 // Extension Ranges: 324 // Extension ranges are considered ocuppied field numbers and they will not be 325 // listed as free numbers in the output. 326 void PrintFreeFieldNumbers(const Descriptor* descriptor); 327 328 // Get all transitive dependencies of the given file (including the file 329 // itself), adding them to the given list of FileDescriptorProtos. The 330 // protos will be ordered such that every file is listed before any file that 331 // depends on it, so that you can call DescriptorPool::BuildFile() on them 332 // in order. Any files in *already_seen will not be added, and each file 333 // added will be inserted into *already_seen. If include_source_code_info 334 // (from TransitiveDependencyOptions) is true then include the source code 335 // information in the FileDescriptorProtos. If include_json_name is true, 336 // populate the json_name field of FieldDescriptorProto for all fields. 337 void GetTransitiveDependencies( 338 const FileDescriptor* file, 339 absl::flat_hash_set<const FileDescriptor*>* already_seen, 340 RepeatedPtrField<FileDescriptorProto>* output, 341 const TransitiveDependencyOptions& options = 342 TransitiveDependencyOptions()); 343 344 345 // ----------------------------------------------------------------- 346 347 // The name of the executable as invoked (i.e. argv[0]). 348 std::string executable_name_; 349 350 // Version info set with SetVersionInfo(). 351 std::string version_info_; 352 353 // Registered generators. 354 struct GeneratorInfo { 355 std::string flag_name; 356 std::string option_flag_name; 357 CodeGenerator* generator; 358 std::string help_text; 359 }; 360 361 const GeneratorInfo* FindGeneratorByFlag(const std::string& name) const; 362 const GeneratorInfo* FindGeneratorByOption(const std::string& option) const; 363 364 absl::btree_map<std::string, GeneratorInfo> generators_by_flag_name_; 365 absl::flat_hash_map<std::string, GeneratorInfo> generators_by_option_name_; 366 // A map from generator names to the parameters specified using the option 367 // flag. For example, if the user invokes the compiler with: 368 // protoc --foo_out=outputdir --foo_opt=enable_bar ... 369 // Then there will be an entry ("--foo_out", "enable_bar") in this map. 370 absl::flat_hash_map<std::string, std::string> generator_parameters_; 371 // Similar to generator_parameters_, stores the parameters for plugins but the 372 // key is the actual plugin name e.g. "protoc-gen-foo". 373 absl::flat_hash_map<std::string, std::string> plugin_parameters_; 374 375 // See AllowPlugins(). If this is empty, plugins aren't allowed. 376 std::string plugin_prefix_; 377 378 // Maps specific plugin names to files. When executing a plugin, this map 379 // is searched first to find the plugin executable. If not found here, the 380 // PATH (or other OS-specific search strategy) is searched. 381 absl::flat_hash_map<std::string, std::string> plugins_; 382 383 // Stuff parsed from command line. 384 enum Mode { 385 MODE_COMPILE, // Normal mode: parse .proto files and compile them. 386 MODE_ENCODE, // --encode: read text from stdin, write binary to stdout. 387 MODE_DECODE, // --decode: read binary from stdin, write text to stdout. 388 MODE_PRINT, // Print mode: print info of the given .proto files and exit. 389 }; 390 391 Mode mode_ = MODE_COMPILE; 392 393 enum PrintMode { 394 PRINT_NONE, // Not in MODE_PRINT 395 PRINT_FREE_FIELDS, // --print_free_fields 396 }; 397 398 PrintMode print_mode_ = PRINT_NONE; 399 400 enum ErrorFormat { 401 ERROR_FORMAT_GCC, // GCC error output format (default). 402 ERROR_FORMAT_MSVS // Visual Studio output (--error_format=msvs). 403 }; 404 405 ErrorFormat error_format_ = ERROR_FORMAT_GCC; 406 407 // True if we should treat warnings as errors that fail the compilation. 408 bool fatal_warnings_ = false; 409 410 std::vector<std::pair<std::string, std::string>> 411 proto_path_; // Search path for proto files. 412 std::vector<std::string> input_files_; // Names of the input proto files. 413 414 // Names of proto files which are allowed to be imported. Used by build 415 // systems to enforce depend-on-what-you-import. 416 absl::flat_hash_set<std::string> direct_dependencies_; 417 bool direct_dependencies_explicitly_set_ = false; 418 419 // If there's a violation of depend-on-what-you-import, this string will be 420 // presented to the user. "%s" will be replaced with the violating import. 421 std::string direct_dependencies_violation_msg_; 422 423 // output_directives_ lists all the files we are supposed to output and what 424 // generator to use for each. 425 struct OutputDirective { 426 std::string name; // E.g. "--foo_out" 427 CodeGenerator* generator; // NULL for plugins 428 std::string parameter; 429 std::string output_location; 430 }; 431 std::vector<OutputDirective> output_directives_; 432 433 // When using --encode or --decode, this names the type we are encoding or 434 // decoding. (Empty string indicates --decode_raw.) 435 std::string codec_type_; 436 437 // If --descriptor_set_in was given, these are filenames containing 438 // parsed FileDescriptorSets to be used for loading protos. Otherwise, empty. 439 std::vector<std::string> descriptor_set_in_names_; 440 441 // If --descriptor_set_out was given, this is the filename to which the 442 // FileDescriptorSet should be written. Otherwise, empty. 443 std::string descriptor_set_out_name_; 444 445 std::string edition_defaults_out_name_; 446 Edition edition_defaults_minimum_; 447 Edition edition_defaults_maximum_; 448 449 // If --dependency_out was given, this is the path to the file where the 450 // dependency file will be written. Otherwise, empty. 451 std::string dependency_out_name_; 452 453 bool experimental_editions_ = false; 454 455 // True if --include_imports was given, meaning that we should 456 // write all transitive dependencies to the DescriptorSet. Otherwise, only 457 // the .proto files listed on the command-line are added. 458 bool imports_in_descriptor_set_; 459 460 // True if --include_source_info was given, meaning that we should not strip 461 // SourceCodeInfo from the DescriptorSet. 462 bool source_info_in_descriptor_set_ = false; 463 464 // True if --retain_options was given, meaning that we shouldn't strip any 465 // options from the DescriptorSet, even if they have RETENTION_SOURCE 466 // specified. 467 bool retain_options_in_descriptor_set_ = false; 468 469 // Was the --disallow_services flag used? 470 bool disallow_services_ = false; 471 472 // When using --encode, this will be passed to SetSerializationDeterministic. 473 bool deterministic_output_ = false; 474 475 bool opensource_runtime_ = google::protobuf::internal::IsOss(); 476 477 }; 478 479 } // namespace compiler 480 } // namespace protobuf 481 } // namespace google 482 483 #include "google/protobuf/port_undef.inc" 484 485 #endif // GOOGLE_PROTOBUF_COMPILER_COMMAND_LINE_INTERFACE_H__ 486