• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 //
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file or at
6 // https://developers.google.com/open-source/licenses/bsd
7 
8 // Author: kenton@google.com (Kenton Varda)
9 //  Based on original Protocol Buffers design by
10 //  Sanjay Ghemawat, Jeff Dean, and others.
11 
12 #include "google/protobuf/compiler/command_line_interface.h"
13 
14 #include <errno.h>
15 #include <limits.h>
16 #include <stdio.h>
17 #include <stdlib.h>
18 #include <sys/types.h>
19 
20 #include <algorithm>
21 #include <cstdint>
22 #include <cstdlib>
23 #include <cstring>
24 #include <fstream>
25 #include <iostream>
26 #include <memory>
27 #include <ostream>
28 #include <string>
29 #include <utility>
30 #include <vector>
31 #ifdef major
32 #undef major
33 #endif
34 #ifdef minor
35 #undef minor
36 #endif
37 #include <fcntl.h>
38 #include <sys/stat.h>
39 
40 #ifndef _MSC_VER
41 #include <unistd.h>
42 #endif
43 
44 #if defined(__APPLE__)
45 #include <mach-o/dyld.h>
46 #elif defined(__FreeBSD__)
47 #include <sys/sysctl.h>
48 #endif
49 
50 #include "absl/algorithm/container.h"
51 #include "absl/base/attributes.h"
52 #include "absl/base/log_severity.h"
53 #include "absl/container/btree_map.h"
54 #include "absl/container/btree_set.h"
55 #include "absl/container/flat_hash_map.h"
56 #include "absl/container/flat_hash_set.h"
57 #include "absl/log/absl_check.h"
58 #include "absl/log/absl_log.h"
59 #include "absl/log/globals.h"
60 #include "absl/status/status.h"
61 #include "absl/status/statusor.h"
62 #include "absl/strings/ascii.h"
63 #include "absl/strings/match.h"
64 #include "absl/strings/str_cat.h"
65 #include "absl/strings/str_format.h"
66 #include "absl/strings/str_replace.h"
67 #include "absl/strings/str_split.h"
68 #include "absl/strings/string_view.h"
69 #include "absl/strings/substitute.h"
70 #include "absl/types/span.h"
71 #include "google/protobuf/compiler/code_generator.h"
72 #include "google/protobuf/compiler/importer.h"
73 #include "google/protobuf/compiler/plugin.pb.h"
74 #include "google/protobuf/compiler/retention.h"
75 #include "google/protobuf/compiler/subprocess.h"
76 #include "google/protobuf/compiler/versions.h"
77 #include "google/protobuf/compiler/zip_writer.h"
78 #include "google/protobuf/descriptor.h"
79 #include "google/protobuf/descriptor.pb.h"
80 #include "google/protobuf/descriptor_database.h"
81 #include "google/protobuf/descriptor_visitor.h"
82 #include "google/protobuf/dynamic_message.h"
83 #include "google/protobuf/feature_resolver.h"
84 #include "google/protobuf/io/coded_stream.h"
85 #include "google/protobuf/io/printer.h"
86 #include "google/protobuf/io/zero_copy_stream_impl.h"
87 #include "google/protobuf/io/zero_copy_stream_impl_lite.h"
88 #include "google/protobuf/text_format.h"
89 
90 
91 #ifdef _WIN32
92 #include "google/protobuf/io/io_win32.h"
93 #endif
94 
95 #include "google/protobuf/stubs/platform_macros.h"
96 
97 // Must be included last.
98 #include "google/protobuf/port_def.inc"
99 
100 namespace google {
101 namespace protobuf {
102 namespace compiler {
103 
104 #ifndef O_BINARY
105 #ifdef _O_BINARY
106 #define O_BINARY _O_BINARY
107 #else
108 #define O_BINARY 0  // If this isn't defined, the platform doesn't need it.
109 #endif
110 #endif
111 
112 namespace {
113 #if defined(_WIN32)
114 // DO NOT include <io.h>, instead create functions in io_win32.{h,cc} and import
115 // them like we do below.
116 using google::protobuf::io::win32::access;
117 using google::protobuf::io::win32::close;
118 using google::protobuf::io::win32::mkdir;
119 using google::protobuf::io::win32::open;
120 using google::protobuf::io::win32::setmode;
121 using google::protobuf::io::win32::write;
122 #endif
123 
// Default message template for a file that is imported without being listed
// in --direct_dependencies.  The trailing "%s" is a placeholder; presumably
// the reporting code substitutes the offending file name (verify at the use
// site).  Both the characters and the pointer are const so the default cannot
// be reseated by accident.
static const char* const kDefaultDirectDependenciesViolationMsg =
    "File is imported but not declared in --direct_dependencies: %s";
126 
// Reports whether |text| looks like a Windows-style absolute path, i.e. a
// drive letter, a colon, and then a slash or backslash (example: "C:\foo").
// The drive colon must be the only colon in the string.  Always returns false
// when not built for Windows or Cygwin.  TODO:  Share this with
// copy in importer.cc?
static bool IsWindowsAbsolutePath(const std::string& text) {
#if defined(_WIN32) || defined(__CYGWIN__)
  if (text.size() < 3) {
    return false;
  }
  const bool has_drive_prefix = text[1] == ':' && absl::ascii_isalpha(text[0]);
  const bool has_separator = text[2] == '/' || text[2] == '\\';
  return has_drive_prefix && has_separator && text.find_last_of(':') == 1;
#else
  return false;
#endif
}
138 
// Switches file descriptor |fd| to text mode.  Only Windows builds do any
// work here; a failing setmode() is logged as a warning and otherwise ignored.
void SetFdToTextMode(int fd) {
#ifdef _WIN32
  const int setmode_result = setmode(fd, _O_TEXT);
  if (setmode_result == -1) {
    // This should never happen, I think.
    ABSL_LOG(WARNING) << "setmode(" << fd << ", _O_TEXT): " << strerror(errno);
  }
#endif
  // (Text and binary are the same on non-Windows platforms.)
}
148 
// Switches file descriptor |fd| to binary mode.  Only Windows builds do any
// work here; a failing setmode() is logged as a warning and otherwise ignored.
void SetFdToBinaryMode(int fd) {
#ifdef _WIN32
  const int setmode_result = setmode(fd, _O_BINARY);
  if (setmode_result == -1) {
    // This should never happen, I think.
    ABSL_LOG(WARNING) << "setmode(" << fd
                      << ", _O_BINARY): " << strerror(errno);
  }
#endif
  // (Text and binary are the same on non-Windows platforms.)
}
159 
// Appends a '/' to |*path| unless it is empty or already ends with '/'.
void AddTrailingSlash(std::string* path) {
  if (path->empty() || path->back() == '/') {
    return;
  }
  path->push_back('/');
}
165 
// Returns true when |path| is empty or access(path, F_OK) succeeds.
// Otherwise prints "<path>: <strerror(errno)>" to stderr and returns false.
// Note that only existence is probed; nothing here checks that the entry is
// actually a directory.
bool VerifyDirectoryExists(const std::string& path) {
  if (path.empty()) return true;

  const bool exists = access(path.c_str(), F_OK) != -1;
  if (!exists) {
    std::cerr << path << ": " << strerror(errno) << std::endl;
  }
  return exists;
}
176 
177 // Try to create the parent directory of the given file, creating the parent's
178 // parent if necessary, and so on.  The full file name is actually
179 // (prefix + filename), but we assume |prefix| already exists and only create
180 // directories listed in |filename|.
TryCreateParentDirectory(const std::string & prefix,const std::string & filename)181 bool TryCreateParentDirectory(const std::string& prefix,
182                               const std::string& filename) {
183   // Recursively create parent directories to the output file.
184   // On Windows, both '/' and '\' are valid path separators.
185   std::vector<std::string> parts =
186       absl::StrSplit(filename, absl::ByAnyChar("/\\"), absl::SkipEmpty());
187   std::string path_so_far = prefix;
188   for (size_t i = 0; i < parts.size() - 1; ++i) {
189     path_so_far += parts[i];
190     if (mkdir(path_so_far.c_str(), 0777) != 0) {
191       if (errno != EEXIST) {
192         std::cerr << filename << ": while trying to create directory "
193                   << path_so_far << ": " << strerror(errno) << std::endl;
194         return false;
195       }
196     }
197     path_so_far += '/';
198   }
199 
200   return true;
201 }
202 
// Get the absolute path of this protoc binary.
//
// On success stores the path in |*path| and returns true.  Returns false and
// leaves |*path| untouched when the platform-specific lookup fails or yields
// an empty result.
bool GetProtocAbsolutePath(std::string* path) {
#ifdef _WIN32
  char buffer[MAX_PATH];
  int len = GetModuleFileNameA(nullptr, buffer, MAX_PATH);
#elif defined(__APPLE__)
  char buffer[PATH_MAX];
  int len = 0;

  char dirtybuffer[PATH_MAX];
  uint32_t size = sizeof(dirtybuffer);
  // realpath() returns null on failure and then gives no guarantee about the
  // contents of |buffer|, so only measure the buffer when it succeeded.
  if (_NSGetExecutablePath(dirtybuffer, &size) == 0 &&
      realpath(dirtybuffer, buffer) != nullptr) {
    len = strlen(buffer);
  }
#elif defined(__FreeBSD__)
  char buffer[PATH_MAX];
  size_t len = PATH_MAX;
  int mib[4] = {CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1};
  if (sysctl(mib, 4, &buffer, &len, nullptr, 0) != 0) {
    len = 0;
  }
#else
  char buffer[PATH_MAX];
  // readlink() reports failure as -1, which the `len > 0` test below rejects.
  int len = readlink("/proc/self/exe", buffer, PATH_MAX);
#endif
  if (len > 0) {
    path->assign(buffer, len);
    return true;
  } else {
    return false;
  }
}
236 
237 // Whether a path is where google/protobuf/descriptor.proto and other well-known
238 // type protos are installed.
IsInstalledProtoPath(absl::string_view path)239 bool IsInstalledProtoPath(absl::string_view path) {
240   // Checking the descriptor.proto file should be good enough.
241   std::string file_path =
242       absl::StrCat(path, "/google/protobuf/descriptor.proto");
243   return access(file_path.c_str(), F_OK) != -1;
244 }
245 
246 // Add the paths where google/protobuf/descriptor.proto and other well-known
247 // type protos are installed.
AddDefaultProtoPaths(std::vector<std::pair<std::string,std::string>> * paths)248 void AddDefaultProtoPaths(
249     std::vector<std::pair<std::string, std::string>>* paths) {
250   // TODO: The code currently only checks relative paths of where
251   // the protoc binary is installed. We probably should make it handle more
252   // cases than that.
253   std::string path_str;
254   if (!GetProtocAbsolutePath(&path_str)) {
255     return;
256   }
257   absl::string_view path(path_str);
258   // Strip the binary name.
259   size_t pos = path.find_last_of("/\\");
260   if (pos == path.npos || pos == 0) {
261     return;
262   }
263   path = path.substr(0, pos);
264   // Check the binary's directory.
265   if (IsInstalledProtoPath(path)) {
266     paths->emplace_back("", path);
267     return;
268   }
269   // Check if there is an include subdirectory.
270   std::string include_path = absl::StrCat(path, "/include");
271   if (IsInstalledProtoPath(include_path)) {
272     paths->emplace_back("", std::move(include_path));
273     return;
274   }
275   // Check if the upper level directory has an "include" subdirectory.
276   pos = path.find_last_of("/\\");
277   if (pos == std::string::npos || pos == 0) {
278     return;
279   }
280   path = path.substr(0, pos);
281   include_path = absl::StrCat(path, "/include");
282   if (IsInstalledProtoPath(include_path)) {
283     paths->emplace_back("", std::move(include_path));
284     return;
285   }
286 }
287 
PluginName(absl::string_view plugin_prefix,absl::string_view directive)288 std::string PluginName(absl::string_view plugin_prefix,
289                        absl::string_view directive) {
290   // Assuming the directive starts with "--" and ends with "_out" or "_opt",
291   // strip the "--" and "_out/_opt" and add the plugin prefix.
292   return absl::StrCat(plugin_prefix, "gen-",
293                       directive.substr(2, directive.size() - 6));
294 }
295 
GetBootstrapParam(const std::string & parameter)296 bool GetBootstrapParam(const std::string& parameter) {
297   std::vector<std::string> parts = absl::StrSplit(parameter, ',');
298   for (const auto& part : parts) {
299     if (part == "bootstrap") {
300       return true;
301     }
302   }
303   return false;
304 }
305 
306 
307 }  // namespace
308 
GetTransitiveDependencies(const FileDescriptor * file,absl::flat_hash_set<const FileDescriptor * > * already_seen,RepeatedPtrField<FileDescriptorProto> * output,const TransitiveDependencyOptions & options)309 void CommandLineInterface::GetTransitiveDependencies(
310     const FileDescriptor* file,
311     absl::flat_hash_set<const FileDescriptor*>* already_seen,
312     RepeatedPtrField<FileDescriptorProto>* output,
313     const TransitiveDependencyOptions& options) {
314   if (!already_seen->insert(file).second) {
315     // Already saw this file.  Skip.
316     return;
317   }
318 
319   // Add all dependencies.
320   for (int i = 0; i < file->dependency_count(); ++i) {
321     GetTransitiveDependencies(file->dependency(i), already_seen, output,
322                               options);
323   }
324 
325   // Add this file.
326   FileDescriptorProto* new_descriptor = output->Add();
327   file->CopyTo(new_descriptor);
328   if (options.include_source_code_info) {
329     file->CopySourceCodeInfoTo(new_descriptor);
330   }
331   if (!options.retain_options) {
332     StripSourceRetentionOptions(*file->pool(), *new_descriptor);
333   }
334   if (options.include_json_name) {
335     file->CopyJsonNameTo(new_descriptor);
336   }
337 }
338 
339 // A MultiFileErrorCollector that prints errors to stderr.
340 class CommandLineInterface::ErrorPrinter
341     : public MultiFileErrorCollector,
342       public io::ErrorCollector,
343       public DescriptorPool::ErrorCollector {
344  public:
ErrorPrinter(ErrorFormat format,DiskSourceTree * tree=nullptr)345   explicit ErrorPrinter(ErrorFormat format, DiskSourceTree* tree = nullptr)
346       : format_(format),
347         tree_(tree),
348         found_errors_(false),
349         found_warnings_(false) {}
350   ~ErrorPrinter() override = default;
351 
352   // implements MultiFileErrorCollector ------------------------------
RecordError(absl::string_view filename,int line,int column,absl::string_view message)353   void RecordError(absl::string_view filename, int line, int column,
354                    absl::string_view message) override {
355     found_errors_ = true;
356     AddErrorOrWarning(filename, line, column, message, "error", std::cerr);
357   }
358 
RecordWarning(absl::string_view filename,int line,int column,absl::string_view message)359   void RecordWarning(absl::string_view filename, int line, int column,
360                      absl::string_view message) override {
361     found_warnings_ = true;
362     AddErrorOrWarning(filename, line, column, message, "warning", std::clog);
363   }
364 
365   // implements io::ErrorCollector -----------------------------------
RecordError(int line,int column,absl::string_view message)366   void RecordError(int line, int column, absl::string_view message) override {
367     RecordError("input", line, column, message);
368   }
369 
RecordWarning(int line,int column,absl::string_view message)370   void RecordWarning(int line, int column, absl::string_view message) override {
371     AddErrorOrWarning("input", line, column, message, "warning", std::clog);
372   }
373 
374   // implements DescriptorPool::ErrorCollector-------------------------
RecordError(absl::string_view filename,absl::string_view element_name,const Message * descriptor,ErrorLocation location,absl::string_view message)375   void RecordError(absl::string_view filename, absl::string_view element_name,
376                    const Message* descriptor, ErrorLocation location,
377                    absl::string_view message) override {
378     AddErrorOrWarning(filename, -1, -1, message, "error", std::cerr);
379   }
380 
RecordWarning(absl::string_view filename,absl::string_view element_name,const Message * descriptor,ErrorLocation location,absl::string_view message)381   void RecordWarning(absl::string_view filename, absl::string_view element_name,
382                      const Message* descriptor, ErrorLocation location,
383                      absl::string_view message) override {
384     AddErrorOrWarning(filename, -1, -1, message, "warning", std::clog);
385   }
386 
FoundErrors() const387   bool FoundErrors() const { return found_errors_; }
388 
FoundWarnings() const389   bool FoundWarnings() const { return found_warnings_; }
390 
391  private:
AddErrorOrWarning(absl::string_view filename,int line,int column,absl::string_view message,absl::string_view type,std::ostream & out)392   void AddErrorOrWarning(absl::string_view filename, int line, int column,
393                          absl::string_view message, absl::string_view type,
394                          std::ostream& out) {
395     std::string dfile;
396     if (
397 #ifndef PROTOBUF_OPENSOURCE
398         // Print full path when running under MSVS
399         format_ == CommandLineInterface::ERROR_FORMAT_MSVS &&
400 #endif  // !PROTOBUF_OPENSOURCE
401         tree_ != nullptr && tree_->VirtualFileToDiskFile(filename, &dfile)) {
402       out << dfile;
403     } else {
404       out << filename;
405     }
406 
407     // Users typically expect 1-based line/column numbers, so we add 1
408     // to each here.
409     if (line != -1) {
410       // Allow for both GCC- and Visual-Studio-compatible output.
411       switch (format_) {
412         case CommandLineInterface::ERROR_FORMAT_GCC:
413           out << ":" << (line + 1) << ":" << (column + 1);
414           break;
415         case CommandLineInterface::ERROR_FORMAT_MSVS:
416           out << "(" << (line + 1) << ") : " << type
417               << " in column=" << (column + 1);
418           break;
419       }
420     }
421 
422     if (type == "warning") {
423       out << ": warning: " << message << std::endl;
424     } else {
425       out << ": " << message << std::endl;
426     }
427   }
428 
429   const ErrorFormat format_;
430   DiskSourceTree* tree_;
431   bool found_errors_;
432   bool found_warnings_;
433 };
434 
435 // -------------------------------------------------------------------
436 
// A GeneratorContext implementation that buffers files in memory, then dumps
// them all to disk on demand.  Generated content is kept in files_, keyed by
// output path, until WriteAllToDisk() or WriteAllToZip() is called.
class CommandLineInterface::GeneratorContextImpl : public GeneratorContext {
 public:
  // |parsed_files| is held by reference (see parsed_files_), so the caller's
  // vector has to outlive this object.
  explicit GeneratorContextImpl(
      const std::vector<const FileDescriptor*>& parsed_files);

  // Write all files in the directory to disk at the given output location,
  // which must end in a '/'.  Returns false on failure, or immediately if an
  // error was already recorded in had_error_.
  bool WriteAllToDisk(const std::string& prefix);

  // Write the contents of this directory to a ZIP-format archive with the
  // given name.  Returns false on failure, or immediately if an error was
  // already recorded in had_error_.
  bool WriteAllToZip(const std::string& filename);

  // Add a boilerplate META-INF/MANIFEST.MF file as required by the Java JAR
  // format, unless one has already been written.
  void AddJarManifest();

  // Get name of all output files.  The names are appended to
  // |output_filenames|; existing entries are kept.
  void GetOutputFilenames(std::vector<std::string>* output_filenames);
  // implements GeneratorContext --------------------------------------
  // Each Open* call returns a newly allocated MemoryOutputStream.
  io::ZeroCopyOutputStream* Open(const std::string& filename) override;
  io::ZeroCopyOutputStream* OpenForAppend(const std::string& filename) override;
  io::ZeroCopyOutputStream* OpenForInsert(
      const std::string& filename, const std::string& insertion_point) override;
  io::ZeroCopyOutputStream* OpenForInsertWithGeneratedCodeInfo(
      const std::string& filename, const std::string& insertion_point,
      const google::protobuf::GeneratedCodeInfo& info) override;
  // Replaces the contents of |output| with the parsed files given at
  // construction.
  void ListParsedFiles(std::vector<const FileDescriptor*>* output) override {
    *output = parsed_files_;
  }

 private:
  // MemoryOutputStream reads and writes files_ directly.
  friend class MemoryOutputStream;

  // The files_ field maps from path keys to file content values. It's a map
  // instead of an unordered_map so that files are written in order (good when
  // writing zips).
  absl::btree_map<std::string, std::string> files_;
  // Reference to the vector passed to the constructor; not owned.
  const std::vector<const FileDescriptor*>& parsed_files_;
  // When true, WriteAllToDisk() and WriteAllToZip() refuse to write anything.
  // NOTE(review): nothing in this class's own methods sets it; presumably the
  // friend MemoryOutputStream does. Confirm against its destructor.
  bool had_error_;
};
480 
// A ZeroCopyOutputStream that accumulates everything written to it in an
// in-memory string (data_).  Each instance is tied to a GeneratorContextImpl
// and a file name, and optionally to an insertion point within that file.
class CommandLineInterface::MemoryOutputStream
    : public io::ZeroCopyOutputStream {
 public:
  // Stream for a whole file; |append_mode| records whether the output should
  // be appended to an existing file.
  MemoryOutputStream(GeneratorContextImpl* directory,
                     const std::string& filename, bool append_mode);
  // Stream for content destined for |insertion_point| within |filename|.
  MemoryOutputStream(GeneratorContextImpl* directory,
                     const std::string& filename,
                     const std::string& insertion_point);
  // As above, additionally carrying the GeneratedCodeInfo annotations that
  // describe the inserted content.
  MemoryOutputStream(GeneratorContextImpl* directory,
                     const std::string& filename,
                     const std::string& insertion_point,
                     const google::protobuf::GeneratedCodeInfo& info);
  ~MemoryOutputStream() override;

  // implements ZeroCopyOutputStream ---------------------------------
  // All three calls are forwarded to the StringOutputStream in inner_.
  bool Next(void** data, int* size) override {
    return inner_->Next(data, size);
  }
  void BackUp(int count) override { inner_->BackUp(count); }
  int64_t ByteCount() const override { return inner_->ByteCount(); }

 private:
  // Checks to see if "filename_.pb.meta" exists in directory_; if so, fixes the
  // offsets in that GeneratedCodeInfo record to reflect bytes inserted in
  // filename_ at original offset insertion_offset with length insertion_length.
  // Also adds in the data from info_to_insert_ with updated offsets governed by
  // insertion_offset and indent_length. We assume that insertions will not
  // occur within any given annotated span of text. insertion_content must end
  // with an endline.
  void UpdateMetadata(const std::string& insertion_content,
                      size_t insertion_offset, size_t insertion_length,
                      size_t indent_length);

  // Inserts info_to_insert_ into target_info, assuming that the relevant
  // insertion was made at insertion_offset in file_content with the given
  // indent_length. insertion_content must end with an endline.
  void InsertShiftedInfo(const std::string& insertion_content,
                         size_t insertion_offset, size_t indent_length,
                         google::protobuf::GeneratedCodeInfo& target_info);

  // Where to insert the string when it's done.
  GeneratorContextImpl* directory_;
  std::string filename_;
  // Empty unless the stream was created with an insertion point.
  std::string insertion_point_;

  // The string we're building.
  std::string data_;

  // Whether we should append the output stream to the existing file.
  bool append_mode_;

  // StringOutputStream writing to data_.
  std::unique_ptr<io::StringOutputStream> inner_;

  // The GeneratedCodeInfo to insert at the insertion point.
  google::protobuf::GeneratedCodeInfo info_to_insert_;
};
538 
539 // -------------------------------------------------------------------
540 
// Binds parsed_files_ to the caller's |parsed_files| vector and starts with
// no error recorded.
CommandLineInterface::GeneratorContextImpl::GeneratorContextImpl(
    const std::vector<const FileDescriptor*>& parsed_files)
    : parsed_files_(parsed_files), had_error_(false) {}
544 
WriteAllToDisk(const std::string & prefix)545 bool CommandLineInterface::GeneratorContextImpl::WriteAllToDisk(
546     const std::string& prefix) {
547   if (had_error_) {
548     return false;
549   }
550 
551   if (!VerifyDirectoryExists(prefix)) {
552     return false;
553   }
554 
555   for (const auto& pair : files_) {
556     const std::string& relative_filename = pair.first;
557     const char* data = pair.second.data();
558     int size = pair.second.size();
559 
560     if (!TryCreateParentDirectory(prefix, relative_filename)) {
561       return false;
562     }
563     std::string filename = prefix + relative_filename;
564 
565     // Create the output file.
566     int file_descriptor;
567     do {
568       file_descriptor =
569           open(filename.c_str(), O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0666);
570     } while (file_descriptor < 0 && errno == EINTR);
571 
572     if (file_descriptor < 0) {
573       int error = errno;
574       std::cerr << filename << ": " << strerror(error);
575       return false;
576     }
577 
578     // Write the file.
579     while (size > 0) {
580       int write_result;
581       do {
582         write_result = write(file_descriptor, data, size);
583       } while (write_result < 0 && errno == EINTR);
584 
585       if (write_result <= 0) {
586         // Write error.
587 
588         // FIXME(kenton):  According to the man page, if write() returns zero,
589         //   there was no error; write() simply did not write anything.  It's
590         //   unclear under what circumstances this might happen, but presumably
591         //   errno won't be set in this case.  I am confused as to how such an
592         //   event should be handled.  For now I'm treating it as an error,
593         //   since retrying seems like it could lead to an infinite loop.  I
594         //   suspect this never actually happens anyway.
595 
596         if (write_result < 0) {
597           int error = errno;
598           std::cerr << filename << ": write: " << strerror(error);
599         } else {
600           std::cerr << filename << ": write() returned zero?" << std::endl;
601         }
602         return false;
603       }
604 
605       data += write_result;
606       size -= write_result;
607     }
608 
609     if (close(file_descriptor) != 0) {
610       int error = errno;
611       std::cerr << filename << ": close: " << strerror(error);
612       return false;
613     }
614   }
615 
616   return true;
617 }
618 
WriteAllToZip(const std::string & filename)619 bool CommandLineInterface::GeneratorContextImpl::WriteAllToZip(
620     const std::string& filename) {
621   if (had_error_) {
622     return false;
623   }
624 
625   // Create the output file.
626   int file_descriptor;
627   do {
628     file_descriptor =
629         open(filename.c_str(), O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0666);
630   } while (file_descriptor < 0 && errno == EINTR);
631 
632   if (file_descriptor < 0) {
633     int error = errno;
634     std::cerr << filename << ": " << strerror(error);
635     return false;
636   }
637 
638   // Create the ZipWriter
639   io::FileOutputStream stream(file_descriptor);
640   ZipWriter zip_writer(&stream);
641 
642   for (const auto& pair : files_) {
643     zip_writer.Write(pair.first, pair.second);
644   }
645 
646   zip_writer.WriteDirectory();
647 
648   if (stream.GetErrno() != 0) {
649     std::cerr << filename << ": " << strerror(stream.GetErrno()) << std::endl;
650     return false;
651   }
652 
653   if (!stream.Close()) {
654     std::cerr << filename << ": " << strerror(stream.GetErrno()) << std::endl;
655     return false;
656   }
657 
658   return true;
659 }
660 
AddJarManifest()661 void CommandLineInterface::GeneratorContextImpl::AddJarManifest() {
662   auto pair = files_.insert({"META-INF/MANIFEST.MF", ""});
663   if (pair.second) {
664     pair.first->second =
665         "Manifest-Version: 1.0\n"
666         "Created-By: 1.6.0 (protoc)\n"
667         "\n";
668   }
669 }
670 
GetOutputFilenames(std::vector<std::string> * output_filenames)671 void CommandLineInterface::GeneratorContextImpl::GetOutputFilenames(
672     std::vector<std::string>* output_filenames) {
673   for (const auto& pair : files_) {
674     output_filenames->push_back(pair.first);
675   }
676 }
677 
Open(const std::string & filename)678 io::ZeroCopyOutputStream* CommandLineInterface::GeneratorContextImpl::Open(
679     const std::string& filename) {
680   return new MemoryOutputStream(this, filename, false);
681 }
682 
683 io::ZeroCopyOutputStream*
OpenForAppend(const std::string & filename)684 CommandLineInterface::GeneratorContextImpl::OpenForAppend(
685     const std::string& filename) {
686   return new MemoryOutputStream(this, filename, true);
687 }
688 
689 io::ZeroCopyOutputStream*
OpenForInsert(const std::string & filename,const std::string & insertion_point)690 CommandLineInterface::GeneratorContextImpl::OpenForInsert(
691     const std::string& filename, const std::string& insertion_point) {
692   return new MemoryOutputStream(this, filename, insertion_point);
693 }
694 
695 io::ZeroCopyOutputStream*
OpenForInsertWithGeneratedCodeInfo(const std::string & filename,const std::string & insertion_point,const google::protobuf::GeneratedCodeInfo & info)696 CommandLineInterface::GeneratorContextImpl::OpenForInsertWithGeneratedCodeInfo(
697     const std::string& filename, const std::string& insertion_point,
698     const google::protobuf::GeneratedCodeInfo& info) {
699   return new MemoryOutputStream(this, filename, insertion_point, info);
700 }
701 
702 // -------------------------------------------------------------------
703 
MemoryOutputStream(GeneratorContextImpl * directory,const std::string & filename,bool append_mode)704 CommandLineInterface::MemoryOutputStream::MemoryOutputStream(
705     GeneratorContextImpl* directory, const std::string& filename,
706     bool append_mode)
707     : directory_(directory),
708       filename_(filename),
709       append_mode_(append_mode),
710       inner_(new io::StringOutputStream(&data_)) {}
711 
MemoryOutputStream(GeneratorContextImpl * directory,const std::string & filename,const std::string & insertion_point)712 CommandLineInterface::MemoryOutputStream::MemoryOutputStream(
713     GeneratorContextImpl* directory, const std::string& filename,
714     const std::string& insertion_point)
715     : directory_(directory),
716       filename_(filename),
717       insertion_point_(insertion_point),
718       inner_(new io::StringOutputStream(&data_)) {}
719 
MemoryOutputStream(GeneratorContextImpl * directory,const std::string & filename,const std::string & insertion_point,const google::protobuf::GeneratedCodeInfo & info)720 CommandLineInterface::MemoryOutputStream::MemoryOutputStream(
721     GeneratorContextImpl* directory, const std::string& filename,
722     const std::string& insertion_point, const google::protobuf::GeneratedCodeInfo& info)
723     : directory_(directory),
724       filename_(filename),
725       insertion_point_(insertion_point),
726       inner_(new io::StringOutputStream(&data_)),
727       info_to_insert_(info) {}
728 
InsertShiftedInfo(const std::string & insertion_content,size_t insertion_offset,size_t indent_length,google::protobuf::GeneratedCodeInfo & target_info)729 void CommandLineInterface::MemoryOutputStream::InsertShiftedInfo(
730     const std::string& insertion_content, size_t insertion_offset,
731     size_t indent_length, google::protobuf::GeneratedCodeInfo& target_info) {
732   // Keep track of how much extra data was added for indents before the
733   // current annotation being inserted. `pos` and `source_annotation.begin()`
734   // are offsets in `insertion_content`. `insertion_offset` is updated so that
735   // it can be added to an annotation's `begin` field to reflect that
736   // annotation's updated location after `insertion_content` was inserted into
737   // the target file.
738   size_t pos = 0;
739   insertion_offset += indent_length;
740   for (const auto& source_annotation : info_to_insert_.annotation()) {
741     GeneratedCodeInfo::Annotation* annotation = target_info.add_annotation();
742     int inner_indent = 0;
743     // insertion_content is guaranteed to end in an endline. This last endline
744     // has no effect on indentation.
745     for (; pos < static_cast<size_t>(source_annotation.end()) &&
746            pos < insertion_content.size() - 1;
747          ++pos) {
748       if (insertion_content[pos] == '\n') {
749         if (pos >= static_cast<size_t>(source_annotation.begin())) {
750           // The beginning of the annotation is at insertion_offset, but the end
751           // can still move further in the target file.
752           inner_indent += indent_length;
753         } else {
754           insertion_offset += indent_length;
755         }
756       }
757     }
758     *annotation = source_annotation;
759     annotation->set_begin(annotation->begin() + insertion_offset);
760     insertion_offset += inner_indent;
761     annotation->set_end(annotation->end() + insertion_offset);
762   }
763 }
764 
UpdateMetadata(const std::string & insertion_content,size_t insertion_offset,size_t insertion_length,size_t indent_length)765 void CommandLineInterface::MemoryOutputStream::UpdateMetadata(
766     const std::string& insertion_content, size_t insertion_offset,
767     size_t insertion_length, size_t indent_length) {
768   auto it = directory_->files_.find(absl::StrCat(filename_, ".pb.meta"));
769   if (it == directory_->files_.end() && info_to_insert_.annotation().empty()) {
770     // No metadata was recorded for this file.
771     return;
772   }
773   GeneratedCodeInfo metadata;
774   bool is_text_format = false;
775   std::string* encoded_data = nullptr;
776   if (it != directory_->files_.end()) {
777     encoded_data = &it->second;
778     // Try to decode a GeneratedCodeInfo proto from the .pb.meta file. It may be
779     // in wire or text format. Keep the same format when the data is written out
780     // later.
781     if (!metadata.ParseFromString(*encoded_data)) {
782       if (!TextFormat::ParseFromString(*encoded_data, &metadata)) {
783         // The metadata is invalid.
784         std::cerr
785             << filename_
786             << ".pb.meta: Could not parse metadata as wire or text format."
787             << std::endl;
788         return;
789       }
790       // Generators that use the public plugin interface emit text-format
791       // metadata (because in the public plugin protocol, file content must be
792       // UTF8-encoded strings).
793       is_text_format = true;
794     }
795   } else {
796     // Create a new file to store the new metadata in info_to_insert_.
797     encoded_data =
798         &directory_->files_.try_emplace(absl::StrCat(filename_, ".pb.meta"), "")
799              .first->second;
800   }
801   GeneratedCodeInfo new_metadata;
802   bool crossed_offset = false;
803   size_t to_add = 0;
804   for (const auto& source_annotation : metadata.annotation()) {
805     // The first time an annotation at or after the insertion point is found,
806     // insert the new metadata from info_to_insert_. Shift all annotations
807     // after the new metadata by the length of the text that was inserted
808     // (including any additional indent length).
809     if (static_cast<size_t>(source_annotation.begin()) >= insertion_offset &&
810         !crossed_offset) {
811       crossed_offset = true;
812       InsertShiftedInfo(insertion_content, insertion_offset, indent_length,
813                         new_metadata);
814       to_add += insertion_length;
815     }
816     GeneratedCodeInfo::Annotation* annotation = new_metadata.add_annotation();
817     *annotation = source_annotation;
818     annotation->set_begin(annotation->begin() + to_add);
819     annotation->set_end(annotation->end() + to_add);
820   }
821   // If there were never any annotations at or after the insertion point,
822   // make sure to still insert the new metadata from info_to_insert_.
823   if (!crossed_offset) {
824     InsertShiftedInfo(insertion_content, insertion_offset, indent_length,
825                       new_metadata);
826   }
827   if (is_text_format) {
828     TextFormat::PrintToString(new_metadata, encoded_data);
829   } else {
830     new_metadata.SerializeToString(encoded_data);
831   }
832 }
833 
~MemoryOutputStream()834 CommandLineInterface::MemoryOutputStream::~MemoryOutputStream() {
835   // Make sure all data has been written.
836   inner_.reset();
837 
838   // Insert into the directory.
839   auto pair = directory_->files_.insert({filename_, ""});
840   auto it = pair.first;
841   bool already_present = !pair.second;
842 
843   if (insertion_point_.empty()) {
844     // This was just a regular Open().
845     if (already_present) {
846       if (append_mode_) {
847         it->second.append(data_);
848       } else {
849         std::cerr << filename_ << ": Tried to write the same file twice."
850                   << std::endl;
851         directory_->had_error_ = true;
852       }
853       return;
854     }
855 
856     it->second.swap(data_);
857     return;
858   }
859   // This was an OpenForInsert().
860 
861   // If the data doesn't end with a clean line break, add one.
862   if (!data_.empty() && data_[data_.size() - 1] != '\n') {
863     data_.push_back('\n');
864   }
865 
866   // Find the file we are going to insert into.
867   if (!already_present) {
868     std::cerr << filename_ << ": Tried to insert into file that doesn't exist."
869               << std::endl;
870     directory_->had_error_ = true;
871     return;
872   }
873   std::string* target = &it->second;
874 
875   // Find the insertion point.
876   std::string magic_string =
877       absl::Substitute("@@protoc_insertion_point($0)", insertion_point_);
878   std::string::size_type pos = target->find(magic_string);
879 
880   if (pos == std::string::npos) {
881     std::cerr << filename_ << ": insertion point \"" << insertion_point_
882               << "\" not found." << std::endl;
883     directory_->had_error_ = true;
884     return;
885   }
886 
887   if ((pos > 3) && (target->substr(pos - 3, 2) == "/*")) {
888     // Support for inline "/* @@protoc_insertion_point() */"
889     pos = pos - 3;
890   } else {
891     // Seek backwards to the beginning of the line, which is where we will
892     // insert the data.  Note that this has the effect of pushing the
893     // insertion point down, so the data is inserted before it.  This is
894     // intentional because it means that multiple insertions at the same point
895     // will end up in the expected order in the final output.
896     pos = target->find_last_of('\n', pos);
897     if (pos == std::string::npos) {
898       // Insertion point is on the first line.
899       pos = 0;
900     } else {
901       // Advance to character after '\n'.
902       ++pos;
903     }
904   }
905 
906   // Extract indent.
907   std::string indent_(*target, pos,
908                       target->find_first_not_of(" \t", pos) - pos);
909 
910   if (indent_.empty()) {
911     // No indent.  This makes things easier.
912     target->insert(pos, data_);
913     UpdateMetadata(data_, pos, data_.size(), 0);
914     return;
915   }
916   // Calculate how much space we need.
917   int indent_size = 0;
918   for (size_t i = 0; i < data_.size(); ++i) {
919     if (data_[i] == '\n') indent_size += indent_.size();
920   }
921 
922   // Make a hole for it.
923   target->insert(pos, data_.size() + indent_size, '\0');
924 
925   // Now copy in the data.
926   std::string::size_type data_pos = 0;
927   char* target_ptr = &(*target)[pos];
928   while (data_pos < data_.size()) {
929     // Copy indent.
930     memcpy(target_ptr, indent_.data(), indent_.size());
931     target_ptr += indent_.size();
932 
933     // Copy line from data_.
934     // We already guaranteed that data_ ends with a newline (above), so this
935     // search can't fail.
936     std::string::size_type line_length =
937         data_.find_first_of('\n', data_pos) + 1 - data_pos;
938     memcpy(target_ptr, data_.data() + data_pos, line_length);
939     target_ptr += line_length;
940     data_pos += line_length;
941   }
942 
943   ABSL_CHECK_EQ(target_ptr, &(*target)[pos] + data_.size() + indent_size);
944 
945   UpdateMetadata(data_, pos, data_.size() + indent_size, indent_.size());
946 }
947 
948 // ===================================================================
949 
950 #if defined(_WIN32) && !defined(__CYGWIN__)
951 const char* const CommandLineInterface::kPathSeparator = ";";
952 #else
953 const char* const CommandLineInterface::kPathSeparator = ":";
954 #endif
955 
CommandLineInterface()956 CommandLineInterface::CommandLineInterface()
957     : direct_dependencies_violation_msg_(
958           kDefaultDirectDependenciesViolationMsg) {}
959 
960 CommandLineInterface::~CommandLineInterface() = default;
961 
RegisterGenerator(const std::string & flag_name,CodeGenerator * generator,const std::string & help_text)962 void CommandLineInterface::RegisterGenerator(const std::string& flag_name,
963                                              CodeGenerator* generator,
964                                              const std::string& help_text) {
965   GeneratorInfo info;
966   info.flag_name = flag_name;
967   info.generator = generator;
968   info.help_text = help_text;
969   generators_by_flag_name_[flag_name] = info;
970 }
971 
RegisterGenerator(const std::string & flag_name,const std::string & option_flag_name,CodeGenerator * generator,const std::string & help_text)972 void CommandLineInterface::RegisterGenerator(
973     const std::string& flag_name, const std::string& option_flag_name,
974     CodeGenerator* generator, const std::string& help_text) {
975   GeneratorInfo info;
976   info.flag_name = flag_name;
977   info.option_flag_name = option_flag_name;
978   info.generator = generator;
979   info.help_text = help_text;
980   generators_by_flag_name_[flag_name] = info;
981   generators_by_option_name_[option_flag_name] = info;
982 }
983 
AllowPlugins(const std::string & exe_name_prefix)984 void CommandLineInterface::AllowPlugins(const std::string& exe_name_prefix) {
985   plugin_prefix_ = exe_name_prefix;
986 }
987 
988 namespace {
989 
ContainsProto3Optional(const Descriptor * desc)990 bool ContainsProto3Optional(const Descriptor* desc) {
991   for (int i = 0; i < desc->field_count(); ++i) {
992     if (desc->field(i)->real_containing_oneof() == nullptr &&
993         desc->field(i)->containing_oneof() != nullptr) {
994       return true;
995     }
996   }
997   for (int i = 0; i < desc->nested_type_count(); ++i) {
998     if (ContainsProto3Optional(desc->nested_type(i))) {
999       return true;
1000     }
1001   }
1002   return false;
1003 }
1004 
ContainsProto3Optional(Edition edition,const FileDescriptor * file)1005 bool ContainsProto3Optional(Edition edition, const FileDescriptor* file) {
1006   if (edition == Edition::EDITION_PROTO3) {
1007     for (int i = 0; i < file->message_type_count(); ++i) {
1008       if (ContainsProto3Optional(file->message_type(i))) {
1009         return true;
1010       }
1011     }
1012   }
1013   return false;
1014 }
1015 
HasReservedFieldNumber(const FieldDescriptor * field)1016 bool HasReservedFieldNumber(const FieldDescriptor* field) {
1017   if (field->number() >= FieldDescriptor::kFirstReservedNumber &&
1018       field->number() <= FieldDescriptor::kLastReservedNumber) {
1019     return true;
1020   }
1021   return false;
1022 }
1023 
1024 }  // namespace
1025 
1026 namespace {
1027 std::unique_ptr<SimpleDescriptorDatabase>
1028 PopulateSingleSimpleDescriptorDatabase(const std::string& descriptor_set_name);
1029 
1030 // Indicates whether the field is compatible with the given target type.
IsFieldCompatible(const FieldDescriptor & field,FieldOptions::OptionTargetType target_type)1031 bool IsFieldCompatible(const FieldDescriptor& field,
1032                        FieldOptions::OptionTargetType target_type) {
1033   const RepeatedField<int>& allowed_targets = field.options().targets();
1034   return allowed_targets.empty() ||
1035          absl::c_linear_search(allowed_targets, target_type);
1036 }
1037 
1038 // Converts the OptionTargetType enum to a string suitable for use in error
1039 // messages.
TargetTypeString(FieldOptions::OptionTargetType target_type)1040 absl::string_view TargetTypeString(FieldOptions::OptionTargetType target_type) {
1041   switch (target_type) {
1042     case FieldOptions::TARGET_TYPE_FILE:
1043       return "file";
1044     case FieldOptions::TARGET_TYPE_EXTENSION_RANGE:
1045       return "extension range";
1046     case FieldOptions::TARGET_TYPE_MESSAGE:
1047       return "message";
1048     case FieldOptions::TARGET_TYPE_FIELD:
1049       return "field";
1050     case FieldOptions::TARGET_TYPE_ONEOF:
1051       return "oneof";
1052     case FieldOptions::TARGET_TYPE_ENUM:
1053       return "enum";
1054     case FieldOptions::TARGET_TYPE_ENUM_ENTRY:
1055       return "enum entry";
1056     case FieldOptions::TARGET_TYPE_SERVICE:
1057       return "service";
1058     case FieldOptions::TARGET_TYPE_METHOD:
1059       return "method";
1060     default:
1061       return "unknown";
1062   }
1063 }
1064 
1065 // Recursively validates that the options message (or subpiece of an options
1066 // message) is compatible with the given target type.
ValidateTargetConstraintsRecursive(const Message & m,DescriptorPool::ErrorCollector & error_collector,absl::string_view file_name,FieldOptions::OptionTargetType target_type)1067 bool ValidateTargetConstraintsRecursive(
1068     const Message& m, DescriptorPool::ErrorCollector& error_collector,
1069     absl::string_view file_name, FieldOptions::OptionTargetType target_type) {
1070   std::vector<const FieldDescriptor*> fields;
1071   const Reflection* reflection = m.GetReflection();
1072   reflection->ListFields(m, &fields);
1073   bool success = true;
1074   for (const auto* field : fields) {
1075     if (!IsFieldCompatible(*field, target_type)) {
1076       success = false;
1077       error_collector.RecordError(
1078           file_name, "", nullptr, DescriptorPool::ErrorCollector::OPTION_NAME,
1079           absl::StrCat("Option ", field->full_name(),
1080                        " cannot be set on an entity of type `",
1081                        TargetTypeString(target_type), "`."));
1082     }
1083     if (field->type() == FieldDescriptor::TYPE_MESSAGE) {
1084       if (field->is_repeated()) {
1085         int field_size = reflection->FieldSize(m, field);
1086         for (int i = 0; i < field_size; ++i) {
1087           if (!ValidateTargetConstraintsRecursive(
1088                   reflection->GetRepeatedMessage(m, field, i), error_collector,
1089                   file_name, target_type)) {
1090             success = false;
1091           }
1092         }
1093       } else if (!ValidateTargetConstraintsRecursive(
1094                      reflection->GetMessage(m, field), error_collector,
1095                      file_name, target_type)) {
1096         success = false;
1097       }
1098     }
1099   }
1100   return success;
1101 }
1102 
1103 // Validates that the options message is correct with respect to target
1104 // constraints, returning true if successful. This function converts the
1105 // options message to a DynamicMessage so that we have visibility into custom
1106 // options. We take the element name as a FunctionRef so that we do not have to
1107 // pay the cost of constructing it unless there is an error.
ValidateTargetConstraints(const Message & options,const DescriptorPool & pool,DescriptorPool::ErrorCollector & error_collector,absl::string_view file_name,FieldOptions::OptionTargetType target_type)1108 bool ValidateTargetConstraints(const Message& options,
1109                                const DescriptorPool& pool,
1110                                DescriptorPool::ErrorCollector& error_collector,
1111                                absl::string_view file_name,
1112                                FieldOptions::OptionTargetType target_type) {
1113   const Descriptor* descriptor =
1114       pool.FindMessageTypeByName(options.GetTypeName());
1115   if (descriptor == nullptr) {
1116     // We were unable to find the options message in the descriptor pool. This
1117     // implies that the proto files we are working with do not depend on
1118     // descriptor.proto, in which case there are no custom options to worry
1119     // about. We can therefore skip the use of DynamicMessage.
1120     return ValidateTargetConstraintsRecursive(options, error_collector,
1121                                               file_name, target_type);
1122   } else {
1123     DynamicMessageFactory factory;
1124     std::unique_ptr<Message> dynamic_message(
1125         factory.GetPrototype(descriptor)->New());
1126     std::string serialized;
1127     ABSL_CHECK(options.SerializeToString(&serialized));
1128     ABSL_CHECK(dynamic_message->ParseFromString(serialized));
1129     return ValidateTargetConstraintsRecursive(*dynamic_message, error_collector,
1130                                               file_name, target_type);
1131   }
1132 }
1133 
1134 // The overloaded GetTargetType() functions below allow us to map from a
1135 // descriptor type to the associated OptionTargetType enum.
GetTargetType(const FileDescriptor *)1136 FieldOptions::OptionTargetType GetTargetType(const FileDescriptor*) {
1137   return FieldOptions::TARGET_TYPE_FILE;
1138 }
1139 
GetTargetType(const Descriptor::ExtensionRange *)1140 FieldOptions::OptionTargetType GetTargetType(
1141     const Descriptor::ExtensionRange*) {
1142   return FieldOptions::TARGET_TYPE_EXTENSION_RANGE;
1143 }
1144 
GetTargetType(const Descriptor *)1145 FieldOptions::OptionTargetType GetTargetType(const Descriptor*) {
1146   return FieldOptions::TARGET_TYPE_MESSAGE;
1147 }
1148 
GetTargetType(const FieldDescriptor *)1149 FieldOptions::OptionTargetType GetTargetType(const FieldDescriptor*) {
1150   return FieldOptions::TARGET_TYPE_FIELD;
1151 }
1152 
GetTargetType(const OneofDescriptor *)1153 FieldOptions::OptionTargetType GetTargetType(const OneofDescriptor*) {
1154   return FieldOptions::TARGET_TYPE_ONEOF;
1155 }
1156 
GetTargetType(const EnumDescriptor *)1157 FieldOptions::OptionTargetType GetTargetType(const EnumDescriptor*) {
1158   return FieldOptions::TARGET_TYPE_ENUM;
1159 }
1160 
GetTargetType(const EnumValueDescriptor *)1161 FieldOptions::OptionTargetType GetTargetType(const EnumValueDescriptor*) {
1162   return FieldOptions::TARGET_TYPE_ENUM_ENTRY;
1163 }
1164 
GetTargetType(const ServiceDescriptor *)1165 FieldOptions::OptionTargetType GetTargetType(const ServiceDescriptor*) {
1166   return FieldOptions::TARGET_TYPE_SERVICE;
1167 }
1168 
GetTargetType(const MethodDescriptor *)1169 FieldOptions::OptionTargetType GetTargetType(const MethodDescriptor*) {
1170   return FieldOptions::TARGET_TYPE_METHOD;
1171 }
1172 }  // namespace
1173 
Run(int argc,const char * const argv[])1174 int CommandLineInterface::Run(int argc, const char* const argv[]) {
1175   Clear();
1176 
1177   switch (ParseArguments(argc, argv)) {
1178     case PARSE_ARGUMENT_DONE_AND_EXIT:
1179       return 0;
1180     case PARSE_ARGUMENT_FAIL:
1181       return 1;
1182     case PARSE_ARGUMENT_DONE_AND_CONTINUE:
1183       break;
1184   }
1185 
1186   std::vector<const FileDescriptor*> parsed_files;
1187   std::unique_ptr<DiskSourceTree> disk_source_tree;
1188   std::unique_ptr<ErrorPrinter> error_collector;
1189   std::unique_ptr<DescriptorPool> descriptor_pool;
1190 
1191   // The SimpleDescriptorDatabases here are the constituents of the
1192   // MergedDescriptorDatabase descriptor_set_in_database, so this vector is for
1193   // managing their lifetimes. Its scope should match descriptor_set_in_database
1194   std::vector<std::unique_ptr<SimpleDescriptorDatabase>>
1195       databases_per_descriptor_set;
1196   std::unique_ptr<MergedDescriptorDatabase> descriptor_set_in_database;
1197 
1198   std::unique_ptr<SourceTreeDescriptorDatabase> source_tree_database;
1199 
1200   // Any --descriptor_set_in FileDescriptorSet objects will be used as a
1201   // fallback to input_files on command line, so create that db first.
1202   if (!descriptor_set_in_names_.empty()) {
1203     for (const std::string& name : descriptor_set_in_names_) {
1204       std::unique_ptr<SimpleDescriptorDatabase> database_for_descriptor_set =
1205           PopulateSingleSimpleDescriptorDatabase(name);
1206       if (!database_for_descriptor_set) {
1207         return EXIT_FAILURE;
1208       }
1209       databases_per_descriptor_set.push_back(
1210           std::move(database_for_descriptor_set));
1211     }
1212 
1213     std::vector<DescriptorDatabase*> raw_databases_per_descriptor_set;
1214     raw_databases_per_descriptor_set.reserve(
1215         databases_per_descriptor_set.size());
1216     for (const std::unique_ptr<SimpleDescriptorDatabase>& db :
1217          databases_per_descriptor_set) {
1218       raw_databases_per_descriptor_set.push_back(db.get());
1219     }
1220     descriptor_set_in_database = std::make_unique<MergedDescriptorDatabase>(
1221         raw_databases_per_descriptor_set);
1222   }
1223 
1224   if (proto_path_.empty()) {
1225     // If there are no --proto_path flags, then just look in the specified
1226     // --descriptor_set_in files.  But first, verify that the input files are
1227     // there.
1228     if (!VerifyInputFilesInDescriptors(descriptor_set_in_database.get())) {
1229       return 1;
1230     }
1231 
1232     error_collector = std::make_unique<ErrorPrinter>(error_format_);
1233     descriptor_pool = std::make_unique<DescriptorPool>(
1234         descriptor_set_in_database.get(), error_collector.get());
1235   } else {
1236     disk_source_tree = std::make_unique<DiskSourceTree>();
1237     if (!InitializeDiskSourceTree(disk_source_tree.get(),
1238                                   descriptor_set_in_database.get())) {
1239       return 1;
1240     }
1241 
1242     error_collector =
1243         std::make_unique<ErrorPrinter>(error_format_, disk_source_tree.get());
1244 
1245     source_tree_database = std::make_unique<SourceTreeDescriptorDatabase>(
1246         disk_source_tree.get(), descriptor_set_in_database.get());
1247     source_tree_database->RecordErrorsTo(error_collector.get());
1248 
1249     descriptor_pool = std::make_unique<DescriptorPool>(
1250         source_tree_database.get(),
1251         source_tree_database->GetValidationErrorCollector());
1252   }
1253 
1254   descriptor_pool->EnforceWeakDependencies(true);
1255 
1256   if (!SetupFeatureResolution(*descriptor_pool)) {
1257     return EXIT_FAILURE;
1258   }
1259 
1260   // Enforce extension declarations only when compiling. We want to skip
1261   // this enforcement when protoc is just being invoked to encode or decode
1262   // protos.
1263   if (mode_ == MODE_COMPILE
1264   ) {
1265     descriptor_pool->EnforceExtensionDeclarations(true);
1266   }
1267   if (!ParseInputFiles(descriptor_pool.get(), disk_source_tree.get(),
1268                        &parsed_files)) {
1269     return 1;
1270   }
1271 
1272   bool validation_error = false;  // Defer exiting so we log more warnings.
1273 
1274   for (auto& file : parsed_files) {
1275     google::protobuf::internal::VisitDescriptors(
1276         *file, [&](const FieldDescriptor& field) {
1277           if (HasReservedFieldNumber(&field)) {
1278             const char* error_link = nullptr;
1279             validation_error = true;
1280             std::string error;
1281             if (field.number() >= FieldDescriptor::kFirstReservedNumber &&
1282                 field.number() <= FieldDescriptor::kLastReservedNumber) {
1283               error = absl::Substitute(
1284                   "Field numbers $0 through $1 are reserved "
1285                   "for the protocol buffer library implementation.",
1286                   FieldDescriptor::kFirstReservedNumber,
1287                   FieldDescriptor::kLastReservedNumber);
1288             } else {
1289               error = absl::Substitute(
1290                   "Field number $0 is reserved for specific purposes.",
1291                   field.number());
1292             }
1293             if (error_link) {
1294               absl::StrAppend(&error, "(See ", error_link, ")");
1295             }
1296             static_cast<DescriptorPool::ErrorCollector*>(error_collector.get())
1297                 ->RecordError(field.file()->name(), field.full_name(), nullptr,
1298                               DescriptorPool::ErrorCollector::NUMBER, error);
1299           }
1300         });
1301   }
1302 
1303   // We visit one file at a time because we need to provide the file name for
1304   // error messages. Usually we can get the file name from any descriptor with
1305   // something like descriptor->file()->name(), but ExtensionRange does not
1306   // support this.
1307   for (const google::protobuf::FileDescriptor* file : parsed_files) {
1308     FileDescriptorProto proto;
1309     file->CopyTo(&proto);
1310     google::protobuf::internal::VisitDescriptors(
1311         *file, proto, [&](const auto& descriptor, const auto& proto) {
1312           if (!ValidateTargetConstraints(proto.options(), *descriptor_pool,
1313                                          *error_collector, file->name(),
1314                                          GetTargetType(&descriptor))) {
1315             validation_error = true;
1316           }
1317         });
1318   }
1319 
1320 
1321   if (validation_error) {
1322     return 1;
1323   }
1324 
1325   // We construct a separate GeneratorContext for each output location.  Note
1326   // that two code generators may output to the same location, in which case
1327   // they should share a single GeneratorContext so that OpenForInsert() works.
1328   GeneratorContextMap output_directories;
1329 
1330   // Generate output.
1331   if (mode_ == MODE_COMPILE) {
1332     for (size_t i = 0; i < output_directives_.size(); ++i) {
1333       std::string output_location = output_directives_[i].output_location;
1334       if (!absl::EndsWith(output_location, ".zip") &&
1335           !absl::EndsWith(output_location, ".jar") &&
1336           !absl::EndsWith(output_location, ".srcjar")) {
1337         AddTrailingSlash(&output_location);
1338       }
1339 
1340       auto& generator = output_directories[output_location];
1341 
1342       if (!generator) {
1343         // First time we've seen this output location.
1344         generator = std::make_unique<GeneratorContextImpl>(parsed_files);
1345       }
1346 
1347       if (!GenerateOutput(parsed_files, output_directives_[i],
1348                           generator.get())) {
1349         return 1;
1350       }
1351     }
1352   }
1353 
1354   for (const auto& pair : output_directories) {
1355     const std::string& location = pair.first;
1356     GeneratorContextImpl* directory = pair.second.get();
1357     if (absl::EndsWith(location, "/")) {
1358       if (!directory->WriteAllToDisk(location)) {
1359         return 1;
1360       }
1361     } else {
1362       if (absl::EndsWith(location, ".jar")) {
1363         directory->AddJarManifest();
1364       }
1365 
1366       if (!directory->WriteAllToZip(location)) {
1367         return 1;
1368       }
1369     }
1370   }
1371 
1372   if (!dependency_out_name_.empty()) {
1373     ABSL_DCHECK(disk_source_tree.get());
1374     if (!GenerateDependencyManifestFile(parsed_files, output_directories,
1375                                         disk_source_tree.get())) {
1376       return 1;
1377     }
1378   }
1379 
1380   if (!descriptor_set_out_name_.empty()) {
1381     if (!WriteDescriptorSet(parsed_files)) {
1382       return 1;
1383     }
1384   }
1385 
1386   if (!edition_defaults_out_name_.empty()) {
1387     if (!WriteEditionDefaults(*descriptor_pool)) {
1388       return 1;
1389     }
1390   }
1391 
1392   if (mode_ == MODE_ENCODE || mode_ == MODE_DECODE) {
1393     if (codec_type_.empty()) {
1394       // HACK:  Define an EmptyMessage type to use for decoding.
1395       DescriptorPool pool;
1396       FileDescriptorProto file;
1397       file.set_name("empty_message.proto");
1398       file.add_message_type()->set_name("EmptyMessage");
1399       ABSL_CHECK(pool.BuildFile(file) != nullptr);
1400       codec_type_ = "EmptyMessage";
1401       if (!EncodeOrDecode(&pool)) {
1402         return 1;
1403       }
1404     } else {
1405       if (!EncodeOrDecode(descriptor_pool.get())) {
1406         return 1;
1407       }
1408     }
1409   }
1410 
1411   if (error_collector->FoundErrors() ||
1412       (fatal_warnings_ && error_collector->FoundWarnings())) {
1413     return 1;
1414   }
1415 
1416   if (mode_ == MODE_PRINT) {
1417     switch (print_mode_) {
1418       case PRINT_FREE_FIELDS:
1419         for (size_t i = 0; i < parsed_files.size(); ++i) {
1420           const FileDescriptor* fd = parsed_files[i];
1421           for (int j = 0; j < fd->message_type_count(); ++j) {
1422             PrintFreeFieldNumbers(fd->message_type(j));
1423           }
1424         }
1425         break;
1426       case PRINT_NONE:
1427         ABSL_LOG(ERROR)
1428             << "If the code reaches here, it usually means a bug of "
1429                "flag parsing in the CommandLineInterface.";
1430         return 1;
1431 
1432         // Do not add a default case.
1433     }
1434   }
1435   return 0;
1436 }
1437 
InitializeDiskSourceTree(DiskSourceTree * source_tree,DescriptorDatabase * fallback_database)1438 bool CommandLineInterface::InitializeDiskSourceTree(
1439     DiskSourceTree* source_tree, DescriptorDatabase* fallback_database) {
1440   AddDefaultProtoPaths(&proto_path_);
1441 
1442   // Set up the source tree.
1443   for (size_t i = 0; i < proto_path_.size(); ++i) {
1444     source_tree->MapPath(proto_path_[i].first, proto_path_[i].second);
1445   }
1446 
1447   // Map input files to virtual paths if possible.
1448   if (!MakeInputsBeProtoPathRelative(source_tree, fallback_database)) {
1449     return false;
1450   }
1451 
1452   return true;
1453 }
1454 
1455 namespace {
1456 std::unique_ptr<SimpleDescriptorDatabase>
PopulateSingleSimpleDescriptorDatabase(const std::string & descriptor_set_name)1457 PopulateSingleSimpleDescriptorDatabase(const std::string& descriptor_set_name) {
1458   int fd;
1459   do {
1460     fd = open(descriptor_set_name.c_str(), O_RDONLY | O_BINARY);
1461   } while (fd < 0 && errno == EINTR);
1462   if (fd < 0) {
1463     std::cerr << descriptor_set_name << ": " << strerror(ENOENT) << std::endl;
1464     return nullptr;
1465   }
1466 
1467   FileDescriptorSet file_descriptor_set;
1468   bool parsed = file_descriptor_set.ParseFromFileDescriptor(fd);
1469   if (close(fd) != 0) {
1470     std::cerr << descriptor_set_name << ": close: " << strerror(errno)
1471               << std::endl;
1472     return nullptr;
1473   }
1474 
1475   if (!parsed) {
1476     std::cerr << descriptor_set_name << ": Unable to parse." << std::endl;
1477     return nullptr;
1478   }
1479 
1480   std::unique_ptr<SimpleDescriptorDatabase> database{
1481       new SimpleDescriptorDatabase()};
1482 
1483   for (int j = 0; j < file_descriptor_set.file_size(); j++) {
1484     FileDescriptorProto previously_added_file_descriptor_proto;
1485     if (database->FindFileByName(file_descriptor_set.file(j).name(),
1486                                  &previously_added_file_descriptor_proto)) {
1487       // already present - skip
1488       continue;
1489     }
1490     if (!database->Add(file_descriptor_set.file(j))) {
1491       return nullptr;
1492     }
1493   }
1494   return database;
1495 }
1496 
1497 }  // namespace
1498 
1499 
VerifyInputFilesInDescriptors(DescriptorDatabase * database)1500 bool CommandLineInterface::VerifyInputFilesInDescriptors(
1501     DescriptorDatabase* database) {
1502   for (const auto& input_file : input_files_) {
1503     FileDescriptorProto file_descriptor;
1504     if (!database->FindFileByName(input_file, &file_descriptor)) {
1505       std::cerr << "Could not find file in descriptor database: " << input_file
1506                 << ": " << strerror(ENOENT) << std::endl;
1507       return false;
1508     }
1509 
1510     // Enforce --disallow_services.
1511     if (disallow_services_ && file_descriptor.service_size() > 0) {
1512       std::cerr << file_descriptor.name()
1513                 << ": This file contains services, but "
1514                    "--disallow_services was used."
1515                 << std::endl;
1516       return false;
1517     }
1518 
1519   }
1520   return true;
1521 }
1522 
SetupFeatureResolution(DescriptorPool & pool)1523 bool CommandLineInterface::SetupFeatureResolution(DescriptorPool& pool) {
1524   // Calculate the feature defaults for each built-in generator.  All generators
1525   // that support editions must agree on the supported edition range.
1526   std::vector<const FieldDescriptor*> feature_extensions;
1527   Edition minimum_edition = MinimumAllowedEdition();
1528   // Override maximum_edition if experimental_editions is true.
1529   Edition maximum_edition =
1530       !experimental_editions_ ? MaximumAllowedEdition() : Edition::EDITION_MAX;
1531   for (const auto& output : output_directives_) {
1532     if (output.generator == nullptr) continue;
1533     if (!experimental_editions_ &&
1534         (output.generator->GetSupportedFeatures() &
1535          CodeGenerator::FEATURE_SUPPORTS_EDITIONS) != 0) {
1536       // Only validate min/max edition on generators that advertise editions
1537       // support.  Generators still under development will always use the
1538       // correct values.
1539       if (output.generator->GetMinimumEdition() != minimum_edition) {
1540         ABSL_LOG(ERROR) << "Built-in generator " << output.name
1541                         << " specifies a minimum edition "
1542                         << output.generator->GetMinimumEdition()
1543                         << " which is not the protoc minimum "
1544                         << minimum_edition << ".";
1545         return false;
1546       }
1547       if (output.generator->GetMaximumEdition() != maximum_edition) {
1548         ABSL_LOG(ERROR) << "Built-in generator " << output.name
1549                         << " specifies a maximum edition "
1550                         << output.generator->GetMaximumEdition()
1551                         << " which is not the protoc maximum "
1552                         << maximum_edition << ".";
1553         return false;
1554       }
1555     }
1556     for (const FieldDescriptor* ext :
1557          output.generator->GetFeatureExtensions()) {
1558       if (ext == nullptr) {
1559         ABSL_LOG(ERROR) << "Built-in generator " << output.name
1560                         << " specifies an unknown feature extension.";
1561         return false;
1562       }
1563       feature_extensions.push_back(ext);
1564     }
1565   }
1566   absl::StatusOr<FeatureSetDefaults> defaults =
1567       FeatureResolver::CompileDefaults(FeatureSet::descriptor(),
1568                                        feature_extensions, minimum_edition,
1569                                        maximum_edition);
1570   if (!defaults.ok()) {
1571     ABSL_LOG(ERROR) << defaults.status();
1572     return false;
1573   }
1574   absl::Status status = pool.SetFeatureSetDefaults(std::move(defaults).value());
1575   ABSL_CHECK(status.ok()) << status.message();
1576   return true;
1577 }
1578 
ParseInputFiles(DescriptorPool * descriptor_pool,DiskSourceTree * source_tree,std::vector<const FileDescriptor * > * parsed_files)1579 bool CommandLineInterface::ParseInputFiles(
1580     DescriptorPool* descriptor_pool, DiskSourceTree* source_tree,
1581     std::vector<const FileDescriptor*>* parsed_files) {
1582 
1583   if (!proto_path_.empty()) {
1584     // Track unused imports in all source files that were loaded from the
1585     // filesystem. We do not track unused imports for files loaded from
1586     // descriptor sets as they may be programmatically generated in which case
1587     // exerting this level of rigor is less desirable. We're also making the
1588     // assumption that the initial parse of the proto from the filesystem
1589     // was rigorous in checking unused imports and that the descriptor set
1590     // being parsed was produced then and that it was subsequent mutations
1591     // of that descriptor set that left unused imports.
1592     //
1593     // Note that relying on proto_path exclusively is limited in that we may
1594     // be loading descriptors from both the filesystem and descriptor sets
1595     // depending on the invocation. At least for invocations that are
1596     // exclusively reading from descriptor sets, we can eliminate this failure
1597     // condition.
1598     for (const auto& input_file : input_files_) {
1599       descriptor_pool->AddDirectInputFile(input_file);
1600     }
1601   }
1602 
1603   bool result = true;
1604   // Parse each file.
1605   for (const auto& input_file : input_files_) {
1606     // Import the file.
1607     const FileDescriptor* parsed_file =
1608         descriptor_pool->FindFileByName(input_file);
1609     if (parsed_file == nullptr) {
1610       result = false;
1611       break;
1612     }
1613     parsed_files->push_back(parsed_file);
1614 
1615     // Enforce --disallow_services.
1616     if (disallow_services_ && parsed_file->service_count() > 0) {
1617       std::cerr << parsed_file->name()
1618                 << ": This file contains services, but "
1619                    "--disallow_services was used."
1620                 << std::endl;
1621       result = false;
1622       break;
1623     }
1624 
1625 
1626     // Enforce --direct_dependencies
1627     if (direct_dependencies_explicitly_set_) {
1628       bool indirect_imports = false;
1629       for (int i = 0; i < parsed_file->dependency_count(); ++i) {
1630         if (direct_dependencies_.find(parsed_file->dependency(i)->name()) ==
1631             direct_dependencies_.end()) {
1632           indirect_imports = true;
1633           std::cerr << parsed_file->name() << ": "
1634                     << absl::StrReplaceAll(
1635                            direct_dependencies_violation_msg_,
1636                            {{"%s", parsed_file->dependency(i)->name()}})
1637                     << std::endl;
1638         }
1639       }
1640       if (indirect_imports) {
1641         result = false;
1642         break;
1643       }
1644     }
1645   }
1646   descriptor_pool->ClearDirectInputFiles();
1647   return result;
1648 }
1649 
Clear()1650 void CommandLineInterface::Clear() {
1651   // Clear all members that are set by Run().  Note that we must not clear
1652   // members which are set by other methods before Run() is called.
1653   executable_name_.clear();
1654   proto_path_.clear();
1655   input_files_.clear();
1656   direct_dependencies_.clear();
1657   direct_dependencies_violation_msg_ = kDefaultDirectDependenciesViolationMsg;
1658   output_directives_.clear();
1659   codec_type_.clear();
1660   descriptor_set_in_names_.clear();
1661   descriptor_set_out_name_.clear();
1662   dependency_out_name_.clear();
1663 
1664   experimental_editions_ = false;
1665   edition_defaults_out_name_.clear();
1666   edition_defaults_minimum_ = EDITION_UNKNOWN;
1667   edition_defaults_maximum_ = EDITION_UNKNOWN;
1668 
1669   mode_ = MODE_COMPILE;
1670   print_mode_ = PRINT_NONE;
1671   imports_in_descriptor_set_ = false;
1672   source_info_in_descriptor_set_ = false;
1673   retain_options_in_descriptor_set_ = false;
1674   disallow_services_ = false;
1675   direct_dependencies_explicitly_set_ = false;
1676   deterministic_output_ = false;
1677 }
1678 
MakeProtoProtoPathRelative(DiskSourceTree * source_tree,std::string * proto,DescriptorDatabase * fallback_database)1679 bool CommandLineInterface::MakeProtoProtoPathRelative(
1680     DiskSourceTree* source_tree, std::string* proto,
1681     DescriptorDatabase* fallback_database) {
1682   // If it's in the fallback db, don't report non-existent file errors.
1683   FileDescriptorProto fallback_file;
1684   bool in_fallback_database =
1685       fallback_database != nullptr &&
1686       fallback_database->FindFileByName(*proto, &fallback_file);
1687 
1688   // If the input file path is not a physical file path, it must be a virtual
1689   // path.
1690   if (access(proto->c_str(), F_OK) < 0) {
1691     std::string disk_file;
1692     if (source_tree->VirtualFileToDiskFile(*proto, &disk_file) ||
1693         in_fallback_database) {
1694       return true;
1695     } else {
1696       std::cerr << "Could not make proto path relative: " << *proto << ": "
1697                 << strerror(ENOENT) << std::endl;
1698       return false;
1699     }
1700   }
1701 
1702   std::string virtual_file, shadowing_disk_file;
1703   switch (source_tree->DiskFileToVirtualFile(*proto, &virtual_file,
1704                                              &shadowing_disk_file)) {
1705     case DiskSourceTree::SUCCESS:
1706       *proto = virtual_file;
1707       break;
1708     case DiskSourceTree::SHADOWED:
1709       std::cerr << *proto << ": Input is shadowed in the --proto_path by \""
1710                 << shadowing_disk_file
1711                 << "\".  Either use the latter file as your input or reorder "
1712                    "the --proto_path so that the former file's location "
1713                    "comes first."
1714                 << std::endl;
1715       return false;
1716     case DiskSourceTree::CANNOT_OPEN: {
1717       if (in_fallback_database) {
1718         return true;
1719       }
1720       std::string error_str = source_tree->GetLastErrorMessage().empty()
1721                                   ? strerror(errno)
1722                                   : source_tree->GetLastErrorMessage();
1723       std::cerr << "Could not map to virtual file: " << *proto << ": "
1724                 << error_str << std::endl;
1725       return false;
1726     }
1727     case DiskSourceTree::NO_MAPPING: {
1728       // Try to interpret the path as a virtual path.
1729       std::string disk_file;
1730       if (source_tree->VirtualFileToDiskFile(*proto, &disk_file) ||
1731           in_fallback_database) {
1732         return true;
1733       } else {
1734         // The input file path can't be mapped to any --proto_path and it also
1735         // can't be interpreted as a virtual path.
1736         std::cerr
1737             << *proto
1738             << ": File does not reside within any path "
1739                "specified using --proto_path (or -I).  You must specify a "
1740                "--proto_path which encompasses this file.  Note that the "
1741                "proto_path must be an exact prefix of the .proto file "
1742                "names -- protoc is too dumb to figure out when two paths "
1743                "(e.g. absolute and relative) are equivalent (it's harder "
1744                "than you think)."
1745             << std::endl;
1746         return false;
1747       }
1748     }
1749   }
1750   return true;
1751 }
1752 
MakeInputsBeProtoPathRelative(DiskSourceTree * source_tree,DescriptorDatabase * fallback_database)1753 bool CommandLineInterface::MakeInputsBeProtoPathRelative(
1754     DiskSourceTree* source_tree, DescriptorDatabase* fallback_database) {
1755   for (auto& input_file : input_files_) {
1756     if (!MakeProtoProtoPathRelative(source_tree, &input_file,
1757                                     fallback_database)) {
1758       return false;
1759     }
1760   }
1761 
1762   return true;
1763 }
1764 
1765 
ExpandArgumentFile(const char * file,std::vector<std::string> * arguments)1766 bool CommandLineInterface::ExpandArgumentFile(
1767     const char* file, std::vector<std::string>* arguments) {
1768 // On windows to force ifstream to handle proper utr-8, we need to convert to
1769 // proper supported utf8 wstring. If we dont then the file can't be opened.
1770 #ifdef _MSC_VER
1771   // Convert the file name to wide chars.
1772   int size = MultiByteToWideChar(CP_UTF8, 0, file, strlen(file), nullptr, 0);
1773   std::wstring file_str;
1774   file_str.resize(size);
1775   MultiByteToWideChar(CP_UTF8, 0, file, strlen(file), &file_str[0],
1776                       file_str.size());
1777 #else
1778   std::string file_str(file);
1779 #endif
1780 
1781   // The argument file is searched in the working directory only. We don't
1782   // use the proto import path here.
1783   std::ifstream file_stream(file_str.c_str());
1784   if (!file_stream.is_open()) {
1785     return false;
1786   }
1787   std::string argument;
1788   // We don't support any kind of shell expansion right now.
1789   while (std::getline(file_stream, argument)) {
1790     arguments->push_back(argument);
1791   }
1792   return true;
1793 }
1794 
ParseArguments(int argc,const char * const argv[])1795 CommandLineInterface::ParseArgumentStatus CommandLineInterface::ParseArguments(
1796     int argc, const char* const argv[]) {
1797   executable_name_ = argv[0];
1798 
1799   std::vector<std::string> arguments;
1800   for (int i = 1; i < argc; ++i) {
1801     if (argv[i][0] == '@') {
1802       if (!ExpandArgumentFile(argv[i] + 1, &arguments)) {
1803         std::cerr << "Failed to open argument file: " << (argv[i] + 1)
1804                   << std::endl;
1805         return PARSE_ARGUMENT_FAIL;
1806       }
1807       continue;
1808     }
1809     arguments.push_back(argv[i]);
1810   }
1811 
1812   // if no arguments are given, show help
1813   if (arguments.empty()) {
1814     PrintHelpText();
1815     return PARSE_ARGUMENT_DONE_AND_EXIT;  // Exit without running compiler.
1816   }
1817 
1818   // Iterate through all arguments and parse them.
1819   for (size_t i = 0; i < arguments.size(); ++i) {
1820     std::string name, value;
1821 
1822     if (ParseArgument(arguments[i].c_str(), &name, &value)) {
1823       // Returned true => Use the next argument as the flag value.
1824       if (i + 1 == arguments.size() || arguments[i + 1][0] == '-') {
1825         std::cerr << "Missing value for flag: " << name << std::endl;
1826         if (name == "--decode") {
1827           std::cerr << "To decode an unknown message, use --decode_raw."
1828                     << std::endl;
1829         }
1830         return PARSE_ARGUMENT_FAIL;
1831       } else {
1832         ++i;
1833         value = arguments[i];
1834       }
1835     }
1836 
1837     ParseArgumentStatus status = InterpretArgument(name, value);
1838     if (status != PARSE_ARGUMENT_DONE_AND_CONTINUE) return status;
1839   }
1840 
1841   // Make sure each plugin option has a matching plugin output.
1842   bool foundUnknownPluginOption = false;
1843   for (const auto& kv : plugin_parameters_) {
1844     if (plugins_.find(kv.first) != plugins_.end()) {
1845       continue;
1846     }
1847     bool foundImplicitPlugin = false;
1848     for (const auto& d : output_directives_) {
1849       if (d.generator == nullptr) {
1850         // Infers the plugin name from the plugin_prefix_ and output directive.
1851         std::string plugin_name = PluginName(plugin_prefix_, d.name);
1852 
1853         // Since plugin_parameters_ is also inferred from --xxx_opt, we check
1854         // that it actually matches the plugin name inferred from --xxx_out.
1855         if (plugin_name == kv.first) {
1856           foundImplicitPlugin = true;
1857           break;
1858         }
1859       }
1860     }
1861 
1862     // This is a special case for cc_plugin invocations that are only with
1863     // "--cpp_opt" but no "--cpp_out". In this case, "--cpp_opt" only serves
1864     // as passing the arguments to cc_plugins, and no C++ generator is required
1865     // to be present in the invocation. We didn't have to skip for C++ generator
1866     // previously because the C++ generator was built-in.
1867     if (!foundImplicitPlugin &&
1868         kv.first != absl::StrCat(plugin_prefix_, "gen-cpp")) {
1869       std::cerr << "Unknown flag: "
1870                 // strip prefix + "gen-" and add back "_opt"
1871                 << "--" << kv.first.substr(plugin_prefix_.size() + 4) << "_opt"
1872                 << std::endl;
1873       foundUnknownPluginOption = true;
1874     }
1875   }
1876   if (foundUnknownPluginOption) {
1877     return PARSE_ARGUMENT_FAIL;
1878   }
1879 
1880   // The --proto_path & --descriptor_set_in flags both specify places to look
1881   // for proto files. If neither were given, use the current working directory.
1882   if (proto_path_.empty() && descriptor_set_in_names_.empty()) {
1883     // Don't use make_pair as the old/default standard library on Solaris
1884     // doesn't support it without explicit template parameters, which are
1885     // incompatible with C++0x's make_pair.
1886     proto_path_.push_back(std::pair<std::string, std::string>("", "."));
1887   }
1888 
1889   // Check error cases that span multiple flag values.
1890   bool missing_proto_definitions = false;
1891   switch (mode_) {
1892     case MODE_COMPILE:
1893       missing_proto_definitions = input_files_.empty();
1894       break;
1895     case MODE_DECODE:
1896       // Handle --decode_raw separately, since it requires that no proto
1897       // definitions are specified.
1898       if (codec_type_.empty()) {
1899         if (!input_files_.empty() || !descriptor_set_in_names_.empty()) {
1900           std::cerr
1901               << "When using --decode_raw, no input files should be given."
1902               << std::endl;
1903           return PARSE_ARGUMENT_FAIL;
1904         }
1905         missing_proto_definitions = false;
1906         break;  // only for --decode_raw
1907       }
1908       // --decode (not raw) is handled the same way as the rest of the modes.
1909       ABSL_FALLTHROUGH_INTENDED;
1910     case MODE_ENCODE:
1911     case MODE_PRINT:
1912       missing_proto_definitions =
1913           input_files_.empty() && descriptor_set_in_names_.empty();
1914       break;
1915     default:
1916       ABSL_LOG(FATAL) << "Unexpected mode: " << mode_;
1917   }
1918   if (missing_proto_definitions) {
1919     std::cerr << "Missing input file." << std::endl;
1920     return PARSE_ARGUMENT_FAIL;
1921   }
1922   if (mode_ == MODE_COMPILE && output_directives_.empty() &&
1923       descriptor_set_out_name_.empty() && edition_defaults_out_name_.empty()) {
1924     std::cerr << "Missing output directives." << std::endl;
1925     return PARSE_ARGUMENT_FAIL;
1926   }
1927   if (mode_ != MODE_COMPILE && !dependency_out_name_.empty()) {
1928     std::cerr << "Can only use --dependency_out=FILE when generating code."
1929               << std::endl;
1930     return PARSE_ARGUMENT_FAIL;
1931   }
1932   if (mode_ != MODE_ENCODE && deterministic_output_) {
1933     std::cerr << "Can only use --deterministic_output with --encode."
1934               << std::endl;
1935     return PARSE_ARGUMENT_FAIL;
1936   }
1937   if (!dependency_out_name_.empty() && input_files_.size() > 1) {
1938     std::cerr
1939         << "Can only process one input file when using --dependency_out=FILE."
1940         << std::endl;
1941     return PARSE_ARGUMENT_FAIL;
1942   }
1943   if (imports_in_descriptor_set_ && descriptor_set_out_name_.empty()) {
1944     std::cerr << "--include_imports only makes sense when combined with "
1945                  "--descriptor_set_out."
1946               << std::endl;
1947   }
1948   if (source_info_in_descriptor_set_ && descriptor_set_out_name_.empty()) {
1949     std::cerr << "--include_source_info only makes sense when combined with "
1950                  "--descriptor_set_out."
1951               << std::endl;
1952   }
1953   if (retain_options_in_descriptor_set_ && descriptor_set_out_name_.empty()) {
1954     std::cerr << "--retain_options only makes sense when combined with "
1955                  "--descriptor_set_out."
1956               << std::endl;
1957   }
1958 
1959   return PARSE_ARGUMENT_DONE_AND_CONTINUE;
1960 }
1961 
ParseArgument(const char * arg,std::string * name,std::string * value)1962 bool CommandLineInterface::ParseArgument(const char* arg, std::string* name,
1963                                          std::string* value) {
1964   bool parsed_value = false;
1965 
1966   if (arg[0] != '-') {
1967     // Not a flag.
1968     name->clear();
1969     parsed_value = true;
1970     *value = arg;
1971   } else if (arg[1] == '-') {
1972     // Two dashes:  Multi-character name, with '=' separating name and
1973     //   value.
1974     const char* equals_pos = strchr(arg, '=');
1975     if (equals_pos != nullptr) {
1976       *name = std::string(arg, equals_pos - arg);
1977       *value = equals_pos + 1;
1978       parsed_value = true;
1979     } else {
1980       *name = arg;
1981     }
1982   } else {
1983     // One dash:  One-character name, all subsequent characters are the
1984     //   value.
1985     if (arg[1] == '\0') {
1986       // arg is just "-".  We treat this as an input file, except that at
1987       // present this will just lead to a "file not found" error.
1988       name->clear();
1989       *value = arg;
1990       parsed_value = true;
1991     } else {
1992       *name = std::string(arg, 2);
1993       *value = arg + 2;
1994       parsed_value = !value->empty();
1995     }
1996   }
1997 
1998   // Need to return true iff the next arg should be used as the value for this
1999   // one, false otherwise.
2000 
2001   if (parsed_value) {
2002     // We already parsed a value for this flag.
2003     return false;
2004   }
2005 
2006   if (*name == "-h" || *name == "--help" || *name == "--disallow_services" ||
2007       *name == "--include_imports" || *name == "--include_source_info" ||
2008       *name == "--retain_options" || *name == "--version" ||
2009       *name == "--decode_raw" ||
2010       *name == "--experimental_editions" ||
2011       *name == "--print_free_field_numbers" ||
2012       *name == "--experimental_allow_proto3_optional" ||
2013       *name == "--deterministic_output" || *name == "--fatal_warnings") {
2014     // HACK:  These are the only flags that don't take a value.
2015     //   They probably should not be hard-coded like this but for now it's
2016     //   not worth doing better.
2017     return false;
2018   }
2019 
2020   // Next argument is the flag value.
2021   return true;
2022 }
2023 
2024 CommandLineInterface::ParseArgumentStatus
InterpretArgument(const std::string & name,const std::string & value)2025 CommandLineInterface::InterpretArgument(const std::string& name,
2026                                         const std::string& value) {
2027   if (name.empty()) {
2028     // Not a flag.  Just a filename.
2029     if (value.empty()) {
2030       std::cerr
2031           << "You seem to have passed an empty string as one of the "
2032              "arguments to "
2033           << executable_name_
2034           << ".  This is actually "
2035              "sort of hard to do.  Congrats.  Unfortunately it is not valid "
2036              "input so the program is going to die now."
2037           << std::endl;
2038       return PARSE_ARGUMENT_FAIL;
2039     }
2040 
2041 #if defined(_WIN32)
2042     // On Windows, the shell (typically cmd.exe) does not expand wildcards in
2043     // file names (e.g. foo\*.proto), so we do it ourselves.
2044     switch (google::protobuf::io::win32::ExpandWildcards(
2045         value, [this](const std::string& path) {
2046           this->input_files_.push_back(path);
2047         })) {
2048       case google::protobuf::io::win32::ExpandWildcardsResult::kSuccess:
2049         break;
2050       case google::protobuf::io::win32::ExpandWildcardsResult::kErrorNoMatchingFile:
2051         // Path does not exist, is not a file, or it's longer than MAX_PATH and
2052         // long path handling is disabled.
2053         std::cerr << "Invalid file name pattern or missing input file \""
2054                   << value << "\"" << std::endl;
2055         return PARSE_ARGUMENT_FAIL;
2056       default:
2057         std::cerr << "Cannot convert path \"" << value
2058                   << "\" to or from Windows style" << std::endl;
2059         return PARSE_ARGUMENT_FAIL;
2060     }
2061 #else   // not _WIN32
2062     // On other platforms than Windows (e.g. Linux, Mac OS) the shell (typically
2063     // Bash) expands wildcards.
2064     input_files_.push_back(value);
2065 #endif  // _WIN32
2066 
2067   } else if (name == "-I" || name == "--proto_path") {
2068     // Java's -classpath (and some other languages) delimits path components
2069     // with colons.  Let's accept that syntax too just to make things more
2070     // intuitive.
2071     std::vector<std::string> parts = absl::StrSplit(
2072         value, absl::ByAnyChar(CommandLineInterface::kPathSeparator),
2073         absl::SkipEmpty());
2074 
2075     for (size_t i = 0; i < parts.size(); ++i) {
2076       std::string virtual_path;
2077       std::string disk_path;
2078 
2079       std::string::size_type equals_pos = parts[i].find_first_of('=');
2080       if (equals_pos == std::string::npos) {
2081         virtual_path = "";
2082         disk_path = parts[i];
2083       } else {
2084         virtual_path = parts[i].substr(0, equals_pos);
2085         disk_path = parts[i].substr(equals_pos + 1);
2086       }
2087 
2088       if (disk_path.empty()) {
2089         std::cerr
2090             << "--proto_path passed empty directory name.  (Use \".\" for "
2091                "current directory.)"
2092             << std::endl;
2093         return PARSE_ARGUMENT_FAIL;
2094       }
2095 
2096       // Make sure disk path exists, warn otherwise.
2097       if (access(disk_path.c_str(), F_OK) < 0) {
2098         // Try the original path; it may have just happened to have a '=' in it.
2099         if (access(parts[i].c_str(), F_OK) < 0) {
2100           std::cerr << disk_path << ": warning: directory does not exist."
2101                     << std::endl;
2102         } else {
2103           virtual_path = "";
2104           disk_path = parts[i];
2105         }
2106       }
2107 
2108       // Don't use make_pair as the old/default standard library on Solaris
2109       // doesn't support it without explicit template parameters, which are
2110       // incompatible with C++0x's make_pair.
2111       proto_path_.push_back(
2112           std::pair<std::string, std::string>(virtual_path, disk_path));
2113     }
2114 
2115   } else if (name == "--direct_dependencies") {
2116     if (direct_dependencies_explicitly_set_) {
2117       std::cerr << name
2118                 << " may only be passed once. To specify multiple "
2119                    "direct dependencies, pass them all as a single "
2120                    "parameter separated by ':'."
2121                 << std::endl;
2122       return PARSE_ARGUMENT_FAIL;
2123     }
2124 
2125     direct_dependencies_explicitly_set_ = true;
2126     std::vector<std::string> direct =
2127         absl::StrSplit(value, ':', absl::SkipEmpty());
2128     ABSL_DCHECK(direct_dependencies_.empty());
2129     direct_dependencies_.insert(direct.begin(), direct.end());
2130 
2131   } else if (name == "--direct_dependencies_violation_msg") {
2132     direct_dependencies_violation_msg_ = value;
2133 
2134   } else if (name == "--descriptor_set_in") {
2135     if (!descriptor_set_in_names_.empty()) {
2136       std::cerr << name
2137                 << " may only be passed once. To specify multiple "
2138                    "descriptor sets, pass them all as a single "
2139                    "parameter separated by '"
2140                 << CommandLineInterface::kPathSeparator << "'." << std::endl;
2141       return PARSE_ARGUMENT_FAIL;
2142     }
2143     if (value.empty()) {
2144       std::cerr << name << " requires a non-empty value." << std::endl;
2145       return PARSE_ARGUMENT_FAIL;
2146     }
2147     if (!dependency_out_name_.empty()) {
2148       std::cerr << name << " cannot be used with --dependency_out."
2149                 << std::endl;
2150       return PARSE_ARGUMENT_FAIL;
2151     }
2152 
2153     descriptor_set_in_names_ = absl::StrSplit(
2154         value, absl::ByAnyChar(CommandLineInterface::kPathSeparator),
2155         absl::SkipEmpty());
2156 
2157   } else if (name == "-o" || name == "--descriptor_set_out") {
2158     if (!descriptor_set_out_name_.empty()) {
2159       std::cerr << name << " may only be passed once." << std::endl;
2160       return PARSE_ARGUMENT_FAIL;
2161     }
2162     if (value.empty()) {
2163       std::cerr << name << " requires a non-empty value." << std::endl;
2164       return PARSE_ARGUMENT_FAIL;
2165     }
2166     if (mode_ != MODE_COMPILE) {
2167       std::cerr
2168           << "Cannot use --encode or --decode and generate descriptors at the "
2169              "same time."
2170           << std::endl;
2171       return PARSE_ARGUMENT_FAIL;
2172     }
2173     descriptor_set_out_name_ = value;
2174 
2175   } else if (name == "--dependency_out") {
2176     if (!dependency_out_name_.empty()) {
2177       std::cerr << name << " may only be passed once." << std::endl;
2178       return PARSE_ARGUMENT_FAIL;
2179     }
2180     if (value.empty()) {
2181       std::cerr << name << " requires a non-empty value." << std::endl;
2182       return PARSE_ARGUMENT_FAIL;
2183     }
2184     if (!descriptor_set_in_names_.empty()) {
2185       std::cerr << name << " cannot be used with --descriptor_set_in."
2186                 << std::endl;
2187       return PARSE_ARGUMENT_FAIL;
2188     }
2189     dependency_out_name_ = value;
2190 
2191   } else if (name == "--include_imports") {
2192     if (imports_in_descriptor_set_) {
2193       std::cerr << name << " may only be passed once." << std::endl;
2194       return PARSE_ARGUMENT_FAIL;
2195     }
2196     imports_in_descriptor_set_ = true;
2197 
2198   } else if (name == "--include_source_info") {
2199     if (source_info_in_descriptor_set_) {
2200       std::cerr << name << " may only be passed once." << std::endl;
2201       return PARSE_ARGUMENT_FAIL;
2202     }
2203     source_info_in_descriptor_set_ = true;
2204 
2205   } else if (name == "--retain_options") {
2206     if (retain_options_in_descriptor_set_) {
2207       std::cerr << name << " may only be passed once." << std::endl;
2208       return PARSE_ARGUMENT_FAIL;
2209     }
2210     retain_options_in_descriptor_set_ = true;
2211 
2212   } else if (name == "-h" || name == "--help") {
2213     PrintHelpText();
2214     return PARSE_ARGUMENT_DONE_AND_EXIT;  // Exit without running compiler.
2215 
2216   } else if (name == "--version") {
2217     if (!version_info_.empty()) {
2218       std::cout << version_info_ << std::endl;
2219     }
2220     std::cout << "libprotoc "
2221               << ::google::protobuf::internal::ProtocVersionString(
2222                      PROTOBUF_VERSION)
2223               << PROTOBUF_VERSION_SUFFIX << std::endl;
2224     return PARSE_ARGUMENT_DONE_AND_EXIT;  // Exit without running compiler.
2225 
2226   } else if (name == "--disallow_services") {
2227     disallow_services_ = true;
2228 
2229 
2230   } else if (name == "--experimental_allow_proto3_optional") {
2231     // Flag is no longer observed, but we allow it for backward compat.
2232   } else if (name == "--encode" || name == "--decode" ||
2233              name == "--decode_raw") {
2234     if (mode_ != MODE_COMPILE) {
2235       std::cerr << "Only one of --encode and --decode can be specified."
2236                 << std::endl;
2237       return PARSE_ARGUMENT_FAIL;
2238     }
2239     if (!output_directives_.empty() || !descriptor_set_out_name_.empty()) {
2240       std::cerr << "Cannot use " << name
2241                 << " and generate code or descriptors at the same time."
2242                 << std::endl;
2243       return PARSE_ARGUMENT_FAIL;
2244     }
2245 
2246     mode_ = (name == "--encode") ? MODE_ENCODE : MODE_DECODE;
2247 
2248     if (value.empty() && name != "--decode_raw") {
2249       std::cerr << "Type name for " << name << " cannot be blank." << std::endl;
2250       if (name == "--decode") {
2251         std::cerr << "To decode an unknown message, use --decode_raw."
2252                   << std::endl;
2253       }
2254       return PARSE_ARGUMENT_FAIL;
2255     } else if (!value.empty() && name == "--decode_raw") {
2256       std::cerr << "--decode_raw does not take a parameter." << std::endl;
2257       return PARSE_ARGUMENT_FAIL;
2258     }
2259 
2260     codec_type_ = value;
2261 
2262   } else if (name == "--deterministic_output") {
2263     deterministic_output_ = true;
2264 
2265   } else if (name == "--error_format") {
2266     if (value == "gcc") {
2267       error_format_ = ERROR_FORMAT_GCC;
2268     } else if (value == "msvs") {
2269       error_format_ = ERROR_FORMAT_MSVS;
2270     } else {
2271       std::cerr << "Unknown error format: " << value << std::endl;
2272       return PARSE_ARGUMENT_FAIL;
2273     }
2274 
2275   } else if (name == "--fatal_warnings") {
2276     if (fatal_warnings_) {
2277       std::cerr << name << " may only be passed once." << std::endl;
2278       return PARSE_ARGUMENT_FAIL;
2279     }
2280     fatal_warnings_ = true;
2281   } else if (name == "--plugin") {
2282     if (plugin_prefix_.empty()) {
2283       std::cerr << "This compiler does not support plugins." << std::endl;
2284       return PARSE_ARGUMENT_FAIL;
2285     }
2286 
2287     std::string plugin_name;
2288     std::string path;
2289 
2290     std::string::size_type equals_pos = value.find_first_of('=');
2291     if (equals_pos == std::string::npos) {
2292       // Use the basename of the file.
2293       std::string::size_type slash_pos = value.find_last_of('/');
2294       if (slash_pos == std::string::npos) {
2295         plugin_name = value;
2296       } else {
2297         plugin_name = value.substr(slash_pos + 1);
2298       }
2299       path = value;
2300     } else {
2301       plugin_name = value.substr(0, equals_pos);
2302       path = value.substr(equals_pos + 1);
2303     }
2304 
2305     plugins_[plugin_name] = path;
2306 
2307   } else if (name == "--print_free_field_numbers") {
2308     if (mode_ != MODE_COMPILE) {
2309       std::cerr << "Cannot use " << name
2310                 << " and use --encode, --decode or print "
2311                 << "other info at the same time." << std::endl;
2312       return PARSE_ARGUMENT_FAIL;
2313     }
2314     if (!output_directives_.empty() || !descriptor_set_out_name_.empty()) {
2315       std::cerr << "Cannot use " << name
2316                 << " and generate code or descriptors at the same time."
2317                 << std::endl;
2318       return PARSE_ARGUMENT_FAIL;
2319     }
2320     mode_ = MODE_PRINT;
2321     print_mode_ = PRINT_FREE_FIELDS;
2322   } else if (name == "--enable_codegen_trace") {
2323     // We use environment variables here so that subprocesses see this setting
2324     // when we spawn them.
2325     //
2326     // Setting environment variables is more-or-less asking for a data race,
2327     // because C got this wrong and did not mandate synchronization.
2328     // In practice, this code path is "only" in the main thread of protoc, and
2329     // it is common knowledge that touching setenv in a library is asking for
2330     // life-ruining bugs *anyways*. As such, there is a reasonable probability
2331     // that there isn't another thread kicking environment variables at this
2332     // moment.
2333 
2334 #ifdef _WIN32
2335     ::_putenv(absl::StrCat(io::Printer::kProtocCodegenTrace, "=yes").c_str());
2336 #else
2337     ::setenv(io::Printer::kProtocCodegenTrace.data(), "yes", 0);
2338 #endif
2339   } else if (name == "--experimental_editions") {
2340     // If you're reading this, you're probably wondering what
2341     // --experimental_editions is for and thinking of turning it on. This is an
2342     // experimental, undocumented, unsupported flag. Enable it at your own risk
2343     // (or, just don't!).
2344     experimental_editions_ = true;
2345   } else if (name == "--edition_defaults_out") {
2346     if (!edition_defaults_out_name_.empty()) {
2347       std::cerr << name << " may only be passed once." << std::endl;
2348       return PARSE_ARGUMENT_FAIL;
2349     }
2350     if (value.empty()) {
2351       std::cerr << name << " requires a non-empty value." << std::endl;
2352       return PARSE_ARGUMENT_FAIL;
2353     }
2354     if (mode_ != MODE_COMPILE) {
2355       std::cerr
2356           << "Cannot use --encode or --decode and generate defaults at the "
2357              "same time."
2358           << std::endl;
2359       return PARSE_ARGUMENT_FAIL;
2360     }
2361     edition_defaults_out_name_ = value;
2362   } else if (name == "--edition_defaults_minimum") {
2363     if (edition_defaults_minimum_ != EDITION_UNKNOWN) {
2364       std::cerr << name << " may only be passed once." << std::endl;
2365       return PARSE_ARGUMENT_FAIL;
2366     }
2367     if (!Edition_Parse(absl::StrCat("EDITION_", value),
2368                        &edition_defaults_minimum_)) {
2369       std::cerr << name << " unknown edition \"" << value << "\"." << std::endl;
2370       return PARSE_ARGUMENT_FAIL;
2371     }
2372   } else if (name == "--edition_defaults_maximum") {
2373     if (edition_defaults_maximum_ != EDITION_UNKNOWN) {
2374       std::cerr << name << " may only be passed once." << std::endl;
2375       return PARSE_ARGUMENT_FAIL;
2376     }
2377     if (!Edition_Parse(absl::StrCat("EDITION_", value),
2378                        &edition_defaults_maximum_)) {
2379       std::cerr << name << " unknown edition \"" << value << "\"." << std::endl;
2380       return PARSE_ARGUMENT_FAIL;
2381     }
2382   } else {
2383     // Some other flag.  Look it up in the generators list.
2384     const GeneratorInfo* generator_info = FindGeneratorByFlag(name);
2385     if (generator_info == nullptr &&
2386         (plugin_prefix_.empty() || !absl::EndsWith(name, "_out"))) {
2387       // Check if it's a generator option flag.
2388       generator_info = FindGeneratorByOption(name);
2389       if (generator_info != nullptr) {
2390         std::string* parameters =
2391             &generator_parameters_[generator_info->flag_name];
2392         if (!parameters->empty()) {
2393           parameters->append(",");
2394         }
2395         parameters->append(value);
2396       } else if (absl::StartsWith(name, "--") && absl::EndsWith(name, "_opt")) {
2397         std::string* parameters =
2398             &plugin_parameters_[PluginName(plugin_prefix_, name)];
2399         if (!parameters->empty()) {
2400           parameters->append(",");
2401         }
2402         parameters->append(value);
2403       } else {
2404         std::cerr << "Unknown flag: " << name << std::endl;
2405         return PARSE_ARGUMENT_FAIL;
2406       }
2407     } else {
2408       // It's an output flag.  Add it to the output directives.
2409       if (mode_ != MODE_COMPILE) {
2410         std::cerr << "Cannot use --encode, --decode or print .proto info and "
2411                      "generate code at the same time."
2412                   << std::endl;
2413         return PARSE_ARGUMENT_FAIL;
2414       }
2415 
2416       OutputDirective directive;
2417       directive.name = name;
2418       if (generator_info == nullptr) {
2419         directive.generator = nullptr;
2420       } else {
2421         directive.generator = generator_info->generator;
2422       }
2423 
2424       // Split value at ':' to separate the generator parameter from the
2425       // filename.  However, avoid doing this if the colon is part of a valid
2426       // Windows-style absolute path.
2427       std::string::size_type colon_pos = value.find_first_of(':');
2428       if (colon_pos == std::string::npos || IsWindowsAbsolutePath(value)) {
2429         directive.output_location = value;
2430       } else {
2431         directive.parameter = value.substr(0, colon_pos);
2432         directive.output_location = value.substr(colon_pos + 1);
2433       }
2434 
2435       output_directives_.push_back(directive);
2436     }
2437   }
2438 
2439   return PARSE_ARGUMENT_DONE_AND_CONTINUE;
2440 }
2441 
PrintHelpText()2442 void CommandLineInterface::PrintHelpText() {
2443   // Sorry for indentation here; line wrapping would be uglier.
2444   std::cout << "Usage: " << executable_name_ << " [OPTION] PROTO_FILES";
2445   std::cout << R"(
2446 Parse PROTO_FILES and generate output based on the options given:
2447   -IPATH, --proto_path=PATH   Specify the directory in which to search for
2448                               imports.  May be specified multiple times;
2449                               directories will be searched in order.  If not
2450                               given, the current working directory is used.
2451                               If not found in any of the these directories,
2452                               the --descriptor_set_in descriptors will be
2453                               checked for required proto file.
2454   --version                   Show version info and exit.
2455   -h, --help                  Show this text and exit.
2456   --encode=MESSAGE_TYPE       Read a text-format message of the given type
2457                               from standard input and write it in binary
2458                               to standard output.  The message type must
2459                               be defined in PROTO_FILES or their imports.
2460   --deterministic_output      When using --encode, ensure map fields are
2461                               deterministically ordered. Note that this order
2462                               is not canonical, and changes across builds or
2463                               releases of protoc.
2464   --decode=MESSAGE_TYPE       Read a binary message of the given type from
2465                               standard input and write it in text format
2466                               to standard output.  The message type must
2467                               be defined in PROTO_FILES or their imports.
2468   --decode_raw                Read an arbitrary protocol message from
2469                               standard input and write the raw tag/value
2470                               pairs in text format to standard output.  No
2471                               PROTO_FILES should be given when using this
2472                               flag.
2473   --descriptor_set_in=FILES   Specifies a delimited list of FILES
2474                               each containing a FileDescriptorSet (a
2475                               protocol buffer defined in descriptor.proto).
2476                               The FileDescriptor for each of the PROTO_FILES
2477                               provided will be loaded from these
2478                               FileDescriptorSets. If a FileDescriptor
2479                               appears multiple times, the first occurrence
2480                               will be used.
2481   -oFILE,                     Writes a FileDescriptorSet (a protocol buffer,
2482     --descriptor_set_out=FILE defined in descriptor.proto) containing all of
2483                               the input files to FILE.
2484   --include_imports           When using --descriptor_set_out, also include
2485                               all dependencies of the input files in the
2486                               set, so that the set is self-contained.
2487   --include_source_info       When using --descriptor_set_out, do not strip
2488                               SourceCodeInfo from the FileDescriptorProto.
2489                               This results in vastly larger descriptors that
2490                               include information about the original
2491                               location of each decl in the source file as
2492                               well as surrounding comments.
2493   --retain_options            When using --descriptor_set_out, do not strip
2494                               any options from the FileDescriptorProto.
2495                               This results in potentially larger descriptors
2496                               that include information about options that were
2497                               only meant to be useful during compilation.
2498   --dependency_out=FILE       Write a dependency output file in the format
2499                               expected by make. This writes the transitive
2500                               set of input file paths to FILE
2501   --error_format=FORMAT       Set the format in which to print errors.
2502                               FORMAT may be 'gcc' (the default) or 'msvs'
2503                               (Microsoft Visual Studio format).
2504   --fatal_warnings            Make warnings be fatal (similar to -Werr in
2505                               gcc). This flag will make protoc return
2506                               with a non-zero exit code if any warnings
2507                               are generated.
2508   --print_free_field_numbers  Print the free field numbers of the messages
2509                               defined in the given proto files. Extension ranges
2510                               are counted as occupied fields numbers.
2511   --enable_codegen_trace      Enables tracing which parts of protoc are
2512                               responsible for what codegen output. Not supported
2513                               by all backends or on all platforms.)";
2514   if (!plugin_prefix_.empty()) {
2515     std::cout << R"(
2516   --plugin=EXECUTABLE         Specifies a plugin executable to use.
2517                               Normally, protoc searches the PATH for
2518                               plugins, but you may specify additional
2519                               executables not in the path using this flag.
2520                               Additionally, EXECUTABLE may be of the form
2521                               NAME=PATH, in which case the given plugin name
2522                               is mapped to the given executable even if
2523                               the executable's own name differs.)";
2524   }
2525 
2526   for (const auto& kv : generators_by_flag_name_) {
2527     // FIXME(kenton):  If the text is long enough it will wrap, which is ugly,
2528     //   but fixing this nicely (e.g. splitting on spaces) is probably more
2529     //   trouble than it's worth.
2530     std::cout << std::endl
2531               << "  " << kv.first << "=OUT_DIR "
2532               << std::string(19 - kv.first.size(),
2533                              ' ')  // Spaces for alignment.
2534               << kv.second.help_text;
2535   }
2536   std::cout << R"(
2537   @<filename>                 Read options and filenames from file. If a
2538                               relative file path is specified, the file
2539                               will be searched in the working directory.
2540                               The --proto_path option will not affect how
2541                               this argument file is searched. Content of
2542                               the file will be expanded in the position of
2543                               @<filename> as in the argument list. Note
2544                               that shell expansion is not applied to the
2545                               content of the file (i.e., you cannot use
2546                               quotes, wildcards, escapes, commands, etc.).
2547                               Each line corresponds to a single argument,
2548                               even if it contains spaces.)";
2549   std::cout << std::endl;
2550 }
2551 
EnforceProto3OptionalSupport(const std::string & codegen_name,uint64_t supported_features,const std::vector<const FileDescriptor * > & parsed_files) const2552 bool CommandLineInterface::EnforceProto3OptionalSupport(
2553     const std::string& codegen_name, uint64_t supported_features,
2554     const std::vector<const FileDescriptor*>& parsed_files) const {
2555   bool supports_proto3_optional =
2556       supported_features & CodeGenerator::FEATURE_PROTO3_OPTIONAL;
2557   if (!supports_proto3_optional) {
2558     for (const auto fd : parsed_files) {
2559       if (ContainsProto3Optional(
2560               ::google::protobuf::internal::InternalFeatureHelper::GetEdition(*fd), fd)) {
2561         std::cerr << fd->name()
2562                   << ": is a proto3 file that contains optional fields, but "
2563                      "code generator "
2564                   << codegen_name
2565                   << " hasn't been updated to support optional fields in "
2566                      "proto3. Please ask the owner of this code generator to "
2567                      "support proto3 optional."
2568                   << std::endl;
2569         return false;
2570       }
2571     }
2572   }
2573   return true;
2574 }
2575 
EnforceEditionsSupport(const std::string & codegen_name,uint64_t supported_features,Edition minimum_edition,Edition maximum_edition,const std::vector<const FileDescriptor * > & parsed_files) const2576 bool CommandLineInterface::EnforceEditionsSupport(
2577     const std::string& codegen_name, uint64_t supported_features,
2578     Edition minimum_edition, Edition maximum_edition,
2579     const std::vector<const FileDescriptor*>& parsed_files) const {
2580   if (experimental_editions_) {
2581     // The user has explicitly specified the experimental flag.
2582     return true;
2583   }
2584   for (const auto* fd : parsed_files) {
2585     Edition edition =
2586         ::google::protobuf::internal::InternalFeatureHelper::GetEdition(*fd);
2587     if (edition < Edition::EDITION_2023 || CanSkipEditionCheck(fd->name())) {
2588       // Legacy proto2/proto3 or exempted files don't need any checks.
2589       continue;
2590     }
2591 
2592     if ((supported_features & CodeGenerator::FEATURE_SUPPORTS_EDITIONS) == 0) {
2593       std::cerr << absl::Substitute(
2594           "$0: is an editions file, but code generator $1 hasn't been "
2595           "updated to support editions yet.  Please ask the owner of this code "
2596           "generator to add support or switch back to proto2/proto3.\n\nSee "
2597           "https://protobuf.dev/editions/overview/ for more information.",
2598           fd->name(), codegen_name);
2599       return false;
2600     }
2601     if (edition < minimum_edition) {
2602       std::cerr << absl::Substitute(
2603           "$0: is a file using edition $2, which isn't supported by code "
2604           "generator $1.  Please upgrade your file to at least edition $3.",
2605           fd->name(), codegen_name, edition, minimum_edition);
2606       return false;
2607     }
2608     if (edition > maximum_edition) {
2609       std::cerr << absl::Substitute(
2610           "$0: is a file using edition $2, which isn't supported by code "
2611           "generator $1.  Please ask the owner of this code generator to add "
2612           "support or switch back to a maximum of edition $3.",
2613           fd->name(), codegen_name, edition, maximum_edition);
2614       return false;
2615     }
2616   }
2617   return true;
2618 }
2619 
GenerateOutput(const std::vector<const FileDescriptor * > & parsed_files,const OutputDirective & output_directive,GeneratorContext * generator_context)2620 bool CommandLineInterface::GenerateOutput(
2621     const std::vector<const FileDescriptor*>& parsed_files,
2622     const OutputDirective& output_directive,
2623     GeneratorContext* generator_context) {
2624   // Call the generator.
2625   std::string error;
2626   if (output_directive.generator == nullptr) {
2627     // This is a plugin.
2628     ABSL_CHECK(absl::StartsWith(output_directive.name, "--") &&
2629                absl::EndsWith(output_directive.name, "_out"))
2630         << "Bad name for plugin generator: " << output_directive.name;
2631 
2632     std::string plugin_name = PluginName(plugin_prefix_, output_directive.name);
2633     std::string parameters = output_directive.parameter;
2634     if (!plugin_parameters_[plugin_name].empty()) {
2635       if (!parameters.empty()) {
2636         parameters.append(",");
2637       }
2638       parameters.append(plugin_parameters_[plugin_name]);
2639     }
2640     if (!GeneratePluginOutput(parsed_files, plugin_name, parameters,
2641                               generator_context, &error)) {
2642       std::cerr << output_directive.name << ": " << error << std::endl;
2643       return false;
2644     }
2645   } else {
2646     // Regular generator.
2647     std::string parameters = output_directive.parameter;
2648     if (!generator_parameters_[output_directive.name].empty()) {
2649       if (!parameters.empty()) {
2650         parameters.append(",");
2651       }
2652       parameters.append(generator_parameters_[output_directive.name]);
2653     }
2654     if (!EnforceProto3OptionalSupport(
2655             output_directive.name,
2656             output_directive.generator->GetSupportedFeatures(), parsed_files)) {
2657       return false;
2658     }
2659 
2660     if (!EnforceEditionsSupport(
2661             output_directive.name,
2662             output_directive.generator->GetSupportedFeatures(),
2663             output_directive.generator->GetMinimumEdition(),
2664             output_directive.generator->GetMaximumEdition(), parsed_files)) {
2665       return false;
2666     }
2667 
2668     if (!output_directive.generator->GenerateAll(parsed_files, parameters,
2669                                                  generator_context, &error)) {
2670       // Generator returned an error.
2671       std::cerr << output_directive.name << ": " << error << std::endl;
2672       return false;
2673     }
2674   }
2675 
2676   return true;
2677 }
2678 
GenerateDependencyManifestFile(const std::vector<const FileDescriptor * > & parsed_files,const GeneratorContextMap & output_directories,DiskSourceTree * source_tree)2679 bool CommandLineInterface::GenerateDependencyManifestFile(
2680     const std::vector<const FileDescriptor*>& parsed_files,
2681     const GeneratorContextMap& output_directories,
2682     DiskSourceTree* source_tree) {
2683   FileDescriptorSet file_set;
2684 
2685   absl::flat_hash_set<const FileDescriptor*> already_seen;
2686   for (size_t i = 0; i < parsed_files.size(); ++i) {
2687     GetTransitiveDependencies(parsed_files[i], &already_seen,
2688                               file_set.mutable_file());
2689   }
2690 
2691   std::vector<std::string> output_filenames;
2692   for (const auto& pair : output_directories) {
2693     const std::string& location = pair.first;
2694     GeneratorContextImpl* directory = pair.second.get();
2695     std::vector<std::string> relative_output_filenames;
2696     directory->GetOutputFilenames(&relative_output_filenames);
2697     for (size_t i = 0; i < relative_output_filenames.size(); ++i) {
2698       std::string output_filename = location + relative_output_filenames[i];
2699       if (output_filename.compare(0, 2, "./") == 0) {
2700         output_filename = output_filename.substr(2);
2701       }
2702       output_filenames.push_back(output_filename);
2703     }
2704   }
2705 
2706   if (!descriptor_set_out_name_.empty()) {
2707     output_filenames.push_back(descriptor_set_out_name_);
2708   }
2709 
2710   if (!edition_defaults_out_name_.empty()) {
2711     output_filenames.push_back(edition_defaults_out_name_);
2712   }
2713 
2714   // Create the depfile, even if it will be empty.
2715   int fd;
2716   do {
2717     fd = open(dependency_out_name_.c_str(),
2718               O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0666);
2719   } while (fd < 0 && errno == EINTR);
2720 
2721   if (fd < 0) {
2722     perror(dependency_out_name_.c_str());
2723     return false;
2724   }
2725 
2726   // Only write to the depfile if there is at least one output_filename.
2727   // Otherwise, the depfile will be malformed.
2728   if (!output_filenames.empty()) {
2729     io::FileOutputStream out(fd);
2730     io::Printer printer(&out, '$');
2731 
2732     for (size_t i = 0; i < output_filenames.size(); ++i) {
2733       printer.Print(output_filenames[i]);
2734       if (i == output_filenames.size() - 1) {
2735         printer.Print(":");
2736       } else {
2737         printer.Print(" \\\n");
2738       }
2739     }
2740 
2741     for (int i = 0; i < file_set.file_size(); ++i) {
2742       const FileDescriptorProto& file = file_set.file(i);
2743       const std::string& virtual_file = file.name();
2744       std::string disk_file;
2745       if (source_tree &&
2746           source_tree->VirtualFileToDiskFile(virtual_file, &disk_file)) {
2747         printer.Print(" $disk_file$", "disk_file", disk_file);
2748         if (i < file_set.file_size() - 1) printer.Print("\\\n");
2749       } else {
2750         std::cerr << "Unable to identify path for file " << virtual_file
2751                   << std::endl;
2752         return false;
2753       }
2754     }
2755   }
2756 
2757   return true;
2758 }
2759 
GeneratePluginOutput(const std::vector<const FileDescriptor * > & parsed_files,const std::string & plugin_name,const std::string & parameter,GeneratorContext * generator_context,std::string * error)2760 bool CommandLineInterface::GeneratePluginOutput(
2761     const std::vector<const FileDescriptor*>& parsed_files,
2762     const std::string& plugin_name, const std::string& parameter,
2763     GeneratorContext* generator_context, std::string* error) {
2764   CodeGeneratorRequest request;
2765   CodeGeneratorResponse response;
2766   std::string processed_parameter = parameter;
2767 
2768   bool bootstrap = GetBootstrapParam(processed_parameter);
2769 
2770   // Build the request.
2771   if (!processed_parameter.empty()) {
2772     request.set_parameter(processed_parameter);
2773   }
2774 
2775 
2776   absl::flat_hash_set<const FileDescriptor*> already_seen;
2777   for (const FileDescriptor* file : parsed_files) {
2778     request.add_file_to_generate(file->name());
2779     GetTransitiveDependencies(file, &already_seen, request.mutable_proto_file(),
2780                               {/*.include_json_name =*/true,
2781                                /*.include_source_code_info =*/true,
2782                                /*.retain_options =*/true});
2783   }
2784 
2785   // Populate source_file_descriptors and remove source-retention options from
2786   // proto_file.
2787   ABSL_CHECK(!parsed_files.empty());
2788   const DescriptorPool* pool = parsed_files[0]->pool();
2789   absl::flat_hash_set<std::string> files_to_generate(input_files_.begin(),
2790                                                      input_files_.end());
2791   static const auto builtin_plugins = new absl::flat_hash_set<std::string>(
2792       {"protoc-gen-cpp", "protoc-gen-java", "protoc-gen-mutable_java",
2793        "protoc-gen-python"});
2794   for (FileDescriptorProto& file_proto : *request.mutable_proto_file()) {
2795     if (files_to_generate.contains(file_proto.name())) {
2796       const FileDescriptor* file = pool->FindFileByName(file_proto.name());
2797       *request.add_source_file_descriptors() = std::move(file_proto);
2798       file->CopyTo(&file_proto);
2799       // Don't populate source code info or json_name for bootstrap protos.
2800       if (!bootstrap) {
2801         file->CopySourceCodeInfoTo(&file_proto);
2802 
2803         // The built-in code generators didn't use the json names.
2804         if (!builtin_plugins->contains(plugin_name)) {
2805           file->CopyJsonNameTo(&file_proto);
2806         }
2807       }
2808       StripSourceRetentionOptions(*file->pool(), file_proto);
2809     }
2810   }
2811 
2812   google::protobuf::compiler::Version* version =
2813       request.mutable_compiler_version();
2814   version->set_major(PROTOBUF_VERSION / 1000000);
2815   version->set_minor(PROTOBUF_VERSION / 1000 % 1000);
2816   version->set_patch(PROTOBUF_VERSION % 1000);
2817   version->set_suffix(PROTOBUF_VERSION_SUFFIX);
2818 
2819   // Invoke the plugin.
2820   Subprocess subprocess;
2821 
2822   if (plugins_.count(plugin_name) > 0) {
2823     subprocess.Start(plugins_[plugin_name], Subprocess::EXACT_NAME);
2824   } else {
2825     subprocess.Start(plugin_name, Subprocess::SEARCH_PATH);
2826   }
2827 
2828   std::string communicate_error;
2829   if (!subprocess.Communicate(request, &response, &communicate_error)) {
2830     *error = absl::Substitute("$0: $1", plugin_name, communicate_error);
2831     return false;
2832   }
2833 
2834   // Write the files.  We do this even if there was a generator error in order
2835   // to match the behavior of a compiled-in generator.
2836   std::unique_ptr<io::ZeroCopyOutputStream> current_output;
2837   for (int i = 0; i < response.file_size(); ++i) {
2838     const CodeGeneratorResponse::File& output_file = response.file(i);
2839 
2840     if (!output_file.insertion_point().empty()) {
2841       std::string filename = output_file.name();
2842       // Open a file for insert.
2843       // We reset current_output to nullptr first so that the old file is closed
2844       // before the new one is opened.
2845       current_output.reset();
2846       current_output.reset(
2847           generator_context->OpenForInsertWithGeneratedCodeInfo(
2848               filename, output_file.insertion_point(),
2849               output_file.generated_code_info()));
2850     } else if (!output_file.name().empty()) {
2851       // Starting a new file.  Open it.
2852       // We reset current_output to nullptr first so that the old file is closed
2853       // before the new one is opened.
2854       current_output.reset();
2855       current_output.reset(generator_context->Open(output_file.name()));
2856     } else if (current_output == nullptr) {
2857       *error = absl::Substitute(
2858           "$0: First file chunk returned by plugin did not specify a file "
2859           "name.",
2860           plugin_name);
2861       return false;
2862     }
2863 
2864     // Use CodedOutputStream for convenience; otherwise we'd need to provide
2865     // our own buffer-copying loop.
2866     io::CodedOutputStream writer(current_output.get());
2867     writer.WriteString(output_file.content());
2868   }
2869 
2870   // Check for errors.
2871   bool success = true;
2872   if (!EnforceProto3OptionalSupport(plugin_name, response.supported_features(),
2873                                     parsed_files)) {
2874     success = false;
2875   }
2876   if (!EnforceEditionsSupport(plugin_name, response.supported_features(),
2877                               static_cast<Edition>(response.minimum_edition()),
2878                               static_cast<Edition>(response.maximum_edition()),
2879                               parsed_files)) {
2880     success = false;
2881   }
2882   if (!response.error().empty()) {
2883     // Generator returned an error.
2884     *error = response.error();
2885     success = false;
2886   }
2887 
2888   return success;
2889 }
2890 
EncodeOrDecode(const DescriptorPool * pool)2891 bool CommandLineInterface::EncodeOrDecode(const DescriptorPool* pool) {
2892   // Look up the type.
2893   const Descriptor* type = pool->FindMessageTypeByName(codec_type_);
2894   if (type == nullptr) {
2895     std::cerr << "Type not defined: " << codec_type_ << std::endl;
2896     return false;
2897   }
2898 
2899   DynamicMessageFactory dynamic_factory(pool);
2900   std::unique_ptr<Message> message(dynamic_factory.GetPrototype(type)->New());
2901 
2902   if (mode_ == MODE_ENCODE) {
2903     SetFdToTextMode(STDIN_FILENO);
2904     SetFdToBinaryMode(STDOUT_FILENO);
2905   } else {
2906     SetFdToBinaryMode(STDIN_FILENO);
2907     SetFdToTextMode(STDOUT_FILENO);
2908   }
2909 
2910   io::FileInputStream in(STDIN_FILENO);
2911   io::FileOutputStream out(STDOUT_FILENO);
2912 
2913   if (mode_ == MODE_ENCODE) {
2914     // Input is text.
2915     ErrorPrinter error_collector(error_format_);
2916     TextFormat::Parser parser;
2917     parser.RecordErrorsTo(&error_collector);
2918     parser.AllowPartialMessage(true);
2919 
2920     if (!parser.Parse(&in, message.get())) {
2921       std::cerr << "Failed to parse input." << std::endl;
2922       return false;
2923     }
2924   } else {
2925     // Input is binary.
2926     if (!message->ParsePartialFromZeroCopyStream(&in)) {
2927       std::cerr << "Failed to parse input." << std::endl;
2928       return false;
2929     }
2930   }
2931 
2932   if (!message->IsInitialized()) {
2933     std::cerr << "warning:  Input message is missing required fields:  "
2934               << message->InitializationErrorString() << std::endl;
2935   }
2936 
2937   if (mode_ == MODE_ENCODE) {
2938     // Output is binary.
2939     io::CodedOutputStream coded_out(&out);
2940     coded_out.SetSerializationDeterministic(deterministic_output_);
2941     if (!message->SerializePartialToCodedStream(&coded_out)) {
2942       std::cerr << "output: I/O error." << std::endl;
2943       return false;
2944     }
2945   } else {
2946     // Output is text.
2947     if (!TextFormat::Print(*message, &out)) {
2948       std::cerr << "output: I/O error." << std::endl;
2949       return false;
2950     }
2951   }
2952 
2953   return true;
2954 }
2955 
WriteDescriptorSet(const std::vector<const FileDescriptor * > & parsed_files)2956 bool CommandLineInterface::WriteDescriptorSet(
2957     const std::vector<const FileDescriptor*>& parsed_files) {
2958   FileDescriptorSet file_set;
2959 
2960   absl::flat_hash_set<const FileDescriptor*> already_seen;
2961   if (!imports_in_descriptor_set_) {
2962     // Since we don't want to output transitive dependencies, but we do want
2963     // things to be in dependency order, add all dependencies that aren't in
2964     // parsed_files to already_seen.  This will short circuit the recursion
2965     // in GetTransitiveDependencies.
2966     absl::flat_hash_set<const FileDescriptor*> to_output;
2967     to_output.insert(parsed_files.begin(), parsed_files.end());
2968     for (size_t i = 0; i < parsed_files.size(); ++i) {
2969       const FileDescriptor* file = parsed_files[i];
2970       for (int j = 0; j < file->dependency_count(); j++) {
2971         const FileDescriptor* dependency = file->dependency(j);
2972         // if the dependency isn't in parsed files, mark it as already seen
2973         if (to_output.find(dependency) == to_output.end()) {
2974           already_seen.insert(dependency);
2975         }
2976       }
2977     }
2978   }
2979   TransitiveDependencyOptions options;
2980   options.include_json_name = true;
2981   options.include_source_code_info = source_info_in_descriptor_set_;
2982   options.retain_options = retain_options_in_descriptor_set_;
2983   for (size_t i = 0; i < parsed_files.size(); ++i) {
2984     GetTransitiveDependencies(parsed_files[i], &already_seen,
2985                               file_set.mutable_file(), options);
2986   }
2987 
2988   int fd;
2989   do {
2990     fd = open(descriptor_set_out_name_.c_str(),
2991               O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0666);
2992   } while (fd < 0 && errno == EINTR);
2993 
2994   if (fd < 0) {
2995     perror(descriptor_set_out_name_.c_str());
2996     return false;
2997   }
2998 
2999   io::FileOutputStream out(fd);
3000 
3001   {
3002     io::CodedOutputStream coded_out(&out);
3003     // Determinism is useful here because build outputs are sometimes checked
3004     // into version control.
3005     coded_out.SetSerializationDeterministic(true);
3006     if (!file_set.SerializeToCodedStream(&coded_out)) {
3007       std::cerr << descriptor_set_out_name_ << ": " << strerror(out.GetErrno())
3008                 << std::endl;
3009       out.Close();
3010       return false;
3011     }
3012   }
3013 
3014   if (!out.Close()) {
3015     std::cerr << descriptor_set_out_name_ << ": " << strerror(out.GetErrno())
3016               << std::endl;
3017     return false;
3018   }
3019 
3020   return true;
3021 }
3022 
WriteEditionDefaults(const DescriptorPool & pool)3023 bool CommandLineInterface::WriteEditionDefaults(const DescriptorPool& pool) {
3024   const Descriptor* feature_set;
3025   if (opensource_runtime_) {
3026     feature_set = pool.FindMessageTypeByName("google.protobuf.FeatureSet");
3027   } else {
3028     feature_set = pool.FindMessageTypeByName("google.protobuf.FeatureSet");
3029   }
3030   if (feature_set == nullptr) {
3031     std::cerr << edition_defaults_out_name_
3032               << ": Could not find FeatureSet in descriptor pool.  Please make "
3033                  "sure descriptor.proto is in your import path"
3034               << std::endl;
3035     return false;
3036   }
3037   std::vector<const FieldDescriptor*> extensions;
3038   pool.FindAllExtensions(feature_set, &extensions);
3039 
3040   Edition minimum = MinimumAllowedEdition();
3041   if (edition_defaults_minimum_ != EDITION_UNKNOWN) {
3042     minimum = edition_defaults_minimum_;
3043   }
3044   Edition maximum = MaximumAllowedEdition();
3045   if (edition_defaults_maximum_ != EDITION_UNKNOWN) {
3046     maximum = edition_defaults_maximum_;
3047   }
3048 
3049   absl::StatusOr<FeatureSetDefaults> defaults =
3050       FeatureResolver::CompileDefaults(feature_set, extensions, minimum,
3051                                        maximum);
3052   if (!defaults.ok()) {
3053     std::cerr << edition_defaults_out_name_ << ": "
3054               << defaults.status().message() << std::endl;
3055     return false;
3056   }
3057 
3058   int fd;
3059   do {
3060     fd = open(edition_defaults_out_name_.c_str(),
3061               O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0666);
3062   } while (fd < 0 && errno == EINTR);
3063 
3064   if (fd < 0) {
3065     perror(edition_defaults_out_name_.c_str());
3066     return false;
3067   }
3068 
3069   io::FileOutputStream out(fd);
3070 
3071   {
3072     io::CodedOutputStream coded_out(&out);
3073     // Determinism is useful here because build outputs are sometimes checked
3074     // into version control.
3075     coded_out.SetSerializationDeterministic(true);
3076     if (!defaults->SerializeToCodedStream(&coded_out)) {
3077       std::cerr << edition_defaults_out_name_ << ": "
3078                 << strerror(out.GetErrno()) << std::endl;
3079       out.Close();
3080       return false;
3081     }
3082   }
3083 
3084   if (!out.Close()) {
3085     std::cerr << edition_defaults_out_name_ << ": " << strerror(out.GetErrno())
3086               << std::endl;
3087     return false;
3088   }
3089 
3090   return true;
3091 }
3092 
3093 const CommandLineInterface::GeneratorInfo*
FindGeneratorByFlag(const std::string & name) const3094 CommandLineInterface::FindGeneratorByFlag(const std::string& name) const {
3095   auto it = generators_by_flag_name_.find(name);
3096   if (it == generators_by_flag_name_.end()) return nullptr;
3097   return &it->second;
3098 }
3099 
3100 const CommandLineInterface::GeneratorInfo*
FindGeneratorByOption(const std::string & option) const3101 CommandLineInterface::FindGeneratorByOption(const std::string& option) const {
3102   auto it = generators_by_option_name_.find(option);
3103   if (it == generators_by_option_name_.end()) return nullptr;
3104   return &it->second;
3105 }
3106 
3107 namespace {
3108 
3109 // Utility function for PrintFreeFieldNumbers.
3110 // Stores occupied ranges into the ranges parameter, and next level of sub
3111 // message types into the nested_messages parameter.  The FieldRange is left
3112 // inclusive, right exclusive. i.e. [a, b).
3113 //
3114 // Nested Messages:
3115 // Note that it only stores the nested message type, iff the nested type is
3116 // either a direct child of the given descriptor, or the nested type is a
3117 // descendant of the given descriptor and all the nodes between the
3118 // nested type and the given descriptor are group types. e.g.
3119 //
3120 // message Foo {
3121 //   message Bar {
3122 //     message NestedBar {}
3123 //   }
3124 //   group Baz = 1 {
3125 //     group NestedBazGroup = 2 {
3126 //       message Quz {
3127 //         message NestedQuz {}
3128 //       }
3129 //     }
3130 //     message NestedBaz {}
3131 //   }
3132 // }
3133 //
3134 // In this case, Bar, Quz and NestedBaz will be added into the nested types.
3135 // Since free field numbers of group types will not be printed, this makes sure
3136 // the nested message types in groups will not be dropped. The nested_messages
3137 // parameter will contain the direct children (when groups are ignored in the
3138 // tree) of the given descriptor for the caller to traverse. The declaration
3139 // order of the nested messages is also preserved.
3140 typedef std::pair<int, int> FieldRange;
GatherOccupiedFieldRanges(const Descriptor * descriptor,absl::btree_set<FieldRange> * ranges,std::vector<const Descriptor * > * nested_messages)3141 void GatherOccupiedFieldRanges(
3142     const Descriptor* descriptor, absl::btree_set<FieldRange>* ranges,
3143     std::vector<const Descriptor*>* nested_messages) {
3144   for (int i = 0; i < descriptor->field_count(); ++i) {
3145     const FieldDescriptor* fd = descriptor->field(i);
3146     ranges->insert(FieldRange(fd->number(), fd->number() + 1));
3147   }
3148   for (int i = 0; i < descriptor->extension_range_count(); ++i) {
3149     ranges->insert(FieldRange(descriptor->extension_range(i)->start_number(),
3150                               descriptor->extension_range(i)->end_number()));
3151   }
3152   for (int i = 0; i < descriptor->reserved_range_count(); ++i) {
3153     ranges->insert(FieldRange(descriptor->reserved_range(i)->start,
3154                               descriptor->reserved_range(i)->end));
3155   }
3156   // Handle the nested messages/groups in declaration order to make it
3157   // post-order strict.
3158   for (int i = 0; i < descriptor->nested_type_count(); ++i) {
3159     const Descriptor* nested_desc = descriptor->nested_type(i);
3160     nested_messages->push_back(nested_desc);
3161   }
3162 }
3163 
3164 // Utility function for PrintFreeFieldNumbers.
3165 // Actually prints the formatted free field numbers for given message name and
3166 // occupied ranges.
FormatFreeFieldNumbers(absl::string_view name,const absl::btree_set<FieldRange> & ranges)3167 void FormatFreeFieldNumbers(absl::string_view name,
3168                             const absl::btree_set<FieldRange>& ranges) {
3169   std::string output;
3170   absl::StrAppendFormat(&output, "%-35s free:", name);
3171   int next_free_number = 1;
3172   for (const auto& range : ranges) {
3173     // This happens when groups re-use parent field numbers, in which
3174     // case we skip the FieldRange entirely.
3175     if (next_free_number >= range.second) continue;
3176 
3177     if (next_free_number < range.first) {
3178       if (next_free_number + 1 == range.first) {
3179         // Singleton
3180         absl::StrAppendFormat(&output, " %d", next_free_number);
3181       } else {
3182         // Range
3183         absl::StrAppendFormat(&output, " %d-%d", next_free_number,
3184                               range.first - 1);
3185       }
3186     }
3187     next_free_number = range.second;
3188   }
3189   if (next_free_number <= FieldDescriptor::kMaxNumber) {
3190     absl::StrAppendFormat(&output, " %d-INF", next_free_number);
3191   }
3192   std::cout << output << std::endl;
3193 }
3194 
3195 }  // namespace
3196 
PrintFreeFieldNumbers(const Descriptor * descriptor)3197 void CommandLineInterface::PrintFreeFieldNumbers(const Descriptor* descriptor) {
3198   absl::btree_set<FieldRange> ranges;
3199   std::vector<const Descriptor*> nested_messages;
3200   GatherOccupiedFieldRanges(descriptor, &ranges, &nested_messages);
3201 
3202   for (size_t i = 0; i < nested_messages.size(); ++i) {
3203     PrintFreeFieldNumbers(nested_messages[i]);
3204   }
3205   FormatFreeFieldNumbers(descriptor->full_name(), ranges);
3206 }
3207 
3208 
3209 }  // namespace compiler
3210 }  // namespace protobuf
3211 }  // namespace google
3212 
3213 #include "google/protobuf/port_undef.inc"
3214