• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 //     * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 //     * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 //     * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 // Author: kenton@google.com (Kenton Varda)
32 //  Based on original Protocol Buffers design by
33 //  Sanjay Ghemawat, Jeff Dean, and others.
34 
35 #include <google/protobuf/compiler/command_line_interface.h>
36 
37 #include <google/protobuf/stubs/platform_macros.h>
38 
39 #include <stdio.h>
40 #include <sys/types.h>
41 #ifdef major
42 #undef major
43 #endif
44 #ifdef minor
45 #undef minor
46 #endif
47 #include <fcntl.h>
48 #include <sys/stat.h>
49 #ifndef _MSC_VER
50 #include <unistd.h>
51 #endif
52 #include <ctype.h>
53 #include <errno.h>
54 #include <fstream>
55 #include <iostream>
56 
57 #include <limits.h>  //For PATH_MAX
58 
59 #include <memory>
60 
61 #if defined(__APPLE__)
62 #include <mach-o/dyld.h>
63 #elif defined(__FreeBSD__)
64 #include <sys/sysctl.h>
65 #endif
66 
67 #include <google/protobuf/stubs/common.h>
68 #include <google/protobuf/stubs/logging.h>
69 #include <google/protobuf/stubs/stringprintf.h>
70 #include <google/protobuf/compiler/subprocess.h>
71 #include <google/protobuf/compiler/zip_writer.h>
72 #include <google/protobuf/compiler/plugin.pb.h>
73 #include <google/protobuf/compiler/code_generator.h>
74 #include <google/protobuf/compiler/importer.h>
75 #include <google/protobuf/io/coded_stream.h>
76 #include <google/protobuf/io/printer.h>
77 #include <google/protobuf/io/zero_copy_stream_impl.h>
78 #include <google/protobuf/descriptor.h>
79 #include <google/protobuf/dynamic_message.h>
80 #include <google/protobuf/text_format.h>
81 #include <google/protobuf/stubs/strutil.h>
82 #include <google/protobuf/stubs/substitute.h>
83 #include <google/protobuf/io/io_win32.h>
84 #include <google/protobuf/stubs/map_util.h>
85 #include <google/protobuf/stubs/stl_util.h>
86 
87 
88 #include <google/protobuf/port_def.inc>
89 
90 namespace google {
91 namespace protobuf {
92 namespace compiler {
93 
// Make sure O_BINARY can always be passed to open(): reuse _O_BINARY when the
// platform spells it that way, and otherwise define it as a no-op flag.
#if !defined(O_BINARY)
#if defined(_O_BINARY)
#define O_BINARY _O_BINARY
#else
#define O_BINARY 0  // If this isn't defined, the platform doesn't need it.
#endif  // defined(_O_BINARY)
#endif  // !defined(O_BINARY)
101 
102 namespace {
103 #if defined(_WIN32)
104 // DO NOT include <io.h>, instead create functions in io_win32.{h,cc} and import
105 // them like we do below.
106 using google::protobuf::io::win32::access;
107 using google::protobuf::io::win32::close;
108 using google::protobuf::io::win32::mkdir;
109 using google::protobuf::io::win32::open;
110 using google::protobuf::io::win32::setmode;
111 using google::protobuf::io::win32::write;
112 #endif
113 
// Default message template used when a file is imported without being listed
// in --direct_dependencies.  The single "%s" is a placeholder for the file
// name.  Both the characters and the pointer itself are const so the default
// cannot be re-pointed by accident.
static const char* const kDefaultDirectDependenciesViolationMsg =
    "File is imported but not declared in --direct_dependencies: %s";
116 
// Returns true if the text looks like a Windows-style absolute path: a drive
// letter, a ':' and then a '/' or '\' (example: "C:\foo"), with no other ':'
// anywhere in the string.  Always returns false when not compiled for Windows
// or Cygwin.  TODO(kenton):  Share this with copy in importer.cc?
static bool IsWindowsAbsolutePath(const std::string& text) {
#if defined(_WIN32) || defined(__CYGWIN__)
  // isalpha() has undefined behavior for negative arguments other than EOF,
  // and plain char may be signed, so convert to unsigned char first.
  return text.size() >= 3 && text[1] == ':' &&
         isalpha(static_cast<unsigned char>(text[0])) &&
         (text[2] == '/' || text[2] == '\\') && text.find_last_of(':') == 1;
#else
  return false;
#endif
}
128 
// Puts |fd| into text mode.  Only Windows builds do any work here; a failing
// setmode() call is logged as a warning and otherwise ignored.
void SetFdToTextMode(int fd) {
  // (Text and binary are the same on non-Windows platforms.)
#if defined(_WIN32)
  const bool failed = (setmode(fd, _O_TEXT) == -1);
  if (failed) {
    // This should never happen, I think.
    GOOGLE_LOG(WARNING) << "setmode(" << fd << ", _O_TEXT): " << strerror(errno);
  }
#endif
}
138 
// Puts |fd| into binary mode.  Only Windows builds do any work here; a failing
// setmode() call is logged as a warning and otherwise ignored.
void SetFdToBinaryMode(int fd) {
  // (Text and binary are the same on non-Windows platforms.)
#if defined(_WIN32)
  const bool failed = (setmode(fd, _O_BINARY) == -1);
  if (failed) {
    // This should never happen, I think.
    GOOGLE_LOG(WARNING) << "setmode(" << fd << ", _O_BINARY): " << strerror(errno);
  }
#endif
}
148 
// Appends a '/' to |*path| unless the path is empty or already ends with one.
void AddTrailingSlash(std::string* path) {
  if (path->empty()) return;
  if (path->back() == '/') return;
  *path += '/';
}
154 
// Returns true if |path| is empty or names something that exists on disk.
// Only existence is probed (access() with F_OK); nothing here checks that the
// entry is actually a directory.  On failure the errno text is printed to
// stderr and false is returned.
bool VerifyDirectoryExists(const std::string& path) {
  if (path.empty()) return true;

  const bool exists = access(path.c_str(), F_OK) != -1;
  if (!exists) {
    std::cerr << path << ": " << strerror(errno) << std::endl;
  }
  return exists;
}
165 
166 // Try to create the parent directory of the given file, creating the parent's
167 // parent if necessary, and so on.  The full file name is actually
168 // (prefix + filename), but we assume |prefix| already exists and only create
169 // directories listed in |filename|.
TryCreateParentDirectory(const std::string & prefix,const std::string & filename)170 bool TryCreateParentDirectory(const std::string& prefix,
171                               const std::string& filename) {
172   // Recursively create parent directories to the output file.
173   // On Windows, both '/' and '\' are valid path separators.
174   std::vector<std::string> parts =
175       Split(filename, "/\\", true);
176   std::string path_so_far = prefix;
177   for (int i = 0; i < parts.size() - 1; i++) {
178     path_so_far += parts[i];
179     if (mkdir(path_so_far.c_str(), 0777) != 0) {
180       if (errno != EEXIST) {
181         std::cerr << filename << ": while trying to create directory "
182                   << path_so_far << ": " << strerror(errno) << std::endl;
183         return false;
184       }
185     }
186     path_so_far += '/';
187   }
188 
189   return true;
190 }
191 
// Get the absolute path of this protoc binary.
//
// On success stores the path in |*path| and returns true.  Returns false, and
// leaves |*path| untouched, when the platform-specific lookup fails or yields
// an empty result.
bool GetProtocAbsolutePath(std::string* path) {
#ifdef _WIN32
  char buffer[MAX_PATH];
  int len = GetModuleFileNameA(NULL, buffer, MAX_PATH);
#elif defined(__APPLE__)
  char buffer[PATH_MAX];
  int len = 0;

  char dirtybuffer[PATH_MAX];
  uint32_t size = sizeof(dirtybuffer);
  // Only measure |buffer| when realpath() actually succeeded; on failure its
  // contents are not a valid string and strlen() on it would be undefined.
  if (_NSGetExecutablePath(dirtybuffer, &size) == 0 &&
      realpath(dirtybuffer, buffer) != nullptr) {
    len = strlen(buffer);
  }
#elif defined(__FreeBSD__)
  char buffer[PATH_MAX];
  size_t len = PATH_MAX;
  int mib[4] = {CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1};
  if (sysctl(mib, 4, &buffer, &len, NULL, 0) != 0) {
    len = 0;
  }
#else
  char buffer[PATH_MAX];
  // readlink() does not NUL-terminate; the returned length is used instead.
  // A failure (-1) falls through to the "return false" branch below.
  int len = readlink("/proc/self/exe", buffer, PATH_MAX);
#endif
  if (len > 0) {
    path->assign(buffer, len);
    return true;
  } else {
    return false;
  }
}
225 
// Whether |path| is a directory where google/protobuf/descriptor.proto and
// the other well-known type protos are installed.
bool IsInstalledProtoPath(const std::string& path) {
  // Probing for descriptor.proto alone should be good enough.
  std::string probe = path;
  probe += "/google/protobuf/descriptor.proto";
  const int rc = access(probe.c_str(), F_OK);
  return rc != -1;
}
233 
234 // Add the paths where google/protobuf/descriptor.proto and other well-known
235 // type protos are installed.
AddDefaultProtoPaths(std::vector<std::pair<std::string,std::string>> * paths)236 void AddDefaultProtoPaths(
237     std::vector<std::pair<std::string, std::string> >* paths) {
238   // TODO(xiaofeng): The code currently only checks relative paths of where
239   // the protoc binary is installed. We probably should make it handle more
240   // cases than that.
241   std::string path;
242   if (!GetProtocAbsolutePath(&path)) {
243     return;
244   }
245   // Strip the binary name.
246   size_t pos = path.find_last_of("/\\");
247   if (pos == std::string::npos || pos == 0) {
248     return;
249   }
250   path = path.substr(0, pos);
251   // Check the binary's directory.
252   if (IsInstalledProtoPath(path)) {
253     paths->push_back(std::pair<std::string, std::string>("", path));
254     return;
255   }
256   // Check if there is an include subdirectory.
257   if (IsInstalledProtoPath(path + "/include")) {
258     paths->push_back(
259         std::pair<std::string, std::string>("", path + "/include"));
260     return;
261   }
262   // Check if the upper level directory has an "include" subdirectory.
263   pos = path.find_last_of("/\\");
264   if (pos == std::string::npos || pos == 0) {
265     return;
266   }
267   path = path.substr(0, pos);
268   if (IsInstalledProtoPath(path + "/include")) {
269     paths->push_back(
270         std::pair<std::string, std::string>("", path + "/include"));
271     return;
272   }
273 }
274 
// Builds the plugin executable name for a command-line directive: the plugin
// prefix, then "gen-", then the directive with its first two and last four
// characters removed.  The directive is assumed to start with "--" and end
// with "_out" or "_opt".
std::string PluginName(const std::string& plugin_prefix,
                       const std::string& directive) {
  std::string name = plugin_prefix;
  name += "gen-";
  // Skip the 2 leading characters and drop the 4 trailing ones.
  name += directive.substr(2, directive.size() - 6);
  return name;
}
281 
282 }  // namespace
283 
284 // A MultiFileErrorCollector that prints errors to stderr.
285 class CommandLineInterface::ErrorPrinter
286     : public MultiFileErrorCollector,
287       public io::ErrorCollector,
288       public DescriptorPool::ErrorCollector {
289  public:
ErrorPrinter(ErrorFormat format,DiskSourceTree * tree=NULL)290   ErrorPrinter(ErrorFormat format, DiskSourceTree* tree = NULL)
291       : format_(format), tree_(tree), found_errors_(false) {}
~ErrorPrinter()292   ~ErrorPrinter() {}
293 
294   // implements MultiFileErrorCollector ------------------------------
AddError(const std::string & filename,int line,int column,const std::string & message)295   void AddError(const std::string& filename, int line, int column,
296                 const std::string& message) {
297     found_errors_ = true;
298     AddErrorOrWarning(filename, line, column, message, "error", std::cerr);
299   }
300 
AddWarning(const std::string & filename,int line,int column,const std::string & message)301   void AddWarning(const std::string& filename, int line, int column,
302                   const std::string& message) {
303     AddErrorOrWarning(filename, line, column, message, "warning", std::clog);
304   }
305 
306   // implements io::ErrorCollector -----------------------------------
AddError(int line,int column,const std::string & message)307   void AddError(int line, int column, const std::string& message) {
308     AddError("input", line, column, message);
309   }
310 
AddWarning(int line,int column,const std::string & message)311   void AddWarning(int line, int column, const std::string& message) {
312     AddErrorOrWarning("input", line, column, message, "warning", std::clog);
313   }
314 
315   // implements DescriptorPool::ErrorCollector-------------------------
AddError(const std::string & filename,const std::string & element_name,const Message * descriptor,ErrorLocation location,const std::string & message)316   void AddError(const std::string& filename, const std::string& element_name,
317                 const Message* descriptor, ErrorLocation location,
318                 const std::string& message) {
319     AddErrorOrWarning(filename, -1, -1, message, "error", std::cerr);
320   }
321 
AddWarning(const std::string & filename,const std::string & element_name,const Message * descriptor,ErrorLocation location,const std::string & message)322   void AddWarning(const std::string& filename, const std::string& element_name,
323                   const Message* descriptor, ErrorLocation location,
324                   const std::string& message) {
325     AddErrorOrWarning(filename, -1, -1, message, "warning", std::clog);
326   }
327 
FoundErrors() const328   bool FoundErrors() const { return found_errors_; }
329 
330  private:
AddErrorOrWarning(const std::string & filename,int line,int column,const std::string & message,const std::string & type,std::ostream & out)331   void AddErrorOrWarning(const std::string& filename, int line, int column,
332                          const std::string& message, const std::string& type,
333                          std::ostream& out) {
334     // Print full path when running under MSVS
335     std::string dfile;
336     if (format_ == CommandLineInterface::ERROR_FORMAT_MSVS && tree_ != NULL &&
337         tree_->VirtualFileToDiskFile(filename, &dfile)) {
338       out << dfile;
339     } else {
340       out << filename;
341     }
342 
343     // Users typically expect 1-based line/column numbers, so we add 1
344     // to each here.
345     if (line != -1) {
346       // Allow for both GCC- and Visual-Studio-compatible output.
347       switch (format_) {
348         case CommandLineInterface::ERROR_FORMAT_GCC:
349           out << ":" << (line + 1) << ":" << (column + 1);
350           break;
351         case CommandLineInterface::ERROR_FORMAT_MSVS:
352           out << "(" << (line + 1) << ") : " << type
353               << " in column=" << (column + 1);
354           break;
355       }
356     }
357 
358     if (type == "warning") {
359       out << ": warning: " << message << std::endl;
360     } else {
361       out << ": " << message << std::endl;
362     }
363   }
364 
365   const ErrorFormat format_;
366   DiskSourceTree* tree_;
367   bool found_errors_;
368 };
369 
370 // -------------------------------------------------------------------
371 
372 // A GeneratorContext implementation that buffers files in memory, then dumps
373 // them all to disk on demand.
374 class CommandLineInterface::GeneratorContextImpl : public GeneratorContext {
375  public:
376   GeneratorContextImpl(const std::vector<const FileDescriptor*>& parsed_files);
377 
378   // Write all files in the directory to disk at the given output location,
379   // which must end in a '/'.
380   bool WriteAllToDisk(const std::string& prefix);
381 
382   // Write the contents of this directory to a ZIP-format archive with the
383   // given name.
384   bool WriteAllToZip(const std::string& filename);
385 
386   // Add a boilerplate META-INF/MANIFEST.MF file as required by the Java JAR
387   // format, unless one has already been written.
388   void AddJarManifest();
389 
390   // Get name of all output files.
391   void GetOutputFilenames(std::vector<std::string>* output_filenames);
392 
393   // implements GeneratorContext --------------------------------------
394   io::ZeroCopyOutputStream* Open(const std::string& filename);
395   io::ZeroCopyOutputStream* OpenForAppend(const std::string& filename);
396   io::ZeroCopyOutputStream* OpenForInsert(const std::string& filename,
397                                           const std::string& insertion_point);
398   io::ZeroCopyOutputStream* OpenForInsertWithGeneratedCodeInfo(
399       const std::string& filename, const std::string& insertion_point,
400       const google::protobuf::GeneratedCodeInfo& info);
ListParsedFiles(std::vector<const FileDescriptor * > * output)401   void ListParsedFiles(std::vector<const FileDescriptor*>* output) {
402     *output = parsed_files_;
403   }
404 
405  private:
406   friend class MemoryOutputStream;
407 
408   // The files_ field maps from path keys to file content values. It's a map
409   // instead of an unordered_map so that files are written in order (good when
410   // writing zips).
411   std::map<std::string, std::string> files_;
412   const std::vector<const FileDescriptor*>& parsed_files_;
413   bool had_error_;
414 };
415 
416 class CommandLineInterface::MemoryOutputStream
417     : public io::ZeroCopyOutputStream {
418  public:
419   MemoryOutputStream(GeneratorContextImpl* directory,
420                      const std::string& filename, bool append_mode);
421   MemoryOutputStream(GeneratorContextImpl* directory,
422                      const std::string& filename,
423                      const std::string& insertion_point);
424   MemoryOutputStream(GeneratorContextImpl* directory,
425                      const std::string& filename,
426                      const std::string& insertion_point,
427                      const google::protobuf::GeneratedCodeInfo& info);
428   virtual ~MemoryOutputStream();
429 
430   // implements ZeroCopyOutputStream ---------------------------------
Next(void ** data,int * size)431   bool Next(void** data, int* size) override {
432     return inner_->Next(data, size);
433   }
BackUp(int count)434   void BackUp(int count) override { inner_->BackUp(count); }
ByteCount() const435   int64_t ByteCount() const override { return inner_->ByteCount(); }
436 
437  private:
438   // Checks to see if "filename_.pb.meta" exists in directory_; if so, fixes the
439   // offsets in that GeneratedCodeInfo record to reflect bytes inserted in
440   // filename_ at original offset insertion_offset with length insertion_length.
441   // Also adds in the data from info_to_insert_ with updated offsets governed by
442   // insertion_offset and indent_length. We assume that insertions will not
443   // occur within any given annotated span of text. insertion_content must end
444   // with an endline.
445   void UpdateMetadata(const std::string& insertion_content,
446                       size_t insertion_offset, size_t insertion_length,
447                       size_t indent_length);
448 
449   // Inserts info_to_insert_ into target_info, assuming that the relevant
450   // insertion was made at insertion_offset in file_content with the given
451   // indent_length. insertion_content must end with an endline.
452   void InsertShiftedInfo(const std::string& insertion_content,
453                          size_t insertion_offset, size_t indent_length,
454                          google::protobuf::GeneratedCodeInfo& target_info);
455 
456   // Where to insert the string when it's done.
457   GeneratorContextImpl* directory_;
458   std::string filename_;
459   std::string insertion_point_;
460 
461   // The string we're building.
462   std::string data_;
463 
464   // Whether we should append the output stream to the existing file.
465   bool append_mode_;
466 
467   // StringOutputStream writing to data_.
468   std::unique_ptr<io::StringOutputStream> inner_;
469 
470   // The GeneratedCodeInfo to insert at the insertion point.
471   google::protobuf::GeneratedCodeInfo info_to_insert_;
472 };
473 
474 // -------------------------------------------------------------------
475 
GeneratorContextImpl(const std::vector<const FileDescriptor * > & parsed_files)476 CommandLineInterface::GeneratorContextImpl::GeneratorContextImpl(
477     const std::vector<const FileDescriptor*>& parsed_files)
478     : parsed_files_(parsed_files), had_error_(false) {}
479 
WriteAllToDisk(const std::string & prefix)480 bool CommandLineInterface::GeneratorContextImpl::WriteAllToDisk(
481     const std::string& prefix) {
482   if (had_error_) {
483     return false;
484   }
485 
486   if (!VerifyDirectoryExists(prefix)) {
487     return false;
488   }
489 
490   for (const auto& pair : files_) {
491     const std::string& relative_filename = pair.first;
492     const char* data = pair.second.data();
493     int size = pair.second.size();
494 
495     if (!TryCreateParentDirectory(prefix, relative_filename)) {
496       return false;
497     }
498     std::string filename = prefix + relative_filename;
499 
500     // Create the output file.
501     int file_descriptor;
502     do {
503       file_descriptor =
504           open(filename.c_str(), O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0666);
505     } while (file_descriptor < 0 && errno == EINTR);
506 
507     if (file_descriptor < 0) {
508       int error = errno;
509       std::cerr << filename << ": " << strerror(error);
510       return false;
511     }
512 
513     // Write the file.
514     while (size > 0) {
515       int write_result;
516       do {
517         write_result = write(file_descriptor, data, size);
518       } while (write_result < 0 && errno == EINTR);
519 
520       if (write_result <= 0) {
521         // Write error.
522 
523         // FIXME(kenton):  According to the man page, if write() returns zero,
524         //   there was no error; write() simply did not write anything.  It's
525         //   unclear under what circumstances this might happen, but presumably
526         //   errno won't be set in this case.  I am confused as to how such an
527         //   event should be handled.  For now I'm treating it as an error,
528         //   since retrying seems like it could lead to an infinite loop.  I
529         //   suspect this never actually happens anyway.
530 
531         if (write_result < 0) {
532           int error = errno;
533           std::cerr << filename << ": write: " << strerror(error);
534         } else {
535           std::cerr << filename << ": write() returned zero?" << std::endl;
536         }
537         return false;
538       }
539 
540       data += write_result;
541       size -= write_result;
542     }
543 
544     if (close(file_descriptor) != 0) {
545       int error = errno;
546       std::cerr << filename << ": close: " << strerror(error);
547       return false;
548     }
549   }
550 
551   return true;
552 }
553 
WriteAllToZip(const std::string & filename)554 bool CommandLineInterface::GeneratorContextImpl::WriteAllToZip(
555     const std::string& filename) {
556   if (had_error_) {
557     return false;
558   }
559 
560   // Create the output file.
561   int file_descriptor;
562   do {
563     file_descriptor =
564         open(filename.c_str(), O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0666);
565   } while (file_descriptor < 0 && errno == EINTR);
566 
567   if (file_descriptor < 0) {
568     int error = errno;
569     std::cerr << filename << ": " << strerror(error);
570     return false;
571   }
572 
573   // Create the ZipWriter
574   io::FileOutputStream stream(file_descriptor);
575   ZipWriter zip_writer(&stream);
576 
577   for (const auto& pair : files_) {
578     zip_writer.Write(pair.first, pair.second);
579   }
580 
581   zip_writer.WriteDirectory();
582 
583   if (stream.GetErrno() != 0) {
584     std::cerr << filename << ": " << strerror(stream.GetErrno()) << std::endl;
585   }
586 
587   if (!stream.Close()) {
588     std::cerr << filename << ": " << strerror(stream.GetErrno()) << std::endl;
589   }
590 
591   return true;
592 }
593 
AddJarManifest()594 void CommandLineInterface::GeneratorContextImpl::AddJarManifest() {
595   auto pair = files_.insert({"META-INF/MANIFEST.MF", ""});
596   if (pair.second) {
597     pair.first->second =
598         "Manifest-Version: 1.0\n"
599         "Created-By: 1.6.0 (protoc)\n"
600         "\n";
601   }
602 }
603 
GetOutputFilenames(std::vector<std::string> * output_filenames)604 void CommandLineInterface::GeneratorContextImpl::GetOutputFilenames(
605     std::vector<std::string>* output_filenames) {
606   for (const auto& pair : files_) {
607     output_filenames->push_back(pair.first);
608   }
609 }
610 
Open(const std::string & filename)611 io::ZeroCopyOutputStream* CommandLineInterface::GeneratorContextImpl::Open(
612     const std::string& filename) {
613   return new MemoryOutputStream(this, filename, false);
614 }
615 
616 io::ZeroCopyOutputStream*
OpenForAppend(const std::string & filename)617 CommandLineInterface::GeneratorContextImpl::OpenForAppend(
618     const std::string& filename) {
619   return new MemoryOutputStream(this, filename, true);
620 }
621 
622 io::ZeroCopyOutputStream*
OpenForInsert(const std::string & filename,const std::string & insertion_point)623 CommandLineInterface::GeneratorContextImpl::OpenForInsert(
624     const std::string& filename, const std::string& insertion_point) {
625   return new MemoryOutputStream(this, filename, insertion_point);
626 }
627 
628 io::ZeroCopyOutputStream*
OpenForInsertWithGeneratedCodeInfo(const std::string & filename,const std::string & insertion_point,const google::protobuf::GeneratedCodeInfo & info)629 CommandLineInterface::GeneratorContextImpl::OpenForInsertWithGeneratedCodeInfo(
630     const std::string& filename, const std::string& insertion_point,
631     const google::protobuf::GeneratedCodeInfo& info) {
632   return new MemoryOutputStream(this, filename, insertion_point, info);
633 }
634 
635 // -------------------------------------------------------------------
636 
MemoryOutputStream(GeneratorContextImpl * directory,const std::string & filename,bool append_mode)637 CommandLineInterface::MemoryOutputStream::MemoryOutputStream(
638     GeneratorContextImpl* directory, const std::string& filename,
639     bool append_mode)
640     : directory_(directory),
641       filename_(filename),
642       append_mode_(append_mode),
643       inner_(new io::StringOutputStream(&data_)) {}
644 
MemoryOutputStream(GeneratorContextImpl * directory,const std::string & filename,const std::string & insertion_point)645 CommandLineInterface::MemoryOutputStream::MemoryOutputStream(
646     GeneratorContextImpl* directory, const std::string& filename,
647     const std::string& insertion_point)
648     : directory_(directory),
649       filename_(filename),
650       insertion_point_(insertion_point),
651       inner_(new io::StringOutputStream(&data_)) {}
652 
MemoryOutputStream(GeneratorContextImpl * directory,const std::string & filename,const std::string & insertion_point,const google::protobuf::GeneratedCodeInfo & info)653 CommandLineInterface::MemoryOutputStream::MemoryOutputStream(
654     GeneratorContextImpl* directory, const std::string& filename,
655     const std::string& insertion_point, const google::protobuf::GeneratedCodeInfo& info)
656     : directory_(directory),
657       filename_(filename),
658       insertion_point_(insertion_point),
659       inner_(new io::StringOutputStream(&data_)),
660       info_to_insert_(info) {}
661 
InsertShiftedInfo(const std::string & insertion_content,size_t insertion_offset,size_t indent_length,google::protobuf::GeneratedCodeInfo & target_info)662 void CommandLineInterface::MemoryOutputStream::InsertShiftedInfo(
663     const std::string& insertion_content, size_t insertion_offset,
664     size_t indent_length, google::protobuf::GeneratedCodeInfo& target_info) {
665   // Keep track of how much extra data was added for indents before the
666   // current annotation being inserted. `pos` and `source_annotation.begin()`
667   // are offsets in `insertion_content`. `insertion_offset` is updated so that
668   // it can be added to an annotation's `begin` field to reflect that
669   // annotation's updated location after `insertion_content` was inserted into
670   // the target file.
671   size_t pos = 0;
672   insertion_offset += indent_length;
673   for (const auto& source_annotation : info_to_insert_.annotation()) {
674     GeneratedCodeInfo::Annotation* annotation = target_info.add_annotation();
675     int inner_indent = 0;
676     // insertion_content is guaranteed to end in an endline. This last endline
677     // has no effect on indentation.
678     for (; pos < source_annotation.end() && pos < insertion_content.size() - 1;
679          ++pos) {
680       if (insertion_content[pos] == '\n') {
681         if (pos >= source_annotation.begin()) {
682           // The beginning of the annotation is at insertion_offset, but the end
683           // can still move further in the target file.
684           inner_indent += indent_length;
685         } else {
686           insertion_offset += indent_length;
687         }
688       }
689     }
690     *annotation = source_annotation;
691     annotation->set_begin(annotation->begin() + insertion_offset);
692     insertion_offset += inner_indent;
693     annotation->set_end(annotation->end() + insertion_offset);
694   }
695 }
696 
UpdateMetadata(const std::string & insertion_content,size_t insertion_offset,size_t insertion_length,size_t indent_length)697 void CommandLineInterface::MemoryOutputStream::UpdateMetadata(
698     const std::string& insertion_content, size_t insertion_offset,
699     size_t insertion_length, size_t indent_length) {
700   auto it = directory_->files_.find(filename_ + ".pb.meta");
701   if (it == directory_->files_.end() && info_to_insert_.annotation().empty()) {
702     // No metadata was recorded for this file.
703     return;
704   }
705   GeneratedCodeInfo metadata;
706   bool is_text_format = false;
707   std::string* encoded_data = nullptr;
708   if (it != directory_->files_.end()) {
709     encoded_data = &it->second;
710     // Try to decode a GeneratedCodeInfo proto from the .pb.meta file. It may be
711     // in wire or text format. Keep the same format when the data is written out
712     // later.
713     if (!metadata.ParseFromString(*encoded_data)) {
714       if (!TextFormat::ParseFromString(*encoded_data, &metadata)) {
715         // The metadata is invalid.
716         std::cerr
717             << filename_
718             << ".pb.meta: Could not parse metadata as wire or text format."
719             << std::endl;
720         return;
721       }
722       // Generators that use the public plugin interface emit text-format
723       // metadata (because in the public plugin protocol, file content must be
724       // UTF8-encoded strings).
725       is_text_format = true;
726     }
727   } else {
728     // Create a new file to store the new metadata in info_to_insert_.
729     encoded_data =
730         &directory_->files_.insert({filename_ + ".pb.meta", ""}).first->second;
731   }
732   GeneratedCodeInfo new_metadata;
733   bool crossed_offset = false;
734   size_t to_add = 0;
735   for (const auto& source_annotation : metadata.annotation()) {
736     // The first time an annotation at or after the insertion point is found,
737     // insert the new metadata from info_to_insert_. Shift all annotations
738     // after the new metadata by the length of the text that was inserted
739     // (including any additional indent length).
740     if (source_annotation.begin() >= insertion_offset && !crossed_offset) {
741       crossed_offset = true;
742       InsertShiftedInfo(insertion_content, insertion_offset, indent_length,
743                         new_metadata);
744       to_add += insertion_length;
745     }
746     GeneratedCodeInfo::Annotation* annotation = new_metadata.add_annotation();
747     *annotation = source_annotation;
748     annotation->set_begin(annotation->begin() + to_add);
749     annotation->set_end(annotation->end() + to_add);
750   }
751   // If there were never any annotations at or after the insertion point,
752   // make sure to still insert the new metadata from info_to_insert_.
753   if (!crossed_offset) {
754     InsertShiftedInfo(insertion_content, insertion_offset, indent_length,
755                       new_metadata);
756   }
757   if (is_text_format) {
758     TextFormat::PrintToString(new_metadata, encoded_data);
759   } else {
760     new_metadata.SerializeToString(encoded_data);
761   }
762 }
763 
~MemoryOutputStream()764 CommandLineInterface::MemoryOutputStream::~MemoryOutputStream() {
765   // Make sure all data has been written.
766   inner_.reset();
767 
768   // Insert into the directory.
769   auto pair = directory_->files_.insert({filename_, ""});
770   auto it = pair.first;
771   bool already_present = !pair.second;
772 
773   if (insertion_point_.empty()) {
774     // This was just a regular Open().
775     if (already_present) {
776       if (append_mode_) {
777         it->second.append(data_);
778       } else {
779         std::cerr << filename_ << ": Tried to write the same file twice."
780                   << std::endl;
781         directory_->had_error_ = true;
782       }
783       return;
784     }
785 
786     it->second.swap(data_);
787   } else {
788     // This was an OpenForInsert().
789 
790     // If the data doesn't end with a clean line break, add one.
791     if (!data_.empty() && data_[data_.size() - 1] != '\n') {
792       data_.push_back('\n');
793     }
794 
795     // Find the file we are going to insert into.
796     if (!already_present) {
797       std::cerr << filename_
798                 << ": Tried to insert into file that doesn't exist."
799                 << std::endl;
800       directory_->had_error_ = true;
801       return;
802     }
803     std::string* target = &it->second;
804 
805     // Find the insertion point.
806     std::string magic_string =
807         strings::Substitute("@@protoc_insertion_point($0)", insertion_point_);
808     std::string::size_type pos = target->find(magic_string);
809 
810     if (pos == std::string::npos) {
811       std::cerr << filename_ << ": insertion point \"" << insertion_point_
812                 << "\" not found." << std::endl;
813       directory_->had_error_ = true;
814       return;
815     }
816 
817     if ((pos > 3) && (target->substr(pos - 3, 2) == "/*")) {
818       // Support for inline "/* @@protoc_insertion_point() */"
819       pos = pos - 3;
820     } else {
821       // Seek backwards to the beginning of the line, which is where we will
822       // insert the data.  Note that this has the effect of pushing the
823       // insertion point down, so the data is inserted before it.  This is
824       // intentional because it means that multiple insertions at the same point
825       // will end up in the expected order in the final output.
826       pos = target->find_last_of('\n', pos);
827       if (pos == std::string::npos) {
828         // Insertion point is on the first line.
829         pos = 0;
830       } else {
831         // Advance to character after '\n'.
832         ++pos;
833       }
834     }
835 
836     // Extract indent.
837     std::string indent_(*target, pos,
838                         target->find_first_not_of(" \t", pos) - pos);
839 
840     if (indent_.empty()) {
841       // No indent.  This makes things easier.
842       target->insert(pos, data_);
843       UpdateMetadata(data_, pos, data_.size(), 0);
844     } else {
845       // Calculate how much space we need.
846       int indent_size = 0;
847       for (int i = 0; i < data_.size(); i++) {
848         if (data_[i] == '\n') indent_size += indent_.size();
849       }
850 
851       // Make a hole for it.
852       target->insert(pos, data_.size() + indent_size, '\0');
853 
854       // Now copy in the data.
855       std::string::size_type data_pos = 0;
856       char* target_ptr = ::google::protobuf::string_as_array(target) + pos;
857       while (data_pos < data_.size()) {
858         // Copy indent.
859         memcpy(target_ptr, indent_.data(), indent_.size());
860         target_ptr += indent_.size();
861 
862         // Copy line from data_.
863         // We already guaranteed that data_ ends with a newline (above), so this
864         // search can't fail.
865         std::string::size_type line_length =
866             data_.find_first_of('\n', data_pos) + 1 - data_pos;
867         memcpy(target_ptr, data_.data() + data_pos, line_length);
868         target_ptr += line_length;
869         data_pos += line_length;
870       }
871       UpdateMetadata(data_, pos, data_.size() + indent_size, indent_.size());
872 
873       GOOGLE_CHECK_EQ(target_ptr,
874                ::google::protobuf::string_as_array(target) + pos + data_.size() + indent_size);
875     }
876   }
877 }
878 
879 // ===================================================================
880 
881 #if defined(_WIN32) && !defined(__CYGWIN__)
882 const char* const CommandLineInterface::kPathSeparator = ";";
883 #else
884 const char* const CommandLineInterface::kPathSeparator = ":";
885 #endif
886 
CommandLineInterface()887 CommandLineInterface::CommandLineInterface()
888     : direct_dependencies_violation_msg_(
889           kDefaultDirectDependenciesViolationMsg) {}
890 
~CommandLineInterface()891 CommandLineInterface::~CommandLineInterface() {}
892 
RegisterGenerator(const std::string & flag_name,CodeGenerator * generator,const std::string & help_text)893 void CommandLineInterface::RegisterGenerator(const std::string& flag_name,
894                                              CodeGenerator* generator,
895                                              const std::string& help_text) {
896   GeneratorInfo info;
897   info.flag_name = flag_name;
898   info.generator = generator;
899   info.help_text = help_text;
900   generators_by_flag_name_[flag_name] = info;
901 }
902 
RegisterGenerator(const std::string & flag_name,const std::string & option_flag_name,CodeGenerator * generator,const std::string & help_text)903 void CommandLineInterface::RegisterGenerator(
904     const std::string& flag_name, const std::string& option_flag_name,
905     CodeGenerator* generator, const std::string& help_text) {
906   GeneratorInfo info;
907   info.flag_name = flag_name;
908   info.option_flag_name = option_flag_name;
909   info.generator = generator;
910   info.help_text = help_text;
911   generators_by_flag_name_[flag_name] = info;
912   generators_by_option_name_[option_flag_name] = info;
913 }
914 
AllowPlugins(const std::string & exe_name_prefix)915 void CommandLineInterface::AllowPlugins(const std::string& exe_name_prefix) {
916   plugin_prefix_ = exe_name_prefix;
917 }
918 
919 namespace {
920 
ContainsProto3Optional(const Descriptor * desc)921 bool ContainsProto3Optional(const Descriptor* desc) {
922   for (int i = 0; i < desc->field_count(); i++) {
923     if (desc->field(i)->has_optional_keyword()) {
924       return true;
925     }
926   }
927   for (int i = 0; i < desc->nested_type_count(); i++) {
928     if (ContainsProto3Optional(desc->nested_type(i))) {
929       return true;
930     }
931   }
932   return false;
933 }
934 
ContainsProto3Optional(const FileDescriptor * file)935 bool ContainsProto3Optional(const FileDescriptor* file) {
936   if (file->syntax() == FileDescriptor::SYNTAX_PROTO3) {
937     for (int i = 0; i < file->message_type_count(); i++) {
938       if (ContainsProto3Optional(file->message_type(i))) {
939         return true;
940       }
941     }
942   }
943   return false;
944 }
945 
946 }  // namespace
947 
948 namespace {
949 std::unique_ptr<SimpleDescriptorDatabase>
950 PopulateSingleSimpleDescriptorDatabase(const std::string& descriptor_set_name);
951 }
952 
Run(int argc,const char * const argv[])953 int CommandLineInterface::Run(int argc, const char* const argv[]) {
954   Clear();
955   switch (ParseArguments(argc, argv)) {
956     case PARSE_ARGUMENT_DONE_AND_EXIT:
957       return 0;
958     case PARSE_ARGUMENT_FAIL:
959       return 1;
960     case PARSE_ARGUMENT_DONE_AND_CONTINUE:
961       break;
962   }
963 
964   std::vector<const FileDescriptor*> parsed_files;
965   std::unique_ptr<DiskSourceTree> disk_source_tree;
966   std::unique_ptr<ErrorPrinter> error_collector;
967   std::unique_ptr<DescriptorPool> descriptor_pool;
968 
969   // The SimpleDescriptorDatabases here are the constituents of the
970   // MergedDescriptorDatabase descriptor_set_in_database, so this vector is for
971   // managing their lifetimes. Its scope should match descriptor_set_in_database
972   std::vector<std::unique_ptr<SimpleDescriptorDatabase>>
973       databases_per_descriptor_set;
974   std::unique_ptr<MergedDescriptorDatabase> descriptor_set_in_database;
975 
976   std::unique_ptr<SourceTreeDescriptorDatabase> source_tree_database;
977 
978   // Any --descriptor_set_in FileDescriptorSet objects will be used as a
979   // fallback to input_files on command line, so create that db first.
980   if (!descriptor_set_in_names_.empty()) {
981     for (const std::string& name : descriptor_set_in_names_) {
982       std::unique_ptr<SimpleDescriptorDatabase> database_for_descriptor_set =
983           PopulateSingleSimpleDescriptorDatabase(name);
984       if (!database_for_descriptor_set) {
985         return EXIT_FAILURE;
986       }
987       databases_per_descriptor_set.push_back(
988           std::move(database_for_descriptor_set));
989     }
990 
991     std::vector<DescriptorDatabase*> raw_databases_per_descriptor_set;
992     raw_databases_per_descriptor_set.reserve(
993         databases_per_descriptor_set.size());
994     for (const std::unique_ptr<SimpleDescriptorDatabase>& db :
995          databases_per_descriptor_set) {
996       raw_databases_per_descriptor_set.push_back(db.get());
997     }
998     descriptor_set_in_database.reset(
999         new MergedDescriptorDatabase(raw_databases_per_descriptor_set));
1000   }
1001 
1002   if (proto_path_.empty()) {
1003     // If there are no --proto_path flags, then just look in the specified
1004     // --descriptor_set_in files.  But first, verify that the input files are
1005     // there.
1006     if (!VerifyInputFilesInDescriptors(descriptor_set_in_database.get())) {
1007       return 1;
1008     }
1009 
1010     error_collector.reset(new ErrorPrinter(error_format_));
1011     descriptor_pool.reset(new DescriptorPool(descriptor_set_in_database.get(),
1012                                              error_collector.get()));
1013   } else {
1014     disk_source_tree.reset(new DiskSourceTree());
1015     if (!InitializeDiskSourceTree(disk_source_tree.get(),
1016                                   descriptor_set_in_database.get())) {
1017       return 1;
1018     }
1019 
1020     error_collector.reset(
1021         new ErrorPrinter(error_format_, disk_source_tree.get()));
1022 
1023     source_tree_database.reset(new SourceTreeDescriptorDatabase(
1024         disk_source_tree.get(), descriptor_set_in_database.get()));
1025     source_tree_database->RecordErrorsTo(error_collector.get());
1026 
1027     descriptor_pool.reset(new DescriptorPool(
1028         source_tree_database.get(),
1029         source_tree_database->GetValidationErrorCollector()));
1030   }
1031 
1032   descriptor_pool->EnforceWeakDependencies(true);
1033   if (!ParseInputFiles(descriptor_pool.get(), disk_source_tree.get(),
1034                        &parsed_files)) {
1035     return 1;
1036   }
1037 
1038 
1039   for (auto fd : parsed_files) {
1040     if (!AllowProto3Optional(*fd) && ContainsProto3Optional(fd)) {
1041       std::cerr << fd->name()
1042                 << ": This file contains proto3 optional fields, but "
1043                    "--experimental_allow_proto3_optional was not set."
1044                 << std::endl;
1045       return 1;
1046     }
1047   }
1048 
1049   // We construct a separate GeneratorContext for each output location.  Note
1050   // that two code generators may output to the same location, in which case
1051   // they should share a single GeneratorContext so that OpenForInsert() works.
1052   GeneratorContextMap output_directories;
1053 
1054   // Generate output.
1055   if (mode_ == MODE_COMPILE) {
1056     for (int i = 0; i < output_directives_.size(); i++) {
1057       std::string output_location = output_directives_[i].output_location;
1058       if (!HasSuffixString(output_location, ".zip") &&
1059           !HasSuffixString(output_location, ".jar") &&
1060           !HasSuffixString(output_location, ".srcjar")) {
1061         AddTrailingSlash(&output_location);
1062       }
1063 
1064       auto& generator = output_directories[output_location];
1065 
1066       if (!generator) {
1067         // First time we've seen this output location.
1068         generator.reset(new GeneratorContextImpl(parsed_files));
1069       }
1070 
1071       if (!GenerateOutput(parsed_files, output_directives_[i],
1072                           generator.get())) {
1073         return 1;
1074       }
1075     }
1076   }
1077 
1078   // Write all output to disk.
1079   for (const auto& pair : output_directories) {
1080     const std::string& location = pair.first;
1081     GeneratorContextImpl* directory = pair.second.get();
1082     if (HasSuffixString(location, "/")) {
1083       if (!directory->WriteAllToDisk(location)) {
1084         return 1;
1085       }
1086     } else {
1087       if (HasSuffixString(location, ".jar")) {
1088         directory->AddJarManifest();
1089       }
1090 
1091       if (!directory->WriteAllToZip(location)) {
1092         return 1;
1093       }
1094     }
1095   }
1096 
1097   if (!dependency_out_name_.empty()) {
1098     GOOGLE_DCHECK(disk_source_tree.get());
1099     if (!GenerateDependencyManifestFile(parsed_files, output_directories,
1100                                         disk_source_tree.get())) {
1101       return 1;
1102     }
1103   }
1104 
1105   if (!descriptor_set_out_name_.empty()) {
1106     if (!WriteDescriptorSet(parsed_files)) {
1107       return 1;
1108     }
1109   }
1110 
1111   if (mode_ == MODE_ENCODE || mode_ == MODE_DECODE) {
1112     if (codec_type_.empty()) {
1113       // HACK:  Define an EmptyMessage type to use for decoding.
1114       DescriptorPool pool;
1115       FileDescriptorProto file;
1116       file.set_name("empty_message.proto");
1117       file.add_message_type()->set_name("EmptyMessage");
1118       GOOGLE_CHECK(pool.BuildFile(file) != NULL);
1119       codec_type_ = "EmptyMessage";
1120       if (!EncodeOrDecode(&pool)) {
1121         return 1;
1122       }
1123     } else {
1124       if (!EncodeOrDecode(descriptor_pool.get())) {
1125         return 1;
1126       }
1127     }
1128   }
1129 
1130   if (error_collector->FoundErrors()) {
1131     return 1;
1132   }
1133 
1134   if (mode_ == MODE_PRINT) {
1135     switch (print_mode_) {
1136       case PRINT_FREE_FIELDS:
1137         for (int i = 0; i < parsed_files.size(); ++i) {
1138           const FileDescriptor* fd = parsed_files[i];
1139           for (int j = 0; j < fd->message_type_count(); ++j) {
1140             PrintFreeFieldNumbers(fd->message_type(j));
1141           }
1142         }
1143         break;
1144       case PRINT_NONE:
1145         GOOGLE_LOG(ERROR) << "If the code reaches here, it usually means a bug of "
1146                       "flag parsing in the CommandLineInterface.";
1147         return 1;
1148 
1149         // Do not add a default case.
1150     }
1151   }
1152 
1153   return 0;
1154 }
1155 
InitializeDiskSourceTree(DiskSourceTree * source_tree,DescriptorDatabase * fallback_database)1156 bool CommandLineInterface::InitializeDiskSourceTree(
1157     DiskSourceTree* source_tree, DescriptorDatabase* fallback_database) {
1158   AddDefaultProtoPaths(&proto_path_);
1159 
1160   // Set up the source tree.
1161   for (int i = 0; i < proto_path_.size(); i++) {
1162     source_tree->MapPath(proto_path_[i].first, proto_path_[i].second);
1163   }
1164 
1165   // Map input files to virtual paths if possible.
1166   if (!MakeInputsBeProtoPathRelative(source_tree, fallback_database)) {
1167     return false;
1168   }
1169 
1170   return true;
1171 }
1172 
1173 namespace {
1174 std::unique_ptr<SimpleDescriptorDatabase>
PopulateSingleSimpleDescriptorDatabase(const std::string & descriptor_set_name)1175 PopulateSingleSimpleDescriptorDatabase(const std::string& descriptor_set_name) {
1176   int fd;
1177   do {
1178     fd = open(descriptor_set_name.c_str(), O_RDONLY | O_BINARY);
1179   } while (fd < 0 && errno == EINTR);
1180   if (fd < 0) {
1181     std::cerr << descriptor_set_name << ": " << strerror(ENOENT) << std::endl;
1182     return nullptr;
1183   }
1184 
1185   FileDescriptorSet file_descriptor_set;
1186   bool parsed = file_descriptor_set.ParseFromFileDescriptor(fd);
1187   if (close(fd) != 0) {
1188     std::cerr << descriptor_set_name << ": close: " << strerror(errno)
1189               << std::endl;
1190     return nullptr;
1191   }
1192 
1193   if (!parsed) {
1194     std::cerr << descriptor_set_name << ": Unable to parse." << std::endl;
1195     return nullptr;
1196   }
1197 
1198   std::unique_ptr<SimpleDescriptorDatabase> database{
1199       new SimpleDescriptorDatabase()};
1200 
1201   for (int j = 0; j < file_descriptor_set.file_size(); j++) {
1202     FileDescriptorProto previously_added_file_descriptor_proto;
1203     if (database->FindFileByName(file_descriptor_set.file(j).name(),
1204                                  &previously_added_file_descriptor_proto)) {
1205       // already present - skip
1206       continue;
1207     }
1208     if (!database->Add(file_descriptor_set.file(j))) {
1209       return nullptr;
1210     }
1211   }
1212   return database;
1213 }
1214 
1215 }  // namespace
1216 
AllowProto3Optional(const FileDescriptor & file) const1217 bool CommandLineInterface::AllowProto3Optional(
1218     const FileDescriptor& file) const {
1219   // If the --experimental_allow_proto3_optional flag was set, we allow.
1220   if (allow_proto3_optional_) return true;
1221 
1222   // Whitelist all ads protos. Ads is an early adopter of this feature.
1223   if (file.name().find("google/ads/googleads") != std::string::npos) {
1224     return true;
1225   }
1226 
1227   // Whitelist all protos testing proto3 optional.
1228   if (file.name().find("test_proto3_optional") != std::string::npos) {
1229     return true;
1230   }
1231 
1232 
1233   return false;
1234 }
1235 
1236 
VerifyInputFilesInDescriptors(DescriptorDatabase * database)1237 bool CommandLineInterface::VerifyInputFilesInDescriptors(
1238     DescriptorDatabase* database) {
1239   for (const auto& input_file : input_files_) {
1240     FileDescriptorProto file_descriptor;
1241     if (!database->FindFileByName(input_file, &file_descriptor)) {
1242       std::cerr << "Could not find file in descriptor database: " << input_file
1243                 << ": " << strerror(ENOENT) << std::endl;
1244       return false;
1245     }
1246 
1247     // Enforce --disallow_services.
1248     if (disallow_services_ && file_descriptor.service_size() > 0) {
1249       std::cerr << file_descriptor.name()
1250                 << ": This file contains services, but "
1251                    "--disallow_services was used."
1252                 << std::endl;
1253       return false;
1254     }
1255 
1256   }
1257   return true;
1258 }
1259 
ParseInputFiles(DescriptorPool * descriptor_pool,DiskSourceTree * source_tree,std::vector<const FileDescriptor * > * parsed_files)1260 bool CommandLineInterface::ParseInputFiles(
1261     DescriptorPool* descriptor_pool, DiskSourceTree* source_tree,
1262     std::vector<const FileDescriptor*>* parsed_files) {
1263 
1264   if (!proto_path_.empty()) {
1265     // Track unused imports in all source files that were loaded from the
1266     // filesystem. We do not track unused imports for files loaded from
1267     // descriptor sets as they may be programmatically generated in which case
1268     // exerting this level of rigor is less desirable. We're also making the
1269     // assumption that the initial parse of the proto from the filesystem
1270     // was rigorous in checking unused imports and that the descriptor set
1271     // being parsed was produced then and that it was subsequent mutations
1272     // of that descriptor set that left unused imports.
1273     //
1274     // Note that relying on proto_path exclusively is limited in that we may
1275     // be loading descriptors from both the filesystem and descriptor sets
1276     // depending on the invocation. At least for invocations that are
1277     // exclusively reading from descriptor sets, we can eliminate this failure
1278     // condition.
1279     for (const auto& input_file : input_files_) {
1280       descriptor_pool->AddUnusedImportTrackFile(input_file);
1281     }
1282   }
1283 
1284   bool result = true;
1285   // Parse each file.
1286   for (const auto& input_file : input_files_) {
1287     // Import the file.
1288     const FileDescriptor* parsed_file =
1289         descriptor_pool->FindFileByName(input_file);
1290     if (parsed_file == NULL) {
1291       result = false;
1292       break;
1293     }
1294     parsed_files->push_back(parsed_file);
1295 
1296     // Enforce --disallow_services.
1297     if (disallow_services_ && parsed_file->service_count() > 0) {
1298       std::cerr << parsed_file->name()
1299                 << ": This file contains services, but "
1300                    "--disallow_services was used."
1301                 << std::endl;
1302       result = false;
1303       break;
1304     }
1305 
1306 
1307     // Enforce --direct_dependencies
1308     if (direct_dependencies_explicitly_set_) {
1309       bool indirect_imports = false;
1310       for (int i = 0; i < parsed_file->dependency_count(); i++) {
1311         if (direct_dependencies_.find(parsed_file->dependency(i)->name()) ==
1312             direct_dependencies_.end()) {
1313           indirect_imports = true;
1314           std::cerr << parsed_file->name() << ": "
1315                     << StringReplace(direct_dependencies_violation_msg_, "%s",
1316                                      parsed_file->dependency(i)->name(),
1317                                      true /* replace_all */)
1318                     << std::endl;
1319         }
1320       }
1321       if (indirect_imports) {
1322         result = false;
1323         break;
1324       }
1325     }
1326   }
1327   descriptor_pool->ClearUnusedImportTrackFiles();
1328   return result;
1329 }
1330 
Clear()1331 void CommandLineInterface::Clear() {
1332   // Clear all members that are set by Run().  Note that we must not clear
1333   // members which are set by other methods before Run() is called.
1334   executable_name_.clear();
1335   proto_path_.clear();
1336   input_files_.clear();
1337   direct_dependencies_.clear();
1338   direct_dependencies_violation_msg_ = kDefaultDirectDependenciesViolationMsg;
1339   output_directives_.clear();
1340   codec_type_.clear();
1341   descriptor_set_in_names_.clear();
1342   descriptor_set_out_name_.clear();
1343   dependency_out_name_.clear();
1344 
1345 
1346   mode_ = MODE_COMPILE;
1347   print_mode_ = PRINT_NONE;
1348   imports_in_descriptor_set_ = false;
1349   source_info_in_descriptor_set_ = false;
1350   disallow_services_ = false;
1351   direct_dependencies_explicitly_set_ = false;
1352   allow_proto3_optional_ = false;
1353   deterministic_output_ = false;
1354 }
1355 
MakeProtoProtoPathRelative(DiskSourceTree * source_tree,std::string * proto,DescriptorDatabase * fallback_database)1356 bool CommandLineInterface::MakeProtoProtoPathRelative(
1357     DiskSourceTree* source_tree, std::string* proto,
1358     DescriptorDatabase* fallback_database) {
1359   // If it's in the fallback db, don't report non-existent file errors.
1360   FileDescriptorProto fallback_file;
1361   bool in_fallback_database =
1362       fallback_database != nullptr &&
1363       fallback_database->FindFileByName(*proto, &fallback_file);
1364 
1365   // If the input file path is not a physical file path, it must be a virtual
1366   // path.
1367   if (access(proto->c_str(), F_OK) < 0) {
1368     std::string disk_file;
1369     if (source_tree->VirtualFileToDiskFile(*proto, &disk_file) ||
1370         in_fallback_database) {
1371       return true;
1372     } else {
1373       std::cerr << "Could not make proto path relative: " << *proto << ": "
1374                 << strerror(ENOENT) << std::endl;
1375       return false;
1376     }
1377   }
1378 
1379   std::string virtual_file, shadowing_disk_file;
1380   switch (source_tree->DiskFileToVirtualFile(*proto, &virtual_file,
1381                                              &shadowing_disk_file)) {
1382     case DiskSourceTree::SUCCESS:
1383       *proto = virtual_file;
1384       break;
1385     case DiskSourceTree::SHADOWED:
1386       std::cerr << *proto << ": Input is shadowed in the --proto_path by \""
1387                 << shadowing_disk_file
1388                 << "\".  Either use the latter file as your input or reorder "
1389                    "the --proto_path so that the former file's location "
1390                    "comes first."
1391                 << std::endl;
1392       return false;
1393     case DiskSourceTree::CANNOT_OPEN: {
1394       if (in_fallback_database) {
1395         return true;
1396       }
1397       std::string error_str = source_tree->GetLastErrorMessage().empty()
1398                                   ? strerror(errno)
1399                                   : source_tree->GetLastErrorMessage();
1400       std::cerr << "Could not map to virtual file: " << *proto << ": "
1401                 << error_str << std::endl;
1402       return false;
1403     }
1404     case DiskSourceTree::NO_MAPPING: {
1405       // Try to interpret the path as a virtual path.
1406       std::string disk_file;
1407       if (source_tree->VirtualFileToDiskFile(*proto, &disk_file) ||
1408           in_fallback_database) {
1409         return true;
1410       } else {
1411         // The input file path can't be mapped to any --proto_path and it also
1412         // can't be interpreted as a virtual path.
1413         std::cerr
1414             << *proto
1415             << ": File does not reside within any path "
1416                "specified using --proto_path (or -I).  You must specify a "
1417                "--proto_path which encompasses this file.  Note that the "
1418                "proto_path must be an exact prefix of the .proto file "
1419                "names -- protoc is too dumb to figure out when two paths "
1420                "(e.g. absolute and relative) are equivalent (it's harder "
1421                "than you think)."
1422             << std::endl;
1423         return false;
1424       }
1425     }
1426   }
1427   return true;
1428 }
1429 
MakeInputsBeProtoPathRelative(DiskSourceTree * source_tree,DescriptorDatabase * fallback_database)1430 bool CommandLineInterface::MakeInputsBeProtoPathRelative(
1431     DiskSourceTree* source_tree, DescriptorDatabase* fallback_database) {
1432   for (auto& input_file : input_files_) {
1433     if (!MakeProtoProtoPathRelative(source_tree, &input_file,
1434                                     fallback_database)) {
1435       return false;
1436     }
1437   }
1438 
1439   return true;
1440 }
1441 
1442 
ExpandArgumentFile(const std::string & file,std::vector<std::string> * arguments)1443 bool CommandLineInterface::ExpandArgumentFile(
1444     const std::string& file, std::vector<std::string>* arguments) {
1445   // The argument file is searched in the working directory only. We don't
1446   // use the proto import path here.
1447   std::ifstream file_stream(file.c_str());
1448   if (!file_stream.is_open()) {
1449     return false;
1450   }
1451   std::string argument;
1452   // We don't support any kind of shell expansion right now.
1453   while (std::getline(file_stream, argument)) {
1454     arguments->push_back(argument);
1455   }
1456   return true;
1457 }
1458 
ParseArguments(int argc,const char * const argv[])1459 CommandLineInterface::ParseArgumentStatus CommandLineInterface::ParseArguments(
1460     int argc, const char* const argv[]) {
1461   executable_name_ = argv[0];
1462 
1463   std::vector<std::string> arguments;
1464   for (int i = 1; i < argc; ++i) {
1465     if (argv[i][0] == '@') {
1466       if (!ExpandArgumentFile(argv[i] + 1, &arguments)) {
1467         std::cerr << "Failed to open argument file: " << (argv[i] + 1)
1468                   << std::endl;
1469         return PARSE_ARGUMENT_FAIL;
1470       }
1471       continue;
1472     }
1473     arguments.push_back(argv[i]);
1474   }
1475 
1476   // if no arguments are given, show help
1477   if (arguments.empty()) {
1478     PrintHelpText();
1479     return PARSE_ARGUMENT_DONE_AND_EXIT;  // Exit without running compiler.
1480   }
1481 
1482   // Iterate through all arguments and parse them.
1483   for (int i = 0; i < arguments.size(); ++i) {
1484     std::string name, value;
1485 
1486     if (ParseArgument(arguments[i].c_str(), &name, &value)) {
1487       // Returned true => Use the next argument as the flag value.
1488       if (i + 1 == arguments.size() || arguments[i + 1][0] == '-') {
1489         std::cerr << "Missing value for flag: " << name << std::endl;
1490         if (name == "--decode") {
1491           std::cerr << "To decode an unknown message, use --decode_raw."
1492                     << std::endl;
1493         }
1494         return PARSE_ARGUMENT_FAIL;
1495       } else {
1496         ++i;
1497         value = arguments[i];
1498       }
1499     }
1500 
1501     ParseArgumentStatus status = InterpretArgument(name, value);
1502     if (status != PARSE_ARGUMENT_DONE_AND_CONTINUE) return status;
1503   }
1504 
1505   // Make sure each plugin option has a matching plugin output.
1506   bool foundUnknownPluginOption = false;
1507   for (std::map<std::string, std::string>::const_iterator i =
1508            plugin_parameters_.begin();
1509        i != plugin_parameters_.end(); ++i) {
1510     if (plugins_.find(i->first) != plugins_.end()) {
1511       continue;
1512     }
1513     bool foundImplicitPlugin = false;
1514     for (std::vector<OutputDirective>::const_iterator j =
1515              output_directives_.begin();
1516          j != output_directives_.end(); ++j) {
1517       if (j->generator == NULL) {
1518         std::string plugin_name = PluginName(plugin_prefix_, j->name);
1519         if (plugin_name == i->first) {
1520           foundImplicitPlugin = true;
1521           break;
1522         }
1523       }
1524     }
1525     if (!foundImplicitPlugin) {
1526       std::cerr << "Unknown flag: "
1527                 // strip prefix + "gen-" and add back "_opt"
1528                 << "--" + i->first.substr(plugin_prefix_.size() + 4) + "_opt"
1529                 << std::endl;
1530       foundUnknownPluginOption = true;
1531     }
1532   }
1533   if (foundUnknownPluginOption) {
1534     return PARSE_ARGUMENT_FAIL;
1535   }
1536 
1537   // The --proto_path & --descriptor_set_in flags both specify places to look
1538   // for proto files. If neither were given, use the current working directory.
1539   if (proto_path_.empty() && descriptor_set_in_names_.empty()) {
1540     // Don't use make_pair as the old/default standard library on Solaris
1541     // doesn't support it without explicit template parameters, which are
1542     // incompatible with C++0x's make_pair.
1543     proto_path_.push_back(std::pair<std::string, std::string>("", "."));
1544   }
1545 
1546   // Check error cases that span multiple flag values.
1547   bool missing_proto_definitions = false;
1548   switch (mode_) {
1549     case MODE_COMPILE:
1550       missing_proto_definitions = input_files_.empty();
1551       break;
1552     case MODE_DECODE:
1553       // Handle --decode_raw separately, since it requires that no proto
1554       // definitions are specified.
1555       if (codec_type_.empty()) {
1556         if (!input_files_.empty() || !descriptor_set_in_names_.empty()) {
1557           std::cerr
1558               << "When using --decode_raw, no input files should be given."
1559               << std::endl;
1560           return PARSE_ARGUMENT_FAIL;
1561         }
1562         missing_proto_definitions = false;
1563         break;  // only for --decode_raw
1564       }
1565       // --decode (not raw) is handled the same way as the rest of the modes.
1566       PROTOBUF_FALLTHROUGH_INTENDED;
1567     case MODE_ENCODE:
1568     case MODE_PRINT:
1569       missing_proto_definitions =
1570           input_files_.empty() && descriptor_set_in_names_.empty();
1571       break;
1572     default:
1573       GOOGLE_LOG(FATAL) << "Unexpected mode: " << mode_;
1574   }
1575   if (missing_proto_definitions) {
1576     std::cerr << "Missing input file." << std::endl;
1577     return PARSE_ARGUMENT_FAIL;
1578   }
1579   if (mode_ == MODE_COMPILE && output_directives_.empty() &&
1580       descriptor_set_out_name_.empty()) {
1581     std::cerr << "Missing output directives." << std::endl;
1582     return PARSE_ARGUMENT_FAIL;
1583   }
1584   if (mode_ != MODE_COMPILE && !dependency_out_name_.empty()) {
1585     std::cerr << "Can only use --dependency_out=FILE when generating code."
1586               << std::endl;
1587     return PARSE_ARGUMENT_FAIL;
1588   }
1589   if (mode_ != MODE_ENCODE && deterministic_output_) {
1590     std::cerr << "Can only use --deterministic_output with --encode."
1591               << std::endl;
1592     return PARSE_ARGUMENT_FAIL;
1593   }
1594   if (!dependency_out_name_.empty() && input_files_.size() > 1) {
1595     std::cerr
1596         << "Can only process one input file when using --dependency_out=FILE."
1597         << std::endl;
1598     return PARSE_ARGUMENT_FAIL;
1599   }
1600   if (imports_in_descriptor_set_ && descriptor_set_out_name_.empty()) {
1601     std::cerr << "--include_imports only makes sense when combined with "
1602                  "--descriptor_set_out."
1603               << std::endl;
1604   }
1605   if (source_info_in_descriptor_set_ && descriptor_set_out_name_.empty()) {
1606     std::cerr << "--include_source_info only makes sense when combined with "
1607                  "--descriptor_set_out."
1608               << std::endl;
1609   }
1610 
1611   return PARSE_ARGUMENT_DONE_AND_CONTINUE;
1612 }
1613 
ParseArgument(const char * arg,std::string * name,std::string * value)1614 bool CommandLineInterface::ParseArgument(const char* arg, std::string* name,
1615                                          std::string* value) {
1616   bool parsed_value = false;
1617 
1618   if (arg[0] != '-') {
1619     // Not a flag.
1620     name->clear();
1621     parsed_value = true;
1622     *value = arg;
1623   } else if (arg[1] == '-') {
1624     // Two dashes:  Multi-character name, with '=' separating name and
1625     //   value.
1626     const char* equals_pos = strchr(arg, '=');
1627     if (equals_pos != NULL) {
1628       *name = std::string(arg, equals_pos - arg);
1629       *value = equals_pos + 1;
1630       parsed_value = true;
1631     } else {
1632       *name = arg;
1633     }
1634   } else {
1635     // One dash:  One-character name, all subsequent characters are the
1636     //   value.
1637     if (arg[1] == '\0') {
1638       // arg is just "-".  We treat this as an input file, except that at
1639       // present this will just lead to a "file not found" error.
1640       name->clear();
1641       *value = arg;
1642       parsed_value = true;
1643     } else {
1644       *name = std::string(arg, 2);
1645       *value = arg + 2;
1646       parsed_value = !value->empty();
1647     }
1648   }
1649 
1650   // Need to return true iff the next arg should be used as the value for this
1651   // one, false otherwise.
1652 
1653   if (parsed_value) {
1654     // We already parsed a value for this flag.
1655     return false;
1656   }
1657 
1658   if (*name == "-h" || *name == "--help" || *name == "--disallow_services" ||
1659       *name == "--include_imports" || *name == "--include_source_info" ||
1660       *name == "--version" || *name == "--decode_raw" ||
1661       *name == "--print_free_field_numbers" ||
1662       *name == "--experimental_allow_proto3_optional" ||
1663       *name == "--deterministic_output") {
1664     // HACK:  These are the only flags that don't take a value.
1665     //   They probably should not be hard-coded like this but for now it's
1666     //   not worth doing better.
1667     return false;
1668   }
1669 
1670   // Next argument is the flag value.
1671   return true;
1672 }
1673 
1674 CommandLineInterface::ParseArgumentStatus
InterpretArgument(const std::string & name,const std::string & value)1675 CommandLineInterface::InterpretArgument(const std::string& name,
1676                                         const std::string& value) {
1677   if (name.empty()) {
1678     // Not a flag.  Just a filename.
1679     if (value.empty()) {
1680       std::cerr
1681           << "You seem to have passed an empty string as one of the "
1682              "arguments to "
1683           << executable_name_
1684           << ".  This is actually "
1685              "sort of hard to do.  Congrats.  Unfortunately it is not valid "
1686              "input so the program is going to die now."
1687           << std::endl;
1688       return PARSE_ARGUMENT_FAIL;
1689     }
1690 
1691 #if defined(_WIN32)
1692     // On Windows, the shell (typically cmd.exe) does not expand wildcards in
1693     // file names (e.g. foo\*.proto), so we do it ourselves.
1694     switch (google::protobuf::io::win32::ExpandWildcards(
1695         value,
1696         [this](const string& path) { this->input_files_.push_back(path); })) {
1697       case google::protobuf::io::win32::ExpandWildcardsResult::kSuccess:
1698         break;
1699       case google::protobuf::io::win32::ExpandWildcardsResult::
1700           kErrorNoMatchingFile:
1701         // Path does not exist, is not a file, or it's longer than MAX_PATH and
1702         // long path handling is disabled.
1703         std::cerr << "Invalid file name pattern or missing input file \""
1704                   << value << "\"" << std::endl;
1705         return PARSE_ARGUMENT_FAIL;
1706       default:
1707         std::cerr << "Cannot convert path \"" << value
1708                   << "\" to or from Windows style" << std::endl;
1709         return PARSE_ARGUMENT_FAIL;
1710     }
1711 #else   // not _WIN32
1712     // On other platforms than Windows (e.g. Linux, Mac OS) the shell (typically
1713     // Bash) expands wildcards.
1714     input_files_.push_back(value);
1715 #endif  // _WIN32
1716 
1717   } else if (name == "-I" || name == "--proto_path") {
1718     // Java's -classpath (and some other languages) delimits path components
1719     // with colons.  Let's accept that syntax too just to make things more
1720     // intuitive.
1721     std::vector<std::string> parts = Split(
1722         value, CommandLineInterface::kPathSeparator,
1723         true);
1724 
1725     for (int i = 0; i < parts.size(); i++) {
1726       std::string virtual_path;
1727       std::string disk_path;
1728 
1729       std::string::size_type equals_pos = parts[i].find_first_of('=');
1730       if (equals_pos == std::string::npos) {
1731         virtual_path = "";
1732         disk_path = parts[i];
1733       } else {
1734         virtual_path = parts[i].substr(0, equals_pos);
1735         disk_path = parts[i].substr(equals_pos + 1);
1736       }
1737 
1738       if (disk_path.empty()) {
1739         std::cerr
1740             << "--proto_path passed empty directory name.  (Use \".\" for "
1741                "current directory.)"
1742             << std::endl;
1743         return PARSE_ARGUMENT_FAIL;
1744       }
1745 
1746       // Make sure disk path exists, warn otherwise.
1747       if (access(disk_path.c_str(), F_OK) < 0) {
1748         // Try the original path; it may have just happened to have a '=' in it.
1749         if (access(parts[i].c_str(), F_OK) < 0) {
1750           std::cerr << disk_path << ": warning: directory does not exist."
1751                     << std::endl;
1752         } else {
1753           virtual_path = "";
1754           disk_path = parts[i];
1755         }
1756       }
1757 
1758       // Don't use make_pair as the old/default standard library on Solaris
1759       // doesn't support it without explicit template parameters, which are
1760       // incompatible with C++0x's make_pair.
1761       proto_path_.push_back(
1762           std::pair<std::string, std::string>(virtual_path, disk_path));
1763     }
1764 
1765   } else if (name == "--direct_dependencies") {
1766     if (direct_dependencies_explicitly_set_) {
1767       std::cerr << name
1768                 << " may only be passed once. To specify multiple "
1769                    "direct dependencies, pass them all as a single "
1770                    "parameter separated by ':'."
1771                 << std::endl;
1772       return PARSE_ARGUMENT_FAIL;
1773     }
1774 
1775     direct_dependencies_explicitly_set_ = true;
1776     std::vector<std::string> direct =
1777         Split(value, ":", true);
1778     GOOGLE_DCHECK(direct_dependencies_.empty());
1779     direct_dependencies_.insert(direct.begin(), direct.end());
1780 
1781   } else if (name == "--direct_dependencies_violation_msg") {
1782     direct_dependencies_violation_msg_ = value;
1783 
1784   } else if (name == "--descriptor_set_in") {
1785     if (!descriptor_set_in_names_.empty()) {
1786       std::cerr << name
1787                 << " may only be passed once. To specify multiple "
1788                    "descriptor sets, pass them all as a single "
1789                    "parameter separated by '"
1790                 << CommandLineInterface::kPathSeparator << "'." << std::endl;
1791       return PARSE_ARGUMENT_FAIL;
1792     }
1793     if (value.empty()) {
1794       std::cerr << name << " requires a non-empty value." << std::endl;
1795       return PARSE_ARGUMENT_FAIL;
1796     }
1797     if (!dependency_out_name_.empty()) {
1798       std::cerr << name << " cannot be used with --dependency_out."
1799                 << std::endl;
1800       return PARSE_ARGUMENT_FAIL;
1801     }
1802 
1803     descriptor_set_in_names_ = Split(
1804         value, CommandLineInterface::kPathSeparator,
1805         true);
1806 
1807   } else if (name == "-o" || name == "--descriptor_set_out") {
1808     if (!descriptor_set_out_name_.empty()) {
1809       std::cerr << name << " may only be passed once." << std::endl;
1810       return PARSE_ARGUMENT_FAIL;
1811     }
1812     if (value.empty()) {
1813       std::cerr << name << " requires a non-empty value." << std::endl;
1814       return PARSE_ARGUMENT_FAIL;
1815     }
1816     if (mode_ != MODE_COMPILE) {
1817       std::cerr
1818           << "Cannot use --encode or --decode and generate descriptors at the "
1819              "same time."
1820           << std::endl;
1821       return PARSE_ARGUMENT_FAIL;
1822     }
1823     descriptor_set_out_name_ = value;
1824 
1825   } else if (name == "--dependency_out") {
1826     if (!dependency_out_name_.empty()) {
1827       std::cerr << name << " may only be passed once." << std::endl;
1828       return PARSE_ARGUMENT_FAIL;
1829     }
1830     if (value.empty()) {
1831       std::cerr << name << " requires a non-empty value." << std::endl;
1832       return PARSE_ARGUMENT_FAIL;
1833     }
1834     if (!descriptor_set_in_names_.empty()) {
1835       std::cerr << name << " cannot be used with --descriptor_set_in."
1836                 << std::endl;
1837       return PARSE_ARGUMENT_FAIL;
1838     }
1839     dependency_out_name_ = value;
1840 
1841   } else if (name == "--include_imports") {
1842     if (imports_in_descriptor_set_) {
1843       std::cerr << name << " may only be passed once." << std::endl;
1844       return PARSE_ARGUMENT_FAIL;
1845     }
1846     imports_in_descriptor_set_ = true;
1847 
1848   } else if (name == "--include_source_info") {
1849     if (source_info_in_descriptor_set_) {
1850       std::cerr << name << " may only be passed once." << std::endl;
1851       return PARSE_ARGUMENT_FAIL;
1852     }
1853     source_info_in_descriptor_set_ = true;
1854 
1855   } else if (name == "-h" || name == "--help") {
1856     PrintHelpText();
1857     return PARSE_ARGUMENT_DONE_AND_EXIT;  // Exit without running compiler.
1858 
1859   } else if (name == "--version") {
1860     if (!version_info_.empty()) {
1861       std::cout << version_info_ << std::endl;
1862     }
1863     std::cout << "libprotoc " << internal::VersionString(PROTOBUF_VERSION)
1864               << PROTOBUF_VERSION_SUFFIX << std::endl;
1865     return PARSE_ARGUMENT_DONE_AND_EXIT;  // Exit without running compiler.
1866 
1867   } else if (name == "--disallow_services") {
1868     disallow_services_ = true;
1869 
1870 
1871   } else if (name == "--experimental_allow_proto3_optional") {
1872     allow_proto3_optional_ = true;
1873 
1874   } else if (name == "--encode" || name == "--decode" ||
1875              name == "--decode_raw") {
1876     if (mode_ != MODE_COMPILE) {
1877       std::cerr << "Only one of --encode and --decode can be specified."
1878                 << std::endl;
1879       return PARSE_ARGUMENT_FAIL;
1880     }
1881     if (!output_directives_.empty() || !descriptor_set_out_name_.empty()) {
1882       std::cerr << "Cannot use " << name
1883                 << " and generate code or descriptors at the same time."
1884                 << std::endl;
1885       return PARSE_ARGUMENT_FAIL;
1886     }
1887 
1888     mode_ = (name == "--encode") ? MODE_ENCODE : MODE_DECODE;
1889 
1890     if (value.empty() && name != "--decode_raw") {
1891       std::cerr << "Type name for " << name << " cannot be blank." << std::endl;
1892       if (name == "--decode") {
1893         std::cerr << "To decode an unknown message, use --decode_raw."
1894                   << std::endl;
1895       }
1896       return PARSE_ARGUMENT_FAIL;
1897     } else if (!value.empty() && name == "--decode_raw") {
1898       std::cerr << "--decode_raw does not take a parameter." << std::endl;
1899       return PARSE_ARGUMENT_FAIL;
1900     }
1901 
1902     codec_type_ = value;
1903 
1904   } else if (name == "--deterministic_output") {
1905     deterministic_output_ = true;
1906 
1907   } else if (name == "--error_format") {
1908     if (value == "gcc") {
1909       error_format_ = ERROR_FORMAT_GCC;
1910     } else if (value == "msvs") {
1911       error_format_ = ERROR_FORMAT_MSVS;
1912     } else {
1913       std::cerr << "Unknown error format: " << value << std::endl;
1914       return PARSE_ARGUMENT_FAIL;
1915     }
1916 
1917   } else if (name == "--plugin") {
1918     if (plugin_prefix_.empty()) {
1919       std::cerr << "This compiler does not support plugins." << std::endl;
1920       return PARSE_ARGUMENT_FAIL;
1921     }
1922 
1923     std::string plugin_name;
1924     std::string path;
1925 
1926     std::string::size_type equals_pos = value.find_first_of('=');
1927     if (equals_pos == std::string::npos) {
1928       // Use the basename of the file.
1929       std::string::size_type slash_pos = value.find_last_of('/');
1930       if (slash_pos == std::string::npos) {
1931         plugin_name = value;
1932       } else {
1933         plugin_name = value.substr(slash_pos + 1);
1934       }
1935       path = value;
1936     } else {
1937       plugin_name = value.substr(0, equals_pos);
1938       path = value.substr(equals_pos + 1);
1939     }
1940 
1941     plugins_[plugin_name] = path;
1942 
1943   } else if (name == "--print_free_field_numbers") {
1944     if (mode_ != MODE_COMPILE) {
1945       std::cerr << "Cannot use " << name
1946                 << " and use --encode, --decode or print "
1947                 << "other info at the same time." << std::endl;
1948       return PARSE_ARGUMENT_FAIL;
1949     }
1950     if (!output_directives_.empty() || !descriptor_set_out_name_.empty()) {
1951       std::cerr << "Cannot use " << name
1952                 << " and generate code or descriptors at the same time."
1953                 << std::endl;
1954       return PARSE_ARGUMENT_FAIL;
1955     }
1956     mode_ = MODE_PRINT;
1957     print_mode_ = PRINT_FREE_FIELDS;
1958   } else {
1959     // Some other flag.  Look it up in the generators list.
1960     const GeneratorInfo* generator_info =
1961         FindOrNull(generators_by_flag_name_, name);
1962     if (generator_info == NULL &&
1963         (plugin_prefix_.empty() || !HasSuffixString(name, "_out"))) {
1964       // Check if it's a generator option flag.
1965       generator_info = FindOrNull(generators_by_option_name_, name);
1966       if (generator_info != NULL) {
1967         std::string* parameters =
1968             &generator_parameters_[generator_info->flag_name];
1969         if (!parameters->empty()) {
1970           parameters->append(",");
1971         }
1972         parameters->append(value);
1973       } else if (HasPrefixString(name, "--") && HasSuffixString(name, "_opt")) {
1974         std::string* parameters =
1975             &plugin_parameters_[PluginName(plugin_prefix_, name)];
1976         if (!parameters->empty()) {
1977           parameters->append(",");
1978         }
1979         parameters->append(value);
1980       } else {
1981         std::cerr << "Unknown flag: " << name << std::endl;
1982         return PARSE_ARGUMENT_FAIL;
1983       }
1984     } else {
1985       // It's an output flag.  Add it to the output directives.
1986       if (mode_ != MODE_COMPILE) {
1987         std::cerr << "Cannot use --encode, --decode or print .proto info and "
1988                      "generate code at the same time."
1989                   << std::endl;
1990         return PARSE_ARGUMENT_FAIL;
1991       }
1992 
1993       OutputDirective directive;
1994       directive.name = name;
1995       if (generator_info == NULL) {
1996         directive.generator = NULL;
1997       } else {
1998         directive.generator = generator_info->generator;
1999       }
2000 
2001       // Split value at ':' to separate the generator parameter from the
2002       // filename.  However, avoid doing this if the colon is part of a valid
2003       // Windows-style absolute path.
2004       std::string::size_type colon_pos = value.find_first_of(':');
2005       if (colon_pos == std::string::npos || IsWindowsAbsolutePath(value)) {
2006         directive.output_location = value;
2007       } else {
2008         directive.parameter = value.substr(0, colon_pos);
2009         directive.output_location = value.substr(colon_pos + 1);
2010       }
2011 
2012       output_directives_.push_back(directive);
2013     }
2014   }
2015 
2016   return PARSE_ARGUMENT_DONE_AND_CONTINUE;
2017 }
2018 
PrintHelpText()2019 void CommandLineInterface::PrintHelpText() {
2020   // Sorry for indentation here; line wrapping would be uglier.
2021   std::cout
2022       <<
2023       "Usage: " << executable_name_
2024       << " [OPTION] PROTO_FILES\n"
2025          "Parse PROTO_FILES and generate output based on the options given:\n"
2026          "  -IPATH, --proto_path=PATH   Specify the directory in which to "
2027          "search for\n"
2028          "                              imports.  May be specified multiple "
2029          "times;\n"
2030          "                              directories will be searched in order. "
2031          " If not\n"
2032          "                              given, the current working directory "
2033          "is used.\n"
2034          "                              If not found in any of the these "
2035          "directories,\n"
2036          "                              the --descriptor_set_in descriptors "
2037          "will be\n"
2038          "                              checked for required proto file.\n"
2039          "  --version                   Show version info and exit.\n"
2040          "  -h, --help                  Show this text and exit.\n"
2041          "  --encode=MESSAGE_TYPE       Read a text-format message of the "
2042          "given type\n"
2043          "                              from standard input and write it in "
2044          "binary\n"
2045          "                              to standard output.  The message type "
2046          "must\n"
2047          "                              be defined in PROTO_FILES or their "
2048          "imports.\n"
2049          "  --deterministic_output      When using --encode, ensure map fields "
2050          "are\n"
2051          "                              deterministically ordered. Note that"
2052          "this order is not\n"
2053          "                              canonical, and changes across builds"
2054          "or releases of protoc.\n"
2055          "  --decode=MESSAGE_TYPE       Read a binary message of the given "
2056          "type from\n"
2057          "                              standard input and write it in text "
2058          "format\n"
2059          "                              to standard output.  The message type "
2060          "must\n"
2061          "                              be defined in PROTO_FILES or their "
2062          "imports.\n"
2063          "  --decode_raw                Read an arbitrary protocol message "
2064          "from\n"
2065          "                              standard input and write the raw "
2066          "tag/value\n"
2067          "                              pairs in text format to standard "
2068          "output.  No\n"
2069          "                              PROTO_FILES should be given when using "
2070          "this\n"
2071          "                              flag.\n"
2072          "  --descriptor_set_in=FILES   Specifies a delimited list of FILES\n"
2073          "                              each containing a FileDescriptorSet "
2074          "(a\n"
2075          "                              protocol buffer defined in "
2076          "descriptor.proto).\n"
2077          "                              The FileDescriptor for each of the "
2078          "PROTO_FILES\n"
2079          "                              provided will be loaded from these\n"
2080          "                              FileDescriptorSets. If a "
2081          "FileDescriptor\n"
2082          "                              appears multiple times, the first "
2083          "occurrence\n"
2084          "                              will be used.\n"
2085          "  -oFILE,                     Writes a FileDescriptorSet (a protocol "
2086          "buffer,\n"
2087          "    --descriptor_set_out=FILE defined in descriptor.proto) "
2088          "containing all of\n"
2089          "                              the input files to FILE.\n"
2090          "  --include_imports           When using --descriptor_set_out, also "
2091          "include\n"
2092          "                              all dependencies of the input files in "
2093          "the\n"
2094          "                              set, so that the set is "
2095          "self-contained.\n"
2096          "  --include_source_info       When using --descriptor_set_out, do "
2097          "not strip\n"
2098          "                              SourceCodeInfo from the "
2099          "FileDescriptorProto.\n"
2100          "                              This results in vastly larger "
2101          "descriptors that\n"
2102          "                              include information about the "
2103          "original\n"
2104          "                              location of each decl in the source "
2105          "file as\n"
2106          "                              well as surrounding comments.\n"
2107          "  --dependency_out=FILE       Write a dependency output file in the "
2108          "format\n"
2109          "                              expected by make. This writes the "
2110          "transitive\n"
2111          "                              set of input file paths to FILE\n"
2112          "  --error_format=FORMAT       Set the format in which to print "
2113          "errors.\n"
2114          "                              FORMAT may be 'gcc' (the default) or "
2115          "'msvs'\n"
2116          "                              (Microsoft Visual Studio format).\n"
2117          "  --print_free_field_numbers  Print the free field numbers of the "
2118          "messages\n"
2119          "                              defined in the given proto files. "
2120          "Groups share\n"
2121          "                              the same field number space with the "
2122          "parent \n"
2123          "                              message. Extension ranges are counted "
2124          "as \n"
2125          "                              occupied fields numbers.\n"
2126       << std::endl;
2127   if (!plugin_prefix_.empty()) {
2128     std::cout
2129         << "  --plugin=EXECUTABLE         Specifies a plugin executable to "
2130            "use.\n"
2131            "                              Normally, protoc searches the PATH "
2132            "for\n"
2133            "                              plugins, but you may specify "
2134            "additional\n"
2135            "                              executables not in the path using "
2136            "this flag.\n"
2137            "                              Additionally, EXECUTABLE may be of "
2138            "the form\n"
2139            "                              NAME=PATH, in which case the given "
2140            "plugin name\n"
2141            "                              is mapped to the given executable "
2142            "even if\n"
2143            "                              the executable's own name differs."
2144         << std::endl;
2145   }
2146 
2147   for (GeneratorMap::iterator iter = generators_by_flag_name_.begin();
2148        iter != generators_by_flag_name_.end(); ++iter) {
2149     // FIXME(kenton):  If the text is long enough it will wrap, which is ugly,
2150     //   but fixing this nicely (e.g. splitting on spaces) is probably more
2151     //   trouble than it's worth.
2152     std::cout << "  " << iter->first << "=OUT_DIR "
2153               << std::string(19 - iter->first.size(),
2154                              ' ')  // Spaces for alignment.
2155               << iter->second.help_text << std::endl;
2156   }
2157   std::cout << "  @<filename>                 Read options and filenames from "
2158                "file. If a\n"
2159                "                              relative file path is specified, "
2160                "the file\n"
2161                "                              will be searched in the working "
2162                "directory.\n"
2163                "                              The --proto_path option will not "
2164                "affect how\n"
2165                "                              this argument file is searched. "
2166                "Content of\n"
2167                "                              the file will be expanded in the "
2168                "position of\n"
2169                "                              @<filename> as in the argument "
2170                "list. Note\n"
2171                "                              that shell expansion is not "
2172                "applied to the\n"
2173                "                              content of the file (i.e., you "
2174                "cannot use\n"
2175                "                              quotes, wildcards, escapes, "
2176                "commands, etc.).\n"
2177                "                              Each line corresponds to a "
2178                "single argument,\n"
2179                "                              even if it contains spaces."
2180             << std::endl;
2181 }
2182 
EnforceProto3OptionalSupport(const std::string & codegen_name,uint64 supported_features,const std::vector<const FileDescriptor * > & parsed_files) const2183 bool CommandLineInterface::EnforceProto3OptionalSupport(
2184     const std::string& codegen_name, uint64 supported_features,
2185     const std::vector<const FileDescriptor*>& parsed_files) const {
2186   bool supports_proto3_optional =
2187       supported_features & CodeGenerator::FEATURE_PROTO3_OPTIONAL;
2188   if (!supports_proto3_optional) {
2189     for (const auto fd : parsed_files) {
2190       if (ContainsProto3Optional(fd)) {
2191         std::cerr << fd->name()
2192                   << ": is a proto3 file that contains optional fields, but "
2193                      "code generator "
2194                   << codegen_name
2195                   << " hasn't been updated to support optional fields in "
2196                      "proto3. Please ask the owner of this code generator to "
2197                      "support proto3 optional.";
2198         return false;
2199       }
2200     }
2201   }
2202   return true;
2203 }
2204 
GenerateOutput(const std::vector<const FileDescriptor * > & parsed_files,const OutputDirective & output_directive,GeneratorContext * generator_context)2205 bool CommandLineInterface::GenerateOutput(
2206     const std::vector<const FileDescriptor*>& parsed_files,
2207     const OutputDirective& output_directive,
2208     GeneratorContext* generator_context) {
2209   // Call the generator.
2210   std::string error;
2211   if (output_directive.generator == NULL) {
2212     // This is a plugin.
2213     GOOGLE_CHECK(HasPrefixString(output_directive.name, "--") &&
2214           HasSuffixString(output_directive.name, "_out"))
2215         << "Bad name for plugin generator: " << output_directive.name;
2216 
2217     std::string plugin_name = PluginName(plugin_prefix_, output_directive.name);
2218     std::string parameters = output_directive.parameter;
2219     if (!plugin_parameters_[plugin_name].empty()) {
2220       if (!parameters.empty()) {
2221         parameters.append(",");
2222       }
2223       parameters.append(plugin_parameters_[plugin_name]);
2224     }
2225     if (!GeneratePluginOutput(parsed_files, plugin_name, parameters,
2226                               generator_context, &error)) {
2227       std::cerr << output_directive.name << ": " << error << std::endl;
2228       return false;
2229     }
2230   } else {
2231     // Regular generator.
2232     std::string parameters = output_directive.parameter;
2233     if (!generator_parameters_[output_directive.name].empty()) {
2234       if (!parameters.empty()) {
2235         parameters.append(",");
2236       }
2237       parameters.append(generator_parameters_[output_directive.name]);
2238     }
2239     if (!EnforceProto3OptionalSupport(
2240             output_directive.name,
2241             output_directive.generator->GetSupportedFeatures(), parsed_files)) {
2242       return false;
2243     }
2244 
2245     if (!output_directive.generator->GenerateAll(parsed_files, parameters,
2246                                                  generator_context, &error)) {
2247       // Generator returned an error.
2248       std::cerr << output_directive.name << ": " << error << std::endl;
2249       return false;
2250     }
2251   }
2252 
2253   return true;
2254 }
2255 
GenerateDependencyManifestFile(const std::vector<const FileDescriptor * > & parsed_files,const GeneratorContextMap & output_directories,DiskSourceTree * source_tree)2256 bool CommandLineInterface::GenerateDependencyManifestFile(
2257     const std::vector<const FileDescriptor*>& parsed_files,
2258     const GeneratorContextMap& output_directories,
2259     DiskSourceTree* source_tree) {
2260   FileDescriptorSet file_set;
2261 
2262   std::set<const FileDescriptor*> already_seen;
2263   for (int i = 0; i < parsed_files.size(); i++) {
2264     GetTransitiveDependencies(parsed_files[i], false, false, &already_seen,
2265                               file_set.mutable_file());
2266   }
2267 
2268   std::vector<std::string> output_filenames;
2269   for (const auto& pair : output_directories) {
2270     const std::string& location = pair.first;
2271     GeneratorContextImpl* directory = pair.second.get();
2272     std::vector<std::string> relative_output_filenames;
2273     directory->GetOutputFilenames(&relative_output_filenames);
2274     for (int i = 0; i < relative_output_filenames.size(); i++) {
2275       std::string output_filename = location + relative_output_filenames[i];
2276       if (output_filename.compare(0, 2, "./") == 0) {
2277         output_filename = output_filename.substr(2);
2278       }
2279       output_filenames.push_back(output_filename);
2280     }
2281   }
2282 
2283   int fd;
2284   do {
2285     fd = open(dependency_out_name_.c_str(),
2286               O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0666);
2287   } while (fd < 0 && errno == EINTR);
2288 
2289   if (fd < 0) {
2290     perror(dependency_out_name_.c_str());
2291     return false;
2292   }
2293 
2294   io::FileOutputStream out(fd);
2295   io::Printer printer(&out, '$');
2296 
2297   for (int i = 0; i < output_filenames.size(); i++) {
2298     printer.Print(output_filenames[i].c_str());
2299     if (i == output_filenames.size() - 1) {
2300       printer.Print(":");
2301     } else {
2302       printer.Print(" \\\n");
2303     }
2304   }
2305 
2306   for (int i = 0; i < file_set.file_size(); i++) {
2307     const FileDescriptorProto& file = file_set.file(i);
2308     const std::string& virtual_file = file.name();
2309     std::string disk_file;
2310     if (source_tree &&
2311         source_tree->VirtualFileToDiskFile(virtual_file, &disk_file)) {
2312       printer.Print(" $disk_file$", "disk_file", disk_file);
2313       if (i < file_set.file_size() - 1) printer.Print("\\\n");
2314     } else {
2315       std::cerr << "Unable to identify path for file " << virtual_file
2316                 << std::endl;
2317       return false;
2318     }
2319   }
2320 
2321   return true;
2322 }
2323 
GeneratePluginOutput(const std::vector<const FileDescriptor * > & parsed_files,const std::string & plugin_name,const std::string & parameter,GeneratorContext * generator_context,std::string * error)2324 bool CommandLineInterface::GeneratePluginOutput(
2325     const std::vector<const FileDescriptor*>& parsed_files,
2326     const std::string& plugin_name, const std::string& parameter,
2327     GeneratorContext* generator_context, std::string* error) {
2328   CodeGeneratorRequest request;
2329   CodeGeneratorResponse response;
2330   std::string processed_parameter = parameter;
2331 
2332 
2333   // Build the request.
2334   if (!processed_parameter.empty()) {
2335     request.set_parameter(processed_parameter);
2336   }
2337 
2338 
2339   std::set<const FileDescriptor*> already_seen;
2340   for (int i = 0; i < parsed_files.size(); i++) {
2341     request.add_file_to_generate(parsed_files[i]->name());
2342     GetTransitiveDependencies(parsed_files[i],
2343                               true,  // Include json_name for plugins.
2344                               true,  // Include source code info.
2345                               &already_seen, request.mutable_proto_file());
2346   }
2347 
2348   google::protobuf::compiler::Version* version =
2349       request.mutable_compiler_version();
2350   version->set_major(PROTOBUF_VERSION / 1000000);
2351   version->set_minor(PROTOBUF_VERSION / 1000 % 1000);
2352   version->set_patch(PROTOBUF_VERSION % 1000);
2353   version->set_suffix(PROTOBUF_VERSION_SUFFIX);
2354 
2355   // Invoke the plugin.
2356   Subprocess subprocess;
2357 
2358   if (plugins_.count(plugin_name) > 0) {
2359     subprocess.Start(plugins_[plugin_name], Subprocess::EXACT_NAME);
2360   } else {
2361     subprocess.Start(plugin_name, Subprocess::SEARCH_PATH);
2362   }
2363 
2364   std::string communicate_error;
2365   if (!subprocess.Communicate(request, &response, &communicate_error)) {
2366     *error = strings::Substitute("$0: $1", plugin_name, communicate_error);
2367     return false;
2368   }
2369 
2370   // Write the files.  We do this even if there was a generator error in order
2371   // to match the behavior of a compiled-in generator.
2372   std::unique_ptr<io::ZeroCopyOutputStream> current_output;
2373   for (int i = 0; i < response.file_size(); i++) {
2374     const CodeGeneratorResponse::File& output_file = response.file(i);
2375 
2376     if (!output_file.insertion_point().empty()) {
2377       std::string filename = output_file.name();
2378       // Open a file for insert.
2379       // We reset current_output to NULL first so that the old file is closed
2380       // before the new one is opened.
2381       current_output.reset();
2382       current_output.reset(
2383           generator_context->OpenForInsertWithGeneratedCodeInfo(
2384               filename, output_file.insertion_point(),
2385               output_file.generated_code_info()));
2386     } else if (!output_file.name().empty()) {
2387       // Starting a new file.  Open it.
2388       // We reset current_output to NULL first so that the old file is closed
2389       // before the new one is opened.
2390       current_output.reset();
2391       current_output.reset(generator_context->Open(output_file.name()));
2392     } else if (current_output == NULL) {
2393       *error = strings::Substitute(
2394           "$0: First file chunk returned by plugin did not specify a file "
2395           "name.",
2396           plugin_name);
2397       return false;
2398     }
2399 
2400     // Use CodedOutputStream for convenience; otherwise we'd need to provide
2401     // our own buffer-copying loop.
2402     io::CodedOutputStream writer(current_output.get());
2403     writer.WriteString(output_file.content());
2404   }
2405 
2406   // Check for errors.
2407   if (!response.error().empty()) {
2408     // Generator returned an error.
2409     *error = response.error();
2410     return false;
2411   } else if (!EnforceProto3OptionalSupport(
2412                  plugin_name, response.supported_features(), parsed_files)) {
2413     return false;
2414   }
2415 
2416   return true;
2417 }
2418 
EncodeOrDecode(const DescriptorPool * pool)2419 bool CommandLineInterface::EncodeOrDecode(const DescriptorPool* pool) {
2420   // Look up the type.
2421   const Descriptor* type = pool->FindMessageTypeByName(codec_type_);
2422   if (type == NULL) {
2423     std::cerr << "Type not defined: " << codec_type_ << std::endl;
2424     return false;
2425   }
2426 
2427   DynamicMessageFactory dynamic_factory(pool);
2428   std::unique_ptr<Message> message(dynamic_factory.GetPrototype(type)->New());
2429 
2430   if (mode_ == MODE_ENCODE) {
2431     SetFdToTextMode(STDIN_FILENO);
2432     SetFdToBinaryMode(STDOUT_FILENO);
2433   } else {
2434     SetFdToBinaryMode(STDIN_FILENO);
2435     SetFdToTextMode(STDOUT_FILENO);
2436   }
2437 
2438   io::FileInputStream in(STDIN_FILENO);
2439   io::FileOutputStream out(STDOUT_FILENO);
2440 
2441   if (mode_ == MODE_ENCODE) {
2442     // Input is text.
2443     ErrorPrinter error_collector(error_format_);
2444     TextFormat::Parser parser;
2445     parser.RecordErrorsTo(&error_collector);
2446     parser.AllowPartialMessage(true);
2447 
2448     if (!parser.Parse(&in, message.get())) {
2449       std::cerr << "Failed to parse input." << std::endl;
2450       return false;
2451     }
2452   } else {
2453     // Input is binary.
2454     if (!message->ParsePartialFromZeroCopyStream(&in)) {
2455       std::cerr << "Failed to parse input." << std::endl;
2456       return false;
2457     }
2458   }
2459 
2460   if (!message->IsInitialized()) {
2461     std::cerr << "warning:  Input message is missing required fields:  "
2462               << message->InitializationErrorString() << std::endl;
2463   }
2464 
2465   if (mode_ == MODE_ENCODE) {
2466     // Output is binary.
2467     io::CodedOutputStream coded_out(&out);
2468     coded_out.SetSerializationDeterministic(deterministic_output_);
2469     if (!message->SerializePartialToCodedStream(&coded_out)) {
2470       std::cerr << "output: I/O error." << std::endl;
2471       return false;
2472     }
2473   } else {
2474     // Output is text.
2475     if (!TextFormat::Print(*message, &out)) {
2476       std::cerr << "output: I/O error." << std::endl;
2477       return false;
2478     }
2479   }
2480 
2481   return true;
2482 }
2483 
WriteDescriptorSet(const std::vector<const FileDescriptor * > & parsed_files)2484 bool CommandLineInterface::WriteDescriptorSet(
2485     const std::vector<const FileDescriptor*>& parsed_files) {
2486   FileDescriptorSet file_set;
2487 
2488   std::set<const FileDescriptor*> already_seen;
2489   if (!imports_in_descriptor_set_) {
2490     // Since we don't want to output transitive dependencies, but we do want
2491     // things to be in dependency order, add all dependencies that aren't in
2492     // parsed_files to already_seen.  This will short circuit the recursion
2493     // in GetTransitiveDependencies.
2494     std::set<const FileDescriptor*> to_output;
2495     to_output.insert(parsed_files.begin(), parsed_files.end());
2496     for (int i = 0; i < parsed_files.size(); i++) {
2497       const FileDescriptor* file = parsed_files[i];
2498       for (int i = 0; i < file->dependency_count(); i++) {
2499         const FileDescriptor* dependency = file->dependency(i);
2500         // if the dependency isn't in parsed files, mark it as already seen
2501         if (to_output.find(dependency) == to_output.end()) {
2502           already_seen.insert(dependency);
2503         }
2504       }
2505     }
2506   }
2507   for (int i = 0; i < parsed_files.size(); i++) {
2508     GetTransitiveDependencies(parsed_files[i],
2509                               true,  // Include json_name
2510                               source_info_in_descriptor_set_, &already_seen,
2511                               file_set.mutable_file());
2512   }
2513 
2514   int fd;
2515   do {
2516     fd = open(descriptor_set_out_name_.c_str(),
2517               O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0666);
2518   } while (fd < 0 && errno == EINTR);
2519 
2520   if (fd < 0) {
2521     perror(descriptor_set_out_name_.c_str());
2522     return false;
2523   }
2524 
2525   io::FileOutputStream out(fd);
2526 
2527   {
2528     io::CodedOutputStream coded_out(&out);
2529     // Determinism is useful here because build outputs are sometimes checked
2530     // into version control.
2531     coded_out.SetSerializationDeterministic(true);
2532     if (!file_set.SerializeToCodedStream(&coded_out)) {
2533       std::cerr << descriptor_set_out_name_ << ": " << strerror(out.GetErrno())
2534                 << std::endl;
2535       out.Close();
2536       return false;
2537     }
2538   }
2539 
2540   if (!out.Close()) {
2541     std::cerr << descriptor_set_out_name_ << ": " << strerror(out.GetErrno())
2542               << std::endl;
2543     return false;
2544   }
2545 
2546   return true;
2547 }
2548 
GetTransitiveDependencies(const FileDescriptor * file,bool include_json_name,bool include_source_code_info,std::set<const FileDescriptor * > * already_seen,RepeatedPtrField<FileDescriptorProto> * output)2549 void CommandLineInterface::GetTransitiveDependencies(
2550     const FileDescriptor* file, bool include_json_name,
2551     bool include_source_code_info,
2552     std::set<const FileDescriptor*>* already_seen,
2553     RepeatedPtrField<FileDescriptorProto>* output) {
2554   if (!already_seen->insert(file).second) {
2555     // Already saw this file.  Skip.
2556     return;
2557   }
2558 
2559   // Add all dependencies.
2560   for (int i = 0; i < file->dependency_count(); i++) {
2561     GetTransitiveDependencies(file->dependency(i), include_json_name,
2562                               include_source_code_info, already_seen, output);
2563   }
2564 
2565   // Add this file.
2566   FileDescriptorProto* new_descriptor = output->Add();
2567   file->CopyTo(new_descriptor);
2568   if (include_json_name) {
2569     file->CopyJsonNameTo(new_descriptor);
2570   }
2571   if (include_source_code_info) {
2572     file->CopySourceCodeInfoTo(new_descriptor);
2573   }
2574 }
2575 
2576 namespace {
2577 
2578 // Utility function for PrintFreeFieldNumbers.
2579 // Stores occupied ranges into the ranges parameter, and next level of sub
2580 // message types into the nested_messages parameter.  The FieldRange is left
2581 // inclusive, right exclusive. i.e. [a, b).
2582 //
2583 // Nested Messages:
2584 // Note that it only stores the nested message type, iff the nested type is
2585 // either a direct child of the given descriptor, or the nested type is a
2586 // descendant of the given descriptor and all the nodes between the
2587 // nested type and the given descriptor are group types. e.g.
2588 //
2589 // message Foo {
2590 //   message Bar {
2591 //     message NestedBar {}
2592 //   }
2593 //   group Baz = 1 {
2594 //     group NestedBazGroup = 2 {
2595 //       message Quz {
2596 //         message NestedQuz {}
2597 //       }
2598 //     }
2599 //     message NestedBaz {}
2600 //   }
2601 // }
2602 //
2603 // In this case, Bar, Quz and NestedBaz will be added into the nested types.
2604 // Since free field numbers of group types will not be printed, this makes sure
2605 // the nested message types in groups will not be dropped. The nested_messages
2606 // parameter will contain the direct children (when groups are ignored in the
2607 // tree) of the given descriptor for the caller to traverse. The declaration
2608 // order of the nested messages is also preserved.
2609 typedef std::pair<int, int> FieldRange;
GatherOccupiedFieldRanges(const Descriptor * descriptor,std::set<FieldRange> * ranges,std::vector<const Descriptor * > * nested_messages)2610 void GatherOccupiedFieldRanges(
2611     const Descriptor* descriptor, std::set<FieldRange>* ranges,
2612     std::vector<const Descriptor*>* nested_messages) {
2613   std::set<const Descriptor*> groups;
2614   for (int i = 0; i < descriptor->field_count(); ++i) {
2615     const FieldDescriptor* fd = descriptor->field(i);
2616     ranges->insert(FieldRange(fd->number(), fd->number() + 1));
2617     if (fd->type() == FieldDescriptor::TYPE_GROUP) {
2618       groups.insert(fd->message_type());
2619     }
2620   }
2621   for (int i = 0; i < descriptor->extension_range_count(); ++i) {
2622     ranges->insert(FieldRange(descriptor->extension_range(i)->start,
2623                               descriptor->extension_range(i)->end));
2624   }
2625   for (int i = 0; i < descriptor->reserved_range_count(); ++i) {
2626     ranges->insert(FieldRange(descriptor->reserved_range(i)->start,
2627                               descriptor->reserved_range(i)->end));
2628   }
2629   // Handle the nested messages/groups in declaration order to make it
2630   // post-order strict.
2631   for (int i = 0; i < descriptor->nested_type_count(); ++i) {
2632     const Descriptor* nested_desc = descriptor->nested_type(i);
2633     if (groups.find(nested_desc) != groups.end()) {
2634       GatherOccupiedFieldRanges(nested_desc, ranges, nested_messages);
2635     } else {
2636       nested_messages->push_back(nested_desc);
2637     }
2638   }
2639 }
2640 
2641 // Utility function for PrintFreeFieldNumbers.
2642 // Actually prints the formatted free field numbers for given message name and
2643 // occupied ranges.
FormatFreeFieldNumbers(const std::string & name,const std::set<FieldRange> & ranges)2644 void FormatFreeFieldNumbers(const std::string& name,
2645                             const std::set<FieldRange>& ranges) {
2646   std::string output;
2647   StringAppendF(&output, "%-35s free:", name.c_str());
2648   int next_free_number = 1;
2649   for (std::set<FieldRange>::const_iterator i = ranges.begin();
2650        i != ranges.end(); ++i) {
2651     // This happens when groups re-use parent field numbers, in which
2652     // case we skip the FieldRange entirely.
2653     if (next_free_number >= i->second) continue;
2654 
2655     if (next_free_number < i->first) {
2656       if (next_free_number + 1 == i->first) {
2657         // Singleton
2658         StringAppendF(&output, " %d", next_free_number);
2659       } else {
2660         // Range
2661         StringAppendF(&output, " %d-%d", next_free_number, i->first - 1);
2662       }
2663     }
2664     next_free_number = i->second;
2665   }
2666   if (next_free_number <= FieldDescriptor::kMaxNumber) {
2667     StringAppendF(&output, " %d-INF", next_free_number);
2668   }
2669   std::cout << output << std::endl;
2670 }
2671 
2672 }  // namespace
2673 
PrintFreeFieldNumbers(const Descriptor * descriptor)2674 void CommandLineInterface::PrintFreeFieldNumbers(const Descriptor* descriptor) {
2675   std::set<FieldRange> ranges;
2676   std::vector<const Descriptor*> nested_messages;
2677   GatherOccupiedFieldRanges(descriptor, &ranges, &nested_messages);
2678 
2679   for (int i = 0; i < nested_messages.size(); ++i) {
2680     PrintFreeFieldNumbers(nested_messages[i]);
2681   }
2682   FormatFreeFieldNumbers(descriptor->full_name(), ranges);
2683 }
2684 
2685 
2686 }  // namespace compiler
2687 }  // namespace protobuf
2688 }  // namespace google
2689