• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 //
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file or at
6 // https://developers.google.com/open-source/licenses/bsd
7 
8 // Author: kenton@google.com (Kenton Varda)
9 //  Based on original Protocol Buffers design by
10 //  Sanjay Ghemawat, Jeff Dean, and others.
11 
12 #include "google/protobuf/compiler/importer.h"
13 
14 #ifdef _MSC_VER
15 #include <direct.h>
16 #else
17 #include <unistd.h>
18 #endif
19 #include <errno.h>
20 #include <fcntl.h>
21 #include <sys/stat.h>
22 #include <sys/types.h>
23 
24 #include <algorithm>
25 #include <memory>
26 #include <vector>
27 
28 #include "absl/strings/match.h"
29 #include "absl/strings/str_cat.h"
30 #include "absl/strings/str_join.h"
31 #include "absl/strings/str_replace.h"
32 #include "absl/strings/str_split.h"
33 #include "absl/strings/string_view.h"
34 #include "google/protobuf/compiler/parser.h"
35 #include "google/protobuf/io/io_win32.h"
36 #include "google/protobuf/io/tokenizer.h"
37 #include "google/protobuf/io/zero_copy_stream_impl.h"
38 
39 namespace google {
40 namespace protobuf {
41 namespace compiler {
42 
43 #ifdef _WIN32
44 // DO NOT include <io.h>, instead create functions in io_win32.{h,cc} and import
45 // them like we do below.
46 using google::protobuf::io::win32::access;
47 using google::protobuf::io::win32::open;
48 #endif
49 
50 #if defined(_WIN32) || defined(__CYGWIN__)
51 #include "absl/strings/ascii.h"
52 #endif
53 
54 // Returns true if the text looks like a Windows-style absolute path, starting
55 // with a drive letter.  Example:  "C:\foo".  TODO:  Share this with
56 // copy in command_line_interface.cc?
IsWindowsAbsolutePath(absl::string_view text)57 static bool IsWindowsAbsolutePath(absl::string_view text) {
58 #if defined(_WIN32) || defined(__CYGWIN__)
59   return text.size() >= 3 && text[1] == ':' && absl::ascii_isalpha(text[0]) &&
60          (text[2] == '/' || text[2] == '\\') && text.find_last_of(':') == 1;
61 #else
62   return false;
63 #endif
64 }
65 
~MultiFileErrorCollector()66 MultiFileErrorCollector::~MultiFileErrorCollector() {}
67 
68 // This class serves two purposes:
69 // - It implements the ErrorCollector interface (used by Tokenizer and Parser)
70 //   in terms of MultiFileErrorCollector, using a particular filename.
71 // - It lets us check if any errors have occurred.
72 class SourceTreeDescriptorDatabase::SingleFileErrorCollector
73     : public io::ErrorCollector {
74  public:
SingleFileErrorCollector(const std::string & filename,MultiFileErrorCollector * multi_file_error_collector)75   SingleFileErrorCollector(const std::string& filename,
76                            MultiFileErrorCollector* multi_file_error_collector)
77       : filename_(filename),
78         multi_file_error_collector_(multi_file_error_collector),
79         had_errors_(false) {}
~SingleFileErrorCollector()80   ~SingleFileErrorCollector() override {}
81 
had_errors()82   bool had_errors() { return had_errors_; }
83 
84   // implements ErrorCollector ---------------------------------------
RecordError(int line,int column,absl::string_view message)85   void RecordError(int line, int column, absl::string_view message) override {
86     if (multi_file_error_collector_ != nullptr) {
87       multi_file_error_collector_->RecordError(filename_, line, column,
88                                                message);
89     }
90     had_errors_ = true;
91   }
92 
93  private:
94   std::string filename_;
95   MultiFileErrorCollector* multi_file_error_collector_;
96   bool had_errors_;
97 };
98 
99 // ===================================================================
100 
SourceTreeDescriptorDatabase(SourceTree * source_tree)101 SourceTreeDescriptorDatabase::SourceTreeDescriptorDatabase(
102     SourceTree* source_tree)
103     : source_tree_(source_tree),
104       fallback_database_(nullptr),
105       error_collector_(nullptr),
106       using_validation_error_collector_(false),
107       validation_error_collector_(this) {}
108 
SourceTreeDescriptorDatabase(SourceTree * source_tree,DescriptorDatabase * fallback_database)109 SourceTreeDescriptorDatabase::SourceTreeDescriptorDatabase(
110     SourceTree* source_tree, DescriptorDatabase* fallback_database)
111     : source_tree_(source_tree),
112       fallback_database_(fallback_database),
113       error_collector_(nullptr),
114       using_validation_error_collector_(false),
115       validation_error_collector_(this) {}
116 
~SourceTreeDescriptorDatabase()117 SourceTreeDescriptorDatabase::~SourceTreeDescriptorDatabase() {}
118 
FindFileByName(const std::string & filename,FileDescriptorProto * output)119 bool SourceTreeDescriptorDatabase::FindFileByName(const std::string& filename,
120                                                   FileDescriptorProto* output) {
121   std::unique_ptr<io::ZeroCopyInputStream> input(source_tree_->Open(filename));
122   if (input == nullptr) {
123     if (fallback_database_ != nullptr &&
124         fallback_database_->FindFileByName(filename, output)) {
125       return true;
126     }
127     if (error_collector_ != nullptr) {
128       error_collector_->RecordError(filename, -1, 0,
129                                     source_tree_->GetLastErrorMessage());
130     }
131     return false;
132   }
133 
134   // Set up the tokenizer and parser.
135   SingleFileErrorCollector file_error_collector(filename, error_collector_);
136   io::Tokenizer tokenizer(input.get(), &file_error_collector);
137 
138   Parser parser;
139   if (error_collector_ != nullptr) {
140     parser.RecordErrorsTo(&file_error_collector);
141   }
142   if (using_validation_error_collector_) {
143     parser.RecordSourceLocationsTo(&source_locations_);
144   }
145 
146   // Parse it.
147   output->set_name(filename);
148   return parser.Parse(&tokenizer, output) && !file_error_collector.had_errors();
149 }
150 
FindFileContainingSymbol(const std::string & symbol_name,FileDescriptorProto * output)151 bool SourceTreeDescriptorDatabase::FindFileContainingSymbol(
152     const std::string& symbol_name, FileDescriptorProto* output) {
153   return false;
154 }
155 
FindFileContainingExtension(const std::string & containing_type,int field_number,FileDescriptorProto * output)156 bool SourceTreeDescriptorDatabase::FindFileContainingExtension(
157     const std::string& containing_type, int field_number,
158     FileDescriptorProto* output) {
159   return false;
160 }
161 
162 // -------------------------------------------------------------------
163 
164 SourceTreeDescriptorDatabase::ValidationErrorCollector::
ValidationErrorCollector(SourceTreeDescriptorDatabase * owner)165     ValidationErrorCollector(SourceTreeDescriptorDatabase* owner)
166     : owner_(owner) {}
167 
168 SourceTreeDescriptorDatabase::ValidationErrorCollector::
~ValidationErrorCollector()169     ~ValidationErrorCollector() {}
170 
RecordError(absl::string_view filename,absl::string_view element_name,const Message * descriptor,ErrorLocation location,absl::string_view message)171 void SourceTreeDescriptorDatabase::ValidationErrorCollector::RecordError(
172     absl::string_view filename, absl::string_view element_name,
173     const Message* descriptor, ErrorLocation location,
174     absl::string_view message) {
175   if (owner_->error_collector_ == nullptr) return;
176 
177   int line, column;
178   if (location == DescriptorPool::ErrorCollector::IMPORT) {
179     owner_->source_locations_.FindImport(descriptor, element_name, &line,
180                                          &column);
181   } else {
182     owner_->source_locations_.Find(descriptor, location, &line, &column);
183   }
184   owner_->error_collector_->RecordError(filename, line, column, message);
185 }
186 
RecordWarning(absl::string_view filename,absl::string_view element_name,const Message * descriptor,ErrorLocation location,absl::string_view message)187 void SourceTreeDescriptorDatabase::ValidationErrorCollector::RecordWarning(
188     absl::string_view filename, absl::string_view element_name,
189     const Message* descriptor, ErrorLocation location,
190     absl::string_view message) {
191   if (owner_->error_collector_ == nullptr) return;
192 
193   int line, column;
194   if (location == DescriptorPool::ErrorCollector::IMPORT) {
195     owner_->source_locations_.FindImport(descriptor, element_name, &line,
196                                          &column);
197   } else {
198     owner_->source_locations_.Find(descriptor, location, &line, &column);
199   }
200   owner_->error_collector_->RecordWarning(filename, line, column, message);
201 }
202 
203 // ===================================================================
204 
Importer(SourceTree * source_tree,MultiFileErrorCollector * error_collector)205 Importer::Importer(SourceTree* source_tree,
206                    MultiFileErrorCollector* error_collector)
207     : database_(source_tree),
208       pool_(&database_, database_.GetValidationErrorCollector()) {
209   pool_.EnforceWeakDependencies(true);
210   database_.RecordErrorsTo(error_collector);
211 }
212 
~Importer()213 Importer::~Importer() {}
214 
Import(const std::string & filename)215 const FileDescriptor* Importer::Import(const std::string& filename) {
216   return pool_.FindFileByName(filename);
217 }
218 
AddDirectInputFile(absl::string_view file_name,bool is_error)219 void Importer::AddDirectInputFile(absl::string_view file_name, bool is_error) {
220   pool_.AddDirectInputFile(file_name, is_error);
221 }
222 
ClearDirectInputFiles()223 void Importer::ClearDirectInputFiles() { pool_.ClearDirectInputFiles(); }
224 
225 
226 // ===================================================================
227 
~SourceTree()228 SourceTree::~SourceTree() {}
229 
GetLastErrorMessage()230 std::string SourceTree::GetLastErrorMessage() { return "File not found."; }
231 
DiskSourceTree()232 DiskSourceTree::DiskSourceTree() {}
233 
~DiskSourceTree()234 DiskSourceTree::~DiskSourceTree() {}
235 
236 // Given a path, returns an equivalent path with these changes:
237 // - On Windows, any backslashes are replaced with forward slashes.
238 // - Any instances of the directory "." are removed.
239 // - Any consecutive '/'s are collapsed into a single slash.
240 // Note that the resulting string may be empty.
241 //
242 // TODO:  It would be nice to handle "..", e.g. so that we can figure
243 //   out that "foo/bar.proto" is inside "baz/../foo".  However, if baz is a
244 //   symlink or doesn't exist, then things get complicated, and we can't
245 //   actually determine this without investigating the filesystem, probably
246 //   in non-portable ways.  So, we punt.
247 //
248 // TODO:  It would be nice to use realpath() here except that it
249 //   resolves symbolic links.  This could cause problems if people place
250 //   symbolic links in their source tree.  For example, if you executed:
251 //     protoc --proto_path=foo foo/bar/baz.proto
252 //   then if foo/bar is a symbolic link, foo/bar/baz.proto will canonicalize
253 //   to a path which does not appear to be under foo, and thus the compiler
254 //   will complain that baz.proto is not inside the --proto_path.
CanonicalizePath(absl::string_view path)255 static std::string CanonicalizePath(absl::string_view path) {
256 #ifdef _WIN32
257   // The Win32 API accepts forward slashes as a path delimiter even though
258   // backslashes are standard.  Let's avoid confusion and use only forward
259   // slashes.
260   std::string path_str;
261   if (absl::StartsWith(path, "\\\\")) {
262     // Avoid converting two leading backslashes.
263     path_str = absl::StrCat("\\\\",
264                             absl::StrReplaceAll(path.substr(2), {{"\\", "/"}}));
265   } else {
266     path_str = absl::StrReplaceAll(path, {{"\\", "/"}});
267   }
268   path = path_str;
269 #endif
270 
271   std::vector<absl::string_view> canonical_parts;
272   if (!path.empty() && path.front() == '/') canonical_parts.push_back("");
273   for (absl::string_view part : absl::StrSplit(path, '/', absl::SkipEmpty())) {
274     if (part == ".") {
275       // Ignore.
276     } else {
277       canonical_parts.push_back(part);
278     }
279   }
280   if (!path.empty() && path.back() == '/') canonical_parts.push_back("");
281 
282   return absl::StrJoin(canonical_parts, "/");
283 }
284 
ContainsParentReference(absl::string_view path)285 static inline bool ContainsParentReference(absl::string_view path) {
286   return path == ".." || absl::StartsWith(path, "../") ||
287          absl::EndsWith(path, "/..") || absl::StrContains(path, "/../");
288 }
289 
290 // Maps a file from an old location to a new one.  Typically, old_prefix is
291 // a virtual path and new_prefix is its corresponding disk path.  Returns
292 // false if the filename did not start with old_prefix, otherwise replaces
293 // old_prefix with new_prefix and stores the result in *result.  Examples:
294 //   string result;
295 //   assert(ApplyMapping("foo/bar", "", "baz", &result));
296 //   assert(result == "baz/foo/bar");
297 //
298 //   assert(ApplyMapping("foo/bar", "foo", "baz", &result));
299 //   assert(result == "baz/bar");
300 //
301 //   assert(ApplyMapping("foo", "foo", "bar", &result));
302 //   assert(result == "bar");
303 //
304 //   assert(!ApplyMapping("foo/bar", "baz", "qux", &result));
305 //   assert(!ApplyMapping("foo/bar", "baz", "qux", &result));
306 //   assert(!ApplyMapping("foobar", "foo", "baz", &result));
ApplyMapping(absl::string_view filename,absl::string_view old_prefix,absl::string_view new_prefix,std::string * result)307 static bool ApplyMapping(absl::string_view filename,
308                          absl::string_view old_prefix,
309                          absl::string_view new_prefix, std::string* result) {
310   if (old_prefix.empty()) {
311     // old_prefix matches any relative path.
312     if (ContainsParentReference(filename)) {
313       // We do not allow the file name to use "..".
314       return false;
315     }
316     if (absl::StartsWith(filename, "/") || IsWindowsAbsolutePath(filename)) {
317       // This is an absolute path, so it isn't matched by the empty string.
318       return false;
319     }
320     result->assign(std::string(new_prefix));
321     if (!result->empty()) result->push_back('/');
322     result->append(std::string(filename));
323     return true;
324   } else if (absl::StartsWith(filename, old_prefix)) {
325     // old_prefix is a prefix of the filename.  Is it the whole filename?
326     if (filename.size() == old_prefix.size()) {
327       // Yep, it's an exact match.
328       *result = std::string(new_prefix);
329       return true;
330     } else {
331       // Not an exact match.  Is the next character a '/'?  Otherwise,
332       // this isn't actually a match at all.  E.g. the prefix "foo/bar"
333       // does not match the filename "foo/barbaz".
334       int after_prefix_start = -1;
335       if (filename[old_prefix.size()] == '/') {
336         after_prefix_start = old_prefix.size() + 1;
337       } else if (filename[old_prefix.size() - 1] == '/') {
338         // old_prefix is never empty, and canonicalized paths never have
339         // consecutive '/' characters.
340         after_prefix_start = old_prefix.size();
341       }
342       if (after_prefix_start != -1) {
343         // Yep.  So the prefixes are directories and the filename is a file
344         // inside them.
345         absl::string_view after_prefix = filename.substr(after_prefix_start);
346         if (ContainsParentReference(after_prefix)) {
347           // We do not allow the file name to use "..".
348           return false;
349         }
350         result->assign(std::string(new_prefix));
351         if (!result->empty()) result->push_back('/');
352         result->append(std::string(after_prefix));
353         return true;
354       }
355     }
356   }
357 
358   return false;
359 }
360 
MapPath(absl::string_view virtual_path,absl::string_view disk_path)361 void DiskSourceTree::MapPath(absl::string_view virtual_path,
362                              absl::string_view disk_path) {
363   mappings_.push_back(
364       Mapping(std::string(virtual_path), CanonicalizePath(disk_path)));
365 }
366 
367 DiskSourceTree::DiskFileToVirtualFileResult
DiskFileToVirtualFile(absl::string_view disk_file,std::string * virtual_file,std::string * shadowing_disk_file)368 DiskSourceTree::DiskFileToVirtualFile(absl::string_view disk_file,
369                                       std::string* virtual_file,
370                                       std::string* shadowing_disk_file) {
371   int mapping_index = -1;
372   std::string canonical_disk_file = CanonicalizePath(disk_file);
373 
374   for (int i = 0; i < mappings_.size(); i++) {
375     // Apply the mapping in reverse.
376     if (ApplyMapping(canonical_disk_file, mappings_[i].disk_path,
377                      mappings_[i].virtual_path, virtual_file)) {
378       // Success.
379       mapping_index = i;
380       break;
381     }
382   }
383 
384   if (mapping_index == -1) {
385     return NO_MAPPING;
386   }
387 
388   // Iterate through all mappings with higher precedence and verify that none
389   // of them map this file to some other existing file.
390   for (int i = 0; i < mapping_index; i++) {
391     if (ApplyMapping(*virtual_file, mappings_[i].virtual_path,
392                      mappings_[i].disk_path, shadowing_disk_file)) {
393       if (access(shadowing_disk_file->c_str(), F_OK) >= 0) {
394         // File exists.
395         return SHADOWED;
396       }
397     }
398   }
399   shadowing_disk_file->clear();
400 
401   // Verify that we can open the file.  Note that this also has the side-effect
402   // of verifying that we are not canonicalizing away any non-existent
403   // directories.
404   std::unique_ptr<io::ZeroCopyInputStream> stream(OpenDiskFile(disk_file));
405   if (stream == nullptr) {
406     return CANNOT_OPEN;
407   }
408 
409   return SUCCESS;
410 }
411 
VirtualFileToDiskFile(absl::string_view virtual_file,std::string * disk_file)412 bool DiskSourceTree::VirtualFileToDiskFile(absl::string_view virtual_file,
413                                            std::string* disk_file) {
414   std::unique_ptr<io::ZeroCopyInputStream> stream(
415       OpenVirtualFile(virtual_file, disk_file));
416   return stream != nullptr;
417 }
418 
Open(absl::string_view filename)419 io::ZeroCopyInputStream* DiskSourceTree::Open(absl::string_view filename) {
420   return OpenVirtualFile(filename, nullptr);
421 }
422 
GetLastErrorMessage()423 std::string DiskSourceTree::GetLastErrorMessage() {
424   return last_error_message_;
425 }
426 
OpenVirtualFile(absl::string_view virtual_file,std::string * disk_file)427 io::ZeroCopyInputStream* DiskSourceTree::OpenVirtualFile(
428     absl::string_view virtual_file, std::string* disk_file) {
429   if (virtual_file != CanonicalizePath(virtual_file) ||
430       ContainsParentReference(virtual_file)) {
431     // We do not allow importing of paths containing things like ".." or
432     // consecutive slashes since the compiler expects files to be uniquely
433     // identified by file name.
434     last_error_message_ =
435         "Backslashes, consecutive slashes, \".\", or \"..\" "
436         "are not allowed in the virtual path";
437     return nullptr;
438   }
439 
440   for (const auto& mapping : mappings_) {
441     std::string temp_disk_file;
442     if (ApplyMapping(virtual_file, mapping.virtual_path, mapping.disk_path,
443                      &temp_disk_file)) {
444       io::ZeroCopyInputStream* stream = OpenDiskFile(temp_disk_file);
445       if (stream != nullptr) {
446         if (disk_file != nullptr) {
447           *disk_file = temp_disk_file;
448         }
449         return stream;
450       }
451 
452       if (errno == EACCES) {
453         // The file exists but is not readable.
454         last_error_message_ =
455             absl::StrCat("Read access is denied for file: ", temp_disk_file);
456         return nullptr;
457       }
458     }
459   }
460   last_error_message_ = "File not found.";
461   return nullptr;
462 }
463 
OpenDiskFile(absl::string_view filename)464 io::ZeroCopyInputStream* DiskSourceTree::OpenDiskFile(
465     absl::string_view filename) {
466   struct stat sb;
467   int ret = 0;
468   do {
469     ret = stat(std::string(filename).c_str(), &sb);
470   } while (ret != 0 && errno == EINTR);
471 #if defined(_WIN32)
472   if (ret == 0 && sb.st_mode & S_IFDIR) {
473     last_error_message_ = "Input file is a directory.";
474     return nullptr;
475   }
476 #else
477   if (ret == 0 && S_ISDIR(sb.st_mode)) {
478     last_error_message_ = "Input file is a directory.";
479     return nullptr;
480   }
481 #endif
482   int file_descriptor;
483   do {
484     file_descriptor = open(std::string(filename).c_str(), O_RDONLY);
485   } while (file_descriptor < 0 && errno == EINTR);
486   if (file_descriptor >= 0) {
487     io::FileInputStream* result = new io::FileInputStream(file_descriptor);
488     result->SetCloseOnDelete(true);
489     return result;
490   } else {
491     return nullptr;
492   }
493 }
494 
495 }  // namespace compiler
496 }  // namespace protobuf
497 }  // namespace google
498