1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc. All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 // * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 // * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 // * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31 // Author: kenton@google.com (Kenton Varda)
32 // Based on original Protocol Buffers design by
33 // Sanjay Ghemawat, Jeff Dean, and others.
34
35 #ifdef _MSC_VER
36 #include <direct.h>
37 #else
38 #include <unistd.h>
39 #endif
40 #include <errno.h>
41 #include <fcntl.h>
42 #include <sys/stat.h>
43 #include <sys/types.h>
44
45 #include <algorithm>
46 #include <memory>
47
48 #include <google/protobuf/compiler/importer.h>
49 #include <google/protobuf/compiler/parser.h>
50 #include <google/protobuf/io/tokenizer.h>
51 #include <google/protobuf/io/zero_copy_stream_impl.h>
52 #include <google/protobuf/stubs/strutil.h>
53 #include <google/protobuf/io/io_win32.h>
54
55 #ifdef _WIN32
56 #include <ctype.h>
57 #endif
58
59 namespace google {
60 namespace protobuf {
61 namespace compiler {
62
63 #ifdef _WIN32
64 // DO NOT include <io.h>, instead create functions in io_win32.{h,cc} and import
65 // them like we do below.
66 using google::protobuf::io::win32::access;
67 using google::protobuf::io::win32::open;
68 #endif
69
// Returns true if the text looks like a Windows-style absolute path, starting
// with a drive letter.  Example:  "C:\foo".  Always returns false unless
// building for Windows or Cygwin.
//
// The drive letter is recognized with an explicit ASCII range check instead
// of isalpha(): handing a plain char with a negative value (any byte >= 0x80
// where char is signed) to the <ctype.h> classifiers is undefined behavior,
// and their result depends on the current locale, whereas drive letters are
// always A-Z or a-z.
//
// TODO(kenton):  Share this with copy in command_line_interface.cc?
static bool IsWindowsAbsolutePath(const std::string& text) {
#if defined(_WIN32) || defined(__CYGWIN__)
  if (text.size() < 3 || text[1] != ':') return false;

  const char drive = text[0];
  const bool is_drive_letter =
      (drive >= 'A' && drive <= 'Z') || (drive >= 'a' && drive <= 'z');

  // The colon after the drive letter must be the only colon in the path.
  return is_drive_letter && (text[2] == '/' || text[2] == '\\') &&
         text.find_last_of(':') == 1;
#else
  return false;
#endif
}
81
~MultiFileErrorCollector()82 MultiFileErrorCollector::~MultiFileErrorCollector() {}
83
84 // This class serves two purposes:
85 // - It implements the ErrorCollector interface (used by Tokenizer and Parser)
86 // in terms of MultiFileErrorCollector, using a particular filename.
87 // - It lets us check if any errors have occurred.
88 class SourceTreeDescriptorDatabase::SingleFileErrorCollector
89 : public io::ErrorCollector {
90 public:
SingleFileErrorCollector(const std::string & filename,MultiFileErrorCollector * multi_file_error_collector)91 SingleFileErrorCollector(const std::string& filename,
92 MultiFileErrorCollector* multi_file_error_collector)
93 : filename_(filename),
94 multi_file_error_collector_(multi_file_error_collector),
95 had_errors_(false) {}
~SingleFileErrorCollector()96 ~SingleFileErrorCollector() {}
97
had_errors()98 bool had_errors() { return had_errors_; }
99
100 // implements ErrorCollector ---------------------------------------
AddError(int line,int column,const std::string & message)101 void AddError(int line, int column, const std::string& message) override {
102 if (multi_file_error_collector_ != NULL) {
103 multi_file_error_collector_->AddError(filename_, line, column, message);
104 }
105 had_errors_ = true;
106 }
107
108 private:
109 std::string filename_;
110 MultiFileErrorCollector* multi_file_error_collector_;
111 bool had_errors_;
112 };
113
114 // ===================================================================
115
SourceTreeDescriptorDatabase(SourceTree * source_tree)116 SourceTreeDescriptorDatabase::SourceTreeDescriptorDatabase(
117 SourceTree* source_tree)
118 : source_tree_(source_tree),
119 fallback_database_(nullptr),
120 error_collector_(nullptr),
121 using_validation_error_collector_(false),
122 validation_error_collector_(this) {}
123
SourceTreeDescriptorDatabase(SourceTree * source_tree,DescriptorDatabase * fallback_database)124 SourceTreeDescriptorDatabase::SourceTreeDescriptorDatabase(
125 SourceTree* source_tree, DescriptorDatabase* fallback_database)
126 : source_tree_(source_tree),
127 fallback_database_(fallback_database),
128 error_collector_(nullptr),
129 using_validation_error_collector_(false),
130 validation_error_collector_(this) {}
131
~SourceTreeDescriptorDatabase()132 SourceTreeDescriptorDatabase::~SourceTreeDescriptorDatabase() {}
133
FindFileByName(const std::string & filename,FileDescriptorProto * output)134 bool SourceTreeDescriptorDatabase::FindFileByName(const std::string& filename,
135 FileDescriptorProto* output) {
136 std::unique_ptr<io::ZeroCopyInputStream> input(source_tree_->Open(filename));
137 if (input == NULL) {
138 if (fallback_database_ != nullptr &&
139 fallback_database_->FindFileByName(filename, output)) {
140 return true;
141 }
142 if (error_collector_ != NULL) {
143 error_collector_->AddError(filename, -1, 0,
144 source_tree_->GetLastErrorMessage());
145 }
146 return false;
147 }
148
149 // Set up the tokenizer and parser.
150 SingleFileErrorCollector file_error_collector(filename, error_collector_);
151 io::Tokenizer tokenizer(input.get(), &file_error_collector);
152
153 Parser parser;
154 if (error_collector_ != NULL) {
155 parser.RecordErrorsTo(&file_error_collector);
156 }
157 if (using_validation_error_collector_) {
158 parser.RecordSourceLocationsTo(&source_locations_);
159 }
160
161 // Parse it.
162 output->set_name(filename);
163 return parser.Parse(&tokenizer, output) && !file_error_collector.had_errors();
164 }
165
FindFileContainingSymbol(const std::string & symbol_name,FileDescriptorProto * output)166 bool SourceTreeDescriptorDatabase::FindFileContainingSymbol(
167 const std::string& symbol_name, FileDescriptorProto* output) {
168 return false;
169 }
170
FindFileContainingExtension(const std::string & containing_type,int field_number,FileDescriptorProto * output)171 bool SourceTreeDescriptorDatabase::FindFileContainingExtension(
172 const std::string& containing_type, int field_number,
173 FileDescriptorProto* output) {
174 return false;
175 }
176
177 // -------------------------------------------------------------------
178
179 SourceTreeDescriptorDatabase::ValidationErrorCollector::
ValidationErrorCollector(SourceTreeDescriptorDatabase * owner)180 ValidationErrorCollector(SourceTreeDescriptorDatabase* owner)
181 : owner_(owner) {}
182
183 SourceTreeDescriptorDatabase::ValidationErrorCollector::
~ValidationErrorCollector()184 ~ValidationErrorCollector() {}
185
AddError(const std::string & filename,const std::string & element_name,const Message * descriptor,ErrorLocation location,const std::string & message)186 void SourceTreeDescriptorDatabase::ValidationErrorCollector::AddError(
187 const std::string& filename, const std::string& element_name,
188 const Message* descriptor, ErrorLocation location,
189 const std::string& message) {
190 if (owner_->error_collector_ == NULL) return;
191
192 int line, column;
193 if (location == DescriptorPool::ErrorCollector::IMPORT) {
194 owner_->source_locations_.FindImport(descriptor, element_name, &line,
195 &column);
196 } else {
197 owner_->source_locations_.Find(descriptor, location, &line, &column);
198 }
199 owner_->error_collector_->AddError(filename, line, column, message);
200 }
201
AddWarning(const std::string & filename,const std::string & element_name,const Message * descriptor,ErrorLocation location,const std::string & message)202 void SourceTreeDescriptorDatabase::ValidationErrorCollector::AddWarning(
203 const std::string& filename, const std::string& element_name,
204 const Message* descriptor, ErrorLocation location,
205 const std::string& message) {
206 if (owner_->error_collector_ == NULL) return;
207
208 int line, column;
209 if (location == DescriptorPool::ErrorCollector::IMPORT) {
210 owner_->source_locations_.FindImport(descriptor, element_name, &line,
211 &column);
212 } else {
213 owner_->source_locations_.Find(descriptor, location, &line, &column);
214 }
215 owner_->error_collector_->AddWarning(filename, line, column, message);
216 }
217
218 // ===================================================================
219
Importer(SourceTree * source_tree,MultiFileErrorCollector * error_collector)220 Importer::Importer(SourceTree* source_tree,
221 MultiFileErrorCollector* error_collector)
222 : database_(source_tree),
223 pool_(&database_, database_.GetValidationErrorCollector()) {
224 pool_.EnforceWeakDependencies(true);
225 database_.RecordErrorsTo(error_collector);
226 }
227
~Importer()228 Importer::~Importer() {}
229
Import(const std::string & filename)230 const FileDescriptor* Importer::Import(const std::string& filename) {
231 return pool_.FindFileByName(filename);
232 }
233
AddUnusedImportTrackFile(const std::string & file_name,bool is_error)234 void Importer::AddUnusedImportTrackFile(const std::string& file_name,
235 bool is_error) {
236 pool_.AddUnusedImportTrackFile(file_name, is_error);
237 }
238
ClearUnusedImportTrackFiles()239 void Importer::ClearUnusedImportTrackFiles() {
240 pool_.ClearUnusedImportTrackFiles();
241 }
242
243
244 // ===================================================================
245
~SourceTree()246 SourceTree::~SourceTree() {}
247
GetLastErrorMessage()248 std::string SourceTree::GetLastErrorMessage() { return "File not found."; }
249
DiskSourceTree()250 DiskSourceTree::DiskSourceTree() {}
251
~DiskSourceTree()252 DiskSourceTree::~DiskSourceTree() {}
253
// Returns the last character of str, or '\0' when str is empty.
//
// Indexing with str.size() - 1 on an empty string wraps around to the largest
// size_t value and reads out of bounds, so the empty case is handled
// explicitly instead of relying on every caller to check first.
static inline char LastChar(const std::string& str) {
  return str.empty() ? '\0' : str.back();
}
257
258 // Given a path, returns an equivalent path with these changes:
259 // - On Windows, any backslashes are replaced with forward slashes.
260 // - Any instances of the directory "." are removed.
261 // - Any consecutive '/'s are collapsed into a single slash.
262 // Note that the resulting string may be empty.
263 //
264 // TODO(kenton): It would be nice to handle "..", e.g. so that we can figure
265 // out that "foo/bar.proto" is inside "baz/../foo". However, if baz is a
266 // symlink or doesn't exist, then things get complicated, and we can't
267 // actually determine this without investigating the filesystem, probably
268 // in non-portable ways. So, we punt.
269 //
270 // TODO(kenton): It would be nice to use realpath() here except that it
271 // resolves symbolic links. This could cause problems if people place
272 // symbolic links in their source tree. For example, if you executed:
273 // protoc --proto_path=foo foo/bar/baz.proto
274 // then if foo/bar is a symbolic link, foo/bar/baz.proto will canonicalize
275 // to a path which does not appear to be under foo, and thus the compiler
276 // will complain that baz.proto is not inside the --proto_path.
CanonicalizePath(std::string path)277 static std::string CanonicalizePath(std::string path) {
278 #ifdef _WIN32
279 // The Win32 API accepts forward slashes as a path delimiter even though
280 // backslashes are standard. Let's avoid confusion and use only forward
281 // slashes.
282 if (HasPrefixString(path, "\\\\")) {
283 // Avoid converting two leading backslashes.
284 path = "\\\\" + StringReplace(path.substr(2), "\\", "/", true);
285 } else {
286 path = StringReplace(path, "\\", "/", true);
287 }
288 #endif
289
290 std::vector<std::string> canonical_parts;
291 std::vector<std::string> parts = Split(
292 path, "/", true); // Note: Removes empty parts.
293 for (int i = 0; i < parts.size(); i++) {
294 if (parts[i] == ".") {
295 // Ignore.
296 } else {
297 canonical_parts.push_back(parts[i]);
298 }
299 }
300 std::string result = Join(canonical_parts, "/");
301 if (!path.empty() && path[0] == '/') {
302 // Restore leading slash.
303 result = '/' + result;
304 }
305 if (!path.empty() && LastChar(path) == '/' && !result.empty() &&
306 LastChar(result) != '/') {
307 // Restore trailing slash.
308 result += '/';
309 }
310 return result;
311 }
312
ContainsParentReference(const std::string & path)313 static inline bool ContainsParentReference(const std::string& path) {
314 return path == ".." || HasPrefixString(path, "../") ||
315 HasSuffixString(path, "/..") || path.find("/../") != std::string::npos;
316 }
317
318 // Maps a file from an old location to a new one. Typically, old_prefix is
319 // a virtual path and new_prefix is its corresponding disk path. Returns
320 // false if the filename did not start with old_prefix, otherwise replaces
321 // old_prefix with new_prefix and stores the result in *result. Examples:
322 // string result;
323 // assert(ApplyMapping("foo/bar", "", "baz", &result));
324 // assert(result == "baz/foo/bar");
325 //
326 // assert(ApplyMapping("foo/bar", "foo", "baz", &result));
327 // assert(result == "baz/bar");
328 //
329 // assert(ApplyMapping("foo", "foo", "bar", &result));
330 // assert(result == "bar");
331 //
332 // assert(!ApplyMapping("foo/bar", "baz", "qux", &result));
333 // assert(!ApplyMapping("foo/bar", "baz", "qux", &result));
334 // assert(!ApplyMapping("foobar", "foo", "baz", &result));
ApplyMapping(const std::string & filename,const std::string & old_prefix,const std::string & new_prefix,std::string * result)335 static bool ApplyMapping(const std::string& filename,
336 const std::string& old_prefix,
337 const std::string& new_prefix, std::string* result) {
338 if (old_prefix.empty()) {
339 // old_prefix matches any relative path.
340 if (ContainsParentReference(filename)) {
341 // We do not allow the file name to use "..".
342 return false;
343 }
344 if (HasPrefixString(filename, "/") || IsWindowsAbsolutePath(filename)) {
345 // This is an absolute path, so it isn't matched by the empty string.
346 return false;
347 }
348 result->assign(new_prefix);
349 if (!result->empty()) result->push_back('/');
350 result->append(filename);
351 return true;
352 } else if (HasPrefixString(filename, old_prefix)) {
353 // old_prefix is a prefix of the filename. Is it the whole filename?
354 if (filename.size() == old_prefix.size()) {
355 // Yep, it's an exact match.
356 *result = new_prefix;
357 return true;
358 } else {
359 // Not an exact match. Is the next character a '/'? Otherwise,
360 // this isn't actually a match at all. E.g. the prefix "foo/bar"
361 // does not match the filename "foo/barbaz".
362 int after_prefix_start = -1;
363 if (filename[old_prefix.size()] == '/') {
364 after_prefix_start = old_prefix.size() + 1;
365 } else if (filename[old_prefix.size() - 1] == '/') {
366 // old_prefix is never empty, and canonicalized paths never have
367 // consecutive '/' characters.
368 after_prefix_start = old_prefix.size();
369 }
370 if (after_prefix_start != -1) {
371 // Yep. So the prefixes are directories and the filename is a file
372 // inside them.
373 std::string after_prefix = filename.substr(after_prefix_start);
374 if (ContainsParentReference(after_prefix)) {
375 // We do not allow the file name to use "..".
376 return false;
377 }
378 result->assign(new_prefix);
379 if (!result->empty()) result->push_back('/');
380 result->append(after_prefix);
381 return true;
382 }
383 }
384 }
385
386 return false;
387 }
388
MapPath(const std::string & virtual_path,const std::string & disk_path)389 void DiskSourceTree::MapPath(const std::string& virtual_path,
390 const std::string& disk_path) {
391 mappings_.push_back(Mapping(virtual_path, CanonicalizePath(disk_path)));
392 }
393
394 DiskSourceTree::DiskFileToVirtualFileResult
DiskFileToVirtualFile(const std::string & disk_file,std::string * virtual_file,std::string * shadowing_disk_file)395 DiskSourceTree::DiskFileToVirtualFile(const std::string& disk_file,
396 std::string* virtual_file,
397 std::string* shadowing_disk_file) {
398 int mapping_index = -1;
399 std::string canonical_disk_file = CanonicalizePath(disk_file);
400
401 for (int i = 0; i < mappings_.size(); i++) {
402 // Apply the mapping in reverse.
403 if (ApplyMapping(canonical_disk_file, mappings_[i].disk_path,
404 mappings_[i].virtual_path, virtual_file)) {
405 // Success.
406 mapping_index = i;
407 break;
408 }
409 }
410
411 if (mapping_index == -1) {
412 return NO_MAPPING;
413 }
414
415 // Iterate through all mappings with higher precedence and verify that none
416 // of them map this file to some other existing file.
417 for (int i = 0; i < mapping_index; i++) {
418 if (ApplyMapping(*virtual_file, mappings_[i].virtual_path,
419 mappings_[i].disk_path, shadowing_disk_file)) {
420 if (access(shadowing_disk_file->c_str(), F_OK) >= 0) {
421 // File exists.
422 return SHADOWED;
423 }
424 }
425 }
426 shadowing_disk_file->clear();
427
428 // Verify that we can open the file. Note that this also has the side-effect
429 // of verifying that we are not canonicalizing away any non-existent
430 // directories.
431 std::unique_ptr<io::ZeroCopyInputStream> stream(OpenDiskFile(disk_file));
432 if (stream == NULL) {
433 return CANNOT_OPEN;
434 }
435
436 return SUCCESS;
437 }
438
VirtualFileToDiskFile(const std::string & virtual_file,std::string * disk_file)439 bool DiskSourceTree::VirtualFileToDiskFile(const std::string& virtual_file,
440 std::string* disk_file) {
441 std::unique_ptr<io::ZeroCopyInputStream> stream(
442 OpenVirtualFile(virtual_file, disk_file));
443 return stream != NULL;
444 }
445
Open(const std::string & filename)446 io::ZeroCopyInputStream* DiskSourceTree::Open(const std::string& filename) {
447 return OpenVirtualFile(filename, NULL);
448 }
449
GetLastErrorMessage()450 std::string DiskSourceTree::GetLastErrorMessage() {
451 return last_error_message_;
452 }
453
OpenVirtualFile(const std::string & virtual_file,std::string * disk_file)454 io::ZeroCopyInputStream* DiskSourceTree::OpenVirtualFile(
455 const std::string& virtual_file, std::string* disk_file) {
456 if (virtual_file != CanonicalizePath(virtual_file) ||
457 ContainsParentReference(virtual_file)) {
458 // We do not allow importing of paths containing things like ".." or
459 // consecutive slashes since the compiler expects files to be uniquely
460 // identified by file name.
461 last_error_message_ =
462 "Backslashes, consecutive slashes, \".\", or \"..\" "
463 "are not allowed in the virtual path";
464 return NULL;
465 }
466
467 for (int i = 0; i < mappings_.size(); i++) {
468 std::string temp_disk_file;
469 if (ApplyMapping(virtual_file, mappings_[i].virtual_path,
470 mappings_[i].disk_path, &temp_disk_file)) {
471 io::ZeroCopyInputStream* stream = OpenDiskFile(temp_disk_file);
472 if (stream != NULL) {
473 if (disk_file != NULL) {
474 *disk_file = temp_disk_file;
475 }
476 return stream;
477 }
478
479 if (errno == EACCES) {
480 // The file exists but is not readable.
481 last_error_message_ =
482 "Read access is denied for file: " + temp_disk_file;
483 return NULL;
484 }
485 }
486 }
487 last_error_message_ = "File not found.";
488 return NULL;
489 }
490
OpenDiskFile(const std::string & filename)491 io::ZeroCopyInputStream* DiskSourceTree::OpenDiskFile(
492 const std::string& filename) {
493 struct stat sb;
494 int ret = 0;
495 do {
496 ret = stat(filename.c_str(), &sb);
497 } while (ret != 0 && errno == EINTR);
498 if (sb.st_mode & S_IFDIR) {
499 last_error_message_ = "Input file is a directory.";
500 return NULL;
501 }
502 int file_descriptor;
503 do {
504 file_descriptor = open(filename.c_str(), O_RDONLY);
505 } while (file_descriptor < 0 && errno == EINTR);
506 if (file_descriptor >= 0) {
507 io::FileInputStream* result = new io::FileInputStream(file_descriptor);
508 result->SetCloseOnDelete(true);
509 return result;
510 } else {
511 return NULL;
512 }
513 }
514
515 } // namespace compiler
516 } // namespace protobuf
517 } // namespace google
518