1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc. All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 // * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 // * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 // * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31 // Author: kenton@google.com (Kenton Varda)
32 // Based on original Protocol Buffers design by
33 // Sanjay Ghemawat, Jeff Dean, and others.
34
35 #ifdef _MSC_VER
36 #include <direct.h>
37 #else
38 #include <unistd.h>
39 #endif
40 #include <errno.h>
41 #include <fcntl.h>
42 #include <sys/stat.h>
43 #include <sys/types.h>
44
45 #include <algorithm>
46 #include <memory>
47
48 #include <google/protobuf/compiler/importer.h>
49
50 #include <google/protobuf/compiler/parser.h>
51 #include <google/protobuf/io/io_win32.h>
52 #include <google/protobuf/io/tokenizer.h>
53 #include <google/protobuf/io/zero_copy_stream_impl.h>
54 #include <google/protobuf/stubs/strutil.h>
55
56
57
58 #ifdef _WIN32
59 #include <ctype.h>
60 #endif
61
62 namespace google {
63 namespace protobuf {
64 namespace compiler {
65
66 #ifdef _WIN32
67 // DO NOT include <io.h>, instead create functions in io_win32.{h,cc} and import
68 // them like we do below.
69 using google::protobuf::io::win32::access;
70 using google::protobuf::io::win32::open;
71 #endif
72
// Returns true if the text looks like a Windows-style absolute path, starting
// with a drive letter.  Example: "C:\foo".  A match requires a letter, a
// colon, then '/' or '\\', and no further ':' anywhere in the text.  On
// platforms other than Windows/Cygwin this always returns false.
// TODO(kenton):  Share this with copy in command_line_interface.cc?
static bool IsWindowsAbsolutePath(const std::string& text) {
#if defined(_WIN32) || defined(__CYGWIN__)
  // isalpha() has undefined behavior for negative arguments other than EOF,
  // so convert the (possibly signed) char to unsigned char first.
  return text.size() >= 3 && text[1] == ':' &&
         isalpha(static_cast<unsigned char>(text[0])) &&
         (text[2] == '/' || text[2] == '\\') && text.find_last_of(':') == 1;
#else
  return false;
#endif
}
84
~MultiFileErrorCollector()85 MultiFileErrorCollector::~MultiFileErrorCollector() {}
86
87 // This class serves two purposes:
88 // - It implements the ErrorCollector interface (used by Tokenizer and Parser)
89 // in terms of MultiFileErrorCollector, using a particular filename.
90 // - It lets us check if any errors have occurred.
91 class SourceTreeDescriptorDatabase::SingleFileErrorCollector
92 : public io::ErrorCollector {
93 public:
SingleFileErrorCollector(const std::string & filename,MultiFileErrorCollector * multi_file_error_collector)94 SingleFileErrorCollector(const std::string& filename,
95 MultiFileErrorCollector* multi_file_error_collector)
96 : filename_(filename),
97 multi_file_error_collector_(multi_file_error_collector),
98 had_errors_(false) {}
~SingleFileErrorCollector()99 ~SingleFileErrorCollector() {}
100
had_errors()101 bool had_errors() { return had_errors_; }
102
103 // implements ErrorCollector ---------------------------------------
AddError(int line,int column,const std::string & message)104 void AddError(int line, int column, const std::string& message) override {
105 if (multi_file_error_collector_ != NULL) {
106 multi_file_error_collector_->AddError(filename_, line, column, message);
107 }
108 had_errors_ = true;
109 }
110
111 private:
112 std::string filename_;
113 MultiFileErrorCollector* multi_file_error_collector_;
114 bool had_errors_;
115 };
116
117 // ===================================================================
118
SourceTreeDescriptorDatabase(SourceTree * source_tree)119 SourceTreeDescriptorDatabase::SourceTreeDescriptorDatabase(
120 SourceTree* source_tree)
121 : source_tree_(source_tree),
122 fallback_database_(nullptr),
123 error_collector_(nullptr),
124 using_validation_error_collector_(false),
125 validation_error_collector_(this) {}
126
SourceTreeDescriptorDatabase(SourceTree * source_tree,DescriptorDatabase * fallback_database)127 SourceTreeDescriptorDatabase::SourceTreeDescriptorDatabase(
128 SourceTree* source_tree, DescriptorDatabase* fallback_database)
129 : source_tree_(source_tree),
130 fallback_database_(fallback_database),
131 error_collector_(nullptr),
132 using_validation_error_collector_(false),
133 validation_error_collector_(this) {}
134
~SourceTreeDescriptorDatabase()135 SourceTreeDescriptorDatabase::~SourceTreeDescriptorDatabase() {}
136
FindFileByName(const std::string & filename,FileDescriptorProto * output)137 bool SourceTreeDescriptorDatabase::FindFileByName(const std::string& filename,
138 FileDescriptorProto* output) {
139 std::unique_ptr<io::ZeroCopyInputStream> input(source_tree_->Open(filename));
140 if (input == NULL) {
141 if (fallback_database_ != nullptr &&
142 fallback_database_->FindFileByName(filename, output)) {
143 return true;
144 }
145 if (error_collector_ != NULL) {
146 error_collector_->AddError(filename, -1, 0,
147 source_tree_->GetLastErrorMessage());
148 }
149 return false;
150 }
151
152 // Set up the tokenizer and parser.
153 SingleFileErrorCollector file_error_collector(filename, error_collector_);
154 io::Tokenizer tokenizer(input.get(), &file_error_collector);
155
156 Parser parser;
157 if (error_collector_ != NULL) {
158 parser.RecordErrorsTo(&file_error_collector);
159 }
160 if (using_validation_error_collector_) {
161 parser.RecordSourceLocationsTo(&source_locations_);
162 }
163
164 // Parse it.
165 output->set_name(filename);
166 return parser.Parse(&tokenizer, output) && !file_error_collector.had_errors();
167 }
168
FindFileContainingSymbol(const std::string & symbol_name,FileDescriptorProto * output)169 bool SourceTreeDescriptorDatabase::FindFileContainingSymbol(
170 const std::string& symbol_name, FileDescriptorProto* output) {
171 return false;
172 }
173
FindFileContainingExtension(const std::string & containing_type,int field_number,FileDescriptorProto * output)174 bool SourceTreeDescriptorDatabase::FindFileContainingExtension(
175 const std::string& containing_type, int field_number,
176 FileDescriptorProto* output) {
177 return false;
178 }
179
180 // -------------------------------------------------------------------
181
182 SourceTreeDescriptorDatabase::ValidationErrorCollector::
ValidationErrorCollector(SourceTreeDescriptorDatabase * owner)183 ValidationErrorCollector(SourceTreeDescriptorDatabase* owner)
184 : owner_(owner) {}
185
186 SourceTreeDescriptorDatabase::ValidationErrorCollector::
~ValidationErrorCollector()187 ~ValidationErrorCollector() {}
188
AddError(const std::string & filename,const std::string & element_name,const Message * descriptor,ErrorLocation location,const std::string & message)189 void SourceTreeDescriptorDatabase::ValidationErrorCollector::AddError(
190 const std::string& filename, const std::string& element_name,
191 const Message* descriptor, ErrorLocation location,
192 const std::string& message) {
193 if (owner_->error_collector_ == NULL) return;
194
195 int line, column;
196 if (location == DescriptorPool::ErrorCollector::IMPORT) {
197 owner_->source_locations_.FindImport(descriptor, element_name, &line,
198 &column);
199 } else {
200 owner_->source_locations_.Find(descriptor, location, &line, &column);
201 }
202 owner_->error_collector_->AddError(filename, line, column, message);
203 }
204
AddWarning(const std::string & filename,const std::string & element_name,const Message * descriptor,ErrorLocation location,const std::string & message)205 void SourceTreeDescriptorDatabase::ValidationErrorCollector::AddWarning(
206 const std::string& filename, const std::string& element_name,
207 const Message* descriptor, ErrorLocation location,
208 const std::string& message) {
209 if (owner_->error_collector_ == NULL) return;
210
211 int line, column;
212 if (location == DescriptorPool::ErrorCollector::IMPORT) {
213 owner_->source_locations_.FindImport(descriptor, element_name, &line,
214 &column);
215 } else {
216 owner_->source_locations_.Find(descriptor, location, &line, &column);
217 }
218 owner_->error_collector_->AddWarning(filename, line, column, message);
219 }
220
221 // ===================================================================
222
Importer(SourceTree * source_tree,MultiFileErrorCollector * error_collector)223 Importer::Importer(SourceTree* source_tree,
224 MultiFileErrorCollector* error_collector)
225 : database_(source_tree),
226 pool_(&database_, database_.GetValidationErrorCollector()) {
227 pool_.EnforceWeakDependencies(true);
228 database_.RecordErrorsTo(error_collector);
229 }
230
~Importer()231 Importer::~Importer() {}
232
Import(const std::string & filename)233 const FileDescriptor* Importer::Import(const std::string& filename) {
234 return pool_.FindFileByName(filename);
235 }
236
AddUnusedImportTrackFile(const std::string & file_name)237 void Importer::AddUnusedImportTrackFile(const std::string& file_name) {
238 pool_.AddUnusedImportTrackFile(file_name);
239 }
240
ClearUnusedImportTrackFiles()241 void Importer::ClearUnusedImportTrackFiles() {
242 pool_.ClearUnusedImportTrackFiles();
243 }
244
245
246 // ===================================================================
247
~SourceTree()248 SourceTree::~SourceTree() {}
249
GetLastErrorMessage()250 std::string SourceTree::GetLastErrorMessage() { return "File not found."; }
251
DiskSourceTree()252 DiskSourceTree::DiskSourceTree() {}
253
~DiskSourceTree()254 DiskSourceTree::~DiskSourceTree() {}
255
// Returns the final character of `str`, or '\0' when `str` is empty.
// (Indexing with `size() - 1` on an empty string would wrap around and read
// out of range, so the empty case is handled explicitly.)
static inline char LastChar(const std::string& str) {
  return str.empty() ? '\0' : str[str.size() - 1];
}
259
260 // Given a path, returns an equivalent path with these changes:
261 // - On Windows, any backslashes are replaced with forward slashes.
262 // - Any instances of the directory "." are removed.
263 // - Any consecutive '/'s are collapsed into a single slash.
264 // Note that the resulting string may be empty.
265 //
266 // TODO(kenton): It would be nice to handle "..", e.g. so that we can figure
267 // out that "foo/bar.proto" is inside "baz/../foo". However, if baz is a
268 // symlink or doesn't exist, then things get complicated, and we can't
269 // actually determine this without investigating the filesystem, probably
270 // in non-portable ways. So, we punt.
271 //
272 // TODO(kenton): It would be nice to use realpath() here except that it
273 // resolves symbolic links. This could cause problems if people place
274 // symbolic links in their source tree. For example, if you executed:
275 // protoc --proto_path=foo foo/bar/baz.proto
276 // then if foo/bar is a symbolic link, foo/bar/baz.proto will canonicalize
277 // to a path which does not appear to be under foo, and thus the compiler
278 // will complain that baz.proto is not inside the --proto_path.
CanonicalizePath(std::string path)279 static std::string CanonicalizePath(std::string path) {
280 #ifdef _WIN32
281 // The Win32 API accepts forward slashes as a path delimiter even though
282 // backslashes are standard. Let's avoid confusion and use only forward
283 // slashes.
284 if (HasPrefixString(path, "\\\\")) {
285 // Avoid converting two leading backslashes.
286 path = "\\\\" + StringReplace(path.substr(2), "\\", "/", true);
287 } else {
288 path = StringReplace(path, "\\", "/", true);
289 }
290 #endif
291
292 std::vector<std::string> canonical_parts;
293 std::vector<std::string> parts = Split(
294 path, "/", true); // Note: Removes empty parts.
295 for (int i = 0; i < parts.size(); i++) {
296 if (parts[i] == ".") {
297 // Ignore.
298 } else {
299 canonical_parts.push_back(parts[i]);
300 }
301 }
302 std::string result = Join(canonical_parts, "/");
303 if (!path.empty() && path[0] == '/') {
304 // Restore leading slash.
305 result = '/' + result;
306 }
307 if (!path.empty() && LastChar(path) == '/' && !result.empty() &&
308 LastChar(result) != '/') {
309 // Restore trailing slash.
310 result += '/';
311 }
312 return result;
313 }
314
// Returns true if any '/'-separated component of `path` is exactly "..".
// This covers the whole path being "..", a leading "../", a trailing "/..",
// and an embedded "/../".  Components that merely contain dots (e.g.
// "..foo", "foo..", "...") do not count.
static inline bool ContainsParentReference(const std::string& path) {
  std::string::size_type start = 0;
  for (;;) {
    const std::string::size_type slash = path.find('/', start);
    const std::string::size_type end =
        (slash == std::string::npos) ? path.size() : slash;
    if (end - start == 2 && path.compare(start, 2, "..") == 0) {
      return true;
    }
    if (slash == std::string::npos) {
      // That was the last component.
      return false;
    }
    start = slash + 1;
  }
}
319
320 // Maps a file from an old location to a new one. Typically, old_prefix is
321 // a virtual path and new_prefix is its corresponding disk path. Returns
322 // false if the filename did not start with old_prefix, otherwise replaces
323 // old_prefix with new_prefix and stores the result in *result. Examples:
324 // string result;
325 // assert(ApplyMapping("foo/bar", "", "baz", &result));
326 // assert(result == "baz/foo/bar");
327 //
328 // assert(ApplyMapping("foo/bar", "foo", "baz", &result));
329 // assert(result == "baz/bar");
330 //
331 // assert(ApplyMapping("foo", "foo", "bar", &result));
332 // assert(result == "bar");
333 //
334 // assert(!ApplyMapping("foo/bar", "baz", "qux", &result));
335 // assert(!ApplyMapping("foo/bar", "baz", "qux", &result));
336 // assert(!ApplyMapping("foobar", "foo", "baz", &result));
ApplyMapping(const std::string & filename,const std::string & old_prefix,const std::string & new_prefix,std::string * result)337 static bool ApplyMapping(const std::string& filename,
338 const std::string& old_prefix,
339 const std::string& new_prefix, std::string* result) {
340 if (old_prefix.empty()) {
341 // old_prefix matches any relative path.
342 if (ContainsParentReference(filename)) {
343 // We do not allow the file name to use "..".
344 return false;
345 }
346 if (HasPrefixString(filename, "/") || IsWindowsAbsolutePath(filename)) {
347 // This is an absolute path, so it isn't matched by the empty string.
348 return false;
349 }
350 result->assign(new_prefix);
351 if (!result->empty()) result->push_back('/');
352 result->append(filename);
353 return true;
354 } else if (HasPrefixString(filename, old_prefix)) {
355 // old_prefix is a prefix of the filename. Is it the whole filename?
356 if (filename.size() == old_prefix.size()) {
357 // Yep, it's an exact match.
358 *result = new_prefix;
359 return true;
360 } else {
361 // Not an exact match. Is the next character a '/'? Otherwise,
362 // this isn't actually a match at all. E.g. the prefix "foo/bar"
363 // does not match the filename "foo/barbaz".
364 int after_prefix_start = -1;
365 if (filename[old_prefix.size()] == '/') {
366 after_prefix_start = old_prefix.size() + 1;
367 } else if (filename[old_prefix.size() - 1] == '/') {
368 // old_prefix is never empty, and canonicalized paths never have
369 // consecutive '/' characters.
370 after_prefix_start = old_prefix.size();
371 }
372 if (after_prefix_start != -1) {
373 // Yep. So the prefixes are directories and the filename is a file
374 // inside them.
375 std::string after_prefix = filename.substr(after_prefix_start);
376 if (ContainsParentReference(after_prefix)) {
377 // We do not allow the file name to use "..".
378 return false;
379 }
380 result->assign(new_prefix);
381 if (!result->empty()) result->push_back('/');
382 result->append(after_prefix);
383 return true;
384 }
385 }
386 }
387
388 return false;
389 }
390
MapPath(const std::string & virtual_path,const std::string & disk_path)391 void DiskSourceTree::MapPath(const std::string& virtual_path,
392 const std::string& disk_path) {
393 mappings_.push_back(Mapping(virtual_path, CanonicalizePath(disk_path)));
394 }
395
396 DiskSourceTree::DiskFileToVirtualFileResult
DiskFileToVirtualFile(const std::string & disk_file,std::string * virtual_file,std::string * shadowing_disk_file)397 DiskSourceTree::DiskFileToVirtualFile(const std::string& disk_file,
398 std::string* virtual_file,
399 std::string* shadowing_disk_file) {
400 int mapping_index = -1;
401 std::string canonical_disk_file = CanonicalizePath(disk_file);
402
403 for (int i = 0; i < mappings_.size(); i++) {
404 // Apply the mapping in reverse.
405 if (ApplyMapping(canonical_disk_file, mappings_[i].disk_path,
406 mappings_[i].virtual_path, virtual_file)) {
407 // Success.
408 mapping_index = i;
409 break;
410 }
411 }
412
413 if (mapping_index == -1) {
414 return NO_MAPPING;
415 }
416
417 // Iterate through all mappings with higher precedence and verify that none
418 // of them map this file to some other existing file.
419 for (int i = 0; i < mapping_index; i++) {
420 if (ApplyMapping(*virtual_file, mappings_[i].virtual_path,
421 mappings_[i].disk_path, shadowing_disk_file)) {
422 if (access(shadowing_disk_file->c_str(), F_OK) >= 0) {
423 // File exists.
424 return SHADOWED;
425 }
426 }
427 }
428 shadowing_disk_file->clear();
429
430 // Verify that we can open the file. Note that this also has the side-effect
431 // of verifying that we are not canonicalizing away any non-existent
432 // directories.
433 std::unique_ptr<io::ZeroCopyInputStream> stream(OpenDiskFile(disk_file));
434 if (stream == NULL) {
435 return CANNOT_OPEN;
436 }
437
438 return SUCCESS;
439 }
440
VirtualFileToDiskFile(const std::string & virtual_file,std::string * disk_file)441 bool DiskSourceTree::VirtualFileToDiskFile(const std::string& virtual_file,
442 std::string* disk_file) {
443 std::unique_ptr<io::ZeroCopyInputStream> stream(
444 OpenVirtualFile(virtual_file, disk_file));
445 return stream != NULL;
446 }
447
Open(const std::string & filename)448 io::ZeroCopyInputStream* DiskSourceTree::Open(const std::string& filename) {
449 return OpenVirtualFile(filename, NULL);
450 }
451
GetLastErrorMessage()452 std::string DiskSourceTree::GetLastErrorMessage() {
453 return last_error_message_;
454 }
455
OpenVirtualFile(const std::string & virtual_file,std::string * disk_file)456 io::ZeroCopyInputStream* DiskSourceTree::OpenVirtualFile(
457 const std::string& virtual_file, std::string* disk_file) {
458 if (virtual_file != CanonicalizePath(virtual_file) ||
459 ContainsParentReference(virtual_file)) {
460 // We do not allow importing of paths containing things like ".." or
461 // consecutive slashes since the compiler expects files to be uniquely
462 // identified by file name.
463 last_error_message_ =
464 "Backslashes, consecutive slashes, \".\", or \"..\" "
465 "are not allowed in the virtual path";
466 return NULL;
467 }
468
469 for (int i = 0; i < mappings_.size(); i++) {
470 std::string temp_disk_file;
471 if (ApplyMapping(virtual_file, mappings_[i].virtual_path,
472 mappings_[i].disk_path, &temp_disk_file)) {
473 io::ZeroCopyInputStream* stream = OpenDiskFile(temp_disk_file);
474 if (stream != NULL) {
475 if (disk_file != NULL) {
476 *disk_file = temp_disk_file;
477 }
478 return stream;
479 }
480
481 if (errno == EACCES) {
482 // The file exists but is not readable.
483 last_error_message_ =
484 "Read access is denied for file: " + temp_disk_file;
485 return NULL;
486 }
487 }
488 }
489 last_error_message_ = "File not found.";
490 return NULL;
491 }
492
OpenDiskFile(const std::string & filename)493 io::ZeroCopyInputStream* DiskSourceTree::OpenDiskFile(
494 const std::string& filename) {
495 int file_descriptor;
496 do {
497 file_descriptor = open(filename.c_str(), O_RDONLY);
498 } while (file_descriptor < 0 && errno == EINTR);
499 if (file_descriptor >= 0) {
500 io::FileInputStream* result = new io::FileInputStream(file_descriptor);
501 result->SetCloseOnDelete(true);
502 return result;
503 } else {
504 return NULL;
505 }
506 }
507
508 } // namespace compiler
509 } // namespace protobuf
510 } // namespace google
511