1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc. All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 // * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 // * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 // * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31 // Author: kenton@google.com (Kenton Varda)
32 // Based on original Protocol Buffers design by
33 // Sanjay Ghemawat, Jeff Dean, and others.
34
35 #ifdef _MSC_VER
36 #include <io.h>
37 #else
38 #include <unistd.h>
39 #endif
40 #include <sys/types.h>
41 #include <sys/stat.h>
42 #include <fcntl.h>
43 #include <errno.h>
44
45 #include <algorithm>
46 #include <memory>
47 #ifndef _SHARED_PTR_H
48 #include <google/protobuf/stubs/shared_ptr.h>
49 #endif
50
51 #include <google/protobuf/compiler/importer.h>
52
53 #include <google/protobuf/compiler/parser.h>
54 #include <google/protobuf/io/tokenizer.h>
55 #include <google/protobuf/io/zero_copy_stream_impl.h>
56 #include <google/protobuf/stubs/strutil.h>
57
58 namespace google {
59 namespace protobuf {
60 namespace compiler {
61
62 #ifdef _WIN32
63 #ifndef F_OK
64 #define F_OK 00 // not defined by MSVC for whatever reason
65 #endif
66 #include <ctype.h>
67 #endif
68
// Returns true if the text looks like a Windows-style absolute path, starting
// with a drive letter.  Example: "C:\foo".  The check is only compiled in for
// Windows and Cygwin builds; on every other platform this returns false.
//
// A match requires: at least three characters, an alphabetic first character,
// a ':' in second position that is also the only ':' in the string, and a
// '/' or '\\' in third position.
//
// TODO(kenton):  Share this with copy in command_line_interface.cc?
static bool IsWindowsAbsolutePath(const string& text) {
#if defined(_WIN32) || defined(__CYGWIN__)
  // isalpha() is undefined for negative arguments other than EOF and plain
  // char may be signed, so go through unsigned char before the call.
  return text.size() >= 3 && text[1] == ':' &&
         isalpha(static_cast<unsigned char>(text[0])) &&
         (text[2] == '/' || text[2] == '\\') &&
         text.find_last_of(':') == 1;
#else
  return false;
#endif
}
82
~MultiFileErrorCollector()83 MultiFileErrorCollector::~MultiFileErrorCollector() {}
84
85 // This class serves two purposes:
86 // - It implements the ErrorCollector interface (used by Tokenizer and Parser)
87 // in terms of MultiFileErrorCollector, using a particular filename.
88 // - It lets us check if any errors have occurred.
89 class SourceTreeDescriptorDatabase::SingleFileErrorCollector
90 : public io::ErrorCollector {
91 public:
SingleFileErrorCollector(const string & filename,MultiFileErrorCollector * multi_file_error_collector)92 SingleFileErrorCollector(const string& filename,
93 MultiFileErrorCollector* multi_file_error_collector)
94 : filename_(filename),
95 multi_file_error_collector_(multi_file_error_collector),
96 had_errors_(false) {}
~SingleFileErrorCollector()97 ~SingleFileErrorCollector() {}
98
had_errors()99 bool had_errors() { return had_errors_; }
100
101 // implements ErrorCollector ---------------------------------------
AddError(int line,int column,const string & message)102 void AddError(int line, int column, const string& message) {
103 if (multi_file_error_collector_ != NULL) {
104 multi_file_error_collector_->AddError(filename_, line, column, message);
105 }
106 had_errors_ = true;
107 }
108
109 private:
110 string filename_;
111 MultiFileErrorCollector* multi_file_error_collector_;
112 bool had_errors_;
113 };
114
115 // ===================================================================
116
SourceTreeDescriptorDatabase(SourceTree * source_tree)117 SourceTreeDescriptorDatabase::SourceTreeDescriptorDatabase(
118 SourceTree* source_tree)
119 : source_tree_(source_tree),
120 error_collector_(NULL),
121 using_validation_error_collector_(false),
122 validation_error_collector_(this) {}
123
~SourceTreeDescriptorDatabase()124 SourceTreeDescriptorDatabase::~SourceTreeDescriptorDatabase() {}
125
FindFileByName(const string & filename,FileDescriptorProto * output)126 bool SourceTreeDescriptorDatabase::FindFileByName(
127 const string& filename, FileDescriptorProto* output) {
128 google::protobuf::scoped_ptr<io::ZeroCopyInputStream> input(source_tree_->Open(filename));
129 if (input == NULL) {
130 if (error_collector_ != NULL) {
131 error_collector_->AddError(filename, -1, 0,
132 source_tree_->GetLastErrorMessage());
133 }
134 return false;
135 }
136
137 // Set up the tokenizer and parser.
138 SingleFileErrorCollector file_error_collector(filename, error_collector_);
139 io::Tokenizer tokenizer(input.get(), &file_error_collector);
140
141 Parser parser;
142 if (error_collector_ != NULL) {
143 parser.RecordErrorsTo(&file_error_collector);
144 }
145 if (using_validation_error_collector_) {
146 parser.RecordSourceLocationsTo(&source_locations_);
147 }
148
149 // Parse it.
150 output->set_name(filename);
151 return parser.Parse(&tokenizer, output) &&
152 !file_error_collector.had_errors();
153 }
154
FindFileContainingSymbol(const string & symbol_name,FileDescriptorProto * output)155 bool SourceTreeDescriptorDatabase::FindFileContainingSymbol(
156 const string& symbol_name, FileDescriptorProto* output) {
157 return false;
158 }
159
FindFileContainingExtension(const string & containing_type,int field_number,FileDescriptorProto * output)160 bool SourceTreeDescriptorDatabase::FindFileContainingExtension(
161 const string& containing_type, int field_number,
162 FileDescriptorProto* output) {
163 return false;
164 }
165
166 // -------------------------------------------------------------------
167
168 SourceTreeDescriptorDatabase::ValidationErrorCollector::
ValidationErrorCollector(SourceTreeDescriptorDatabase * owner)169 ValidationErrorCollector(SourceTreeDescriptorDatabase* owner)
170 : owner_(owner) {}
171
172 SourceTreeDescriptorDatabase::ValidationErrorCollector::
~ValidationErrorCollector()173 ~ValidationErrorCollector() {}
174
AddError(const string & filename,const string & element_name,const Message * descriptor,ErrorLocation location,const string & message)175 void SourceTreeDescriptorDatabase::ValidationErrorCollector::AddError(
176 const string& filename,
177 const string& element_name,
178 const Message* descriptor,
179 ErrorLocation location,
180 const string& message) {
181 if (owner_->error_collector_ == NULL) return;
182
183 int line, column;
184 owner_->source_locations_.Find(descriptor, location, &line, &column);
185 owner_->error_collector_->AddError(filename, line, column, message);
186 }
187
AddWarning(const string & filename,const string & element_name,const Message * descriptor,ErrorLocation location,const string & message)188 void SourceTreeDescriptorDatabase::ValidationErrorCollector::AddWarning(
189 const string& filename,
190 const string& element_name,
191 const Message* descriptor,
192 ErrorLocation location,
193 const string& message) {
194 if (owner_->error_collector_ == NULL) return;
195
196 int line, column;
197 owner_->source_locations_.Find(descriptor, location, &line, &column);
198 owner_->error_collector_->AddWarning(filename, line, column, message);
199 }
200
201 // ===================================================================
202
Importer(SourceTree * source_tree,MultiFileErrorCollector * error_collector)203 Importer::Importer(SourceTree* source_tree,
204 MultiFileErrorCollector* error_collector)
205 : database_(source_tree),
206 pool_(&database_, database_.GetValidationErrorCollector()) {
207 pool_.EnforceWeakDependencies(true);
208 database_.RecordErrorsTo(error_collector);
209 }
210
~Importer()211 Importer::~Importer() {}
212
Import(const string & filename)213 const FileDescriptor* Importer::Import(const string& filename) {
214 return pool_.FindFileByName(filename);
215 }
216
AddUnusedImportTrackFile(const string & file_name)217 void Importer::AddUnusedImportTrackFile(const string& file_name) {
218 pool_.AddUnusedImportTrackFile(file_name);
219 }
220
ClearUnusedImportTrackFiles()221 void Importer::ClearUnusedImportTrackFiles() {
222 pool_.ClearUnusedImportTrackFiles();
223 }
224
225 // ===================================================================
226
~SourceTree()227 SourceTree::~SourceTree() {}
228
GetLastErrorMessage()229 string SourceTree::GetLastErrorMessage() {
230 return "File not found.";
231 }
232
DiskSourceTree()233 DiskSourceTree::DiskSourceTree() {}
234
~DiskSourceTree()235 DiskSourceTree::~DiskSourceTree() {}
236
// Returns the final character of str, or '\0' when str is empty.
//
// The empty case is handled explicitly because indexing with size() - 1
// would wrap around to the largest size_t value and read out of bounds.
static inline char LastChar(const string& str) {
  return str.empty() ? '\0' : str[str.size() - 1];
}
240
241 // Given a path, returns an equivalent path with these changes:
242 // - On Windows, any backslashes are replaced with forward slashes.
243 // - Any instances of the directory "." are removed.
244 // - Any consecutive '/'s are collapsed into a single slash.
245 // Note that the resulting string may be empty.
246 //
247 // TODO(kenton): It would be nice to handle "..", e.g. so that we can figure
248 // out that "foo/bar.proto" is inside "baz/../foo". However, if baz is a
249 // symlink or doesn't exist, then things get complicated, and we can't
250 // actually determine this without investigating the filesystem, probably
251 // in non-portable ways. So, we punt.
252 //
253 // TODO(kenton): It would be nice to use realpath() here except that it
254 // resolves symbolic links. This could cause problems if people place
255 // symbolic links in their source tree. For example, if you executed:
256 // protoc --proto_path=foo foo/bar/baz.proto
257 // then if foo/bar is a symbolic link, foo/bar/baz.proto will canonicalize
258 // to a path which does not appear to be under foo, and thus the compiler
259 // will complain that baz.proto is not inside the --proto_path.
CanonicalizePath(string path)260 static string CanonicalizePath(string path) {
261 #ifdef _WIN32
262 // The Win32 API accepts forward slashes as a path delimiter even though
263 // backslashes are standard. Let's avoid confusion and use only forward
264 // slashes.
265 if (HasPrefixString(path, "\\\\")) {
266 // Avoid converting two leading backslashes.
267 path = "\\\\" + StringReplace(path.substr(2), "\\", "/", true);
268 } else {
269 path = StringReplace(path, "\\", "/", true);
270 }
271 #endif
272
273 vector<string> canonical_parts;
274 vector<string> parts = Split(
275 path, "/", true); // Note: Removes empty parts.
276 for (int i = 0; i < parts.size(); i++) {
277 if (parts[i] == ".") {
278 // Ignore.
279 } else {
280 canonical_parts.push_back(parts[i]);
281 }
282 }
283 string result = Join(canonical_parts, "/");
284 if (!path.empty() && path[0] == '/') {
285 // Restore leading slash.
286 result = '/' + result;
287 }
288 if (!path.empty() && LastChar(path) == '/' &&
289 !result.empty() && LastChar(result) != '/') {
290 // Restore trailing slash.
291 result += '/';
292 }
293 return result;
294 }
295
// Returns true if `path` has a ".." component when split on '/': i.e. the
// path is exactly "..", begins with "../", ends with "/..", or contains
// "/../" somewhere in the middle.  Names that merely contain two dots
// (such as "..foo" or "foo..") do not count.
static inline bool ContainsParentReference(const string& path) {
  if (path == "..") {
    return true;
  }
  if (path.compare(0, 3, "../") == 0) {
    return true;
  }
  if (path.size() >= 3 && path.compare(path.size() - 3, 3, "/..") == 0) {
    return true;
  }
  return path.find("/../") != string::npos;
}
302
303 // Maps a file from an old location to a new one. Typically, old_prefix is
304 // a virtual path and new_prefix is its corresponding disk path. Returns
305 // false if the filename did not start with old_prefix, otherwise replaces
306 // old_prefix with new_prefix and stores the result in *result. Examples:
307 // string result;
308 // assert(ApplyMapping("foo/bar", "", "baz", &result));
309 // assert(result == "baz/foo/bar");
310 //
311 // assert(ApplyMapping("foo/bar", "foo", "baz", &result));
312 // assert(result == "baz/bar");
313 //
314 // assert(ApplyMapping("foo", "foo", "bar", &result));
315 // assert(result == "bar");
316 //
317 // assert(!ApplyMapping("foo/bar", "baz", "qux", &result));
318 // assert(!ApplyMapping("foo/bar", "baz", "qux", &result));
319 // assert(!ApplyMapping("foobar", "foo", "baz", &result));
ApplyMapping(const string & filename,const string & old_prefix,const string & new_prefix,string * result)320 static bool ApplyMapping(const string& filename,
321 const string& old_prefix,
322 const string& new_prefix,
323 string* result) {
324 if (old_prefix.empty()) {
325 // old_prefix matches any relative path.
326 if (ContainsParentReference(filename)) {
327 // We do not allow the file name to use "..".
328 return false;
329 }
330 if (HasPrefixString(filename, "/") ||
331 IsWindowsAbsolutePath(filename)) {
332 // This is an absolute path, so it isn't matched by the empty string.
333 return false;
334 }
335 result->assign(new_prefix);
336 if (!result->empty()) result->push_back('/');
337 result->append(filename);
338 return true;
339 } else if (HasPrefixString(filename, old_prefix)) {
340 // old_prefix is a prefix of the filename. Is it the whole filename?
341 if (filename.size() == old_prefix.size()) {
342 // Yep, it's an exact match.
343 *result = new_prefix;
344 return true;
345 } else {
346 // Not an exact match. Is the next character a '/'? Otherwise,
347 // this isn't actually a match at all. E.g. the prefix "foo/bar"
348 // does not match the filename "foo/barbaz".
349 int after_prefix_start = -1;
350 if (filename[old_prefix.size()] == '/') {
351 after_prefix_start = old_prefix.size() + 1;
352 } else if (filename[old_prefix.size() - 1] == '/') {
353 // old_prefix is never empty, and canonicalized paths never have
354 // consecutive '/' characters.
355 after_prefix_start = old_prefix.size();
356 }
357 if (after_prefix_start != -1) {
358 // Yep. So the prefixes are directories and the filename is a file
359 // inside them.
360 string after_prefix = filename.substr(after_prefix_start);
361 if (ContainsParentReference(after_prefix)) {
362 // We do not allow the file name to use "..".
363 return false;
364 }
365 result->assign(new_prefix);
366 if (!result->empty()) result->push_back('/');
367 result->append(after_prefix);
368 return true;
369 }
370 }
371 }
372
373 return false;
374 }
375
MapPath(const string & virtual_path,const string & disk_path)376 void DiskSourceTree::MapPath(const string& virtual_path,
377 const string& disk_path) {
378 mappings_.push_back(Mapping(virtual_path, CanonicalizePath(disk_path)));
379 }
380
381 DiskSourceTree::DiskFileToVirtualFileResult
DiskFileToVirtualFile(const string & disk_file,string * virtual_file,string * shadowing_disk_file)382 DiskSourceTree::DiskFileToVirtualFile(
383 const string& disk_file,
384 string* virtual_file,
385 string* shadowing_disk_file) {
386 int mapping_index = -1;
387 string canonical_disk_file = CanonicalizePath(disk_file);
388
389 for (int i = 0; i < mappings_.size(); i++) {
390 // Apply the mapping in reverse.
391 if (ApplyMapping(canonical_disk_file, mappings_[i].disk_path,
392 mappings_[i].virtual_path, virtual_file)) {
393 // Success.
394 mapping_index = i;
395 break;
396 }
397 }
398
399 if (mapping_index == -1) {
400 return NO_MAPPING;
401 }
402
403 // Iterate through all mappings with higher precedence and verify that none
404 // of them map this file to some other existing file.
405 for (int i = 0; i < mapping_index; i++) {
406 if (ApplyMapping(*virtual_file, mappings_[i].virtual_path,
407 mappings_[i].disk_path, shadowing_disk_file)) {
408 if (access(shadowing_disk_file->c_str(), F_OK) >= 0) {
409 // File exists.
410 return SHADOWED;
411 }
412 }
413 }
414 shadowing_disk_file->clear();
415
416 // Verify that we can open the file. Note that this also has the side-effect
417 // of verifying that we are not canonicalizing away any non-existent
418 // directories.
419 google::protobuf::scoped_ptr<io::ZeroCopyInputStream> stream(OpenDiskFile(disk_file));
420 if (stream == NULL) {
421 return CANNOT_OPEN;
422 }
423
424 return SUCCESS;
425 }
426
VirtualFileToDiskFile(const string & virtual_file,string * disk_file)427 bool DiskSourceTree::VirtualFileToDiskFile(const string& virtual_file,
428 string* disk_file) {
429 google::protobuf::scoped_ptr<io::ZeroCopyInputStream> stream(
430 OpenVirtualFile(virtual_file, disk_file));
431 return stream != NULL;
432 }
433
Open(const string & filename)434 io::ZeroCopyInputStream* DiskSourceTree::Open(const string& filename) {
435 return OpenVirtualFile(filename, NULL);
436 }
437
GetLastErrorMessage()438 string DiskSourceTree::GetLastErrorMessage() {
439 return last_error_message_;
440 }
441
OpenVirtualFile(const string & virtual_file,string * disk_file)442 io::ZeroCopyInputStream* DiskSourceTree::OpenVirtualFile(
443 const string& virtual_file,
444 string* disk_file) {
445 if (virtual_file != CanonicalizePath(virtual_file) ||
446 ContainsParentReference(virtual_file)) {
447 // We do not allow importing of paths containing things like ".." or
448 // consecutive slashes since the compiler expects files to be uniquely
449 // identified by file name.
450 last_error_message_ = "Backslashes, consecutive slashes, \".\", or \"..\" "
451 "are not allowed in the virtual path";
452 return NULL;
453 }
454
455 for (int i = 0; i < mappings_.size(); i++) {
456 string temp_disk_file;
457 if (ApplyMapping(virtual_file, mappings_[i].virtual_path,
458 mappings_[i].disk_path, &temp_disk_file)) {
459 io::ZeroCopyInputStream* stream = OpenDiskFile(temp_disk_file);
460 if (stream != NULL) {
461 if (disk_file != NULL) {
462 *disk_file = temp_disk_file;
463 }
464 return stream;
465 }
466
467 if (errno == EACCES) {
468 // The file exists but is not readable.
469 last_error_message_ = "Read access is denied for file: " +
470 temp_disk_file;
471 return NULL;
472 }
473 }
474 }
475 last_error_message_ = "File not found.";
476 return NULL;
477 }
478
OpenDiskFile(const string & filename)479 io::ZeroCopyInputStream* DiskSourceTree::OpenDiskFile(
480 const string& filename) {
481 int file_descriptor;
482 do {
483 file_descriptor = open(filename.c_str(), O_RDONLY);
484 } while (file_descriptor < 0 && errno == EINTR);
485 if (file_descriptor >= 0) {
486 io::FileInputStream* result = new io::FileInputStream(file_descriptor);
487 result->SetCloseOnDelete(true);
488 return result;
489 } else {
490 return NULL;
491 }
492 }
493
494 } // namespace compiler
495 } // namespace protobuf
496 } // namespace google
497