1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc. All rights reserved.
3 // http://code.google.com/p/protobuf/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 // * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 // * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 // * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31 // Author: kenton@google.com (Kenton Varda)
32 // Based on original Protocol Buffers design by
33 // Sanjay Ghemawat, Jeff Dean, and others.
34
35 #ifdef _MSC_VER
36 #include <io.h>
37 #else
38 #include <unistd.h>
39 #endif
40 #include <sys/types.h>
41 #include <sys/stat.h>
42 #include <fcntl.h>
43 #include <errno.h>
44
45 #include <algorithm>
46
47 #include <google/protobuf/compiler/importer.h>
48
49 #include <google/protobuf/compiler/parser.h>
50 #include <google/protobuf/io/tokenizer.h>
51 #include <google/protobuf/io/zero_copy_stream_impl.h>
52 #include <google/protobuf/stubs/strutil.h>
53
54 namespace google {
55 namespace protobuf {
56 namespace compiler {
57
58 #ifdef _WIN32
59 #ifndef F_OK
60 #define F_OK 00 // not defined by MSVC for whatever reason
61 #endif
62 #include <ctype.h>
63 #endif
64
65 // Returns true if the text looks like a Windows-style absolute path, starting
66 // with a drive letter. Example: "C:\foo". TODO(kenton): Share this with
67 // copy in command_line_interface.cc?
static bool IsWindowsAbsolutePath(const string& text) {
#if defined(_WIN32) || defined(__CYGWIN__)
  // The shortest accepted form is "X:/": a letter, a colon, then a separator.
  if (text.size() < 3 || text[1] != ':') return false;

  // isalpha() is undefined for negative arguments other than EOF, and plain
  // char may be signed, so classify the byte as an unsigned char.
  if (!isalpha(static_cast<unsigned char>(text[0]))) return false;

  // Either slash direction is accepted after the drive colon.
  if (text[2] != '/' && text[2] != '\\') return false;

  // The drive colon must be the only colon anywhere in the text.
  return text.find_last_of(':') == 1;
#else
  // Drive-letter paths are only recognized on Windows and Cygwin builds.
  return false;
#endif
}
78
~MultiFileErrorCollector()79 MultiFileErrorCollector::~MultiFileErrorCollector() {}
80
81 // This class serves two purposes:
82 // - It implements the ErrorCollector interface (used by Tokenizer and Parser)
83 // in terms of MultiFileErrorCollector, using a particular filename.
84 // - It lets us check if any errors have occurred.
85 class SourceTreeDescriptorDatabase::SingleFileErrorCollector
86 : public io::ErrorCollector {
87 public:
SingleFileErrorCollector(const string & filename,MultiFileErrorCollector * multi_file_error_collector)88 SingleFileErrorCollector(const string& filename,
89 MultiFileErrorCollector* multi_file_error_collector)
90 : filename_(filename),
91 multi_file_error_collector_(multi_file_error_collector),
92 had_errors_(false) {}
~SingleFileErrorCollector()93 ~SingleFileErrorCollector() {}
94
had_errors()95 bool had_errors() { return had_errors_; }
96
97 // implements ErrorCollector ---------------------------------------
AddError(int line,int column,const string & message)98 void AddError(int line, int column, const string& message) {
99 if (multi_file_error_collector_ != NULL) {
100 multi_file_error_collector_->AddError(filename_, line, column, message);
101 }
102 had_errors_ = true;
103 }
104
105 private:
106 string filename_;
107 MultiFileErrorCollector* multi_file_error_collector_;
108 bool had_errors_;
109 };
110
111 // ===================================================================
112
SourceTreeDescriptorDatabase(SourceTree * source_tree)113 SourceTreeDescriptorDatabase::SourceTreeDescriptorDatabase(
114 SourceTree* source_tree)
115 : source_tree_(source_tree),
116 error_collector_(NULL),
117 using_validation_error_collector_(false),
118 validation_error_collector_(this) {}
119
~SourceTreeDescriptorDatabase()120 SourceTreeDescriptorDatabase::~SourceTreeDescriptorDatabase() {}
121
FindFileByName(const string & filename,FileDescriptorProto * output)122 bool SourceTreeDescriptorDatabase::FindFileByName(
123 const string& filename, FileDescriptorProto* output) {
124 scoped_ptr<io::ZeroCopyInputStream> input(source_tree_->Open(filename));
125 if (input == NULL) {
126 if (error_collector_ != NULL) {
127 error_collector_->AddError(filename, -1, 0, "File not found.");
128 }
129 return false;
130 }
131
132 // Set up the tokenizer and parser.
133 SingleFileErrorCollector file_error_collector(filename, error_collector_);
134 io::Tokenizer tokenizer(input.get(), &file_error_collector);
135
136 Parser parser;
137 if (error_collector_ != NULL) {
138 parser.RecordErrorsTo(&file_error_collector);
139 }
140 if (using_validation_error_collector_) {
141 parser.RecordSourceLocationsTo(&source_locations_);
142 }
143
144 // Parse it.
145 output->set_name(filename);
146 return parser.Parse(&tokenizer, output) &&
147 !file_error_collector.had_errors();
148 }
149
FindFileContainingSymbol(const string & symbol_name,FileDescriptorProto * output)150 bool SourceTreeDescriptorDatabase::FindFileContainingSymbol(
151 const string& symbol_name, FileDescriptorProto* output) {
152 return false;
153 }
154
FindFileContainingExtension(const string & containing_type,int field_number,FileDescriptorProto * output)155 bool SourceTreeDescriptorDatabase::FindFileContainingExtension(
156 const string& containing_type, int field_number,
157 FileDescriptorProto* output) {
158 return false;
159 }
160
161 // -------------------------------------------------------------------
162
163 SourceTreeDescriptorDatabase::ValidationErrorCollector::
ValidationErrorCollector(SourceTreeDescriptorDatabase * owner)164 ValidationErrorCollector(SourceTreeDescriptorDatabase* owner)
165 : owner_(owner) {}
166
167 SourceTreeDescriptorDatabase::ValidationErrorCollector::
~ValidationErrorCollector()168 ~ValidationErrorCollector() {}
169
AddError(const string & filename,const string & element_name,const Message * descriptor,ErrorLocation location,const string & message)170 void SourceTreeDescriptorDatabase::ValidationErrorCollector::AddError(
171 const string& filename,
172 const string& element_name,
173 const Message* descriptor,
174 ErrorLocation location,
175 const string& message) {
176 if (owner_->error_collector_ == NULL) return;
177
178 int line, column;
179 owner_->source_locations_.Find(descriptor, location, &line, &column);
180 owner_->error_collector_->AddError(filename, line, column, message);
181 }
182
183 // ===================================================================
184
Importer(SourceTree * source_tree,MultiFileErrorCollector * error_collector)185 Importer::Importer(SourceTree* source_tree,
186 MultiFileErrorCollector* error_collector)
187 : database_(source_tree),
188 pool_(&database_, database_.GetValidationErrorCollector()) {
189 database_.RecordErrorsTo(error_collector);
190 }
191
~Importer()192 Importer::~Importer() {}
193
Import(const string & filename)194 const FileDescriptor* Importer::Import(const string& filename) {
195 return pool_.FindFileByName(filename);
196 }
197
198 // ===================================================================
199
~SourceTree()200 SourceTree::~SourceTree() {}
201
DiskSourceTree()202 DiskSourceTree::DiskSourceTree() {}
203
~DiskSourceTree()204 DiskSourceTree::~DiskSourceTree() {}
205
// Returns the final character of |str|, or '\0' when |str| is empty.
// The empty case is handled explicitly because size() - 1 would otherwise
// wrap around and index far outside the string.
static inline char LastChar(const string& str) {
  if (str.empty()) return '\0';
  return str[str.size() - 1];
}
209
210 // Given a path, returns an equivalent path with these changes:
211 // - On Windows, any backslashes are replaced with forward slashes.
212 // - Any instances of the directory "." are removed.
213 // - Any consecutive '/'s are collapsed into a single slash.
214 // Note that the resulting string may be empty.
215 //
216 // TODO(kenton): It would be nice to handle "..", e.g. so that we can figure
217 // out that "foo/bar.proto" is inside "baz/../foo". However, if baz is a
218 // symlink or doesn't exist, then things get complicated, and we can't
219 // actually determine this without investigating the filesystem, probably
220 // in non-portable ways. So, we punt.
221 //
222 // TODO(kenton): It would be nice to use realpath() here except that it
223 // resolves symbolic links. This could cause problems if people place
224 // symbolic links in their source tree. For example, if you executed:
225 // protoc --proto_path=foo foo/bar/baz.proto
226 // then if foo/bar is a symbolic link, foo/bar/baz.proto will canonicalize
227 // to a path which does not appear to be under foo, and thus the compiler
228 // will complain that baz.proto is not inside the --proto_path.
CanonicalizePath(string path)229 static string CanonicalizePath(string path) {
230 #ifdef _WIN32
231 // The Win32 API accepts forward slashes as a path delimiter even though
232 // backslashes are standard. Let's avoid confusion and use only forward
233 // slashes.
234 path = StringReplace(path, "\\", "/", true);
235 #endif
236
237 vector<string> parts;
238 vector<string> canonical_parts;
239 SplitStringUsing(path, "/", &parts); // Note: Removes empty parts.
240 for (int i = 0; i < parts.size(); i++) {
241 if (parts[i] == ".") {
242 // Ignore.
243 } else {
244 canonical_parts.push_back(parts[i]);
245 }
246 }
247 string result = JoinStrings(canonical_parts, "/");
248 if (!path.empty() && path[0] == '/') {
249 // Restore leading slash.
250 result = '/' + result;
251 }
252 if (!path.empty() && LastChar(path) == '/' &&
253 !result.empty() && LastChar(result) != '/') {
254 // Restore trailing slash.
255 result += '/';
256 }
257 return result;
258 }
259
// Returns true if |path| has a ".." component: the whole path is "..", it
// begins with "../", it ends with "/..", or it contains "/../" somewhere.
static inline bool ContainsParentReference(const string& path) {
  if (path == "..") return true;

  const string::size_type length = path.size();
  if (length >= 3) {
    // Leading "../".
    if (path.compare(0, 3, "../") == 0) return true;
    // Trailing "/..".
    if (path.compare(length - 3, 3, "/..") == 0) return true;
  }

  // A ".." component somewhere in the middle.
  return path.find("/../") != string::npos;
}
266
267 // Maps a file from an old location to a new one. Typically, old_prefix is
268 // a virtual path and new_prefix is its corresponding disk path. Returns
269 // false if the filename did not start with old_prefix, otherwise replaces
270 // old_prefix with new_prefix and stores the result in *result. Examples:
271 // string result;
272 // assert(ApplyMapping("foo/bar", "", "baz", &result));
273 // assert(result == "baz/foo/bar");
274 //
275 // assert(ApplyMapping("foo/bar", "foo", "baz", &result));
276 // assert(result == "baz/bar");
277 //
278 // assert(ApplyMapping("foo", "foo", "bar", &result));
279 // assert(result == "bar");
280 //
281 // assert(!ApplyMapping("foo/bar", "baz", "qux", &result));
282 // assert(!ApplyMapping("foo/../bar", "foo", "qux", &result));
283 // assert(!ApplyMapping("foobar", "foo", "baz", &result));
ApplyMapping(const string & filename,const string & old_prefix,const string & new_prefix,string * result)284 static bool ApplyMapping(const string& filename,
285 const string& old_prefix,
286 const string& new_prefix,
287 string* result) {
288 if (old_prefix.empty()) {
289 // old_prefix matches any relative path.
290 if (ContainsParentReference(filename)) {
291 // We do not allow the file name to use "..".
292 return false;
293 }
294 if (HasPrefixString(filename, "/") ||
295 IsWindowsAbsolutePath(filename)) {
296 // This is an absolute path, so it isn't matched by the empty string.
297 return false;
298 }
299 result->assign(new_prefix);
300 if (!result->empty()) result->push_back('/');
301 result->append(filename);
302 return true;
303 } else if (HasPrefixString(filename, old_prefix)) {
304 // old_prefix is a prefix of the filename. Is it the whole filename?
305 if (filename.size() == old_prefix.size()) {
306 // Yep, it's an exact match.
307 *result = new_prefix;
308 return true;
309 } else {
310 // Not an exact match. Is the next character a '/'? Otherwise,
311 // this isn't actually a match at all. E.g. the prefix "foo/bar"
312 // does not match the filename "foo/barbaz".
313 int after_prefix_start = -1;
314 if (filename[old_prefix.size()] == '/') {
315 after_prefix_start = old_prefix.size() + 1;
316 } else if (filename[old_prefix.size() - 1] == '/') {
317 // old_prefix is never empty, and canonicalized paths never have
318 // consecutive '/' characters.
319 after_prefix_start = old_prefix.size();
320 }
321 if (after_prefix_start != -1) {
322 // Yep. So the prefixes are directories and the filename is a file
323 // inside them.
324 string after_prefix = filename.substr(after_prefix_start);
325 if (ContainsParentReference(after_prefix)) {
326 // We do not allow the file name to use "..".
327 return false;
328 }
329 result->assign(new_prefix);
330 if (!result->empty()) result->push_back('/');
331 result->append(after_prefix);
332 return true;
333 }
334 }
335 }
336
337 return false;
338 }
339
MapPath(const string & virtual_path,const string & disk_path)340 void DiskSourceTree::MapPath(const string& virtual_path,
341 const string& disk_path) {
342 mappings_.push_back(Mapping(virtual_path, CanonicalizePath(disk_path)));
343 }
344
345 DiskSourceTree::DiskFileToVirtualFileResult
DiskFileToVirtualFile(const string & disk_file,string * virtual_file,string * shadowing_disk_file)346 DiskSourceTree::DiskFileToVirtualFile(
347 const string& disk_file,
348 string* virtual_file,
349 string* shadowing_disk_file) {
350 int mapping_index = -1;
351 string canonical_disk_file = CanonicalizePath(disk_file);
352
353 for (int i = 0; i < mappings_.size(); i++) {
354 // Apply the mapping in reverse.
355 if (ApplyMapping(canonical_disk_file, mappings_[i].disk_path,
356 mappings_[i].virtual_path, virtual_file)) {
357 // Success.
358 mapping_index = i;
359 break;
360 }
361 }
362
363 if (mapping_index == -1) {
364 return NO_MAPPING;
365 }
366
367 // Iterate through all mappings with higher precedence and verify that none
368 // of them map this file to some other existing file.
369 for (int i = 0; i < mapping_index; i++) {
370 if (ApplyMapping(*virtual_file, mappings_[i].virtual_path,
371 mappings_[i].disk_path, shadowing_disk_file)) {
372 if (access(shadowing_disk_file->c_str(), F_OK) >= 0) {
373 // File exists.
374 return SHADOWED;
375 }
376 }
377 }
378 shadowing_disk_file->clear();
379
380 // Verify that we can open the file. Note that this also has the side-effect
381 // of verifying that we are not canonicalizing away any non-existent
382 // directories.
383 scoped_ptr<io::ZeroCopyInputStream> stream(OpenDiskFile(disk_file));
384 if (stream == NULL) {
385 return CANNOT_OPEN;
386 }
387
388 return SUCCESS;
389 }
390
VirtualFileToDiskFile(const string & virtual_file,string * disk_file)391 bool DiskSourceTree::VirtualFileToDiskFile(const string& virtual_file,
392 string* disk_file) {
393 scoped_ptr<io::ZeroCopyInputStream> stream(OpenVirtualFile(virtual_file,
394 disk_file));
395 return stream != NULL;
396 }
397
Open(const string & filename)398 io::ZeroCopyInputStream* DiskSourceTree::Open(const string& filename) {
399 return OpenVirtualFile(filename, NULL);
400 }
401
OpenVirtualFile(const string & virtual_file,string * disk_file)402 io::ZeroCopyInputStream* DiskSourceTree::OpenVirtualFile(
403 const string& virtual_file,
404 string* disk_file) {
405 if (virtual_file != CanonicalizePath(virtual_file) ||
406 ContainsParentReference(virtual_file)) {
407 // We do not allow importing of paths containing things like ".." or
408 // consecutive slashes since the compiler expects files to be uniquely
409 // identified by file name.
410 return NULL;
411 }
412
413 for (int i = 0; i < mappings_.size(); i++) {
414 string temp_disk_file;
415 if (ApplyMapping(virtual_file, mappings_[i].virtual_path,
416 mappings_[i].disk_path, &temp_disk_file)) {
417 io::ZeroCopyInputStream* stream = OpenDiskFile(temp_disk_file);
418 if (stream != NULL) {
419 if (disk_file != NULL) {
420 *disk_file = temp_disk_file;
421 }
422 return stream;
423 }
424
425 if (errno == EACCES) {
426 // The file exists but is not readable.
427 // TODO(kenton): Find a way to report this more nicely.
428 GOOGLE_LOG(WARNING) << "Read access is denied for file: " << temp_disk_file;
429 return NULL;
430 }
431 }
432 }
433
434 return NULL;
435 }
436
OpenDiskFile(const string & filename)437 io::ZeroCopyInputStream* DiskSourceTree::OpenDiskFile(
438 const string& filename) {
439 int file_descriptor;
440 do {
441 file_descriptor = open(filename.c_str(), O_RDONLY);
442 } while (file_descriptor < 0 && errno == EINTR);
443 if (file_descriptor >= 0) {
444 io::FileInputStream* result = new io::FileInputStream(file_descriptor);
445 result->SetCloseOnDelete(true);
446 return result;
447 } else {
448 return NULL;
449 }
450 }
451
452 } // namespace compiler
453 } // namespace protobuf
454 } // namespace google
455