1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc. All rights reserved.
3 //
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file or at
6 // https://developers.google.com/open-source/licenses/bsd
7
8 // Author: kenton@google.com (Kenton Varda)
9 // Based on original Protocol Buffers design by
10 // Sanjay Ghemawat, Jeff Dean, and others.
11
12 #include "google/protobuf/compiler/importer.h"
13
14 #ifdef _MSC_VER
15 #include <direct.h>
16 #else
17 #include <unistd.h>
18 #endif
19 #include <errno.h>
20 #include <fcntl.h>
21 #include <sys/stat.h>
22 #include <sys/types.h>
23
24 #include <algorithm>
25 #include <memory>
26 #include <vector>
27
28 #include "absl/strings/match.h"
29 #include "absl/strings/str_cat.h"
30 #include "absl/strings/str_join.h"
31 #include "absl/strings/str_replace.h"
32 #include "absl/strings/str_split.h"
33 #include "absl/strings/string_view.h"
34 #include "google/protobuf/compiler/parser.h"
35 #include "google/protobuf/io/io_win32.h"
36 #include "google/protobuf/io/tokenizer.h"
37 #include "google/protobuf/io/zero_copy_stream_impl.h"
38
39 namespace google {
40 namespace protobuf {
41 namespace compiler {
42
43 #ifdef _WIN32
44 // DO NOT include <io.h>, instead create functions in io_win32.{h,cc} and import
45 // them like we do below.
46 using google::protobuf::io::win32::access;
47 using google::protobuf::io::win32::open;
48 #endif
49
50 #if defined(_WIN32) || defined(__CYGWIN__)
51 #include "absl/strings/ascii.h"
52 #endif
53
54 // Returns true if the text looks like a Windows-style absolute path, starting
55 // with a drive letter. Example: "C:\foo". TODO: Share this with
56 // copy in command_line_interface.cc?
IsWindowsAbsolutePath(absl::string_view text)57 static bool IsWindowsAbsolutePath(absl::string_view text) {
58 #if defined(_WIN32) || defined(__CYGWIN__)
59 return text.size() >= 3 && text[1] == ':' && absl::ascii_isalpha(text[0]) &&
60 (text[2] == '/' || text[2] == '\\') && text.find_last_of(':') == 1;
61 #else
62 return false;
63 #endif
64 }
65
~MultiFileErrorCollector()66 MultiFileErrorCollector::~MultiFileErrorCollector() {}
67
68 // This class serves two purposes:
69 // - It implements the ErrorCollector interface (used by Tokenizer and Parser)
70 // in terms of MultiFileErrorCollector, using a particular filename.
71 // - It lets us check if any errors have occurred.
72 class SourceTreeDescriptorDatabase::SingleFileErrorCollector
73 : public io::ErrorCollector {
74 public:
SingleFileErrorCollector(const std::string & filename,MultiFileErrorCollector * multi_file_error_collector)75 SingleFileErrorCollector(const std::string& filename,
76 MultiFileErrorCollector* multi_file_error_collector)
77 : filename_(filename),
78 multi_file_error_collector_(multi_file_error_collector),
79 had_errors_(false) {}
~SingleFileErrorCollector()80 ~SingleFileErrorCollector() override {}
81
had_errors()82 bool had_errors() { return had_errors_; }
83
84 // implements ErrorCollector ---------------------------------------
RecordError(int line,int column,absl::string_view message)85 void RecordError(int line, int column, absl::string_view message) override {
86 if (multi_file_error_collector_ != nullptr) {
87 multi_file_error_collector_->RecordError(filename_, line, column,
88 message);
89 }
90 had_errors_ = true;
91 }
92
93 private:
94 std::string filename_;
95 MultiFileErrorCollector* multi_file_error_collector_;
96 bool had_errors_;
97 };
98
99 // ===================================================================
100
SourceTreeDescriptorDatabase(SourceTree * source_tree)101 SourceTreeDescriptorDatabase::SourceTreeDescriptorDatabase(
102 SourceTree* source_tree)
103 : source_tree_(source_tree),
104 fallback_database_(nullptr),
105 error_collector_(nullptr),
106 using_validation_error_collector_(false),
107 validation_error_collector_(this) {}
108
SourceTreeDescriptorDatabase(SourceTree * source_tree,DescriptorDatabase * fallback_database)109 SourceTreeDescriptorDatabase::SourceTreeDescriptorDatabase(
110 SourceTree* source_tree, DescriptorDatabase* fallback_database)
111 : source_tree_(source_tree),
112 fallback_database_(fallback_database),
113 error_collector_(nullptr),
114 using_validation_error_collector_(false),
115 validation_error_collector_(this) {}
116
~SourceTreeDescriptorDatabase()117 SourceTreeDescriptorDatabase::~SourceTreeDescriptorDatabase() {}
118
FindFileByName(const std::string & filename,FileDescriptorProto * output)119 bool SourceTreeDescriptorDatabase::FindFileByName(const std::string& filename,
120 FileDescriptorProto* output) {
121 std::unique_ptr<io::ZeroCopyInputStream> input(source_tree_->Open(filename));
122 if (input == nullptr) {
123 if (fallback_database_ != nullptr &&
124 fallback_database_->FindFileByName(filename, output)) {
125 return true;
126 }
127 if (error_collector_ != nullptr) {
128 error_collector_->RecordError(filename, -1, 0,
129 source_tree_->GetLastErrorMessage());
130 }
131 return false;
132 }
133
134 // Set up the tokenizer and parser.
135 SingleFileErrorCollector file_error_collector(filename, error_collector_);
136 io::Tokenizer tokenizer(input.get(), &file_error_collector);
137
138 Parser parser;
139 if (error_collector_ != nullptr) {
140 parser.RecordErrorsTo(&file_error_collector);
141 }
142 if (using_validation_error_collector_) {
143 parser.RecordSourceLocationsTo(&source_locations_);
144 }
145
146 // Parse it.
147 output->set_name(filename);
148 return parser.Parse(&tokenizer, output) && !file_error_collector.had_errors();
149 }
150
FindFileContainingSymbol(const std::string & symbol_name,FileDescriptorProto * output)151 bool SourceTreeDescriptorDatabase::FindFileContainingSymbol(
152 const std::string& symbol_name, FileDescriptorProto* output) {
153 return false;
154 }
155
FindFileContainingExtension(const std::string & containing_type,int field_number,FileDescriptorProto * output)156 bool SourceTreeDescriptorDatabase::FindFileContainingExtension(
157 const std::string& containing_type, int field_number,
158 FileDescriptorProto* output) {
159 return false;
160 }
161
162 // -------------------------------------------------------------------
163
164 SourceTreeDescriptorDatabase::ValidationErrorCollector::
ValidationErrorCollector(SourceTreeDescriptorDatabase * owner)165 ValidationErrorCollector(SourceTreeDescriptorDatabase* owner)
166 : owner_(owner) {}
167
168 SourceTreeDescriptorDatabase::ValidationErrorCollector::
~ValidationErrorCollector()169 ~ValidationErrorCollector() {}
170
RecordError(absl::string_view filename,absl::string_view element_name,const Message * descriptor,ErrorLocation location,absl::string_view message)171 void SourceTreeDescriptorDatabase::ValidationErrorCollector::RecordError(
172 absl::string_view filename, absl::string_view element_name,
173 const Message* descriptor, ErrorLocation location,
174 absl::string_view message) {
175 if (owner_->error_collector_ == nullptr) return;
176
177 int line, column;
178 if (location == DescriptorPool::ErrorCollector::IMPORT) {
179 owner_->source_locations_.FindImport(descriptor, element_name, &line,
180 &column);
181 } else {
182 owner_->source_locations_.Find(descriptor, location, &line, &column);
183 }
184 owner_->error_collector_->RecordError(filename, line, column, message);
185 }
186
RecordWarning(absl::string_view filename,absl::string_view element_name,const Message * descriptor,ErrorLocation location,absl::string_view message)187 void SourceTreeDescriptorDatabase::ValidationErrorCollector::RecordWarning(
188 absl::string_view filename, absl::string_view element_name,
189 const Message* descriptor, ErrorLocation location,
190 absl::string_view message) {
191 if (owner_->error_collector_ == nullptr) return;
192
193 int line, column;
194 if (location == DescriptorPool::ErrorCollector::IMPORT) {
195 owner_->source_locations_.FindImport(descriptor, element_name, &line,
196 &column);
197 } else {
198 owner_->source_locations_.Find(descriptor, location, &line, &column);
199 }
200 owner_->error_collector_->RecordWarning(filename, line, column, message);
201 }
202
203 // ===================================================================
204
Importer(SourceTree * source_tree,MultiFileErrorCollector * error_collector)205 Importer::Importer(SourceTree* source_tree,
206 MultiFileErrorCollector* error_collector)
207 : database_(source_tree),
208 pool_(&database_, database_.GetValidationErrorCollector()) {
209 pool_.EnforceWeakDependencies(true);
210 database_.RecordErrorsTo(error_collector);
211 }
212
~Importer()213 Importer::~Importer() {}
214
Import(const std::string & filename)215 const FileDescriptor* Importer::Import(const std::string& filename) {
216 return pool_.FindFileByName(filename);
217 }
218
AddDirectInputFile(absl::string_view file_name,bool is_error)219 void Importer::AddDirectInputFile(absl::string_view file_name, bool is_error) {
220 pool_.AddDirectInputFile(file_name, is_error);
221 }
222
ClearDirectInputFiles()223 void Importer::ClearDirectInputFiles() { pool_.ClearDirectInputFiles(); }
224
225
226 // ===================================================================
227
~SourceTree()228 SourceTree::~SourceTree() {}
229
GetLastErrorMessage()230 std::string SourceTree::GetLastErrorMessage() { return "File not found."; }
231
DiskSourceTree()232 DiskSourceTree::DiskSourceTree() {}
233
~DiskSourceTree()234 DiskSourceTree::~DiskSourceTree() {}
235
236 // Given a path, returns an equivalent path with these changes:
237 // - On Windows, any backslashes are replaced with forward slashes.
238 // - Any instances of the directory "." are removed.
239 // - Any consecutive '/'s are collapsed into a single slash.
240 // Note that the resulting string may be empty.
241 //
242 // TODO: It would be nice to handle "..", e.g. so that we can figure
243 // out that "foo/bar.proto" is inside "baz/../foo". However, if baz is a
244 // symlink or doesn't exist, then things get complicated, and we can't
245 // actually determine this without investigating the filesystem, probably
246 // in non-portable ways. So, we punt.
247 //
248 // TODO: It would be nice to use realpath() here except that it
249 // resolves symbolic links. This could cause problems if people place
250 // symbolic links in their source tree. For example, if you executed:
251 // protoc --proto_path=foo foo/bar/baz.proto
252 // then if foo/bar is a symbolic link, foo/bar/baz.proto will canonicalize
253 // to a path which does not appear to be under foo, and thus the compiler
254 // will complain that baz.proto is not inside the --proto_path.
CanonicalizePath(absl::string_view path)255 static std::string CanonicalizePath(absl::string_view path) {
256 #ifdef _WIN32
257 // The Win32 API accepts forward slashes as a path delimiter even though
258 // backslashes are standard. Let's avoid confusion and use only forward
259 // slashes.
260 std::string path_str;
261 if (absl::StartsWith(path, "\\\\")) {
262 // Avoid converting two leading backslashes.
263 path_str = absl::StrCat("\\\\",
264 absl::StrReplaceAll(path.substr(2), {{"\\", "/"}}));
265 } else {
266 path_str = absl::StrReplaceAll(path, {{"\\", "/"}});
267 }
268 path = path_str;
269 #endif
270
271 std::vector<absl::string_view> canonical_parts;
272 if (!path.empty() && path.front() == '/') canonical_parts.push_back("");
273 for (absl::string_view part : absl::StrSplit(path, '/', absl::SkipEmpty())) {
274 if (part == ".") {
275 // Ignore.
276 } else {
277 canonical_parts.push_back(part);
278 }
279 }
280 if (!path.empty() && path.back() == '/') canonical_parts.push_back("");
281
282 return absl::StrJoin(canonical_parts, "/");
283 }
284
ContainsParentReference(absl::string_view path)285 static inline bool ContainsParentReference(absl::string_view path) {
286 return path == ".." || absl::StartsWith(path, "../") ||
287 absl::EndsWith(path, "/..") || absl::StrContains(path, "/../");
288 }
289
290 // Maps a file from an old location to a new one. Typically, old_prefix is
291 // a virtual path and new_prefix is its corresponding disk path. Returns
292 // false if the filename did not start with old_prefix, otherwise replaces
293 // old_prefix with new_prefix and stores the result in *result. Examples:
294 // string result;
295 // assert(ApplyMapping("foo/bar", "", "baz", &result));
296 // assert(result == "baz/foo/bar");
297 //
298 // assert(ApplyMapping("foo/bar", "foo", "baz", &result));
299 // assert(result == "baz/bar");
300 //
301 // assert(ApplyMapping("foo", "foo", "bar", &result));
302 // assert(result == "bar");
303 //
304 // assert(!ApplyMapping("foo/bar", "baz", "qux", &result));
305 // assert(!ApplyMapping("foo/bar", "baz", "qux", &result));
306 // assert(!ApplyMapping("foobar", "foo", "baz", &result));
ApplyMapping(absl::string_view filename,absl::string_view old_prefix,absl::string_view new_prefix,std::string * result)307 static bool ApplyMapping(absl::string_view filename,
308 absl::string_view old_prefix,
309 absl::string_view new_prefix, std::string* result) {
310 if (old_prefix.empty()) {
311 // old_prefix matches any relative path.
312 if (ContainsParentReference(filename)) {
313 // We do not allow the file name to use "..".
314 return false;
315 }
316 if (absl::StartsWith(filename, "/") || IsWindowsAbsolutePath(filename)) {
317 // This is an absolute path, so it isn't matched by the empty string.
318 return false;
319 }
320 result->assign(std::string(new_prefix));
321 if (!result->empty()) result->push_back('/');
322 result->append(std::string(filename));
323 return true;
324 } else if (absl::StartsWith(filename, old_prefix)) {
325 // old_prefix is a prefix of the filename. Is it the whole filename?
326 if (filename.size() == old_prefix.size()) {
327 // Yep, it's an exact match.
328 *result = std::string(new_prefix);
329 return true;
330 } else {
331 // Not an exact match. Is the next character a '/'? Otherwise,
332 // this isn't actually a match at all. E.g. the prefix "foo/bar"
333 // does not match the filename "foo/barbaz".
334 int after_prefix_start = -1;
335 if (filename[old_prefix.size()] == '/') {
336 after_prefix_start = old_prefix.size() + 1;
337 } else if (filename[old_prefix.size() - 1] == '/') {
338 // old_prefix is never empty, and canonicalized paths never have
339 // consecutive '/' characters.
340 after_prefix_start = old_prefix.size();
341 }
342 if (after_prefix_start != -1) {
343 // Yep. So the prefixes are directories and the filename is a file
344 // inside them.
345 absl::string_view after_prefix = filename.substr(after_prefix_start);
346 if (ContainsParentReference(after_prefix)) {
347 // We do not allow the file name to use "..".
348 return false;
349 }
350 result->assign(std::string(new_prefix));
351 if (!result->empty()) result->push_back('/');
352 result->append(std::string(after_prefix));
353 return true;
354 }
355 }
356 }
357
358 return false;
359 }
360
MapPath(absl::string_view virtual_path,absl::string_view disk_path)361 void DiskSourceTree::MapPath(absl::string_view virtual_path,
362 absl::string_view disk_path) {
363 mappings_.push_back(
364 Mapping(std::string(virtual_path), CanonicalizePath(disk_path)));
365 }
366
367 DiskSourceTree::DiskFileToVirtualFileResult
DiskFileToVirtualFile(absl::string_view disk_file,std::string * virtual_file,std::string * shadowing_disk_file)368 DiskSourceTree::DiskFileToVirtualFile(absl::string_view disk_file,
369 std::string* virtual_file,
370 std::string* shadowing_disk_file) {
371 int mapping_index = -1;
372 std::string canonical_disk_file = CanonicalizePath(disk_file);
373
374 for (int i = 0; i < mappings_.size(); i++) {
375 // Apply the mapping in reverse.
376 if (ApplyMapping(canonical_disk_file, mappings_[i].disk_path,
377 mappings_[i].virtual_path, virtual_file)) {
378 // Success.
379 mapping_index = i;
380 break;
381 }
382 }
383
384 if (mapping_index == -1) {
385 return NO_MAPPING;
386 }
387
388 // Iterate through all mappings with higher precedence and verify that none
389 // of them map this file to some other existing file.
390 for (int i = 0; i < mapping_index; i++) {
391 if (ApplyMapping(*virtual_file, mappings_[i].virtual_path,
392 mappings_[i].disk_path, shadowing_disk_file)) {
393 if (access(shadowing_disk_file->c_str(), F_OK) >= 0) {
394 // File exists.
395 return SHADOWED;
396 }
397 }
398 }
399 shadowing_disk_file->clear();
400
401 // Verify that we can open the file. Note that this also has the side-effect
402 // of verifying that we are not canonicalizing away any non-existent
403 // directories.
404 std::unique_ptr<io::ZeroCopyInputStream> stream(OpenDiskFile(disk_file));
405 if (stream == nullptr) {
406 return CANNOT_OPEN;
407 }
408
409 return SUCCESS;
410 }
411
VirtualFileToDiskFile(absl::string_view virtual_file,std::string * disk_file)412 bool DiskSourceTree::VirtualFileToDiskFile(absl::string_view virtual_file,
413 std::string* disk_file) {
414 std::unique_ptr<io::ZeroCopyInputStream> stream(
415 OpenVirtualFile(virtual_file, disk_file));
416 return stream != nullptr;
417 }
418
Open(absl::string_view filename)419 io::ZeroCopyInputStream* DiskSourceTree::Open(absl::string_view filename) {
420 return OpenVirtualFile(filename, nullptr);
421 }
422
GetLastErrorMessage()423 std::string DiskSourceTree::GetLastErrorMessage() {
424 return last_error_message_;
425 }
426
OpenVirtualFile(absl::string_view virtual_file,std::string * disk_file)427 io::ZeroCopyInputStream* DiskSourceTree::OpenVirtualFile(
428 absl::string_view virtual_file, std::string* disk_file) {
429 if (virtual_file != CanonicalizePath(virtual_file) ||
430 ContainsParentReference(virtual_file)) {
431 // We do not allow importing of paths containing things like ".." or
432 // consecutive slashes since the compiler expects files to be uniquely
433 // identified by file name.
434 last_error_message_ =
435 "Backslashes, consecutive slashes, \".\", or \"..\" "
436 "are not allowed in the virtual path";
437 return nullptr;
438 }
439
440 for (const auto& mapping : mappings_) {
441 std::string temp_disk_file;
442 if (ApplyMapping(virtual_file, mapping.virtual_path, mapping.disk_path,
443 &temp_disk_file)) {
444 io::ZeroCopyInputStream* stream = OpenDiskFile(temp_disk_file);
445 if (stream != nullptr) {
446 if (disk_file != nullptr) {
447 *disk_file = temp_disk_file;
448 }
449 return stream;
450 }
451
452 if (errno == EACCES) {
453 // The file exists but is not readable.
454 last_error_message_ =
455 absl::StrCat("Read access is denied for file: ", temp_disk_file);
456 return nullptr;
457 }
458 }
459 }
460 last_error_message_ = "File not found.";
461 return nullptr;
462 }
463
OpenDiskFile(absl::string_view filename)464 io::ZeroCopyInputStream* DiskSourceTree::OpenDiskFile(
465 absl::string_view filename) {
466 struct stat sb;
467 int ret = 0;
468 do {
469 ret = stat(std::string(filename).c_str(), &sb);
470 } while (ret != 0 && errno == EINTR);
471 #if defined(_WIN32)
472 if (ret == 0 && sb.st_mode & S_IFDIR) {
473 last_error_message_ = "Input file is a directory.";
474 return nullptr;
475 }
476 #else
477 if (ret == 0 && S_ISDIR(sb.st_mode)) {
478 last_error_message_ = "Input file is a directory.";
479 return nullptr;
480 }
481 #endif
482 int file_descriptor;
483 do {
484 file_descriptor = open(std::string(filename).c_str(), O_RDONLY);
485 } while (file_descriptor < 0 && errno == EINTR);
486 if (file_descriptor >= 0) {
487 io::FileInputStream* result = new io::FileInputStream(file_descriptor);
488 result->SetCloseOnDelete(true);
489 return result;
490 } else {
491 return nullptr;
492 }
493 }
494
495 } // namespace compiler
496 } // namespace protobuf
497 } // namespace google
498