• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 //
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file or at
6 // https://developers.google.com/open-source/licenses/bsd
7 
8 // Author: laszlocsomor@google.com (Laszlo Csomor)
9 //  Based on original Protocol Buffers design by
10 //  Sanjay Ghemawat, Jeff Dean, and others.
11 
12 // Implementation for long-path-aware open/mkdir/access/etc. on Windows, as well
13 // as for the supporting utility functions.
14 //
15 // These functions convert the input path to an absolute Windows path
16 // with "\\?\" prefix, then pass that to _wopen/_wmkdir/_waccess/etc.
17 // (declared in <io.h>) respectively. This allows working with files/directories
18 // whose paths are longer than MAX_PATH (260 chars).
19 //
20 // This file is only used on Windows, it's empty on other platforms.
21 
22 #if defined(_WIN32) && !defined(_XBOX_ONE)
23 
24 // Comment this out to fall back to using the ANSI versions (open, mkdir, ...)
25 // instead of the Unicode ones (_wopen, _wmkdir, ...). Doing so can be useful to
26 // debug failing tests if that's caused by the long path support.
27 #define SUPPORT_LONGPATHS
28 
29 #include "google/protobuf/io/io_win32.h"
30 
31 #include <direct.h>
32 #include <errno.h>
33 #include <fcntl.h>
34 #include <io.h>
35 #include <sys/stat.h>
36 #include <sys/types.h>
37 
38 #include "absl/strings/ascii.h"
39 
40 #ifndef WIN32_LEAN_AND_MEAN
41 #define WIN32_LEAN_AND_MEAN 1
42 #endif
43 
44 #include <windows.h>
45 
46 #include <memory>
47 #include <sstream>
48 #include <string>
49 #include <vector>
50 
51 namespace google {
52 namespace protobuf {
53 namespace io {
54 namespace win32 {
55 namespace {
56 
57 using std::string;
58 using std::wstring;
59 
60 template <typename char_type>
61 struct CharTraits {
62   static bool is_alpha(char_type ch);
63 };
64 
65 template <>
66 struct CharTraits<char> {
is_alphagoogle::protobuf::io::win32::__anon59bbff6c0111::CharTraits67   static bool is_alpha(char ch) { return absl::ascii_isalpha(ch); }
68 };
69 
70 template <>
71 struct CharTraits<wchar_t> {
is_alphagoogle::protobuf::io::win32::__anon59bbff6c0111::CharTraits72   static bool is_alpha(wchar_t ch) {
73     return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');
74   }
75 };
76 
// Returns true if `s` is a null pointer or points at an empty
// (immediately NUL-terminated) string.
template <typename char_type>
bool null_or_empty(const char_type* s) {
  if (s == nullptr) {
    return true;
  }
  return s[0] == 0;
}
81 
82 // Returns true if the path starts with a drive letter, e.g. "c:".
83 // Note that this won't check for the "\" after the drive letter, so this also
84 // returns true for "c:foo" (which is "c:\${PWD}\foo").
85 // This check requires that a path not have a longpath prefix ("\\?\").
86 template <typename char_type>
has_drive_letter(const char_type * ch)87 bool has_drive_letter(const char_type* ch) {
88   return CharTraits<char_type>::is_alpha(ch[0]) && ch[1] == ':';
89 }
90 
// Returns true if the path begins with the longpath prefix ("\\?\").
// Characters are compared left to right and the scan stops at the first
// mismatch, so strings shorter than the prefix are never read past their
// terminating NUL.
template <typename char_type>
bool has_longpath_prefix(const char_type* path) {
  static const char kPrefix[] = {'\\', '\\', '?', '\\'};
  for (int i = 0; i < 4; ++i) {
    if (path[i] != kPrefix[i]) {
      return false;
    }
  }
  return true;
}
97 
// Returns true if `c` is a path separator: forward slash or backslash.
template <typename char_type>
bool is_separator(char_type c) {
  switch (c) {
    case '/':
    case '\\':
      return true;
    default:
      return false;
  }
}
102 
// Returns true if the path begins with a full drive specifier, i.e. a drive
// letter, a colon and a path separator (e.g. "c:\" or "c:/").
template <typename char_type>
bool is_path_absolute(const char_type* path) {
  if (!has_drive_letter(path)) {
    return false;
  }
  return is_separator(path[2]);
}
108 
// Returns true if the path has a drive letter that is NOT followed by a path
// separator, e.g. "c:" or "c:foo".
template <typename char_type>
bool is_drive_relative(const char_type* path) {
  if (!has_drive_letter(path)) {
    return false;
  }
  // A terminating NUL at path[2] is not a separator either, so the bare
  // drive form "c:" is covered by the same test.
  return !is_separator(path[2]);
}
113 
// Joins `path1` and `path2` with exactly one separator between them.
//
// `path2` is returned unchanged when `path1` is empty or when `path2` is
// already absolute or longpath-prefixed; `path1` is returned unchanged when
// `path2` is empty. Otherwise a duplicate separator at the seam is dropped
// and a missing one is inserted as a backslash.
wstring join_paths(const wstring& path1, const wstring& path2) {
  const bool tail_stands_alone = path1.empty() ||
                                 is_path_absolute(path2.c_str()) ||
                                 has_longpath_prefix(path2.c_str());
  if (tail_stands_alone) {
    return path2;
  }
  if (path2.empty()) {
    return path1;
  }

  const bool head_ends_with_sep = is_separator(path1.back());
  const bool tail_starts_with_sep = is_separator(path2.front());

  wstring joined(path1);
  if (head_ends_with_sep && tail_starts_with_sep) {
    // Both sides bring a separator: skip the one at the start of `path2`.
    joined.append(path2, 1, wstring::npos);
  } else {
    if (!head_ends_with_sep && !tail_starts_with_sep) {
      // Neither side brings a separator: supply one.
      joined += L'\\';
    }
    joined += path2;
  }
  return joined;
}
131 
normalize(wstring path)132 wstring normalize(wstring path) {
133   if (has_longpath_prefix(path.c_str())) {
134     path = path.substr(4);
135   }
136 
137   static const wstring dot(L".");
138   static const wstring dotdot(L"..");
139   const WCHAR* p = path.c_str();
140 
141   std::vector<wstring> segments;
142   int segment_start = -1;
143   // Find the path segments in `path` (separated by "/").
144   for (int i = 0;; ++i) {
145     if (!is_separator(p[i]) && p[i] != L'\0') {
146       // The current character does not end a segment, so start one unless it's
147       // already started.
148       if (segment_start < 0) {
149         segment_start = i;
150       }
151     } else if (segment_start >= 0 && i > segment_start) {
152       // The current character is "/" or "\0", so this ends a segment.
153       // Add that to `segments` if there's anything to add; handle "." and "..".
154       wstring segment(p, segment_start, i - segment_start);
155       segment_start = -1;
156       if (segment == dotdot) {
157         if (!segments.empty() &&
158             (!has_drive_letter(segments[0].c_str()) || segments.size() > 1)) {
159           segments.pop_back();
160         }
161       } else if (segment != dot && !segment.empty()) {
162         segments.push_back(segment);
163       }
164     }
165     if (p[i] == L'\0') {
166       break;
167     }
168   }
169 
170   // Handle the case when `path` is just a drive specifier (or some degenerate
171   // form of it, e.g. "c:\..").
172   if (segments.size() == 1 && segments[0].size() == 2 &&
173       has_drive_letter(segments[0].c_str())) {
174     return segments[0] + L'\\';
175   }
176 
177   // Join all segments.
178   bool first = true;
179   std::wstringstream result;
180   for (int i = 0; i < segments.size(); ++i) {
181     if (!first) {
182       result << L'\\';
183     }
184     first = false;
185     result << segments[i];
186   }
187   // Preserve trailing separator if the input contained it.
188   if (!path.empty() && is_separator(p[path.size() - 1])) {
189     result << L'\\';
190   }
191   return result.str();
192 }
193 
as_windows_path(const char * path,wstring * result)194 bool as_windows_path(const char* path, wstring* result) {
195   if (null_or_empty(path)) {
196     result->clear();
197     return true;
198   }
199   wstring wpath;
200   if (!strings::utf8_to_wcs(path, &wpath)) {
201     return false;
202   }
203   if (has_longpath_prefix(wpath.c_str())) {
204     *result = wpath;
205     return true;
206   }
207   if (is_separator(path[0]) || is_drive_relative(path)) {
208     return false;
209   }
210 
211 
212   if (!is_path_absolute(wpath.c_str())) {
213     int size = ::GetCurrentDirectoryW(0, nullptr);
214     if (size == 0 && GetLastError() != ERROR_INSUFFICIENT_BUFFER) {
215       return false;
216     }
217     std::unique_ptr<WCHAR[]> wcwd(new WCHAR[size]);
218     ::GetCurrentDirectoryW(size, wcwd.get());
219     wpath = join_paths(wcwd.get(), wpath);
220   }
221   wpath = normalize(wpath);
222   if (!has_longpath_prefix(wpath.c_str())) {
223     // Add the "\\?\" prefix unconditionally. This way we prevent the Win32 API
224     // from processing the path and "helpfully" removing trailing dots from the
225     // path, for example.
226     // See https://github.com/bazelbuild/bazel/issues/2935
227     wpath = wstring(L"\\\\?\\") + wpath;
228   }
229   *result = wpath;
230   return true;
231 }
232 
233 }  // namespace
234 
open(const char * path,int flags,int mode)235 int open(const char* path, int flags, int mode) {
236 #ifdef SUPPORT_LONGPATHS
237   wstring wpath;
238   if (!as_windows_path(path, &wpath)) {
239     errno = ENOENT;
240     return -1;
241   }
242   return ::_wopen(wpath.c_str(), flags, mode);
243 #else
244   return ::_open(path, flags, mode);
245 #endif
246 }
247 
mkdir(const char * path,int)248 int mkdir(const char* path, int /*_mode*/) {
249 #ifdef SUPPORT_LONGPATHS
250   wstring wpath;
251   if (!as_windows_path(path, &wpath)) {
252     errno = ENOENT;
253     return -1;
254   }
255   return ::_wmkdir(wpath.c_str());
256 #else   // not SUPPORT_LONGPATHS
257   return ::_mkdir(path);
258 #endif  // not SUPPORT_LONGPATHS
259 }
260 
access(const char * path,int mode)261 int access(const char* path, int mode) {
262 #ifdef SUPPORT_LONGPATHS
263   wstring wpath;
264   if (!as_windows_path(path, &wpath)) {
265     errno = ENOENT;
266     return -1;
267   }
268   return ::_waccess(wpath.c_str(), mode);
269 #else
270   return ::_access(path, mode);
271 #endif
272 }
273 
chdir(const char * path)274 int chdir(const char* path) {
275 #ifdef SUPPORT_LONGPATHS
276   wstring wpath;
277   if (!as_windows_path(path, &wpath)) {
278     errno = ENOENT;
279     return -1;
280   }
281   return ::_wchdir(wpath.c_str());
282 #else
283   return ::_chdir(path);
284 #endif
285 }
286 
stat(const char * path,struct _stat * buffer)287 int stat(const char* path, struct _stat* buffer) {
288 #ifdef SUPPORT_LONGPATHS
289   wstring wpath;
290   if (!as_windows_path(path, &wpath)) {
291     errno = ENOENT;
292     return -1;
293   }
294   return ::_wstat(wpath.c_str(), buffer);
295 #else   // not SUPPORT_LONGPATHS
296   return ::_stat(path, buffer);
297 #endif  // not SUPPORT_LONGPATHS
298 }
299 
// Long-path-aware replacement for fopen(). With SUPPORT_LONGPATHS:
// - a null or empty `path` fails with errno = EINVAL;
// - a `path` that as_windows_path() cannot convert fails with errno = ENOENT;
// - a `mode` that cannot be converted to wide characters fails with
//   errno = EINVAL;
// - otherwise the converted path and mode are handed to _wfopen().
// Failures return nullptr. Without SUPPORT_LONGPATHS this forwards directly
// to ::fopen().
FILE* fopen(const char* path, const char* mode) {
#ifdef SUPPORT_LONGPATHS
  wstring long_path;
  wstring wide_mode;
  // The checks run in order and stop at the first failure; `error` records
  // which errno value that failure maps to.
  int error = 0;
  if (null_or_empty(path)) {
    error = EINVAL;
  } else if (!as_windows_path(path, &long_path)) {
    error = ENOENT;
  } else if (!strings::utf8_to_wcs(mode, &wide_mode)) {
    error = EINVAL;
  }
  if (error != 0) {
    errno = error;
    return nullptr;
  }
  return ::_wfopen(long_path.c_str(), wide_mode.c_str());
#else
  return ::fopen(path, mode);
#endif
}
321 
close(int fd)322 int close(int fd) { return ::_close(fd); }
323 
dup(int fd)324 int dup(int fd) { return ::_dup(fd); }
325 
dup2(int fd1,int fd2)326 int dup2(int fd1, int fd2) { return ::_dup2(fd1, fd2); }
327 
read(int fd,void * buffer,size_t size)328 int read(int fd, void* buffer, size_t size) {
329   return ::_read(fd, buffer, size);
330 }
331 
setmode(int fd,int mode)332 int setmode(int fd, int mode) { return ::_setmode(fd, mode); }
333 
write(int fd,const void * buffer,size_t size)334 int write(int fd, const void* buffer, size_t size) {
335   return ::_write(fd, buffer, size);
336 }
337 
testonly_utf8_to_winpath(const char * path)338 wstring testonly_utf8_to_winpath(const char* path) {
339   wstring wpath;
340   return as_windows_path(path, &wpath) ? wpath : wstring();
341 }
342 
ExpandWildcards(const string & path,std::function<void (const string &)> consume)343 ExpandWildcardsResult ExpandWildcards(
344     const string& path, std::function<void(const string&)> consume) {
345   if (path.find_first_of("*?") == string::npos) {
346     // There are no wildcards in the path, we don't need to expand it.
347     consume(path);
348     return ExpandWildcardsResult::kSuccess;
349   }
350 
351   wstring wpath;
352   if (!as_windows_path(path.c_str(), &wpath)) {
353     return ExpandWildcardsResult::kErrorInputPathConversion;
354   }
355 
356   static const wstring kDot = L".";
357   static const wstring kDotDot = L"..";
358   WIN32_FIND_DATAW metadata;
359   HANDLE handle = ::FindFirstFileW(wpath.c_str(), &metadata);
360   if (handle == INVALID_HANDLE_VALUE) {
361     // The pattern does not match any files (or directories).
362     return ExpandWildcardsResult::kErrorNoMatchingFile;
363   }
364 
365   string::size_type pos = path.find_last_of("\\/");
366   string dirname;
367   if (pos != string::npos) {
368     dirname = path.substr(0, pos + 1);
369   }
370 
371   ExpandWildcardsResult matched = ExpandWildcardsResult::kErrorNoMatchingFile;
372   do {
373     // Ignore ".", "..", and directories.
374     if ((metadata.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) == 0 &&
375         kDot != metadata.cFileName && kDotDot != metadata.cFileName) {
376       matched = ExpandWildcardsResult::kSuccess;
377       string filename;
378       if (!strings::wcs_to_utf8(metadata.cFileName, &filename)) {
379         matched = ExpandWildcardsResult::kErrorOutputPathConversion;
380         break;
381       }
382 
383       if (dirname.empty()) {
384         consume(filename);
385       } else {
386         consume(dirname + filename);
387       }
388     }
389   } while (::FindNextFileW(handle, &metadata));
390   FindClose(handle);
391   return matched;
392 }
393 
394 namespace strings {
395 
wcs_to_mbs(const WCHAR * s,string * out,bool outUtf8)396 bool wcs_to_mbs(const WCHAR* s, string* out, bool outUtf8) {
397   if (null_or_empty(s)) {
398     out->clear();
399     return true;
400   }
401   BOOL usedDefaultChar = FALSE;
402   SetLastError(0);
403   int size = WideCharToMultiByte(
404       outUtf8 ? CP_UTF8 : CP_ACP, 0, s, -1, nullptr, 0, nullptr,
405       outUtf8 ? nullptr : &usedDefaultChar);
406   if ((size == 0 && GetLastError() != ERROR_INSUFFICIENT_BUFFER)
407       || usedDefaultChar) {
408     return false;
409   }
410   std::unique_ptr<CHAR[]> astr(new CHAR[size]);
411   WideCharToMultiByte(
412       outUtf8 ? CP_UTF8 : CP_ACP, 0, s, -1, astr.get(), size, nullptr, nullptr);
413   out->assign(astr.get());
414   return true;
415 }
416 
mbs_to_wcs(const char * s,wstring * out,bool inUtf8)417 bool mbs_to_wcs(const char* s, wstring* out, bool inUtf8) {
418   if (null_or_empty(s)) {
419     out->clear();
420     return true;
421   }
422 
423   SetLastError(0);
424   int size =
425       MultiByteToWideChar(inUtf8 ? CP_UTF8 : CP_ACP, 0, s, -1, nullptr, 0);
426   if (size == 0 && GetLastError() != ERROR_INSUFFICIENT_BUFFER) {
427     return false;
428   }
429   std::unique_ptr<WCHAR[]> wstr(new WCHAR[size]);
430   MultiByteToWideChar(
431       inUtf8 ? CP_UTF8 : CP_ACP, 0, s, -1, wstr.get(), size + 1);
432   out->assign(wstr.get());
433   return true;
434 }
435 
utf8_to_wcs(const char * input,wstring * out)436 bool utf8_to_wcs(const char* input, wstring* out) {
437   return mbs_to_wcs(input, out, true);
438 }
439 
wcs_to_utf8(const wchar_t * input,string * out)440 bool wcs_to_utf8(const wchar_t* input, string* out) {
441   return wcs_to_mbs(input, out, true);
442 }
443 
444 }  // namespace strings
445 }  // namespace win32
446 }  // namespace io
447 }  // namespace protobuf
448 }  // namespace google
449 
450 #endif  // defined(_WIN32) && !defined(_XBOX_ONE)
451