• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2014 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "net/base/filename_util.h"
6 
7 #include <set>
8 
9 #include "base/files/file_path.h"
10 #include "base/files/file_util.h"
11 #include "base/path_service.h"
12 #include "base/strings/escape.h"
13 #include "base/strings/string_util.h"
14 #include "base/strings/sys_string_conversions.h"
15 #include "base/strings/utf_string_conversions.h"
16 #include "base/threading/thread_restrictions.h"
17 #include "build/build_config.h"
18 #include "net/base/filename_util_internal.h"
19 #include "net/base/net_string_util.h"
20 #include "net/base/url_util.h"
21 #include "net/http/http_content_disposition.h"
22 #include "url/gurl.h"
23 
24 namespace net {
25 
// Leading text of every URL built by FilePathToFileURL(); the (escaped) file
// path is appended directly after it.
constexpr char kFileURLPrefix[] = "file:///";
28 
FilePathToFileURL(const base::FilePath & path)29 GURL FilePathToFileURL(const base::FilePath& path) {
30   // Produce a URL like "file:///C:/foo" for a regular file, or
31   // "file://///server/path" for UNC. The URL canonicalizer will fix up the
32   // latter case to be the canonical UNC form: "file://server/path"
33   std::string url_string(kFileURLPrefix);
34 
35   // GURL() strips some whitespace and trailing control chars which are valid
36   // in file paths. It also interprets chars such as `%;#?` and maybe `\`, so we
37   // must percent encode these first. Reserve max possible length up front.
38   std::string utf8_path = path.AsUTF8Unsafe();
39   url_string.reserve(url_string.size() + (3 * utf8_path.size()));
40 
41   for (auto c : utf8_path) {
42     if (c == '%' || c == ';' || c == '#' || c == '?' ||
43 #if BUILDFLAG(IS_POSIX) || BUILDFLAG(IS_FUCHSIA)
44         c == '\\' ||
45 #endif
46         c <= ' ') {
47       static const char kHexChars[] = "0123456789ABCDEF";
48       url_string += '%';
49       url_string += kHexChars[(c >> 4) & 0xf];
50       url_string += kHexChars[c & 0xf];
51     } else {
52       url_string += c;
53     }
54   }
55 
56   return GURL(url_string);
57 }
58 
FileURLToFilePath(const GURL & url,base::FilePath * file_path)59 bool FileURLToFilePath(const GURL& url, base::FilePath* file_path) {
60   *file_path = base::FilePath();
61   base::FilePath::StringType& file_path_str =
62       const_cast<base::FilePath::StringType&>(file_path->value());
63   file_path_str.clear();
64 
65   if (!url.is_valid())
66     return false;
67 
68   // We may want to change this to a CHECK in the future.
69   if (!url.SchemeIsFile())
70     return false;
71 
72 #if BUILDFLAG(IS_WIN)
73   std::string path;
74   std::string host = url.host();
75   if (host.empty()) {
76     // URL contains no host, the path is the filename. In this case, the path
77     // will probably be preceded with a slash, as in "/C:/foo.txt", so we
78     // trim out that here.
79     path = url.path();
80     size_t first_non_slash = path.find_first_not_of("/\\");
81     if (first_non_slash != std::string::npos && first_non_slash > 0)
82       path.erase(0, first_non_slash);
83   } else {
84     // URL contains a host: this means it's UNC. We keep the preceding slash
85     // on the path.
86     path = "\\\\";
87     path.append(host);
88     path.append(url.path());
89   }
90   std::replace(path.begin(), path.end(), '/', '\\');
91 #else   // BUILDFLAG(IS_WIN)
92   // On POSIX, there's no obvious interpretation of file:// URLs with a host.
93   // Usually, remote mounts are still mounted onto the local filesystem.
94   // Therefore, we discard all URLs that are not obviously local to prevent
95   // spoofing attacks using file:// URLs. See crbug.com/881675.
96   if (!url.host().empty() && !net::IsLocalhost(url)) {
97     return false;
98   }
99   std::string path = url.path();
100 #endif  // !BUILDFLAG(IS_WIN)
101 
102   if (path.empty())
103     return false;
104 
105   // "%2F" ('/') results in failure, because it represents a literal '/'
106   // character in a path segment (not a path separator). If this were decoded,
107   // it would be interpreted as a path separator on both POSIX and Windows (note
108   // that Firefox *does* decode this, but it was decided on
109   // https://crbug.com/585422 that this represents a potential security risk).
110   // It isn't correct to keep it as "%2F", so this just fails. This is fine,
111   // because '/' is not a valid filename character on either POSIX or Windows.
112   std::set<unsigned char> illegal_encoded_bytes{'/'};
113 
114 #if BUILDFLAG(IS_WIN)
115   // "%5C" ('\\') on Windows results in failure, for the same reason as '/'
116   // above. On POSIX, "%5C" simply decodes as '\\', a valid filename character.
117   illegal_encoded_bytes.insert('\\');
118 #endif
119 
120   if (base::ContainsEncodedBytes(path, illegal_encoded_bytes))
121     return false;
122 
123   // Unescape all percent-encoded sequences, including blocked-for-display
124   // characters, control characters and invalid UTF-8 byte sequences.
125   // Percent-encoded bytes are not meaningful in a file system.
126   path = base::UnescapeBinaryURLComponent(path);
127 
128 #if BUILDFLAG(IS_WIN)
129   if (base::IsStringUTF8(path)) {
130     file_path_str.assign(base::UTF8ToWide(path));
131     // We used to try too hard and see if |path| made up entirely of
132     // the 1st 256 characters in the Unicode was a zero-extended UTF-16.
133     // If so, we converted it to 'Latin-1' and checked if the result was UTF-8.
134     // If the check passed, we converted the result to UTF-8.
135     // Otherwise, we treated the result as the native OS encoding.
136     // However, that led to http://crbug.com/4619 and http://crbug.com/14153
137   } else {
138     // Not UTF-8, assume encoding is native codepage and we're done. We know we
139     // are giving the conversion function a nonempty string, and it may fail if
140     // the given string is not in the current encoding and give us an empty
141     // string back. We detect this and report failure.
142     file_path_str = base::SysNativeMBToWide(path);
143   }
144 #else   // BUILDFLAG(IS_WIN)
145   // Collapse multiple path slashes into a single path slash.
146   std::string new_path;
147   do {
148     new_path = path;
149     base::ReplaceSubstringsAfterOffset(&new_path, 0, "//", "/");
150     path.swap(new_path);
151   } while (new_path != path);
152 
153   file_path_str.assign(path);
154 #endif  // !BUILDFLAG(IS_WIN)
155 
156   return !file_path_str.empty();
157 }
158 
GenerateSafeFileName(const std::string & mime_type,bool ignore_extension,base::FilePath * file_path)159 void GenerateSafeFileName(const std::string& mime_type,
160                           bool ignore_extension,
161                           base::FilePath* file_path) {
162   // Make sure we get the right file extension
163   EnsureSafeExtension(mime_type, ignore_extension, file_path);
164 
165 #if BUILDFLAG(IS_WIN)
166   // Prepend "_" to the file name if it's a reserved name
167   base::FilePath::StringType leaf_name = file_path->BaseName().value();
168   DCHECK(!leaf_name.empty());
169   if (IsReservedNameOnWindows(leaf_name)) {
170     leaf_name = base::FilePath::StringType(FILE_PATH_LITERAL("_")) + leaf_name;
171     *file_path = file_path->DirName();
172     if (file_path->value() == base::FilePath::kCurrentDirectory) {
173       *file_path = base::FilePath(leaf_name);
174     } else {
175       *file_path = file_path->Append(leaf_name);
176     }
177   }
178 #endif
179 }
180 
IsReservedNameOnWindows(const base::FilePath::StringType & filename)181 bool IsReservedNameOnWindows(const base::FilePath::StringType& filename) {
182   // This list is taken from the MSDN article "Naming a file"
183   // http://msdn2.microsoft.com/en-us/library/aa365247(VS.85).aspx
184   // I also added clock$ because GetSaveFileName seems to consider it as a
185   // reserved name too.
186   static const char* const known_devices[] = {
187       "con",  "prn",  "aux",  "nul",  "com1", "com2", "com3",  "com4",
188       "com5", "com6", "com7", "com8", "com9", "lpt1", "lpt2",  "lpt3",
189       "lpt4", "lpt5", "lpt6", "lpt7", "lpt8", "lpt9", "clock$"};
190 #if BUILDFLAG(IS_WIN)
191   std::string filename_lower = base::ToLowerASCII(base::WideToUTF8(filename));
192 #elif BUILDFLAG(IS_POSIX) || BUILDFLAG(IS_FUCHSIA)
193   std::string filename_lower = base::ToLowerASCII(filename);
194 #endif
195 
196   for (const char* const device : known_devices) {
197     // Exact match.
198     if (filename_lower == device)
199       return true;
200     // Starts with "DEVICE.".
201     if (base::StartsWith(filename_lower, std::string(device) + ".",
202                          base::CompareCase::SENSITIVE)) {
203       return true;
204     }
205   }
206 
207   static const char* const magic_names[] = {
208       // These file names are used by the "Customize folder" feature of the
209       // shell.
210       "desktop.ini",
211       "thumbs.db",
212   };
213 
214   for (const char* const magic_name : magic_names) {
215     if (filename_lower == magic_name)
216       return true;
217   }
218 
219   return false;
220 }
221 
222 }  // namespace net
223