1 // Copyright 2013 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #ifdef UNSAFE_BUFFERS_BUILD
6 // TODO(crbug.com/350788890): Remove this and spanify to fix the errors.
7 #pragma allow_unsafe_buffers
8 #endif
9
10 // Functions for canonicalizing "file:" URLs.
11
12 #include <string_view>
13
14 #include "base/strings/string_util.h"
15 #include "url/url_canon.h"
16 #include "url/url_canon_internal.h"
17 #include "url/url_file.h"
18 #include "url/url_parse_internal.h"
19
20 namespace url {
21
22 namespace {
23
// Returns true when the characters of |spec| in the half-open range
// [begin, end) are exactly "localhost". The comparison is byte-for-byte, so
// it is case-sensitive. An inverted range (begin > end) never matches and
// |spec| is not read in that case.
bool IsLocalhost(const char* spec, int begin, int end) {
  constexpr std::string_view kLocalhost = "localhost";
  // The short-circuit keeps the view from being built for an inverted range.
  return begin <= end &&
         std::string_view(spec + begin, end - begin) == kLocalhost;
}
29
// UTF-16 counterpart of the 8-bit overload: returns true when the code units
// of |spec| in the half-open range [begin, end) are exactly u"localhost"
// (case-sensitive). An inverted range (begin > end) never matches and |spec|
// is not read in that case.
bool IsLocalhost(const char16_t* spec, int begin, int end) {
  constexpr std::u16string_view kLocalhost = u"localhost";
  // The short-circuit keeps the view from being built for an inverted range.
  return begin <= end &&
         std::u16string_view(spec + begin, end - begin) == kLocalhost;
}
35
36 template <typename CHAR>
DoFindWindowsDriveLetter(const CHAR * spec,int begin,int end)37 int DoFindWindowsDriveLetter(const CHAR* spec, int begin, int end) {
38 if (begin > end)
39 return -1;
40
41 // First guess the beginning of the drive letter.
42 // If there is something that looks like a drive letter in the spec between
43 // begin and end, store its position in drive_letter_pos.
44 int drive_letter_pos =
45 DoesContainWindowsDriveSpecUntil(spec, begin, end, end);
46 if (drive_letter_pos < begin)
47 return -1;
48
49 // Check if the path up to the drive letter candidate can be canonicalized as
50 // "/".
51 Component sub_path = MakeRange(begin, drive_letter_pos);
52 RawCanonOutput<1024> output;
53 Component output_path;
54 bool success = CanonicalizePath(spec, sub_path, &output, &output_path);
55 if (!success || output_path.len != 1 || output.at(output_path.begin) != '/') {
56 return -1;
57 }
58
59 return drive_letter_pos;
60 }
61
62 #ifdef WIN32
63
64 // Given a pointer into the spec, this copies and canonicalizes the drive
65 // letter and colon to the output, if one is found. If there is not a drive
66 // spec, it won't do anything. The index of the next character in the input
67 // spec is returned (after the colon when a drive spec is found, the begin
68 // offset if one is not).
69 template <typename CHAR>
FileDoDriveSpec(const CHAR * spec,int begin,int end,CanonOutput * output)70 int FileDoDriveSpec(const CHAR* spec, int begin, int end, CanonOutput* output) {
71 int drive_letter_pos = FindWindowsDriveLetter(spec, begin, end);
72 if (drive_letter_pos < begin)
73 return begin;
74
75 // By now, a valid drive letter is confirmed at position drive_letter_pos,
76 // followed by a valid drive letter separator (a colon or a pipe).
77
78 output->push_back('/');
79
80 // Normalize Windows drive letters to uppercase.
81 if (base::IsAsciiLower(spec[drive_letter_pos]))
82 output->push_back(static_cast<char>(spec[drive_letter_pos] - 'a' + 'A'));
83 else
84 output->push_back(static_cast<char>(spec[drive_letter_pos]));
85
86 // Normalize the character following it to a colon rather than pipe.
87 output->push_back(':');
88 return drive_letter_pos + 2;
89 }
90
91 #endif // WIN32
92
93 template<typename CHAR, typename UCHAR>
DoFileCanonicalizePath(const CHAR * spec,const Component & path,CanonOutput * output,Component * out_path)94 bool DoFileCanonicalizePath(const CHAR* spec,
95 const Component& path,
96 CanonOutput* output,
97 Component* out_path) {
98 // Copies and normalizes the "c:" at the beginning, if present.
99 out_path->begin = output->length();
100 int after_drive;
101 #ifdef WIN32
102 after_drive = FileDoDriveSpec(spec, path.begin, path.end(), output);
103 #else
104 after_drive = path.begin;
105 #endif
106
107 // Copies the rest of the path, starting from the slash following the
108 // drive colon (if any, Windows only), or the first slash of the path.
109 bool success = true;
110 if (after_drive < path.end()) {
111 // Use the regular path canonicalizer to canonicalize the rest of the path
112 // after the drive.
113 //
114 // Give it a fake output component to write into, since we will be
115 // calculating the out_path ourselves (consisting of both the drive and the
116 // path we canonicalize here).
117 Component sub_path = MakeRange(after_drive, path.end());
118 Component fake_output_path;
119 success = CanonicalizePath(spec, sub_path, output, &fake_output_path);
120 } else if (after_drive == path.begin) {
121 // No input path and no drive spec, canonicalize to a slash.
122 output->push_back('/');
123 }
124
125 out_path->len = output->length() - out_path->begin;
126 return success;
127 }
128
129 template<typename CHAR, typename UCHAR>
DoCanonicalizeFileURL(const URLComponentSource<CHAR> & source,const Parsed & parsed,CharsetConverter * query_converter,CanonOutput * output,Parsed * new_parsed)130 bool DoCanonicalizeFileURL(const URLComponentSource<CHAR>& source,
131 const Parsed& parsed,
132 CharsetConverter* query_converter,
133 CanonOutput* output,
134 Parsed* new_parsed) {
135 DCHECK(!parsed.has_opaque_path);
136
137 // Things we don't set in file: URLs.
138 new_parsed->username = Component();
139 new_parsed->password = Component();
140 new_parsed->port = Component();
141
142 // Scheme (known, so we don't bother running it through the more
143 // complicated scheme canonicalizer).
144 new_parsed->scheme.begin = output->length();
145 output->Append("file://");
146 new_parsed->scheme.len = 4;
147
148 // If the host is localhost, and the path starts with a Windows drive letter,
149 // remove the host component. This does the following transformation:
150 // file://localhost/C:/hello.txt -> file:///C:/hello.txt
151 //
152 // Note: we do this on every platform per URL Standard, not just Windows.
153 //
154 // TODO(crbug.com/41299821): According to the latest URL spec, this
155 // transformation should be done regardless of the path.
156 Component host_range = parsed.host;
157 if (IsLocalhost(source.host, host_range.begin, host_range.end()) &&
158 FindWindowsDriveLetter(source.path, parsed.path.begin,
159 parsed.path.end()) >= parsed.path.begin) {
160 host_range.reset();
161 }
162
163 // Append the host. For many file URLs, this will be empty. For UNC, this
164 // will be present.
165 // TODO(brettw) This doesn't do any checking for host name validity. We
166 // should probably handle validity checking of UNC hosts differently than
167 // for regular IP hosts.
168 bool success =
169 CanonicalizeFileHost(source.host, host_range, *output, new_parsed->host);
170 success &= DoFileCanonicalizePath<CHAR, UCHAR>(source.path, parsed.path,
171 output, &new_parsed->path);
172
173 CanonicalizeQuery(source.query, parsed.query, query_converter,
174 output, &new_parsed->query);
175 CanonicalizeRef(source.ref, parsed.ref, output, &new_parsed->ref);
176
177 return success;
178 }
179
180 } // namespace
181
FindWindowsDriveLetter(const char * spec,int begin,int end)182 int FindWindowsDriveLetter(const char* spec, int begin, int end) {
183 return DoFindWindowsDriveLetter(spec, begin, end);
184 }
185
FindWindowsDriveLetter(const char16_t * spec,int begin,int end)186 int FindWindowsDriveLetter(const char16_t* spec, int begin, int end) {
187 return DoFindWindowsDriveLetter(spec, begin, end);
188 }
189
CanonicalizeFileURL(const char * spec,int spec_len,const Parsed & parsed,CharsetConverter * query_converter,CanonOutput * output,Parsed * new_parsed)190 bool CanonicalizeFileURL(const char* spec,
191 int spec_len,
192 const Parsed& parsed,
193 CharsetConverter* query_converter,
194 CanonOutput* output,
195 Parsed* new_parsed) {
196 return DoCanonicalizeFileURL<char, unsigned char>(
197 URLComponentSource<char>(spec), parsed, query_converter,
198 output, new_parsed);
199 }
200
CanonicalizeFileURL(const char16_t * spec,int spec_len,const Parsed & parsed,CharsetConverter * query_converter,CanonOutput * output,Parsed * new_parsed)201 bool CanonicalizeFileURL(const char16_t* spec,
202 int spec_len,
203 const Parsed& parsed,
204 CharsetConverter* query_converter,
205 CanonOutput* output,
206 Parsed* new_parsed) {
207 return DoCanonicalizeFileURL<char16_t, char16_t>(
208 URLComponentSource<char16_t>(spec), parsed, query_converter, output,
209 new_parsed);
210 }
211
FileCanonicalizePath(const char * spec,const Component & path,CanonOutput * output,Component * out_path)212 bool FileCanonicalizePath(const char* spec,
213 const Component& path,
214 CanonOutput* output,
215 Component* out_path) {
216 return DoFileCanonicalizePath<char, unsigned char>(spec, path,
217 output, out_path);
218 }
219
FileCanonicalizePath(const char16_t * spec,const Component & path,CanonOutput * output,Component * out_path)220 bool FileCanonicalizePath(const char16_t* spec,
221 const Component& path,
222 CanonOutput* output,
223 Component* out_path) {
224 return DoFileCanonicalizePath<char16_t, char16_t>(spec, path, output,
225 out_path);
226 }
227
ReplaceFileURL(const char * base,const Parsed & base_parsed,const Replacements<char> & replacements,CharsetConverter * query_converter,CanonOutput * output,Parsed * new_parsed)228 bool ReplaceFileURL(const char* base,
229 const Parsed& base_parsed,
230 const Replacements<char>& replacements,
231 CharsetConverter* query_converter,
232 CanonOutput* output,
233 Parsed* new_parsed) {
234 URLComponentSource<char> source(base);
235 Parsed parsed(base_parsed);
236 SetupOverrideComponents(base, replacements, &source, &parsed);
237 return DoCanonicalizeFileURL<char, unsigned char>(
238 source, parsed, query_converter, output, new_parsed);
239 }
240
ReplaceFileURL(const char * base,const Parsed & base_parsed,const Replacements<char16_t> & replacements,CharsetConverter * query_converter,CanonOutput * output,Parsed * new_parsed)241 bool ReplaceFileURL(const char* base,
242 const Parsed& base_parsed,
243 const Replacements<char16_t>& replacements,
244 CharsetConverter* query_converter,
245 CanonOutput* output,
246 Parsed* new_parsed) {
247 RawCanonOutput<1024> utf8;
248 URLComponentSource<char> source(base);
249 Parsed parsed(base_parsed);
250 SetupUTF16OverrideComponents(base, replacements, &utf8, &source, &parsed);
251 return DoCanonicalizeFileURL<char, unsigned char>(
252 source, parsed, query_converter, output, new_parsed);
253 }
254
255 } // namespace url
256