• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2013 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifdef UNSAFE_BUFFERS_BUILD
6 // TODO(crbug.com/350788890): Remove this and spanify to fix the errors.
7 #pragma allow_unsafe_buffers
8 #endif
9 
10 // Functions for canonicalizing "file:" URLs.
11 
12 #include <string_view>
13 
14 #include "base/strings/string_util.h"
15 #include "url/url_canon.h"
16 #include "url/url_canon_internal.h"
17 #include "url/url_file.h"
18 #include "url/url_parse_internal.h"
19 
20 namespace url {
21 
22 namespace {
23 
// Returns true when the characters of |spec| in the half-open range
// [begin, end) are exactly "localhost". The comparison is byte-for-byte, so
// it is case-sensitive. An inverted range (begin > end) never matches.
bool IsLocalhost(const char* spec, int begin, int end) {
  if (end < begin) {
    return false;
  }
  const std::string_view candidate(spec + begin, end - begin);
  return candidate.compare("localhost") == 0;
}
29 
// UTF-16 flavor: returns true when the code units of |spec| in the half-open
// range [begin, end) are exactly u"localhost". The comparison is exact, so it
// is case-sensitive. An inverted range (begin > end) never matches.
bool IsLocalhost(const char16_t* spec, int begin, int end) {
  if (end < begin) {
    return false;
  }
  const std::u16string_view candidate(spec + begin, end - begin);
  return candidate.compare(u"localhost") == 0;
}
35 
36 template <typename CHAR>
DoFindWindowsDriveLetter(const CHAR * spec,int begin,int end)37 int DoFindWindowsDriveLetter(const CHAR* spec, int begin, int end) {
38   if (begin > end)
39     return -1;
40 
41   // First guess the beginning of the drive letter.
42   // If there is something that looks like a drive letter in the spec between
43   // begin and end, store its position in drive_letter_pos.
44   int drive_letter_pos =
45       DoesContainWindowsDriveSpecUntil(spec, begin, end, end);
46   if (drive_letter_pos < begin)
47     return -1;
48 
49   // Check if the path up to the drive letter candidate can be canonicalized as
50   // "/".
51   Component sub_path = MakeRange(begin, drive_letter_pos);
52   RawCanonOutput<1024> output;
53   Component output_path;
54   bool success = CanonicalizePath(spec, sub_path, &output, &output_path);
55   if (!success || output_path.len != 1 || output.at(output_path.begin) != '/') {
56     return -1;
57   }
58 
59   return drive_letter_pos;
60 }
61 
62 #ifdef WIN32
63 
64 // Given a pointer into the spec, this copies and canonicalizes the drive
65 // letter and colon to the output, if one is found. If there is not a drive
66 // spec, it won't do anything. The index of the next character in the input
67 // spec is returned (after the colon when a drive spec is found, the begin
68 // offset if one is not).
69 template <typename CHAR>
FileDoDriveSpec(const CHAR * spec,int begin,int end,CanonOutput * output)70 int FileDoDriveSpec(const CHAR* spec, int begin, int end, CanonOutput* output) {
71   int drive_letter_pos = FindWindowsDriveLetter(spec, begin, end);
72   if (drive_letter_pos < begin)
73     return begin;
74 
75   // By now, a valid drive letter is confirmed at position drive_letter_pos,
76   // followed by a valid drive letter separator (a colon or a pipe).
77 
78   output->push_back('/');
79 
80   // Normalize Windows drive letters to uppercase.
81   if (base::IsAsciiLower(spec[drive_letter_pos]))
82     output->push_back(static_cast<char>(spec[drive_letter_pos] - 'a' + 'A'));
83   else
84     output->push_back(static_cast<char>(spec[drive_letter_pos]));
85 
86   // Normalize the character following it to a colon rather than pipe.
87   output->push_back(':');
88   return drive_letter_pos + 2;
89 }
90 
91 #endif  // WIN32
92 
93 template<typename CHAR, typename UCHAR>
DoFileCanonicalizePath(const CHAR * spec,const Component & path,CanonOutput * output,Component * out_path)94 bool DoFileCanonicalizePath(const CHAR* spec,
95                             const Component& path,
96                             CanonOutput* output,
97                             Component* out_path) {
98   // Copies and normalizes the "c:" at the beginning, if present.
99   out_path->begin = output->length();
100   int after_drive;
101 #ifdef WIN32
102   after_drive = FileDoDriveSpec(spec, path.begin, path.end(), output);
103 #else
104   after_drive = path.begin;
105 #endif
106 
107   // Copies the rest of the path, starting from the slash following the
108   // drive colon (if any, Windows only), or the first slash of the path.
109   bool success = true;
110   if (after_drive < path.end()) {
111     // Use the regular path canonicalizer to canonicalize the rest of the path
112     // after the drive.
113     //
114     // Give it a fake output component to write into, since we will be
115     // calculating the out_path ourselves (consisting of both the drive and the
116     // path we canonicalize here).
117     Component sub_path = MakeRange(after_drive, path.end());
118     Component fake_output_path;
119     success = CanonicalizePath(spec, sub_path, output, &fake_output_path);
120   } else if (after_drive == path.begin) {
121     // No input path and no drive spec, canonicalize to a slash.
122     output->push_back('/');
123   }
124 
125   out_path->len = output->length() - out_path->begin;
126   return success;
127 }
128 
129 template<typename CHAR, typename UCHAR>
DoCanonicalizeFileURL(const URLComponentSource<CHAR> & source,const Parsed & parsed,CharsetConverter * query_converter,CanonOutput * output,Parsed * new_parsed)130 bool DoCanonicalizeFileURL(const URLComponentSource<CHAR>& source,
131                            const Parsed& parsed,
132                            CharsetConverter* query_converter,
133                            CanonOutput* output,
134                            Parsed* new_parsed) {
135   DCHECK(!parsed.has_opaque_path);
136 
137   // Things we don't set in file: URLs.
138   new_parsed->username = Component();
139   new_parsed->password = Component();
140   new_parsed->port = Component();
141 
142   // Scheme (known, so we don't bother running it through the more
143   // complicated scheme canonicalizer).
144   new_parsed->scheme.begin = output->length();
145   output->Append("file://");
146   new_parsed->scheme.len = 4;
147 
148   // If the host is localhost, and the path starts with a Windows drive letter,
149   // remove the host component. This does the following transformation:
150   //     file://localhost/C:/hello.txt -> file:///C:/hello.txt
151   //
152   // Note: we do this on every platform per URL Standard, not just Windows.
153   //
154   // TODO(crbug.com/41299821): According to the latest URL spec, this
155   // transformation should be done regardless of the path.
156   Component host_range = parsed.host;
157   if (IsLocalhost(source.host, host_range.begin, host_range.end()) &&
158       FindWindowsDriveLetter(source.path, parsed.path.begin,
159                              parsed.path.end()) >= parsed.path.begin) {
160     host_range.reset();
161   }
162 
163   // Append the host. For many file URLs, this will be empty. For UNC, this
164   // will be present.
165   // TODO(brettw) This doesn't do any checking for host name validity. We
166   // should probably handle validity checking of UNC hosts differently than
167   // for regular IP hosts.
168   bool success =
169       CanonicalizeFileHost(source.host, host_range, *output, new_parsed->host);
170   success &= DoFileCanonicalizePath<CHAR, UCHAR>(source.path, parsed.path,
171                                     output, &new_parsed->path);
172 
173   CanonicalizeQuery(source.query, parsed.query, query_converter,
174                     output, &new_parsed->query);
175   CanonicalizeRef(source.ref, parsed.ref, output, &new_parsed->ref);
176 
177   return success;
178 }
179 
180 } // namespace
181 
FindWindowsDriveLetter(const char * spec,int begin,int end)182 int FindWindowsDriveLetter(const char* spec, int begin, int end) {
183   return DoFindWindowsDriveLetter(spec, begin, end);
184 }
185 
FindWindowsDriveLetter(const char16_t * spec,int begin,int end)186 int FindWindowsDriveLetter(const char16_t* spec, int begin, int end) {
187   return DoFindWindowsDriveLetter(spec, begin, end);
188 }
189 
CanonicalizeFileURL(const char * spec,int spec_len,const Parsed & parsed,CharsetConverter * query_converter,CanonOutput * output,Parsed * new_parsed)190 bool CanonicalizeFileURL(const char* spec,
191                          int spec_len,
192                          const Parsed& parsed,
193                          CharsetConverter* query_converter,
194                          CanonOutput* output,
195                          Parsed* new_parsed) {
196   return DoCanonicalizeFileURL<char, unsigned char>(
197       URLComponentSource<char>(spec), parsed, query_converter,
198       output, new_parsed);
199 }
200 
CanonicalizeFileURL(const char16_t * spec,int spec_len,const Parsed & parsed,CharsetConverter * query_converter,CanonOutput * output,Parsed * new_parsed)201 bool CanonicalizeFileURL(const char16_t* spec,
202                          int spec_len,
203                          const Parsed& parsed,
204                          CharsetConverter* query_converter,
205                          CanonOutput* output,
206                          Parsed* new_parsed) {
207   return DoCanonicalizeFileURL<char16_t, char16_t>(
208       URLComponentSource<char16_t>(spec), parsed, query_converter, output,
209       new_parsed);
210 }
211 
FileCanonicalizePath(const char * spec,const Component & path,CanonOutput * output,Component * out_path)212 bool FileCanonicalizePath(const char* spec,
213                           const Component& path,
214                           CanonOutput* output,
215                           Component* out_path) {
216   return DoFileCanonicalizePath<char, unsigned char>(spec, path,
217                                                      output, out_path);
218 }
219 
FileCanonicalizePath(const char16_t * spec,const Component & path,CanonOutput * output,Component * out_path)220 bool FileCanonicalizePath(const char16_t* spec,
221                           const Component& path,
222                           CanonOutput* output,
223                           Component* out_path) {
224   return DoFileCanonicalizePath<char16_t, char16_t>(spec, path, output,
225                                                     out_path);
226 }
227 
ReplaceFileURL(const char * base,const Parsed & base_parsed,const Replacements<char> & replacements,CharsetConverter * query_converter,CanonOutput * output,Parsed * new_parsed)228 bool ReplaceFileURL(const char* base,
229                     const Parsed& base_parsed,
230                     const Replacements<char>& replacements,
231                     CharsetConverter* query_converter,
232                     CanonOutput* output,
233                     Parsed* new_parsed) {
234   URLComponentSource<char> source(base);
235   Parsed parsed(base_parsed);
236   SetupOverrideComponents(base, replacements, &source, &parsed);
237   return DoCanonicalizeFileURL<char, unsigned char>(
238       source, parsed, query_converter, output, new_parsed);
239 }
240 
ReplaceFileURL(const char * base,const Parsed & base_parsed,const Replacements<char16_t> & replacements,CharsetConverter * query_converter,CanonOutput * output,Parsed * new_parsed)241 bool ReplaceFileURL(const char* base,
242                     const Parsed& base_parsed,
243                     const Replacements<char16_t>& replacements,
244                     CharsetConverter* query_converter,
245                     CanonOutput* output,
246                     Parsed* new_parsed) {
247   RawCanonOutput<1024> utf8;
248   URLComponentSource<char> source(base);
249   Parsed parsed(base_parsed);
250   SetupUTF16OverrideComponents(base, replacements, &utf8, &source, &parsed);
251   return DoCanonicalizeFileURL<char, unsigned char>(
252       source, parsed, query_converter, output, new_parsed);
253 }
254 
255 }  // namespace url
256