• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "base/logging.h"
6 #include "url/url_file.h"
7 #include "url/url_parse.h"
8 #include "url/url_parse_internal.h"
9 
10 // Interesting IE file:isms...
11 //
12 //  INPUT                      OUTPUT
13 //  =========================  ==============================
14 //  file:/foo/bar              file:///foo/bar
15 //      The result here seems totally invalid!?!? This isn't UNC.
16 //
17 //  file:/
18 //  file:// or any other number of slashes
19 //      IE6 doesn't do anything at all if you click on this link. No error:
20 //      nothing. IE6's history system seems to always color this link, so I'm
21 //      guessing that it maps internally to the empty URL.
22 //
23 //  C:\                        file:///C:/
24 //      When on a file: URL source page, this link will work. When over HTTP,
25 //      the file: URL will appear in the status bar but the link will not work
26 //      (security restriction for all file URLs).
27 //
28 //  file:foo/                  file:foo/     (invalid?!?!?)
29 //  file:/foo/                 file:///foo/  (invalid?!?!?)
30 //  file://foo/                file://foo/   (UNC to server "foo")
31 //  file:///foo/               file:///foo/  (invalid, seems to be a file)
32 //  file:////foo/              file://foo/   (UNC to server "foo")
33 //      Any more than four slashes is also treated as UNC.
34 //
35 //  file:C:/                   file://C:/
36 //  file:/C:/                  file://C:/
37 //      The number of slashes after "file:" doesn't matter if the thing following
38 //      it looks like an absolute drive path. Also, slashes and backslashes are
39 //      equally valid here.
40 
41 namespace url {
42 
43 namespace {
44 
45 // A subcomponent of DoInitFileURL, the input of this function should be a UNC
46 // path name, with the index of the first character after the slashes following
47 // the scheme given in |after_slashes|. This will initialize the host, path,
48 // query, and ref, and leave the other output components untouched
49 // (DoInitFileURL handles these for us).
50 template<typename CHAR>
DoParseUNC(const CHAR * spec,int after_slashes,int spec_len,Parsed * parsed)51 void DoParseUNC(const CHAR* spec,
52                 int after_slashes,
53                 int spec_len,
54                Parsed* parsed) {
55   int next_slash = FindNextSlash(spec, after_slashes, spec_len);
56   if (next_slash == spec_len) {
57     // No additional slash found, as in "file://foo", treat the text as the
58     // host with no path (this will end up being UNC to server "foo").
59     int host_len = spec_len - after_slashes;
60     if (host_len)
61       parsed->host = Component(after_slashes, host_len);
62     else
63       parsed->host.reset();
64     parsed->path.reset();
65     return;
66   }
67 
68 #ifdef WIN32
69   // See if we have something that looks like a path following the first
70   // component. As in "file://localhost/c:/", we get "c:/" out. We want to
71   // treat this as a having no host but the path given. Works on Windows only.
72   if (DoesBeginWindowsDriveSpec(spec, next_slash + 1, spec_len)) {
73     parsed->host.reset();
74     ParsePathInternal(spec, MakeRange(next_slash, spec_len),
75                       &parsed->path, &parsed->query, &parsed->ref);
76     return;
77   }
78 #endif
79 
80   // Otherwise, everything up until that first slash we found is the host name,
81   // which will end up being the UNC host. For example "file://foo/bar.txt"
82   // will get a server name of "foo" and a path of "/bar". Later, on Windows,
83   // this should be treated as the filename "\\foo\bar.txt" in proper UNC
84   // notation.
85   int host_len = next_slash - after_slashes;
86   if (host_len)
87     parsed->host = MakeRange(after_slashes, next_slash);
88   else
89     parsed->host.reset();
90   if (next_slash < spec_len) {
91     ParsePathInternal(spec, MakeRange(next_slash, spec_len),
92                       &parsed->path, &parsed->query, &parsed->ref);
93   } else {
94     parsed->path.reset();
95   }
96 }
97 
98 // A subcomponent of DoParseFileURL, the input should be a local file, with the
99 // beginning of the path indicated by the index in |path_begin|. This will
100 // initialize the host, path, query, and ref, and leave the other output
101 // components untouched (DoInitFileURL handles these for us).
102 template<typename CHAR>
DoParseLocalFile(const CHAR * spec,int path_begin,int spec_len,Parsed * parsed)103 void DoParseLocalFile(const CHAR* spec,
104                       int path_begin,
105                       int spec_len,
106                       Parsed* parsed) {
107   parsed->host.reset();
108   ParsePathInternal(spec, MakeRange(path_begin, spec_len),
109                     &parsed->path, &parsed->query, &parsed->ref);
110 }
111 
112 // Backend for the external functions that operates on either char type.
113 // Handles cases where there is a scheme, but also when handed the first
114 // character following the "file:" at the beginning of the spec. If so,
115 // this is usually a slash, but needn't be; we allow paths like "file:c:\foo".
116 template<typename CHAR>
DoParseFileURL(const CHAR * spec,int spec_len,Parsed * parsed)117 void DoParseFileURL(const CHAR* spec, int spec_len, Parsed* parsed) {
118   DCHECK(spec_len >= 0);
119 
120   // Get the parts we never use for file URLs out of the way.
121   parsed->username.reset();
122   parsed->password.reset();
123   parsed->port.reset();
124 
125   // Many of the code paths don't set these, so it's convenient to just clear
126   // them. We'll write them in those cases we need them.
127   parsed->query.reset();
128   parsed->ref.reset();
129 
130   // Strip leading & trailing spaces and control characters.
131   int begin = 0;
132   TrimURL(spec, &begin, &spec_len);
133 
134   // Find the scheme, if any.
135   int num_slashes = CountConsecutiveSlashes(spec, begin, spec_len);
136   int after_scheme;
137   int after_slashes;
138 #ifdef WIN32
139   // See how many slashes there are. We want to handle cases like UNC but also
140   // "/c:/foo". This is when there is no scheme, so we can allow pages to do
141   // links like "c:/foo/bar" or "//foo/bar". This is also called by the
142   // relative URL resolver when it determines there is an absolute URL, which
143   // may give us input like "/c:/foo".
144   after_slashes = begin + num_slashes;
145   if (DoesBeginWindowsDriveSpec(spec, after_slashes, spec_len)) {
146     // Windows path, don't try to extract the scheme (for example, "c:\foo").
147     parsed->scheme.reset();
148     after_scheme = after_slashes;
149   } else if (DoesBeginUNCPath(spec, begin, spec_len, false)) {
150     // Windows UNC path: don't try to extract the scheme, but keep the slashes.
151     parsed->scheme.reset();
152     after_scheme = begin;
153   } else
154 #endif
155   {
156     // ExtractScheme doesn't understand the possibility of filenames with
157     // colons in them, in which case it returns the entire spec up to the
158     // colon as the scheme. So handle /foo.c:5 as a file but foo.c:5 as
159     // the foo.c: scheme.
160     if (!num_slashes &&
161         ExtractScheme(&spec[begin], spec_len - begin, &parsed->scheme)) {
162       // Offset the results since we gave ExtractScheme a substring.
163       parsed->scheme.begin += begin;
164       after_scheme = parsed->scheme.end() + 1;
165     } else {
166       // No scheme found, remember that.
167       parsed->scheme.reset();
168       after_scheme = begin;
169     }
170   }
171 
172   // Handle empty specs ones that contain only whitespace or control chars,
173   // or that are just the scheme (for example "file:").
174   if (after_scheme == spec_len) {
175     parsed->host.reset();
176     parsed->path.reset();
177     return;
178   }
179 
180   num_slashes = CountConsecutiveSlashes(spec, after_scheme, spec_len);
181   after_slashes = after_scheme + num_slashes;
182 #ifdef WIN32
183   // Check whether the input is a drive again. We checked above for windows
184   // drive specs, but that's only at the very beginning to see if we have a
185   // scheme at all. This test will be duplicated in that case, but will
186   // additionally handle all cases with a real scheme such as "file:///C:/".
187   if (!DoesBeginWindowsDriveSpec(spec, after_slashes, spec_len) &&
188       num_slashes != 3) {
189     // Anything not beginning with a drive spec ("c:\") on Windows is treated
190     // as UNC, with the exception of three slashes which always means a file.
191     // Even IE7 treats file:///foo/bar as "/foo/bar", which then fails.
192     DoParseUNC(spec, after_slashes, spec_len, parsed);
193     return;
194   }
195 #else
196   // file: URL with exactly 2 slashes is considered to have a host component.
197   if (num_slashes == 2) {
198     DoParseUNC(spec, after_slashes, spec_len, parsed);
199     return;
200   }
201 #endif  // WIN32
202 
203   // Easy and common case, the full path immediately follows the scheme
204   // (modulo slashes), as in "file://c:/foo". Just treat everything from
205   // there to the end as the path. Empty hosts have 0 length instead of -1.
206   // We include the last slash as part of the path if there is one.
207   DoParseLocalFile(spec,
208       num_slashes > 0 ? after_scheme + num_slashes - 1 : after_scheme,
209       spec_len, parsed);
210 }
211 
212 }  // namespace
213 
ParseFileURL(const char * url,int url_len,Parsed * parsed)214 void ParseFileURL(const char* url, int url_len, Parsed* parsed) {
215   DoParseFileURL(url, url_len, parsed);
216 }
217 
ParseFileURL(const base::char16 * url,int url_len,Parsed * parsed)218 void ParseFileURL(const base::char16* url, int url_len, Parsed* parsed) {
219   DoParseFileURL(url, url_len, parsed);
220 }
221 
222 }  // namespace url
223