• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2007, Google Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 //     * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 //     * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following disclaimer
12 // in the documentation and/or other materials provided with the
13 // distribution.
14 //     * Neither the name of Google Inc. nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 
30 #include "base/logging.h"
31 #include "googleurl/src/url_file.h"
32 #include "googleurl/src/url_parse.h"
33 #include "googleurl/src/url_parse_internal.h"
34 
35 // Interesting IE file:isms...
36 //
37 //  INPUT                      OUTPUT
38 //  =========================  ==============================
39 //  file:/foo/bar              file:///foo/bar
40 //      The result here seems totally invalid!?!? This isn't UNC.
41 //
42 //  file:/
43 //  file:// or any other number of slashes
44 //      IE6 doesn't do anything at all if you click on this link. No error:
45 //      nothing. IE6's history system seems to always color this link, so I'm
46 //      guessing that it maps internally to the empty URL.
47 //
48 //  C:\                        file:///C:/
49 //      When on a file: URL source page, this link will work. When over HTTP,
50 //      the file: URL will appear in the status bar but the link will not work
51 //      (security restriction for all file URLs).
52 //
53 //  file:foo/                  file:foo/     (invalid?!?!?)
54 //  file:/foo/                 file:///foo/  (invalid?!?!?)
55 //  file://foo/                file://foo/   (UNC to server "foo")
56 //  file:///foo/               file:///foo/  (invalid, seems to be a file)
57 //  file:////foo/              file://foo/   (UNC to server "foo")
58 //      Any more than four slashes is also treated as UNC.
59 //
60 //  file:C:/                   file://C:/
61 //  file:/C:/                  file://C:/
//      The number of slashes after "file:" doesn't matter if the thing
//      following it looks like an absolute drive path. Also, slashes and
//      backslashes are equally valid here.
65 
66 namespace url_parse {
67 
68 namespace {
69 
70 // A subcomponent of DoInitFileURL, the input of this function should be a UNC
71 // path name, with the index of the first character after the slashes following
72 // the scheme given in |after_slashes|. This will initialize the host, path,
73 // query, and ref, and leave the other output components untouched
74 // (DoInitFileURL handles these for us).
75 template<typename CHAR>
DoParseUNC(const CHAR * spec,int after_slashes,int spec_len,Parsed * parsed)76 void DoParseUNC(const CHAR* spec,
77                 int after_slashes,
78                 int spec_len,
79                Parsed* parsed) {
80   int next_slash = FindNextSlash(spec, after_slashes, spec_len);
81   if (next_slash == spec_len) {
82     // No additional slash found, as in "file://foo", treat the text as the
83     // host with no path (this will end up being UNC to server "foo").
84     int host_len = spec_len - after_slashes;
85     if (host_len)
86       parsed->host = Component(after_slashes, host_len);
87     else
88       parsed->host.reset();
89     parsed->path.reset();
90     return;
91   }
92 
93 #ifdef WIN32
94   // See if we have something that looks like a path following the first
95   // component. As in "file://localhost/c:/", we get "c:/" out. We want to
96   // treat this as a having no host but the path given. Works on Windows only.
97   if (DoesBeginWindowsDriveSpec(spec, next_slash + 1, spec_len)) {
98     parsed->host.reset();
99     ParsePathInternal(spec, MakeRange(next_slash, spec_len),
100                       &parsed->path, &parsed->query, &parsed->ref);
101     return;
102   }
103 #endif
104 
105   // Otherwise, everything up until that first slash we found is the host name,
106   // which will end up being the UNC host. For example "file://foo/bar.txt"
107   // will get a server name of "foo" and a path of "/bar". Later, on Windows,
108   // this should be treated as the filename "\\foo\bar.txt" in proper UNC
109   // notation.
110   int host_len = next_slash - after_slashes;
111   if (host_len)
112     parsed->host = MakeRange(after_slashes, next_slash);
113   else
114     parsed->host.reset();
115   if (next_slash < spec_len) {
116     ParsePathInternal(spec, MakeRange(next_slash, spec_len),
117                       &parsed->path, &parsed->query, &parsed->ref);
118   } else {
119     parsed->path.reset();
120   }
121 }
122 
123 // A subcomponent of DoParseFileURL, the input should be a local file, with the
124 // beginning of the path indicated by the index in |path_begin|. This will
125 // initialize the host, path, query, and ref, and leave the other output
126 // components untouched (DoInitFileURL handles these for us).
127 template<typename CHAR>
DoParseLocalFile(const CHAR * spec,int path_begin,int spec_len,Parsed * parsed)128 void DoParseLocalFile(const CHAR* spec,
129                       int path_begin,
130                       int spec_len,
131                       Parsed* parsed) {
132   parsed->host.reset();
133   ParsePathInternal(spec, MakeRange(path_begin, spec_len),
134                     &parsed->path, &parsed->query, &parsed->ref);
135 }
136 
137 // Backend for the external functions that operates on either char type.
138 // We are handed the character after the "file:" at the beginning of the spec.
139 // Usually this is a slash, but needn't be; we allow paths like "file:c:\foo".
140 template<typename CHAR>
DoParseFileURL(const CHAR * spec,int spec_len,Parsed * parsed)141 void DoParseFileURL(const CHAR* spec, int spec_len, Parsed* parsed) {
142   DCHECK(spec_len >= 0);
143 
144   // Get the parts we never use for file URLs out of the way.
145   parsed->username.reset();
146   parsed->password.reset();
147   parsed->port.reset();
148 
149   // Many of the code paths don't set these, so it's convenient to just clear
150   // them. We'll write them in those cases we need them.
151   parsed->query.reset();
152   parsed->ref.reset();
153 
154   // Strip leading & trailing spaces and control characters.
155   int begin = 0;
156   TrimURL(spec, &begin, &spec_len);
157 
158   // Find the scheme.
159   int num_slashes;
160   int after_scheme;
161   int after_slashes;
162 #ifdef WIN32
163   // See how many slashes there are. We want to handle cases like UNC but also
164   // "/c:/foo". This is when there is no scheme, so we can allow pages to do
165   // links like "c:/foo/bar" or "//foo/bar". This is also called by the
166   // relative URL resolver when it determines there is an absolute URL, which
167   // may give us input like "/c:/foo".
168   num_slashes = CountConsecutiveSlashes(spec, begin, spec_len);
169   after_slashes = begin + num_slashes;
170   if (DoesBeginWindowsDriveSpec(spec, after_slashes, spec_len)) {
171     // Windows path, don't try to extract the scheme (for example, "c:\foo").
172     parsed->scheme.reset();
173     after_scheme = after_slashes;
174   } else if (DoesBeginUNCPath(spec, begin, spec_len, false)) {
175     // Windows UNC path: don't try to extract the scheme, but keep the slashes.
176     parsed->scheme.reset();
177     after_scheme = begin;
178   } else
179 #endif
180   {
181     if (ExtractScheme(&spec[begin], spec_len - begin, &parsed->scheme)) {
182       // Offset the results since we gave ExtractScheme a substring.
183       parsed->scheme.begin += begin;
184       after_scheme = parsed->scheme.end() + 1;
185     } else {
186       // No scheme found, remember that.
187       parsed->scheme.reset();
188       after_scheme = begin;
189     }
190   }
191 
192   // Handle empty specs ones that contain only whitespace or control chars,
193   // or that are just the scheme (for example "file:").
194   if (after_scheme == spec_len) {
195     parsed->host.reset();
196     parsed->path.reset();
197     return;
198   }
199 
200   num_slashes = CountConsecutiveSlashes(spec, after_scheme, spec_len);
201 
202   after_slashes = after_scheme + num_slashes;
203 #ifdef WIN32
204   // Check whether the input is a drive again. We checked above for windows
205   // drive specs, but that's only at the very beginning to see if we have a
206   // scheme at all. This test will be duplicated in that case, but will
207   // additionally handle all cases with a real scheme such as "file:///C:/".
208   if (!DoesBeginWindowsDriveSpec(spec, after_slashes, spec_len) &&
209       num_slashes != 3) {
210     // Anything not beginning with a drive spec ("c:\") on Windows is treated
211     // as UNC, with the exception of three slashes which always means a file.
212     // Even IE7 treats file:///foo/bar as "/foo/bar", which then fails.
213     DoParseUNC(spec, after_slashes, spec_len, parsed);
214     return;
215   }
216 #else
217   // file: URL with exactly 2 slashes is considered to have a host component.
218   if (num_slashes == 2) {
219     DoParseUNC(spec, after_slashes, spec_len, parsed);
220     return;
221   }
222 #endif  // WIN32
223 
224   // Easy and common case, the full path immediately follows the scheme
225   // (modulo slashes), as in "file://c:/foo". Just treat everything from
226   // there to the end as the path. Empty hosts have 0 length instead of -1.
227   // We include the last slash as part of the path if there is one.
228   DoParseLocalFile(spec,
229       num_slashes > 0 ? after_scheme + num_slashes - 1 : after_scheme,
230       spec_len, parsed);
231 }
232 
233 }  // namespace
234 
ParseFileURL(const char * url,int url_len,Parsed * parsed)235 void ParseFileURL(const char* url, int url_len, Parsed* parsed) {
236   DoParseFileURL(url, url_len, parsed);
237 }
238 
ParseFileURL(const char16 * url,int url_len,Parsed * parsed)239 void ParseFileURL(const char16* url, int url_len, Parsed* parsed) {
240   DoParseFileURL(url, url_len, parsed);
241 }
242 
243 }  // namespace url_parse
244