• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "extensions/common/url_pattern.h"
6 
7 #include "base/strings/string_number_conversions.h"
8 #include "base/strings/string_piece.h"
9 #include "base/strings/string_split.h"
10 #include "base/strings/string_util.h"
11 #include "content/public/common/url_constants.h"
12 #include "extensions/common/constants.h"
13 #include "url/gurl.h"
14 #include "url/url_util.h"
15 
16 const char URLPattern::kAllUrlsPattern[] = "<all_urls>";
17 
18 namespace {
19 
20 // TODO(aa): What about more obscure schemes like data: and javascript: ?
21 // Note: keep this array in sync with kValidSchemeMasks.
22 const char* kValidSchemes[] = {
23   content::kHttpScheme,
24   content::kHttpsScheme,
25   chrome::kFileScheme,
26   content::kFtpScheme,
27   chrome::kChromeUIScheme,
28   extensions::kExtensionScheme,
29   chrome::kFileSystemScheme,
30 };
31 
32 const int kValidSchemeMasks[] = {
33   URLPattern::SCHEME_HTTP,
34   URLPattern::SCHEME_HTTPS,
35   URLPattern::SCHEME_FILE,
36   URLPattern::SCHEME_FTP,
37   URLPattern::SCHEME_CHROMEUI,
38   URLPattern::SCHEME_EXTENSION,
39   URLPattern::SCHEME_FILESYSTEM,
40 };
41 
42 COMPILE_ASSERT(arraysize(kValidSchemes) == arraysize(kValidSchemeMasks),
43                must_keep_these_arrays_in_sync);
44 
45 const char kParseSuccess[] = "Success.";
46 const char kParseErrorMissingSchemeSeparator[] = "Missing scheme separator.";
47 const char kParseErrorInvalidScheme[] = "Invalid scheme.";
48 const char kParseErrorWrongSchemeType[] = "Wrong scheme type.";
49 const char kParseErrorEmptyHost[] = "Host can not be empty.";
50 const char kParseErrorInvalidHostWildcard[] = "Invalid host wildcard.";
51 const char kParseErrorEmptyPath[] = "Empty path.";
52 const char kParseErrorInvalidPort[] = "Invalid port.";
53 
54 // Message explaining each URLPattern::ParseResult.
55 const char* const kParseResultMessages[] = {
56   kParseSuccess,
57   kParseErrorMissingSchemeSeparator,
58   kParseErrorInvalidScheme,
59   kParseErrorWrongSchemeType,
60   kParseErrorEmptyHost,
61   kParseErrorInvalidHostWildcard,
62   kParseErrorEmptyPath,
63   kParseErrorInvalidPort,
64 };
65 
66 COMPILE_ASSERT(URLPattern::NUM_PARSE_RESULTS == arraysize(kParseResultMessages),
67                must_add_message_for_each_parse_result);
68 
// Character sequence that ends the host and begins the path when Parse()
// scans a pattern.
const char kPathSeparator[] = "/";
70 
IsStandardScheme(const std::string & scheme)71 bool IsStandardScheme(const std::string& scheme) {
72   // "*" gets the same treatment as a standard scheme.
73   if (scheme == "*")
74     return true;
75 
76   return url_util::IsStandard(scheme.c_str(),
77       url_parse::Component(0, static_cast<int>(scheme.length())));
78 }
79 
IsValidPortForScheme(const std::string & scheme,const std::string & port)80 bool IsValidPortForScheme(const std::string& scheme, const std::string& port) {
81   if (port == "*")
82     return true;
83 
84   // Only accept non-wildcard ports if the scheme uses ports.
85   if (url_canon::DefaultPortForScheme(scheme.c_str(), scheme.length()) ==
86       url_parse::PORT_UNSPECIFIED) {
87     return false;
88   }
89 
90   int parsed_port = url_parse::PORT_UNSPECIFIED;
91   if (!base::StringToInt(port, &parsed_port))
92     return false;
93   return (parsed_port >= 0) && (parsed_port < 65536);
94 }
95 
96 // Returns |path| with the trailing wildcard stripped if one existed.
97 //
98 // The functions that rely on this (OverlapsWith and Contains) are only
99 // called for the patterns inside URLPatternSet. In those cases, we know that
100 // the path will have only a single wildcard at the end. This makes figuring
101 // out overlap much easier. It seems like there is probably a computer-sciency
102 // way to solve the general case, but we don't need that yet.
StripTrailingWildcard(const std::string & path)103 std::string StripTrailingWildcard(const std::string& path) {
104   size_t wildcard_index = path.find('*');
105   size_t path_last = path.size() - 1;
106   DCHECK(wildcard_index == std::string::npos || wildcard_index == path_last);
107   return wildcard_index == path_last ? path.substr(0, path_last) : path;
108 }
109 
110 }  // namespace
111 
URLPattern()112 URLPattern::URLPattern()
113     : valid_schemes_(SCHEME_NONE),
114       match_all_urls_(false),
115       match_subdomains_(false),
116       port_("*") {}
117 
URLPattern(int valid_schemes)118 URLPattern::URLPattern(int valid_schemes)
119     : valid_schemes_(valid_schemes),
120       match_all_urls_(false),
121       match_subdomains_(false),
122       port_("*") {}
123 
URLPattern(int valid_schemes,const std::string & pattern)124 URLPattern::URLPattern(int valid_schemes, const std::string& pattern)
125     // Strict error checking is used, because this constructor is only
126     // appropriate when we know |pattern| is valid.
127     : valid_schemes_(valid_schemes),
128       match_all_urls_(false),
129       match_subdomains_(false),
130       port_("*") {
131   if (PARSE_SUCCESS != Parse(pattern))
132     NOTREACHED() << "URLPattern is invalid: " << pattern;
133 }
134 
~URLPattern()135 URLPattern::~URLPattern() {
136 }
137 
operator <(const URLPattern & other) const138 bool URLPattern::operator<(const URLPattern& other) const {
139   return GetAsString() < other.GetAsString();
140 }
141 
operator >(const URLPattern & other) const142 bool URLPattern::operator>(const URLPattern& other) const {
143   return GetAsString() > other.GetAsString();
144 }
145 
operator ==(const URLPattern & other) const146 bool URLPattern::operator==(const URLPattern& other) const {
147   return GetAsString() == other.GetAsString();
148 }
149 
Parse(const std::string & pattern)150 URLPattern::ParseResult URLPattern::Parse(const std::string& pattern) {
151   spec_.clear();
152   SetMatchAllURLs(false);
153   SetMatchSubdomains(false);
154   SetPort("*");
155 
156   // Special case pattern to match every valid URL.
157   if (pattern == kAllUrlsPattern) {
158     SetMatchAllURLs(true);
159     return PARSE_SUCCESS;
160   }
161 
162   // Parse out the scheme.
163   size_t scheme_end_pos = pattern.find(content::kStandardSchemeSeparator);
164   bool has_standard_scheme_separator = true;
165 
166   // Some urls also use ':' alone as the scheme separator.
167   if (scheme_end_pos == std::string::npos) {
168     scheme_end_pos = pattern.find(':');
169     has_standard_scheme_separator = false;
170   }
171 
172   if (scheme_end_pos == std::string::npos)
173     return PARSE_ERROR_MISSING_SCHEME_SEPARATOR;
174 
175   if (!SetScheme(pattern.substr(0, scheme_end_pos)))
176     return PARSE_ERROR_INVALID_SCHEME;
177 
178   bool standard_scheme = IsStandardScheme(scheme_);
179   if (standard_scheme != has_standard_scheme_separator)
180     return PARSE_ERROR_WRONG_SCHEME_SEPARATOR;
181 
182   // Advance past the scheme separator.
183   scheme_end_pos +=
184       (standard_scheme ? strlen(content::kStandardSchemeSeparator) : 1);
185   if (scheme_end_pos >= pattern.size())
186     return PARSE_ERROR_EMPTY_HOST;
187 
188   // Parse out the host and path.
189   size_t host_start_pos = scheme_end_pos;
190   size_t path_start_pos = 0;
191 
192   if (!standard_scheme) {
193     path_start_pos = host_start_pos;
194   } else if (scheme_ == chrome::kFileScheme) {
195     size_t host_end_pos = pattern.find(kPathSeparator, host_start_pos);
196     if (host_end_pos == std::string::npos) {
197       // Allow hostname omission.
198       // e.g. file://* is interpreted as file:///*,
199       // file://foo* is interpreted as file:///foo*.
200       path_start_pos = host_start_pos - 1;
201     } else {
202       // Ignore hostname if scheme is file://.
203       // e.g. file://localhost/foo is equal to file:///foo.
204       path_start_pos = host_end_pos;
205     }
206   } else {
207     size_t host_end_pos = pattern.find(kPathSeparator, host_start_pos);
208 
209     // Host is required.
210     if (host_start_pos == host_end_pos)
211       return PARSE_ERROR_EMPTY_HOST;
212 
213     if (host_end_pos == std::string::npos)
214       return PARSE_ERROR_EMPTY_PATH;
215 
216     host_ = pattern.substr(host_start_pos, host_end_pos - host_start_pos);
217 
218     // The first component can optionally be '*' to match all subdomains.
219     std::vector<std::string> host_components;
220     base::SplitString(host_, '.', &host_components);
221     if (host_components[0] == "*") {
222       match_subdomains_ = true;
223       host_components.erase(host_components.begin(),
224                             host_components.begin() + 1);
225     }
226     host_ = JoinString(host_components, '.');
227 
228     path_start_pos = host_end_pos;
229   }
230 
231   SetPath(pattern.substr(path_start_pos));
232 
233   size_t port_pos = host_.find(':');
234   if (port_pos != std::string::npos) {
235     if (!SetPort(host_.substr(port_pos + 1)))
236       return PARSE_ERROR_INVALID_PORT;
237     host_ = host_.substr(0, port_pos);
238   }
239 
240   // No other '*' can occur in the host, though. This isn't necessary, but is
241   // done as a convenience to developers who might otherwise be confused and
242   // think '*' works as a glob in the host.
243   if (host_.find('*') != std::string::npos)
244     return PARSE_ERROR_INVALID_HOST_WILDCARD;
245 
246   return PARSE_SUCCESS;
247 }
248 
SetValidSchemes(int valid_schemes)249 void URLPattern::SetValidSchemes(int valid_schemes) {
250   spec_.clear();
251   valid_schemes_ = valid_schemes;
252 }
253 
SetHost(const std::string & host)254 void URLPattern::SetHost(const std::string& host) {
255   spec_.clear();
256   host_ = host;
257 }
258 
SetMatchAllURLs(bool val)259 void URLPattern::SetMatchAllURLs(bool val) {
260   spec_.clear();
261   match_all_urls_ = val;
262 
263   if (val) {
264     match_subdomains_ = true;
265     scheme_ = "*";
266     host_.clear();
267     SetPath("/*");
268   }
269 }
270 
SetMatchSubdomains(bool val)271 void URLPattern::SetMatchSubdomains(bool val) {
272   spec_.clear();
273   match_subdomains_ = val;
274 }
275 
SetScheme(const std::string & scheme)276 bool URLPattern::SetScheme(const std::string& scheme) {
277   spec_.clear();
278   scheme_ = scheme;
279   if (scheme_ == "*") {
280     valid_schemes_ &= (SCHEME_HTTP | SCHEME_HTTPS);
281   } else if (!IsValidScheme(scheme_)) {
282     return false;
283   }
284   return true;
285 }
286 
IsValidScheme(const std::string & scheme) const287 bool URLPattern::IsValidScheme(const std::string& scheme) const {
288   if (valid_schemes_ == SCHEME_ALL)
289     return true;
290 
291   for (size_t i = 0; i < arraysize(kValidSchemes); ++i) {
292     if (scheme == kValidSchemes[i] && (valid_schemes_ & kValidSchemeMasks[i]))
293       return true;
294   }
295 
296   return false;
297 }
298 
SetPath(const std::string & path)299 void URLPattern::SetPath(const std::string& path) {
300   spec_.clear();
301   path_ = path;
302   path_escaped_ = path_;
303   ReplaceSubstringsAfterOffset(&path_escaped_, 0, "\\", "\\\\");
304   ReplaceSubstringsAfterOffset(&path_escaped_, 0, "?", "\\?");
305 }
306 
SetPort(const std::string & port)307 bool URLPattern::SetPort(const std::string& port) {
308   spec_.clear();
309   if (IsValidPortForScheme(scheme_, port)) {
310     port_ = port;
311     return true;
312   }
313   return false;
314 }
315 
MatchesURL(const GURL & test) const316 bool URLPattern::MatchesURL(const GURL& test) const {
317   const GURL* test_url = &test;
318   bool has_inner_url = test.inner_url() != NULL;
319 
320   if (has_inner_url) {
321     if (!test.SchemeIsFileSystem())
322       return false;  // The only nested URLs we handle are filesystem URLs.
323     test_url = test.inner_url();
324   }
325 
326   if (!MatchesScheme(test_url->scheme()))
327     return false;
328 
329   if (match_all_urls_)
330     return true;
331 
332   std::string path_for_request = test.PathForRequest();
333   if (has_inner_url)
334     path_for_request = test_url->path() + path_for_request;
335 
336   return MatchesSecurityOriginHelper(*test_url) &&
337          MatchesPath(path_for_request);
338 }
339 
MatchesSecurityOrigin(const GURL & test) const340 bool URLPattern::MatchesSecurityOrigin(const GURL& test) const {
341   const GURL* test_url = &test;
342   bool has_inner_url = test.inner_url() != NULL;
343 
344   if (has_inner_url) {
345     if (!test.SchemeIsFileSystem())
346       return false;  // The only nested URLs we handle are filesystem URLs.
347     test_url = test.inner_url();
348   }
349 
350   if (!MatchesScheme(test_url->scheme()))
351     return false;
352 
353   if (match_all_urls_)
354     return true;
355 
356   return MatchesSecurityOriginHelper(*test_url);
357 }
358 
MatchesScheme(const std::string & test) const359 bool URLPattern::MatchesScheme(const std::string& test) const {
360   if (!IsValidScheme(test))
361     return false;
362 
363   return scheme_ == "*" || test == scheme_;
364 }
365 
MatchesHost(const std::string & host) const366 bool URLPattern::MatchesHost(const std::string& host) const {
367   std::string test(content::kHttpScheme);
368   test += content::kStandardSchemeSeparator;
369   test += host;
370   test += "/";
371   return MatchesHost(GURL(test));
372 }
373 
MatchesHost(const GURL & test) const374 bool URLPattern::MatchesHost(const GURL& test) const {
375   // If the hosts are exactly equal, we have a match.
376   if (test.host() == host_)
377     return true;
378 
379   // If we're matching subdomains, and we have no host in the match pattern,
380   // that means that we're matching all hosts, which means we have a match no
381   // matter what the test host is.
382   if (match_subdomains_ && host_.empty())
383     return true;
384 
385   // Otherwise, we can only match if our match pattern matches subdomains.
386   if (!match_subdomains_)
387     return false;
388 
389   // We don't do subdomain matching against IP addresses, so we can give up now
390   // if the test host is an IP address.
391   if (test.HostIsIPAddress())
392     return false;
393 
394   // Check if the test host is a subdomain of our host.
395   if (test.host().length() <= (host_.length() + 1))
396     return false;
397 
398   if (test.host().compare(test.host().length() - host_.length(),
399                           host_.length(), host_) != 0)
400     return false;
401 
402   return test.host()[test.host().length() - host_.length() - 1] == '.';
403 }
404 
MatchesPath(const std::string & test) const405 bool URLPattern::MatchesPath(const std::string& test) const {
406   // Make the behaviour of OverlapsWith consistent with MatchesURL, which is
407   // need to match hosted apps on e.g. 'google.com' also run on 'google.com/'.
408   if (test + "/*" == path_escaped_)
409     return true;
410 
411   return MatchPattern(test, path_escaped_);
412 }
413 
GetAsString() const414 const std::string& URLPattern::GetAsString() const {
415   if (!spec_.empty())
416     return spec_;
417 
418   if (match_all_urls_) {
419     spec_ = kAllUrlsPattern;
420     return spec_;
421   }
422 
423   bool standard_scheme = IsStandardScheme(scheme_);
424 
425   std::string spec = scheme_ +
426       (standard_scheme ? content::kStandardSchemeSeparator : ":");
427 
428   if (scheme_ != chrome::kFileScheme && standard_scheme) {
429     if (match_subdomains_) {
430       spec += "*";
431       if (!host_.empty())
432         spec += ".";
433     }
434 
435     if (!host_.empty())
436       spec += host_;
437 
438     if (port_ != "*") {
439       spec += ":";
440       spec += port_;
441     }
442   }
443 
444   if (!path_.empty())
445     spec += path_;
446 
447   spec_ = spec;
448   return spec_;
449 }
450 
OverlapsWith(const URLPattern & other) const451 bool URLPattern::OverlapsWith(const URLPattern& other) const {
452   if (match_all_urls() || other.match_all_urls())
453     return true;
454   return (MatchesAnyScheme(other.GetExplicitSchemes()) ||
455           other.MatchesAnyScheme(GetExplicitSchemes()))
456       && (MatchesHost(other.host()) || other.MatchesHost(host()))
457       && (MatchesPortPattern(other.port()) || other.MatchesPortPattern(port()))
458       && (MatchesPath(StripTrailingWildcard(other.path())) ||
459           other.MatchesPath(StripTrailingWildcard(path())));
460 }
461 
Contains(const URLPattern & other) const462 bool URLPattern::Contains(const URLPattern& other) const {
463   if (match_all_urls())
464     return true;
465   return MatchesAllSchemes(other.GetExplicitSchemes())
466       && MatchesHost(other.host())
467       && MatchesPortPattern(other.port())
468       && MatchesPath(StripTrailingWildcard(other.path()));
469 }
470 
MatchesAnyScheme(const std::vector<std::string> & schemes) const471 bool URLPattern::MatchesAnyScheme(
472     const std::vector<std::string>& schemes) const {
473   for (std::vector<std::string>::const_iterator i = schemes.begin();
474        i != schemes.end(); ++i) {
475     if (MatchesScheme(*i))
476       return true;
477   }
478 
479   return false;
480 }
481 
MatchesAllSchemes(const std::vector<std::string> & schemes) const482 bool URLPattern::MatchesAllSchemes(
483     const std::vector<std::string>& schemes) const {
484   for (std::vector<std::string>::const_iterator i = schemes.begin();
485        i != schemes.end(); ++i) {
486     if (!MatchesScheme(*i))
487       return false;
488   }
489 
490   return true;
491 }
492 
MatchesSecurityOriginHelper(const GURL & test) const493 bool URLPattern::MatchesSecurityOriginHelper(const GURL& test) const {
494   // Ignore hostname if scheme is file://.
495   if (scheme_ != chrome::kFileScheme && !MatchesHost(test))
496     return false;
497 
498   if (!MatchesPortPattern(base::IntToString(test.EffectiveIntPort())))
499     return false;
500 
501   return true;
502 }
503 
MatchesPortPattern(const std::string & port) const504 bool URLPattern::MatchesPortPattern(const std::string& port) const {
505   return port_ == "*" || port_ == port;
506 }
507 
GetExplicitSchemes() const508 std::vector<std::string> URLPattern::GetExplicitSchemes() const {
509   std::vector<std::string> result;
510 
511   if (scheme_ != "*" && !match_all_urls_ && IsValidScheme(scheme_)) {
512     result.push_back(scheme_);
513     return result;
514   }
515 
516   for (size_t i = 0; i < arraysize(kValidSchemes); ++i) {
517     if (MatchesScheme(kValidSchemes[i])) {
518       result.push_back(kValidSchemes[i]);
519     }
520   }
521 
522   return result;
523 }
524 
ConvertToExplicitSchemes() const525 std::vector<URLPattern> URLPattern::ConvertToExplicitSchemes() const {
526   std::vector<std::string> explicit_schemes = GetExplicitSchemes();
527   std::vector<URLPattern> result;
528 
529   for (std::vector<std::string>::const_iterator i = explicit_schemes.begin();
530        i != explicit_schemes.end(); ++i) {
531     URLPattern temp = *this;
532     temp.SetScheme(*i);
533     temp.SetMatchAllURLs(false);
534     result.push_back(temp);
535   }
536 
537   return result;
538 }
539 
540 // static
GetParseResultString(URLPattern::ParseResult parse_result)541 const char* URLPattern::GetParseResultString(
542     URLPattern::ParseResult parse_result) {
543   return kParseResultMessages[parse_result];
544 }
545