• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "extensions/common/url_pattern.h"
6 
7 #include "base/strings/string_number_conversions.h"
8 #include "base/strings/string_piece.h"
9 #include "base/strings/string_split.h"
10 #include "base/strings/string_util.h"
11 #include "content/public/common/url_constants.h"
12 #include "extensions/common/constants.h"
13 #include "url/gurl.h"
14 #include "url/url_util.h"
15 
16 const char URLPattern::kAllUrlsPattern[] = "<all_urls>";
17 
18 namespace {
19 
20 // TODO(aa): What about more obscure schemes like data: and javascript: ?
21 // Note: keep this array in sync with kValidSchemeMasks.
22 const char* kValidSchemes[] = {
23     url::kHttpScheme,
24     url::kHttpsScheme,
25     url::kFileScheme,
26     url::kFtpScheme,
27     content::kChromeUIScheme,
28     extensions::kExtensionScheme,
29     url::kFileSystemScheme,
30 };
31 
32 const int kValidSchemeMasks[] = {
33   URLPattern::SCHEME_HTTP,
34   URLPattern::SCHEME_HTTPS,
35   URLPattern::SCHEME_FILE,
36   URLPattern::SCHEME_FTP,
37   URLPattern::SCHEME_CHROMEUI,
38   URLPattern::SCHEME_EXTENSION,
39   URLPattern::SCHEME_FILESYSTEM,
40 };
41 
42 COMPILE_ASSERT(arraysize(kValidSchemes) == arraysize(kValidSchemeMasks),
43                must_keep_these_arrays_in_sync);
44 
45 const char kParseSuccess[] = "Success.";
46 const char kParseErrorMissingSchemeSeparator[] = "Missing scheme separator.";
47 const char kParseErrorInvalidScheme[] = "Invalid scheme.";
48 const char kParseErrorWrongSchemeType[] = "Wrong scheme type.";
49 const char kParseErrorEmptyHost[] = "Host can not be empty.";
50 const char kParseErrorInvalidHostWildcard[] = "Invalid host wildcard.";
51 const char kParseErrorEmptyPath[] = "Empty path.";
52 const char kParseErrorInvalidPort[] = "Invalid port.";
53 const char kParseErrorInvalidHost[] = "Invalid host.";
54 
55 // Message explaining each URLPattern::ParseResult.
56 const char* const kParseResultMessages[] = {
57   kParseSuccess,
58   kParseErrorMissingSchemeSeparator,
59   kParseErrorInvalidScheme,
60   kParseErrorWrongSchemeType,
61   kParseErrorEmptyHost,
62   kParseErrorInvalidHostWildcard,
63   kParseErrorEmptyPath,
64   kParseErrorInvalidPort,
65   kParseErrorInvalidHost,
66 };
67 
68 COMPILE_ASSERT(URLPattern::NUM_PARSE_RESULTS == arraysize(kParseResultMessages),
69                must_add_message_for_each_parse_result);
70 
71 const char kPathSeparator[] = "/";
72 
IsStandardScheme(const std::string & scheme)73 bool IsStandardScheme(const std::string& scheme) {
74   // "*" gets the same treatment as a standard scheme.
75   if (scheme == "*")
76     return true;
77 
78   return url::IsStandard(scheme.c_str(),
79                          url::Component(0, static_cast<int>(scheme.length())));
80 }
81 
IsValidPortForScheme(const std::string & scheme,const std::string & port)82 bool IsValidPortForScheme(const std::string& scheme, const std::string& port) {
83   if (port == "*")
84     return true;
85 
86   // Only accept non-wildcard ports if the scheme uses ports.
87   if (url::DefaultPortForScheme(scheme.c_str(), scheme.length()) ==
88       url::PORT_UNSPECIFIED) {
89     return false;
90   }
91 
92   int parsed_port = url::PORT_UNSPECIFIED;
93   if (!base::StringToInt(port, &parsed_port))
94     return false;
95   return (parsed_port >= 0) && (parsed_port < 65536);
96 }
97 
98 // Returns |path| with the trailing wildcard stripped if one existed.
99 //
100 // The functions that rely on this (OverlapsWith and Contains) are only
101 // called for the patterns inside URLPatternSet. In those cases, we know that
102 // the path will have only a single wildcard at the end. This makes figuring
103 // out overlap much easier. It seems like there is probably a computer-sciency
104 // way to solve the general case, but we don't need that yet.
StripTrailingWildcard(const std::string & path)105 std::string StripTrailingWildcard(const std::string& path) {
106   size_t wildcard_index = path.find('*');
107   size_t path_last = path.size() - 1;
108   DCHECK(wildcard_index == std::string::npos || wildcard_index == path_last);
109   return wildcard_index == path_last ? path.substr(0, path_last) : path;
110 }
111 
112 }  // namespace
113 
114 // static
IsValidSchemeForExtensions(const std::string & scheme)115 bool URLPattern::IsValidSchemeForExtensions(const std::string& scheme) {
116   for (size_t i = 0; i < arraysize(kValidSchemes); ++i) {
117     if (scheme == kValidSchemes[i])
118       return true;
119   }
120   return false;
121 }
122 
URLPattern()123 URLPattern::URLPattern()
124     : valid_schemes_(SCHEME_NONE),
125       match_all_urls_(false),
126       match_subdomains_(false),
127       port_("*") {}
128 
URLPattern(int valid_schemes)129 URLPattern::URLPattern(int valid_schemes)
130     : valid_schemes_(valid_schemes),
131       match_all_urls_(false),
132       match_subdomains_(false),
133       port_("*") {}
134 
URLPattern(int valid_schemes,const std::string & pattern)135 URLPattern::URLPattern(int valid_schemes, const std::string& pattern)
136     // Strict error checking is used, because this constructor is only
137     // appropriate when we know |pattern| is valid.
138     : valid_schemes_(valid_schemes),
139       match_all_urls_(false),
140       match_subdomains_(false),
141       port_("*") {
142   ParseResult result = Parse(pattern);
143   if (PARSE_SUCCESS != result)
144     NOTREACHED() << "URLPattern invalid: " << pattern << " result " << result;
145 }
146 
~URLPattern()147 URLPattern::~URLPattern() {
148 }
149 
operator <(const URLPattern & other) const150 bool URLPattern::operator<(const URLPattern& other) const {
151   return GetAsString() < other.GetAsString();
152 }
153 
operator >(const URLPattern & other) const154 bool URLPattern::operator>(const URLPattern& other) const {
155   return GetAsString() > other.GetAsString();
156 }
157 
operator ==(const URLPattern & other) const158 bool URLPattern::operator==(const URLPattern& other) const {
159   return GetAsString() == other.GetAsString();
160 }
161 
Parse(const std::string & pattern)162 URLPattern::ParseResult URLPattern::Parse(const std::string& pattern) {
163   spec_.clear();
164   SetMatchAllURLs(false);
165   SetMatchSubdomains(false);
166   SetPort("*");
167 
168   // Special case pattern to match every valid URL.
169   if (pattern == kAllUrlsPattern) {
170     SetMatchAllURLs(true);
171     return PARSE_SUCCESS;
172   }
173 
174   // Parse out the scheme.
175   size_t scheme_end_pos = pattern.find(url::kStandardSchemeSeparator);
176   bool has_standard_scheme_separator = true;
177 
178   // Some urls also use ':' alone as the scheme separator.
179   if (scheme_end_pos == std::string::npos) {
180     scheme_end_pos = pattern.find(':');
181     has_standard_scheme_separator = false;
182   }
183 
184   if (scheme_end_pos == std::string::npos)
185     return PARSE_ERROR_MISSING_SCHEME_SEPARATOR;
186 
187   if (!SetScheme(pattern.substr(0, scheme_end_pos)))
188     return PARSE_ERROR_INVALID_SCHEME;
189 
190   bool standard_scheme = IsStandardScheme(scheme_);
191   if (standard_scheme != has_standard_scheme_separator)
192     return PARSE_ERROR_WRONG_SCHEME_SEPARATOR;
193 
194   // Advance past the scheme separator.
195   scheme_end_pos +=
196       (standard_scheme ? strlen(url::kStandardSchemeSeparator) : 1);
197   if (scheme_end_pos >= pattern.size())
198     return PARSE_ERROR_EMPTY_HOST;
199 
200   // Parse out the host and path.
201   size_t host_start_pos = scheme_end_pos;
202   size_t path_start_pos = 0;
203 
204   if (!standard_scheme) {
205     path_start_pos = host_start_pos;
206   } else if (scheme_ == url::kFileScheme) {
207     size_t host_end_pos = pattern.find(kPathSeparator, host_start_pos);
208     if (host_end_pos == std::string::npos) {
209       // Allow hostname omission.
210       // e.g. file://* is interpreted as file:///*,
211       // file://foo* is interpreted as file:///foo*.
212       path_start_pos = host_start_pos - 1;
213     } else {
214       // Ignore hostname if scheme is file://.
215       // e.g. file://localhost/foo is equal to file:///foo.
216       path_start_pos = host_end_pos;
217     }
218   } else {
219     size_t host_end_pos = pattern.find(kPathSeparator, host_start_pos);
220 
221     // Host is required.
222     if (host_start_pos == host_end_pos)
223       return PARSE_ERROR_EMPTY_HOST;
224 
225     if (host_end_pos == std::string::npos)
226       return PARSE_ERROR_EMPTY_PATH;
227 
228     host_ = pattern.substr(host_start_pos, host_end_pos - host_start_pos);
229 
230     // The first component can optionally be '*' to match all subdomains.
231     std::vector<std::string> host_components;
232     base::SplitString(host_, '.', &host_components);
233     if (host_components[0] == "*") {
234       match_subdomains_ = true;
235       host_components.erase(host_components.begin(),
236                             host_components.begin() + 1);
237     }
238     host_ = JoinString(host_components, '.');
239 
240     path_start_pos = host_end_pos;
241   }
242 
243   SetPath(pattern.substr(path_start_pos));
244 
245   size_t port_pos = host_.find(':');
246   if (port_pos != std::string::npos) {
247     if (!SetPort(host_.substr(port_pos + 1)))
248       return PARSE_ERROR_INVALID_PORT;
249     host_ = host_.substr(0, port_pos);
250   }
251 
252   // No other '*' can occur in the host, though. This isn't necessary, but is
253   // done as a convenience to developers who might otherwise be confused and
254   // think '*' works as a glob in the host.
255   if (host_.find('*') != std::string::npos)
256     return PARSE_ERROR_INVALID_HOST_WILDCARD;
257 
258   // Null characters are not allowed in hosts.
259   if (host_.find('\0') != std::string::npos)
260     return PARSE_ERROR_INVALID_HOST;
261 
262   return PARSE_SUCCESS;
263 }
264 
SetValidSchemes(int valid_schemes)265 void URLPattern::SetValidSchemes(int valid_schemes) {
266   spec_.clear();
267   valid_schemes_ = valid_schemes;
268 }
269 
SetHost(const std::string & host)270 void URLPattern::SetHost(const std::string& host) {
271   spec_.clear();
272   host_ = host;
273 }
274 
SetMatchAllURLs(bool val)275 void URLPattern::SetMatchAllURLs(bool val) {
276   spec_.clear();
277   match_all_urls_ = val;
278 
279   if (val) {
280     match_subdomains_ = true;
281     scheme_ = "*";
282     host_.clear();
283     SetPath("/*");
284   }
285 }
286 
SetMatchSubdomains(bool val)287 void URLPattern::SetMatchSubdomains(bool val) {
288   spec_.clear();
289   match_subdomains_ = val;
290 }
291 
SetScheme(const std::string & scheme)292 bool URLPattern::SetScheme(const std::string& scheme) {
293   spec_.clear();
294   scheme_ = scheme;
295   if (scheme_ == "*") {
296     valid_schemes_ &= (SCHEME_HTTP | SCHEME_HTTPS);
297   } else if (!IsValidScheme(scheme_)) {
298     return false;
299   }
300   return true;
301 }
302 
IsValidScheme(const std::string & scheme) const303 bool URLPattern::IsValidScheme(const std::string& scheme) const {
304   if (valid_schemes_ == SCHEME_ALL)
305     return true;
306 
307   for (size_t i = 0; i < arraysize(kValidSchemes); ++i) {
308     if (scheme == kValidSchemes[i] && (valid_schemes_ & kValidSchemeMasks[i]))
309       return true;
310   }
311 
312   return false;
313 }
314 
SetPath(const std::string & path)315 void URLPattern::SetPath(const std::string& path) {
316   spec_.clear();
317   path_ = path;
318   path_escaped_ = path_;
319   ReplaceSubstringsAfterOffset(&path_escaped_, 0, "\\", "\\\\");
320   ReplaceSubstringsAfterOffset(&path_escaped_, 0, "?", "\\?");
321 }
322 
SetPort(const std::string & port)323 bool URLPattern::SetPort(const std::string& port) {
324   spec_.clear();
325   if (IsValidPortForScheme(scheme_, port)) {
326     port_ = port;
327     return true;
328   }
329   return false;
330 }
331 
MatchesURL(const GURL & test) const332 bool URLPattern::MatchesURL(const GURL& test) const {
333   const GURL* test_url = &test;
334   bool has_inner_url = test.inner_url() != NULL;
335 
336   if (has_inner_url) {
337     if (!test.SchemeIsFileSystem())
338       return false;  // The only nested URLs we handle are filesystem URLs.
339     test_url = test.inner_url();
340   }
341 
342   if (!MatchesScheme(test_url->scheme()))
343     return false;
344 
345   if (match_all_urls_)
346     return true;
347 
348   std::string path_for_request = test.PathForRequest();
349   if (has_inner_url)
350     path_for_request = test_url->path() + path_for_request;
351 
352   return MatchesSecurityOriginHelper(*test_url) &&
353          MatchesPath(path_for_request);
354 }
355 
MatchesSecurityOrigin(const GURL & test) const356 bool URLPattern::MatchesSecurityOrigin(const GURL& test) const {
357   const GURL* test_url = &test;
358   bool has_inner_url = test.inner_url() != NULL;
359 
360   if (has_inner_url) {
361     if (!test.SchemeIsFileSystem())
362       return false;  // The only nested URLs we handle are filesystem URLs.
363     test_url = test.inner_url();
364   }
365 
366   if (!MatchesScheme(test_url->scheme()))
367     return false;
368 
369   if (match_all_urls_)
370     return true;
371 
372   return MatchesSecurityOriginHelper(*test_url);
373 }
374 
MatchesScheme(const std::string & test) const375 bool URLPattern::MatchesScheme(const std::string& test) const {
376   if (!IsValidScheme(test))
377     return false;
378 
379   return scheme_ == "*" || test == scheme_;
380 }
381 
MatchesHost(const std::string & host) const382 bool URLPattern::MatchesHost(const std::string& host) const {
383   std::string test(url::kHttpScheme);
384   test += url::kStandardSchemeSeparator;
385   test += host;
386   test += "/";
387   return MatchesHost(GURL(test));
388 }
389 
MatchesHost(const GURL & test) const390 bool URLPattern::MatchesHost(const GURL& test) const {
391   // If the hosts are exactly equal, we have a match.
392   if (test.host() == host_)
393     return true;
394 
395   // If we're matching subdomains, and we have no host in the match pattern,
396   // that means that we're matching all hosts, which means we have a match no
397   // matter what the test host is.
398   if (match_subdomains_ && host_.empty())
399     return true;
400 
401   // Otherwise, we can only match if our match pattern matches subdomains.
402   if (!match_subdomains_)
403     return false;
404 
405   // We don't do subdomain matching against IP addresses, so we can give up now
406   // if the test host is an IP address.
407   if (test.HostIsIPAddress())
408     return false;
409 
410   // Check if the test host is a subdomain of our host.
411   if (test.host().length() <= (host_.length() + 1))
412     return false;
413 
414   if (test.host().compare(test.host().length() - host_.length(),
415                           host_.length(), host_) != 0)
416     return false;
417 
418   return test.host()[test.host().length() - host_.length() - 1] == '.';
419 }
420 
MatchesPath(const std::string & test) const421 bool URLPattern::MatchesPath(const std::string& test) const {
422   // Make the behaviour of OverlapsWith consistent with MatchesURL, which is
423   // need to match hosted apps on e.g. 'google.com' also run on 'google.com/'.
424   if (test + "/*" == path_escaped_)
425     return true;
426 
427   return MatchPattern(test, path_escaped_);
428 }
429 
GetAsString() const430 const std::string& URLPattern::GetAsString() const {
431   if (!spec_.empty())
432     return spec_;
433 
434   if (match_all_urls_) {
435     spec_ = kAllUrlsPattern;
436     return spec_;
437   }
438 
439   bool standard_scheme = IsStandardScheme(scheme_);
440 
441   std::string spec = scheme_ +
442       (standard_scheme ? url::kStandardSchemeSeparator : ":");
443 
444   if (scheme_ != url::kFileScheme && standard_scheme) {
445     if (match_subdomains_) {
446       spec += "*";
447       if (!host_.empty())
448         spec += ".";
449     }
450 
451     if (!host_.empty())
452       spec += host_;
453 
454     if (port_ != "*") {
455       spec += ":";
456       spec += port_;
457     }
458   }
459 
460   if (!path_.empty())
461     spec += path_;
462 
463   spec_ = spec;
464   return spec_;
465 }
466 
OverlapsWith(const URLPattern & other) const467 bool URLPattern::OverlapsWith(const URLPattern& other) const {
468   if (match_all_urls() || other.match_all_urls())
469     return true;
470   return (MatchesAnyScheme(other.GetExplicitSchemes()) ||
471           other.MatchesAnyScheme(GetExplicitSchemes()))
472       && (MatchesHost(other.host()) || other.MatchesHost(host()))
473       && (MatchesPortPattern(other.port()) || other.MatchesPortPattern(port()))
474       && (MatchesPath(StripTrailingWildcard(other.path())) ||
475           other.MatchesPath(StripTrailingWildcard(path())));
476 }
477 
Contains(const URLPattern & other) const478 bool URLPattern::Contains(const URLPattern& other) const {
479   if (match_all_urls())
480     return true;
481   return MatchesAllSchemes(other.GetExplicitSchemes())
482       && MatchesHost(other.host())
483       && MatchesPortPattern(other.port())
484       && MatchesPath(StripTrailingWildcard(other.path()));
485 }
486 
MatchesAnyScheme(const std::vector<std::string> & schemes) const487 bool URLPattern::MatchesAnyScheme(
488     const std::vector<std::string>& schemes) const {
489   for (std::vector<std::string>::const_iterator i = schemes.begin();
490        i != schemes.end(); ++i) {
491     if (MatchesScheme(*i))
492       return true;
493   }
494 
495   return false;
496 }
497 
MatchesAllSchemes(const std::vector<std::string> & schemes) const498 bool URLPattern::MatchesAllSchemes(
499     const std::vector<std::string>& schemes) const {
500   for (std::vector<std::string>::const_iterator i = schemes.begin();
501        i != schemes.end(); ++i) {
502     if (!MatchesScheme(*i))
503       return false;
504   }
505 
506   return true;
507 }
508 
MatchesSecurityOriginHelper(const GURL & test) const509 bool URLPattern::MatchesSecurityOriginHelper(const GURL& test) const {
510   // Ignore hostname if scheme is file://.
511   if (scheme_ != url::kFileScheme && !MatchesHost(test))
512     return false;
513 
514   if (!MatchesPortPattern(base::IntToString(test.EffectiveIntPort())))
515     return false;
516 
517   return true;
518 }
519 
MatchesPortPattern(const std::string & port) const520 bool URLPattern::MatchesPortPattern(const std::string& port) const {
521   return port_ == "*" || port_ == port;
522 }
523 
GetExplicitSchemes() const524 std::vector<std::string> URLPattern::GetExplicitSchemes() const {
525   std::vector<std::string> result;
526 
527   if (scheme_ != "*" && !match_all_urls_ && IsValidScheme(scheme_)) {
528     result.push_back(scheme_);
529     return result;
530   }
531 
532   for (size_t i = 0; i < arraysize(kValidSchemes); ++i) {
533     if (MatchesScheme(kValidSchemes[i])) {
534       result.push_back(kValidSchemes[i]);
535     }
536   }
537 
538   return result;
539 }
540 
ConvertToExplicitSchemes() const541 std::vector<URLPattern> URLPattern::ConvertToExplicitSchemes() const {
542   std::vector<std::string> explicit_schemes = GetExplicitSchemes();
543   std::vector<URLPattern> result;
544 
545   for (std::vector<std::string>::const_iterator i = explicit_schemes.begin();
546        i != explicit_schemes.end(); ++i) {
547     URLPattern temp = *this;
548     temp.SetScheme(*i);
549     temp.SetMatchAllURLs(false);
550     result.push_back(temp);
551   }
552 
553   return result;
554 }
555 
556 // static
GetParseResultString(URLPattern::ParseResult parse_result)557 const char* URLPattern::GetParseResultString(
558     URLPattern::ParseResult parse_result) {
559   return kParseResultMessages[parse_result];
560 }
561