• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "chrome/common/extensions/url_pattern.h"
6 
7 #include "base/string_piece.h"
8 #include "base/string_split.h"
9 #include "base/string_util.h"
10 #include "chrome/common/url_constants.h"
11 #include "googleurl/src/gurl.h"
12 #include "googleurl/src/url_util.h"
13 
// Pattern string that Parse() special-cases as "match every URL" and that
// GetAsString() returns for patterns with match_all_urls_ set.
14 const char URLPattern::kAllUrlsPattern[] = "<all_urls>";
15 
16 namespace {
17 
18 // TODO(aa): Consider adding chrome-extension? What about more obscure ones
19 // like data: and javascript: ?
20 // Note: keep this array in sync with kValidSchemeMasks.
21 const char* kValidSchemes[] = {
22   chrome::kHttpScheme,
23   chrome::kHttpsScheme,
24   chrome::kFileScheme,
25   chrome::kFtpScheme,
26   chrome::kChromeUIScheme,
27   chrome::kFileSystemScheme,
28 };
29 
30 const int kValidSchemeMasks[] = {
31   URLPattern::SCHEME_HTTP,
32   URLPattern::SCHEME_HTTPS,
33   URLPattern::SCHEME_FILE,
34   URLPattern::SCHEME_FTP,
35   URLPattern::SCHEME_CHROMEUI,
36   URLPattern::SCHEME_FILESYSTEM,
37 };
38 
39 COMPILE_ASSERT(arraysize(kValidSchemes) == arraysize(kValidSchemeMasks),
40                must_keep_these_arrays_in_sync);
41 
42 const char* kParseSuccess = "Success.";
43 const char* kParseErrorMissingSchemeSeparator = "Missing scheme separator.";
44 const char* kParseErrorInvalidScheme = "Invalid scheme.";
45 const char* kParseErrorWrongSchemeType = "Wrong scheme type.";
46 const char* kParseErrorEmptyHost = "Host can not be empty.";
47 const char* kParseErrorInvalidHostWildcard = "Invalid host wildcard.";
48 const char* kParseErrorEmptyPath = "Empty path.";
49 const char* kParseErrorHasColon =
50     "Ports are not supported in URL patterns. ':' may not be used in a host.";
51 
52 // Message explaining each URLPattern::ParseResult.
53 const char* kParseResultMessages[] = {
54   kParseSuccess,
55   kParseErrorMissingSchemeSeparator,
56   kParseErrorInvalidScheme,
57   kParseErrorWrongSchemeType,
58   kParseErrorEmptyHost,
59   kParseErrorInvalidHostWildcard,
60   kParseErrorEmptyPath,
61   kParseErrorHasColon
62 };
63 
64 COMPILE_ASSERT(URLPattern::NUM_PARSE_RESULTS == arraysize(kParseResultMessages),
65                must_add_message_for_each_parse_result);
66 
67 const char kPathSeparator[] = "/";
68 
IsStandardScheme(const std::string & scheme)69 bool IsStandardScheme(const std::string& scheme) {
70   // "*" gets the same treatment as a standard scheme.
71   if (scheme == "*")
72     return true;
73 
74   return url_util::IsStandard(scheme.c_str(),
75       url_parse::Component(0, static_cast<int>(scheme.length())));
76 }
77 
78 }  // namespace
79 
URLPattern()80 URLPattern::URLPattern()
81     : valid_schemes_(SCHEME_NONE),
82       match_all_urls_(false),
83       match_subdomains_(false) {}
84 
URLPattern(int valid_schemes)85 URLPattern::URLPattern(int valid_schemes)
86     : valid_schemes_(valid_schemes), match_all_urls_(false),
87       match_subdomains_(false) {}
88 
URLPattern(int valid_schemes,const std::string & pattern)89 URLPattern::URLPattern(int valid_schemes, const std::string& pattern)
90     : valid_schemes_(valid_schemes), match_all_urls_(false),
91       match_subdomains_(false) {
92 
93   // Strict error checking is used, because this constructor is only
94   // appropriate when we know |pattern| is valid.
95   if (PARSE_SUCCESS != Parse(pattern, PARSE_STRICT))
96     NOTREACHED() << "URLPattern is invalid: " << pattern;
97 }
98 
~URLPattern()99 URLPattern::~URLPattern() {
100 }
101 
Parse(const std::string & pattern,ParseOption strictness)102 URLPattern::ParseResult URLPattern::Parse(const std::string& pattern,
103                                           ParseOption strictness) {
104   CHECK(strictness == PARSE_LENIENT ||
105         strictness == PARSE_STRICT);
106 
107   // Special case pattern to match every valid URL.
108   if (pattern == kAllUrlsPattern) {
109     match_all_urls_ = true;
110     match_subdomains_ = true;
111     scheme_ = "*";
112     host_.clear();
113     SetPath("/*");
114     return PARSE_SUCCESS;
115   }
116 
117   // Parse out the scheme.
118   size_t scheme_end_pos = pattern.find(chrome::kStandardSchemeSeparator);
119   bool has_standard_scheme_separator = true;
120 
121   // Some urls also use ':' alone as the scheme separator.
122   if (scheme_end_pos == std::string::npos) {
123     scheme_end_pos = pattern.find(':');
124     has_standard_scheme_separator = false;
125   }
126 
127   if (scheme_end_pos == std::string::npos)
128     return PARSE_ERROR_MISSING_SCHEME_SEPARATOR;
129 
130   if (!SetScheme(pattern.substr(0, scheme_end_pos)))
131     return PARSE_ERROR_INVALID_SCHEME;
132 
133   bool standard_scheme = IsStandardScheme(scheme_);
134   if (standard_scheme != has_standard_scheme_separator)
135     return PARSE_ERROR_WRONG_SCHEME_SEPARATOR;
136 
137   // Advance past the scheme separator.
138   scheme_end_pos +=
139       (standard_scheme ? strlen(chrome::kStandardSchemeSeparator) : 1);
140   if (scheme_end_pos >= pattern.size())
141     return PARSE_ERROR_EMPTY_HOST;
142 
143   // Parse out the host and path.
144   size_t host_start_pos = scheme_end_pos;
145   size_t path_start_pos = 0;
146 
147   // File URLs are special because they have no host.
148   if (scheme_ == chrome::kFileScheme || !standard_scheme) {
149     path_start_pos = host_start_pos;
150   } else {
151     size_t host_end_pos = pattern.find(kPathSeparator, host_start_pos);
152 
153     // Host is required.
154     if (host_start_pos == host_end_pos)
155       return PARSE_ERROR_EMPTY_HOST;
156 
157     if (host_end_pos == std::string::npos)
158       return PARSE_ERROR_EMPTY_PATH;
159 
160     host_ = pattern.substr(host_start_pos, host_end_pos - host_start_pos);
161 
162     // The first component can optionally be '*' to match all subdomains.
163     std::vector<std::string> host_components;
164     base::SplitString(host_, '.', &host_components);
165     if (host_components[0] == "*") {
166       match_subdomains_ = true;
167       host_components.erase(host_components.begin(),
168                             host_components.begin() + 1);
169     }
170     host_ = JoinString(host_components, '.');
171 
172     // No other '*' can occur in the host, though. This isn't necessary, but is
173     // done as a convenience to developers who might otherwise be confused and
174     // think '*' works as a glob in the host.
175     if (host_.find('*') != std::string::npos)
176       return PARSE_ERROR_INVALID_HOST_WILDCARD;
177 
178     path_start_pos = host_end_pos;
179   }
180 
181   SetPath(pattern.substr(path_start_pos));
182 
183   if (strictness == PARSE_STRICT && host_.find(':') != std::string::npos)
184     return PARSE_ERROR_HAS_COLON;
185 
186   return PARSE_SUCCESS;
187 }
188 
SetScheme(const std::string & scheme)189 bool URLPattern::SetScheme(const std::string& scheme) {
190   scheme_ = scheme;
191   if (scheme_ == "*") {
192     valid_schemes_ &= (SCHEME_HTTP | SCHEME_HTTPS);
193   } else if (!IsValidScheme(scheme_)) {
194     return false;
195   }
196   return true;
197 }
198 
IsValidScheme(const std::string & scheme) const199 bool URLPattern::IsValidScheme(const std::string& scheme) const {
200   if (valid_schemes_ == SCHEME_ALL)
201     return true;
202 
203   for (size_t i = 0; i < arraysize(kValidSchemes); ++i) {
204     if (scheme == kValidSchemes[i] && (valid_schemes_ & kValidSchemeMasks[i]))
205       return true;
206   }
207 
208   return false;
209 }
210 
SetPath(const std::string & path)211 void URLPattern::SetPath(const std::string& path) {
212   path_ = path;
213   path_escaped_ = path_;
214   ReplaceSubstringsAfterOffset(&path_escaped_, 0, "\\", "\\\\");
215   ReplaceSubstringsAfterOffset(&path_escaped_, 0, "?", "\\?");
216 }
217 
MatchesUrl(const GURL & test) const218 bool URLPattern::MatchesUrl(const GURL &test) const {
219   if (!MatchesScheme(test.scheme()))
220     return false;
221 
222   if (match_all_urls_)
223     return true;
224 
225   if (!MatchesHost(test))
226     return false;
227 
228   if (!MatchesPath(test.PathForRequest()))
229     return false;
230 
231   return true;
232 }
233 
MatchesScheme(const std::string & test) const234 bool URLPattern::MatchesScheme(const std::string& test) const {
235   if (!IsValidScheme(test))
236     return false;
237 
238   return scheme_ == "*" || test == scheme_;
239 }
240 
MatchesHost(const std::string & host) const241 bool URLPattern::MatchesHost(const std::string& host) const {
242   std::string test(chrome::kHttpScheme);
243   test += chrome::kStandardSchemeSeparator;
244   test += host;
245   test += "/";
246   return MatchesHost(GURL(test));
247 }
248 
MatchesHost(const GURL & test) const249 bool URLPattern::MatchesHost(const GURL& test) const {
250   // If the hosts are exactly equal, we have a match.
251   if (test.host() == host_)
252     return true;
253 
254   // If we're matching subdomains, and we have no host in the match pattern,
255   // that means that we're matching all hosts, which means we have a match no
256   // matter what the test host is.
257   if (match_subdomains_ && host_.empty())
258     return true;
259 
260   // Otherwise, we can only match if our match pattern matches subdomains.
261   if (!match_subdomains_)
262     return false;
263 
264   // We don't do subdomain matching against IP addresses, so we can give up now
265   // if the test host is an IP address.
266   if (test.HostIsIPAddress())
267     return false;
268 
269   // Check if the test host is a subdomain of our host.
270   if (test.host().length() <= (host_.length() + 1))
271     return false;
272 
273   if (test.host().compare(test.host().length() - host_.length(),
274                           host_.length(), host_) != 0)
275     return false;
276 
277   return test.host()[test.host().length() - host_.length() - 1] == '.';
278 }
279 
MatchesPath(const std::string & test) const280 bool URLPattern::MatchesPath(const std::string& test) const {
281   if (!MatchPattern(test, path_escaped_))
282     return false;
283 
284   return true;
285 }
286 
GetAsString() const287 std::string URLPattern::GetAsString() const {
288   if (match_all_urls_)
289     return kAllUrlsPattern;
290 
291   bool standard_scheme = IsStandardScheme(scheme_);
292 
293   std::string spec = scheme_ +
294       (standard_scheme ? chrome::kStandardSchemeSeparator : ":");
295 
296   if (scheme_ != chrome::kFileScheme && standard_scheme) {
297     if (match_subdomains_) {
298       spec += "*";
299       if (!host_.empty())
300         spec += ".";
301     }
302 
303     if (!host_.empty())
304       spec += host_;
305   }
306 
307   if (!path_.empty())
308     spec += path_;
309 
310   return spec;
311 }
312 
OverlapsWith(const URLPattern & other) const313 bool URLPattern::OverlapsWith(const URLPattern& other) const {
314   if (!MatchesScheme(other.scheme_) && !other.MatchesScheme(scheme_))
315     return false;
316 
317   if (!MatchesHost(other.host()) && !other.MatchesHost(host_))
318     return false;
319 
320   // We currently only use OverlapsWith() for the patterns inside
321   // ExtensionExtent. In those cases, we know that the path will have only a
322   // single wildcard at the end. This makes figuring out overlap much easier. It
323   // seems like there is probably a computer-sciency way to solve the general
324   // case, but we don't need that yet.
325   DCHECK(path_.find('*') == path_.size() - 1);
326   DCHECK(other.path().find('*') == other.path().size() - 1);
327 
328   if (!MatchesPath(other.path().substr(0, other.path().size() - 1)) &&
329       !other.MatchesPath(path_.substr(0, path_.size() - 1)))
330     return false;
331 
332   return true;
333 }
334 
ConvertToExplicitSchemes() const335 std::vector<URLPattern> URLPattern::ConvertToExplicitSchemes() const {
336   std::vector<URLPattern> result;
337 
338   if (scheme_ != "*" && !match_all_urls_ && IsValidScheme(scheme_)) {
339     result.push_back(*this);
340     return result;
341   }
342 
343   for (size_t i = 0; i < arraysize(kValidSchemes); ++i) {
344     if (MatchesScheme(kValidSchemes[i])) {
345       URLPattern temp = *this;
346       temp.SetScheme(kValidSchemes[i]);
347       temp.set_match_all_urls(false);
348       result.push_back(temp);
349     }
350   }
351 
352   return result;
353 }
354 
355 // static
GetParseResultString(URLPattern::ParseResult parse_result)356 const char* URLPattern::GetParseResultString(
357     URLPattern::ParseResult parse_result) {
358   return kParseResultMessages[parse_result];
359 }
360