1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "chrome/common/extensions/url_pattern.h"
6
7 #include "base/string_piece.h"
8 #include "base/string_split.h"
9 #include "base/string_util.h"
10 #include "chrome/common/url_constants.h"
11 #include "googleurl/src/gurl.h"
12 #include "googleurl/src/url_util.h"
13
14 const char URLPattern::kAllUrlsPattern[] = "<all_urls>";
15
16 namespace {
17
18 // TODO(aa): Consider adding chrome-extension? What about more obscure ones
19 // like data: and javascript: ?
20 // Note: keep this array in sync with kValidSchemeMasks.
21 const char* kValidSchemes[] = {
22 chrome::kHttpScheme,
23 chrome::kHttpsScheme,
24 chrome::kFileScheme,
25 chrome::kFtpScheme,
26 chrome::kChromeUIScheme,
27 chrome::kFileSystemScheme,
28 };
29
30 const int kValidSchemeMasks[] = {
31 URLPattern::SCHEME_HTTP,
32 URLPattern::SCHEME_HTTPS,
33 URLPattern::SCHEME_FILE,
34 URLPattern::SCHEME_FTP,
35 URLPattern::SCHEME_CHROMEUI,
36 URLPattern::SCHEME_FILESYSTEM,
37 };
38
39 COMPILE_ASSERT(arraysize(kValidSchemes) == arraysize(kValidSchemeMasks),
40 must_keep_these_arrays_in_sync);
41
42 const char* kParseSuccess = "Success.";
43 const char* kParseErrorMissingSchemeSeparator = "Missing scheme separator.";
44 const char* kParseErrorInvalidScheme = "Invalid scheme.";
45 const char* kParseErrorWrongSchemeType = "Wrong scheme type.";
46 const char* kParseErrorEmptyHost = "Host can not be empty.";
47 const char* kParseErrorInvalidHostWildcard = "Invalid host wildcard.";
48 const char* kParseErrorEmptyPath = "Empty path.";
49 const char* kParseErrorHasColon =
50 "Ports are not supported in URL patterns. ':' may not be used in a host.";
51
52 // Message explaining each URLPattern::ParseResult.
53 const char* kParseResultMessages[] = {
54 kParseSuccess,
55 kParseErrorMissingSchemeSeparator,
56 kParseErrorInvalidScheme,
57 kParseErrorWrongSchemeType,
58 kParseErrorEmptyHost,
59 kParseErrorInvalidHostWildcard,
60 kParseErrorEmptyPath,
61 kParseErrorHasColon
62 };
63
64 COMPILE_ASSERT(URLPattern::NUM_PARSE_RESULTS == arraysize(kParseResultMessages),
65 must_add_message_for_each_parse_result);
66
67 const char kPathSeparator[] = "/";
68
IsStandardScheme(const std::string & scheme)69 bool IsStandardScheme(const std::string& scheme) {
70 // "*" gets the same treatment as a standard scheme.
71 if (scheme == "*")
72 return true;
73
74 return url_util::IsStandard(scheme.c_str(),
75 url_parse::Component(0, static_cast<int>(scheme.length())));
76 }
77
78 } // namespace
79
URLPattern()80 URLPattern::URLPattern()
81 : valid_schemes_(SCHEME_NONE),
82 match_all_urls_(false),
83 match_subdomains_(false) {}
84
URLPattern(int valid_schemes)85 URLPattern::URLPattern(int valid_schemes)
86 : valid_schemes_(valid_schemes), match_all_urls_(false),
87 match_subdomains_(false) {}
88
URLPattern(int valid_schemes,const std::string & pattern)89 URLPattern::URLPattern(int valid_schemes, const std::string& pattern)
90 : valid_schemes_(valid_schemes), match_all_urls_(false),
91 match_subdomains_(false) {
92
93 // Strict error checking is used, because this constructor is only
94 // appropriate when we know |pattern| is valid.
95 if (PARSE_SUCCESS != Parse(pattern, PARSE_STRICT))
96 NOTREACHED() << "URLPattern is invalid: " << pattern;
97 }
98
~URLPattern()99 URLPattern::~URLPattern() {
100 }
101
Parse(const std::string & pattern,ParseOption strictness)102 URLPattern::ParseResult URLPattern::Parse(const std::string& pattern,
103 ParseOption strictness) {
104 CHECK(strictness == PARSE_LENIENT ||
105 strictness == PARSE_STRICT);
106
107 // Special case pattern to match every valid URL.
108 if (pattern == kAllUrlsPattern) {
109 match_all_urls_ = true;
110 match_subdomains_ = true;
111 scheme_ = "*";
112 host_.clear();
113 SetPath("/*");
114 return PARSE_SUCCESS;
115 }
116
117 // Parse out the scheme.
118 size_t scheme_end_pos = pattern.find(chrome::kStandardSchemeSeparator);
119 bool has_standard_scheme_separator = true;
120
121 // Some urls also use ':' alone as the scheme separator.
122 if (scheme_end_pos == std::string::npos) {
123 scheme_end_pos = pattern.find(':');
124 has_standard_scheme_separator = false;
125 }
126
127 if (scheme_end_pos == std::string::npos)
128 return PARSE_ERROR_MISSING_SCHEME_SEPARATOR;
129
130 if (!SetScheme(pattern.substr(0, scheme_end_pos)))
131 return PARSE_ERROR_INVALID_SCHEME;
132
133 bool standard_scheme = IsStandardScheme(scheme_);
134 if (standard_scheme != has_standard_scheme_separator)
135 return PARSE_ERROR_WRONG_SCHEME_SEPARATOR;
136
137 // Advance past the scheme separator.
138 scheme_end_pos +=
139 (standard_scheme ? strlen(chrome::kStandardSchemeSeparator) : 1);
140 if (scheme_end_pos >= pattern.size())
141 return PARSE_ERROR_EMPTY_HOST;
142
143 // Parse out the host and path.
144 size_t host_start_pos = scheme_end_pos;
145 size_t path_start_pos = 0;
146
147 // File URLs are special because they have no host.
148 if (scheme_ == chrome::kFileScheme || !standard_scheme) {
149 path_start_pos = host_start_pos;
150 } else {
151 size_t host_end_pos = pattern.find(kPathSeparator, host_start_pos);
152
153 // Host is required.
154 if (host_start_pos == host_end_pos)
155 return PARSE_ERROR_EMPTY_HOST;
156
157 if (host_end_pos == std::string::npos)
158 return PARSE_ERROR_EMPTY_PATH;
159
160 host_ = pattern.substr(host_start_pos, host_end_pos - host_start_pos);
161
162 // The first component can optionally be '*' to match all subdomains.
163 std::vector<std::string> host_components;
164 base::SplitString(host_, '.', &host_components);
165 if (host_components[0] == "*") {
166 match_subdomains_ = true;
167 host_components.erase(host_components.begin(),
168 host_components.begin() + 1);
169 }
170 host_ = JoinString(host_components, '.');
171
172 // No other '*' can occur in the host, though. This isn't necessary, but is
173 // done as a convenience to developers who might otherwise be confused and
174 // think '*' works as a glob in the host.
175 if (host_.find('*') != std::string::npos)
176 return PARSE_ERROR_INVALID_HOST_WILDCARD;
177
178 path_start_pos = host_end_pos;
179 }
180
181 SetPath(pattern.substr(path_start_pos));
182
183 if (strictness == PARSE_STRICT && host_.find(':') != std::string::npos)
184 return PARSE_ERROR_HAS_COLON;
185
186 return PARSE_SUCCESS;
187 }
188
SetScheme(const std::string & scheme)189 bool URLPattern::SetScheme(const std::string& scheme) {
190 scheme_ = scheme;
191 if (scheme_ == "*") {
192 valid_schemes_ &= (SCHEME_HTTP | SCHEME_HTTPS);
193 } else if (!IsValidScheme(scheme_)) {
194 return false;
195 }
196 return true;
197 }
198
IsValidScheme(const std::string & scheme) const199 bool URLPattern::IsValidScheme(const std::string& scheme) const {
200 if (valid_schemes_ == SCHEME_ALL)
201 return true;
202
203 for (size_t i = 0; i < arraysize(kValidSchemes); ++i) {
204 if (scheme == kValidSchemes[i] && (valid_schemes_ & kValidSchemeMasks[i]))
205 return true;
206 }
207
208 return false;
209 }
210
SetPath(const std::string & path)211 void URLPattern::SetPath(const std::string& path) {
212 path_ = path;
213 path_escaped_ = path_;
214 ReplaceSubstringsAfterOffset(&path_escaped_, 0, "\\", "\\\\");
215 ReplaceSubstringsAfterOffset(&path_escaped_, 0, "?", "\\?");
216 }
217
MatchesUrl(const GURL & test) const218 bool URLPattern::MatchesUrl(const GURL &test) const {
219 if (!MatchesScheme(test.scheme()))
220 return false;
221
222 if (match_all_urls_)
223 return true;
224
225 if (!MatchesHost(test))
226 return false;
227
228 if (!MatchesPath(test.PathForRequest()))
229 return false;
230
231 return true;
232 }
233
MatchesScheme(const std::string & test) const234 bool URLPattern::MatchesScheme(const std::string& test) const {
235 if (!IsValidScheme(test))
236 return false;
237
238 return scheme_ == "*" || test == scheme_;
239 }
240
MatchesHost(const std::string & host) const241 bool URLPattern::MatchesHost(const std::string& host) const {
242 std::string test(chrome::kHttpScheme);
243 test += chrome::kStandardSchemeSeparator;
244 test += host;
245 test += "/";
246 return MatchesHost(GURL(test));
247 }
248
MatchesHost(const GURL & test) const249 bool URLPattern::MatchesHost(const GURL& test) const {
250 // If the hosts are exactly equal, we have a match.
251 if (test.host() == host_)
252 return true;
253
254 // If we're matching subdomains, and we have no host in the match pattern,
255 // that means that we're matching all hosts, which means we have a match no
256 // matter what the test host is.
257 if (match_subdomains_ && host_.empty())
258 return true;
259
260 // Otherwise, we can only match if our match pattern matches subdomains.
261 if (!match_subdomains_)
262 return false;
263
264 // We don't do subdomain matching against IP addresses, so we can give up now
265 // if the test host is an IP address.
266 if (test.HostIsIPAddress())
267 return false;
268
269 // Check if the test host is a subdomain of our host.
270 if (test.host().length() <= (host_.length() + 1))
271 return false;
272
273 if (test.host().compare(test.host().length() - host_.length(),
274 host_.length(), host_) != 0)
275 return false;
276
277 return test.host()[test.host().length() - host_.length() - 1] == '.';
278 }
279
MatchesPath(const std::string & test) const280 bool URLPattern::MatchesPath(const std::string& test) const {
281 if (!MatchPattern(test, path_escaped_))
282 return false;
283
284 return true;
285 }
286
GetAsString() const287 std::string URLPattern::GetAsString() const {
288 if (match_all_urls_)
289 return kAllUrlsPattern;
290
291 bool standard_scheme = IsStandardScheme(scheme_);
292
293 std::string spec = scheme_ +
294 (standard_scheme ? chrome::kStandardSchemeSeparator : ":");
295
296 if (scheme_ != chrome::kFileScheme && standard_scheme) {
297 if (match_subdomains_) {
298 spec += "*";
299 if (!host_.empty())
300 spec += ".";
301 }
302
303 if (!host_.empty())
304 spec += host_;
305 }
306
307 if (!path_.empty())
308 spec += path_;
309
310 return spec;
311 }
312
OverlapsWith(const URLPattern & other) const313 bool URLPattern::OverlapsWith(const URLPattern& other) const {
314 if (!MatchesScheme(other.scheme_) && !other.MatchesScheme(scheme_))
315 return false;
316
317 if (!MatchesHost(other.host()) && !other.MatchesHost(host_))
318 return false;
319
320 // We currently only use OverlapsWith() for the patterns inside
321 // ExtensionExtent. In those cases, we know that the path will have only a
322 // single wildcard at the end. This makes figuring out overlap much easier. It
323 // seems like there is probably a computer-sciency way to solve the general
324 // case, but we don't need that yet.
325 DCHECK(path_.find('*') == path_.size() - 1);
326 DCHECK(other.path().find('*') == other.path().size() - 1);
327
328 if (!MatchesPath(other.path().substr(0, other.path().size() - 1)) &&
329 !other.MatchesPath(path_.substr(0, path_.size() - 1)))
330 return false;
331
332 return true;
333 }
334
ConvertToExplicitSchemes() const335 std::vector<URLPattern> URLPattern::ConvertToExplicitSchemes() const {
336 std::vector<URLPattern> result;
337
338 if (scheme_ != "*" && !match_all_urls_ && IsValidScheme(scheme_)) {
339 result.push_back(*this);
340 return result;
341 }
342
343 for (size_t i = 0; i < arraysize(kValidSchemes); ++i) {
344 if (MatchesScheme(kValidSchemes[i])) {
345 URLPattern temp = *this;
346 temp.SetScheme(kValidSchemes[i]);
347 temp.set_match_all_urls(false);
348 result.push_back(temp);
349 }
350 }
351
352 return result;
353 }
354
355 // static
GetParseResultString(URLPattern::ParseResult parse_result)356 const char* URLPattern::GetParseResultString(
357 URLPattern::ParseResult parse_result) {
358 return kParseResultMessages[parse_result];
359 }
360