• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 #ifndef EXTENSIONS_COMMON_URL_PATTERN_H_
5 #define EXTENSIONS_COMMON_URL_PATTERN_H_
6 
7 #include <functional>
8 #include <string>
9 #include <vector>
10 
class GURL;

// A pattern that can be used to match URLs. A URLPattern is a very restricted
// subset of URL syntax:
//
// <url-pattern> := <scheme>://<host><port><path> | '<all_urls>'
// <scheme> := '*' | 'http' | 'https' | 'file' | 'ftp' | 'chrome' |
//             'chrome-extension' | 'filesystem'
// <host> := '*' | '*.' <anychar except '/' and '*'>+
// <port> := [':' ('*' | <port number between 0 and 65535>)]
// <path> := '/' <any chars>
//
// * Host is not used when the scheme is 'file'.
// * The path can have embedded '*' characters which act as glob wildcards.
// * '<all_urls>' is a special pattern that matches any URL that contains a
//   valid scheme (as specified by valid_schemes_).
// * The '*' scheme pattern excludes file URLs.
//
// Examples of valid patterns:
// - http://*/*
// - http://*/foo*
// - https://*.google.com/foo*bar
// - file://monkey*
// - http://127.0.0.1/*
//
// Examples of invalid patterns:
// - http://* -- path not specified
// - http://*foo/bar -- * not allowed as substring of host component
// - http://foo.*.bar/baz -- * must be first component
// - http:/bar -- scheme separator not found
// - foo://* -- invalid scheme
// - chrome:// -- we don't support chrome internal URLs
class URLPattern {
 public:
  // A collection of scheme bitmasks for use with valid_schemes. Each named
  // scheme occupies its own bit, so several values can be combined with
  // bitwise OR into a single int mask.
  enum SchemeMasks {
    SCHEME_NONE       = 0,
    SCHEME_HTTP       = 1 << 0,
    SCHEME_HTTPS      = 1 << 1,
    SCHEME_FILE       = 1 << 2,
    SCHEME_FTP        = 1 << 3,
    SCHEME_CHROMEUI   = 1 << 4,
    SCHEME_EXTENSION  = 1 << 5,
    SCHEME_FILESYSTEM = 1 << 6,

    // IMPORTANT!
    // SCHEME_ALL will match every scheme, including chrome://, chrome-
    // extension://, about:, etc. Because this has lots of security
    // implications, third-party extensions should usually not be able to get
    // access to URL patterns initialized this way. If there is a reason
    // for violating this general rule, document why it is safe.
    SCHEME_ALL      = -1,
  };

  // Error codes returned from Parse(). The enumerators are numbered
  // consecutively from zero, so NUM_PARSE_RESULTS equals the number of real
  // result codes and must stay last.
  enum ParseResult {
    PARSE_SUCCESS = 0,
    PARSE_ERROR_MISSING_SCHEME_SEPARATOR,
    PARSE_ERROR_INVALID_SCHEME,
    PARSE_ERROR_WRONG_SCHEME_SEPARATOR,
    PARSE_ERROR_EMPTY_HOST,
    PARSE_ERROR_INVALID_HOST_WILDCARD,
    PARSE_ERROR_EMPTY_PATH,
    PARSE_ERROR_INVALID_PORT,
    NUM_PARSE_RESULTS
  };

  // The <all_urls> string pattern.
  static const char kAllUrlsPattern[];

  // Constructs a pattern from a |valid_schemes| bitmask (see SchemeMasks).
  // Explicit, so a bare int is never implicitly converted to a URLPattern.
  // The initial values of the other fields are set by the out-of-line
  // definition.
  explicit URLPattern(int valid_schemes);

  // Convenience to construct a URLPattern from a string. If the string is not
  // known ahead of time, use Parse() instead, which returns success or failure.
  URLPattern(int valid_schemes, const std::string& pattern);

  // Default constructor and destructor; both are defined out of line.
  URLPattern();
  ~URLPattern();

  // Comparison operators. The ordering and equality criteria are defined out
  // of line and cannot be determined from this header.
  bool operator<(const URLPattern& other) const;
  bool operator>(const URLPattern& other) const;
  bool operator==(const URLPattern& other) const;

  // Initializes this instance by parsing the provided string. Returns
  // URLPattern::PARSE_SUCCESS on success, or an error code otherwise. On
  // failure, this instance will have some intermediate values and is in an
  // invalid state.
  ParseResult Parse(const std::string& pattern_str);

  // Gets the bitmask of valid schemes.
  int valid_schemes() const { return valid_schemes_; }
  void SetValidSchemes(int valid_schemes);

  // Gets the host the pattern matches. This can be an empty string if the
  // pattern matches all hosts (the input was <scheme>://*/<whatever>).
  const std::string& host() const { return host_; }
  void SetHost(const std::string& host);

  // Gets whether to match subdomains of host().
  bool match_subdomains() const { return match_subdomains_; }
  void SetMatchSubdomains(bool val);

  // Gets the path the pattern matches with the leading slash. This can have
  // embedded asterisks which are interpreted using glob rules.
  const std::string& path() const { return path_; }
  void SetPath(const std::string& path);

  // Returns true if this pattern matches all urls.
  bool match_all_urls() const { return match_all_urls_; }
  void SetMatchAllURLs(bool val);

  // Sets the scheme for pattern matches. This can be a single '*' if the
  // pattern matches all valid schemes (as defined by the valid_schemes_
  // property). Returns false on failure (if the scheme is not valid).
  bool SetScheme(const std::string& scheme);
  // Note: You should use MatchesScheme() instead of this getter unless you
  // absolutely need the exact scheme. This is exposed for testing.
  const std::string& scheme() const { return scheme_; }

  // Returns true if the specified scheme can be used in this URL pattern, and
  // false otherwise. Uses valid_schemes_ to determine validity.
  bool IsValidScheme(const std::string& scheme) const;

  // Returns true if this instance matches the specified URL.
  bool MatchesURL(const GURL& test) const;

  // Returns true if this instance matches the specified security origin.
  bool MatchesSecurityOrigin(const GURL& test) const;

  // Returns true if |test| matches our scheme.
  // Note that if test is "filesystem", this may fail whereas MatchesURL
  // may succeed.  MatchesURL is smart enough to look at the inner_url instead
  // of the outer "filesystem:" part.
  bool MatchesScheme(const std::string& test) const;

  // Returns true if |test| matches our host.
  bool MatchesHost(const std::string& test) const;
  bool MatchesHost(const GURL& test) const;

  // Returns true if |test| matches our path.
  bool MatchesPath(const std::string& test) const;

  // Sets the port. Returns false if the port is invalid.
  bool SetPort(const std::string& port);
  const std::string& port() const { return port_; }

  // Returns a string representing this instance.
  const std::string& GetAsString() const;

  // Determines whether there is a URL that would match this instance and
  // another instance. This method is symmetrical: Calling
  // other.OverlapsWith(this) would result in the same answer.
  bool OverlapsWith(const URLPattern& other) const;

  // Returns true if this pattern matches all possible URLs that |other| can
  // match. For example, http://*.google.com encompasses http://www.google.com.
  bool Contains(const URLPattern& other) const;

  // Converts this URLPattern into an equivalent set of URLPatterns that don't
  // use a wildcard in the scheme component. If this URLPattern doesn't use a
  // wildcard scheme, then the returned set will contain one element that is
  // equivalent to this instance.
  std::vector<URLPattern> ConvertToExplicitSchemes() const;

  // "Less than" predicate that looks only at the host component. Two
  // <all_urls> patterns are never ordered before one another; in every other
  // case |a| precedes |b| exactly when a's host compares lexicographically
  // less than b's (std::string::compare). Scheme, port and path are ignored.
  static bool EffectiveHostCompare(const URLPattern& a, const URLPattern& b) {
    if (a.match_all_urls_ && b.match_all_urls_)
      return false;
    return a.host_.compare(b.host_) < 0;
  }

  // Used for origin comparisons in a std::set. Stateless function object that
  // forwards to EffectiveHostCompare().
  class EffectiveHostCompareFunctor {
   public:
    bool operator()(const URLPattern& a, const URLPattern& b) const {
      return EffectiveHostCompare(a, b);
    }
  };

  // Get an error string for a ParseResult.
  static const char* GetParseResultString(URLPattern::ParseResult parse_result);

  // Checks whether the bit is set for the given scheme in the given scheme
  // mask.
  static bool IsSchemeBitSet(const std::string& scheme, int mask);

 private:
  // Returns true if any of the |schemes| items matches our scheme.
  bool MatchesAnyScheme(const std::vector<std::string>& schemes) const;

  // Returns true if all of the |schemes| items matches our scheme.
  bool MatchesAllSchemes(const std::vector<std::string>& schemes) const;

  // NOTE(review): presumably the worker behind MatchesSecurityOrigin();
  // confirm against the implementation file.
  bool MatchesSecurityOriginHelper(const GURL& test) const;

  // Returns true if our port matches the |port| pattern (it may be "*").
  bool MatchesPortPattern(const std::string& port) const;

  // If the URLPattern contains a wildcard scheme, returns a list of
  // equivalent literal schemes, otherwise returns the current scheme.
  std::vector<std::string> GetExplicitSchemes() const;

  // A bitmask containing the schemes which are considered valid for this
  // pattern. Parse() uses this to decide whether a pattern contains a valid
  // scheme.
  int valid_schemes_;

  // True if this is a special-case "<all_urls>" pattern.
  bool match_all_urls_;

  // The scheme for the pattern.
  std::string scheme_;

  // The host without any leading "*" components.
  std::string host_;

  // Whether we should match subdomains of the host. This is true if the first
  // component of the pattern's host was "*".
  bool match_subdomains_;

  // The port.
  std::string port_;

  // The path to match. This is everything after the host of the URL, or
  // everything after the scheme in the case of file:// URLs.
  std::string path_;

  // The path with "?" and "\" characters escaped for use with the
  // MatchPattern() function.
  std::string path_escaped_;

  // A string representing this URLPattern. Declared mutable so that const
  // member functions are able to modify it.
  mutable std::string spec_;
};
243 
244 typedef std::vector<URLPattern> URLPatternList;
245 
246 #endif  // EXTENSIONS_COMMON_URL_PATTERN_H_
247