// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef EXTENSIONS_COMMON_URL_PATTERN_H_
#define EXTENSIONS_COMMON_URL_PATTERN_H_

#include <functional>
#include <string>
#include <vector>

class GURL;

// A pattern that can be used to match URLs. A URLPattern is a very restricted
// subset of URL syntax:
//
// <url-pattern> := <scheme>://<host><port><path> | '<all_urls>'
// <scheme> := '*' | 'http' | 'https' | 'file' | 'ftp' | 'chrome' |
//             'chrome-extension' | 'filesystem'
// <host> := '*' | '*.' <anychar except '/' and '*'>+
// <port> := [':' ('*' | <port number between 0 and 65535>)]
// <path> := '/' <any chars>
//
// NOTE(review): the valid example "http://127.0.0.1/*" below uses a literal
// host, which the <host> production above does not list -- confirm the grammar
// against Parse().
//
// * Host is not used when the scheme is 'file'.
// * The path can have embedded '*' characters which act as glob wildcards.
// * '<all_urls>' is a special pattern that matches any URL that contains a
//   valid scheme (as specified by valid_schemes_).
// * The '*' scheme pattern excludes file URLs.
//
// Examples of valid patterns:
// - http://*/*
// - http://*/foo*
// - https://*.google.com/foo*bar
// - file://monkey*
// - http://127.0.0.1/*
//
// Examples of invalid patterns:
// - http://* -- path not specified
// - http://*foo/bar -- * not allowed as substring of host component
// - http://foo.*.bar/baz -- * must be first component
// - http:/bar -- scheme separator not found
// - foo://* -- invalid scheme
// - chrome:// -- we don't support chrome internal URLs
class URLPattern {
 public:
  // A collection of scheme bitmasks for use with valid_schemes. Each named
  // scheme occupies its own bit so that masks can be OR-ed together.
  enum SchemeMasks {
    SCHEME_NONE = 0,
    SCHEME_HTTP = 1 << 0,
    SCHEME_HTTPS = 1 << 1,
    SCHEME_FILE = 1 << 2,
    SCHEME_FTP = 1 << 3,
    SCHEME_CHROMEUI = 1 << 4,
    SCHEME_EXTENSION = 1 << 5,
    SCHEME_FILESYSTEM = 1 << 6,

    // IMPORTANT!
    // SCHEME_ALL will match every scheme, including chrome://, chrome-
    // extension://, about:, etc. Because this has lots of security
    // implications, third-party extensions should usually not be able to get
    // access to URL patterns initialized this way. If there is a reason
    // for violating this general rule, document why this is safe.
    SCHEME_ALL = -1,
  };

  // Error codes returned from Parse(). NUM_PARSE_RESULTS is the number of
  // codes above it and must stay last.
  enum ParseResult {
    PARSE_SUCCESS = 0,
    PARSE_ERROR_MISSING_SCHEME_SEPARATOR,
    PARSE_ERROR_INVALID_SCHEME,
    PARSE_ERROR_WRONG_SCHEME_SEPARATOR,
    PARSE_ERROR_EMPTY_HOST,
    PARSE_ERROR_INVALID_HOST_WILDCARD,
    PARSE_ERROR_EMPTY_PATH,
    PARSE_ERROR_INVALID_PORT,
    PARSE_ERROR_INVALID_HOST,
    NUM_PARSE_RESULTS
  };

  // The <all_urls> string pattern.
  static const char kAllUrlsPattern[];

  // Returns true if the given |scheme| is considered valid for extensions.
  static bool IsValidSchemeForExtensions(const std::string& scheme);

  // Constructs a pattern restricted to the |valid_schemes| bitmask (see
  // SchemeMasks).
  explicit URLPattern(int valid_schemes);

  // Convenience to construct a URLPattern from a string. If the string is not
  // known ahead of time, use Parse() instead, which returns success or failure.
  URLPattern(int valid_schemes, const std::string& pattern);

  URLPattern();
  ~URLPattern();

  // Comparison operators; defined out of line.
  bool operator<(const URLPattern& other) const;
  bool operator>(const URLPattern& other) const;
  bool operator==(const URLPattern& other) const;

  // Initializes this instance by parsing the provided string. Returns
  // URLPattern::PARSE_SUCCESS on success, or an error code otherwise. On
  // failure, this instance will have some intermediate values and is in an
  // invalid state.
  ParseResult Parse(const std::string& pattern_str);

  // Gets the bitmask of valid schemes.
  int valid_schemes() const { return valid_schemes_; }
  void SetValidSchemes(int valid_schemes);

  // Gets the host the pattern matches. This can be an empty string if the
  // pattern matches all hosts (the input was <scheme>://*/<whatever>).
  const std::string& host() const { return host_; }
  void SetHost(const std::string& host);

  // Gets whether to match subdomains of host().
  bool match_subdomains() const { return match_subdomains_; }
  void SetMatchSubdomains(bool val);

  // Gets the path the pattern matches with the leading slash. This can have
  // embedded asterisks which are interpreted using glob rules.
  const std::string& path() const { return path_; }
  void SetPath(const std::string& path);

  // Returns true if this pattern matches all urls.
  bool match_all_urls() const { return match_all_urls_; }
  void SetMatchAllURLs(bool val);

  // Sets the scheme for pattern matches. This can be a single '*' if the
  // pattern matches all valid schemes (as defined by the valid_schemes_
  // property). Returns false on failure (if the scheme is not valid).
  bool SetScheme(const std::string& scheme);
  // Note: You should use MatchesScheme() instead of this getter unless you
  // absolutely need the exact scheme. This is exposed for testing.
  const std::string& scheme() const { return scheme_; }

  // Returns true if the specified scheme can be used in this URL pattern, and
  // false otherwise. Uses valid_schemes_ to determine validity.
  bool IsValidScheme(const std::string& scheme) const;

  // Returns true if this instance matches the specified URL.
  bool MatchesURL(const GURL& test) const;

  // Returns true if this instance matches the specified security origin.
  bool MatchesSecurityOrigin(const GURL& test) const;

  // Returns true if |test| matches our scheme.
  // Note that if test is "filesystem", this may fail whereas MatchesURL
  // may succeed. MatchesURL is smart enough to look at the inner_url instead
  // of the outer "filesystem:" part.
  bool MatchesScheme(const std::string& test) const;

  // Returns true if |test| matches our host.
  bool MatchesHost(const std::string& test) const;
  bool MatchesHost(const GURL& test) const;

  // Returns true if |test| matches our path.
  bool MatchesPath(const std::string& test) const;

  // Sets the port. Returns false if the port is invalid.
  bool SetPort(const std::string& port);
  const std::string& port() const { return port_; }

  // Returns a string representing this instance.
  const std::string& GetAsString() const;

  // Determines whether there is a URL that would match this instance and
  // another instance. This method is symmetrical: Calling
  // other.OverlapsWith(this) would result in the same answer.
  bool OverlapsWith(const URLPattern& other) const;

  // Returns true if this pattern matches all possible URLs that |other| can
  // match. For example, http://*.google.com encompasses http://www.google.com.
  bool Contains(const URLPattern& other) const;

  // Converts this URLPattern into an equivalent set of URLPatterns that don't
  // use a wildcard in the scheme component. If this URLPattern doesn't use a
  // wildcard scheme, then the returned set will contain one element that is
  // equivalent to this instance.
  std::vector<URLPattern> ConvertToExplicitSchemes() const;

  // "Less than" predicate over the host component only. Two patterns that both
  // have match_all_urls_ set never order before one another; in every other
  // case the result is the lexicographic comparison of host_ (scheme, port and
  // path are ignored).
  // NOTE(review): if two <all_urls> patterns can carry different host_ values
  // while a third, non-<all_urls> pattern sorts between them, this is not a
  // strict weak ordering -- confirm that <all_urls> patterns always share the
  // same host_.
  static bool EffectiveHostCompare(const URLPattern& a, const URLPattern& b) {
    if (a.match_all_urls_ && b.match_all_urls_)
      return false;
    return a.host_.compare(b.host_) < 0;
  }

  // Used for origin comparisons in a std::set. Forwards to
  // EffectiveHostCompare().
  class EffectiveHostCompareFunctor {
   public:
    bool operator()(const URLPattern& a, const URLPattern& b) const {
      return EffectiveHostCompare(a, b);
    }
  };

  // Get an error string for a ParseResult.
  static const char* GetParseResultString(URLPattern::ParseResult parse_result);

 private:
  // Returns true if any of the |schemes| items matches our scheme.
  bool MatchesAnyScheme(const std::vector<std::string>& schemes) const;

  // Returns true if all of the |schemes| items match our scheme.
  bool MatchesAllSchemes(const std::vector<std::string>& schemes) const;

  // NOTE(review): presumably the shared implementation behind
  // MatchesSecurityOrigin(); confirm in the .cc file.
  bool MatchesSecurityOriginHelper(const GURL& test) const;

  // Returns true if our port matches the |port| pattern (it may be "*").
  bool MatchesPortPattern(const std::string& port) const;

  // If the URLPattern contains a wildcard scheme, returns a list of
  // equivalent literal schemes, otherwise returns the current scheme.
  std::vector<std::string> GetExplicitSchemes() const;

  // A bitmask containing the schemes which are considered valid for this
  // pattern. Parse() uses this to decide whether a pattern contains a valid
  // scheme.
  int valid_schemes_;

  // True if this is a special-case "<all_urls>" pattern.
  bool match_all_urls_;

  // The scheme for the pattern.
  std::string scheme_;

  // The host without any leading "*" components.
  std::string host_;

  // Whether we should match subdomains of the host. This is true if the first
  // component of the pattern's host was "*".
  bool match_subdomains_;

  // The port.
  std::string port_;

  // The path to match. This is everything after the host of the URL, or
  // everything after the scheme in the case of file:// URLs.
  std::string path_;

  // The path with "?" and "\" characters escaped for use with the
  // MatchPattern() function.
  std::string path_escaped_;

  // A string representing this URLPattern. Declared mutable so that const
  // members can write to it.
  // NOTE(review): presumably a lazily built cache for GetAsString(); confirm
  // in the .cc file.
  mutable std::string spec_;
};

typedef std::vector<URLPattern> URLPatternList;

#endif  // EXTENSIONS_COMMON_URL_PATTERN_H_