// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef EXTENSIONS_COMMON_URL_PATTERN_H_
#define EXTENSIONS_COMMON_URL_PATTERN_H_

#include <functional>
#include <string>
#include <vector>

class GURL;

// A pattern that can be used to match URLs. A URLPattern is a very restricted
// subset of URL syntax:
//
// <url-pattern> := <scheme>://<host><port><path> | '<all_urls>'
// <scheme> := '*' | 'http' | 'https' | 'file' | 'ftp' | 'chrome' |
//             'chrome-extension' | 'filesystem'
// <host> := '*' | '*.' <anychar except '/' and '*'>+
// <port> := [':' ('*' | <port number between 0 and 65535>)]
// <path> := '/' <any chars>
//
// * Host is not used when the scheme is 'file'.
// * The path can have embedded '*' characters which act as glob wildcards.
// * '<all_urls>' is a special pattern that matches any URL that contains a
//   valid scheme (as specified by valid_schemes_).
// * The '*' scheme pattern excludes file URLs.
//
// Examples of valid patterns:
// - http://*/*
// - http://*/foo*
// - https://*.google.com/foo*bar
// - file://monkey*
// - http://127.0.0.1/*
//
// Examples of invalid patterns:
// - http://* -- path not specified
// - http://*foo/bar -- * not allowed as substring of host component
// - http://foo.*.bar/baz -- * must be first component
// - http:/bar -- scheme separator not found
// - foo://* -- invalid scheme
// - chrome:// -- we don't support chrome internal URLs
class URLPattern {
 public:
  // A collection of scheme bitmasks for use with valid_schemes.
  enum SchemeMasks {
    SCHEME_NONE = 0,
    SCHEME_HTTP = 1 << 0,
    SCHEME_HTTPS = 1 << 1,
    SCHEME_FILE = 1 << 2,
    SCHEME_FTP = 1 << 3,
    SCHEME_CHROMEUI = 1 << 4,
    SCHEME_EXTENSION = 1 << 5,
    SCHEME_FILESYSTEM = 1 << 6,

    // IMPORTANT!
    // SCHEME_ALL will match every scheme, including chrome://, chrome-
    // extension://, about:, etc. Because this has lots of security
    // implications, third-party extensions should usually not be able to get
    // access to URL patterns initialized this way. If there is a reason
    // for violating this general rule, document why this is safe.
    SCHEME_ALL = -1,
  };

  // Error codes returned from Parse().
  enum ParseResult {
    PARSE_SUCCESS = 0,
    PARSE_ERROR_MISSING_SCHEME_SEPARATOR,
    PARSE_ERROR_INVALID_SCHEME,
    PARSE_ERROR_WRONG_SCHEME_SEPARATOR,
    PARSE_ERROR_EMPTY_HOST,
    PARSE_ERROR_INVALID_HOST_WILDCARD,
    PARSE_ERROR_EMPTY_PATH,
    PARSE_ERROR_INVALID_PORT,
    NUM_PARSE_RESULTS
  };

  // The <all_urls> string pattern.
  static const char kAllUrlsPattern[];

  explicit URLPattern(int valid_schemes);

  // Convenience to construct a URLPattern from a string. If the string is not
  // known ahead of time, use Parse() instead, which returns success or failure.
  URLPattern(int valid_schemes, const std::string& pattern);

  URLPattern();
  ~URLPattern();

  bool operator<(const URLPattern& other) const;
  bool operator>(const URLPattern& other) const;
  bool operator==(const URLPattern& other) const;

  // Initializes this instance by parsing the provided string. Returns
  // URLPattern::PARSE_SUCCESS on success, or an error code otherwise. On
  // failure, this instance will have some intermediate values and is in an
  // invalid state.
  ParseResult Parse(const std::string& pattern_str);

  // Gets the bitmask of valid schemes.
  int valid_schemes() const { return valid_schemes_; }
  void SetValidSchemes(int valid_schemes);

  // Gets the host the pattern matches. This can be an empty string if the
  // pattern matches all hosts (the input was <scheme>://*/<whatever>).
  const std::string& host() const { return host_; }
  void SetHost(const std::string& host);

  // Gets whether to match subdomains of host().
  bool match_subdomains() const { return match_subdomains_; }
  void SetMatchSubdomains(bool val);

  // Gets the path the pattern matches with the leading slash. This can have
  // embedded asterisks which are interpreted using glob rules.
  const std::string& path() const { return path_; }
  void SetPath(const std::string& path);

  // Returns true if this pattern matches all urls.
  bool match_all_urls() const { return match_all_urls_; }
  void SetMatchAllURLs(bool val);

  // Sets the scheme for pattern matches. This can be a single '*' if the
  // pattern matches all valid schemes (as defined by the valid_schemes_
  // property). Returns false on failure (if the scheme is not valid).
  bool SetScheme(const std::string& scheme);
  // Note: You should use MatchesScheme() instead of this getter unless you
  // absolutely need the exact scheme. This is exposed for testing.
  const std::string& scheme() const { return scheme_; }

  // Returns true if the specified scheme can be used in this URL pattern, and
  // false otherwise. Uses valid_schemes_ to determine validity.
  bool IsValidScheme(const std::string& scheme) const;

  // Returns true if this instance matches the specified URL.
  bool MatchesURL(const GURL& test) const;

  // Returns true if this instance matches the specified security origin.
  bool MatchesSecurityOrigin(const GURL& test) const;

  // Returns true if |test| matches our scheme.
  // Note that if test is "filesystem", this may fail whereas MatchesURL
  // may succeed. MatchesURL is smart enough to look at the inner_url instead
  // of the outer "filesystem:" part.
  bool MatchesScheme(const std::string& test) const;

  // Returns true if |test| matches our host.
  bool MatchesHost(const std::string& test) const;
  bool MatchesHost(const GURL& test) const;

  // Returns true if |test| matches our path.
  bool MatchesPath(const std::string& test) const;

  // Sets the port. Returns false if the port is invalid.
  bool SetPort(const std::string& port);
  // Gets the port as stored by SetPort().
  const std::string& port() const { return port_; }

  // Returns a string representing this instance.
  const std::string& GetAsString() const;

  // Determines whether there is a URL that would match this instance and
  // another instance. This method is symmetrical: Calling
  // other.OverlapsWith(this) would result in the same answer.
  bool OverlapsWith(const URLPattern& other) const;

  // Returns true if this pattern matches all possible URLs that |other| can
  // match. For example, http://*.google.com encompasses http://www.google.com.
  bool Contains(const URLPattern& other) const;

  // Converts this URLPattern into an equivalent set of URLPatterns that don't
  // use a wildcard in the scheme component. If this URLPattern doesn't use a
  // wildcard scheme, then the returned set will contain one element that is
  // equivalent to this instance.
  std::vector<URLPattern> ConvertToExplicitSchemes() const;

  // "Less than" comparison on the effective host of two patterns. Two
  // patterns that both match all URLs never compare less than one another;
  // otherwise the result is the lexicographic ordering of host_ (scheme,
  // port, path and match_subdomains_ are ignored).
  //
  // NOTE(review): when only one side has match_all_urls_ set, the raw host_
  // strings are still compared. If an <all_urls> pattern can ever carry a
  // non-empty host_, the induced equivalence is not transitive, which
  // std::set requires of its comparator -- confirm that such patterns always
  // have an empty host.
  static bool EffectiveHostCompare(const URLPattern& a, const URLPattern& b) {
    if (a.match_all_urls_ && b.match_all_urls_)
      return false;
    return a.host_.compare(b.host_) < 0;
  }

  // Used for origin comparisons in a std::set.
  class EffectiveHostCompareFunctor {
   public:
    // Forwards to URLPattern::EffectiveHostCompare().
    bool operator()(const URLPattern& a, const URLPattern& b) const {
      return EffectiveHostCompare(a, b);
    }
  };

  // Get an error string for a ParseResult.
  static const char* GetParseResultString(URLPattern::ParseResult parse_result);

  // Checks whether the bit is set for the given scheme in the given scheme
  // mask.
  static bool IsSchemeBitSet(const std::string& scheme, const int mask);

 private:
  // Returns true if any of the |schemes| items matches our scheme.
  bool MatchesAnyScheme(const std::vector<std::string>& schemes) const;

  // Returns true if all of the |schemes| items matches our scheme.
  bool MatchesAllSchemes(const std::vector<std::string>& schemes) const;

  bool MatchesSecurityOriginHelper(const GURL& test) const;

  // Returns true if our port matches the |port| pattern (it may be "*").
  bool MatchesPortPattern(const std::string& port) const;

  // If the URLPattern contains a wildcard scheme, returns a list of
  // equivalent literal schemes, otherwise returns the current scheme.
  std::vector<std::string> GetExplicitSchemes() const;

  // A bitmask containing the schemes which are considered valid for this
  // pattern. Parse() uses this to decide whether a pattern contains a valid
  // scheme.
  int valid_schemes_;

  // True if this is a special-case "<all_urls>" pattern.
  bool match_all_urls_;

  // The scheme for the pattern.
  std::string scheme_;

  // The host without any leading "*" components.
  std::string host_;

  // Whether we should match subdomains of the host. This is true if the first
  // component of the pattern's host was "*".
  bool match_subdomains_;

  // The port.
  std::string port_;

  // The path to match. This is everything after the host of the URL, or
  // everything after the scheme in the case of file:// URLs.
  std::string path_;

  // The path with "?" and "\" characters escaped for use with the
  // MatchPattern() function.
  std::string path_escaped_;

  // A string representing this URLPattern.
  mutable std::string spec_;
};

typedef std::vector<URLPattern> URLPatternList;

#endif  // EXTENSIONS_COMMON_URL_PATTERN_H_