1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // Provides global database of differential decompression dictionaries for the 6 // SDCH filter (processes sdch enconded content). 7 8 // Exactly one instance of SdchManager is built, and all references are made 9 // into that collection. 10 // 11 // The SdchManager maintains a collection of memory resident dictionaries. It 12 // can find a dictionary (based on a server specification of a hash), store a 13 // dictionary, and make judgements about what URLs can use, set, etc. a 14 // dictionary. 15 16 // These dictionaries are acquired over the net, and include a header 17 // (containing metadata) as well as a VCDIFF dictionary (for use by a VCDIFF 18 // module) to decompress data. 19 20 #ifndef NET_BASE_SDCH_MANAGER_H_ 21 #define NET_BASE_SDCH_MANAGER_H_ 22 23 #include <map> 24 #include <set> 25 #include <string> 26 27 #include "base/ref_counted.h" 28 #include "base/scoped_ptr.h" 29 #include "base/time.h" 30 #include "googleurl/src/gurl.h" 31 #include "testing/gtest/include/gtest/gtest_prod.h" 32 33 //------------------------------------------------------------------------------ 34 // Create a public interface to help us load SDCH dictionaries. 35 // The SdchManager class allows registration to support this interface. 36 // A browser may register a fetcher that is used by the dictionary managers to 37 // get data from a specified URL. This allows us to use very high level browser 38 // functionality in this base (when the functionaity can be provided). 39 class SdchFetcher { 40 public: SdchFetcher()41 SdchFetcher() {} ~SdchFetcher()42 virtual ~SdchFetcher() {} 43 44 // The Schedule() method is called when there is a need to get a dictionary 45 // from a server. The callee is responsible for getting that dictionary_text, 46 // and then calling back to AddSdchDictionary() to the SdchManager instance. 47 virtual void Schedule(const GURL& dictionary_url) = 0; 48 private: 49 DISALLOW_COPY_AND_ASSIGN(SdchFetcher); 50 }; 51 //------------------------------------------------------------------------------ 52 53 class SdchManager { 54 public: 55 // A list of errors that appeared and were either resolved, or used to turn 56 // off sdch encoding. 57 enum ProblemCodes { 58 MIN_PROBLEM_CODE, 59 60 // Content-encoding correction problems. 61 ADDED_CONTENT_ENCODING = 1, 62 FIXED_CONTENT_ENCODING = 2, 63 FIXED_CONTENT_ENCODINGS = 3, 64 65 // Content decoding errors. 66 DECODE_HEADER_ERROR = 4, 67 DECODE_BODY_ERROR = 5, 68 69 // More content-encoding correction problems. 70 OPTIONAL_GUNZIP_ENCODING_ADDED = 6, 71 72 // Content encoding correction when we're not even tagged as HTML!?! 73 BINARY_ADDED_CONTENT_ENCODING = 7, 74 BINARY_FIXED_CONTENT_ENCODING = 8, 75 BINARY_FIXED_CONTENT_ENCODINGS = 9, 76 77 // Dictionary selection for use problems. 78 DICTIONARY_FOUND_HAS_WRONG_DOMAIN = 10, 79 DICTIONARY_FOUND_HAS_WRONG_PORT_LIST = 11, 80 DICTIONARY_FOUND_HAS_WRONG_PATH = 12, 81 DICTIONARY_FOUND_HAS_WRONG_SCHEME = 13, 82 DICTIONARY_HASH_NOT_FOUND = 14, 83 DICTIONARY_HASH_MALFORMED = 15, 84 85 // Dictionary saving problems. 86 DICTIONARY_HAS_NO_HEADER = 20, 87 DICTIONARY_HEADER_LINE_MISSING_COLON = 21, 88 DICTIONARY_MISSING_DOMAIN_SPECIFIER = 22, 89 DICTIONARY_SPECIFIES_TOP_LEVEL_DOMAIN = 23, 90 DICTIONARY_DOMAIN_NOT_MATCHING_SOURCE_URL = 24, 91 DICTIONARY_PORT_NOT_MATCHING_SOURCE_URL = 25, 92 DICTIONARY_HAS_NO_TEXT = 26, 93 DICTIONARY_REFERER_URL_HAS_DOT_IN_PREFIX = 27, 94 95 // Dictionary loading problems. 96 DICTIONARY_LOAD_ATTEMPT_FROM_DIFFERENT_HOST = 30, 97 DICTIONARY_SELECTED_FOR_SSL = 31, 98 DICTIONARY_ALREADY_LOADED = 32, 99 DICTIONARY_SELECTED_FROM_NON_HTTP = 33, 100 DICTIONARY_IS_TOO_LARGE= 34, 101 DICTIONARY_COUNT_EXCEEDED = 35, 102 DICTIONARY_ALREADY_SCHEDULED_TO_DOWNLOAD = 36, 103 DICTIONARY_ALREADY_TRIED_TO_DOWNLOAD = 37, 104 105 // Failsafe hack. 106 ATTEMPT_TO_DECODE_NON_HTTP_DATA = 40, 107 108 109 // Content-Encoding problems detected, with no action taken. 110 MULTIENCODING_FOR_NON_SDCH_REQUEST = 50, 111 SDCH_CONTENT_ENCODE_FOR_NON_SDCH_REQUEST = 51, 112 113 // Dictionary manager issues. 114 DOMAIN_BLACKLIST_INCLUDES_TARGET = 61, 115 116 // Problematic decode recovery methods. 117 META_REFRESH_RECOVERY = 70, // Dictionary not found. 118 // defunct = 71, // Almost the same as META_REFRESH_UNSUPPORTED. 119 // defunct = 72, // Almost the same as CACHED_META_REFRESH_UNSUPPORTED. 120 // defunct = 73, // PASSING_THROUGH_NON_SDCH plus DISCARD_TENTATIVE_SDCH. 121 META_REFRESH_UNSUPPORTED = 74, // Unrecoverable error. 122 CACHED_META_REFRESH_UNSUPPORTED = 75, // As above, but pulled from cache. 123 PASSING_THROUGH_NON_SDCH = 76, // Tagged sdch but missing dictionary-hash. 124 INCOMPLETE_SDCH_CONTENT = 77, // Last window was not completely decoded. 125 PASS_THROUGH_404_CODE = 78, // URL not found message passing through. 126 127 // This next report is very common, and not really an error scenario, but 128 // it exercises the error recovery logic. 129 PASS_THROUGH_OLD_CACHED = 79, // Back button got pre-SDCH cached content. 130 131 // Common decoded recovery methods. 132 META_REFRESH_CACHED_RECOVERY = 80, // Probably startup tab loading. 133 DISCARD_TENTATIVE_SDCH = 81, // Server decided not to use sdch. 134 135 // Non SDCH problems, only accounted for to make stat counting complete 136 // (i.e., be able to be sure all dictionary advertisements are accounted 137 // for). 138 139 UNFLUSHED_CONTENT = 90, // Possible error in filter chaining. 140 // defunct = 91, // MISSING_TIME_STATS (Should never happen.) 141 CACHE_DECODED = 92, // No timing stats recorded. 142 // defunct = 93, // OVER_10_MINUTES (No timing stats recorded.) 143 UNINITIALIZED = 94, // Filter never even got initialized. 144 PRIOR_TO_DICTIONARY = 95, // We hadn't even parsed a dictionary selector. 145 DECODE_ERROR = 96, // Something went wrong during decode. 146 147 // Problem during the latency test. 148 LATENCY_TEST_DISALLOWED = 100, // SDCH now failing, but it worked before! 149 150 MAX_PROBLEM_CODE // Used to bound histogram. 151 }; 152 153 // Use the following static limits to block DOS attacks until we implement 154 // a cached dictionary evicition strategy. 155 static const size_t kMaxDictionarySize; 156 static const size_t kMaxDictionaryCount; 157 158 // There is one instance of |Dictionary| for each memory-cached SDCH 159 // dictionary. 160 class Dictionary : public base::RefCounted<Dictionary> { 161 public: 162 // Sdch filters can get our text to use in decoding compressed data. text()163 const std::string& text() const { return text_; } 164 165 private: 166 friend class base::RefCounted<Dictionary>; 167 friend class SdchManager; // Only manager can construct an instance. 168 FRIEND_TEST(SdchFilterTest, PathMatch); 169 170 // Construct a vc-diff usable dictionary from the dictionary_text starting 171 // at the given offset. The supplied client_hash should be used to 172 // advertise the dictionary's availability relative to the suppplied URL. 173 Dictionary(const std::string& dictionary_text, size_t offset, 174 const std::string& client_hash, const GURL& url, 175 const std::string& domain, const std::string& path, 176 const base::Time& expiration, const std::set<int> ports); ~Dictionary()177 ~Dictionary() {} 178 url()179 const GURL& url() const { return url_; } client_hash()180 const std::string& client_hash() const { return client_hash_; } 181 182 // Security method to check if we can advertise this dictionary for use 183 // if the |target_url| returns SDCH compressed data. 184 bool CanAdvertise(const GURL& target_url); 185 186 // Security methods to check if we can establish a new dictionary with the 187 // given data, that arrived in response to get of dictionary_url. 188 static bool CanSet(const std::string& domain, const std::string& path, 189 const std::set<int> ports, const GURL& dictionary_url); 190 191 // Security method to check if we can use a dictionary to decompress a 192 // target that arrived with a reference to this dictionary. 193 bool CanUse(const GURL& referring_url); 194 195 // Compare paths to see if they "match" for dictionary use. 196 static bool PathMatch(const std::string& path, 197 const std::string& restriction); 198 199 // Compare domains to see if the "match" for dictionary use. 200 static bool DomainMatch(const GURL& url, const std::string& restriction); 201 202 203 // The actual text of the dictionary. 204 std::string text_; 205 206 // Part of the hash of text_ that the client uses to advertise the fact that 207 // it has a specific dictionary pre-cached. 208 std::string client_hash_; 209 210 // The GURL that arrived with the text_ in a URL request to specify where 211 // this dictionary may be used. 212 const GURL url_; 213 214 // Metadate "headers" in before dictionary text contained the following: 215 // Each dictionary payload consists of several headers, followed by the text 216 // of the dictionary. The following are the known headers. 217 const std::string domain_; 218 const std::string path_; 219 const base::Time expiration_; // Implied by max-age. 220 const std::set<int> ports_; 221 222 DISALLOW_COPY_AND_ASSIGN(Dictionary); 223 }; 224 225 SdchManager(); 226 ~SdchManager(); 227 228 // Discontinue fetching of dictionaries, as we're now shutting down. 229 static void Shutdown(); 230 231 // Provide access to the single instance of this class. 232 static SdchManager* Global(); 233 234 // Record stats on various errors. 235 static void SdchErrorRecovery(ProblemCodes problem); 236 237 // Register a fetcher that this class can use to obtain dictionaries. set_sdch_fetcher(SdchFetcher * fetcher)238 void set_sdch_fetcher(SdchFetcher* fetcher) { fetcher_.reset(fetcher); } 239 240 // If called with an empty string, advertise and support sdch on all domains. 241 // If called with a specific string, advertise and support only the specified 242 // domain. Function assumes the existence of a global SdchManager instance. 243 void EnableSdchSupport(const std::string& domain); 244 sdch_enabled()245 static bool sdch_enabled() { return global_ && global_->sdch_enabled_; } 246 247 // Briefly prevent further advertising of SDCH on this domain (if SDCH is 248 // enabled). After enough calls to IsInSupportedDomain() the blacklisting 249 // will be removed. Additional blacklists take exponentially more calls 250 // to IsInSupportedDomain() before the blacklisting is undone. 251 // Used when filter errors are found from a given domain, but it is plausible 252 // that the cause is temporary (such as application startup, where cached 253 // entries are used, but a dictionary is not yet loaded). 254 static void BlacklistDomain(const GURL& url); 255 256 // Used when SEVERE filter errors are found from a given domain, to prevent 257 // further use of SDCH on that domain. 258 static void BlacklistDomainForever(const GURL& url); 259 260 // Unit test only, this function resets enabling of sdch, and clears the 261 // blacklist. 262 static void ClearBlacklistings(); 263 264 // Unit test only, this function resets the blacklisting count for a domain. 265 static void ClearDomainBlacklisting(const std::string& domain); 266 267 // Unit test only: indicate how many more times a domain will be blacklisted. 268 static int BlackListDomainCount(const std::string& domain); 269 270 // Unit test only: Indicate what current blacklist increment is for a domain. 271 static int BlacklistDomainExponential(const std::string& domain); 272 273 // Check to see if SDCH is enabled (globally), and the given URL is in a 274 // supported domain (i.e., not blacklisted, and either the specific supported 275 // domain, or all domains were assumed supported). If it is blacklist, reduce 276 // by 1 the number of times it will be reported as blacklisted. 277 const bool IsInSupportedDomain(const GURL& url); 278 279 // Schedule the URL fetching to load a dictionary. This will always return 280 // before the dictionary is actually loaded and added. 281 // After the implied task does completes, the dictionary will have been 282 // cached in memory. 283 void FetchDictionary(const GURL& request_url, const GURL& dictionary_url); 284 285 // Security test function used before initiating a FetchDictionary. 286 // Return true if fetch is legal. 287 bool CanFetchDictionary(const GURL& referring_url, 288 const GURL& dictionary_url) const; 289 290 // Add an SDCH dictionary to our list of availible dictionaries. This addition 291 // will fail (return false) if addition is illegal (data in the dictionary is 292 // not acceptable from the dictionary_url; dictionary already added, etc.). 293 bool AddSdchDictionary(const std::string& dictionary_text, 294 const GURL& dictionary_url); 295 296 // Find the vcdiff dictionary (the body of the sdch dictionary that appears 297 // after the meta-data headers like Domain:...) with the given |server_hash| 298 // to use to decompreses data that arrived as SDCH encoded content. Check to 299 // be sure the returned |dictionary| can be used for decoding content supplied 300 // in response to a request for |referring_url|. 301 // Caller is responsible for AddRef()ing the dictionary, and Release()ing it 302 // when done. 303 // Return null in |dictionary| if there is no matching legal dictionary. 304 void GetVcdiffDictionary(const std::string& server_hash, 305 const GURL& referring_url, 306 Dictionary** dictionary); 307 308 // Get list of available (pre-cached) dictionaries that we have already loaded 309 // into memory. The list is a comma separated list of (client) hashes per 310 // the SDCH spec. 311 void GetAvailDictionaryList(const GURL& target_url, std::string* list); 312 313 // Construct the pair of hashes for client and server to identify an SDCH 314 // dictionary. This is only made public to facilitate unit testing, but is 315 // otherwise private 316 static void GenerateHash(const std::string& dictionary_text, 317 std::string* client_hash, std::string* server_hash); 318 319 // For Latency testing only, we need to know if we've succeeded in doing a 320 // round trip before starting our comparative tests. If ever we encounter 321 // problems with SDCH, we opt-out of the test unless/until we perform a 322 // complete SDCH decoding. 323 bool AllowLatencyExperiment(const GURL& url) const; 324 325 void SetAllowLatencyExperiment(const GURL& url, bool enable); 326 327 private: 328 typedef std::map<std::string, int> DomainCounter; 329 typedef std::set<std::string> ExperimentSet; 330 331 // A map of dictionaries info indexed by the hash that the server provides. 332 typedef std::map<std::string, Dictionary*> DictionaryMap; 333 334 // The one global instance of that holds all the data. 335 static SdchManager* global_; 336 337 // A simple implementation of a RFC 3548 "URL safe" base64 encoder. 338 static void UrlSafeBase64Encode(const std::string& input, 339 std::string* output); 340 DictionaryMap dictionaries_; 341 342 // An instance that can fetch a dictionary given a URL. 343 scoped_ptr<SdchFetcher> fetcher_; 344 345 // Support SDCH compression, by advertising in headers. 346 bool sdch_enabled_; 347 348 // Empty string means all domains. Non-empty means support only the given 349 // domain is supported. 350 std::string supported_domain_; 351 352 // List domains where decode failures have required disabling sdch, along with 353 // count of how many additonal uses should be blacklisted. 354 DomainCounter blacklisted_domains_; 355 356 // Support exponential backoff in number of domain accesses before 357 // blacklisting expires. 358 DomainCounter exponential_blacklist_count; 359 360 // List of hostnames for which a latency experiment is allowed (because a 361 // round trip test has recently passed). 362 ExperimentSet allow_latency_experiment_; 363 364 DISALLOW_COPY_AND_ASSIGN(SdchManager); 365 }; 366 367 #endif // NET_BASE_SDCH_MANAGER_H_ 368