• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Provides global database of differential decompression dictionaries for the
6 // SDCH filter (processes sdch encoded content).
7 
8 // Exactly one instance of SdchManager is built, and all references are made
9 // into that collection.
10 //
11 // The SdchManager maintains a collection of memory resident dictionaries.  It
12 // can find a dictionary (based on a server specification of a hash), store a
13 // dictionary, and make judgements about what URLs can use, set, etc. a
14 // dictionary.
15 
16 // These dictionaries are acquired over the net, and include a header
17 // (containing metadata) as well as a VCDIFF dictionary (for use by a VCDIFF
18 // module) to decompress data.
19 
20 #ifndef NET_BASE_SDCH_MANAGER_H_
21 #define NET_BASE_SDCH_MANAGER_H_
22 
23 #include <map>
24 #include <set>
25 #include <string>
26 
27 #include "base/gtest_prod_util.h"
28 #include "base/memory/ref_counted.h"
29 #include "base/memory/scoped_ptr.h"
30 #include "base/threading/non_thread_safe.h"
31 #include "base/time/time.h"
32 #include "net/base/net_export.h"
33 #include "url/gurl.h"
34 
35 namespace net {
36 
37 //------------------------------------------------------------------------------
38 // Create a public interface to help us load SDCH dictionaries.
39 // The SdchManager class allows registration to support this interface.
40 // A browser may register a fetcher that is used by the dictionary managers to
41 // get data from a specified URL.  This allows us to use very high level browser
42 // functionality in this base (when the functionality can be provided).
43 class SdchFetcher {
44  public:
SdchFetcher()45   SdchFetcher() {}
~SdchFetcher()46   virtual ~SdchFetcher() {}  // Virtual: allows deletion via a base pointer.
47 
48   // Called when a dictionary needs to be fetched from a server.  Implementations
49   // (this is pure virtual) must obtain the dictionary text and then hand it to
50   // the SdchManager instance by calling AddSdchDictionary().
51   virtual void Schedule(const GURL& dictionary_url) = 0;
52  private:
53   DISALLOW_COPY_AND_ASSIGN(SdchFetcher);
54 };
55 
56 //------------------------------------------------------------------------------
57 
// Keeps the memory-resident collection of SDCH dictionaries: it can schedule a
// dictionary fetch, validate and store a fetched dictionary, look one up by
// the hash a server supplies, and track per-domain blacklisting.
// NOTE(review): derives from base::NonThreadSafe, so presumably every call must
// be made on a single thread -- confirm against the implementation.
NON_EXPORTED_BASE(base::NonThreadSafe)58 class NET_EXPORT SdchManager : public NON_EXPORTED_BASE(base::NonThreadSafe) {
59  public:
60   // Problems that were either recovered from, or that caused sdch encoding to
61   // be turned off.  Values are explicit; defunct codes are not reused.
62   enum ProblemCodes {
63     MIN_PROBLEM_CODE,  // Implicitly 0.
64 
65     // Content-encoding correction problems.
66     ADDED_CONTENT_ENCODING = 1,
67     FIXED_CONTENT_ENCODING = 2,
68     FIXED_CONTENT_ENCODINGS = 3,
69 
70     // Content decoding errors.
71     DECODE_HEADER_ERROR = 4,
72     DECODE_BODY_ERROR = 5,
73 
74     // More content-encoding correction problems.
75     OPTIONAL_GUNZIP_ENCODING_ADDED = 6,
76 
77     // Content encoding correction when we're not even tagged as HTML!?!
78     BINARY_ADDED_CONTENT_ENCODING = 7,
79     BINARY_FIXED_CONTENT_ENCODING = 8,
80     BINARY_FIXED_CONTENT_ENCODINGS = 9,
81 
82     // Dictionary selection for use problems.
83     DICTIONARY_FOUND_HAS_WRONG_DOMAIN = 10,
84     DICTIONARY_FOUND_HAS_WRONG_PORT_LIST = 11,
85     DICTIONARY_FOUND_HAS_WRONG_PATH = 12,
86     DICTIONARY_FOUND_HAS_WRONG_SCHEME = 13,
87     DICTIONARY_HASH_NOT_FOUND = 14,
88     DICTIONARY_HASH_MALFORMED = 15,
89 
90     // Dictionary saving problems.
91     DICTIONARY_HAS_NO_HEADER = 20,
92     DICTIONARY_HEADER_LINE_MISSING_COLON = 21,
93     DICTIONARY_MISSING_DOMAIN_SPECIFIER = 22,
94     DICTIONARY_SPECIFIES_TOP_LEVEL_DOMAIN = 23,
95     DICTIONARY_DOMAIN_NOT_MATCHING_SOURCE_URL = 24,
96     DICTIONARY_PORT_NOT_MATCHING_SOURCE_URL = 25,
97     DICTIONARY_HAS_NO_TEXT = 26,
98     DICTIONARY_REFERER_URL_HAS_DOT_IN_PREFIX = 27,
99 
100     // Dictionary loading problems.
101     DICTIONARY_LOAD_ATTEMPT_FROM_DIFFERENT_HOST = 30,
102     DICTIONARY_SELECTED_FOR_SSL = 31,
103     DICTIONARY_ALREADY_LOADED = 32,
104     DICTIONARY_SELECTED_FROM_NON_HTTP = 33,
105     DICTIONARY_IS_TOO_LARGE= 34,
106     DICTIONARY_COUNT_EXCEEDED = 35,
107     DICTIONARY_ALREADY_SCHEDULED_TO_DOWNLOAD = 36,
108     DICTIONARY_ALREADY_TRIED_TO_DOWNLOAD = 37,
109 
110     // Failsafe hack.
111     ATTEMPT_TO_DECODE_NON_HTTP_DATA = 40,
112 
113 
114     // Content-Encoding problems detected, with no action taken.
115     MULTIENCODING_FOR_NON_SDCH_REQUEST = 50,
116     SDCH_CONTENT_ENCODE_FOR_NON_SDCH_REQUEST = 51,
117 
118     // Dictionary manager issues.
119     DOMAIN_BLACKLIST_INCLUDES_TARGET = 61,
120 
121     // Problematic decode recovery methods.
122     META_REFRESH_RECOVERY = 70,            // Dictionary not found.
123     // defunct =  71, // Almost the same as META_REFRESH_UNSUPPORTED.
124     // defunct = 72,  // Almost the same as CACHED_META_REFRESH_UNSUPPORTED.
125     // defunct = 73,  // PASSING_THROUGH_NON_SDCH plus DISCARD_TENTATIVE_SDCH.
126     META_REFRESH_UNSUPPORTED = 74,         // Unrecoverable error.
127     CACHED_META_REFRESH_UNSUPPORTED = 75,  // As above, but pulled from cache.
128     PASSING_THROUGH_NON_SDCH = 76,  // Tagged sdch but missing dictionary-hash.
129     INCOMPLETE_SDCH_CONTENT = 77,   // Last window was not completely decoded.
130     PASS_THROUGH_404_CODE = 78,     // URL not found message passing through.
131 
132     // This next report is very common, and not really an error scenario, but
133     // it exercises the error recovery logic.
134     PASS_THROUGH_OLD_CACHED = 79,   // Back button got pre-SDCH cached content.
135 
136     // Common decoded recovery methods.
137     META_REFRESH_CACHED_RECOVERY = 80,  // Probably startup tab loading.
138     DISCARD_TENTATIVE_SDCH = 81,        // Server decided not to use sdch.
139 
140     // Non SDCH problems, only accounted for to make stat counting complete
141     // (i.e., be able to be sure all dictionary advertisements are accounted
142     // for).
143 
144     UNFLUSHED_CONTENT = 90,    // Possible error in filter chaining.
145     // defunct = 91,           // MISSING_TIME_STATS (Should never happen.)
146     CACHE_DECODED = 92,        // No timing stats recorded.
147     // defunct = 93,           // OVER_10_MINUTES (No timing stats recorded.)
148     UNINITIALIZED = 94,        // Filter never even got initialized.
149     PRIOR_TO_DICTIONARY = 95,  // We hadn't even parsed a dictionary selector.
150     DECODE_ERROR = 96,         // Something went wrong during decode.
151 
152     // Problem during the latency test.
153     LATENCY_TEST_DISALLOWED = 100,  // SDCH now failing, but it worked before!
154 
155     MAX_PROBLEM_CODE  // Used to bound histogram.
156   };
157 
158   // Use the following static limits to block DOS attacks until we implement
159   // a cached dictionary eviction strategy.
160   static const size_t kMaxDictionarySize;
161   static const size_t kMaxDictionaryCount;
162 
163   // There is one instance of |Dictionary| for each memory-cached SDCH
164   // dictionary.
165   class NET_EXPORT_PRIVATE Dictionary : public base::RefCounted<Dictionary> {
166    public:
167     // Sdch filters can get our text to use in decoding compressed data.
168     const std::string& text() const { return text_; }
169 
170    private:
171     friend class base::RefCounted<Dictionary>;
172     friend class SdchManager;  // Only manager can construct an instance.
173     FRIEND_TEST_ALL_PREFIXES(SdchFilterTest, PathMatch);
174 
175     // Construct a vc-diff usable dictionary from the dictionary_text starting
176     // at the given offset.  The supplied client_hash should be used to
177     // advertise the dictionary's availability relative to the supplied URL.
178     Dictionary(const std::string& dictionary_text,
179                size_t offset,
180                const std::string& client_hash,
181                const GURL& url,
182                const std::string& domain,
183                const std::string& path,
184                const base::Time& expiration,
185                const std::set<int>& ports);
186     ~Dictionary();
187 
    // Accessors reachable only by friends of this class (they are private).
188     const GURL& url() const { return url_; }
189     const std::string& client_hash() const { return client_hash_; }
190 
191     // Security method to check if we can advertise this dictionary for use
192     // if the |target_url| returns SDCH compressed data.
193     bool CanAdvertise(const GURL& target_url);
194 
195     // Security method to check if we can establish a new dictionary with the
196     // given data, that arrived in response to a get of dictionary_url.
197     static bool CanSet(const std::string& domain, const std::string& path,
198                        const std::set<int>& ports, const GURL& dictionary_url);
199 
200     // Security method to check if we can use a dictionary to decompress a
201     // target that arrived with a reference to this dictionary.
202     bool CanUse(const GURL& referring_url);
203 
204     // Compare paths to see if they "match" for dictionary use.
205     static bool PathMatch(const std::string& path,
206                           const std::string& restriction);
207 
208     // Compare domains to see if they "match" for dictionary use.
209     static bool DomainMatch(const GURL& url, const std::string& restriction);
210 
211 
212     // The actual text of the dictionary.
213     std::string text_;
214 
215     // Part of the hash of text_ that the client uses to advertise the fact that
216     // it has a specific dictionary pre-cached.
217     std::string client_hash_;
218 
219     // The GURL that arrived with the text_ in a URL request to specify where
220     // this dictionary may be used.
221     const GURL url_;
222 
223     // Metadata "headers" that preceded the dictionary text:
224     // Each dictionary payload consists of several headers, followed by the text
225     // of the dictionary.  The following are the known headers.
226     const std::string domain_;
227     const std::string path_;
228     const base::Time expiration_;  // Implied by max-age.
229     const std::set<int> ports_;
230 
231     DISALLOW_COPY_AND_ASSIGN(Dictionary);
232   };
233 
234   SdchManager();
235   ~SdchManager();
236 
237   // Discontinue fetching of dictionaries, as we're now shutting down.
238   static void Shutdown();
239 
240   // Provide access to the single instance of this class.
241   static SdchManager* Global();
242 
243   // Record stats on various errors.
244   static void SdchErrorRecovery(ProblemCodes problem);
245 
246   // Register a fetcher that this class can use to obtain dictionaries.
247   void set_sdch_fetcher(SdchFetcher* fetcher);
248 
249   // Enables or disables SDCH compression.
250   static void EnableSdchSupport(bool enabled);
251 
  // Returns the current value of the global SDCH enable flag.
252   static bool sdch_enabled() { return g_sdch_enabled_; }
253 
254   // Briefly prevent further advertising of SDCH on this domain (if SDCH is
255   // enabled). After enough calls to IsInSupportedDomain() the blacklisting
256   // will be removed.  Additional blacklists take exponentially more calls
257   // to IsInSupportedDomain() before the blacklisting is undone.
258   // Used when filter errors are found from a given domain, but it is plausible
259   // that the cause is temporary (such as application startup, where cached
260   // entries are used, but a dictionary is not yet loaded).
261   static void BlacklistDomain(const GURL& url);
262 
263   // Used when SEVERE filter errors are found from a given domain, to prevent
264   // further use of SDCH on that domain.
265   static void BlacklistDomainForever(const GURL& url);
266 
267   // Unit test only, this function resets enabling of sdch, and clears the
268   // blacklist.
269   static void ClearBlacklistings();
270 
271   // Unit test only, this function resets the blacklisting count for a domain.
272   static void ClearDomainBlacklisting(const std::string& domain);
273 
274   // Unit test only: indicate how many more times a domain will be blacklisted.
275   static int BlackListDomainCount(const std::string& domain);
276 
277   // Unit test only: Indicate what current blacklist increment is for a domain.
278   static int BlacklistDomainExponential(const std::string& domain);
279 
280   // Check to see if SDCH is enabled (globally), and the given URL is in a
281   // supported domain (i.e., not blacklisted, and either the specific supported
282   // domain, or all domains were assumed supported).  If it is blacklisted,
283   // reduce by 1 the number of times it will be reported as blacklisted.
284   bool IsInSupportedDomain(const GURL& url);
285 
286   // Schedule the URL fetching to load a dictionary. This will always return
287   // before the dictionary is actually loaded and added.
288   // After the implied task completes, the dictionary will have been
289   // cached in memory.
290   void FetchDictionary(const GURL& request_url, const GURL& dictionary_url);
291 
292   // Security test function used before initiating a FetchDictionary.
293   // Return true if fetch is legal.
294   bool CanFetchDictionary(const GURL& referring_url,
295                           const GURL& dictionary_url) const;
296 
297   // Add an SDCH dictionary to our list of available dictionaries. This addition
298   // will fail (return false) if addition is illegal (data in the dictionary is
299   // not acceptable from the dictionary_url; dictionary already added, etc.).
300   bool AddSdchDictionary(const std::string& dictionary_text,
301                          const GURL& dictionary_url);
302 
303   // Find the vcdiff dictionary (the body of the sdch dictionary that appears
304   // after the meta-data headers like Domain:...) with the given |server_hash|
305   // to use to decompress data that arrived as SDCH encoded content.  Check to
306   // be sure the returned |dictionary| can be used for decoding content supplied
307   // in response to a request for |referring_url|.
308   // Caller is responsible for AddRef()ing the dictionary, and Release()ing it
309   // when done.
310   // Return null in |dictionary| if there is no matching legal dictionary.
311   void GetVcdiffDictionary(const std::string& server_hash,
312                            const GURL& referring_url,
313                            Dictionary** dictionary);
314 
315   // Get list of available (pre-cached) dictionaries that we have already loaded
316   // into memory.  The list is a comma separated list of (client) hashes per
317   // the SDCH spec.
318   void GetAvailDictionaryList(const GURL& target_url, std::string* list);
319 
320   // Construct the pair of hashes for client and server to identify an SDCH
321   // dictionary.  This is only made public to facilitate unit testing, but is
322   // otherwise private.
323   static void GenerateHash(const std::string& dictionary_text,
324                            std::string* client_hash, std::string* server_hash);
325 
326   // For Latency testing only, we need to know if we've succeeded in doing a
327   // round trip before starting our comparative tests.  If ever we encounter
328   // problems with SDCH, we opt-out of the test unless/until we perform a
329   // complete SDCH decoding.
330   bool AllowLatencyExperiment(const GURL& url) const;
331 
  // Records whether the latency experiment is allowed for |url|.
  // NOTE(review): presumably updates allow_latency_experiment_ by host name --
  // confirm against the implementation.
332   void SetAllowLatencyExperiment(const GURL& url, bool enable);
333 
334  private:
335   typedef std::map<std::string, int> DomainCounter;  // Domain -> count.
336   typedef std::set<std::string> ExperimentSet;  // Set of host names.
337 
338   // A map of dictionaries info indexed by the hash that the server provides.
339   typedef std::map<std::string, Dictionary*> DictionaryMap;
340 
341   // The one global instance that holds all the data.
342   static SdchManager* global_;
343 
344   // Support SDCH compression, by advertising in headers.
345   static bool g_sdch_enabled_;
346 
347   // A simple implementation of a RFC 3548 "URL safe" base64 encoder.
348   static void UrlSafeBase64Encode(const std::string& input,
349                                   std::string* output);
  // Every dictionary currently held, keyed by server hash (see DictionaryMap).
  // NOTE(review): values are raw pointers to a ref-counted type; presumably the
  // manager holds a reference for each entry -- confirm in the implementation.
350   DictionaryMap dictionaries_;
351 
352   // An instance that can fetch a dictionary given a URL.
353   scoped_ptr<SdchFetcher> fetcher_;
354 
355   // List domains where decode failures have required disabling sdch, along with
356   // count of how many additional uses should be blacklisted.
357   DomainCounter blacklisted_domains_;
358 
359   // Supports exponential backoff in the number of domain accesses before
360   // blacklisting expires.  NOTE(review): name lacks the trailing underscore.
361   DomainCounter exponential_blacklist_count;
362 
363   // List of hostnames for which a latency experiment is allowed (because a
364   // round trip test has recently passed).
365   ExperimentSet allow_latency_experiment_;
366 
367   DISALLOW_COPY_AND_ASSIGN(SdchManager);
368 };
369 
370 }  // namespace net
371 
372 #endif  // NET_BASE_SDCH_MANAGER_H_
373