• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "base/base64.h"
6 #include "base/field_trial.h"
7 #include "base/histogram.h"
8 #include "base/logging.h"
9 #include "base/sha2.h"
10 #include "base/string_util.h"
11 #include "net/base/registry_controlled_domain.h"
12 #include "net/base/sdch_manager.h"
13 #include "net/url_request/url_request_http_job.h"
14 
15 using base::Time;
16 using base::TimeDelta;
17 
18 //------------------------------------------------------------------------------
19 // static
20 const size_t SdchManager::kMaxDictionarySize = 1000000;
21 
22 // static
23 const size_t SdchManager::kMaxDictionaryCount = 20;
24 
25 // static
26 SdchManager* SdchManager::global_;
27 
28 // static
Global()29 SdchManager* SdchManager::Global() {
30   return global_;
31 }
32 
33 // static
SdchErrorRecovery(ProblemCodes problem)34 void SdchManager::SdchErrorRecovery(ProblemCodes problem) {
35   UMA_HISTOGRAM_ENUMERATION("Sdch3.ProblemCodes_4", problem, MAX_PROBLEM_CODE);
36 }
37 
38 // static
ClearBlacklistings()39 void SdchManager::ClearBlacklistings() {
40   Global()->blacklisted_domains_.clear();
41   Global()->exponential_blacklist_count.clear();
42 }
43 
44 // static
ClearDomainBlacklisting(const std::string & domain)45 void SdchManager::ClearDomainBlacklisting(const std::string& domain) {
46   Global()->blacklisted_domains_.erase(StringToLowerASCII(domain));
47 }
48 
49 // static
BlackListDomainCount(const std::string & domain)50 int SdchManager::BlackListDomainCount(const std::string& domain) {
51   if (Global()->blacklisted_domains_.end() ==
52       Global()->blacklisted_domains_.find(domain))
53     return 0;
54   return Global()->blacklisted_domains_[StringToLowerASCII(domain)];
55 }
56 
57 // static
BlacklistDomainExponential(const std::string & domain)58 int SdchManager::BlacklistDomainExponential(const std::string& domain) {
59   if (Global()->exponential_blacklist_count.end() ==
60       Global()->exponential_blacklist_count.find(domain))
61     return 0;
62   return Global()->exponential_blacklist_count[StringToLowerASCII(domain)];
63 }
64 
65 //------------------------------------------------------------------------------
SdchManager()66 SdchManager::SdchManager() : sdch_enabled_(false) {
67   DCHECK(!global_);
68   global_ = this;
69 }
70 
~SdchManager()71 SdchManager::~SdchManager() {
72   DCHECK(global_ == this);
73   while (!dictionaries_.empty()) {
74     DictionaryMap::iterator it = dictionaries_.begin();
75     it->second->Release();
76     dictionaries_.erase(it->first);
77   }
78   global_ = NULL;
79 }
80 
81 // static
Shutdown()82 void SdchManager::Shutdown() {
83   if (!global_ )
84     return;
85   global_->fetcher_.reset(NULL);
86 }
87 
88 // static
BlacklistDomain(const GURL & url)89 void SdchManager::BlacklistDomain(const GURL& url) {
90   if (!global_ )
91     return;
92   global_->SetAllowLatencyExperiment(url, false);
93 
94   std::string domain(StringToLowerASCII(url.host()));
95   int count = global_->blacklisted_domains_[domain];
96   if (count > 0)
97     return;  // Domain is already blacklisted.
98 
99   count = 1 + 2 * global_->exponential_blacklist_count[domain];
100   if (count > 0)
101     global_->exponential_blacklist_count[domain] = count;
102   else
103     count = INT_MAX;
104 
105   global_->blacklisted_domains_[domain] = count;
106 }
107 
108 // static
BlacklistDomainForever(const GURL & url)109 void SdchManager::BlacklistDomainForever(const GURL& url) {
110   if (!global_ )
111     return;
112   global_->SetAllowLatencyExperiment(url, false);
113 
114   std::string domain(StringToLowerASCII(url.host()));
115   global_->exponential_blacklist_count[domain] = INT_MAX;
116   global_->blacklisted_domains_[domain] = INT_MAX;
117 }
118 
EnableSdchSupport(const std::string & domain)119 void SdchManager::EnableSdchSupport(const std::string& domain) {
120   // We presume that there is a SDCH manager instance.
121   global_->supported_domain_ = domain;
122   global_->sdch_enabled_ = true;
123 }
124 
IsInSupportedDomain(const GURL & url)125 const bool SdchManager::IsInSupportedDomain(const GURL& url) {
126   if (!sdch_enabled_ )
127     return false;
128   if (!supported_domain_.empty() &&
129       !url.DomainIs(supported_domain_.data(), supported_domain_.size()))
130      return false;  // It is not the singular supported domain.
131 
132   if (blacklisted_domains_.empty())
133     return true;
134 
135   std::string domain(StringToLowerASCII(url.host()));
136   DomainCounter::iterator it = blacklisted_domains_.find(domain);
137   if (blacklisted_domains_.end() == it)
138     return true;
139 
140   int count = it->second - 1;
141   if (count > 0)
142     blacklisted_domains_[domain] = count;
143   else
144     blacklisted_domains_.erase(domain);
145   SdchErrorRecovery(DOMAIN_BLACKLIST_INCLUDES_TARGET);
146   return false;
147 }
148 
CanFetchDictionary(const GURL & referring_url,const GURL & dictionary_url) const149 bool SdchManager::CanFetchDictionary(const GURL& referring_url,
150                                      const GURL& dictionary_url) const {
151   /* The user agent may retrieve a dictionary from the dictionary URL if all of
152      the following are true:
153        1 The dictionary URL host name matches the referrer URL host name
154        2 The dictionary URL host name domain matches the parent domain of the
155            referrer URL host name
156        3 The parent domain of the referrer URL host name is not a top level
157            domain
158        4 The dictionary URL is not an HTTPS URL.
159    */
160   // Item (1) above implies item (2).  Spec should be updated.
161   // I take "host name match" to be "is identical to"
162   if (referring_url.host() != dictionary_url.host()) {
163     SdchErrorRecovery(DICTIONARY_LOAD_ATTEMPT_FROM_DIFFERENT_HOST);
164     return false;
165   }
166   if (referring_url.SchemeIs("https")) {
167     SdchErrorRecovery(DICTIONARY_SELECTED_FOR_SSL);
168     return false;
169   }
170 
171   // TODO(jar): Remove this failsafe conservative hack which is more restrictive
172   // than current SDCH spec when needed, and justified by security audit.
173   if (!referring_url.SchemeIs("http")) {
174     SdchErrorRecovery(DICTIONARY_SELECTED_FROM_NON_HTTP);
175     return false;
176   }
177 
178   return true;
179 }
180 
FetchDictionary(const GURL & request_url,const GURL & dictionary_url)181 void SdchManager::FetchDictionary(const GURL& request_url,
182                                   const GURL& dictionary_url) {
183   if (SdchManager::Global()->CanFetchDictionary(request_url, dictionary_url) &&
184       fetcher_.get())
185     fetcher_->Schedule(dictionary_url);
186 }
187 
AddSdchDictionary(const std::string & dictionary_text,const GURL & dictionary_url)188 bool SdchManager::AddSdchDictionary(const std::string& dictionary_text,
189     const GURL& dictionary_url) {
190   std::string client_hash;
191   std::string server_hash;
192   GenerateHash(dictionary_text, &client_hash, &server_hash);
193   if (dictionaries_.find(server_hash) != dictionaries_.end()) {
194     SdchErrorRecovery(DICTIONARY_ALREADY_LOADED);
195     return false;  // Already loaded.
196   }
197 
198   std::string domain, path;
199   std::set<int> ports;
200   Time expiration(Time::Now() + TimeDelta::FromDays(30));
201 
202   if (dictionary_text.empty()) {
203     SdchErrorRecovery(DICTIONARY_HAS_NO_TEXT);
204     return false;  // Missing header.
205   }
206 
207   size_t header_end = dictionary_text.find("\n\n");
208   if (std::string::npos == header_end) {
209     SdchErrorRecovery(DICTIONARY_HAS_NO_HEADER);
210     return false;  // Missing header.
211   }
212   size_t line_start = 0;  // Start of line being parsed.
213   while (1) {
214     size_t line_end = dictionary_text.find('\n', line_start);
215     DCHECK(std::string::npos != line_end);
216     DCHECK(line_end <= header_end);
217 
218     size_t colon_index = dictionary_text.find(':', line_start);
219     if (std::string::npos == colon_index) {
220       SdchErrorRecovery(DICTIONARY_HEADER_LINE_MISSING_COLON);
221       return false;  // Illegal line missing a colon.
222     }
223 
224     if (colon_index > line_end)
225       break;
226 
227     size_t value_start = dictionary_text.find_first_not_of(" \t",
228                                                            colon_index + 1);
229     if (std::string::npos != value_start) {
230       if (value_start >= line_end)
231         break;
232       std::string name(dictionary_text, line_start, colon_index - line_start);
233       std::string value(dictionary_text, value_start, line_end - value_start);
234       name = StringToLowerASCII(name);
235       if (name == "domain") {
236         domain = value;
237       } else if (name == "path") {
238         path = value;
239       } else if (name == "format-version") {
240         if (value != "1.0")
241           return false;
242       } else if (name == "max-age") {
243         expiration = Time::Now() + TimeDelta::FromSeconds(StringToInt64(value));
244       } else if (name == "port") {
245         int port = StringToInt(value);
246         if (port >= 0)
247           ports.insert(port);
248       }
249     }
250 
251     if (line_end >= header_end)
252       break;
253     line_start = line_end + 1;
254   }
255 
256   if (!Dictionary::CanSet(domain, path, ports, dictionary_url))
257     return false;
258 
259   // TODO(jar): Remove these hacks to preclude a DOS attack involving piles of
260   // useless dictionaries.  We should probably have a cache eviction plan,
261   // instead of just blocking additions.  For now, with the spec in flux, it
262   // is probably not worth doing eviction handling.
263   if (kMaxDictionarySize < dictionary_text.size()) {
264     SdchErrorRecovery(DICTIONARY_IS_TOO_LARGE);
265     return false;
266   }
267   if (kMaxDictionaryCount <= dictionaries_.size()) {
268     SdchErrorRecovery(DICTIONARY_COUNT_EXCEEDED);
269     return false;
270   }
271 
272   UMA_HISTOGRAM_COUNTS("Sdch3.Dictionary size loaded", dictionary_text.size());
273   DLOG(INFO) << "Loaded dictionary with client hash " << client_hash <<
274       " and server hash " << server_hash;
275   Dictionary* dictionary =
276       new Dictionary(dictionary_text, header_end + 2, client_hash,
277                      dictionary_url, domain, path, expiration, ports);
278   dictionary->AddRef();
279   dictionaries_[server_hash] = dictionary;
280   return true;
281 }
282 
GetVcdiffDictionary(const std::string & server_hash,const GURL & referring_url,Dictionary ** dictionary)283 void SdchManager::GetVcdiffDictionary(const std::string& server_hash,
284     const GURL& referring_url, Dictionary** dictionary) {
285   *dictionary = NULL;
286   DictionaryMap::iterator it = dictionaries_.find(server_hash);
287   if (it == dictionaries_.end()) {
288     return;
289   }
290   Dictionary* matching_dictionary = it->second;
291   if (!matching_dictionary->CanUse(referring_url))
292     return;
293   *dictionary = matching_dictionary;
294 }
295 
296 // TODO(jar): If we have evictions from the dictionaries_, then we need to
297 // change this interface to return a list of reference counted Dictionary
298 // instances that can be used if/when a server specifies one.
GetAvailDictionaryList(const GURL & target_url,std::string * list)299 void SdchManager::GetAvailDictionaryList(const GURL& target_url,
300                                          std::string* list) {
301   int count = 0;
302   for (DictionaryMap::iterator it = dictionaries_.begin();
303        it != dictionaries_.end(); ++it) {
304     if (!it->second->CanAdvertise(target_url))
305       continue;
306     ++count;
307     if (!list->empty())
308       list->append(",");
309     list->append(it->second->client_hash());
310   }
311   // Watch to see if we have corrupt or numerous dictionaries.
312   if (count > 0)
313     UMA_HISTOGRAM_COUNTS("Sdch3.Advertisement_Count", count);
314 }
315 
Dictionary(const std::string & dictionary_text,size_t offset,const std::string & client_hash,const GURL & gurl,const std::string & domain,const std::string & path,const Time & expiration,const std::set<int> ports)316 SdchManager::Dictionary::Dictionary(const std::string& dictionary_text,
317     size_t offset, const std::string& client_hash, const GURL& gurl,
318     const std::string& domain, const std::string& path, const Time& expiration,
319     const std::set<int> ports)
320       : text_(dictionary_text, offset),
321         client_hash_(client_hash),
322         url_(gurl),
323         domain_(domain),
324         path_(path),
325         expiration_(expiration),
326         ports_(ports) {
327 }
328 
329 // static
GenerateHash(const std::string & dictionary_text,std::string * client_hash,std::string * server_hash)330 void SdchManager::GenerateHash(const std::string& dictionary_text,
331     std::string* client_hash, std::string* server_hash) {
332   char binary_hash[32];
333   base::SHA256HashString(dictionary_text, binary_hash, sizeof(binary_hash));
334 
335   std::string first_48_bits(&binary_hash[0], 6);
336   std::string second_48_bits(&binary_hash[6], 6);
337   UrlSafeBase64Encode(first_48_bits, client_hash);
338   UrlSafeBase64Encode(second_48_bits, server_hash);
339 
340   DCHECK_EQ(server_hash->length(), 8u);
341   DCHECK_EQ(client_hash->length(), 8u);
342 }
343 
344 // static
UrlSafeBase64Encode(const std::string & input,std::string * output)345 void SdchManager::UrlSafeBase64Encode(const std::string& input,
346                                       std::string* output) {
347   // Since this is only done during a dictionary load, and hashes are only 8
348   // characters, we just do the simple fixup, rather than rewriting the encoder.
349   base::Base64Encode(input, output);
350   for (size_t i = 0; i < output->size(); ++i) {
351     switch (output->data()[i]) {
352       case '+':
353         (*output)[i] = '-';
354         continue;
355       case '/':
356         (*output)[i] = '_';
357         continue;
358       default:
359         continue;
360     }
361   }
362 }
363 
364 //------------------------------------------------------------------------------
365 // Security functions restricting loads and use of dictionaries.
366 
367 // static
CanSet(const std::string & domain,const std::string & path,const std::set<int> ports,const GURL & dictionary_url)368 bool SdchManager::Dictionary::CanSet(const std::string& domain,
369                                      const std::string& path,
370                                      const std::set<int> ports,
371                                      const GURL& dictionary_url) {
372   if (!SdchManager::Global()->IsInSupportedDomain(dictionary_url))
373     return false;
374   /*
375   A dictionary is invalid and must not be stored if any of the following are
376   true:
377     1. The dictionary has no Domain attribute.
378     2. The effective host name that derives from the referer URL host name does
379       not domain-match the Domain attribute.
380     3. The Domain attribute is a top level domain.
381     4. The referer URL host is a host domain name (not IP address) and has the
382       form HD, where D is the value of the Domain attribute, and H is a string
383       that contains one or more dots.
384     5. If the dictionary has a Port attribute and the referer URL's port was not
385       in the list.
386   */
387 
388   // TODO(jar): Redirects in dictionary fetches might plausibly be problematic,
389   // and hence the conservative approach is to not allow any redirects (if there
390   // were any... then don't allow the dictionary to be set).
391 
392   if (domain.empty()) {
393     SdchErrorRecovery(DICTIONARY_MISSING_DOMAIN_SPECIFIER);
394     return false;  // Domain is required.
395   }
396   if (net::RegistryControlledDomainService::GetDomainAndRegistry(domain).size()
397       == 0) {
398     SdchErrorRecovery(DICTIONARY_SPECIFIES_TOP_LEVEL_DOMAIN);
399     return false;  // domain was a TLD.
400   }
401   if (!Dictionary::DomainMatch(dictionary_url, domain)) {
402     SdchErrorRecovery(DICTIONARY_DOMAIN_NOT_MATCHING_SOURCE_URL);
403     return false;
404   }
405 
406   std::string referrer_url_host = dictionary_url.host();
407   size_t postfix_domain_index = referrer_url_host.rfind(domain);
408   // See if it is indeed a postfix, or just an internal string.
409   if (referrer_url_host.size() == postfix_domain_index + domain.size()) {
410     // It is a postfix... so check to see if there's a dot in the prefix.
411     size_t end_of_host_index = referrer_url_host.find_first_of('.');
412     if (referrer_url_host.npos != end_of_host_index  &&
413         end_of_host_index < postfix_domain_index) {
414       SdchErrorRecovery(DICTIONARY_REFERER_URL_HAS_DOT_IN_PREFIX);
415       return false;
416     }
417   }
418 
419   if (!ports.empty()
420       && 0 == ports.count(dictionary_url.EffectiveIntPort())) {
421     SdchErrorRecovery(DICTIONARY_PORT_NOT_MATCHING_SOURCE_URL);
422     return false;
423   }
424   return true;
425 }
426 
427 // Not static: consults this dictionary's domain_, ports_ and path_.
CanUse(const GURL & referring_url)428 bool SdchManager::Dictionary::CanUse(const GURL& referring_url) {
429   if (!SdchManager::Global()->IsInSupportedDomain(referring_url))
430     return false;
431   /*
432     1. The request URL's host name domain-matches the Domain attribute of the
433       dictionary.
434     2. If the dictionary has a Port attribute, the request port is one of the
435       ports listed in the Port attribute.
436     3. The request URL path-matches the path attribute of the dictionary.
437     4. The request is not an HTTPS request.
438 */
439   if (!DomainMatch(referring_url, domain_)) {
440     SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_DOMAIN);
441     return false;
442   }
443   if (!ports_.empty()
444       && 0 == ports_.count(referring_url.EffectiveIntPort())) {
445     SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_PORT_LIST);
446     return false;
447   }
448   if (path_.size() && !PathMatch(referring_url.path(), path_)) {
449     SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_PATH);
450     return false;
451   }
452   if (referring_url.SchemeIsSecure()) {
453     SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_SCHEME);
454     return false;
455   }
456 
457   // TODO(jar): Remove overly restrictive failsafe test (added per security
458   // review) when we have a need to be more general.
459   if (!referring_url.SchemeIs("http")) {
460     SdchErrorRecovery(ATTEMPT_TO_DECODE_NON_HTTP_DATA);
461     return false;
462   }
463 
464   return true;
465 }
466 
CanAdvertise(const GURL & target_url)467 bool SdchManager::Dictionary::CanAdvertise(const GURL& target_url) {
468   if (!SdchManager::Global()->IsInSupportedDomain(target_url))
469     return false;
470   /* The specific rules of when a dictionary should be advertised in an
471      Avail-Dictionary header are modeled after the rules for cookie scoping. The
472      terms "domain-match" and "pathmatch" are defined in RFC 2965 [6]. A
473      dictionary may be advertised in the Avail-Dictionaries header exactly when
474      all of the following are true:
475       1. The server's effective host name domain-matches the Domain attribute of
476          the dictionary.
477       2. If the dictionary has a Port attribute, the request port is one of the
478          ports listed in the Port attribute.
479       3. The request URI path-matches the path header of the dictionary.
480       4. The request is not an HTTPS request.
481     */
482   if (!DomainMatch(target_url, domain_))
483     return false;
484   if (!ports_.empty() && 0 == ports_.count(target_url.EffectiveIntPort()))
485     return false;
486   if (path_.size() && !PathMatch(target_url.path(), path_))
487     return false;
488   if (target_url.SchemeIsSecure())
489     return false;
490   if (Time::Now() > expiration_)
491     return false;
492   return true;
493 }
494 
PathMatch(const std::string & path,const std::string & restriction)495 bool SdchManager::Dictionary::PathMatch(const std::string& path,
496                                         const std::string& restriction) {
497   /*  Must be either:
498   1. P2 is equal to P1
499   2. P2 is a prefix of P1 and either the final character in P2 is "/" or the
500       character following P2 in P1 is "/".
501       */
502   if (path == restriction)
503     return true;
504   size_t prefix_length = restriction.size();
505   if (prefix_length > path.size())
506     return false;  // Can't be a prefix.
507   if (0 != path.compare(0, prefix_length, restriction))
508     return false;
509   return restriction[prefix_length - 1] == '/' || path[prefix_length] == '/';
510 }
511 
512 // static
DomainMatch(const GURL & gurl,const std::string & restriction)513 bool SdchManager::Dictionary::DomainMatch(const GURL& gurl,
514                                           const std::string& restriction) {
515   // TODO(jar): This is not precisely a domain match definition.
516   return gurl.DomainIs(restriction.data(), restriction.size());
517 }
518 
519 //------------------------------------------------------------------------------
520 // Methods for supporting latency experiments.
521 
AllowLatencyExperiment(const GURL & url) const522 bool SdchManager::AllowLatencyExperiment(const GURL& url) const {
523   return allow_latency_experiment_.end() !=
524       allow_latency_experiment_.find(url.host());
525 }
526 
SetAllowLatencyExperiment(const GURL & url,bool enable)527 void SdchManager::SetAllowLatencyExperiment(const GURL& url, bool enable) {
528   if (enable) {
529     allow_latency_experiment_.insert(url.host());
530     return;
531   }
532   ExperimentSet::iterator it = allow_latency_experiment_.find(url.host());
533   if (allow_latency_experiment_.end() == it)
534     return;  // It was already erased, or never allowed.
535   SdchErrorRecovery(LATENCY_TEST_DISALLOWED);
536   allow_latency_experiment_.erase(it);
537 }
538