• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "net/base/sdch_manager.h"
6 
7 #include "base/base64.h"
8 #include "base/logging.h"
9 #include "base/metrics/histogram.h"
10 #include "base/string_number_conversions.h"
11 #include "base/string_util.h"
12 #include "crypto/sha2.h"
13 #include "net/base/registry_controlled_domain.h"
14 #include "net/url_request/url_request_http_job.h"
15 
16 namespace net {
17 
18 //------------------------------------------------------------------------------
19 // static
20 const size_t SdchManager::kMaxDictionarySize = 1000000;
21 
22 // static
23 const size_t SdchManager::kMaxDictionaryCount = 20;
24 
25 // static
26 SdchManager* SdchManager::global_;
27 
28 //------------------------------------------------------------------------------
Dictionary(const std::string & dictionary_text,size_t offset,const std::string & client_hash,const GURL & gurl,const std::string & domain,const std::string & path,const base::Time & expiration,const std::set<int> & ports)29 SdchManager::Dictionary::Dictionary(const std::string& dictionary_text,
30                                     size_t offset,
31                                     const std::string& client_hash,
32                                     const GURL& gurl,
33                                     const std::string& domain,
34                                     const std::string& path,
35                                     const base::Time& expiration,
36                                     const std::set<int>& ports)
37     : text_(dictionary_text, offset),
38       client_hash_(client_hash),
39       url_(gurl),
40       domain_(domain),
41       path_(path),
42       expiration_(expiration),
43       ports_(ports) {
44 }
45 
~Dictionary()46 SdchManager::Dictionary::~Dictionary() {
47 }
48 
CanAdvertise(const GURL & target_url)49 bool SdchManager::Dictionary::CanAdvertise(const GURL& target_url) {
50   if (!SdchManager::Global()->IsInSupportedDomain(target_url))
51     return false;
52   /* The specific rules of when a dictionary should be advertised in an
53      Avail-Dictionary header are modeled after the rules for cookie scoping. The
54      terms "domain-match" and "pathmatch" are defined in RFC 2965 [6]. A
55      dictionary may be advertised in the Avail-Dictionaries header exactly when
56      all of the following are true:
57       1. The server's effective host name domain-matches the Domain attribute of
58          the dictionary.
59       2. If the dictionary has a Port attribute, the request port is one of the
60          ports listed in the Port attribute.
61       3. The request URI path-matches the path header of the dictionary.
62       4. The request is not an HTTPS request.
63     */
64   if (!DomainMatch(target_url, domain_))
65     return false;
66   if (!ports_.empty() && 0 == ports_.count(target_url.EffectiveIntPort()))
67     return false;
68   if (path_.size() && !PathMatch(target_url.path(), path_))
69     return false;
70   if (target_url.SchemeIsSecure())
71     return false;
72   if (base::Time::Now() > expiration_)
73     return false;
74   return true;
75 }
76 
77 //------------------------------------------------------------------------------
78 // Security functions restricting loads and use of dictionaries.
79 
80 // static
CanSet(const std::string & domain,const std::string & path,const std::set<int> & ports,const GURL & dictionary_url)81 bool SdchManager::Dictionary::CanSet(const std::string& domain,
82                                      const std::string& path,
83                                      const std::set<int>& ports,
84                                      const GURL& dictionary_url) {
85   if (!SdchManager::Global()->IsInSupportedDomain(dictionary_url))
86     return false;
87   /*
88   A dictionary is invalid and must not be stored if any of the following are
89   true:
90     1. The dictionary has no Domain attribute.
91     2. The effective host name that derives from the referer URL host name does
92       not domain-match the Domain attribute.
93     3. The Domain attribute is a top level domain.
94     4. The referer URL host is a host domain name (not IP address) and has the
95       form HD, where D is the value of the Domain attribute, and H is a string
96       that contains one or more dots.
97     5. If the dictionary has a Port attribute and the referer URL's port was not
98       in the list.
99   */
100 
101   // TODO(jar): Redirects in dictionary fetches might plausibly be problematic,
102   // and hence the conservative approach is to not allow any redirects (if there
103   // were any... then don't allow the dictionary to be set).
104 
105   if (domain.empty()) {
106     SdchErrorRecovery(DICTIONARY_MISSING_DOMAIN_SPECIFIER);
107     return false;  // Domain is required.
108   }
109   if (RegistryControlledDomainService::GetDomainAndRegistry(domain).size()
110       == 0) {
111     SdchErrorRecovery(DICTIONARY_SPECIFIES_TOP_LEVEL_DOMAIN);
112     return false;  // domain was a TLD.
113   }
114   if (!Dictionary::DomainMatch(dictionary_url, domain)) {
115     SdchErrorRecovery(DICTIONARY_DOMAIN_NOT_MATCHING_SOURCE_URL);
116     return false;
117   }
118 
119   std::string referrer_url_host = dictionary_url.host();
120   size_t postfix_domain_index = referrer_url_host.rfind(domain);
121   // See if it is indeed a postfix, or just an internal string.
122   if (referrer_url_host.size() == postfix_domain_index + domain.size()) {
123     // It is a postfix... so check to see if there's a dot in the prefix.
124     size_t end_of_host_index = referrer_url_host.find_first_of('.');
125     if (referrer_url_host.npos != end_of_host_index  &&
126         end_of_host_index < postfix_domain_index) {
127       SdchErrorRecovery(DICTIONARY_REFERER_URL_HAS_DOT_IN_PREFIX);
128       return false;
129     }
130   }
131 
132   if (!ports.empty()
133       && 0 == ports.count(dictionary_url.EffectiveIntPort())) {
134     SdchErrorRecovery(DICTIONARY_PORT_NOT_MATCHING_SOURCE_URL);
135     return false;
136   }
137   return true;
138 }
139 
140 // static
CanUse(const GURL & referring_url)141 bool SdchManager::Dictionary::CanUse(const GURL& referring_url) {
142   if (!SdchManager::Global()->IsInSupportedDomain(referring_url))
143     return false;
144   /*
145     1. The request URL's host name domain-matches the Domain attribute of the
146       dictionary.
147     2. If the dictionary has a Port attribute, the request port is one of the
148       ports listed in the Port attribute.
149     3. The request URL path-matches the path attribute of the dictionary.
150     4. The request is not an HTTPS request.
151 */
152   if (!DomainMatch(referring_url, domain_)) {
153     SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_DOMAIN);
154     return false;
155   }
156   if (!ports_.empty()
157       && 0 == ports_.count(referring_url.EffectiveIntPort())) {
158     SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_PORT_LIST);
159     return false;
160   }
161   if (path_.size() && !PathMatch(referring_url.path(), path_)) {
162     SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_PATH);
163     return false;
164   }
165   if (referring_url.SchemeIsSecure()) {
166     SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_SCHEME);
167     return false;
168   }
169 
170   // TODO(jar): Remove overly restrictive failsafe test (added per security
171   // review) when we have a need to be more general.
172   if (!referring_url.SchemeIs("http")) {
173     SdchErrorRecovery(ATTEMPT_TO_DECODE_NON_HTTP_DATA);
174     return false;
175   }
176 
177   return true;
178 }
179 
PathMatch(const std::string & path,const std::string & restriction)180 bool SdchManager::Dictionary::PathMatch(const std::string& path,
181                                         const std::string& restriction) {
182   /*  Must be either:
183   1. P2 is equal to P1
184   2. P2 is a prefix of P1 and either the final character in P2 is "/" or the
185       character following P2 in P1 is "/".
186       */
187   if (path == restriction)
188     return true;
189   size_t prefix_length = restriction.size();
190   if (prefix_length > path.size())
191     return false;  // Can't be a prefix.
192   if (0 != path.compare(0, prefix_length, restriction))
193     return false;
194   return restriction[prefix_length - 1] == '/' || path[prefix_length] == '/';
195 }
196 
197 // static
DomainMatch(const GURL & gurl,const std::string & restriction)198 bool SdchManager::Dictionary::DomainMatch(const GURL& gurl,
199                                           const std::string& restriction) {
200   // TODO(jar): This is not precisely a domain match definition.
201   return gurl.DomainIs(restriction.data(), restriction.size());
202 }
203 
204 //------------------------------------------------------------------------------
SdchManager()205 SdchManager::SdchManager() : sdch_enabled_(false) {
206   DCHECK(!global_);
207   global_ = this;
208 }
209 
~SdchManager()210 SdchManager::~SdchManager() {
211   DCHECK(global_ == this);
212   while (!dictionaries_.empty()) {
213     DictionaryMap::iterator it = dictionaries_.begin();
214     it->second->Release();
215     dictionaries_.erase(it->first);
216   }
217   global_ = NULL;
218 }
219 
220 // static
Shutdown()221 void SdchManager::Shutdown() {
222   if (!global_ )
223     return;
224   global_->fetcher_.reset(NULL);
225 }
226 
227 // static
Global()228 SdchManager* SdchManager::Global() {
229   return global_;
230 }
231 
232 // static
SdchErrorRecovery(ProblemCodes problem)233 void SdchManager::SdchErrorRecovery(ProblemCodes problem) {
234   UMA_HISTOGRAM_ENUMERATION("Sdch3.ProblemCodes_4", problem, MAX_PROBLEM_CODE);
235 }
236 
EnableSdchSupport(const std::string & domain)237 void SdchManager::EnableSdchSupport(const std::string& domain) {
238   // We presume that there is a SDCH manager instance.
239   global_->supported_domain_ = domain;
240   global_->sdch_enabled_ = true;
241 }
242 
243 // static
BlacklistDomain(const GURL & url)244 void SdchManager::BlacklistDomain(const GURL& url) {
245   if (!global_ )
246     return;
247   global_->SetAllowLatencyExperiment(url, false);
248 
249   std::string domain(StringToLowerASCII(url.host()));
250   int count = global_->blacklisted_domains_[domain];
251   if (count > 0)
252     return;  // Domain is already blacklisted.
253 
254   count = 1 + 2 * global_->exponential_blacklist_count[domain];
255   if (count > 0)
256     global_->exponential_blacklist_count[domain] = count;
257   else
258     count = INT_MAX;
259 
260   global_->blacklisted_domains_[domain] = count;
261 }
262 
263 // static
BlacklistDomainForever(const GURL & url)264 void SdchManager::BlacklistDomainForever(const GURL& url) {
265   if (!global_ )
266     return;
267   global_->SetAllowLatencyExperiment(url, false);
268 
269   std::string domain(StringToLowerASCII(url.host()));
270   global_->exponential_blacklist_count[domain] = INT_MAX;
271   global_->blacklisted_domains_[domain] = INT_MAX;
272 }
273 
274 // static
ClearBlacklistings()275 void SdchManager::ClearBlacklistings() {
276   Global()->blacklisted_domains_.clear();
277   Global()->exponential_blacklist_count.clear();
278 }
279 
280 // static
ClearDomainBlacklisting(const std::string & domain)281 void SdchManager::ClearDomainBlacklisting(const std::string& domain) {
282   Global()->blacklisted_domains_.erase(StringToLowerASCII(domain));
283 }
284 
285 // static
BlackListDomainCount(const std::string & domain)286 int SdchManager::BlackListDomainCount(const std::string& domain) {
287   if (Global()->blacklisted_domains_.end() ==
288       Global()->blacklisted_domains_.find(domain))
289     return 0;
290   return Global()->blacklisted_domains_[StringToLowerASCII(domain)];
291 }
292 
293 // static
BlacklistDomainExponential(const std::string & domain)294 int SdchManager::BlacklistDomainExponential(const std::string& domain) {
295   if (Global()->exponential_blacklist_count.end() ==
296       Global()->exponential_blacklist_count.find(domain))
297     return 0;
298   return Global()->exponential_blacklist_count[StringToLowerASCII(domain)];
299 }
300 
IsInSupportedDomain(const GURL & url)301 bool SdchManager::IsInSupportedDomain(const GURL& url) {
302   if (!sdch_enabled_ )
303     return false;
304   if (!supported_domain_.empty() &&
305       !url.DomainIs(supported_domain_.data(), supported_domain_.size()))
306      return false;  // It is not the singular supported domain.
307 
308   if (blacklisted_domains_.empty())
309     return true;
310 
311   std::string domain(StringToLowerASCII(url.host()));
312   DomainCounter::iterator it = blacklisted_domains_.find(domain);
313   if (blacklisted_domains_.end() == it)
314     return true;
315 
316   int count = it->second - 1;
317   if (count > 0)
318     blacklisted_domains_[domain] = count;
319   else
320     blacklisted_domains_.erase(domain);
321   SdchErrorRecovery(DOMAIN_BLACKLIST_INCLUDES_TARGET);
322   return false;
323 }
324 
FetchDictionary(const GURL & request_url,const GURL & dictionary_url)325 void SdchManager::FetchDictionary(const GURL& request_url,
326                                   const GURL& dictionary_url) {
327   if (SdchManager::Global()->CanFetchDictionary(request_url, dictionary_url) &&
328       fetcher_.get())
329     fetcher_->Schedule(dictionary_url);
330 }
331 
CanFetchDictionary(const GURL & referring_url,const GURL & dictionary_url) const332 bool SdchManager::CanFetchDictionary(const GURL& referring_url,
333                                      const GURL& dictionary_url) const {
334   /* The user agent may retrieve a dictionary from the dictionary URL if all of
335      the following are true:
336        1 The dictionary URL host name matches the referrer URL host name
337        2 The dictionary URL host name domain matches the parent domain of the
338            referrer URL host name
339        3 The parent domain of the referrer URL host name is not a top level
340            domain
341        4 The dictionary URL is not an HTTPS URL.
342    */
343   // Item (1) above implies item (2).  Spec should be updated.
344   // I take "host name match" to be "is identical to"
345   if (referring_url.host() != dictionary_url.host()) {
346     SdchErrorRecovery(DICTIONARY_LOAD_ATTEMPT_FROM_DIFFERENT_HOST);
347     return false;
348   }
349   if (referring_url.SchemeIs("https")) {
350     SdchErrorRecovery(DICTIONARY_SELECTED_FOR_SSL);
351     return false;
352   }
353 
354   // TODO(jar): Remove this failsafe conservative hack which is more restrictive
355   // than current SDCH spec when needed, and justified by security audit.
356   if (!referring_url.SchemeIs("http")) {
357     SdchErrorRecovery(DICTIONARY_SELECTED_FROM_NON_HTTP);
358     return false;
359   }
360 
361   return true;
362 }
363 
AddSdchDictionary(const std::string & dictionary_text,const GURL & dictionary_url)364 bool SdchManager::AddSdchDictionary(const std::string& dictionary_text,
365     const GURL& dictionary_url) {
366   std::string client_hash;
367   std::string server_hash;
368   GenerateHash(dictionary_text, &client_hash, &server_hash);
369   if (dictionaries_.find(server_hash) != dictionaries_.end()) {
370     SdchErrorRecovery(DICTIONARY_ALREADY_LOADED);
371     return false;  // Already loaded.
372   }
373 
374   std::string domain, path;
375   std::set<int> ports;
376   base::Time expiration(base::Time::Now() + base::TimeDelta::FromDays(30));
377 
378   if (dictionary_text.empty()) {
379     SdchErrorRecovery(DICTIONARY_HAS_NO_TEXT);
380     return false;  // Missing header.
381   }
382 
383   size_t header_end = dictionary_text.find("\n\n");
384   if (std::string::npos == header_end) {
385     SdchErrorRecovery(DICTIONARY_HAS_NO_HEADER);
386     return false;  // Missing header.
387   }
388   size_t line_start = 0;  // Start of line being parsed.
389   while (1) {
390     size_t line_end = dictionary_text.find('\n', line_start);
391     DCHECK(std::string::npos != line_end);
392     DCHECK(line_end <= header_end);
393 
394     size_t colon_index = dictionary_text.find(':', line_start);
395     if (std::string::npos == colon_index) {
396       SdchErrorRecovery(DICTIONARY_HEADER_LINE_MISSING_COLON);
397       return false;  // Illegal line missing a colon.
398     }
399 
400     if (colon_index > line_end)
401       break;
402 
403     size_t value_start = dictionary_text.find_first_not_of(" \t",
404                                                            colon_index + 1);
405     if (std::string::npos != value_start) {
406       if (value_start >= line_end)
407         break;
408       std::string name(dictionary_text, line_start, colon_index - line_start);
409       std::string value(dictionary_text, value_start, line_end - value_start);
410       name = StringToLowerASCII(name);
411       if (name == "domain") {
412         domain = value;
413       } else if (name == "path") {
414         path = value;
415       } else if (name == "format-version") {
416         if (value != "1.0")
417           return false;
418       } else if (name == "max-age") {
419         int64 seconds;
420         base::StringToInt64(value, &seconds);
421         expiration = base::Time::Now() + base::TimeDelta::FromSeconds(seconds);
422       } else if (name == "port") {
423         int port;
424         base::StringToInt(value, &port);
425         if (port >= 0)
426           ports.insert(port);
427       }
428     }
429 
430     if (line_end >= header_end)
431       break;
432     line_start = line_end + 1;
433   }
434 
435   if (!Dictionary::CanSet(domain, path, ports, dictionary_url))
436     return false;
437 
438   // TODO(jar): Remove these hacks to preclude a DOS attack involving piles of
439   // useless dictionaries.  We should probably have a cache eviction plan,
440   // instead of just blocking additions.  For now, with the spec in flux, it
441   // is probably not worth doing eviction handling.
442   if (kMaxDictionarySize < dictionary_text.size()) {
443     SdchErrorRecovery(DICTIONARY_IS_TOO_LARGE);
444     return false;
445   }
446   if (kMaxDictionaryCount <= dictionaries_.size()) {
447     SdchErrorRecovery(DICTIONARY_COUNT_EXCEEDED);
448     return false;
449   }
450 
451   UMA_HISTOGRAM_COUNTS("Sdch3.Dictionary size loaded", dictionary_text.size());
452   DVLOG(1) << "Loaded dictionary with client hash " << client_hash
453            << " and server hash " << server_hash;
454   Dictionary* dictionary =
455       new Dictionary(dictionary_text, header_end + 2, client_hash,
456                      dictionary_url, domain, path, expiration, ports);
457   dictionary->AddRef();
458   dictionaries_[server_hash] = dictionary;
459   return true;
460 }
461 
GetVcdiffDictionary(const std::string & server_hash,const GURL & referring_url,Dictionary ** dictionary)462 void SdchManager::GetVcdiffDictionary(const std::string& server_hash,
463     const GURL& referring_url, Dictionary** dictionary) {
464   *dictionary = NULL;
465   DictionaryMap::iterator it = dictionaries_.find(server_hash);
466   if (it == dictionaries_.end()) {
467     return;
468   }
469   Dictionary* matching_dictionary = it->second;
470   if (!matching_dictionary->CanUse(referring_url))
471     return;
472   *dictionary = matching_dictionary;
473 }
474 
475 // TODO(jar): If we have evictions from the dictionaries_, then we need to
476 // change this interface to return a list of reference counted Dictionary
477 // instances that can be used if/when a server specifies one.
GetAvailDictionaryList(const GURL & target_url,std::string * list)478 void SdchManager::GetAvailDictionaryList(const GURL& target_url,
479                                          std::string* list) {
480   int count = 0;
481   for (DictionaryMap::iterator it = dictionaries_.begin();
482        it != dictionaries_.end(); ++it) {
483     if (!it->second->CanAdvertise(target_url))
484       continue;
485     ++count;
486     if (!list->empty())
487       list->append(",");
488     list->append(it->second->client_hash());
489   }
490   // Watch to see if we have corrupt or numerous dictionaries.
491   if (count > 0)
492     UMA_HISTOGRAM_COUNTS("Sdch3.Advertisement_Count", count);
493 }
494 
495 // static
GenerateHash(const std::string & dictionary_text,std::string * client_hash,std::string * server_hash)496 void SdchManager::GenerateHash(const std::string& dictionary_text,
497     std::string* client_hash, std::string* server_hash) {
498   char binary_hash[32];
499   crypto::SHA256HashString(dictionary_text, binary_hash, sizeof(binary_hash));
500 
501   std::string first_48_bits(&binary_hash[0], 6);
502   std::string second_48_bits(&binary_hash[6], 6);
503   UrlSafeBase64Encode(first_48_bits, client_hash);
504   UrlSafeBase64Encode(second_48_bits, server_hash);
505 
506   DCHECK_EQ(server_hash->length(), 8u);
507   DCHECK_EQ(client_hash->length(), 8u);
508 }
509 
510 //------------------------------------------------------------------------------
511 // Methods for supporting latency experiments.
512 
AllowLatencyExperiment(const GURL & url) const513 bool SdchManager::AllowLatencyExperiment(const GURL& url) const {
514   return allow_latency_experiment_.end() !=
515       allow_latency_experiment_.find(url.host());
516 }
517 
SetAllowLatencyExperiment(const GURL & url,bool enable)518 void SdchManager::SetAllowLatencyExperiment(const GURL& url, bool enable) {
519   if (enable) {
520     allow_latency_experiment_.insert(url.host());
521     return;
522   }
523   ExperimentSet::iterator it = allow_latency_experiment_.find(url.host());
524   if (allow_latency_experiment_.end() == it)
525     return;  // It was already erased, or never allowed.
526   SdchErrorRecovery(LATENCY_TEST_DISALLOWED);
527   allow_latency_experiment_.erase(it);
528 }
529 
530 // static
UrlSafeBase64Encode(const std::string & input,std::string * output)531 void SdchManager::UrlSafeBase64Encode(const std::string& input,
532                                       std::string* output) {
533   // Since this is only done during a dictionary load, and hashes are only 8
534   // characters, we just do the simple fixup, rather than rewriting the encoder.
535   base::Base64Encode(input, output);
536   for (size_t i = 0; i < output->size(); ++i) {
537     switch (output->data()[i]) {
538       case '+':
539         (*output)[i] = '-';
540         continue;
541       case '/':
542         (*output)[i] = '_';
543         continue;
544       default:
545         continue;
546     }
547   }
548 }
549 
550 }  // namespace net
551