1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "net/base/sdch_manager.h"
6
7 #include "base/base64.h"
8 #include "base/logging.h"
9 #include "base/metrics/histogram.h"
10 #include "base/strings/string_number_conversions.h"
11 #include "base/strings/string_util.h"
12 #include "crypto/sha2.h"
13 #include "net/base/registry_controlled_domains/registry_controlled_domain.h"
14 #include "net/url_request/url_request_http_job.h"
15
16 namespace net {
17
18 //------------------------------------------------------------------------------
19 // static
20
21 // Adjust SDCH limits downwards for mobile.
22 #if defined(OS_ANDROID) || defined(OS_IOS)
23 // static
24 const size_t SdchManager::kMaxDictionaryCount = 1;
25 const size_t SdchManager::kMaxDictionarySize = 150 * 1000;
26 #else
27 // static
28 const size_t SdchManager::kMaxDictionaryCount = 20;
29 const size_t SdchManager::kMaxDictionarySize = 1000 * 1000;
30 #endif
31
32 // static
33 bool SdchManager::g_sdch_enabled_ = true;
34
35 // static
36 bool SdchManager::g_secure_scheme_supported_ = false;
37
38 //------------------------------------------------------------------------------
Dictionary(const std::string & dictionary_text,size_t offset,const std::string & client_hash,const GURL & gurl,const std::string & domain,const std::string & path,const base::Time & expiration,const std::set<int> & ports)39 SdchManager::Dictionary::Dictionary(const std::string& dictionary_text,
40 size_t offset,
41 const std::string& client_hash,
42 const GURL& gurl,
43 const std::string& domain,
44 const std::string& path,
45 const base::Time& expiration,
46 const std::set<int>& ports)
47 : text_(dictionary_text, offset),
48 client_hash_(client_hash),
49 url_(gurl),
50 domain_(domain),
51 path_(path),
52 expiration_(expiration),
53 ports_(ports) {
54 }
55
~Dictionary()56 SdchManager::Dictionary::~Dictionary() {
57 }
58
CanAdvertise(const GURL & target_url)59 bool SdchManager::Dictionary::CanAdvertise(const GURL& target_url) {
60 /* The specific rules of when a dictionary should be advertised in an
61 Avail-Dictionary header are modeled after the rules for cookie scoping. The
62 terms "domain-match" and "pathmatch" are defined in RFC 2965 [6]. A
63 dictionary may be advertised in the Avail-Dictionaries header exactly when
64 all of the following are true:
65 1. The server's effective host name domain-matches the Domain attribute of
66 the dictionary.
67 2. If the dictionary has a Port attribute, the request port is one of the
68 ports listed in the Port attribute.
69 3. The request URI path-matches the path header of the dictionary.
70 4. The request is not an HTTPS request.
71 We can override (ignore) item (4) only when we have explicitly enabled
72 HTTPS support AND dictionary has been acquired over HTTPS.
73 */
74 if (!DomainMatch(target_url, domain_))
75 return false;
76 if (!ports_.empty() && 0 == ports_.count(target_url.EffectiveIntPort()))
77 return false;
78 if (path_.size() && !PathMatch(target_url.path(), path_))
79 return false;
80 if (!SdchManager::secure_scheme_supported() && target_url.SchemeIsSecure())
81 return false;
82 if (target_url.SchemeIsSecure() && !url_.SchemeIsSecure())
83 return false;
84 if (base::Time::Now() > expiration_)
85 return false;
86 return true;
87 }
88
89 //------------------------------------------------------------------------------
90 // Security functions restricting loads and use of dictionaries.
91
92 // static
CanSet(const std::string & domain,const std::string & path,const std::set<int> & ports,const GURL & dictionary_url)93 bool SdchManager::Dictionary::CanSet(const std::string& domain,
94 const std::string& path,
95 const std::set<int>& ports,
96 const GURL& dictionary_url) {
97 /*
98 A dictionary is invalid and must not be stored if any of the following are
99 true:
100 1. The dictionary has no Domain attribute.
101 2. The effective host name that derives from the referer URL host name does
102 not domain-match the Domain attribute.
103 3. The Domain attribute is a top level domain.
104 4. The referer URL host is a host domain name (not IP address) and has the
105 form HD, where D is the value of the Domain attribute, and H is a string
106 that contains one or more dots.
107 5. If the dictionary has a Port attribute and the referer URL's port was not
108 in the list.
109 */
110
111 // TODO(jar): Redirects in dictionary fetches might plausibly be problematic,
112 // and hence the conservative approach is to not allow any redirects (if there
113 // were any... then don't allow the dictionary to be set).
114
115 if (domain.empty()) {
116 SdchErrorRecovery(DICTIONARY_MISSING_DOMAIN_SPECIFIER);
117 return false; // Domain is required.
118 }
119 if (registry_controlled_domains::GetDomainAndRegistry(
120 domain,
121 registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES).empty()) {
122 SdchErrorRecovery(DICTIONARY_SPECIFIES_TOP_LEVEL_DOMAIN);
123 return false; // domain was a TLD.
124 }
125 if (!Dictionary::DomainMatch(dictionary_url, domain)) {
126 SdchErrorRecovery(DICTIONARY_DOMAIN_NOT_MATCHING_SOURCE_URL);
127 return false;
128 }
129
130 std::string referrer_url_host = dictionary_url.host();
131 size_t postfix_domain_index = referrer_url_host.rfind(domain);
132 // See if it is indeed a postfix, or just an internal string.
133 if (referrer_url_host.size() == postfix_domain_index + domain.size()) {
134 // It is a postfix... so check to see if there's a dot in the prefix.
135 size_t end_of_host_index = referrer_url_host.find_first_of('.');
136 if (referrer_url_host.npos != end_of_host_index &&
137 end_of_host_index < postfix_domain_index) {
138 SdchErrorRecovery(DICTIONARY_REFERER_URL_HAS_DOT_IN_PREFIX);
139 return false;
140 }
141 }
142
143 if (!ports.empty()
144 && 0 == ports.count(dictionary_url.EffectiveIntPort())) {
145 SdchErrorRecovery(DICTIONARY_PORT_NOT_MATCHING_SOURCE_URL);
146 return false;
147 }
148 return true;
149 }
150
151 // static
CanUse(const GURL & referring_url)152 bool SdchManager::Dictionary::CanUse(const GURL& referring_url) {
153 /*
154 1. The request URL's host name domain-matches the Domain attribute of the
155 dictionary.
156 2. If the dictionary has a Port attribute, the request port is one of the
157 ports listed in the Port attribute.
158 3. The request URL path-matches the path attribute of the dictionary.
159 4. The request is not an HTTPS request.
160 We can override (ignore) item (4) only when we have explicitly enabled
161 HTTPS support AND dictionary has been acquired over HTTPS.
162 */
163 if (!DomainMatch(referring_url, domain_)) {
164 SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_DOMAIN);
165 return false;
166 }
167 if (!ports_.empty()
168 && 0 == ports_.count(referring_url.EffectiveIntPort())) {
169 SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_PORT_LIST);
170 return false;
171 }
172 if (path_.size() && !PathMatch(referring_url.path(), path_)) {
173 SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_PATH);
174 return false;
175 }
176 if (!SdchManager::secure_scheme_supported() &&
177 referring_url.SchemeIsSecure()) {
178 SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_SCHEME);
179 return false;
180 }
181 if (referring_url.SchemeIsSecure() && !url_.SchemeIsSecure()) {
182 SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_SCHEME);
183 return false;
184 }
185
186 // TODO(jar): Remove overly restrictive failsafe test (added per security
187 // review) when we have a need to be more general.
188 if (!referring_url.SchemeIsHTTPOrHTTPS()) {
189 SdchErrorRecovery(ATTEMPT_TO_DECODE_NON_HTTP_DATA);
190 return false;
191 }
192
193 return true;
194 }
195
PathMatch(const std::string & path,const std::string & restriction)196 bool SdchManager::Dictionary::PathMatch(const std::string& path,
197 const std::string& restriction) {
198 /* Must be either:
199 1. P2 is equal to P1
200 2. P2 is a prefix of P1 and either the final character in P2 is "/" or the
201 character following P2 in P1 is "/".
202 */
203 if (path == restriction)
204 return true;
205 size_t prefix_length = restriction.size();
206 if (prefix_length > path.size())
207 return false; // Can't be a prefix.
208 if (0 != path.compare(0, prefix_length, restriction))
209 return false;
210 return restriction[prefix_length - 1] == '/' || path[prefix_length] == '/';
211 }
212
213 // static
DomainMatch(const GURL & gurl,const std::string & restriction)214 bool SdchManager::Dictionary::DomainMatch(const GURL& gurl,
215 const std::string& restriction) {
216 // TODO(jar): This is not precisely a domain match definition.
217 return gurl.DomainIs(restriction.data(), restriction.size());
218 }
219
220 //------------------------------------------------------------------------------
SdchManager()221 SdchManager::SdchManager() {
222 DCHECK(CalledOnValidThread());
223 }
224
~SdchManager()225 SdchManager::~SdchManager() {
226 DCHECK(CalledOnValidThread());
227 while (!dictionaries_.empty()) {
228 DictionaryMap::iterator it = dictionaries_.begin();
229 dictionaries_.erase(it->first);
230 }
231 }
232
ClearData()233 void SdchManager::ClearData() {
234 blacklisted_domains_.clear();
235 exponential_blacklist_count_.clear();
236 allow_latency_experiment_.clear();
237 if (fetcher_.get())
238 fetcher_->Cancel();
239
240 // Note that this may result in not having dictionaries we've advertised
241 // for incoming responses. The window is relatively small (as ClearData()
242 // is not expected to be called frequently), so we rely on meta-refresh
243 // to handle this case.
244 dictionaries_.clear();
245 }
246
247 // static
SdchErrorRecovery(ProblemCodes problem)248 void SdchManager::SdchErrorRecovery(ProblemCodes problem) {
249 UMA_HISTOGRAM_ENUMERATION("Sdch3.ProblemCodes_4", problem, MAX_PROBLEM_CODE);
250 }
251
set_sdch_fetcher(SdchFetcher * fetcher)252 void SdchManager::set_sdch_fetcher(SdchFetcher* fetcher) {
253 DCHECK(CalledOnValidThread());
254 fetcher_.reset(fetcher);
255 }
256
257 // static
EnableSdchSupport(bool enabled)258 void SdchManager::EnableSdchSupport(bool enabled) {
259 g_sdch_enabled_ = enabled;
260 }
261
262 // static
EnableSecureSchemeSupport(bool enabled)263 void SdchManager::EnableSecureSchemeSupport(bool enabled) {
264 g_secure_scheme_supported_ = enabled;
265 }
266
BlacklistDomain(const GURL & url)267 void SdchManager::BlacklistDomain(const GURL& url) {
268 SetAllowLatencyExperiment(url, false);
269
270 std::string domain(StringToLowerASCII(url.host()));
271 int count = blacklisted_domains_[domain];
272 if (count > 0)
273 return; // Domain is already blacklisted.
274
275 count = 1 + 2 * exponential_blacklist_count_[domain];
276 if (count > 0)
277 exponential_blacklist_count_[domain] = count;
278 else
279 count = INT_MAX;
280
281 blacklisted_domains_[domain] = count;
282 }
283
BlacklistDomainForever(const GURL & url)284 void SdchManager::BlacklistDomainForever(const GURL& url) {
285 SetAllowLatencyExperiment(url, false);
286
287 std::string domain(StringToLowerASCII(url.host()));
288 exponential_blacklist_count_[domain] = INT_MAX;
289 blacklisted_domains_[domain] = INT_MAX;
290 }
291
ClearBlacklistings()292 void SdchManager::ClearBlacklistings() {
293 blacklisted_domains_.clear();
294 exponential_blacklist_count_.clear();
295 }
296
ClearDomainBlacklisting(const std::string & domain)297 void SdchManager::ClearDomainBlacklisting(const std::string& domain) {
298 blacklisted_domains_.erase(StringToLowerASCII(domain));
299 }
300
BlackListDomainCount(const std::string & domain)301 int SdchManager::BlackListDomainCount(const std::string& domain) {
302 if (blacklisted_domains_.end() == blacklisted_domains_.find(domain))
303 return 0;
304 return blacklisted_domains_[StringToLowerASCII(domain)];
305 }
306
BlacklistDomainExponential(const std::string & domain)307 int SdchManager::BlacklistDomainExponential(const std::string& domain) {
308 if (exponential_blacklist_count_.end() ==
309 exponential_blacklist_count_.find(domain))
310 return 0;
311 return exponential_blacklist_count_[StringToLowerASCII(domain)];
312 }
313
IsInSupportedDomain(const GURL & url)314 bool SdchManager::IsInSupportedDomain(const GURL& url) {
315 DCHECK(CalledOnValidThread());
316 if (!g_sdch_enabled_ )
317 return false;
318
319 if (!secure_scheme_supported() && url.SchemeIsSecure())
320 return false;
321
322 if (blacklisted_domains_.empty())
323 return true;
324
325 std::string domain(StringToLowerASCII(url.host()));
326 DomainCounter::iterator it = blacklisted_domains_.find(domain);
327 if (blacklisted_domains_.end() == it)
328 return true;
329
330 int count = it->second - 1;
331 if (count > 0)
332 blacklisted_domains_[domain] = count;
333 else
334 blacklisted_domains_.erase(domain);
335 SdchErrorRecovery(DOMAIN_BLACKLIST_INCLUDES_TARGET);
336 return false;
337 }
338
FetchDictionary(const GURL & request_url,const GURL & dictionary_url)339 void SdchManager::FetchDictionary(const GURL& request_url,
340 const GURL& dictionary_url) {
341 DCHECK(CalledOnValidThread());
342 if (CanFetchDictionary(request_url, dictionary_url) && fetcher_.get())
343 fetcher_->Schedule(dictionary_url);
344 }
345
CanFetchDictionary(const GURL & referring_url,const GURL & dictionary_url) const346 bool SdchManager::CanFetchDictionary(const GURL& referring_url,
347 const GURL& dictionary_url) const {
348 DCHECK(CalledOnValidThread());
349 /* The user agent may retrieve a dictionary from the dictionary URL if all of
350 the following are true:
351 1 The dictionary URL host name matches the referrer URL host name and
352 scheme.
353 2 The dictionary URL host name domain matches the parent domain of the
354 referrer URL host name
355 3 The parent domain of the referrer URL host name is not a top level
356 domain
357 4 The dictionary URL is not an HTTPS URL.
358 */
359 // Item (1) above implies item (2). Spec should be updated.
360 // I take "host name match" to be "is identical to"
361 if (referring_url.host() != dictionary_url.host() ||
362 referring_url.scheme() != dictionary_url.scheme()) {
363 SdchErrorRecovery(DICTIONARY_LOAD_ATTEMPT_FROM_DIFFERENT_HOST);
364 return false;
365 }
366 if (!secure_scheme_supported() && referring_url.SchemeIsSecure()) {
367 SdchErrorRecovery(DICTIONARY_SELECTED_FOR_SSL);
368 return false;
369 }
370
371 // TODO(jar): Remove this failsafe conservative hack which is more restrictive
372 // than current SDCH spec when needed, and justified by security audit.
373 if (!referring_url.SchemeIsHTTPOrHTTPS()) {
374 SdchErrorRecovery(DICTIONARY_SELECTED_FROM_NON_HTTP);
375 return false;
376 }
377
378 return true;
379 }
380
AddSdchDictionary(const std::string & dictionary_text,const GURL & dictionary_url)381 bool SdchManager::AddSdchDictionary(const std::string& dictionary_text,
382 const GURL& dictionary_url) {
383 DCHECK(CalledOnValidThread());
384 std::string client_hash;
385 std::string server_hash;
386 GenerateHash(dictionary_text, &client_hash, &server_hash);
387 if (dictionaries_.find(server_hash) != dictionaries_.end()) {
388 SdchErrorRecovery(DICTIONARY_ALREADY_LOADED);
389 return false; // Already loaded.
390 }
391
392 std::string domain, path;
393 std::set<int> ports;
394 base::Time expiration(base::Time::Now() + base::TimeDelta::FromDays(30));
395
396 if (dictionary_text.empty()) {
397 SdchErrorRecovery(DICTIONARY_HAS_NO_TEXT);
398 return false; // Missing header.
399 }
400
401 size_t header_end = dictionary_text.find("\n\n");
402 if (std::string::npos == header_end) {
403 SdchErrorRecovery(DICTIONARY_HAS_NO_HEADER);
404 return false; // Missing header.
405 }
406 size_t line_start = 0; // Start of line being parsed.
407 while (1) {
408 size_t line_end = dictionary_text.find('\n', line_start);
409 DCHECK(std::string::npos != line_end);
410 DCHECK_LE(line_end, header_end);
411
412 size_t colon_index = dictionary_text.find(':', line_start);
413 if (std::string::npos == colon_index) {
414 SdchErrorRecovery(DICTIONARY_HEADER_LINE_MISSING_COLON);
415 return false; // Illegal line missing a colon.
416 }
417
418 if (colon_index > line_end)
419 break;
420
421 size_t value_start = dictionary_text.find_first_not_of(" \t",
422 colon_index + 1);
423 if (std::string::npos != value_start) {
424 if (value_start >= line_end)
425 break;
426 std::string name(dictionary_text, line_start, colon_index - line_start);
427 std::string value(dictionary_text, value_start, line_end - value_start);
428 name = StringToLowerASCII(name);
429 if (name == "domain") {
430 domain = value;
431 } else if (name == "path") {
432 path = value;
433 } else if (name == "format-version") {
434 if (value != "1.0")
435 return false;
436 } else if (name == "max-age") {
437 int64 seconds;
438 base::StringToInt64(value, &seconds);
439 expiration = base::Time::Now() + base::TimeDelta::FromSeconds(seconds);
440 } else if (name == "port") {
441 int port;
442 base::StringToInt(value, &port);
443 if (port >= 0)
444 ports.insert(port);
445 }
446 }
447
448 if (line_end >= header_end)
449 break;
450 line_start = line_end + 1;
451 }
452
453 if (!IsInSupportedDomain(dictionary_url))
454 return false;
455
456 if (!Dictionary::CanSet(domain, path, ports, dictionary_url))
457 return false;
458
459 // TODO(jar): Remove these hacks to preclude a DOS attack involving piles of
460 // useless dictionaries. We should probably have a cache eviction plan,
461 // instead of just blocking additions. For now, with the spec in flux, it
462 // is probably not worth doing eviction handling.
463 if (kMaxDictionarySize < dictionary_text.size()) {
464 SdchErrorRecovery(DICTIONARY_IS_TOO_LARGE);
465 return false;
466 }
467 if (kMaxDictionaryCount <= dictionaries_.size()) {
468 SdchErrorRecovery(DICTIONARY_COUNT_EXCEEDED);
469 return false;
470 }
471
472 UMA_HISTOGRAM_COUNTS("Sdch3.Dictionary size loaded", dictionary_text.size());
473 DVLOG(1) << "Loaded dictionary with client hash " << client_hash
474 << " and server hash " << server_hash;
475 Dictionary* dictionary =
476 new Dictionary(dictionary_text, header_end + 2, client_hash,
477 dictionary_url, domain, path, expiration, ports);
478 dictionaries_[server_hash] = dictionary;
479 return true;
480 }
481
GetVcdiffDictionary(const std::string & server_hash,const GURL & referring_url,scoped_refptr<Dictionary> * dictionary)482 void SdchManager::GetVcdiffDictionary(
483 const std::string& server_hash,
484 const GURL& referring_url,
485 scoped_refptr<Dictionary>* dictionary) {
486 DCHECK(CalledOnValidThread());
487 *dictionary = NULL;
488 DictionaryMap::iterator it = dictionaries_.find(server_hash);
489 if (it == dictionaries_.end()) {
490 return;
491 }
492 scoped_refptr<Dictionary> matching_dictionary = it->second;
493 if (!IsInSupportedDomain(referring_url))
494 return;
495 if (!matching_dictionary->CanUse(referring_url))
496 return;
497 *dictionary = matching_dictionary;
498 }
499
500 // TODO(jar): If we have evictions from the dictionaries_, then we need to
501 // change this interface to return a list of reference counted Dictionary
502 // instances that can be used if/when a server specifies one.
GetAvailDictionaryList(const GURL & target_url,std::string * list)503 void SdchManager::GetAvailDictionaryList(const GURL& target_url,
504 std::string* list) {
505 DCHECK(CalledOnValidThread());
506 int count = 0;
507 for (DictionaryMap::iterator it = dictionaries_.begin();
508 it != dictionaries_.end(); ++it) {
509 if (!IsInSupportedDomain(target_url))
510 continue;
511 if (!it->second->CanAdvertise(target_url))
512 continue;
513 ++count;
514 if (!list->empty())
515 list->append(",");
516 list->append(it->second->client_hash());
517 }
518 // Watch to see if we have corrupt or numerous dictionaries.
519 if (count > 0)
520 UMA_HISTOGRAM_COUNTS("Sdch3.Advertisement_Count", count);
521 }
522
523 // static
GenerateHash(const std::string & dictionary_text,std::string * client_hash,std::string * server_hash)524 void SdchManager::GenerateHash(const std::string& dictionary_text,
525 std::string* client_hash, std::string* server_hash) {
526 char binary_hash[32];
527 crypto::SHA256HashString(dictionary_text, binary_hash, sizeof(binary_hash));
528
529 std::string first_48_bits(&binary_hash[0], 6);
530 std::string second_48_bits(&binary_hash[6], 6);
531 UrlSafeBase64Encode(first_48_bits, client_hash);
532 UrlSafeBase64Encode(second_48_bits, server_hash);
533
534 DCHECK_EQ(server_hash->length(), 8u);
535 DCHECK_EQ(client_hash->length(), 8u);
536 }
537
538 //------------------------------------------------------------------------------
539 // Methods for supporting latency experiments.
540
AllowLatencyExperiment(const GURL & url) const541 bool SdchManager::AllowLatencyExperiment(const GURL& url) const {
542 DCHECK(CalledOnValidThread());
543 return allow_latency_experiment_.end() !=
544 allow_latency_experiment_.find(url.host());
545 }
546
SetAllowLatencyExperiment(const GURL & url,bool enable)547 void SdchManager::SetAllowLatencyExperiment(const GURL& url, bool enable) {
548 DCHECK(CalledOnValidThread());
549 if (enable) {
550 allow_latency_experiment_.insert(url.host());
551 return;
552 }
553 ExperimentSet::iterator it = allow_latency_experiment_.find(url.host());
554 if (allow_latency_experiment_.end() == it)
555 return; // It was already erased, or never allowed.
556 SdchErrorRecovery(LATENCY_TEST_DISALLOWED);
557 allow_latency_experiment_.erase(it);
558 }
559
560 // static
UrlSafeBase64Encode(const std::string & input,std::string * output)561 void SdchManager::UrlSafeBase64Encode(const std::string& input,
562 std::string* output) {
563 // Since this is only done during a dictionary load, and hashes are only 8
564 // characters, we just do the simple fixup, rather than rewriting the encoder.
565 base::Base64Encode(input, output);
566 for (size_t i = 0; i < output->size(); ++i) {
567 switch (output->data()[i]) {
568 case '+':
569 (*output)[i] = '-';
570 continue;
571 case '/':
572 (*output)[i] = '_';
573 continue;
574 default:
575 continue;
576 }
577 }
578 }
579
580 } // namespace net
581