1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "net/base/sdch_manager.h"
6
7 #include "base/base64.h"
8 #include "base/logging.h"
9 #include "base/metrics/histogram.h"
10 #include "base/string_number_conversions.h"
11 #include "base/string_util.h"
12 #include "crypto/sha2.h"
13 #include "net/base/registry_controlled_domain.h"
14 #include "net/url_request/url_request_http_job.h"
15
16 namespace net {
17
18 //------------------------------------------------------------------------------
19 // static
20 const size_t SdchManager::kMaxDictionarySize = 1000000;
21
22 // static
23 const size_t SdchManager::kMaxDictionaryCount = 20;
24
25 // static
26 SdchManager* SdchManager::global_;
27
28 //------------------------------------------------------------------------------
Dictionary(const std::string & dictionary_text,size_t offset,const std::string & client_hash,const GURL & gurl,const std::string & domain,const std::string & path,const base::Time & expiration,const std::set<int> & ports)29 SdchManager::Dictionary::Dictionary(const std::string& dictionary_text,
30 size_t offset,
31 const std::string& client_hash,
32 const GURL& gurl,
33 const std::string& domain,
34 const std::string& path,
35 const base::Time& expiration,
36 const std::set<int>& ports)
37 : text_(dictionary_text, offset),
38 client_hash_(client_hash),
39 url_(gurl),
40 domain_(domain),
41 path_(path),
42 expiration_(expiration),
43 ports_(ports) {
44 }
45
~Dictionary()46 SdchManager::Dictionary::~Dictionary() {
47 }
48
CanAdvertise(const GURL & target_url)49 bool SdchManager::Dictionary::CanAdvertise(const GURL& target_url) {
50 if (!SdchManager::Global()->IsInSupportedDomain(target_url))
51 return false;
52 /* The specific rules of when a dictionary should be advertised in an
53 Avail-Dictionary header are modeled after the rules for cookie scoping. The
54 terms "domain-match" and "pathmatch" are defined in RFC 2965 [6]. A
55 dictionary may be advertised in the Avail-Dictionaries header exactly when
56 all of the following are true:
57 1. The server's effective host name domain-matches the Domain attribute of
58 the dictionary.
59 2. If the dictionary has a Port attribute, the request port is one of the
60 ports listed in the Port attribute.
61 3. The request URI path-matches the path header of the dictionary.
62 4. The request is not an HTTPS request.
63 */
64 if (!DomainMatch(target_url, domain_))
65 return false;
66 if (!ports_.empty() && 0 == ports_.count(target_url.EffectiveIntPort()))
67 return false;
68 if (path_.size() && !PathMatch(target_url.path(), path_))
69 return false;
70 if (target_url.SchemeIsSecure())
71 return false;
72 if (base::Time::Now() > expiration_)
73 return false;
74 return true;
75 }
76
77 //------------------------------------------------------------------------------
78 // Security functions restricting loads and use of dictionaries.
79
80 // static
CanSet(const std::string & domain,const std::string & path,const std::set<int> & ports,const GURL & dictionary_url)81 bool SdchManager::Dictionary::CanSet(const std::string& domain,
82 const std::string& path,
83 const std::set<int>& ports,
84 const GURL& dictionary_url) {
85 if (!SdchManager::Global()->IsInSupportedDomain(dictionary_url))
86 return false;
87 /*
88 A dictionary is invalid and must not be stored if any of the following are
89 true:
90 1. The dictionary has no Domain attribute.
91 2. The effective host name that derives from the referer URL host name does
92 not domain-match the Domain attribute.
93 3. The Domain attribute is a top level domain.
94 4. The referer URL host is a host domain name (not IP address) and has the
95 form HD, where D is the value of the Domain attribute, and H is a string
96 that contains one or more dots.
97 5. If the dictionary has a Port attribute and the referer URL's port was not
98 in the list.
99 */
100
101 // TODO(jar): Redirects in dictionary fetches might plausibly be problematic,
102 // and hence the conservative approach is to not allow any redirects (if there
103 // were any... then don't allow the dictionary to be set).
104
105 if (domain.empty()) {
106 SdchErrorRecovery(DICTIONARY_MISSING_DOMAIN_SPECIFIER);
107 return false; // Domain is required.
108 }
109 if (RegistryControlledDomainService::GetDomainAndRegistry(domain).size()
110 == 0) {
111 SdchErrorRecovery(DICTIONARY_SPECIFIES_TOP_LEVEL_DOMAIN);
112 return false; // domain was a TLD.
113 }
114 if (!Dictionary::DomainMatch(dictionary_url, domain)) {
115 SdchErrorRecovery(DICTIONARY_DOMAIN_NOT_MATCHING_SOURCE_URL);
116 return false;
117 }
118
119 std::string referrer_url_host = dictionary_url.host();
120 size_t postfix_domain_index = referrer_url_host.rfind(domain);
121 // See if it is indeed a postfix, or just an internal string.
122 if (referrer_url_host.size() == postfix_domain_index + domain.size()) {
123 // It is a postfix... so check to see if there's a dot in the prefix.
124 size_t end_of_host_index = referrer_url_host.find_first_of('.');
125 if (referrer_url_host.npos != end_of_host_index &&
126 end_of_host_index < postfix_domain_index) {
127 SdchErrorRecovery(DICTIONARY_REFERER_URL_HAS_DOT_IN_PREFIX);
128 return false;
129 }
130 }
131
132 if (!ports.empty()
133 && 0 == ports.count(dictionary_url.EffectiveIntPort())) {
134 SdchErrorRecovery(DICTIONARY_PORT_NOT_MATCHING_SOURCE_URL);
135 return false;
136 }
137 return true;
138 }
139
140 // static
CanUse(const GURL & referring_url)141 bool SdchManager::Dictionary::CanUse(const GURL& referring_url) {
142 if (!SdchManager::Global()->IsInSupportedDomain(referring_url))
143 return false;
144 /*
145 1. The request URL's host name domain-matches the Domain attribute of the
146 dictionary.
147 2. If the dictionary has a Port attribute, the request port is one of the
148 ports listed in the Port attribute.
149 3. The request URL path-matches the path attribute of the dictionary.
150 4. The request is not an HTTPS request.
151 */
152 if (!DomainMatch(referring_url, domain_)) {
153 SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_DOMAIN);
154 return false;
155 }
156 if (!ports_.empty()
157 && 0 == ports_.count(referring_url.EffectiveIntPort())) {
158 SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_PORT_LIST);
159 return false;
160 }
161 if (path_.size() && !PathMatch(referring_url.path(), path_)) {
162 SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_PATH);
163 return false;
164 }
165 if (referring_url.SchemeIsSecure()) {
166 SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_SCHEME);
167 return false;
168 }
169
170 // TODO(jar): Remove overly restrictive failsafe test (added per security
171 // review) when we have a need to be more general.
172 if (!referring_url.SchemeIs("http")) {
173 SdchErrorRecovery(ATTEMPT_TO_DECODE_NON_HTTP_DATA);
174 return false;
175 }
176
177 return true;
178 }
179
PathMatch(const std::string & path,const std::string & restriction)180 bool SdchManager::Dictionary::PathMatch(const std::string& path,
181 const std::string& restriction) {
182 /* Must be either:
183 1. P2 is equal to P1
184 2. P2 is a prefix of P1 and either the final character in P2 is "/" or the
185 character following P2 in P1 is "/".
186 */
187 if (path == restriction)
188 return true;
189 size_t prefix_length = restriction.size();
190 if (prefix_length > path.size())
191 return false; // Can't be a prefix.
192 if (0 != path.compare(0, prefix_length, restriction))
193 return false;
194 return restriction[prefix_length - 1] == '/' || path[prefix_length] == '/';
195 }
196
197 // static
DomainMatch(const GURL & gurl,const std::string & restriction)198 bool SdchManager::Dictionary::DomainMatch(const GURL& gurl,
199 const std::string& restriction) {
200 // TODO(jar): This is not precisely a domain match definition.
201 return gurl.DomainIs(restriction.data(), restriction.size());
202 }
203
204 //------------------------------------------------------------------------------
SdchManager()205 SdchManager::SdchManager() : sdch_enabled_(false) {
206 DCHECK(!global_);
207 global_ = this;
208 }
209
~SdchManager()210 SdchManager::~SdchManager() {
211 DCHECK(global_ == this);
212 while (!dictionaries_.empty()) {
213 DictionaryMap::iterator it = dictionaries_.begin();
214 it->second->Release();
215 dictionaries_.erase(it->first);
216 }
217 global_ = NULL;
218 }
219
220 // static
Shutdown()221 void SdchManager::Shutdown() {
222 if (!global_ )
223 return;
224 global_->fetcher_.reset(NULL);
225 }
226
227 // static
Global()228 SdchManager* SdchManager::Global() {
229 return global_;
230 }
231
232 // static
SdchErrorRecovery(ProblemCodes problem)233 void SdchManager::SdchErrorRecovery(ProblemCodes problem) {
234 UMA_HISTOGRAM_ENUMERATION("Sdch3.ProblemCodes_4", problem, MAX_PROBLEM_CODE);
235 }
236
EnableSdchSupport(const std::string & domain)237 void SdchManager::EnableSdchSupport(const std::string& domain) {
238 // We presume that there is a SDCH manager instance.
239 global_->supported_domain_ = domain;
240 global_->sdch_enabled_ = true;
241 }
242
243 // static
BlacklistDomain(const GURL & url)244 void SdchManager::BlacklistDomain(const GURL& url) {
245 if (!global_ )
246 return;
247 global_->SetAllowLatencyExperiment(url, false);
248
249 std::string domain(StringToLowerASCII(url.host()));
250 int count = global_->blacklisted_domains_[domain];
251 if (count > 0)
252 return; // Domain is already blacklisted.
253
254 count = 1 + 2 * global_->exponential_blacklist_count[domain];
255 if (count > 0)
256 global_->exponential_blacklist_count[domain] = count;
257 else
258 count = INT_MAX;
259
260 global_->blacklisted_domains_[domain] = count;
261 }
262
263 // static
BlacklistDomainForever(const GURL & url)264 void SdchManager::BlacklistDomainForever(const GURL& url) {
265 if (!global_ )
266 return;
267 global_->SetAllowLatencyExperiment(url, false);
268
269 std::string domain(StringToLowerASCII(url.host()));
270 global_->exponential_blacklist_count[domain] = INT_MAX;
271 global_->blacklisted_domains_[domain] = INT_MAX;
272 }
273
274 // static
ClearBlacklistings()275 void SdchManager::ClearBlacklistings() {
276 Global()->blacklisted_domains_.clear();
277 Global()->exponential_blacklist_count.clear();
278 }
279
280 // static
ClearDomainBlacklisting(const std::string & domain)281 void SdchManager::ClearDomainBlacklisting(const std::string& domain) {
282 Global()->blacklisted_domains_.erase(StringToLowerASCII(domain));
283 }
284
285 // static
BlackListDomainCount(const std::string & domain)286 int SdchManager::BlackListDomainCount(const std::string& domain) {
287 if (Global()->blacklisted_domains_.end() ==
288 Global()->blacklisted_domains_.find(domain))
289 return 0;
290 return Global()->blacklisted_domains_[StringToLowerASCII(domain)];
291 }
292
293 // static
BlacklistDomainExponential(const std::string & domain)294 int SdchManager::BlacklistDomainExponential(const std::string& domain) {
295 if (Global()->exponential_blacklist_count.end() ==
296 Global()->exponential_blacklist_count.find(domain))
297 return 0;
298 return Global()->exponential_blacklist_count[StringToLowerASCII(domain)];
299 }
300
IsInSupportedDomain(const GURL & url)301 bool SdchManager::IsInSupportedDomain(const GURL& url) {
302 if (!sdch_enabled_ )
303 return false;
304 if (!supported_domain_.empty() &&
305 !url.DomainIs(supported_domain_.data(), supported_domain_.size()))
306 return false; // It is not the singular supported domain.
307
308 if (blacklisted_domains_.empty())
309 return true;
310
311 std::string domain(StringToLowerASCII(url.host()));
312 DomainCounter::iterator it = blacklisted_domains_.find(domain);
313 if (blacklisted_domains_.end() == it)
314 return true;
315
316 int count = it->second - 1;
317 if (count > 0)
318 blacklisted_domains_[domain] = count;
319 else
320 blacklisted_domains_.erase(domain);
321 SdchErrorRecovery(DOMAIN_BLACKLIST_INCLUDES_TARGET);
322 return false;
323 }
324
FetchDictionary(const GURL & request_url,const GURL & dictionary_url)325 void SdchManager::FetchDictionary(const GURL& request_url,
326 const GURL& dictionary_url) {
327 if (SdchManager::Global()->CanFetchDictionary(request_url, dictionary_url) &&
328 fetcher_.get())
329 fetcher_->Schedule(dictionary_url);
330 }
331
CanFetchDictionary(const GURL & referring_url,const GURL & dictionary_url) const332 bool SdchManager::CanFetchDictionary(const GURL& referring_url,
333 const GURL& dictionary_url) const {
334 /* The user agent may retrieve a dictionary from the dictionary URL if all of
335 the following are true:
336 1 The dictionary URL host name matches the referrer URL host name
337 2 The dictionary URL host name domain matches the parent domain of the
338 referrer URL host name
339 3 The parent domain of the referrer URL host name is not a top level
340 domain
341 4 The dictionary URL is not an HTTPS URL.
342 */
343 // Item (1) above implies item (2). Spec should be updated.
344 // I take "host name match" to be "is identical to"
345 if (referring_url.host() != dictionary_url.host()) {
346 SdchErrorRecovery(DICTIONARY_LOAD_ATTEMPT_FROM_DIFFERENT_HOST);
347 return false;
348 }
349 if (referring_url.SchemeIs("https")) {
350 SdchErrorRecovery(DICTIONARY_SELECTED_FOR_SSL);
351 return false;
352 }
353
354 // TODO(jar): Remove this failsafe conservative hack which is more restrictive
355 // than current SDCH spec when needed, and justified by security audit.
356 if (!referring_url.SchemeIs("http")) {
357 SdchErrorRecovery(DICTIONARY_SELECTED_FROM_NON_HTTP);
358 return false;
359 }
360
361 return true;
362 }
363
AddSdchDictionary(const std::string & dictionary_text,const GURL & dictionary_url)364 bool SdchManager::AddSdchDictionary(const std::string& dictionary_text,
365 const GURL& dictionary_url) {
366 std::string client_hash;
367 std::string server_hash;
368 GenerateHash(dictionary_text, &client_hash, &server_hash);
369 if (dictionaries_.find(server_hash) != dictionaries_.end()) {
370 SdchErrorRecovery(DICTIONARY_ALREADY_LOADED);
371 return false; // Already loaded.
372 }
373
374 std::string domain, path;
375 std::set<int> ports;
376 base::Time expiration(base::Time::Now() + base::TimeDelta::FromDays(30));
377
378 if (dictionary_text.empty()) {
379 SdchErrorRecovery(DICTIONARY_HAS_NO_TEXT);
380 return false; // Missing header.
381 }
382
383 size_t header_end = dictionary_text.find("\n\n");
384 if (std::string::npos == header_end) {
385 SdchErrorRecovery(DICTIONARY_HAS_NO_HEADER);
386 return false; // Missing header.
387 }
388 size_t line_start = 0; // Start of line being parsed.
389 while (1) {
390 size_t line_end = dictionary_text.find('\n', line_start);
391 DCHECK(std::string::npos != line_end);
392 DCHECK(line_end <= header_end);
393
394 size_t colon_index = dictionary_text.find(':', line_start);
395 if (std::string::npos == colon_index) {
396 SdchErrorRecovery(DICTIONARY_HEADER_LINE_MISSING_COLON);
397 return false; // Illegal line missing a colon.
398 }
399
400 if (colon_index > line_end)
401 break;
402
403 size_t value_start = dictionary_text.find_first_not_of(" \t",
404 colon_index + 1);
405 if (std::string::npos != value_start) {
406 if (value_start >= line_end)
407 break;
408 std::string name(dictionary_text, line_start, colon_index - line_start);
409 std::string value(dictionary_text, value_start, line_end - value_start);
410 name = StringToLowerASCII(name);
411 if (name == "domain") {
412 domain = value;
413 } else if (name == "path") {
414 path = value;
415 } else if (name == "format-version") {
416 if (value != "1.0")
417 return false;
418 } else if (name == "max-age") {
419 int64 seconds;
420 base::StringToInt64(value, &seconds);
421 expiration = base::Time::Now() + base::TimeDelta::FromSeconds(seconds);
422 } else if (name == "port") {
423 int port;
424 base::StringToInt(value, &port);
425 if (port >= 0)
426 ports.insert(port);
427 }
428 }
429
430 if (line_end >= header_end)
431 break;
432 line_start = line_end + 1;
433 }
434
435 if (!Dictionary::CanSet(domain, path, ports, dictionary_url))
436 return false;
437
438 // TODO(jar): Remove these hacks to preclude a DOS attack involving piles of
439 // useless dictionaries. We should probably have a cache eviction plan,
440 // instead of just blocking additions. For now, with the spec in flux, it
441 // is probably not worth doing eviction handling.
442 if (kMaxDictionarySize < dictionary_text.size()) {
443 SdchErrorRecovery(DICTIONARY_IS_TOO_LARGE);
444 return false;
445 }
446 if (kMaxDictionaryCount <= dictionaries_.size()) {
447 SdchErrorRecovery(DICTIONARY_COUNT_EXCEEDED);
448 return false;
449 }
450
451 UMA_HISTOGRAM_COUNTS("Sdch3.Dictionary size loaded", dictionary_text.size());
452 DVLOG(1) << "Loaded dictionary with client hash " << client_hash
453 << " and server hash " << server_hash;
454 Dictionary* dictionary =
455 new Dictionary(dictionary_text, header_end + 2, client_hash,
456 dictionary_url, domain, path, expiration, ports);
457 dictionary->AddRef();
458 dictionaries_[server_hash] = dictionary;
459 return true;
460 }
461
GetVcdiffDictionary(const std::string & server_hash,const GURL & referring_url,Dictionary ** dictionary)462 void SdchManager::GetVcdiffDictionary(const std::string& server_hash,
463 const GURL& referring_url, Dictionary** dictionary) {
464 *dictionary = NULL;
465 DictionaryMap::iterator it = dictionaries_.find(server_hash);
466 if (it == dictionaries_.end()) {
467 return;
468 }
469 Dictionary* matching_dictionary = it->second;
470 if (!matching_dictionary->CanUse(referring_url))
471 return;
472 *dictionary = matching_dictionary;
473 }
474
475 // TODO(jar): If we have evictions from the dictionaries_, then we need to
476 // change this interface to return a list of reference counted Dictionary
477 // instances that can be used if/when a server specifies one.
GetAvailDictionaryList(const GURL & target_url,std::string * list)478 void SdchManager::GetAvailDictionaryList(const GURL& target_url,
479 std::string* list) {
480 int count = 0;
481 for (DictionaryMap::iterator it = dictionaries_.begin();
482 it != dictionaries_.end(); ++it) {
483 if (!it->second->CanAdvertise(target_url))
484 continue;
485 ++count;
486 if (!list->empty())
487 list->append(",");
488 list->append(it->second->client_hash());
489 }
490 // Watch to see if we have corrupt or numerous dictionaries.
491 if (count > 0)
492 UMA_HISTOGRAM_COUNTS("Sdch3.Advertisement_Count", count);
493 }
494
495 // static
GenerateHash(const std::string & dictionary_text,std::string * client_hash,std::string * server_hash)496 void SdchManager::GenerateHash(const std::string& dictionary_text,
497 std::string* client_hash, std::string* server_hash) {
498 char binary_hash[32];
499 crypto::SHA256HashString(dictionary_text, binary_hash, sizeof(binary_hash));
500
501 std::string first_48_bits(&binary_hash[0], 6);
502 std::string second_48_bits(&binary_hash[6], 6);
503 UrlSafeBase64Encode(first_48_bits, client_hash);
504 UrlSafeBase64Encode(second_48_bits, server_hash);
505
506 DCHECK_EQ(server_hash->length(), 8u);
507 DCHECK_EQ(client_hash->length(), 8u);
508 }
509
510 //------------------------------------------------------------------------------
511 // Methods for supporting latency experiments.
512
AllowLatencyExperiment(const GURL & url) const513 bool SdchManager::AllowLatencyExperiment(const GURL& url) const {
514 return allow_latency_experiment_.end() !=
515 allow_latency_experiment_.find(url.host());
516 }
517
SetAllowLatencyExperiment(const GURL & url,bool enable)518 void SdchManager::SetAllowLatencyExperiment(const GURL& url, bool enable) {
519 if (enable) {
520 allow_latency_experiment_.insert(url.host());
521 return;
522 }
523 ExperimentSet::iterator it = allow_latency_experiment_.find(url.host());
524 if (allow_latency_experiment_.end() == it)
525 return; // It was already erased, or never allowed.
526 SdchErrorRecovery(LATENCY_TEST_DISALLOWED);
527 allow_latency_experiment_.erase(it);
528 }
529
530 // static
UrlSafeBase64Encode(const std::string & input,std::string * output)531 void SdchManager::UrlSafeBase64Encode(const std::string& input,
532 std::string* output) {
533 // Since this is only done during a dictionary load, and hashes are only 8
534 // characters, we just do the simple fixup, rather than rewriting the encoder.
535 base::Base64Encode(input, output);
536 for (size_t i = 0; i < output->size(); ++i) {
537 switch (output->data()[i]) {
538 case '+':
539 (*output)[i] = '-';
540 continue;
541 case '/':
542 (*output)[i] = '_';
543 continue;
544 default:
545 continue;
546 }
547 }
548 }
549
550 } // namespace net
551