1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "base/base64.h"
6 #include "base/field_trial.h"
7 #include "base/histogram.h"
8 #include "base/logging.h"
9 #include "base/sha2.h"
10 #include "base/string_util.h"
11 #include "net/base/registry_controlled_domain.h"
12 #include "net/base/sdch_manager.h"
13 #include "net/url_request/url_request_http_job.h"
14
15 using base::Time;
16 using base::TimeDelta;
17
18 //------------------------------------------------------------------------------
19 // static
20 const size_t SdchManager::kMaxDictionarySize = 1000000;
21
22 // static
23 const size_t SdchManager::kMaxDictionaryCount = 20;
24
25 // static
26 SdchManager* SdchManager::global_;
27
28 // static
Global()29 SdchManager* SdchManager::Global() {
30 return global_;
31 }
32
33 // static
SdchErrorRecovery(ProblemCodes problem)34 void SdchManager::SdchErrorRecovery(ProblemCodes problem) {
35 UMA_HISTOGRAM_ENUMERATION("Sdch3.ProblemCodes_4", problem, MAX_PROBLEM_CODE);
36 }
37
38 // static
ClearBlacklistings()39 void SdchManager::ClearBlacklistings() {
40 Global()->blacklisted_domains_.clear();
41 Global()->exponential_blacklist_count.clear();
42 }
43
44 // static
ClearDomainBlacklisting(const std::string & domain)45 void SdchManager::ClearDomainBlacklisting(const std::string& domain) {
46 Global()->blacklisted_domains_.erase(StringToLowerASCII(domain));
47 }
48
49 // static
BlackListDomainCount(const std::string & domain)50 int SdchManager::BlackListDomainCount(const std::string& domain) {
51 if (Global()->blacklisted_domains_.end() ==
52 Global()->blacklisted_domains_.find(domain))
53 return 0;
54 return Global()->blacklisted_domains_[StringToLowerASCII(domain)];
55 }
56
57 // static
BlacklistDomainExponential(const std::string & domain)58 int SdchManager::BlacklistDomainExponential(const std::string& domain) {
59 if (Global()->exponential_blacklist_count.end() ==
60 Global()->exponential_blacklist_count.find(domain))
61 return 0;
62 return Global()->exponential_blacklist_count[StringToLowerASCII(domain)];
63 }
64
65 //------------------------------------------------------------------------------
SdchManager()66 SdchManager::SdchManager() : sdch_enabled_(false) {
67 DCHECK(!global_);
68 global_ = this;
69 }
70
~SdchManager()71 SdchManager::~SdchManager() {
72 DCHECK(global_ == this);
73 while (!dictionaries_.empty()) {
74 DictionaryMap::iterator it = dictionaries_.begin();
75 it->second->Release();
76 dictionaries_.erase(it->first);
77 }
78 global_ = NULL;
79 }
80
81 // static
Shutdown()82 void SdchManager::Shutdown() {
83 if (!global_ )
84 return;
85 global_->fetcher_.reset(NULL);
86 }
87
88 // static
BlacklistDomain(const GURL & url)89 void SdchManager::BlacklistDomain(const GURL& url) {
90 if (!global_ )
91 return;
92 global_->SetAllowLatencyExperiment(url, false);
93
94 std::string domain(StringToLowerASCII(url.host()));
95 int count = global_->blacklisted_domains_[domain];
96 if (count > 0)
97 return; // Domain is already blacklisted.
98
99 count = 1 + 2 * global_->exponential_blacklist_count[domain];
100 if (count > 0)
101 global_->exponential_blacklist_count[domain] = count;
102 else
103 count = INT_MAX;
104
105 global_->blacklisted_domains_[domain] = count;
106 }
107
108 // static
BlacklistDomainForever(const GURL & url)109 void SdchManager::BlacklistDomainForever(const GURL& url) {
110 if (!global_ )
111 return;
112 global_->SetAllowLatencyExperiment(url, false);
113
114 std::string domain(StringToLowerASCII(url.host()));
115 global_->exponential_blacklist_count[domain] = INT_MAX;
116 global_->blacklisted_domains_[domain] = INT_MAX;
117 }
118
EnableSdchSupport(const std::string & domain)119 void SdchManager::EnableSdchSupport(const std::string& domain) {
120 // We presume that there is a SDCH manager instance.
121 global_->supported_domain_ = domain;
122 global_->sdch_enabled_ = true;
123 }
124
IsInSupportedDomain(const GURL & url)125 const bool SdchManager::IsInSupportedDomain(const GURL& url) {
126 if (!sdch_enabled_ )
127 return false;
128 if (!supported_domain_.empty() &&
129 !url.DomainIs(supported_domain_.data(), supported_domain_.size()))
130 return false; // It is not the singular supported domain.
131
132 if (blacklisted_domains_.empty())
133 return true;
134
135 std::string domain(StringToLowerASCII(url.host()));
136 DomainCounter::iterator it = blacklisted_domains_.find(domain);
137 if (blacklisted_domains_.end() == it)
138 return true;
139
140 int count = it->second - 1;
141 if (count > 0)
142 blacklisted_domains_[domain] = count;
143 else
144 blacklisted_domains_.erase(domain);
145 SdchErrorRecovery(DOMAIN_BLACKLIST_INCLUDES_TARGET);
146 return false;
147 }
148
CanFetchDictionary(const GURL & referring_url,const GURL & dictionary_url) const149 bool SdchManager::CanFetchDictionary(const GURL& referring_url,
150 const GURL& dictionary_url) const {
151 /* The user agent may retrieve a dictionary from the dictionary URL if all of
152 the following are true:
153 1 The dictionary URL host name matches the referrer URL host name
154 2 The dictionary URL host name domain matches the parent domain of the
155 referrer URL host name
156 3 The parent domain of the referrer URL host name is not a top level
157 domain
158 4 The dictionary URL is not an HTTPS URL.
159 */
160 // Item (1) above implies item (2). Spec should be updated.
161 // I take "host name match" to be "is identical to"
162 if (referring_url.host() != dictionary_url.host()) {
163 SdchErrorRecovery(DICTIONARY_LOAD_ATTEMPT_FROM_DIFFERENT_HOST);
164 return false;
165 }
166 if (referring_url.SchemeIs("https")) {
167 SdchErrorRecovery(DICTIONARY_SELECTED_FOR_SSL);
168 return false;
169 }
170
171 // TODO(jar): Remove this failsafe conservative hack which is more restrictive
172 // than current SDCH spec when needed, and justified by security audit.
173 if (!referring_url.SchemeIs("http")) {
174 SdchErrorRecovery(DICTIONARY_SELECTED_FROM_NON_HTTP);
175 return false;
176 }
177
178 return true;
179 }
180
FetchDictionary(const GURL & request_url,const GURL & dictionary_url)181 void SdchManager::FetchDictionary(const GURL& request_url,
182 const GURL& dictionary_url) {
183 if (SdchManager::Global()->CanFetchDictionary(request_url, dictionary_url) &&
184 fetcher_.get())
185 fetcher_->Schedule(dictionary_url);
186 }
187
AddSdchDictionary(const std::string & dictionary_text,const GURL & dictionary_url)188 bool SdchManager::AddSdchDictionary(const std::string& dictionary_text,
189 const GURL& dictionary_url) {
190 std::string client_hash;
191 std::string server_hash;
192 GenerateHash(dictionary_text, &client_hash, &server_hash);
193 if (dictionaries_.find(server_hash) != dictionaries_.end()) {
194 SdchErrorRecovery(DICTIONARY_ALREADY_LOADED);
195 return false; // Already loaded.
196 }
197
198 std::string domain, path;
199 std::set<int> ports;
200 Time expiration(Time::Now() + TimeDelta::FromDays(30));
201
202 if (dictionary_text.empty()) {
203 SdchErrorRecovery(DICTIONARY_HAS_NO_TEXT);
204 return false; // Missing header.
205 }
206
207 size_t header_end = dictionary_text.find("\n\n");
208 if (std::string::npos == header_end) {
209 SdchErrorRecovery(DICTIONARY_HAS_NO_HEADER);
210 return false; // Missing header.
211 }
212 size_t line_start = 0; // Start of line being parsed.
213 while (1) {
214 size_t line_end = dictionary_text.find('\n', line_start);
215 DCHECK(std::string::npos != line_end);
216 DCHECK(line_end <= header_end);
217
218 size_t colon_index = dictionary_text.find(':', line_start);
219 if (std::string::npos == colon_index) {
220 SdchErrorRecovery(DICTIONARY_HEADER_LINE_MISSING_COLON);
221 return false; // Illegal line missing a colon.
222 }
223
224 if (colon_index > line_end)
225 break;
226
227 size_t value_start = dictionary_text.find_first_not_of(" \t",
228 colon_index + 1);
229 if (std::string::npos != value_start) {
230 if (value_start >= line_end)
231 break;
232 std::string name(dictionary_text, line_start, colon_index - line_start);
233 std::string value(dictionary_text, value_start, line_end - value_start);
234 name = StringToLowerASCII(name);
235 if (name == "domain") {
236 domain = value;
237 } else if (name == "path") {
238 path = value;
239 } else if (name == "format-version") {
240 if (value != "1.0")
241 return false;
242 } else if (name == "max-age") {
243 expiration = Time::Now() + TimeDelta::FromSeconds(StringToInt64(value));
244 } else if (name == "port") {
245 int port = StringToInt(value);
246 if (port >= 0)
247 ports.insert(port);
248 }
249 }
250
251 if (line_end >= header_end)
252 break;
253 line_start = line_end + 1;
254 }
255
256 if (!Dictionary::CanSet(domain, path, ports, dictionary_url))
257 return false;
258
259 // TODO(jar): Remove these hacks to preclude a DOS attack involving piles of
260 // useless dictionaries. We should probably have a cache eviction plan,
261 // instead of just blocking additions. For now, with the spec in flux, it
262 // is probably not worth doing eviction handling.
263 if (kMaxDictionarySize < dictionary_text.size()) {
264 SdchErrorRecovery(DICTIONARY_IS_TOO_LARGE);
265 return false;
266 }
267 if (kMaxDictionaryCount <= dictionaries_.size()) {
268 SdchErrorRecovery(DICTIONARY_COUNT_EXCEEDED);
269 return false;
270 }
271
272 UMA_HISTOGRAM_COUNTS("Sdch3.Dictionary size loaded", dictionary_text.size());
273 DLOG(INFO) << "Loaded dictionary with client hash " << client_hash <<
274 " and server hash " << server_hash;
275 Dictionary* dictionary =
276 new Dictionary(dictionary_text, header_end + 2, client_hash,
277 dictionary_url, domain, path, expiration, ports);
278 dictionary->AddRef();
279 dictionaries_[server_hash] = dictionary;
280 return true;
281 }
282
GetVcdiffDictionary(const std::string & server_hash,const GURL & referring_url,Dictionary ** dictionary)283 void SdchManager::GetVcdiffDictionary(const std::string& server_hash,
284 const GURL& referring_url, Dictionary** dictionary) {
285 *dictionary = NULL;
286 DictionaryMap::iterator it = dictionaries_.find(server_hash);
287 if (it == dictionaries_.end()) {
288 return;
289 }
290 Dictionary* matching_dictionary = it->second;
291 if (!matching_dictionary->CanUse(referring_url))
292 return;
293 *dictionary = matching_dictionary;
294 }
295
296 // TODO(jar): If we have evictions from the dictionaries_, then we need to
297 // change this interface to return a list of reference counted Dictionary
298 // instances that can be used if/when a server specifies one.
GetAvailDictionaryList(const GURL & target_url,std::string * list)299 void SdchManager::GetAvailDictionaryList(const GURL& target_url,
300 std::string* list) {
301 int count = 0;
302 for (DictionaryMap::iterator it = dictionaries_.begin();
303 it != dictionaries_.end(); ++it) {
304 if (!it->second->CanAdvertise(target_url))
305 continue;
306 ++count;
307 if (!list->empty())
308 list->append(",");
309 list->append(it->second->client_hash());
310 }
311 // Watch to see if we have corrupt or numerous dictionaries.
312 if (count > 0)
313 UMA_HISTOGRAM_COUNTS("Sdch3.Advertisement_Count", count);
314 }
315
Dictionary(const std::string & dictionary_text,size_t offset,const std::string & client_hash,const GURL & gurl,const std::string & domain,const std::string & path,const Time & expiration,const std::set<int> ports)316 SdchManager::Dictionary::Dictionary(const std::string& dictionary_text,
317 size_t offset, const std::string& client_hash, const GURL& gurl,
318 const std::string& domain, const std::string& path, const Time& expiration,
319 const std::set<int> ports)
320 : text_(dictionary_text, offset),
321 client_hash_(client_hash),
322 url_(gurl),
323 domain_(domain),
324 path_(path),
325 expiration_(expiration),
326 ports_(ports) {
327 }
328
329 // static
GenerateHash(const std::string & dictionary_text,std::string * client_hash,std::string * server_hash)330 void SdchManager::GenerateHash(const std::string& dictionary_text,
331 std::string* client_hash, std::string* server_hash) {
332 char binary_hash[32];
333 base::SHA256HashString(dictionary_text, binary_hash, sizeof(binary_hash));
334
335 std::string first_48_bits(&binary_hash[0], 6);
336 std::string second_48_bits(&binary_hash[6], 6);
337 UrlSafeBase64Encode(first_48_bits, client_hash);
338 UrlSafeBase64Encode(second_48_bits, server_hash);
339
340 DCHECK_EQ(server_hash->length(), 8u);
341 DCHECK_EQ(client_hash->length(), 8u);
342 }
343
344 // static
UrlSafeBase64Encode(const std::string & input,std::string * output)345 void SdchManager::UrlSafeBase64Encode(const std::string& input,
346 std::string* output) {
347 // Since this is only done during a dictionary load, and hashes are only 8
348 // characters, we just do the simple fixup, rather than rewriting the encoder.
349 base::Base64Encode(input, output);
350 for (size_t i = 0; i < output->size(); ++i) {
351 switch (output->data()[i]) {
352 case '+':
353 (*output)[i] = '-';
354 continue;
355 case '/':
356 (*output)[i] = '_';
357 continue;
358 default:
359 continue;
360 }
361 }
362 }
363
364 //------------------------------------------------------------------------------
365 // Security functions restricting loads and use of dictionaries.
366
367 // static
CanSet(const std::string & domain,const std::string & path,const std::set<int> ports,const GURL & dictionary_url)368 bool SdchManager::Dictionary::CanSet(const std::string& domain,
369 const std::string& path,
370 const std::set<int> ports,
371 const GURL& dictionary_url) {
372 if (!SdchManager::Global()->IsInSupportedDomain(dictionary_url))
373 return false;
374 /*
375 A dictionary is invalid and must not be stored if any of the following are
376 true:
377 1. The dictionary has no Domain attribute.
378 2. The effective host name that derives from the referer URL host name does
379 not domain-match the Domain attribute.
380 3. The Domain attribute is a top level domain.
381 4. The referer URL host is a host domain name (not IP address) and has the
382 form HD, where D is the value of the Domain attribute, and H is a string
383 that contains one or more dots.
384 5. If the dictionary has a Port attribute and the referer URL's port was not
385 in the list.
386 */
387
388 // TODO(jar): Redirects in dictionary fetches might plausibly be problematic,
389 // and hence the conservative approach is to not allow any redirects (if there
390 // were any... then don't allow the dictionary to be set).
391
392 if (domain.empty()) {
393 SdchErrorRecovery(DICTIONARY_MISSING_DOMAIN_SPECIFIER);
394 return false; // Domain is required.
395 }
396 if (net::RegistryControlledDomainService::GetDomainAndRegistry(domain).size()
397 == 0) {
398 SdchErrorRecovery(DICTIONARY_SPECIFIES_TOP_LEVEL_DOMAIN);
399 return false; // domain was a TLD.
400 }
401 if (!Dictionary::DomainMatch(dictionary_url, domain)) {
402 SdchErrorRecovery(DICTIONARY_DOMAIN_NOT_MATCHING_SOURCE_URL);
403 return false;
404 }
405
406 std::string referrer_url_host = dictionary_url.host();
407 size_t postfix_domain_index = referrer_url_host.rfind(domain);
408 // See if it is indeed a postfix, or just an internal string.
409 if (referrer_url_host.size() == postfix_domain_index + domain.size()) {
410 // It is a postfix... so check to see if there's a dot in the prefix.
411 size_t end_of_host_index = referrer_url_host.find_first_of('.');
412 if (referrer_url_host.npos != end_of_host_index &&
413 end_of_host_index < postfix_domain_index) {
414 SdchErrorRecovery(DICTIONARY_REFERER_URL_HAS_DOT_IN_PREFIX);
415 return false;
416 }
417 }
418
419 if (!ports.empty()
420 && 0 == ports.count(dictionary_url.EffectiveIntPort())) {
421 SdchErrorRecovery(DICTIONARY_PORT_NOT_MATCHING_SOURCE_URL);
422 return false;
423 }
424 return true;
425 }
426
427 // static
CanUse(const GURL & referring_url)428 bool SdchManager::Dictionary::CanUse(const GURL& referring_url) {
429 if (!SdchManager::Global()->IsInSupportedDomain(referring_url))
430 return false;
431 /*
432 1. The request URL's host name domain-matches the Domain attribute of the
433 dictionary.
434 2. If the dictionary has a Port attribute, the request port is one of the
435 ports listed in the Port attribute.
436 3. The request URL path-matches the path attribute of the dictionary.
437 4. The request is not an HTTPS request.
438 */
439 if (!DomainMatch(referring_url, domain_)) {
440 SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_DOMAIN);
441 return false;
442 }
443 if (!ports_.empty()
444 && 0 == ports_.count(referring_url.EffectiveIntPort())) {
445 SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_PORT_LIST);
446 return false;
447 }
448 if (path_.size() && !PathMatch(referring_url.path(), path_)) {
449 SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_PATH);
450 return false;
451 }
452 if (referring_url.SchemeIsSecure()) {
453 SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_SCHEME);
454 return false;
455 }
456
457 // TODO(jar): Remove overly restrictive failsafe test (added per security
458 // review) when we have a need to be more general.
459 if (!referring_url.SchemeIs("http")) {
460 SdchErrorRecovery(ATTEMPT_TO_DECODE_NON_HTTP_DATA);
461 return false;
462 }
463
464 return true;
465 }
466
CanAdvertise(const GURL & target_url)467 bool SdchManager::Dictionary::CanAdvertise(const GURL& target_url) {
468 if (!SdchManager::Global()->IsInSupportedDomain(target_url))
469 return false;
470 /* The specific rules of when a dictionary should be advertised in an
471 Avail-Dictionary header are modeled after the rules for cookie scoping. The
472 terms "domain-match" and "pathmatch" are defined in RFC 2965 [6]. A
473 dictionary may be advertised in the Avail-Dictionaries header exactly when
474 all of the following are true:
475 1. The server's effective host name domain-matches the Domain attribute of
476 the dictionary.
477 2. If the dictionary has a Port attribute, the request port is one of the
478 ports listed in the Port attribute.
479 3. The request URI path-matches the path header of the dictionary.
480 4. The request is not an HTTPS request.
481 */
482 if (!DomainMatch(target_url, domain_))
483 return false;
484 if (!ports_.empty() && 0 == ports_.count(target_url.EffectiveIntPort()))
485 return false;
486 if (path_.size() && !PathMatch(target_url.path(), path_))
487 return false;
488 if (target_url.SchemeIsSecure())
489 return false;
490 if (Time::Now() > expiration_)
491 return false;
492 return true;
493 }
494
PathMatch(const std::string & path,const std::string & restriction)495 bool SdchManager::Dictionary::PathMatch(const std::string& path,
496 const std::string& restriction) {
497 /* Must be either:
498 1. P2 is equal to P1
499 2. P2 is a prefix of P1 and either the final character in P2 is "/" or the
500 character following P2 in P1 is "/".
501 */
502 if (path == restriction)
503 return true;
504 size_t prefix_length = restriction.size();
505 if (prefix_length > path.size())
506 return false; // Can't be a prefix.
507 if (0 != path.compare(0, prefix_length, restriction))
508 return false;
509 return restriction[prefix_length - 1] == '/' || path[prefix_length] == '/';
510 }
511
512 // static
DomainMatch(const GURL & gurl,const std::string & restriction)513 bool SdchManager::Dictionary::DomainMatch(const GURL& gurl,
514 const std::string& restriction) {
515 // TODO(jar): This is not precisely a domain match definition.
516 return gurl.DomainIs(restriction.data(), restriction.size());
517 }
518
519 //------------------------------------------------------------------------------
520 // Methods for supporting latency experiments.
521
AllowLatencyExperiment(const GURL & url) const522 bool SdchManager::AllowLatencyExperiment(const GURL& url) const {
523 return allow_latency_experiment_.end() !=
524 allow_latency_experiment_.find(url.host());
525 }
526
SetAllowLatencyExperiment(const GURL & url,bool enable)527 void SdchManager::SetAllowLatencyExperiment(const GURL& url, bool enable) {
528 if (enable) {
529 allow_latency_experiment_.insert(url.host());
530 return;
531 }
532 ExperimentSet::iterator it = allow_latency_experiment_.find(url.host());
533 if (allow_latency_experiment_.end() == it)
534 return; // It was already erased, or never allowed.
535 SdchErrorRecovery(LATENCY_TEST_DISALLOWED);
536 allow_latency_experiment_.erase(it);
537 }
538