1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "chrome/browser/net/predictor.h"
6
7 #include <algorithm>
8 #include <cmath>
9 #include <set>
10 #include <sstream>
11
12 #include "base/compiler_specific.h"
13 #include "base/metrics/histogram.h"
14 #include "base/string_util.h"
15 #include "base/time.h"
16 #include "base/values.h"
17 #include "chrome/browser/net/preconnect.h"
18 #include "content/browser/browser_thread.h"
19 #include "net/base/address_list.h"
20 #include "net/base/completion_callback.h"
21 #include "net/base/host_port_pair.h"
22 #include "net/base/host_resolver.h"
23 #include "net/base/net_errors.h"
24 #include "net/base/net_log.h"
25
26 using base::TimeDelta;
27
28 namespace chrome_browser_net {
29
30 // static
31 const double Predictor::kPreconnectWorthyExpectedValue = 0.8;
32 // static
33 const double Predictor::kDNSPreresolutionWorthyExpectedValue = 0.1;
34 // static
35 const double Predictor::kDiscardableExpectedValue = 0.05;
36 // The goal is of trimming is to to reduce the importance (number of expected
37 // subresources needed) by a factor of 2 after about 24 hours of uptime. We will
38 // trim roughly once-an-hour of uptime. The ratio to use in each trim operation
39 // is then the 24th root of 0.5. If a user only surfs for 4 hours a day, then
40 // after about 6 days they will have halved all their estimates of subresource
41 // connections. Once this falls below kDiscardableExpectedValue the referrer
42 // will be discarded.
43 // TODO(jar): Measure size of referrer lists in the field. Consider an adaptive
44 // system that uses a higher trim ratio when the list is large.
45 // static
46 const double Predictor::kReferrerTrimRatio = 0.97153;
47
48 // static
49 const TimeDelta Predictor::kDurationBetweenTrimmings = TimeDelta::FromHours(1);
50 // static
51 const TimeDelta Predictor::kDurationBetweenTrimmingIncrements =
52 TimeDelta::FromSeconds(15);
53 // static
54 const size_t Predictor::kUrlsTrimmedPerIncrement = 5u;
55
56 class Predictor::LookupRequest {
57 public:
LookupRequest(Predictor * predictor,net::HostResolver * host_resolver,const GURL & url)58 LookupRequest(Predictor* predictor,
59 net::HostResolver* host_resolver,
60 const GURL& url)
61 : ALLOW_THIS_IN_INITIALIZER_LIST(
62 net_callback_(this, &LookupRequest::OnLookupFinished)),
63 predictor_(predictor),
64 url_(url),
65 resolver_(host_resolver) {
66 }
67
68 // Return underlying network resolver status.
69 // net::OK ==> Host was found synchronously.
70 // net:ERR_IO_PENDING ==> Network will callback later with result.
71 // anything else ==> Host was not found synchronously.
Start()72 int Start() {
73 net::HostResolver::RequestInfo resolve_info(
74 net::HostPortPair::FromURL(url_));
75
76 // Make a note that this is a speculative resolve request. This allows us
77 // to separate it from real navigations in the observer's callback, and
78 // lets the HostResolver know it can de-prioritize it.
79 resolve_info.set_is_speculative(true);
80 return resolver_.Resolve(
81 resolve_info, &addresses_, &net_callback_, net::BoundNetLog());
82 }
83
84 private:
OnLookupFinished(int result)85 void OnLookupFinished(int result) {
86 predictor_->OnLookupFinished(this, url_, result == net::OK);
87 }
88
89 // HostResolver will call us using this callback when resolution is complete.
90 net::CompletionCallbackImpl<LookupRequest> net_callback_;
91
92 Predictor* predictor_; // The predictor which started us.
93
94 const GURL url_; // Hostname to resolve.
95 net::SingleRequestHostResolver resolver_;
96 net::AddressList addresses_;
97
98 DISALLOW_COPY_AND_ASSIGN(LookupRequest);
99 };
100
Predictor(net::HostResolver * host_resolver,TimeDelta max_dns_queue_delay,size_t max_concurrent,bool preconnect_enabled)101 Predictor::Predictor(net::HostResolver* host_resolver,
102 TimeDelta max_dns_queue_delay,
103 size_t max_concurrent,
104 bool preconnect_enabled)
105 : peak_pending_lookups_(0),
106 shutdown_(false),
107 max_concurrent_dns_lookups_(max_concurrent),
108 max_dns_queue_delay_(max_dns_queue_delay),
109 host_resolver_(host_resolver),
110 preconnect_enabled_(preconnect_enabled),
111 consecutive_omnibox_preconnect_count_(0),
112 next_trim_time_(base::TimeTicks::Now() + kDurationBetweenTrimmings),
113 ALLOW_THIS_IN_INITIALIZER_LIST(trim_task_factory_(this)) {
114 }
115
~Predictor()116 Predictor::~Predictor() {
117 DCHECK(shutdown_);
118 }
119
Shutdown()120 void Predictor::Shutdown() {
121 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
122 DCHECK(!shutdown_);
123 shutdown_ = true;
124
125 std::set<LookupRequest*>::iterator it;
126 for (it = pending_lookups_.begin(); it != pending_lookups_.end(); ++it)
127 delete *it;
128 }
129
130 // Overloaded Resolve() to take a vector of names.
ResolveList(const UrlList & urls,UrlInfo::ResolutionMotivation motivation)131 void Predictor::ResolveList(const UrlList& urls,
132 UrlInfo::ResolutionMotivation motivation) {
133 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
134
135 for (UrlList::const_iterator it = urls.begin(); it < urls.end(); ++it) {
136 AppendToResolutionQueue(*it, motivation);
137 }
138 }
139
140 // Basic Resolve() takes an invidual name, and adds it
141 // to the queue.
Resolve(const GURL & url,UrlInfo::ResolutionMotivation motivation)142 void Predictor::Resolve(const GURL& url,
143 UrlInfo::ResolutionMotivation motivation) {
144 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
145 if (!url.has_host())
146 return;
147 AppendToResolutionQueue(url, motivation);
148 }
149
LearnFromNavigation(const GURL & referring_url,const GURL & target_url)150 void Predictor::LearnFromNavigation(const GURL& referring_url,
151 const GURL& target_url) {
152 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
153 DCHECK(referring_url == referring_url.GetWithEmptyPath());
154 DCHECK(target_url == target_url.GetWithEmptyPath());
155 if (referring_url.has_host()) {
156 referrers_[referring_url].SuggestHost(target_url);
157 // Possibly do some referrer trimming.
158 TrimReferrers();
159 }
160 }
161
162 enum SubresourceValue {
163 PRECONNECTION,
164 PRERESOLUTION,
165 TOO_NEW,
166 SUBRESOURCE_VALUE_MAX
167 };
168
AnticipateOmniboxUrl(const GURL & url,bool preconnectable)169 void Predictor::AnticipateOmniboxUrl(const GURL& url, bool preconnectable) {
170 std::string host = url.HostNoBrackets();
171 bool is_new_host_request = (host != last_omnibox_host_);
172 last_omnibox_host_ = host;
173
174 UrlInfo::ResolutionMotivation motivation(UrlInfo::OMNIBOX_MOTIVATED);
175 base::TimeTicks now = base::TimeTicks::Now();
176
177 if (preconnect_enabled()) {
178 if (preconnectable && !is_new_host_request) {
179 ++consecutive_omnibox_preconnect_count_;
180 // The omnibox suggests a search URL (for which we can preconnect) after
181 // one or two characters are typed, even though such typing often (1 in
182 // 3?) becomes a real URL. This code waits till is has more evidence of a
183 // preconnectable URL (search URL) before forming a preconnection, so as
184 // to reduce the useless preconnect rate.
185 // Perchance this logic should be pushed back into the omnibox, where the
186 // actual characters typed, such as a space, can better forcast whether
187 // we need to search/preconnect or not. By waiting for at least 4
188 // characters in a row that have lead to a search proposal, we avoid
189 // preconnections for a prefix like "www." and we also wait until we have
190 // at least a 4 letter word to search for.
191 // Each character typed appears to induce 2 calls to
192 // AnticipateOmniboxUrl(), so we double 4 characters and limit at 8
193 // requests.
194 // TODO(jar): Use an A/B test to optimize this.
195 const int kMinConsecutiveRequests = 8;
196 if (consecutive_omnibox_preconnect_count_ >= kMinConsecutiveRequests) {
197 // TODO(jar): The wild guess of 30 seconds could be tuned/tested, but it
198 // currently is just a guess that most sockets will remain open for at
199 // least 30 seconds. This avoids a lot of cross-thread posting, and
200 // exercise of the network stack in this common case.
201 const int kMaxSearchKeepaliveSeconds(30);
202 if ((now - last_omnibox_preconnect_).InSeconds() <
203 kMaxSearchKeepaliveSeconds)
204 return; // We've done a preconnect recently.
205 last_omnibox_preconnect_ = now;
206 const int kConnectionsNeeded = 1;
207 PreconnectOnUIThread(CanonicalizeUrl(url), motivation,
208 kConnectionsNeeded);
209 return; // Skip pre-resolution, since we'll open a connection.
210 }
211 } else {
212 consecutive_omnibox_preconnect_count_ = 0;
213 }
214 }
215
216 // Fall through and consider pre-resolution.
217
218 // Omnibox tends to call in pairs (just a few milliseconds apart), and we
219 // really don't need to keep resolving a name that often.
220 // TODO(jar): A/B tests could check for perf impact of the early returns.
221 if (!is_new_host_request) {
222 const int kMinPreresolveSeconds(10);
223 if (kMinPreresolveSeconds > (now - last_omnibox_preresolve_).InSeconds())
224 return;
225 }
226 last_omnibox_preresolve_ = now;
227
228 // Perform at least DNS pre-resolution.
229 BrowserThread::PostTask(
230 BrowserThread::IO,
231 FROM_HERE,
232 NewRunnableMethod(this, &Predictor::Resolve, CanonicalizeUrl(url),
233 motivation));
234 }
235
PreconnectUrlAndSubresources(const GURL & url)236 void Predictor::PreconnectUrlAndSubresources(const GURL& url) {
237 if (preconnect_enabled()) {
238 std::string host = url.HostNoBrackets();
239 UrlInfo::ResolutionMotivation motivation(UrlInfo::EARLY_LOAD_MOTIVATED);
240 const int kConnectionsNeeded = 1;
241 PreconnectOnUIThread(CanonicalizeUrl(url), motivation,
242 kConnectionsNeeded);
243 PredictFrameSubresources(url.GetWithEmptyPath());
244 }
245 }
246
PredictFrameSubresources(const GURL & url)247 void Predictor::PredictFrameSubresources(const GURL& url) {
248 DCHECK(url.GetWithEmptyPath() == url);
249 // Add one pass through the message loop to allow current navigation to
250 // proceed.
251 BrowserThread::PostTask(
252 BrowserThread::IO,
253 FROM_HERE,
254 NewRunnableMethod(this, &Predictor::PrepareFrameSubresources, url));
255 }
256
PrepareFrameSubresources(const GURL & url)257 void Predictor::PrepareFrameSubresources(const GURL& url) {
258 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
259 DCHECK(url.GetWithEmptyPath() == url);
260 Referrers::iterator it = referrers_.find(url);
261 if (referrers_.end() == it) {
262 // Only when we don't know anything about this url, make 2 connections
263 // available. We could do this completely via learning (by prepopulating
264 // the referrer_ list with this expected value), but it would swell the
265 // size of the list with all the "Leaf" nodes in the tree (nodes that don't
266 // load any subresources). If we learn about this resource, we will instead
267 // provide a more carefully estimated preconnection count.
268 if (preconnect_enabled_)
269 PreconnectOnIOThread(url, UrlInfo::SELF_REFERAL_MOTIVATED, 2);
270 return;
271 }
272
273 Referrer* referrer = &(it->second);
274 referrer->IncrementUseCount();
275 const UrlInfo::ResolutionMotivation motivation =
276 UrlInfo::LEARNED_REFERAL_MOTIVATED;
277 for (Referrer::iterator future_url = referrer->begin();
278 future_url != referrer->end(); ++future_url) {
279 SubresourceValue evalution(TOO_NEW);
280 double connection_expectation = future_url->second.subresource_use_rate();
281 UMA_HISTOGRAM_CUSTOM_COUNTS("Net.PreconnectSubresourceExpectation",
282 static_cast<int>(connection_expectation * 100),
283 10, 5000, 50);
284 future_url->second.ReferrerWasObserved();
285 if (preconnect_enabled_ &&
286 connection_expectation > kPreconnectWorthyExpectedValue) {
287 evalution = PRECONNECTION;
288 future_url->second.IncrementPreconnectionCount();
289 int count = static_cast<int>(std::ceil(connection_expectation));
290 if (url.host() == future_url->first.host())
291 ++count;
292 PreconnectOnIOThread(future_url->first, motivation, count);
293 } else if (connection_expectation > kDNSPreresolutionWorthyExpectedValue) {
294 evalution = PRERESOLUTION;
295 future_url->second.preresolution_increment();
296 UrlInfo* queued_info = AppendToResolutionQueue(future_url->first,
297 motivation);
298 if (queued_info)
299 queued_info->SetReferringHostname(url);
300 }
301 UMA_HISTOGRAM_ENUMERATION("Net.PreconnectSubresourceEval", evalution,
302 SUBRESOURCE_VALUE_MAX);
303 }
304 }
305
306 // Provide sort order so all .com's are together, etc.
307 struct RightToLeftStringSorter {
operator ()chrome_browser_net::RightToLeftStringSorter308 bool operator()(const GURL& left,
309 const GURL& right) const {
310 return string_compare(left.host(), right.host());
311 }
312
string_comparechrome_browser_net::RightToLeftStringSorter313 static bool string_compare(const std::string& left_host,
314 const std::string& right_host) {
315 if (left_host == right_host) return true;
316 size_t left_already_matched = left_host.size();
317 size_t right_already_matched = right_host.size();
318
319 // Ensure both strings have characters.
320 if (!left_already_matched) return true;
321 if (!right_already_matched) return false;
322
323 // Watch for trailing dot, so we'll always be safe to go one beyond dot.
324 if ('.' == left_host[left_already_matched - 1]) {
325 if ('.' != right_host[right_already_matched - 1])
326 return true;
327 // Both have dots at end of string.
328 --left_already_matched;
329 --right_already_matched;
330 } else {
331 if ('.' == right_host[right_already_matched - 1])
332 return false;
333 }
334
335 while (1) {
336 if (!left_already_matched) return true;
337 if (!right_already_matched) return false;
338
339 size_t left_length, right_length;
340 size_t left_start = left_host.find_last_of('.', left_already_matched - 1);
341 if (std::string::npos == left_start) {
342 left_length = left_already_matched;
343 left_already_matched = left_start = 0;
344 } else {
345 left_length = left_already_matched - left_start;
346 left_already_matched = left_start;
347 ++left_start; // Don't compare the dot.
348 }
349 size_t right_start = right_host.find_last_of('.',
350 right_already_matched - 1);
351 if (std::string::npos == right_start) {
352 right_length = right_already_matched;
353 right_already_matched = right_start = 0;
354 } else {
355 right_length = right_already_matched - right_start;
356 right_already_matched = right_start;
357 ++right_start; // Don't compare the dot.
358 }
359
360 int diff = left_host.compare(left_start, left_host.size(),
361 right_host, right_start, right_host.size());
362 if (diff > 0) return false;
363 if (diff < 0) return true;
364 }
365 }
366 };
367
GetHtmlReferrerLists(std::string * output)368 void Predictor::GetHtmlReferrerLists(std::string* output) {
369 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
370 if (referrers_.empty())
371 return;
372
373 // TODO(jar): Remove any plausible JavaScript from names before displaying.
374
375 typedef std::set<GURL, struct RightToLeftStringSorter>
376 SortedNames;
377 SortedNames sorted_names;
378
379 for (Referrers::iterator it = referrers_.begin();
380 referrers_.end() != it; ++it)
381 sorted_names.insert(it->first);
382
383 output->append("<br><table border>");
384 output->append(
385 "<tr><th>Host for Page</th>"
386 "<th>Page Load<br>Count</th>"
387 "<th>Subresource<br>Navigations</th>"
388 "<th>Subresource<br>PreConnects</th>"
389 "<th>Subresource<br>PreResolves</th>"
390 "<th>Expected<br>Connects</th>"
391 "<th>Subresource Spec</th></tr>");
392
393 for (SortedNames::iterator it = sorted_names.begin();
394 sorted_names.end() != it; ++it) {
395 Referrer* referrer = &(referrers_[*it]);
396 bool first_set_of_futures = true;
397 for (Referrer::iterator future_url = referrer->begin();
398 future_url != referrer->end(); ++future_url) {
399 output->append("<tr align=right>");
400 if (first_set_of_futures) {
401 base::StringAppendF(output,
402 "<td rowspan=%d>%s</td><td rowspan=%d>%d</td>",
403 static_cast<int>(referrer->size()),
404 it->spec().c_str(),
405 static_cast<int>(referrer->size()),
406 static_cast<int>(referrer->use_count()));
407 }
408 first_set_of_futures = false;
409 base::StringAppendF(output,
410 "<td>%d</td><td>%d</td><td>%d</td><td>%2.3f</td><td>%s</td></tr>",
411 static_cast<int>(future_url->second.navigation_count()),
412 static_cast<int>(future_url->second.preconnection_count()),
413 static_cast<int>(future_url->second.preresolution_count()),
414 static_cast<double>(future_url->second.subresource_use_rate()),
415 future_url->first.spec().c_str());
416 }
417 }
418 output->append("</table>");
419 }
420
GetHtmlInfo(std::string * output)421 void Predictor::GetHtmlInfo(std::string* output) {
422 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
423 // Local lists for calling UrlInfo
424 UrlInfo::UrlInfoTable name_not_found;
425 UrlInfo::UrlInfoTable name_preresolved;
426
427 // Get copies of all useful data.
428 typedef std::map<GURL, UrlInfo, RightToLeftStringSorter> SortedUrlInfo;
429 SortedUrlInfo snapshot;
430 // UrlInfo supports value semantics, so we can do a shallow copy.
431 for (Results::iterator it(results_.begin()); it != results_.end(); it++)
432 snapshot[it->first] = it->second;
433
434 // Partition the UrlInfo's into categories.
435 for (SortedUrlInfo::iterator it(snapshot.begin());
436 it != snapshot.end(); it++) {
437 if (it->second.was_nonexistent()) {
438 name_not_found.push_back(it->second);
439 continue;
440 }
441 if (!it->second.was_found())
442 continue; // Still being processed.
443 name_preresolved.push_back(it->second);
444 }
445
446 bool brief = false;
447 #ifdef NDEBUG
448 brief = true;
449 #endif // NDEBUG
450
451 // Call for display of each table, along with title.
452 UrlInfo::GetHtmlTable(name_preresolved,
453 "Preresolution DNS records performed for ", brief, output);
454 UrlInfo::GetHtmlTable(name_not_found,
455 "Preresolving DNS records revealed non-existence for ", brief, output);
456 }
457
AppendToResolutionQueue(const GURL & url,UrlInfo::ResolutionMotivation motivation)458 UrlInfo* Predictor::AppendToResolutionQueue(
459 const GURL& url,
460 UrlInfo::ResolutionMotivation motivation) {
461 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
462 DCHECK(url.has_host());
463
464 if (shutdown_)
465 return NULL;
466
467 UrlInfo* info = &results_[url];
468 info->SetUrl(url); // Initialize or DCHECK.
469 // TODO(jar): I need to discard names that have long since expired.
470 // Currently we only add to the domain map :-/
471
472 DCHECK(info->HasUrl(url));
473
474 if (!info->NeedsDnsUpdate()) {
475 info->DLogResultsStats("DNS PrefetchNotUpdated");
476 return NULL;
477 }
478
479 info->SetQueuedState(motivation);
480 work_queue_.Push(url, motivation);
481 StartSomeQueuedResolutions();
482 return info;
483 }
484
StartSomeQueuedResolutions()485 void Predictor::StartSomeQueuedResolutions() {
486 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
487
488 while (!work_queue_.IsEmpty() &&
489 pending_lookups_.size() < max_concurrent_dns_lookups_) {
490 const GURL url(work_queue_.Pop());
491 UrlInfo* info = &results_[url];
492 DCHECK(info->HasUrl(url));
493 info->SetAssignedState();
494
495 if (CongestionControlPerformed(info)) {
496 DCHECK(work_queue_.IsEmpty());
497 return;
498 }
499
500 LookupRequest* request = new LookupRequest(this, host_resolver_, url);
501 int status = request->Start();
502 if (status == net::ERR_IO_PENDING) {
503 // Will complete asynchronously.
504 pending_lookups_.insert(request);
505 peak_pending_lookups_ = std::max(peak_pending_lookups_,
506 pending_lookups_.size());
507 } else {
508 // Completed synchronously (was already cached by HostResolver), or else
509 // there was (equivalently) some network error that prevents us from
510 // finding the name. Status net::OK means it was "found."
511 LookupFinished(request, url, status == net::OK);
512 delete request;
513 }
514 }
515 }
516
CongestionControlPerformed(UrlInfo * info)517 bool Predictor::CongestionControlPerformed(UrlInfo* info) {
518 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
519 // Note: queue_duration is ONLY valid after we go to assigned state.
520 if (info->queue_duration() < max_dns_queue_delay_)
521 return false;
522 // We need to discard all entries in our queue, as we're keeping them waiting
523 // too long. By doing this, we'll have a chance to quickly service urgent
524 // resolutions, and not have a bogged down system.
525 while (true) {
526 info->RemoveFromQueue();
527 if (work_queue_.IsEmpty())
528 break;
529 info = &results_[work_queue_.Pop()];
530 info->SetAssignedState();
531 }
532 return true;
533 }
534
OnLookupFinished(LookupRequest * request,const GURL & url,bool found)535 void Predictor::OnLookupFinished(LookupRequest* request, const GURL& url,
536 bool found) {
537 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
538
539 LookupFinished(request, url, found);
540 pending_lookups_.erase(request);
541 delete request;
542
543 StartSomeQueuedResolutions();
544 }
545
LookupFinished(LookupRequest * request,const GURL & url,bool found)546 void Predictor::LookupFinished(LookupRequest* request, const GURL& url,
547 bool found) {
548 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
549 UrlInfo* info = &results_[url];
550 DCHECK(info->HasUrl(url));
551 if (info->is_marked_to_delete()) {
552 results_.erase(url);
553 } else {
554 if (found)
555 info->SetFoundState();
556 else
557 info->SetNoSuchNameState();
558 }
559 }
560
DiscardAllResults()561 void Predictor::DiscardAllResults() {
562 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
563 // Delete anything listed so far in this session that shows in about:dns.
564 referrers_.clear();
565
566
567 // Try to delete anything in our work queue.
568 while (!work_queue_.IsEmpty()) {
569 // Emulate processing cycle as though host was not found.
570 GURL url = work_queue_.Pop();
571 UrlInfo* info = &results_[url];
572 DCHECK(info->HasUrl(url));
573 info->SetAssignedState();
574 info->SetNoSuchNameState();
575 }
576 // Now every result_ is either resolved, or is being resolved
577 // (see LookupRequest).
578
579 // Step through result_, recording names of all hosts that can't be erased.
580 // We can't erase anything being worked on.
581 Results assignees;
582 for (Results::iterator it = results_.begin(); results_.end() != it; ++it) {
583 GURL url(it->first);
584 UrlInfo* info = &it->second;
585 DCHECK(info->HasUrl(url));
586 if (info->is_assigned()) {
587 info->SetPendingDeleteState();
588 assignees[url] = *info;
589 }
590 }
591 DCHECK(assignees.size() <= max_concurrent_dns_lookups_);
592 results_.clear();
593 // Put back in the names being worked on.
594 for (Results::iterator it = assignees.begin(); assignees.end() != it; ++it) {
595 DCHECK(it->second.is_marked_to_delete());
596 results_[it->first] = it->second;
597 }
598 }
599
TrimReferrersNow()600 void Predictor::TrimReferrersNow() {
601 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
602 // Just finish up work if an incremental trim is in progress.
603 if (urls_being_trimmed_.empty())
604 LoadUrlsForTrimming();
605 IncrementalTrimReferrers(true); // Do everything now.
606 }
607
SerializeReferrers(ListValue * referral_list)608 void Predictor::SerializeReferrers(ListValue* referral_list) {
609 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
610 referral_list->Clear();
611 referral_list->Append(new FundamentalValue(PREDICTOR_REFERRER_VERSION));
612 for (Referrers::const_iterator it = referrers_.begin();
613 it != referrers_.end(); ++it) {
614 // Serialize the list of subresource names.
615 Value* subresource_list(it->second.Serialize());
616
617 // Create a list for each referer.
618 ListValue* motivator(new ListValue);
619 motivator->Append(new StringValue(it->first.spec()));
620 motivator->Append(subresource_list);
621
622 referral_list->Append(motivator);
623 }
624 }
625
DeserializeReferrers(const ListValue & referral_list)626 void Predictor::DeserializeReferrers(const ListValue& referral_list) {
627 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
628 int format_version = -1;
629 if (referral_list.GetSize() > 0 &&
630 referral_list.GetInteger(0, &format_version) &&
631 format_version == PREDICTOR_REFERRER_VERSION) {
632 for (size_t i = 1; i < referral_list.GetSize(); ++i) {
633 ListValue* motivator;
634 if (!referral_list.GetList(i, &motivator)) {
635 NOTREACHED();
636 return;
637 }
638 std::string motivating_url_spec;
639 if (!motivator->GetString(0, &motivating_url_spec)) {
640 NOTREACHED();
641 return;
642 }
643
644 Value* subresource_list;
645 if (!motivator->Get(1, &subresource_list)) {
646 NOTREACHED();
647 return;
648 }
649
650 referrers_[GURL(motivating_url_spec)].Deserialize(*subresource_list);
651 }
652 }
653 }
654
TrimReferrers()655 void Predictor::TrimReferrers() {
656 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
657 if (!urls_being_trimmed_.empty())
658 return; // There is incremental trimming in progress already.
659
660 // Check to see if it is time to trim yet.
661 base::TimeTicks now = base::TimeTicks::Now();
662 if (now < next_trim_time_)
663 return;
664 next_trim_time_ = now + kDurationBetweenTrimmings;
665
666 LoadUrlsForTrimming();
667 PostIncrementalTrimTask();
668 }
669
LoadUrlsForTrimming()670 void Predictor::LoadUrlsForTrimming() {
671 DCHECK(urls_being_trimmed_.empty());
672 for (Referrers::const_iterator it = referrers_.begin();
673 it != referrers_.end(); ++it)
674 urls_being_trimmed_.push_back(it->first);
675 UMA_HISTOGRAM_COUNTS("Net.PredictionTrimSize", urls_being_trimmed_.size());
676 }
677
PostIncrementalTrimTask()678 void Predictor::PostIncrementalTrimTask() {
679 if (urls_being_trimmed_.empty())
680 return;
681 MessageLoop::current()->PostDelayedTask(
682 FROM_HERE,
683 trim_task_factory_.NewRunnableMethod(&Predictor::IncrementalTrimReferrers,
684 false),
685 kDurationBetweenTrimmingIncrements.InMilliseconds());
686 }
687
IncrementalTrimReferrers(bool trim_all_now)688 void Predictor::IncrementalTrimReferrers(bool trim_all_now) {
689 size_t trim_count = urls_being_trimmed_.size();
690 if (!trim_all_now)
691 trim_count = std::min(trim_count, kUrlsTrimmedPerIncrement);
692 while (trim_count-- != 0) {
693 Referrers::iterator it = referrers_.find(urls_being_trimmed_.back());
694 urls_being_trimmed_.pop_back();
695 if (it == referrers_.end())
696 continue; // Defensive code: It got trimmed away already.
697 if (!it->second.Trim(kReferrerTrimRatio, kDiscardableExpectedValue))
698 referrers_.erase(it);
699 }
700 PostIncrementalTrimTask();
701 }
702
703 //------------------------------------------------------------------------------
704
HostNameQueue()705 Predictor::HostNameQueue::HostNameQueue() {
706 }
707
~HostNameQueue()708 Predictor::HostNameQueue::~HostNameQueue() {
709 }
710
Push(const GURL & url,UrlInfo::ResolutionMotivation motivation)711 void Predictor::HostNameQueue::Push(const GURL& url,
712 UrlInfo::ResolutionMotivation motivation) {
713 switch (motivation) {
714 case UrlInfo::STATIC_REFERAL_MOTIVATED:
715 case UrlInfo::LEARNED_REFERAL_MOTIVATED:
716 case UrlInfo::MOUSE_OVER_MOTIVATED:
717 rush_queue_.push(url);
718 break;
719
720 default:
721 background_queue_.push(url);
722 break;
723 }
724 }
725
IsEmpty() const726 bool Predictor::HostNameQueue::IsEmpty() const {
727 return rush_queue_.empty() && background_queue_.empty();
728 }
729
Pop()730 GURL Predictor::HostNameQueue::Pop() {
731 DCHECK(!IsEmpty());
732 std::queue<GURL> *queue(rush_queue_.empty() ? &background_queue_
733 : &rush_queue_);
734 GURL url(queue->front());
735 queue->pop();
736 return url;
737 }
738
DeserializeReferrersThenDelete(ListValue * referral_list)739 void Predictor::DeserializeReferrersThenDelete(ListValue* referral_list) {
740 DeserializeReferrers(*referral_list);
741 delete referral_list;
742 }
743
744
745 //------------------------------------------------------------------------------
746 // Helper functions
747 //------------------------------------------------------------------------------
748
749 // static
CanonicalizeUrl(const GURL & url)750 GURL Predictor::CanonicalizeUrl(const GURL& url) {
751 if (!url.has_host())
752 return GURL::EmptyGURL();
753
754 std::string scheme;
755 if (url.has_scheme()) {
756 scheme = url.scheme();
757 if (scheme != "http" && scheme != "https")
758 return GURL::EmptyGURL();
759 if (url.has_port())
760 return url.GetWithEmptyPath();
761 } else {
762 scheme = "http";
763 }
764
765 // If we omit a port, it will default to 80 or 443 as appropriate.
766 std::string colon_plus_port;
767 if (url.has_port())
768 colon_plus_port = ":" + url.port();
769
770 return GURL(scheme + "://" + url.host() + colon_plus_port);
771 }
772
773
774 } // namespace chrome_browser_net
775