• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "chrome/browser/history/expire_history_backend.h"
6 
7 #include <algorithm>
8 #include <functional>
9 #include <limits>
10 
11 #include "base/bind.h"
12 #include "base/compiler_specific.h"
13 #include "base/file_util.h"
14 #include "base/files/file_enumerator.h"
15 #include "base/logging.h"
16 #include "base/message_loop/message_loop.h"
17 #include "chrome/browser/chrome_notification_types.h"
18 #include "chrome/browser/history/history_database.h"
19 #include "chrome/browser/history/history_notifications.h"
20 #include "chrome/browser/history/thumbnail_database.h"
21 #include "components/history/core/browser/history_client.h"
22 
23 namespace history {
24 
25 // Helpers --------------------------------------------------------------------
26 
27 namespace {
28 
29 // The number of days by which the expiration threshold is advanced for items
30 // that we want to expire early, such as those of AUTO_SUBFRAME transition type.
31 //
32 // Early expiration stuff is kept around only for edge cases, as subframes
33 // don't appear in history and the vast majority of them are ads anyway. The
34 // main use case for these is if you're on a site with links to different
35 // frames, you'll be able to see those links as visited, and we'll also be
36 // able to get redirect information for those URLs.
37 //
38 // But since these uses are most valuable when you're actually on the site,
39 // and because these can take up the bulk of your history, we get a lot of
40 // space savings by deleting them quickly.
41 const int kEarlyExpirationAdvanceDays = 3;
42 
43 // Reads all types of visits starting from beginning of time to the given end
44 // time. This is the most general reader.
45 class AllVisitsReader : public ExpiringVisitsReader {
46  public:
Read(base::Time end_time,HistoryDatabase * db,VisitVector * visits,int max_visits) const47   virtual bool Read(base::Time end_time,
48                     HistoryDatabase* db,
49                     VisitVector* visits,
50                     int max_visits) const OVERRIDE {
51     DCHECK(db) << "must have a database to operate upon";
52     DCHECK(visits) << "visit vector has to exist in order to populate it";
53 
54     db->GetAllVisitsInRange(base::Time(), end_time, max_visits, visits);
55     // When we got the maximum number of visits we asked for, we say there could
56     // be additional things to expire now.
57     return static_cast<int>(visits->size()) == max_visits;
58   }
59 };
60 
61 // Reads only AUTO_SUBFRAME visits, within a computed range. The range is
62 // computed as follows:
63 // * |begin_time| is read from the meta table. This value is updated whenever
64 //   there are no more additional visits to expire by this reader.
65 // * |end_time| is advanced forward by a constant (kEarlyExpirationAdvanceDay),
66 //   but not past the current time.
67 class AutoSubframeVisitsReader : public ExpiringVisitsReader {
68  public:
Read(base::Time end_time,HistoryDatabase * db,VisitVector * visits,int max_visits) const69   virtual bool Read(base::Time end_time,
70                     HistoryDatabase* db,
71                     VisitVector* visits,
72                     int max_visits) const OVERRIDE {
73     DCHECK(db) << "must have a database to operate upon";
74     DCHECK(visits) << "visit vector has to exist in order to populate it";
75 
76     base::Time begin_time = db->GetEarlyExpirationThreshold();
77     // Advance |end_time| to expire early.
78     base::Time early_end_time = end_time +
79         base::TimeDelta::FromDays(kEarlyExpirationAdvanceDays);
80 
81     // We don't want to set the early expiration threshold to a time in the
82     // future.
83     base::Time now = base::Time::Now();
84     if (early_end_time > now)
85       early_end_time = now;
86 
87     db->GetVisitsInRangeForTransition(begin_time, early_end_time,
88                                       max_visits,
89                                       content::PAGE_TRANSITION_AUTO_SUBFRAME,
90                                       visits);
91     bool more = static_cast<int>(visits->size()) == max_visits;
92     if (!more)
93       db->UpdateEarlyExpirationThreshold(early_end_time);
94 
95     return more;
96   }
97 };
98 
99 // The number of visits we will expire very time we check for old items. This
100 // Prevents us from doing too much work any given time.
101 const int kNumExpirePerIteration = 32;
102 
103 // The number of seconds between checking for items that should be expired when
104 // we think there might be more items to expire. This timeout is used when the
105 // last expiration found at least kNumExpirePerIteration and we want to check
106 // again "soon."
107 const int kExpirationDelaySec = 30;
108 
109 // The number of minutes between checking, as with kExpirationDelaySec, but
110 // when we didn't find enough things to expire last time. If there was no
111 // history to expire last iteration, it's likely there is nothing next
112 // iteration, so we want to wait longer before checking to avoid wasting CPU.
113 const int kExpirationEmptyDelayMin = 5;
114 
115 }  // namespace
116 
117 
118 // ExpireHistoryBackend::DeleteEffects ----------------------------------------
119 
DeleteEffects()120 ExpireHistoryBackend::DeleteEffects::DeleteEffects() {
121 }
122 
~DeleteEffects()123 ExpireHistoryBackend::DeleteEffects::~DeleteEffects() {
124 }
125 
126 
127 // ExpireHistoryBackend -------------------------------------------------------
128 
ExpireHistoryBackend(BroadcastNotificationDelegate * delegate,HistoryClient * history_client)129 ExpireHistoryBackend::ExpireHistoryBackend(
130     BroadcastNotificationDelegate* delegate,
131     HistoryClient* history_client)
132     : delegate_(delegate),
133       main_db_(NULL),
134       thumb_db_(NULL),
135       weak_factory_(this),
136       history_client_(history_client) {
137 }
138 
~ExpireHistoryBackend()139 ExpireHistoryBackend::~ExpireHistoryBackend() {
140 }
141 
SetDatabases(HistoryDatabase * main_db,ThumbnailDatabase * thumb_db)142 void ExpireHistoryBackend::SetDatabases(HistoryDatabase* main_db,
143                                         ThumbnailDatabase* thumb_db) {
144   main_db_ = main_db;
145   thumb_db_ = thumb_db;
146 }
147 
DeleteURL(const GURL & url)148 void ExpireHistoryBackend::DeleteURL(const GURL& url) {
149   DeleteURLs(std::vector<GURL>(1, url));
150 }
151 
DeleteURLs(const std::vector<GURL> & urls)152 void ExpireHistoryBackend::DeleteURLs(const std::vector<GURL>& urls) {
153   if (!main_db_)
154     return;
155 
156   DeleteEffects effects;
157   HistoryClient* history_client = GetHistoryClient();
158   for (std::vector<GURL>::const_iterator url = urls.begin(); url != urls.end();
159        ++url) {
160     URLRow url_row;
161     if (!main_db_->GetRowForURL(*url, &url_row))
162       continue;  // Nothing to delete.
163 
164     // Collect all the visits and delete them. Note that we don't give up if
165     // there are no visits, since the URL could still have an entry that we
166     // should delete.
167     VisitVector visits;
168     main_db_->GetVisitsForURL(url_row.id(), &visits);
169 
170     DeleteVisitRelatedInfo(visits, &effects);
171 
172     // We skip ExpireURLsForVisits (since we are deleting from the
173     // URL, and not starting with visits in a given time range). We
174     // therefore need to call the deletion and favicon update
175     // functions manually.
176     DeleteOneURL(url_row,
177                  history_client && history_client->IsBookmarked(*url),
178                  &effects);
179   }
180 
181   DeleteFaviconsIfPossible(&effects);
182 
183   BroadcastNotifications(&effects, DELETION_USER_INITIATED);
184 }
185 
ExpireHistoryBetween(const std::set<GURL> & restrict_urls,base::Time begin_time,base::Time end_time)186 void ExpireHistoryBackend::ExpireHistoryBetween(
187     const std::set<GURL>& restrict_urls,
188     base::Time begin_time,
189     base::Time end_time) {
190   if (!main_db_)
191     return;
192 
193   // Find the affected visits and delete them.
194   VisitVector visits;
195   main_db_->GetAllVisitsInRange(begin_time, end_time, 0, &visits);
196   if (!restrict_urls.empty()) {
197     std::set<URLID> url_ids;
198     for (std::set<GURL>::const_iterator url = restrict_urls.begin();
199         url != restrict_urls.end(); ++url)
200       url_ids.insert(main_db_->GetRowForURL(*url, NULL));
201     VisitVector all_visits;
202     all_visits.swap(visits);
203     for (VisitVector::iterator visit = all_visits.begin();
204          visit != all_visits.end(); ++visit) {
205       if (url_ids.find(visit->url_id) != url_ids.end())
206         visits.push_back(*visit);
207     }
208   }
209   ExpireVisits(visits);
210 }
211 
ExpireHistoryForTimes(const std::vector<base::Time> & times)212 void ExpireHistoryBackend::ExpireHistoryForTimes(
213     const std::vector<base::Time>& times) {
214   // |times| must be in reverse chronological order and have no
215   // duplicates, i.e. each member must be earlier than the one before
216   // it.
217   DCHECK(
218       std::adjacent_find(
219           times.begin(), times.end(), std::less_equal<base::Time>()) ==
220       times.end());
221 
222   if (!main_db_)
223     return;
224 
225   // Find the affected visits and delete them.
226   VisitVector visits;
227   main_db_->GetVisitsForTimes(times, &visits);
228   ExpireVisits(visits);
229 }
230 
ExpireVisits(const VisitVector & visits)231 void ExpireHistoryBackend::ExpireVisits(const VisitVector& visits) {
232   if (visits.empty())
233     return;
234 
235   DeleteEffects effects;
236   DeleteVisitRelatedInfo(visits, &effects);
237 
238   // Delete or update the URLs affected. We want to update the visit counts
239   // since this is called by the user who wants to delete their recent history,
240   // and we don't want to leave any evidence.
241   ExpireURLsForVisits(visits, &effects);
242   DeleteFaviconsIfPossible(&effects);
243   BroadcastNotifications(&effects, DELETION_USER_INITIATED);
244 
245   // Pick up any bits possibly left over.
246   ParanoidExpireHistory();
247 }
248 
ExpireHistoryBefore(base::Time end_time)249 void ExpireHistoryBackend::ExpireHistoryBefore(base::Time end_time) {
250   if (!main_db_)
251     return;
252 
253   // Expire as much history as possible before the given date.
254   ExpireSomeOldHistory(end_time, GetAllVisitsReader(),
255                        std::numeric_limits<int>::max());
256   ParanoidExpireHistory();
257 }
258 
InitWorkQueue()259 void ExpireHistoryBackend::InitWorkQueue() {
260   DCHECK(work_queue_.empty()) << "queue has to be empty prior to init";
261 
262   for (size_t i = 0; i < readers_.size(); i++)
263     work_queue_.push(readers_[i]);
264 }
265 
GetAllVisitsReader()266 const ExpiringVisitsReader* ExpireHistoryBackend::GetAllVisitsReader() {
267   if (!all_visits_reader_)
268     all_visits_reader_.reset(new AllVisitsReader());
269   return all_visits_reader_.get();
270 }
271 
272 const ExpiringVisitsReader*
GetAutoSubframeVisitsReader()273     ExpireHistoryBackend::GetAutoSubframeVisitsReader() {
274   if (!auto_subframe_visits_reader_)
275     auto_subframe_visits_reader_.reset(new AutoSubframeVisitsReader());
276   return auto_subframe_visits_reader_.get();
277 }
278 
StartExpiringOldStuff(base::TimeDelta expiration_threshold)279 void ExpireHistoryBackend::StartExpiringOldStuff(
280     base::TimeDelta expiration_threshold) {
281   expiration_threshold_ = expiration_threshold;
282 
283   // Remove all readers, just in case this was method was called before.
284   readers_.clear();
285   // For now, we explicitly add all known readers. If we come up with more
286   // reader types (in case we want to expire different types of visits in
287   // different ways), we can make it be populated by creator/owner of
288   // ExpireHistoryBackend.
289   readers_.push_back(GetAllVisitsReader());
290   readers_.push_back(GetAutoSubframeVisitsReader());
291 
292   // Initialize the queue with all tasks for the first set of iterations.
293   InitWorkQueue();
294   ScheduleExpire();
295 }
296 
DeleteFaviconsIfPossible(DeleteEffects * effects)297 void ExpireHistoryBackend::DeleteFaviconsIfPossible(DeleteEffects* effects) {
298   if (!thumb_db_)
299     return;
300 
301   for (std::set<favicon_base::FaviconID>::const_iterator i =
302            effects->affected_favicons.begin();
303        i != effects->affected_favicons.end(); ++i) {
304     if (!thumb_db_->HasMappingFor(*i)) {
305       GURL icon_url;
306       favicon_base::IconType icon_type;
307       if (thumb_db_->GetFaviconHeader(*i,
308                                       &icon_url,
309                                       &icon_type) &&
310           thumb_db_->DeleteFavicon(*i)) {
311         effects->deleted_favicons.insert(icon_url);
312       }
313     }
314   }
315 }
316 
BroadcastNotifications(DeleteEffects * effects,DeletionType type)317 void ExpireHistoryBackend::BroadcastNotifications(DeleteEffects* effects,
318                                                   DeletionType type) {
319   if (!effects->modified_urls.empty()) {
320     scoped_ptr<URLsModifiedDetails> details(new URLsModifiedDetails);
321     details->changed_urls = effects->modified_urls;
322     delegate_->NotifySyncURLsModified(&details->changed_urls);
323     delegate_->BroadcastNotifications(
324         chrome::NOTIFICATION_HISTORY_URLS_MODIFIED,
325         details.PassAs<HistoryDetails>());
326   }
327   if (!effects->deleted_urls.empty()) {
328     scoped_ptr<URLsDeletedDetails> details(new URLsDeletedDetails);
329     details->all_history = false;
330     details->expired = (type == DELETION_EXPIRED);
331     details->rows = effects->deleted_urls;
332     details->favicon_urls = effects->deleted_favicons;
333     delegate_->NotifySyncURLsDeleted(details->all_history, details->expired,
334                                      &details->rows);
335     delegate_->BroadcastNotifications(chrome::NOTIFICATION_HISTORY_URLS_DELETED,
336                                       details.PassAs<HistoryDetails>());
337   }
338 }
339 
DeleteVisitRelatedInfo(const VisitVector & visits,DeleteEffects * effects)340 void ExpireHistoryBackend::DeleteVisitRelatedInfo(const VisitVector& visits,
341                                                   DeleteEffects* effects) {
342   for (size_t i = 0; i < visits.size(); i++) {
343     // Delete the visit itself.
344     main_db_->DeleteVisit(visits[i]);
345 
346     // Add the URL row to the affected URL list.
347     if (!effects->affected_urls.count(visits[i].url_id)) {
348       URLRow row;
349       if (main_db_->GetURLRow(visits[i].url_id, &row))
350         effects->affected_urls[visits[i].url_id] = row;
351     }
352   }
353 }
354 
DeleteOneURL(const URLRow & url_row,bool is_bookmarked,DeleteEffects * effects)355 void ExpireHistoryBackend::DeleteOneURL(const URLRow& url_row,
356                                         bool is_bookmarked,
357                                         DeleteEffects* effects) {
358   main_db_->DeleteSegmentForURL(url_row.id());
359 
360   if (!is_bookmarked) {
361     effects->deleted_urls.push_back(url_row);
362 
363     // Delete stuff that references this URL.
364     if (thumb_db_) {
365       // Collect shared information.
366       std::vector<IconMapping> icon_mappings;
367       if (thumb_db_->GetIconMappingsForPageURL(url_row.url(), &icon_mappings)) {
368         for (std::vector<IconMapping>::iterator m = icon_mappings.begin();
369              m != icon_mappings.end(); ++m) {
370           effects->affected_favicons.insert(m->icon_id);
371         }
372         // Delete the mapping entries for the url.
373         thumb_db_->DeleteIconMappings(url_row.url());
374       }
375     }
376     // Last, delete the URL entry.
377     main_db_->DeleteURLRow(url_row.id());
378   }
379 }
380 
381 namespace {
382 
383 struct ChangedURL {
ChangedURLhistory::__anon71a675490211::ChangedURL384   ChangedURL() : visit_count(0), typed_count(0) {}
385   int visit_count;
386   int typed_count;
387 };
388 
389 }  // namespace
390 
ExpireURLsForVisits(const VisitVector & visits,DeleteEffects * effects)391 void ExpireHistoryBackend::ExpireURLsForVisits(const VisitVector& visits,
392                                                DeleteEffects* effects) {
393   // First find all unique URLs and the number of visits we're deleting for
394   // each one.
395   std::map<URLID, ChangedURL> changed_urls;
396   for (size_t i = 0; i < visits.size(); i++) {
397     ChangedURL& cur = changed_urls[visits[i].url_id];
398     // NOTE: This code must stay in sync with HistoryBackend::AddPageVisit().
399     // TODO(pkasting): http://b/1148304 We shouldn't be marking so many URLs as
400     // typed, which would help eliminate the need for this code (we still would
401     // need to handle RELOAD transitions specially, though).
402     content::PageTransition transition =
403         content::PageTransitionStripQualifier(visits[i].transition);
404     if (transition != content::PAGE_TRANSITION_RELOAD)
405       cur.visit_count++;
406     if ((transition == content::PAGE_TRANSITION_TYPED &&
407         !content::PageTransitionIsRedirect(visits[i].transition)) ||
408         transition == content::PAGE_TRANSITION_KEYWORD_GENERATED)
409       cur.typed_count++;
410   }
411 
412   // Check each unique URL with deleted visits.
413   HistoryClient* history_client = GetHistoryClient();
414   for (std::map<URLID, ChangedURL>::const_iterator i = changed_urls.begin();
415        i != changed_urls.end(); ++i) {
416     // The unique URL rows should already be filled in.
417     URLRow& url_row = effects->affected_urls[i->first];
418     if (!url_row.id())
419       continue;  // URL row doesn't exist in the database.
420 
421     // Check if there are any other visits for this URL and update the time
422     // (the time change may not actually be synced to disk below when we're
423     // archiving).
424     VisitRow last_visit;
425     if (main_db_->GetMostRecentVisitForURL(url_row.id(), &last_visit))
426       url_row.set_last_visit(last_visit.visit_time);
427     else
428       url_row.set_last_visit(base::Time());
429 
430     // Don't delete URLs with visits still in the DB, or bookmarked.
431     bool is_bookmarked =
432         (history_client && history_client->IsBookmarked(url_row.url()));
433     if (!is_bookmarked && url_row.last_visit().is_null()) {
434       // Not bookmarked and no more visits. Nuke the url.
435       DeleteOneURL(url_row, is_bookmarked, effects);
436     } else {
437       // NOTE: The calls to std::max() below are a backstop, but they should
438       // never actually be needed unless the database is corrupt (I think).
439       url_row.set_visit_count(
440           std::max(0, url_row.visit_count() - i->second.visit_count));
441       url_row.set_typed_count(
442           std::max(0, url_row.typed_count() - i->second.typed_count));
443 
444       // Update the db with the new details.
445       main_db_->UpdateURLRow(url_row.id(), url_row);
446 
447       effects->modified_urls.push_back(url_row);
448     }
449   }
450 }
451 
ScheduleExpire()452 void ExpireHistoryBackend::ScheduleExpire() {
453   base::TimeDelta delay;
454   if (work_queue_.empty()) {
455     // If work queue is empty, reset the work queue to contain all tasks and
456     // schedule next iteration after a longer delay.
457     InitWorkQueue();
458     delay = base::TimeDelta::FromMinutes(kExpirationEmptyDelayMin);
459   } else {
460     delay = base::TimeDelta::FromSeconds(kExpirationDelaySec);
461   }
462 
463   base::MessageLoop::current()->PostDelayedTask(
464       FROM_HERE,
465       base::Bind(&ExpireHistoryBackend::DoExpireIteration,
466                  weak_factory_.GetWeakPtr()),
467       delay);
468 }
469 
DoExpireIteration()470 void ExpireHistoryBackend::DoExpireIteration() {
471   DCHECK(!work_queue_.empty()) << "queue has to be non-empty";
472 
473   const ExpiringVisitsReader* reader = work_queue_.front();
474   bool more_to_expire = ExpireSomeOldHistory(
475       GetCurrentExpirationTime(), reader, kNumExpirePerIteration);
476 
477   work_queue_.pop();
478   // If there are more items to expire, add the reader back to the queue, thus
479   // creating a new task for future iterations.
480   if (more_to_expire)
481     work_queue_.push(reader);
482 
483   ScheduleExpire();
484 }
485 
ExpireSomeOldHistory(base::Time end_time,const ExpiringVisitsReader * reader,int max_visits)486 bool ExpireHistoryBackend::ExpireSomeOldHistory(
487     base::Time end_time,
488     const ExpiringVisitsReader* reader,
489     int max_visits) {
490   if (!main_db_)
491     return false;
492 
493   // Add an extra time unit to given end time, because
494   // GetAllVisitsInRange, et al. queries' end value is non-inclusive.
495   base::Time effective_end_time =
496       base::Time::FromInternalValue(end_time.ToInternalValue() + 1);
497 
498   VisitVector deleted_visits;
499   bool more_to_expire = reader->Read(effective_end_time, main_db_,
500                                      &deleted_visits, max_visits);
501 
502   DeleteEffects deleted_effects;
503   DeleteVisitRelatedInfo(deleted_visits, &deleted_effects);
504   ExpireURLsForVisits(deleted_visits, &deleted_effects);
505   DeleteFaviconsIfPossible(&deleted_effects);
506 
507   BroadcastNotifications(&deleted_effects, DELETION_EXPIRED);
508 
509   return more_to_expire;
510 }
511 
ParanoidExpireHistory()512 void ExpireHistoryBackend::ParanoidExpireHistory() {
513   // TODO(brettw): Bug 1067331: write this to clean up any errors.
514 }
515 
GetHistoryClient()516 HistoryClient* ExpireHistoryBackend::GetHistoryClient() {
517   // We use the history client to determine if a URL is bookmarked. The data is
518   // loaded on a separate thread and may not be done by the time we get here.
519   // We therefore block until the bookmarks have finished loading.
520   if (history_client_)
521     history_client_->BlockUntilBookmarksLoaded();
522   return history_client_;
523 }
524 
525 }  // namespace history
526