1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "chrome/browser/history/expire_history_backend.h"
6
7 #include <algorithm>
8 #include <functional>
9 #include <limits>
10
11 #include "base/bind.h"
12 #include "base/compiler_specific.h"
13 #include "base/file_util.h"
14 #include "base/files/file_enumerator.h"
15 #include "base/logging.h"
16 #include "base/message_loop/message_loop.h"
17 #include "chrome/browser/chrome_notification_types.h"
18 #include "chrome/browser/history/history_database.h"
19 #include "chrome/browser/history/history_notifications.h"
20 #include "chrome/browser/history/thumbnail_database.h"
21 #include "components/history/core/browser/history_client.h"
22
23 namespace history {
24
25 // Helpers --------------------------------------------------------------------
26
27 namespace {
28
29 // The number of days by which the expiration threshold is advanced for items
30 // that we want to expire early, such as those of AUTO_SUBFRAME transition type.
31 //
32 // Early expiration stuff is kept around only for edge cases, as subframes
33 // don't appear in history and the vast majority of them are ads anyway. The
34 // main use case for these is if you're on a site with links to different
35 // frames, you'll be able to see those links as visited, and we'll also be
36 // able to get redirect information for those URLs.
37 //
38 // But since these uses are most valuable when you're actually on the site,
39 // and because these can take up the bulk of your history, we get a lot of
40 // space savings by deleting them quickly.
41 const int kEarlyExpirationAdvanceDays = 3;
42
43 // Reads all types of visits starting from beginning of time to the given end
44 // time. This is the most general reader.
45 class AllVisitsReader : public ExpiringVisitsReader {
46 public:
Read(base::Time end_time,HistoryDatabase * db,VisitVector * visits,int max_visits) const47 virtual bool Read(base::Time end_time,
48 HistoryDatabase* db,
49 VisitVector* visits,
50 int max_visits) const OVERRIDE {
51 DCHECK(db) << "must have a database to operate upon";
52 DCHECK(visits) << "visit vector has to exist in order to populate it";
53
54 db->GetAllVisitsInRange(base::Time(), end_time, max_visits, visits);
55 // When we got the maximum number of visits we asked for, we say there could
56 // be additional things to expire now.
57 return static_cast<int>(visits->size()) == max_visits;
58 }
59 };
60
61 // Reads only AUTO_SUBFRAME visits, within a computed range. The range is
62 // computed as follows:
63 // * |begin_time| is read from the meta table. This value is updated whenever
64 // there are no more additional visits to expire by this reader.
65 // * |end_time| is advanced forward by a constant (kEarlyExpirationAdvanceDay),
66 // but not past the current time.
67 class AutoSubframeVisitsReader : public ExpiringVisitsReader {
68 public:
Read(base::Time end_time,HistoryDatabase * db,VisitVector * visits,int max_visits) const69 virtual bool Read(base::Time end_time,
70 HistoryDatabase* db,
71 VisitVector* visits,
72 int max_visits) const OVERRIDE {
73 DCHECK(db) << "must have a database to operate upon";
74 DCHECK(visits) << "visit vector has to exist in order to populate it";
75
76 base::Time begin_time = db->GetEarlyExpirationThreshold();
77 // Advance |end_time| to expire early.
78 base::Time early_end_time = end_time +
79 base::TimeDelta::FromDays(kEarlyExpirationAdvanceDays);
80
81 // We don't want to set the early expiration threshold to a time in the
82 // future.
83 base::Time now = base::Time::Now();
84 if (early_end_time > now)
85 early_end_time = now;
86
87 db->GetVisitsInRangeForTransition(begin_time, early_end_time,
88 max_visits,
89 content::PAGE_TRANSITION_AUTO_SUBFRAME,
90 visits);
91 bool more = static_cast<int>(visits->size()) == max_visits;
92 if (!more)
93 db->UpdateEarlyExpirationThreshold(early_end_time);
94
95 return more;
96 }
97 };
98
99 // The number of visits we will expire very time we check for old items. This
100 // Prevents us from doing too much work any given time.
101 const int kNumExpirePerIteration = 32;
102
103 // The number of seconds between checking for items that should be expired when
104 // we think there might be more items to expire. This timeout is used when the
105 // last expiration found at least kNumExpirePerIteration and we want to check
106 // again "soon."
107 const int kExpirationDelaySec = 30;
108
109 // The number of minutes between checking, as with kExpirationDelaySec, but
110 // when we didn't find enough things to expire last time. If there was no
111 // history to expire last iteration, it's likely there is nothing next
112 // iteration, so we want to wait longer before checking to avoid wasting CPU.
113 const int kExpirationEmptyDelayMin = 5;
114
115 } // namespace
116
117
118 // ExpireHistoryBackend::DeleteEffects ----------------------------------------
119
DeleteEffects()120 ExpireHistoryBackend::DeleteEffects::DeleteEffects() {
121 }
122
~DeleteEffects()123 ExpireHistoryBackend::DeleteEffects::~DeleteEffects() {
124 }
125
126
127 // ExpireHistoryBackend -------------------------------------------------------
128
ExpireHistoryBackend(BroadcastNotificationDelegate * delegate,HistoryClient * history_client)129 ExpireHistoryBackend::ExpireHistoryBackend(
130 BroadcastNotificationDelegate* delegate,
131 HistoryClient* history_client)
132 : delegate_(delegate),
133 main_db_(NULL),
134 thumb_db_(NULL),
135 weak_factory_(this),
136 history_client_(history_client) {
137 }
138
~ExpireHistoryBackend()139 ExpireHistoryBackend::~ExpireHistoryBackend() {
140 }
141
SetDatabases(HistoryDatabase * main_db,ThumbnailDatabase * thumb_db)142 void ExpireHistoryBackend::SetDatabases(HistoryDatabase* main_db,
143 ThumbnailDatabase* thumb_db) {
144 main_db_ = main_db;
145 thumb_db_ = thumb_db;
146 }
147
DeleteURL(const GURL & url)148 void ExpireHistoryBackend::DeleteURL(const GURL& url) {
149 DeleteURLs(std::vector<GURL>(1, url));
150 }
151
DeleteURLs(const std::vector<GURL> & urls)152 void ExpireHistoryBackend::DeleteURLs(const std::vector<GURL>& urls) {
153 if (!main_db_)
154 return;
155
156 DeleteEffects effects;
157 HistoryClient* history_client = GetHistoryClient();
158 for (std::vector<GURL>::const_iterator url = urls.begin(); url != urls.end();
159 ++url) {
160 URLRow url_row;
161 if (!main_db_->GetRowForURL(*url, &url_row))
162 continue; // Nothing to delete.
163
164 // Collect all the visits and delete them. Note that we don't give up if
165 // there are no visits, since the URL could still have an entry that we
166 // should delete.
167 VisitVector visits;
168 main_db_->GetVisitsForURL(url_row.id(), &visits);
169
170 DeleteVisitRelatedInfo(visits, &effects);
171
172 // We skip ExpireURLsForVisits (since we are deleting from the
173 // URL, and not starting with visits in a given time range). We
174 // therefore need to call the deletion and favicon update
175 // functions manually.
176 DeleteOneURL(url_row,
177 history_client && history_client->IsBookmarked(*url),
178 &effects);
179 }
180
181 DeleteFaviconsIfPossible(&effects);
182
183 BroadcastNotifications(&effects, DELETION_USER_INITIATED);
184 }
185
ExpireHistoryBetween(const std::set<GURL> & restrict_urls,base::Time begin_time,base::Time end_time)186 void ExpireHistoryBackend::ExpireHistoryBetween(
187 const std::set<GURL>& restrict_urls,
188 base::Time begin_time,
189 base::Time end_time) {
190 if (!main_db_)
191 return;
192
193 // Find the affected visits and delete them.
194 VisitVector visits;
195 main_db_->GetAllVisitsInRange(begin_time, end_time, 0, &visits);
196 if (!restrict_urls.empty()) {
197 std::set<URLID> url_ids;
198 for (std::set<GURL>::const_iterator url = restrict_urls.begin();
199 url != restrict_urls.end(); ++url)
200 url_ids.insert(main_db_->GetRowForURL(*url, NULL));
201 VisitVector all_visits;
202 all_visits.swap(visits);
203 for (VisitVector::iterator visit = all_visits.begin();
204 visit != all_visits.end(); ++visit) {
205 if (url_ids.find(visit->url_id) != url_ids.end())
206 visits.push_back(*visit);
207 }
208 }
209 ExpireVisits(visits);
210 }
211
ExpireHistoryForTimes(const std::vector<base::Time> & times)212 void ExpireHistoryBackend::ExpireHistoryForTimes(
213 const std::vector<base::Time>& times) {
214 // |times| must be in reverse chronological order and have no
215 // duplicates, i.e. each member must be earlier than the one before
216 // it.
217 DCHECK(
218 std::adjacent_find(
219 times.begin(), times.end(), std::less_equal<base::Time>()) ==
220 times.end());
221
222 if (!main_db_)
223 return;
224
225 // Find the affected visits and delete them.
226 VisitVector visits;
227 main_db_->GetVisitsForTimes(times, &visits);
228 ExpireVisits(visits);
229 }
230
ExpireVisits(const VisitVector & visits)231 void ExpireHistoryBackend::ExpireVisits(const VisitVector& visits) {
232 if (visits.empty())
233 return;
234
235 DeleteEffects effects;
236 DeleteVisitRelatedInfo(visits, &effects);
237
238 // Delete or update the URLs affected. We want to update the visit counts
239 // since this is called by the user who wants to delete their recent history,
240 // and we don't want to leave any evidence.
241 ExpireURLsForVisits(visits, &effects);
242 DeleteFaviconsIfPossible(&effects);
243 BroadcastNotifications(&effects, DELETION_USER_INITIATED);
244
245 // Pick up any bits possibly left over.
246 ParanoidExpireHistory();
247 }
248
ExpireHistoryBefore(base::Time end_time)249 void ExpireHistoryBackend::ExpireHistoryBefore(base::Time end_time) {
250 if (!main_db_)
251 return;
252
253 // Expire as much history as possible before the given date.
254 ExpireSomeOldHistory(end_time, GetAllVisitsReader(),
255 std::numeric_limits<int>::max());
256 ParanoidExpireHistory();
257 }
258
InitWorkQueue()259 void ExpireHistoryBackend::InitWorkQueue() {
260 DCHECK(work_queue_.empty()) << "queue has to be empty prior to init";
261
262 for (size_t i = 0; i < readers_.size(); i++)
263 work_queue_.push(readers_[i]);
264 }
265
GetAllVisitsReader()266 const ExpiringVisitsReader* ExpireHistoryBackend::GetAllVisitsReader() {
267 if (!all_visits_reader_)
268 all_visits_reader_.reset(new AllVisitsReader());
269 return all_visits_reader_.get();
270 }
271
272 const ExpiringVisitsReader*
GetAutoSubframeVisitsReader()273 ExpireHistoryBackend::GetAutoSubframeVisitsReader() {
274 if (!auto_subframe_visits_reader_)
275 auto_subframe_visits_reader_.reset(new AutoSubframeVisitsReader());
276 return auto_subframe_visits_reader_.get();
277 }
278
StartExpiringOldStuff(base::TimeDelta expiration_threshold)279 void ExpireHistoryBackend::StartExpiringOldStuff(
280 base::TimeDelta expiration_threshold) {
281 expiration_threshold_ = expiration_threshold;
282
283 // Remove all readers, just in case this was method was called before.
284 readers_.clear();
285 // For now, we explicitly add all known readers. If we come up with more
286 // reader types (in case we want to expire different types of visits in
287 // different ways), we can make it be populated by creator/owner of
288 // ExpireHistoryBackend.
289 readers_.push_back(GetAllVisitsReader());
290 readers_.push_back(GetAutoSubframeVisitsReader());
291
292 // Initialize the queue with all tasks for the first set of iterations.
293 InitWorkQueue();
294 ScheduleExpire();
295 }
296
DeleteFaviconsIfPossible(DeleteEffects * effects)297 void ExpireHistoryBackend::DeleteFaviconsIfPossible(DeleteEffects* effects) {
298 if (!thumb_db_)
299 return;
300
301 for (std::set<favicon_base::FaviconID>::const_iterator i =
302 effects->affected_favicons.begin();
303 i != effects->affected_favicons.end(); ++i) {
304 if (!thumb_db_->HasMappingFor(*i)) {
305 GURL icon_url;
306 favicon_base::IconType icon_type;
307 if (thumb_db_->GetFaviconHeader(*i,
308 &icon_url,
309 &icon_type) &&
310 thumb_db_->DeleteFavicon(*i)) {
311 effects->deleted_favicons.insert(icon_url);
312 }
313 }
314 }
315 }
316
BroadcastNotifications(DeleteEffects * effects,DeletionType type)317 void ExpireHistoryBackend::BroadcastNotifications(DeleteEffects* effects,
318 DeletionType type) {
319 if (!effects->modified_urls.empty()) {
320 scoped_ptr<URLsModifiedDetails> details(new URLsModifiedDetails);
321 details->changed_urls = effects->modified_urls;
322 delegate_->NotifySyncURLsModified(&details->changed_urls);
323 delegate_->BroadcastNotifications(
324 chrome::NOTIFICATION_HISTORY_URLS_MODIFIED,
325 details.PassAs<HistoryDetails>());
326 }
327 if (!effects->deleted_urls.empty()) {
328 scoped_ptr<URLsDeletedDetails> details(new URLsDeletedDetails);
329 details->all_history = false;
330 details->expired = (type == DELETION_EXPIRED);
331 details->rows = effects->deleted_urls;
332 details->favicon_urls = effects->deleted_favicons;
333 delegate_->NotifySyncURLsDeleted(details->all_history, details->expired,
334 &details->rows);
335 delegate_->BroadcastNotifications(chrome::NOTIFICATION_HISTORY_URLS_DELETED,
336 details.PassAs<HistoryDetails>());
337 }
338 }
339
DeleteVisitRelatedInfo(const VisitVector & visits,DeleteEffects * effects)340 void ExpireHistoryBackend::DeleteVisitRelatedInfo(const VisitVector& visits,
341 DeleteEffects* effects) {
342 for (size_t i = 0; i < visits.size(); i++) {
343 // Delete the visit itself.
344 main_db_->DeleteVisit(visits[i]);
345
346 // Add the URL row to the affected URL list.
347 if (!effects->affected_urls.count(visits[i].url_id)) {
348 URLRow row;
349 if (main_db_->GetURLRow(visits[i].url_id, &row))
350 effects->affected_urls[visits[i].url_id] = row;
351 }
352 }
353 }
354
DeleteOneURL(const URLRow & url_row,bool is_bookmarked,DeleteEffects * effects)355 void ExpireHistoryBackend::DeleteOneURL(const URLRow& url_row,
356 bool is_bookmarked,
357 DeleteEffects* effects) {
358 main_db_->DeleteSegmentForURL(url_row.id());
359
360 if (!is_bookmarked) {
361 effects->deleted_urls.push_back(url_row);
362
363 // Delete stuff that references this URL.
364 if (thumb_db_) {
365 // Collect shared information.
366 std::vector<IconMapping> icon_mappings;
367 if (thumb_db_->GetIconMappingsForPageURL(url_row.url(), &icon_mappings)) {
368 for (std::vector<IconMapping>::iterator m = icon_mappings.begin();
369 m != icon_mappings.end(); ++m) {
370 effects->affected_favicons.insert(m->icon_id);
371 }
372 // Delete the mapping entries for the url.
373 thumb_db_->DeleteIconMappings(url_row.url());
374 }
375 }
376 // Last, delete the URL entry.
377 main_db_->DeleteURLRow(url_row.id());
378 }
379 }
380
381 namespace {
382
383 struct ChangedURL {
ChangedURLhistory::__anon71a675490211::ChangedURL384 ChangedURL() : visit_count(0), typed_count(0) {}
385 int visit_count;
386 int typed_count;
387 };
388
389 } // namespace
390
ExpireURLsForVisits(const VisitVector & visits,DeleteEffects * effects)391 void ExpireHistoryBackend::ExpireURLsForVisits(const VisitVector& visits,
392 DeleteEffects* effects) {
393 // First find all unique URLs and the number of visits we're deleting for
394 // each one.
395 std::map<URLID, ChangedURL> changed_urls;
396 for (size_t i = 0; i < visits.size(); i++) {
397 ChangedURL& cur = changed_urls[visits[i].url_id];
398 // NOTE: This code must stay in sync with HistoryBackend::AddPageVisit().
399 // TODO(pkasting): http://b/1148304 We shouldn't be marking so many URLs as
400 // typed, which would help eliminate the need for this code (we still would
401 // need to handle RELOAD transitions specially, though).
402 content::PageTransition transition =
403 content::PageTransitionStripQualifier(visits[i].transition);
404 if (transition != content::PAGE_TRANSITION_RELOAD)
405 cur.visit_count++;
406 if ((transition == content::PAGE_TRANSITION_TYPED &&
407 !content::PageTransitionIsRedirect(visits[i].transition)) ||
408 transition == content::PAGE_TRANSITION_KEYWORD_GENERATED)
409 cur.typed_count++;
410 }
411
412 // Check each unique URL with deleted visits.
413 HistoryClient* history_client = GetHistoryClient();
414 for (std::map<URLID, ChangedURL>::const_iterator i = changed_urls.begin();
415 i != changed_urls.end(); ++i) {
416 // The unique URL rows should already be filled in.
417 URLRow& url_row = effects->affected_urls[i->first];
418 if (!url_row.id())
419 continue; // URL row doesn't exist in the database.
420
421 // Check if there are any other visits for this URL and update the time
422 // (the time change may not actually be synced to disk below when we're
423 // archiving).
424 VisitRow last_visit;
425 if (main_db_->GetMostRecentVisitForURL(url_row.id(), &last_visit))
426 url_row.set_last_visit(last_visit.visit_time);
427 else
428 url_row.set_last_visit(base::Time());
429
430 // Don't delete URLs with visits still in the DB, or bookmarked.
431 bool is_bookmarked =
432 (history_client && history_client->IsBookmarked(url_row.url()));
433 if (!is_bookmarked && url_row.last_visit().is_null()) {
434 // Not bookmarked and no more visits. Nuke the url.
435 DeleteOneURL(url_row, is_bookmarked, effects);
436 } else {
437 // NOTE: The calls to std::max() below are a backstop, but they should
438 // never actually be needed unless the database is corrupt (I think).
439 url_row.set_visit_count(
440 std::max(0, url_row.visit_count() - i->second.visit_count));
441 url_row.set_typed_count(
442 std::max(0, url_row.typed_count() - i->second.typed_count));
443
444 // Update the db with the new details.
445 main_db_->UpdateURLRow(url_row.id(), url_row);
446
447 effects->modified_urls.push_back(url_row);
448 }
449 }
450 }
451
ScheduleExpire()452 void ExpireHistoryBackend::ScheduleExpire() {
453 base::TimeDelta delay;
454 if (work_queue_.empty()) {
455 // If work queue is empty, reset the work queue to contain all tasks and
456 // schedule next iteration after a longer delay.
457 InitWorkQueue();
458 delay = base::TimeDelta::FromMinutes(kExpirationEmptyDelayMin);
459 } else {
460 delay = base::TimeDelta::FromSeconds(kExpirationDelaySec);
461 }
462
463 base::MessageLoop::current()->PostDelayedTask(
464 FROM_HERE,
465 base::Bind(&ExpireHistoryBackend::DoExpireIteration,
466 weak_factory_.GetWeakPtr()),
467 delay);
468 }
469
DoExpireIteration()470 void ExpireHistoryBackend::DoExpireIteration() {
471 DCHECK(!work_queue_.empty()) << "queue has to be non-empty";
472
473 const ExpiringVisitsReader* reader = work_queue_.front();
474 bool more_to_expire = ExpireSomeOldHistory(
475 GetCurrentExpirationTime(), reader, kNumExpirePerIteration);
476
477 work_queue_.pop();
478 // If there are more items to expire, add the reader back to the queue, thus
479 // creating a new task for future iterations.
480 if (more_to_expire)
481 work_queue_.push(reader);
482
483 ScheduleExpire();
484 }
485
ExpireSomeOldHistory(base::Time end_time,const ExpiringVisitsReader * reader,int max_visits)486 bool ExpireHistoryBackend::ExpireSomeOldHistory(
487 base::Time end_time,
488 const ExpiringVisitsReader* reader,
489 int max_visits) {
490 if (!main_db_)
491 return false;
492
493 // Add an extra time unit to given end time, because
494 // GetAllVisitsInRange, et al. queries' end value is non-inclusive.
495 base::Time effective_end_time =
496 base::Time::FromInternalValue(end_time.ToInternalValue() + 1);
497
498 VisitVector deleted_visits;
499 bool more_to_expire = reader->Read(effective_end_time, main_db_,
500 &deleted_visits, max_visits);
501
502 DeleteEffects deleted_effects;
503 DeleteVisitRelatedInfo(deleted_visits, &deleted_effects);
504 ExpireURLsForVisits(deleted_visits, &deleted_effects);
505 DeleteFaviconsIfPossible(&deleted_effects);
506
507 BroadcastNotifications(&deleted_effects, DELETION_EXPIRED);
508
509 return more_to_expire;
510 }
511
ParanoidExpireHistory()512 void ExpireHistoryBackend::ParanoidExpireHistory() {
513 // TODO(brettw): Bug 1067331: write this to clean up any errors.
514 }
515
GetHistoryClient()516 HistoryClient* ExpireHistoryBackend::GetHistoryClient() {
517 // We use the history client to determine if a URL is bookmarked. The data is
518 // loaded on a separate thread and may not be done by the time we get here.
519 // We therefore block until the bookmarks have finished loading.
520 if (history_client_)
521 history_client_->BlockUntilBookmarksLoaded();
522 return history_client_;
523 }
524
525 } // namespace history
526