1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "chrome/browser/history/expire_history_backend.h"
6
7 #include <algorithm>
8 #include <limits>
9
10 #include "base/compiler_specific.h"
11 #include "base/file_util.h"
12 #include "base/message_loop.h"
13 #include "chrome/browser/bookmarks/bookmark_service.h"
14 #include "chrome/browser/history/archived_database.h"
15 #include "chrome/browser/history/history_database.h"
16 #include "chrome/browser/history/history_notifications.h"
17 #include "chrome/browser/history/text_database.h"
18 #include "chrome/browser/history/text_database_manager.h"
19 #include "chrome/browser/history/thumbnail_database.h"
20 #include "content/common/notification_type.h"
21
22 using base::Time;
23 using base::TimeDelta;
24
25 namespace history {
26
27 namespace {
28
// How many days the expiration cutoff is moved forward for visits that we want
// to expire early (for example, visits with the AUTO_SUBFRAME transition type).
const int kEarlyExpirationAdvanceDays = 30;
32
33 // Reads all types of visits starting from beginning of time to the given end
34 // time. This is the most general reader.
35 class AllVisitsReader : public ExpiringVisitsReader {
36 public:
Read(Time end_time,HistoryDatabase * db,VisitVector * visits,int max_visits) const37 virtual bool Read(Time end_time, HistoryDatabase* db,
38 VisitVector* visits, int max_visits) const {
39 DCHECK(db) << "must have a database to operate upon";
40 DCHECK(visits) << "visit vector has to exist in order to populate it";
41
42 db->GetAllVisitsInRange(Time(), end_time, max_visits, visits);
43 // When we got the maximum number of visits we asked for, we say there could
44 // be additional things to expire now.
45 return static_cast<int>(visits->size()) == max_visits;
46 }
47 };
48
49 // Reads only AUTO_SUBFRAME visits, within a computed range. The range is
50 // computed as follows:
51 // * |begin_time| is read from the meta table. This value is updated whenever
52 // there are no more additional visits to expire by this reader.
53 // * |end_time| is advanced forward by a constant (kEarlyExpirationAdvanceDay),
54 // but not past the current time.
55 class AutoSubframeVisitsReader : public ExpiringVisitsReader {
56 public:
Read(Time end_time,HistoryDatabase * db,VisitVector * visits,int max_visits) const57 virtual bool Read(Time end_time, HistoryDatabase* db,
58 VisitVector* visits, int max_visits) const {
59 DCHECK(db) << "must have a database to operate upon";
60 DCHECK(visits) << "visit vector has to exist in order to populate it";
61
62 Time begin_time = db->GetEarlyExpirationThreshold();
63 // Advance |end_time| to expire early.
64 Time early_end_time = end_time +
65 TimeDelta::FromDays(kEarlyExpirationAdvanceDays);
66
67 // We don't want to set the early expiration threshold to a time in the
68 // future.
69 Time now = Time::Now();
70 if (early_end_time > now)
71 early_end_time = now;
72
73 db->GetVisitsInRangeForTransition(begin_time, early_end_time,
74 max_visits,
75 PageTransition::AUTO_SUBFRAME,
76 visits);
77 bool more = static_cast<int>(visits->size()) == max_visits;
78 if (!more)
79 db->UpdateEarlyExpirationThreshold(early_end_time);
80
81 return more;
82 }
83 };
84
85 // Returns true if this visit is worth archiving. Otherwise, this visit is not
86 // worth saving (for example, subframe navigations and redirects) and we can
87 // just delete it when it gets old.
ShouldArchiveVisit(const VisitRow & visit)88 bool ShouldArchiveVisit(const VisitRow& visit) {
89 int no_qualifier = PageTransition::StripQualifier(visit.transition);
90
91 // These types of transitions are always "important" and the user will want
92 // to see them.
93 if (no_qualifier == PageTransition::TYPED ||
94 no_qualifier == PageTransition::AUTO_BOOKMARK ||
95 no_qualifier == PageTransition::START_PAGE)
96 return true;
97
98 // Only archive these "less important" transitions when they were the final
99 // navigation and not part of a redirect chain.
100 if ((no_qualifier == PageTransition::LINK ||
101 no_qualifier == PageTransition::FORM_SUBMIT ||
102 no_qualifier == PageTransition::KEYWORD ||
103 no_qualifier == PageTransition::GENERATED) &&
104 visit.transition & PageTransition::CHAIN_END)
105 return true;
106
107 // The transition types we ignore are AUTO_SUBFRAME and MANUAL_SUBFRAME.
108 return false;
109 }
110
// Upper bound on the number of visits expired every time we check for old
// items. Capping it prevents us from doing too much work at any given time.
const int kNumExpirePerIteration = 10;

// Delay, in seconds, before the next expiration check when we believe more
// items are waiting, i.e. the previous check found at least
// kNumExpirePerIteration items and we want to look again "soon."
const int kExpirationDelaySec = 30;

// Delay, in minutes, before the next expiration check when the previous check
// did not find enough items to expire. If nothing was left last time, it is
// likely nothing will be there next time either, so we wait longer to avoid
// wasting CPU.
const int kExpirationEmptyDelayMin = 5;

// Delay, in minutes, before running the task that deletes old history index
// files.
const int kIndexExpirationDelayMin = 2;

// Number of most recent months whose history index files are never deleted.
const int kStoreHistoryIndexesForMonths = 12;
134
135 } // namespace
136
137 struct ExpireHistoryBackend::DeleteDependencies {
138 // The time range affected. These can be is_null() to be unbounded in one
139 // or both directions.
140 base::Time begin_time, end_time;
141
142 // ----- Filled by DeleteVisitRelatedInfo or manually if a function doesn't
143 // call that function. -----
144
145 // The unique URL rows affected by this delete.
146 std::map<URLID, URLRow> affected_urls;
147
148 // ----- Filled by DeleteOneURL -----
149
150 // The URLs deleted during this operation.
151 std::vector<URLRow> deleted_urls;
152
153 // The list of all favicon IDs that the affected URLs had. Favicons will be
154 // shared between all URLs with the same favicon, so this is the set of IDs
155 // that we will need to check when the delete operations are complete.
156 std::set<FaviconID> affected_favicons;
157
158 // Tracks the set of databases that have changed so we can optimize when
159 // when we're done.
160 TextDatabaseManager::ChangeSet text_db_changes;
161 };
162
ExpireHistoryBackend(BroadcastNotificationDelegate * delegate,BookmarkService * bookmark_service)163 ExpireHistoryBackend::ExpireHistoryBackend(
164 BroadcastNotificationDelegate* delegate,
165 BookmarkService* bookmark_service)
166 : delegate_(delegate),
167 main_db_(NULL),
168 archived_db_(NULL),
169 thumb_db_(NULL),
170 text_db_(NULL),
171 ALLOW_THIS_IN_INITIALIZER_LIST(factory_(this)),
172 bookmark_service_(bookmark_service) {
173 }
174
~ExpireHistoryBackend()175 ExpireHistoryBackend::~ExpireHistoryBackend() {
176 }
177
SetDatabases(HistoryDatabase * main_db,ArchivedDatabase * archived_db,ThumbnailDatabase * thumb_db,TextDatabaseManager * text_db)178 void ExpireHistoryBackend::SetDatabases(HistoryDatabase* main_db,
179 ArchivedDatabase* archived_db,
180 ThumbnailDatabase* thumb_db,
181 TextDatabaseManager* text_db) {
182 main_db_ = main_db;
183 archived_db_ = archived_db;
184 thumb_db_ = thumb_db;
185 text_db_ = text_db;
186 }
187
DeleteURL(const GURL & url)188 void ExpireHistoryBackend::DeleteURL(const GURL& url) {
189 if (!main_db_)
190 return;
191
192 URLRow url_row;
193 if (!main_db_->GetRowForURL(url, &url_row))
194 return; // Nothing to delete.
195
196 // Collect all the visits and delete them. Note that we don't give up if
197 // there are no visits, since the URL could still have an entry that we should
198 // delete.
199 // TODO(brettw): bug 1171148: We should also delete from the archived DB.
200 VisitVector visits;
201 main_db_->GetVisitsForURL(url_row.id(), &visits);
202
203 DeleteDependencies dependencies;
204 DeleteVisitRelatedInfo(visits, &dependencies);
205
206 // We skip ExpireURLsForVisits (since we are deleting from the URL, and not
207 // starting with visits in a given time range). We therefore need to call the
208 // deletion and favicon update functions manually.
209
210 BookmarkService* bookmark_service = GetBookmarkService();
211 bool is_bookmarked =
212 (bookmark_service && bookmark_service->IsBookmarked(url));
213
214 DeleteOneURL(url_row, is_bookmarked, &dependencies);
215 if (!is_bookmarked)
216 DeleteFaviconsIfPossible(dependencies.affected_favicons);
217
218 if (text_db_)
219 text_db_->OptimizeChangedDatabases(dependencies.text_db_changes);
220
221 BroadcastDeleteNotifications(&dependencies);
222 }
223
ExpireHistoryBetween(const std::set<GURL> & restrict_urls,Time begin_time,Time end_time)224 void ExpireHistoryBackend::ExpireHistoryBetween(
225 const std::set<GURL>& restrict_urls, Time begin_time, Time end_time) {
226 if (!main_db_)
227 return;
228
229 // There may be stuff in the text database manager's temporary cache.
230 if (text_db_)
231 text_db_->DeleteFromUncommitted(restrict_urls, begin_time, end_time);
232
233 // Find the affected visits and delete them.
234 // TODO(brettw): bug 1171164: We should query the archived database here, too.
235 VisitVector visits;
236 main_db_->GetAllVisitsInRange(begin_time, end_time, 0, &visits);
237 if (!restrict_urls.empty()) {
238 std::set<URLID> url_ids;
239 for (std::set<GURL>::const_iterator url = restrict_urls.begin();
240 url != restrict_urls.end(); ++url)
241 url_ids.insert(main_db_->GetRowForURL(*url, NULL));
242 VisitVector all_visits;
243 all_visits.swap(visits);
244 for (VisitVector::iterator visit = all_visits.begin();
245 visit != all_visits.end(); ++visit) {
246 if (url_ids.find(visit->url_id) != url_ids.end())
247 visits.push_back(*visit);
248 }
249 }
250 if (visits.empty())
251 return;
252
253 DeleteDependencies dependencies;
254 DeleteVisitRelatedInfo(visits, &dependencies);
255
256 // Delete or update the URLs affected. We want to update the visit counts
257 // since this is called by the user who wants to delete their recent history,
258 // and we don't want to leave any evidence.
259 ExpireURLsForVisits(visits, &dependencies);
260 DeleteFaviconsIfPossible(dependencies.affected_favicons);
261
262 // An is_null begin time means that all history should be deleted.
263 BroadcastDeleteNotifications(&dependencies);
264
265 // Pick up any bits possibly left over.
266 ParanoidExpireHistory();
267 }
268
ArchiveHistoryBefore(Time end_time)269 void ExpireHistoryBackend::ArchiveHistoryBefore(Time end_time) {
270 if (!main_db_)
271 return;
272
273 // Archive as much history as possible before the given date.
274 ArchiveSomeOldHistory(end_time, GetAllVisitsReader(),
275 std::numeric_limits<size_t>::max());
276 ParanoidExpireHistory();
277 }
278
InitWorkQueue()279 void ExpireHistoryBackend::InitWorkQueue() {
280 DCHECK(work_queue_.empty()) << "queue has to be empty prior to init";
281
282 for (size_t i = 0; i < readers_.size(); i++)
283 work_queue_.push(readers_[i]);
284 }
285
GetAllVisitsReader()286 const ExpiringVisitsReader* ExpireHistoryBackend::GetAllVisitsReader() {
287 if (!all_visits_reader_.get())
288 all_visits_reader_.reset(new AllVisitsReader());
289 return all_visits_reader_.get();
290 }
291
292 const ExpiringVisitsReader*
GetAutoSubframeVisitsReader()293 ExpireHistoryBackend::GetAutoSubframeVisitsReader() {
294 if (!auto_subframe_visits_reader_.get())
295 auto_subframe_visits_reader_.reset(new AutoSubframeVisitsReader());
296 return auto_subframe_visits_reader_.get();
297 }
298
StartArchivingOldStuff(TimeDelta expiration_threshold)299 void ExpireHistoryBackend::StartArchivingOldStuff(
300 TimeDelta expiration_threshold) {
301 expiration_threshold_ = expiration_threshold;
302
303 // Remove all readers, just in case this was method was called before.
304 readers_.clear();
305 // For now, we explicitly add all known readers. If we come up with more
306 // reader types (in case we want to expire different types of visits in
307 // different ways), we can make it be populated by creator/owner of
308 // ExpireHistoryBackend.
309 readers_.push_back(GetAllVisitsReader());
310 readers_.push_back(GetAutoSubframeVisitsReader());
311
312 // Initialize the queue with all tasks for the first set of iterations.
313 InitWorkQueue();
314 ScheduleArchive();
315 ScheduleExpireHistoryIndexFiles();
316 }
317
DeleteFaviconsIfPossible(const std::set<FaviconID> & favicon_set)318 void ExpireHistoryBackend::DeleteFaviconsIfPossible(
319 const std::set<FaviconID>& favicon_set) {
320 if (!thumb_db_)
321 return;
322
323 for (std::set<FaviconID>::const_iterator i = favicon_set.begin();
324 i != favicon_set.end(); ++i) {
325 if (!thumb_db_->HasMappingFor(*i))
326 thumb_db_->DeleteFavicon(*i);
327 }
328 }
329
BroadcastDeleteNotifications(DeleteDependencies * dependencies)330 void ExpireHistoryBackend::BroadcastDeleteNotifications(
331 DeleteDependencies* dependencies) {
332 if (!dependencies->deleted_urls.empty()) {
333 // Broadcast the URL deleted notification. Note that we also broadcast when
334 // we were requested to delete everything even if that was a NOP, since
335 // some components care to know when history is deleted (it's up to them to
336 // determine if they care whether anything was deleted).
337 URLsDeletedDetails* deleted_details = new URLsDeletedDetails;
338 deleted_details->all_history = false;
339 std::vector<URLRow> typed_urls_changed; // Collect this for later.
340 for (size_t i = 0; i < dependencies->deleted_urls.size(); i++) {
341 deleted_details->urls.insert(dependencies->deleted_urls[i].url());
342 if (dependencies->deleted_urls[i].typed_count() > 0)
343 typed_urls_changed.push_back(dependencies->deleted_urls[i]);
344 }
345 delegate_->BroadcastNotifications(NotificationType::HISTORY_URLS_DELETED,
346 deleted_details);
347
348 // Broadcast the typed URL changed modification (this updates the inline
349 // autocomplete database).
350 //
351 // Note: if we ever need to broadcast changes to more than just typed URLs,
352 // this notification should be changed rather than a new "non-typed"
353 // notification added. The in-memory database can always do the filtering
354 // itself in that case.
355 if (!typed_urls_changed.empty()) {
356 URLsModifiedDetails* modified_details = new URLsModifiedDetails;
357 modified_details->changed_urls.swap(typed_urls_changed);
358 delegate_->BroadcastNotifications(
359 NotificationType::HISTORY_TYPED_URLS_MODIFIED,
360 modified_details);
361 }
362 }
363 }
364
DeleteVisitRelatedInfo(const VisitVector & visits,DeleteDependencies * dependencies)365 void ExpireHistoryBackend::DeleteVisitRelatedInfo(
366 const VisitVector& visits,
367 DeleteDependencies* dependencies) {
368 for (size_t i = 0; i < visits.size(); i++) {
369 // Delete the visit itself.
370 main_db_->DeleteVisit(visits[i]);
371
372 // Add the URL row to the affected URL list.
373 std::map<URLID, URLRow>::const_iterator found =
374 dependencies->affected_urls.find(visits[i].url_id);
375 const URLRow* cur_row = NULL;
376 if (found == dependencies->affected_urls.end()) {
377 URLRow row;
378 if (!main_db_->GetURLRow(visits[i].url_id, &row))
379 continue;
380 dependencies->affected_urls[visits[i].url_id] = row;
381 cur_row = &dependencies->affected_urls[visits[i].url_id];
382 } else {
383 cur_row = &found->second;
384 }
385
386 // Delete any associated full-text indexed data.
387 if (visits[i].is_indexed && text_db_) {
388 text_db_->DeletePageData(visits[i].visit_time, cur_row->url(),
389 &dependencies->text_db_changes);
390 }
391 }
392 }
393
DeleteOneURL(const URLRow & url_row,bool is_bookmarked,DeleteDependencies * dependencies)394 void ExpireHistoryBackend::DeleteOneURL(
395 const URLRow& url_row,
396 bool is_bookmarked,
397 DeleteDependencies* dependencies) {
398 main_db_->DeleteSegmentForURL(url_row.id());
399
400 // The URL may be in the text database manager's temporary cache.
401 if (text_db_) {
402 std::set<GURL> restrict_urls;
403 restrict_urls.insert(url_row.url());
404 text_db_->DeleteFromUncommitted(restrict_urls, base::Time(), base::Time());
405 }
406
407 if (!is_bookmarked) {
408 dependencies->deleted_urls.push_back(url_row);
409
410 // Delete stuff that references this URL.
411 if (thumb_db_) {
412 thumb_db_->DeleteThumbnail(url_row.id());
413
414 // Collect shared information.
415 std::vector<IconMapping> icon_mappings;
416 if (thumb_db_->GetIconMappingsForPageURL(url_row.url(), &icon_mappings)) {
417 for (std::vector<IconMapping>::iterator m = icon_mappings.begin();
418 m != icon_mappings.end(); ++m) {
419 dependencies->affected_favicons.insert(m->icon_id);
420 }
421 // Delete the mapping entries for the url.
422 thumb_db_->DeleteIconMappings(url_row.url());
423 }
424 }
425 // Last, delete the URL entry.
426 main_db_->DeleteURLRow(url_row.id());
427 }
428 }
429
ArchiveOneURL(const URLRow & url_row)430 URLID ExpireHistoryBackend::ArchiveOneURL(const URLRow& url_row) {
431 if (!archived_db_)
432 return 0;
433
434 // See if this URL is present in the archived database already. Note that
435 // we must look up by ID since the URL ID will be different.
436 URLRow archived_row;
437 if (archived_db_->GetRowForURL(url_row.url(), &archived_row)) {
438 // TODO(sky): bug 1168470, need to archive past search terms.
439 // TODO(brettw): should be copy the visit counts over? This will mean that
440 // the main DB's visit counts are only for the last 3 months rather than
441 // accumulative.
442 archived_row.set_last_visit(url_row.last_visit());
443 archived_db_->UpdateURLRow(archived_row.id(), archived_row);
444 return archived_row.id();
445 }
446
447 // This row is not in the archived DB, add it.
448 return archived_db_->AddURL(url_row);
449 }
450
namespace {

// Per-URL tally of the visits being removed: the total number of visits and
// how many of them count as typed.
struct ChangedURL {
  int visit_count;
  int typed_count;

  ChangedURL() : visit_count(0), typed_count(0) {}
};

}  // namespace
460
ExpireURLsForVisits(const VisitVector & visits,DeleteDependencies * dependencies)461 void ExpireHistoryBackend::ExpireURLsForVisits(
462 const VisitVector& visits,
463 DeleteDependencies* dependencies) {
464 // First find all unique URLs and the number of visits we're deleting for
465 // each one.
466 std::map<URLID, ChangedURL> changed_urls;
467 for (size_t i = 0; i < visits.size(); i++) {
468 ChangedURL& cur = changed_urls[visits[i].url_id];
469 cur.visit_count++;
470 // NOTE: This code must stay in sync with HistoryBackend::AddPageVisit().
471 // TODO(pkasting): http://b/1148304 We shouldn't be marking so many URLs as
472 // typed, which would eliminate the need for this code.
473 PageTransition::Type transition =
474 PageTransition::StripQualifier(visits[i].transition);
475 if ((transition == PageTransition::TYPED &&
476 !PageTransition::IsRedirect(visits[i].transition)) ||
477 transition == PageTransition::KEYWORD_GENERATED)
478 cur.typed_count++;
479 }
480
481 // Check each unique URL with deleted visits.
482 BookmarkService* bookmark_service = GetBookmarkService();
483 for (std::map<URLID, ChangedURL>::const_iterator i = changed_urls.begin();
484 i != changed_urls.end(); ++i) {
485 // The unique URL rows should already be filled into the dependencies.
486 URLRow& url_row = dependencies->affected_urls[i->first];
487 if (!url_row.id())
488 continue; // URL row doesn't exist in the database.
489
490 // Check if there are any other visits for this URL and update the time
491 // (the time change may not actually be synced to disk below when we're
492 // archiving).
493 VisitRow last_visit;
494 if (main_db_->GetMostRecentVisitForURL(url_row.id(), &last_visit))
495 url_row.set_last_visit(last_visit.visit_time);
496 else
497 url_row.set_last_visit(Time());
498
499 // Don't delete URLs with visits still in the DB, or bookmarked.
500 bool is_bookmarked =
501 (bookmark_service && bookmark_service->IsBookmarked(url_row.url()));
502 if (!is_bookmarked && url_row.last_visit().is_null()) {
503 // Not bookmarked and no more visits. Nuke the url.
504 DeleteOneURL(url_row, is_bookmarked, dependencies);
505 } else {
506 // NOTE: The calls to std::max() below are a backstop, but they should
507 // never actually be needed unless the database is corrupt (I think).
508 url_row.set_visit_count(
509 std::max(0, url_row.visit_count() - i->second.visit_count));
510 url_row.set_typed_count(
511 std::max(0, url_row.typed_count() - i->second.typed_count));
512
513 // Update the db with the new details.
514 main_db_->UpdateURLRow(url_row.id(), url_row);
515 }
516 }
517 }
518
ArchiveURLsAndVisits(const VisitVector & visits,DeleteDependencies * dependencies)519 void ExpireHistoryBackend::ArchiveURLsAndVisits(
520 const VisitVector& visits,
521 DeleteDependencies* dependencies) {
522 if (!archived_db_ || !main_db_)
523 return;
524
525 // Make sure all unique URL rows are added to the dependency list and the
526 // archived database. We will also keep the mapping between the main DB URLID
527 // and the archived one.
528 std::map<URLID, URLID> main_id_to_archived_id;
529 for (size_t i = 0; i < visits.size(); i++) {
530 std::map<URLID, URLRow>::const_iterator found =
531 dependencies->affected_urls.find(visits[i].url_id);
532 if (found == dependencies->affected_urls.end()) {
533 // Unique URL encountered, archive it.
534 URLRow row; // Row in the main DB.
535 URLID archived_id; // ID in the archived DB.
536 if (!main_db_->GetURLRow(visits[i].url_id, &row) ||
537 !(archived_id = ArchiveOneURL(row))) {
538 // Failure archiving, skip this one.
539 continue;
540 }
541
542 // Only add URL to the dependency list once we know we successfully
543 // archived it.
544 main_id_to_archived_id[row.id()] = archived_id;
545 dependencies->affected_urls[row.id()] = row;
546 }
547 }
548
549 // Retrieve the sources for all the archived visits before archiving.
550 // The returned visit_sources vector should contain the source for each visit
551 // from visits at the same index.
552 VisitSourceMap visit_sources;
553 main_db_->GetVisitsSource(visits, &visit_sources);
554
555 // Now archive the visits since we know the URL ID to make them reference.
556 // The source visit list should still reference the visits in the main DB, but
557 // we will update it to reflect only the visits that were successfully
558 // archived.
559 for (size_t i = 0; i < visits.size(); i++) {
560 // Construct the visit that we will add to the archived database. We do
561 // not store referring visits since we delete many of the visits when
562 // archiving.
563 VisitRow cur_visit(visits[i]);
564 cur_visit.url_id = main_id_to_archived_id[cur_visit.url_id];
565 cur_visit.referring_visit = 0;
566 VisitSourceMap::iterator iter = visit_sources.find(visits[i].visit_id);
567 archived_db_->AddVisit(
568 &cur_visit,
569 iter == visit_sources.end() ? SOURCE_BROWSED : iter->second);
570 // Ignore failures, we will delete it from the main DB no matter what.
571 }
572 }
573
ScheduleArchive()574 void ExpireHistoryBackend::ScheduleArchive() {
575 TimeDelta delay;
576 if (work_queue_.empty()) {
577 // If work queue is empty, reset the work queue to contain all tasks and
578 // schedule next iteration after a longer delay.
579 InitWorkQueue();
580 delay = TimeDelta::FromMinutes(kExpirationEmptyDelayMin);
581 } else {
582 delay = TimeDelta::FromSeconds(kExpirationDelaySec);
583 }
584
585 MessageLoop::current()->PostDelayedTask(FROM_HERE, factory_.NewRunnableMethod(
586 &ExpireHistoryBackend::DoArchiveIteration), delay.InMilliseconds());
587 }
588
DoArchiveIteration()589 void ExpireHistoryBackend::DoArchiveIteration() {
590 DCHECK(!work_queue_.empty()) << "queue has to be non-empty";
591
592 const ExpiringVisitsReader* reader = work_queue_.front();
593 bool more_to_expire = ArchiveSomeOldHistory(GetCurrentArchiveTime(), reader,
594 kNumExpirePerIteration);
595
596 work_queue_.pop();
597 // If there are more items to expire, add the reader back to the queue, thus
598 // creating a new task for future iterations.
599 if (more_to_expire)
600 work_queue_.push(reader);
601
602 ScheduleArchive();
603 }
604
ArchiveSomeOldHistory(base::Time end_time,const ExpiringVisitsReader * reader,int max_visits)605 bool ExpireHistoryBackend::ArchiveSomeOldHistory(
606 base::Time end_time,
607 const ExpiringVisitsReader* reader,
608 int max_visits) {
609 if (!main_db_)
610 return false;
611
612 // Add an extra time unit to given end time, because
613 // GetAllVisitsInRange, et al. queries' end value is non-inclusive.
614 Time effective_end_time =
615 Time::FromInternalValue(end_time.ToInternalValue() + 1);
616
617 VisitVector affected_visits;
618 bool more_to_expire = reader->Read(effective_end_time, main_db_,
619 &affected_visits, max_visits);
620
621 // Some visits we'll delete while others we'll archive.
622 VisitVector deleted_visits, archived_visits;
623 for (size_t i = 0; i < affected_visits.size(); i++) {
624 if (ShouldArchiveVisit(affected_visits[i]))
625 archived_visits.push_back(affected_visits[i]);
626 else
627 deleted_visits.push_back(affected_visits[i]);
628 }
629
630 // Do the actual archiving.
631 DeleteDependencies archived_dependencies;
632 ArchiveURLsAndVisits(archived_visits, &archived_dependencies);
633 DeleteVisitRelatedInfo(archived_visits, &archived_dependencies);
634
635 DeleteDependencies deleted_dependencies;
636 DeleteVisitRelatedInfo(deleted_visits, &deleted_dependencies);
637
638 // This will remove or archive all the affected URLs. Must do the deleting
639 // cleanup before archiving so the delete dependencies structure references
640 // only those URLs that were actually deleted instead of having some visits
641 // archived and then the rest deleted.
642 ExpireURLsForVisits(deleted_visits, &deleted_dependencies);
643 ExpireURLsForVisits(archived_visits, &archived_dependencies);
644
645 // Create a union of all affected favicons (we don't store favicons for
646 // archived URLs) and delete them.
647 std::set<FaviconID> affected_favicons(
648 archived_dependencies.affected_favicons);
649 for (std::set<FaviconID>::const_iterator i =
650 deleted_dependencies.affected_favicons.begin();
651 i != deleted_dependencies.affected_favicons.end(); ++i) {
652 affected_favicons.insert(*i);
653 }
654 DeleteFaviconsIfPossible(affected_favicons);
655
656 // Send notifications for the stuff that was deleted. These won't normally be
657 // in history views since they were subframes, but they will be in the visited
658 // link system, which needs to be updated now. This function is smart enough
659 // to not do anything if nothing was deleted.
660 BroadcastDeleteNotifications(&deleted_dependencies);
661
662 return more_to_expire;
663 }
664
ParanoidExpireHistory()665 void ExpireHistoryBackend::ParanoidExpireHistory() {
666 // TODO(brettw): Bug 1067331: write this to clean up any errors.
667 }
668
ScheduleExpireHistoryIndexFiles()669 void ExpireHistoryBackend::ScheduleExpireHistoryIndexFiles() {
670 if (!text_db_) {
671 // Can't expire old history index files because we
672 // don't know where they're located.
673 return;
674 }
675
676 TimeDelta delay = TimeDelta::FromMinutes(kIndexExpirationDelayMin);
677 MessageLoop::current()->PostDelayedTask(
678 FROM_HERE, factory_.NewRunnableMethod(
679 &ExpireHistoryBackend::DoExpireHistoryIndexFiles),
680 delay.InMilliseconds());
681 }
682
DoExpireHistoryIndexFiles()683 void ExpireHistoryBackend::DoExpireHistoryIndexFiles() {
684 Time::Exploded exploded;
685 Time::Now().LocalExplode(&exploded);
686 int cutoff_month =
687 exploded.year * 12 + exploded.month - kStoreHistoryIndexesForMonths;
688 TextDatabase::DBIdent cutoff_id =
689 (cutoff_month / 12) * 100 + (cutoff_month % 12);
690
691 FilePath::StringType history_index_files_pattern = TextDatabase::file_base();
692 history_index_files_pattern.append(FILE_PATH_LITERAL("*"));
693 file_util::FileEnumerator file_enumerator(
694 text_db_->GetDir(), false, file_util::FileEnumerator::FILES,
695 history_index_files_pattern);
696 for (FilePath file = file_enumerator.Next(); !file.empty();
697 file = file_enumerator.Next()) {
698 TextDatabase::DBIdent file_id = TextDatabase::FileNameToID(file);
699 if (file_id < cutoff_id)
700 file_util::Delete(file, false);
701 }
702 }
703
GetBookmarkService()704 BookmarkService* ExpireHistoryBackend::GetBookmarkService() {
705 // We use the bookmark service to determine if a URL is bookmarked. The
706 // bookmark service is loaded on a separate thread and may not be done by the
707 // time we get here. We therefor block until the bookmarks have finished
708 // loading.
709 if (bookmark_service_)
710 bookmark_service_->BlockTillLoaded();
711 return bookmark_service_;
712 }
713
714 } // namespace history
715