1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "chrome/browser/history/history_database.h"
6
7 #include <algorithm>
8 #include <set>
9 #include <string>
10
11 #include "base/command_line.h"
12 #include "base/files/file_util.h"
13 #include "base/metrics/histogram.h"
14 #include "base/rand_util.h"
15 #include "base/strings/string_util.h"
16 #include "base/time/time.h"
17 #include "sql/transaction.h"
18
19 #if defined(OS_MACOSX)
20 #include "base/mac/mac_util.h"
21 #endif
22
23 namespace history {
24
namespace {

// Schema version this code writes. Databases are always stamped with this
// value after a successful migration.
const int kCurrentVersionNumber = 29;

// Oldest schema version whose readers can still make do with a database
// written at kCurrentVersionNumber without *too* many bad effects.
const int kCompatibleVersionNumber = 16;

// Meta-table key under which the early expiration threshold is persisted.
const char kEarlyExpirationThresholdKey[] = "early_expiration_threshold";

}  // namespace
35
HistoryDatabase()36 HistoryDatabase::HistoryDatabase() {
37 }
38
~HistoryDatabase()39 HistoryDatabase::~HistoryDatabase() {
40 }
41
Init(const base::FilePath & history_name)42 sql::InitStatus HistoryDatabase::Init(const base::FilePath& history_name) {
43 db_.set_histogram_tag("History");
44
45 // Set the exceptional sqlite error handler.
46 db_.set_error_callback(error_callback_);
47
48 // Set the database page size to something a little larger to give us
49 // better performance (we're typically seek rather than bandwidth limited).
50 // This only has an effect before any tables have been created, otherwise
51 // this is a NOP. Must be a power of 2 and a max of 8192.
52 db_.set_page_size(4096);
53
54 // Set the cache size. The page size, plus a little extra, times this
55 // value, tells us how much memory the cache will use maximum.
56 // 1000 * 4kB = 4MB
57 // TODO(brettw) scale this value to the amount of available memory.
58 db_.set_cache_size(1000);
59
60 // Note that we don't set exclusive locking here. That's done by
61 // BeginExclusiveMode below which is called later (we have to be in shared
62 // mode to start out for the in-memory backend to read the data).
63
64 if (!db_.Open(history_name))
65 return sql::INIT_FAILURE;
66
67 // Wrap the rest of init in a tranaction. This will prevent the database from
68 // getting corrupted if we crash in the middle of initialization or migration.
69 sql::Transaction committer(&db_);
70 if (!committer.Begin())
71 return sql::INIT_FAILURE;
72
73 #if defined(OS_MACOSX)
74 // Exclude the history file from backups.
75 base::mac::SetFileBackupExclusion(history_name);
76 #endif
77
78 // Prime the cache.
79 db_.Preload();
80
81 // Create the tables and indices.
82 // NOTE: If you add something here, also add it to
83 // RecreateAllButStarAndURLTables.
84 if (!meta_table_.Init(&db_, GetCurrentVersion(), kCompatibleVersionNumber))
85 return sql::INIT_FAILURE;
86 if (!CreateURLTable(false) || !InitVisitTable() ||
87 !InitKeywordSearchTermsTable() || !InitDownloadTable() ||
88 !InitSegmentTables())
89 return sql::INIT_FAILURE;
90 CreateMainURLIndex();
91 CreateKeywordSearchTermsIndices();
92
93 // TODO(benjhayden) Remove at some point.
94 meta_table_.DeleteKey("next_download_id");
95
96 // Version check.
97 sql::InitStatus version_status = EnsureCurrentVersion();
98 if (version_status != sql::INIT_OK)
99 return version_status;
100
101 return committer.Commit() ? sql::INIT_OK : sql::INIT_FAILURE;
102 }
103
ComputeDatabaseMetrics(const base::FilePath & history_name)104 void HistoryDatabase::ComputeDatabaseMetrics(
105 const base::FilePath& history_name) {
106 base::TimeTicks start_time = base::TimeTicks::Now();
107 int64 file_size = 0;
108 if (!base::GetFileSize(history_name, &file_size))
109 return;
110 int file_mb = static_cast<int>(file_size / (1024 * 1024));
111 UMA_HISTOGRAM_MEMORY_MB("History.DatabaseFileMB", file_mb);
112
113 sql::Statement url_count(db_.GetUniqueStatement("SELECT count(*) FROM urls"));
114 if (!url_count.Step())
115 return;
116 UMA_HISTOGRAM_COUNTS("History.URLTableCount", url_count.ColumnInt(0));
117
118 sql::Statement visit_count(db_.GetUniqueStatement(
119 "SELECT count(*) FROM visits"));
120 if (!visit_count.Step())
121 return;
122 UMA_HISTOGRAM_COUNTS("History.VisitTableCount", visit_count.ColumnInt(0));
123
124 base::Time one_week_ago = base::Time::Now() - base::TimeDelta::FromDays(7);
125 sql::Statement weekly_visit_sql(db_.GetUniqueStatement(
126 "SELECT count(*) FROM visits WHERE visit_time > ?"));
127 weekly_visit_sql.BindInt64(0, one_week_ago.ToInternalValue());
128 int weekly_visit_count = 0;
129 if (weekly_visit_sql.Step())
130 weekly_visit_count = weekly_visit_sql.ColumnInt(0);
131 UMA_HISTOGRAM_COUNTS("History.WeeklyVisitCount", weekly_visit_count);
132
133 base::Time one_month_ago = base::Time::Now() - base::TimeDelta::FromDays(30);
134 sql::Statement monthly_visit_sql(db_.GetUniqueStatement(
135 "SELECT count(*) FROM visits WHERE visit_time > ? AND visit_time <= ?"));
136 monthly_visit_sql.BindInt64(0, one_month_ago.ToInternalValue());
137 monthly_visit_sql.BindInt64(1, one_week_ago.ToInternalValue());
138 int older_visit_count = 0;
139 if (monthly_visit_sql.Step())
140 older_visit_count = monthly_visit_sql.ColumnInt(0);
141 UMA_HISTOGRAM_COUNTS("History.MonthlyVisitCount",
142 older_visit_count + weekly_visit_count);
143
144 UMA_HISTOGRAM_TIMES("History.DatabaseBasicMetricsTime",
145 base::TimeTicks::Now() - start_time);
146
147 // Compute the advanced metrics even less often, pending timing data showing
148 // that's not necessary.
149 if (base::RandInt(1, 3) == 3) {
150 start_time = base::TimeTicks::Now();
151
152 // Collect all URLs visited within the last month.
153 sql::Statement url_sql(db_.GetUniqueStatement(
154 "SELECT url, last_visit_time FROM urls WHERE last_visit_time > ?"));
155 url_sql.BindInt64(0, one_month_ago.ToInternalValue());
156
157 // Count URLs (which will always be unique) and unique hosts within the last
158 // week and last month.
159 int week_url_count = 0;
160 int month_url_count = 0;
161 std::set<std::string> week_hosts;
162 std::set<std::string> month_hosts;
163 while (url_sql.Step()) {
164 GURL url(url_sql.ColumnString(0));
165 base::Time visit_time =
166 base::Time::FromInternalValue(url_sql.ColumnInt64(1));
167 ++month_url_count;
168 month_hosts.insert(url.host());
169 if (visit_time > one_week_ago) {
170 ++week_url_count;
171 week_hosts.insert(url.host());
172 }
173 }
174 UMA_HISTOGRAM_COUNTS("History.WeeklyURLCount", week_url_count);
175 UMA_HISTOGRAM_COUNTS_10000("History.WeeklyHostCount", week_hosts.size());
176 UMA_HISTOGRAM_COUNTS("History.MonthlyURLCount", month_url_count);
177 UMA_HISTOGRAM_COUNTS_10000("History.MonthlyHostCount", month_hosts.size());
178 UMA_HISTOGRAM_TIMES("History.DatabaseAdvancedMetricsTime",
179 base::TimeTicks::Now() - start_time);
180 }
181 }
182
BeginExclusiveMode()183 void HistoryDatabase::BeginExclusiveMode() {
184 // We can't use set_exclusive_locking() since that only has an effect before
185 // the DB is opened.
186 ignore_result(db_.Execute("PRAGMA locking_mode=EXCLUSIVE"));
187 }
188
189 // static
GetCurrentVersion()190 int HistoryDatabase::GetCurrentVersion() {
191 return kCurrentVersionNumber;
192 }
193
BeginTransaction()194 void HistoryDatabase::BeginTransaction() {
195 db_.BeginTransaction();
196 }
197
CommitTransaction()198 void HistoryDatabase::CommitTransaction() {
199 db_.CommitTransaction();
200 }
201
RollbackTransaction()202 void HistoryDatabase::RollbackTransaction() {
203 db_.RollbackTransaction();
204 }
205
RecreateAllTablesButURL()206 bool HistoryDatabase::RecreateAllTablesButURL() {
207 if (!DropVisitTable())
208 return false;
209 if (!InitVisitTable())
210 return false;
211
212 if (!DropKeywordSearchTermsTable())
213 return false;
214 if (!InitKeywordSearchTermsTable())
215 return false;
216
217 if (!DropSegmentTables())
218 return false;
219 if (!InitSegmentTables())
220 return false;
221
222 CreateKeywordSearchTermsIndices();
223 return true;
224 }
225
Vacuum()226 void HistoryDatabase::Vacuum() {
227 DCHECK_EQ(0, db_.transaction_nesting()) <<
228 "Can not have a transaction when vacuuming.";
229 ignore_result(db_.Execute("VACUUM"));
230 }
231
TrimMemory(bool aggressively)232 void HistoryDatabase::TrimMemory(bool aggressively) {
233 db_.TrimMemory(aggressively);
234 }
235
Raze()236 bool HistoryDatabase::Raze() {
237 return db_.Raze();
238 }
239
SetSegmentID(VisitID visit_id,SegmentID segment_id)240 bool HistoryDatabase::SetSegmentID(VisitID visit_id, SegmentID segment_id) {
241 sql::Statement s(db_.GetCachedStatement(SQL_FROM_HERE,
242 "UPDATE visits SET segment_id = ? WHERE id = ?"));
243 s.BindInt64(0, segment_id);
244 s.BindInt64(1, visit_id);
245 DCHECK(db_.GetLastChangeCount() == 1);
246
247 return s.Run();
248 }
249
GetSegmentID(VisitID visit_id)250 SegmentID HistoryDatabase::GetSegmentID(VisitID visit_id) {
251 sql::Statement s(db_.GetCachedStatement(SQL_FROM_HERE,
252 "SELECT segment_id FROM visits WHERE id = ?"));
253 s.BindInt64(0, visit_id);
254
255 if (s.Step()) {
256 if (s.ColumnType(0) == sql::COLUMN_TYPE_NULL)
257 return 0;
258 else
259 return s.ColumnInt64(0);
260 }
261 return 0;
262 }
263
GetEarlyExpirationThreshold()264 base::Time HistoryDatabase::GetEarlyExpirationThreshold() {
265 if (!cached_early_expiration_threshold_.is_null())
266 return cached_early_expiration_threshold_;
267
268 int64 threshold;
269 if (!meta_table_.GetValue(kEarlyExpirationThresholdKey, &threshold)) {
270 // Set to a very early non-zero time, so it's before all history, but not
271 // zero to avoid re-retrieval.
272 threshold = 1L;
273 }
274
275 cached_early_expiration_threshold_ = base::Time::FromInternalValue(threshold);
276 return cached_early_expiration_threshold_;
277 }
278
UpdateEarlyExpirationThreshold(base::Time threshold)279 void HistoryDatabase::UpdateEarlyExpirationThreshold(base::Time threshold) {
280 meta_table_.SetValue(kEarlyExpirationThresholdKey,
281 threshold.ToInternalValue());
282 cached_early_expiration_threshold_ = threshold;
283 }
284
GetDB()285 sql::Connection& HistoryDatabase::GetDB() {
286 return db_;
287 }
288
289 // Migration -------------------------------------------------------------------
290
EnsureCurrentVersion()291 sql::InitStatus HistoryDatabase::EnsureCurrentVersion() {
292 // We can't read databases newer than we were designed for.
293 if (meta_table_.GetCompatibleVersionNumber() > kCurrentVersionNumber) {
294 LOG(WARNING) << "History database is too new.";
295 return sql::INIT_TOO_NEW;
296 }
297
298 int cur_version = meta_table_.GetVersionNumber();
299
300 // Put migration code here
301
302 if (cur_version == 15) {
303 if (!db_.Execute("DROP TABLE starred") || !DropStarredIDFromURLs()) {
304 LOG(WARNING) << "Unable to update history database to version 16.";
305 return sql::INIT_FAILURE;
306 }
307 ++cur_version;
308 meta_table_.SetVersionNumber(cur_version);
309 meta_table_.SetCompatibleVersionNumber(
310 std::min(cur_version, kCompatibleVersionNumber));
311 }
312
313 if (cur_version == 16) {
314 #if !defined(OS_WIN)
315 // In this version we bring the time format on Mac & Linux in sync with the
316 // Windows version so that profiles can be moved between computers.
317 MigrateTimeEpoch();
318 #endif
319 // On all platforms we bump the version number, so on Windows this
320 // migration is a NOP. We keep the compatible version at 16 since things
321 // will basically still work, just history will be in the future if an
322 // old version reads it.
323 ++cur_version;
324 meta_table_.SetVersionNumber(cur_version);
325 }
326
327 if (cur_version == 17) {
328 // Version 17 was for thumbnails to top sites migration. We ended up
329 // disabling it though, so 17->18 does nothing.
330 ++cur_version;
331 meta_table_.SetVersionNumber(cur_version);
332 }
333
334 if (cur_version == 18) {
335 // This is the version prior to adding url_source column. We need to
336 // migrate the database.
337 cur_version = 19;
338 meta_table_.SetVersionNumber(cur_version);
339 }
340
341 if (cur_version == 19) {
342 cur_version++;
343 meta_table_.SetVersionNumber(cur_version);
344 // This was the thumbnail migration. Obsolete.
345 }
346
347 if (cur_version == 20) {
348 // This is the version prior to adding the visit_duration field in visits
349 // database. We need to migrate the database.
350 if (!MigrateVisitsWithoutDuration()) {
351 LOG(WARNING) << "Unable to update history database to version 21.";
352 return sql::INIT_FAILURE;
353 }
354 ++cur_version;
355 meta_table_.SetVersionNumber(cur_version);
356 }
357
358 if (cur_version == 21) {
359 // The android_urls table's data schemal was changed in version 21.
360 #if defined(OS_ANDROID)
361 if (!MigrateToVersion22()) {
362 LOG(WARNING) << "Unable to migrate the android_urls table to version 22";
363 }
364 #endif
365 ++cur_version;
366 meta_table_.SetVersionNumber(cur_version);
367 }
368
369 if (cur_version == 22) {
370 if (!MigrateDownloadsState()) {
371 LOG(WARNING) << "Unable to fix invalid downloads state values";
372 // Invalid state values may cause crashes.
373 return sql::INIT_FAILURE;
374 }
375 cur_version++;
376 meta_table_.SetVersionNumber(cur_version);
377 }
378
379 if (cur_version == 23) {
380 if (!MigrateDownloadsReasonPathsAndDangerType()) {
381 LOG(WARNING) << "Unable to upgrade download interrupt reason and paths";
382 // Invalid state values may cause crashes.
383 return sql::INIT_FAILURE;
384 }
385 cur_version++;
386 meta_table_.SetVersionNumber(cur_version);
387 }
388
389 if (cur_version == 24) {
390 if (!MigratePresentationIndex()) {
391 LOG(WARNING) << "Unable to migrate history to version 25";
392 return sql::INIT_FAILURE;
393 }
394 cur_version++;
395 meta_table_.SetVersionNumber(cur_version);
396 }
397
398 if (cur_version == 25) {
399 if (!MigrateReferrer()) {
400 LOG(WARNING) << "Unable to migrate history to version 26";
401 return sql::INIT_FAILURE;
402 }
403 cur_version++;
404 meta_table_.SetVersionNumber(cur_version);
405 }
406
407 if (cur_version == 26) {
408 if (!MigrateDownloadedByExtension()) {
409 LOG(WARNING) << "Unable to migrate history to version 27";
410 return sql::INIT_FAILURE;
411 }
412 cur_version++;
413 meta_table_.SetVersionNumber(cur_version);
414 }
415
416 if (cur_version == 27) {
417 if (!MigrateDownloadValidators()) {
418 LOG(WARNING) << "Unable to migrate history to version 28";
419 return sql::INIT_FAILURE;
420 }
421 cur_version++;
422 meta_table_.SetVersionNumber(cur_version);
423 }
424
425 if (cur_version == 28) {
426 if (!MigrateMimeType()) {
427 LOG(WARNING) << "Unable to migrate history to version 29";
428 return sql::INIT_FAILURE;
429 }
430 cur_version++;
431 meta_table_.SetVersionNumber(cur_version);
432 }
433
434 // When the version is too old, we just try to continue anyway, there should
435 // not be a released product that makes a database too old for us to handle.
436 LOG_IF(WARNING, cur_version < GetCurrentVersion()) <<
437 "History database version " << cur_version << " is too old to handle.";
438
439 return sql::INIT_OK;
440 }
441
442 #if !defined(OS_WIN)
MigrateTimeEpoch()443 void HistoryDatabase::MigrateTimeEpoch() {
444 // Update all the times in the URLs and visits table in the main database.
445 ignore_result(db_.Execute(
446 "UPDATE urls "
447 "SET last_visit_time = last_visit_time + 11644473600000000 "
448 "WHERE id IN (SELECT id FROM urls WHERE last_visit_time > 0);"));
449 ignore_result(db_.Execute(
450 "UPDATE visits "
451 "SET visit_time = visit_time + 11644473600000000 "
452 "WHERE id IN (SELECT id FROM visits WHERE visit_time > 0);"));
453 ignore_result(db_.Execute(
454 "UPDATE segment_usage "
455 "SET time_slot = time_slot + 11644473600000000 "
456 "WHERE id IN (SELECT id FROM segment_usage WHERE time_slot > 0);"));
457 }
458 #endif
459
460 } // namespace history
461