• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2009 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "chrome/browser/history/visit_database.h"
6 
7 #include <algorithm>
8 #include <limits>
9 #include <map>
10 #include <set>
11 
12 #include "app/sql/statement.h"
13 #include "base/logging.h"
14 #include "base/string_number_conversions.h"
15 #include "chrome/browser/history/url_database.h"
16 #include "chrome/common/url_constants.h"
17 #include "content/common/page_transition_types.h"
18 
19 // Rows, in order, of the visit table.
20 #define HISTORY_VISIT_ROW_FIELDS \
21   " id,url,visit_time,from_visit,transition,segment_id,is_indexed "
22 
23 namespace history {
24 
VisitDatabase()25 VisitDatabase::VisitDatabase() {
26 }
27 
~VisitDatabase()28 VisitDatabase::~VisitDatabase() {
29 }
30 
InitVisitTable()31 bool VisitDatabase::InitVisitTable() {
32   if (!GetDB().DoesTableExist("visits")) {
33     if (!GetDB().Execute("CREATE TABLE visits("
34         "id INTEGER PRIMARY KEY,"
35         "url INTEGER NOT NULL," // key of the URL this corresponds to
36         "visit_time INTEGER NOT NULL,"
37         "from_visit INTEGER,"
38         "transition INTEGER DEFAULT 0 NOT NULL,"
39         "segment_id INTEGER,"
40         // True when we have indexed data for this visit.
41         "is_indexed BOOLEAN)"))
42       return false;
43   } else if (!GetDB().DoesColumnExist("visits", "is_indexed")) {
44     // Old versions don't have the is_indexed column, we can just add that and
45     // not worry about different database revisions, since old ones will
46     // continue to work.
47     //
48     // TODO(brettw) this should be removed once we think everybody has been
49     // updated (added early Mar 2008).
50     if (!GetDB().Execute("ALTER TABLE visits ADD COLUMN is_indexed BOOLEAN"))
51       return false;
52   }
53 
54   // Visit source table contains the source information for all the visits. To
55   // save space, we do not record those user browsed visits which would be the
56   // majority in this table. Only other sources are recorded.
57   // Due to the tight relationship between visit_source and visits table, they
58   // should be created and dropped at the same time.
59   if (!GetDB().DoesTableExist("visit_source")) {
60     if (!GetDB().Execute("CREATE TABLE visit_source("
61                          "id INTEGER PRIMARY KEY,source INTEGER NOT NULL)"))
62         return false;
63   }
64 
65   // Index over url so we can quickly find visits for a page. This will just
66   // fail if it already exists and we'll ignore it.
67   GetDB().Execute("CREATE INDEX visits_url_index ON visits (url)");
68 
69   // Create an index over from visits so that we can efficiently find
70   // referrers and redirects. Ignore failures because it likely already exists.
71   GetDB().Execute("CREATE INDEX visits_from_index ON visits (from_visit)");
72 
73   // Create an index over time so that we can efficiently find the visits in a
74   // given time range (most history views are time-based). Ignore failures
75   // because it likely already exists.
76   GetDB().Execute("CREATE INDEX visits_time_index ON visits (visit_time)");
77 
78   return true;
79 }
80 
DropVisitTable()81 bool VisitDatabase::DropVisitTable() {
82   GetDB().Execute("DROP TABLE visit_source");
83   // This will also drop the indices over the table.
84   return GetDB().Execute("DROP TABLE visits");
85 }
86 
87 // Must be in sync with HISTORY_VISIT_ROW_FIELDS.
88 // static
FillVisitRow(sql::Statement & statement,VisitRow * visit)89 void VisitDatabase::FillVisitRow(sql::Statement& statement, VisitRow* visit) {
90   visit->visit_id = statement.ColumnInt64(0);
91   visit->url_id = statement.ColumnInt64(1);
92   visit->visit_time = base::Time::FromInternalValue(statement.ColumnInt64(2));
93   visit->referring_visit = statement.ColumnInt64(3);
94   visit->transition = PageTransition::FromInt(statement.ColumnInt(4));
95   visit->segment_id = statement.ColumnInt64(5);
96   visit->is_indexed = !!statement.ColumnInt(6);
97 }
98 
99 // static
FillVisitVector(sql::Statement & statement,VisitVector * visits)100 void VisitDatabase::FillVisitVector(sql::Statement& statement,
101                                     VisitVector* visits) {
102   while (statement.Step()) {
103     history::VisitRow visit;
104     FillVisitRow(statement, &visit);
105     visits->push_back(visit);
106   }
107 }
108 
AddVisit(VisitRow * visit,VisitSource source)109 VisitID VisitDatabase::AddVisit(VisitRow* visit, VisitSource source) {
110   sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
111       "INSERT INTO visits "
112       "(url, visit_time, from_visit, transition, segment_id, is_indexed) "
113       "VALUES (?,?,?,?,?,?)"));
114   if (!statement) {
115     VLOG(0) << "Failed to build visit insert statement:  "
116             << "url_id = " << visit->url_id;
117     return 0;
118   }
119 
120   statement.BindInt64(0, visit->url_id);
121   statement.BindInt64(1, visit->visit_time.ToInternalValue());
122   statement.BindInt64(2, visit->referring_visit);
123   statement.BindInt64(3, visit->transition);
124   statement.BindInt64(4, visit->segment_id);
125   statement.BindInt64(5, visit->is_indexed);
126 
127   if (!statement.Run()) {
128     VLOG(0) << "Failed to execute visit insert statement:  "
129             << "url_id = " << visit->url_id;
130     return 0;
131   }
132 
133   visit->visit_id = GetDB().GetLastInsertRowId();
134 
135   if (source != SOURCE_BROWSED) {
136     // Record the source of this visit when it is not browsed.
137     sql::Statement statement1(GetDB().GetCachedStatement(SQL_FROM_HERE,
138         "INSERT INTO visit_source (id, source) VALUES (?,?)"));
139     if (!statement1.is_valid()) {
140       VLOG(0) << "Failed to build visit_source insert statement:  "
141               << "url_id = " << visit->visit_id;
142       return 0;
143     }
144 
145     statement1.BindInt64(0, visit->visit_id);
146     statement1.BindInt64(1, source);
147     if (!statement1.Run()) {
148       VLOG(0) << "Failed to execute visit_source insert statement:  "
149               << "url_id = " << visit->visit_id;
150       return 0;
151     }
152   }
153 
154   return visit->visit_id;
155 }
156 
DeleteVisit(const VisitRow & visit)157 void VisitDatabase::DeleteVisit(const VisitRow& visit) {
158   // Patch around this visit. Any visits that this went to will now have their
159   // "source" be the deleted visit's source.
160   sql::Statement update_chain(GetDB().GetCachedStatement(SQL_FROM_HERE,
161       "UPDATE visits SET from_visit=? WHERE from_visit=?"));
162   if (!update_chain)
163     return;
164   update_chain.BindInt64(0, visit.referring_visit);
165   update_chain.BindInt64(1, visit.visit_id);
166   update_chain.Run();
167 
168   // Now delete the actual visit.
169   sql::Statement del(GetDB().GetCachedStatement(SQL_FROM_HERE,
170       "DELETE FROM visits WHERE id=?"));
171   if (!del)
172     return;
173   del.BindInt64(0, visit.visit_id);
174   del.Run();
175 
176   // Try to delete the entry in visit_source table as well.
177   // If the visit was browsed, there is no corresponding entry in visit_source
178   // table, and nothing will be deleted.
179   del.Assign(GetDB().GetCachedStatement(SQL_FROM_HERE,
180              "DELETE FROM visit_source WHERE id=?"));
181   if (!del.is_valid())
182     return;
183   del.BindInt64(0, visit.visit_id);
184   del.Run();
185 }
186 
GetRowForVisit(VisitID visit_id,VisitRow * out_visit)187 bool VisitDatabase::GetRowForVisit(VisitID visit_id, VisitRow* out_visit) {
188   sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
189       "SELECT" HISTORY_VISIT_ROW_FIELDS "FROM visits WHERE id=?"));
190   if (!statement)
191     return false;
192 
193   statement.BindInt64(0, visit_id);
194   if (!statement.Step())
195     return false;
196 
197   FillVisitRow(statement, out_visit);
198 
199   // We got a different visit than we asked for, something is wrong.
200   DCHECK_EQ(visit_id, out_visit->visit_id);
201   if (visit_id != out_visit->visit_id)
202     return false;
203 
204   return true;
205 }
206 
UpdateVisitRow(const VisitRow & visit)207 bool VisitDatabase::UpdateVisitRow(const VisitRow& visit) {
208   // Don't store inconsistent data to the database.
209   DCHECK_NE(visit.visit_id, visit.referring_visit);
210   if (visit.visit_id == visit.referring_visit)
211     return false;
212 
213   sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
214       "UPDATE visits SET "
215       "url=?,visit_time=?,from_visit=?,transition=?,segment_id=?,is_indexed=? "
216       "WHERE id=?"));
217   if (!statement)
218     return false;
219 
220   statement.BindInt64(0, visit.url_id);
221   statement.BindInt64(1, visit.visit_time.ToInternalValue());
222   statement.BindInt64(2, visit.referring_visit);
223   statement.BindInt64(3, visit.transition);
224   statement.BindInt64(4, visit.segment_id);
225   statement.BindInt64(5, visit.is_indexed);
226   statement.BindInt64(6, visit.visit_id);
227   return statement.Run();
228 }
229 
GetVisitsForURL(URLID url_id,VisitVector * visits)230 bool VisitDatabase::GetVisitsForURL(URLID url_id, VisitVector* visits) {
231   visits->clear();
232 
233   sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
234       "SELECT" HISTORY_VISIT_ROW_FIELDS
235       "FROM visits "
236       "WHERE url=? "
237       "ORDER BY visit_time ASC"));
238   if (!statement)
239     return false;
240 
241   statement.BindInt64(0, url_id);
242   FillVisitVector(statement, visits);
243   return true;
244 }
245 
GetAllVisitsInRange(base::Time begin_time,base::Time end_time,int max_results,VisitVector * visits)246 void VisitDatabase::GetAllVisitsInRange(base::Time begin_time,
247                                         base::Time end_time,
248                                         int max_results,
249                                         VisitVector* visits) {
250   visits->clear();
251 
252   sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
253       "SELECT" HISTORY_VISIT_ROW_FIELDS "FROM visits "
254       "WHERE visit_time >= ? AND visit_time < ?"
255       "ORDER BY visit_time LIMIT ?"));
256   if (!statement)
257     return;
258 
259   // See GetVisibleVisitsInRange for more info on how these times are bound.
260   int64 end = end_time.ToInternalValue();
261   statement.BindInt64(0, begin_time.ToInternalValue());
262   statement.BindInt64(1, end ? end : std::numeric_limits<int64>::max());
263   statement.BindInt64(2,
264       max_results ? max_results : std::numeric_limits<int64>::max());
265 
266   FillVisitVector(statement, visits);
267 }
268 
GetVisitsInRangeForTransition(base::Time begin_time,base::Time end_time,int max_results,PageTransition::Type transition,VisitVector * visits)269 void VisitDatabase::GetVisitsInRangeForTransition(
270     base::Time begin_time,
271     base::Time end_time,
272     int max_results,
273     PageTransition::Type transition,
274     VisitVector* visits) {
275   DCHECK(visits);
276   visits->clear();
277 
278   sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
279       "SELECT" HISTORY_VISIT_ROW_FIELDS "FROM visits "
280       "WHERE visit_time >= ? AND visit_time < ? "
281       "AND (transition & ?) == ?"
282       "ORDER BY visit_time LIMIT ?"));
283   if (!statement)
284     return;
285 
286   // See GetVisibleVisitsInRange for more info on how these times are bound.
287   int64 end = end_time.ToInternalValue();
288   statement.BindInt64(0, begin_time.ToInternalValue());
289   statement.BindInt64(1, end ? end : std::numeric_limits<int64>::max());
290   statement.BindInt(2, PageTransition::CORE_MASK);
291   statement.BindInt(3, transition);
292   statement.BindInt64(4,
293       max_results ? max_results : std::numeric_limits<int64>::max());
294 
295   FillVisitVector(statement, visits);
296 }
297 
GetVisibleVisitsInRange(base::Time begin_time,base::Time end_time,int max_count,VisitVector * visits)298 void VisitDatabase::GetVisibleVisitsInRange(base::Time begin_time,
299                                             base::Time end_time,
300                                             int max_count,
301                                             VisitVector* visits) {
302   visits->clear();
303   // The visit_time values can be duplicated in a redirect chain, so we sort
304   // by id too, to ensure a consistent ordering just in case.
305   sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
306       "SELECT" HISTORY_VISIT_ROW_FIELDS "FROM visits "
307       "WHERE visit_time >= ? AND visit_time < ? "
308       "AND (transition & ?) != 0 "  // CHAIN_END
309       "AND (transition & ?) NOT IN (?, ?, ?) "  // NO SUBFRAME or
310                                                 // KEYWORD_GENERATED
311       "ORDER BY visit_time DESC, id DESC"));
312   if (!statement)
313     return;
314 
315   // Note that we use min/max values for querying unlimited ranges of time using
316   // the same statement. Since the time has an index, this will be about the
317   // same amount of work as just doing a query for everything with no qualifier.
318   int64 end = end_time.ToInternalValue();
319   statement.BindInt64(0, begin_time.ToInternalValue());
320   statement.BindInt64(1, end ? end : std::numeric_limits<int64>::max());
321   statement.BindInt(2, PageTransition::CHAIN_END);
322   statement.BindInt(3, PageTransition::CORE_MASK);
323   statement.BindInt(4, PageTransition::AUTO_SUBFRAME);
324   statement.BindInt(5, PageTransition::MANUAL_SUBFRAME);
325   statement.BindInt(6, PageTransition::KEYWORD_GENERATED);
326 
327   std::set<URLID> found_urls;
328   while (statement.Step()) {
329     VisitRow visit;
330     FillVisitRow(statement, &visit);
331     // Make sure the URL this visit corresponds to is unique.
332     if (found_urls.find(visit.url_id) != found_urls.end())
333       continue;
334     found_urls.insert(visit.url_id);
335     visits->push_back(visit);
336 
337     if (max_count > 0 && static_cast<int>(visits->size()) >= max_count)
338       break;
339   }
340 }
341 
GetMostRecentVisitForURL(URLID url_id,VisitRow * visit_row)342 VisitID VisitDatabase::GetMostRecentVisitForURL(URLID url_id,
343                                                 VisitRow* visit_row) {
344   // The visit_time values can be duplicated in a redirect chain, so we sort
345   // by id too, to ensure a consistent ordering just in case.
346   sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
347       "SELECT" HISTORY_VISIT_ROW_FIELDS "FROM visits "
348       "WHERE url=? "
349       "ORDER BY visit_time DESC, id DESC "
350       "LIMIT 1"));
351   if (!statement)
352     return 0;
353 
354   statement.BindInt64(0, url_id);
355   if (!statement.Step())
356     return 0;  // No visits for this URL.
357 
358   if (visit_row) {
359     FillVisitRow(statement, visit_row);
360     return visit_row->visit_id;
361   }
362   return statement.ColumnInt64(0);
363 }
364 
GetMostRecentVisitsForURL(URLID url_id,int max_results,VisitVector * visits)365 bool VisitDatabase::GetMostRecentVisitsForURL(URLID url_id,
366                                               int max_results,
367                                               VisitVector* visits) {
368   visits->clear();
369 
370   // The visit_time values can be duplicated in a redirect chain, so we sort
371   // by id too, to ensure a consistent ordering just in case.
372   sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
373       "SELECT" HISTORY_VISIT_ROW_FIELDS
374       "FROM visits "
375       "WHERE url=? "
376       "ORDER BY visit_time DESC, id DESC "
377       "LIMIT ?"));
378   if (!statement)
379     return false;
380 
381   statement.BindInt64(0, url_id);
382   statement.BindInt(1, max_results);
383   FillVisitVector(statement, visits);
384   return true;
385 }
386 
GetRedirectFromVisit(VisitID from_visit,VisitID * to_visit,GURL * to_url)387 bool VisitDatabase::GetRedirectFromVisit(VisitID from_visit,
388                                          VisitID* to_visit,
389                                          GURL* to_url) {
390   sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
391       "SELECT v.id,u.url "
392       "FROM visits v JOIN urls u ON v.url = u.id "
393       "WHERE v.from_visit = ? "
394       "AND (v.transition & ?) != 0"));  // IS_REDIRECT_MASK
395   if (!statement)
396     return false;
397 
398   statement.BindInt64(0, from_visit);
399   statement.BindInt(1, PageTransition::IS_REDIRECT_MASK);
400 
401   if (!statement.Step())
402     return false;  // No redirect from this visit.
403   if (to_visit)
404     *to_visit = statement.ColumnInt64(0);
405   if (to_url)
406     *to_url = GURL(statement.ColumnString(1));
407   return true;
408 }
409 
GetRedirectToVisit(VisitID to_visit,VisitID * from_visit,GURL * from_url)410 bool VisitDatabase::GetRedirectToVisit(VisitID to_visit,
411                                        VisitID* from_visit,
412                                        GURL* from_url) {
413   VisitRow row;
414   if (!GetRowForVisit(to_visit, &row))
415     return false;
416 
417   if (from_visit)
418     *from_visit = row.referring_visit;
419 
420   if (from_url) {
421     sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
422         "SELECT u.url "
423         "FROM visits v JOIN urls u ON v.url = u.id "
424         "WHERE v.id = ?"));
425     statement.BindInt64(0, row.referring_visit);
426 
427     if (!statement.Step())
428       return false;
429 
430     *from_url = GURL(statement.ColumnString(0));
431   }
432   return true;
433 }
434 
GetVisitCountToHost(const GURL & url,int * count,base::Time * first_visit)435 bool VisitDatabase::GetVisitCountToHost(const GURL& url,
436                                         int* count,
437                                         base::Time* first_visit) {
438   if (!url.SchemeIs(chrome::kHttpScheme) && !url.SchemeIs(chrome::kHttpsScheme))
439     return false;
440 
441   // We need to search for URLs with a matching host/port. One way to query for
442   // this is to use the LIKE operator, eg 'url LIKE http://google.com/%'. This
443   // is inefficient though in that it doesn't use the index and each entry must
444   // be visited. The same query can be executed by using >= and < operator.
445   // The query becomes:
446   // 'url >= http://google.com/' and url < http://google.com0'.
447   // 0 is used as it is one character greater than '/'.
448   GURL search_url(url);
449   const std::string host_query_min = search_url.GetOrigin().spec();
450 
451   if (host_query_min.empty())
452     return false;
453 
454   std::string host_query_max = host_query_min;
455   host_query_max[host_query_max.size() - 1] = '0';
456 
457   sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
458       "SELECT MIN(v.visit_time), COUNT(*) "
459       "FROM visits v INNER JOIN urls u ON v.url = u.id "
460       "WHERE (u.url >= ? AND u.url < ?)"));
461   if (!statement)
462     return false;
463 
464   statement.BindString(0, host_query_min);
465   statement.BindString(1, host_query_max);
466 
467   if (!statement.Step()) {
468     // We've never been to this page before.
469     *count = 0;
470     return true;
471   }
472 
473   *first_visit = base::Time::FromInternalValue(statement.ColumnInt64(0));
474   *count = statement.ColumnInt(1);
475   return true;
476 }
477 
GetStartDate(base::Time * first_visit)478 bool VisitDatabase::GetStartDate(base::Time* first_visit) {
479   sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
480       "SELECT MIN(visit_time) FROM visits WHERE visit_time != 0"));
481   if (!statement || !statement.Step() || statement.ColumnInt64(0) == 0) {
482     *first_visit = base::Time::Now();
483     return false;
484   }
485   *first_visit = base::Time::FromInternalValue(statement.ColumnInt64(0));
486   return true;
487 }
488 
GetVisitsSource(const VisitVector & visits,VisitSourceMap * sources)489 void VisitDatabase::GetVisitsSource(const VisitVector& visits,
490                                     VisitSourceMap* sources) {
491   DCHECK(sources);
492   sources->clear();
493 
494   // We query the source in batch. Here defines the batch size.
495   const size_t batch_size = 500;
496   size_t visits_size = visits.size();
497 
498   size_t start_index = 0, end_index = 0;
499   while (end_index < visits_size) {
500     start_index = end_index;
501     end_index = end_index + batch_size < visits_size ? end_index + batch_size
502                                                      : visits_size;
503 
504     // Compose the sql statement with a list of ids.
505     std::string sql = "SELECT id,source FROM visit_source ";
506     sql.append("WHERE id IN (");
507     // Append all the ids in the statement.
508     for (size_t j = start_index; j < end_index; j++) {
509       if (j != start_index)
510         sql.push_back(',');
511       sql.append(base::Int64ToString(visits[j].visit_id));
512     }
513     sql.append(") ORDER BY id");
514     sql::Statement statement(GetDB().GetUniqueStatement(sql.c_str()));
515     if (!statement)
516       return;
517 
518     // Get the source entries out of the query result.
519     while (statement.Step()) {
520       std::pair<VisitID, VisitSource> source_entry(statement.ColumnInt64(0),
521           static_cast<VisitSource>(statement.ColumnInt(1)));
522       sources->insert(source_entry);
523     }
524   }
525 }
526 
527 }  // namespace history
528