• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "chrome/browser/history/visitsegment_database.h"
6 
7 #include <math.h>
8 
9 #include <algorithm>
10 #include <string>
11 #include <vector>
12 
13 #include "base/logging.h"
14 #include "base/stl_util.h"
15 #include "base/strings/string_util.h"
16 #include "base/strings/utf_string_conversions.h"
17 #include "chrome/browser/history/page_usage_data.h"
18 #include "sql/statement.h"
19 #include "sql/transaction.h"
20 
21 // The following tables are used to store url segment information.
22 //
23 // segments
24 //   id                 Primary key
25 //   name               A unique string to represent that segment. (URL derived)
26 //   url_id             ID of the url currently used to represent this segment.
27 //
28 // segment_usage
29 //   id                 Primary key
30 //   segment_id         Corresponding segment id
31 //   time_slot          time stamp identifying for what day this entry is about
32 //   visit_count        Number of visit in the segment
33 //
34 
35 namespace history {
36 
VisitSegmentDatabase()37 VisitSegmentDatabase::VisitSegmentDatabase() {
38 }
39 
~VisitSegmentDatabase()40 VisitSegmentDatabase::~VisitSegmentDatabase() {
41 }
42 
InitSegmentTables()43 bool VisitSegmentDatabase::InitSegmentTables() {
44   // Segments table.
45   if (!GetDB().DoesTableExist("segments")) {
46     if (!GetDB().Execute("CREATE TABLE segments ("
47         "id INTEGER PRIMARY KEY,"
48         "name VARCHAR,"
49         "url_id INTEGER NON NULL)")) {
50       return false;
51     }
52 
53     if (!GetDB().Execute(
54         "CREATE INDEX segments_name ON segments(name)")) {
55       return false;
56     }
57   }
58 
59   // This was added later, so we need to try to create it even if the table
60   // already exists.
61   if (!GetDB().Execute("CREATE INDEX IF NOT EXISTS segments_url_id ON "
62                        "segments(url_id)"))
63     return false;
64 
65   // Segment usage table.
66   if (!GetDB().DoesTableExist("segment_usage")) {
67     if (!GetDB().Execute("CREATE TABLE segment_usage ("
68         "id INTEGER PRIMARY KEY,"
69         "segment_id INTEGER NOT NULL,"
70         "time_slot INTEGER NOT NULL,"
71         "visit_count INTEGER DEFAULT 0 NOT NULL)")) {
72       return false;
73     }
74     if (!GetDB().Execute(
75         "CREATE INDEX segment_usage_time_slot_segment_id ON "
76         "segment_usage(time_slot, segment_id)")) {
77       return false;
78     }
79   }
80 
81   // Added in a later version, so we always need to try to creat this index.
82   if (!GetDB().Execute("CREATE INDEX IF NOT EXISTS segments_usage_seg_id "
83                        "ON segment_usage(segment_id)"))
84     return false;
85 
86   return true;
87 }
88 
DropSegmentTables()89 bool VisitSegmentDatabase::DropSegmentTables() {
90   // Dropping the tables will implicitly delete the indices.
91   return GetDB().Execute("DROP TABLE segments") &&
92          GetDB().Execute("DROP TABLE segment_usage");
93 }
94 
95 // Note: the segment name is derived from the URL but is not a URL. It is
96 // a string that can be easily recreated from various URLS. Maybe this should
97 // be an MD5 to limit the length.
98 //
99 // static
ComputeSegmentName(const GURL & url)100 std::string VisitSegmentDatabase::ComputeSegmentName(const GURL& url) {
101   // TODO(brettw) this should probably use the registry controlled
102   // domains service.
103   GURL::Replacements r;
104   const char kWWWDot[] = "www.";
105   const int kWWWDotLen = arraysize(kWWWDot) - 1;
106 
107   std::string host = url.host();
108   const char* host_c = host.c_str();
109   // Remove www. to avoid some dups.
110   if (static_cast<int>(host.size()) > kWWWDotLen &&
111       LowerCaseEqualsASCII(host_c, host_c + kWWWDotLen, kWWWDot)) {
112     r.SetHost(host.c_str(),
113               url::Component(kWWWDotLen,
114                              static_cast<int>(host.size()) - kWWWDotLen));
115   }
116   // Remove other stuff we don't want.
117   r.ClearUsername();
118   r.ClearPassword();
119   r.ClearQuery();
120   r.ClearRef();
121   r.ClearPort();
122 
123   return url.ReplaceComponents(r).spec();
124 }
125 
GetSegmentNamed(const std::string & segment_name)126 SegmentID VisitSegmentDatabase::GetSegmentNamed(
127     const std::string& segment_name) {
128   sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
129       "SELECT id FROM segments WHERE name = ?"));
130   statement.BindString(0, segment_name);
131 
132   if (statement.Step())
133     return statement.ColumnInt64(0);
134   return 0;
135 }
136 
UpdateSegmentRepresentationURL(SegmentID segment_id,URLID url_id)137 bool VisitSegmentDatabase::UpdateSegmentRepresentationURL(SegmentID segment_id,
138                                                           URLID url_id) {
139   sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
140       "UPDATE segments SET url_id = ? WHERE id = ?"));
141   statement.BindInt64(0, url_id);
142   statement.BindInt64(1, segment_id);
143 
144   return statement.Run();
145 }
146 
GetSegmentRepresentationURL(SegmentID segment_id)147 URLID VisitSegmentDatabase::GetSegmentRepresentationURL(SegmentID segment_id) {
148   sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
149       "SELECT url_id FROM segments WHERE id = ?"));
150   statement.BindInt64(0, segment_id);
151 
152   if (statement.Step())
153     return statement.ColumnInt64(0);
154   return 0;
155 }
156 
CreateSegment(URLID url_id,const std::string & segment_name)157 SegmentID VisitSegmentDatabase::CreateSegment(URLID url_id,
158                                               const std::string& segment_name) {
159   sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
160       "INSERT INTO segments (name, url_id) VALUES (?,?)"));
161   statement.BindString(0, segment_name);
162   statement.BindInt64(1, url_id);
163 
164   if (statement.Run())
165     return GetDB().GetLastInsertRowId();
166   return 0;
167 }
168 
IncreaseSegmentVisitCount(SegmentID segment_id,base::Time ts,int amount)169 bool VisitSegmentDatabase::IncreaseSegmentVisitCount(SegmentID segment_id,
170                                                      base::Time ts,
171                                                      int amount) {
172   base::Time t = ts.LocalMidnight();
173 
174   sql::Statement select(GetDB().GetCachedStatement(SQL_FROM_HERE,
175       "SELECT id, visit_count FROM segment_usage "
176       "WHERE time_slot = ? AND segment_id = ?"));
177   select.BindInt64(0, t.ToInternalValue());
178   select.BindInt64(1, segment_id);
179 
180   if (!select.is_valid())
181     return false;
182 
183   if (select.Step()) {
184     sql::Statement update(GetDB().GetCachedStatement(SQL_FROM_HERE,
185         "UPDATE segment_usage SET visit_count = ? WHERE id = ?"));
186     update.BindInt64(0, select.ColumnInt64(1) + static_cast<int64>(amount));
187     update.BindInt64(1, select.ColumnInt64(0));
188 
189     return update.Run();
190   } else {
191     sql::Statement insert(GetDB().GetCachedStatement(SQL_FROM_HERE,
192         "INSERT INTO segment_usage "
193         "(segment_id, time_slot, visit_count) VALUES (?, ?, ?)"));
194     insert.BindInt64(0, segment_id);
195     insert.BindInt64(1, t.ToInternalValue());
196     insert.BindInt64(2, static_cast<int64>(amount));
197 
198     return insert.Run();
199   }
200 }
201 
QuerySegmentUsage(base::Time from_time,int max_result_count,std::vector<PageUsageData * > * results)202 void VisitSegmentDatabase::QuerySegmentUsage(
203     base::Time from_time,
204     int max_result_count,
205     std::vector<PageUsageData*>* results) {
206   // This function gathers the highest-ranked segments in two queries.
207   // The first gathers scores for all segments.
208   // The second gathers segment data (url, title, etc.) for the highest-ranked
209   // segments.
210 
211   // Gather all the segment scores.
212   sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
213       "SELECT segment_id, time_slot, visit_count "
214       "FROM segment_usage WHERE time_slot >= ? "
215       "ORDER BY segment_id"));
216   if (!statement.is_valid())
217     return;
218 
219   base::Time ts = from_time.LocalMidnight();
220   statement.BindInt64(0, ts.ToInternalValue());
221 
222   base::Time now = base::Time::Now();
223   SegmentID last_segment_id = 0;
224   PageUsageData* pud = NULL;
225   float score = 0;
226   while (statement.Step()) {
227     SegmentID segment_id = statement.ColumnInt64(0);
228     if (segment_id != last_segment_id) {
229       if (pud) {
230         pud->SetScore(score);
231         results->push_back(pud);
232       }
233 
234       pud = new PageUsageData(segment_id);
235       score = 0;
236       last_segment_id = segment_id;
237     }
238 
239     base::Time timeslot =
240         base::Time::FromInternalValue(statement.ColumnInt64(1));
241     int visit_count = statement.ColumnInt(2);
242     int days_ago = (now - timeslot).InDays();
243 
244     // Score for this day in isolation.
245     float day_visits_score = 1.0f + log(static_cast<float>(visit_count));
246     // Recent visits count more than historical ones, so we multiply in a boost
247     // related to how long ago this day was.
248     // This boost is a curve that smoothly goes through these values:
249     // Today gets 3x, a week ago 2x, three weeks ago 1.5x, falling off to 1x
250     // at the limit of how far we reach into the past.
251     float recency_boost = 1.0f + (2.0f * (1.0f / (1.0f + days_ago/7.0f)));
252     score += recency_boost * day_visits_score;
253   }
254 
255   if (pud) {
256     pud->SetScore(score);
257     results->push_back(pud);
258   }
259 
260   // Limit to the top kResultCount results.
261   std::sort(results->begin(), results->end(), PageUsageData::Predicate);
262   if (static_cast<int>(results->size()) > max_result_count) {
263     STLDeleteContainerPointers(results->begin() + max_result_count,
264                                results->end());
265     results->resize(max_result_count);
266   }
267 
268   // Now fetch the details about the entries we care about.
269   sql::Statement statement2(GetDB().GetCachedStatement(SQL_FROM_HERE,
270       "SELECT urls.url, urls.title FROM urls "
271       "JOIN segments ON segments.url_id = urls.id "
272       "WHERE segments.id = ?"));
273 
274   if (!statement2.is_valid())
275     return;
276 
277   for (size_t i = 0; i < results->size(); ++i) {
278     PageUsageData* pud = (*results)[i];
279     statement2.BindInt64(0, pud->GetID());
280     if (statement2.Step()) {
281       pud->SetURL(GURL(statement2.ColumnString(0)));
282       pud->SetTitle(statement2.ColumnString16(1));
283     }
284     statement2.Reset(true);
285   }
286 }
287 
DeleteSegmentData(base::Time older_than)288 bool VisitSegmentDatabase::DeleteSegmentData(base::Time older_than) {
289   sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
290       "DELETE FROM segment_usage WHERE time_slot < ?"));
291   statement.BindInt64(0, older_than.LocalMidnight().ToInternalValue());
292 
293   return statement.Run();
294 }
295 
DeleteSegmentForURL(URLID url_id)296 bool VisitSegmentDatabase::DeleteSegmentForURL(URLID url_id) {
297   sql::Statement delete_usage(GetDB().GetCachedStatement(SQL_FROM_HERE,
298       "DELETE FROM segment_usage WHERE segment_id IN "
299       "(SELECT id FROM segments WHERE url_id = ?)"));
300   delete_usage.BindInt64(0, url_id);
301 
302   if (!delete_usage.Run())
303     return false;
304 
305   sql::Statement delete_seg(GetDB().GetCachedStatement(SQL_FROM_HERE,
306       "DELETE FROM segments WHERE url_id = ?"));
307   delete_seg.BindInt64(0, url_id);
308 
309   return delete_seg.Run();
310 }
311 
MigratePresentationIndex()312 bool VisitSegmentDatabase::MigratePresentationIndex() {
313   sql::Transaction transaction(&GetDB());
314   return transaction.Begin() &&
315       GetDB().Execute("DROP TABLE presentation") &&
316       GetDB().Execute("CREATE TABLE segments_tmp ("
317                       "id INTEGER PRIMARY KEY,"
318                       "name VARCHAR,"
319                       "url_id INTEGER NON NULL)") &&
320       GetDB().Execute("INSERT INTO segments_tmp SELECT "
321                       "id, name, url_id FROM segments") &&
322       GetDB().Execute("DROP TABLE segments") &&
323       GetDB().Execute("ALTER TABLE segments_tmp RENAME TO segments") &&
324       transaction.Commit();
325 }
326 
327 }  // namespace history
328