• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2009 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "chrome/browser/history/visitsegment_database.h"
6 
7 #include <math.h>
8 
9 #include <algorithm>
10 #include <string>
11 #include <vector>
12 
13 #include "app/sql/statement.h"
14 #include "base/logging.h"
15 #include "base/stl_util-inl.h"
16 #include "base/string_util.h"
17 #include "base/utf_string_conversions.h"
18 #include "chrome/browser/history/page_usage_data.h"
19 
20 // The following tables are used to store url segment information.
21 //
22 // segments
23 //   id                 Primary key
24 //   name               A unique string to represent that segment. (URL derived)
25 //   url_id             ID of the url currently used to represent this segment.
26 //   pres_index         index used to store a fixed presentation position.
27 //
28 // segment_usage
29 //   id                 Primary key
30 //   segment_id         Corresponding segment id
31 //   time_slot          time stamp identifying for what day this entry is about
32 //   visit_count        Number of visit in the segment
33 //
34 
35 namespace history {
36 
VisitSegmentDatabase()37 VisitSegmentDatabase::VisitSegmentDatabase() {
38 }
39 
~VisitSegmentDatabase()40 VisitSegmentDatabase::~VisitSegmentDatabase() {
41 }
42 
InitSegmentTables()43 bool VisitSegmentDatabase::InitSegmentTables() {
44   // Segments table.
45   if (!GetDB().DoesTableExist("segments")) {
46     if (!GetDB().Execute("CREATE TABLE segments ("
47         "id INTEGER PRIMARY KEY,"
48         "name VARCHAR,"
49         "url_id INTEGER NON NULL,"
50         "pres_index INTEGER DEFAULT -1 NOT NULL)")) {
51       NOTREACHED();
52       return false;
53     }
54 
55     if (!GetDB().Execute("CREATE INDEX segments_name ON segments(name)")) {
56       NOTREACHED();
57       return false;
58     }
59   }
60 
61   // This was added later, so we need to try to create it even if the table
62   // already exists.
63   GetDB().Execute("CREATE INDEX segments_url_id ON segments(url_id)");
64 
65   // Segment usage table.
66   if (!GetDB().DoesTableExist("segment_usage")) {
67     if (!GetDB().Execute("CREATE TABLE segment_usage ("
68         "id INTEGER PRIMARY KEY,"
69         "segment_id INTEGER NOT NULL,"
70         "time_slot INTEGER NOT NULL,"
71         "visit_count INTEGER DEFAULT 0 NOT NULL)")) {
72       NOTREACHED();
73       return false;
74     }
75     if (!GetDB().Execute(
76         "CREATE INDEX segment_usage_time_slot_segment_id ON "
77         "segment_usage(time_slot, segment_id)")) {
78       NOTREACHED();
79       return false;
80     }
81   }
82 
83   // Added in a later version, so we always need to try to creat this index.
84   GetDB().Execute("CREATE INDEX segments_usage_seg_id "
85                   "ON segment_usage(segment_id)");
86 
87   // Presentation index table.
88   //
89   // Important note:
90   // Right now, this table is only used to store the presentation index.
91   // If you need to add more columns, keep in mind that rows are currently
92   // deleted when the presentation index is changed to -1.
93   // See SetPagePresentationIndex() in this file
94   if (!GetDB().DoesTableExist("presentation")) {
95     if (!GetDB().Execute("CREATE TABLE presentation("
96         "url_id INTEGER PRIMARY KEY,"
97         "pres_index INTEGER NOT NULL)"))
98       return false;
99   }
100   return true;
101 }
102 
DropSegmentTables()103 bool VisitSegmentDatabase::DropSegmentTables() {
104   // Dropping the tables will implicitly delete the indices.
105   return GetDB().Execute("DROP TABLE segments") &&
106          GetDB().Execute("DROP TABLE segment_usage");
107 }
108 
109 // Note: the segment name is derived from the URL but is not a URL. It is
110 // a string that can be easily recreated from various URLS. Maybe this should
111 // be an MD5 to limit the length.
112 //
113 // static
ComputeSegmentName(const GURL & url)114 std::string VisitSegmentDatabase::ComputeSegmentName(const GURL& url) {
115   // TODO(brettw) this should probably use the registry controlled
116   // domains service.
117   GURL::Replacements r;
118   const char kWWWDot[] = "www.";
119   const int kWWWDotLen = arraysize(kWWWDot) - 1;
120 
121   std::string host = url.host();
122   const char* host_c = host.c_str();
123   // Remove www. to avoid some dups.
124   if (static_cast<int>(host.size()) > kWWWDotLen &&
125       LowerCaseEqualsASCII(host_c, host_c + kWWWDotLen, kWWWDot)) {
126     r.SetHost(host.c_str(),
127               url_parse::Component(kWWWDotLen,
128                   static_cast<int>(host.size()) - kWWWDotLen));
129   }
130   // Remove other stuff we don't want.
131   r.ClearUsername();
132   r.ClearPassword();
133   r.ClearQuery();
134   r.ClearRef();
135   r.ClearPort();
136 
137   return url.ReplaceComponents(r).spec();
138 }
139 
GetSegmentNamed(const std::string & segment_name)140 SegmentID VisitSegmentDatabase::GetSegmentNamed(
141     const std::string& segment_name) {
142   sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
143       "SELECT id FROM segments WHERE name = ?"));
144   if (!statement)
145     return 0;
146 
147   statement.BindString(0, segment_name);
148   if (statement.Step())
149     return statement.ColumnInt64(0);
150   return 0;
151 }
152 
UpdateSegmentRepresentationURL(SegmentID segment_id,URLID url_id)153 bool VisitSegmentDatabase::UpdateSegmentRepresentationURL(SegmentID segment_id,
154                                                           URLID url_id) {
155   sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
156       "UPDATE segments SET url_id = ? WHERE id = ?"));
157   if (!statement)
158     return false;
159 
160   statement.BindInt64(0, url_id);
161   statement.BindInt64(1, segment_id);
162   return statement.Run();
163 }
164 
GetSegmentRepresentationURL(SegmentID segment_id)165 URLID VisitSegmentDatabase::GetSegmentRepresentationURL(SegmentID segment_id) {
166   sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
167       "SELECT url_id FROM segments WHERE id = ?"));
168   if (!statement)
169     return 0;
170 
171   statement.BindInt64(0, segment_id);
172   if (statement.Step())
173     return statement.ColumnInt64(0);
174   return 0;
175 }
176 
CreateSegment(URLID url_id,const std::string & segment_name)177 SegmentID VisitSegmentDatabase::CreateSegment(URLID url_id,
178                                               const std::string& segment_name) {
179   sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
180       "INSERT INTO segments (name, url_id) VALUES (?,?)"));
181   if (!statement)
182     return false;
183 
184   statement.BindString(0, segment_name);
185   statement.BindInt64(1, url_id);
186   if (statement.Run())
187     return GetDB().GetLastInsertRowId();
188   return false;
189 }
190 
IncreaseSegmentVisitCount(SegmentID segment_id,base::Time ts,int amount)191 bool VisitSegmentDatabase::IncreaseSegmentVisitCount(SegmentID segment_id,
192                                                      base::Time ts,
193                                                      int amount) {
194   base::Time t = ts.LocalMidnight();
195 
196   sql::Statement select(GetDB().GetCachedStatement(SQL_FROM_HERE,
197       "SELECT id, visit_count FROM segment_usage "
198       "WHERE time_slot = ? AND segment_id = ?"));
199   if (!select)
200     return false;
201 
202   select.BindInt64(0, t.ToInternalValue());
203   select.BindInt64(1, segment_id);
204   if (select.Step()) {
205     sql::Statement update(GetDB().GetCachedStatement(SQL_FROM_HERE,
206         "UPDATE segment_usage SET visit_count = ? WHERE id = ?"));
207     if (!update)
208       return false;
209 
210     update.BindInt64(0, select.ColumnInt64(1) + static_cast<int64>(amount));
211     update.BindInt64(1, select.ColumnInt64(0));
212     return update.Run();
213 
214   } else {
215     sql::Statement insert(GetDB().GetCachedStatement(SQL_FROM_HERE,
216         "INSERT INTO segment_usage "
217         "(segment_id, time_slot, visit_count) VALUES (?, ?, ?)"));
218     if (!insert)
219       return false;
220 
221     insert.BindInt64(0, segment_id);
222     insert.BindInt64(1, t.ToInternalValue());
223     insert.BindInt64(2, static_cast<int64>(amount));
224     return insert.Run();
225   }
226 }
227 
QuerySegmentUsage(base::Time from_time,int max_result_count,std::vector<PageUsageData * > * results)228 void VisitSegmentDatabase::QuerySegmentUsage(
229     base::Time from_time,
230     int max_result_count,
231     std::vector<PageUsageData*>* results) {
232   // This function gathers the highest-ranked segments in two queries.
233   // The first gathers scores for all segments.
234   // The second gathers segment data (url, title, etc.) for the highest-ranked
235   // segments.
236   // TODO(evanm): this disregards the "presentation index", which was what was
237   // used to lock results into position.  But the rest of our code currently
238   // does as well.
239 
240   // Gather all the segment scores.
241   sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
242       "SELECT segment_id, time_slot, visit_count "
243       "FROM segment_usage WHERE time_slot >= ? "
244       "ORDER BY segment_id"));
245   if (!statement) {
246     NOTREACHED() << GetDB().GetErrorMessage();
247     return;
248   }
249 
250   base::Time ts = from_time.LocalMidnight();
251   statement.BindInt64(0, ts.ToInternalValue());
252 
253   base::Time now = base::Time::Now();
254   SegmentID last_segment_id = 0;
255   PageUsageData* pud = NULL;
256   float score = 0;
257   while (statement.Step()) {
258     SegmentID segment_id = statement.ColumnInt64(0);
259     if (segment_id != last_segment_id) {
260       if (pud) {
261         pud->SetScore(score);
262         results->push_back(pud);
263       }
264 
265       pud = new PageUsageData(segment_id);
266       score = 0;
267       last_segment_id = segment_id;
268     }
269 
270     base::Time timeslot =
271         base::Time::FromInternalValue(statement.ColumnInt64(1));
272     int visit_count = statement.ColumnInt(2);
273     int days_ago = (now - timeslot).InDays();
274 
275     // Score for this day in isolation.
276     float day_visits_score = 1.0f + log(static_cast<float>(visit_count));
277     // Recent visits count more than historical ones, so we multiply in a boost
278     // related to how long ago this day was.
279     // This boost is a curve that smoothly goes through these values:
280     // Today gets 3x, a week ago 2x, three weeks ago 1.5x, falling off to 1x
281     // at the limit of how far we reach into the past.
282     float recency_boost = 1.0f + (2.0f * (1.0f / (1.0f + days_ago/7.0f)));
283     score += recency_boost * day_visits_score;
284   }
285 
286   if (pud) {
287     pud->SetScore(score);
288     results->push_back(pud);
289   }
290 
291   // Limit to the top kResultCount results.
292   sort(results->begin(), results->end(), PageUsageData::Predicate);
293   if (static_cast<int>(results->size()) > max_result_count) {
294     STLDeleteContainerPointers(results->begin() + max_result_count,
295                                results->end());
296     results->resize(max_result_count);
297   }
298 
299   // Now fetch the details about the entries we care about.
300   sql::Statement statement2(GetDB().GetCachedStatement(SQL_FROM_HERE,
301       "SELECT urls.url, urls.title FROM urls "
302       "JOIN segments ON segments.url_id = urls.id "
303       "WHERE segments.id = ?"));
304   if (!statement2) {
305     NOTREACHED() << GetDB().GetErrorMessage();
306     return;
307   }
308   for (size_t i = 0; i < results->size(); ++i) {
309     PageUsageData* pud = (*results)[i];
310     statement2.BindInt64(0, pud->GetID());
311     if (statement2.Step()) {
312       pud->SetURL(GURL(statement2.ColumnString(0)));
313       pud->SetTitle(UTF8ToUTF16(statement2.ColumnString(1)));
314     }
315     statement2.Reset();
316   }
317 }
318 
DeleteSegmentData(base::Time older_than)319 void VisitSegmentDatabase::DeleteSegmentData(base::Time older_than) {
320   sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
321       "DELETE FROM segment_usage WHERE time_slot < ?"));
322   if (!statement)
323     return;
324 
325   statement.BindInt64(0, older_than.LocalMidnight().ToInternalValue());
326   if (!statement.Run())
327     NOTREACHED();
328 }
329 
SetSegmentPresentationIndex(SegmentID segment_id,int index)330 void VisitSegmentDatabase::SetSegmentPresentationIndex(SegmentID segment_id,
331                                                        int index) {
332   sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
333       "UPDATE segments SET pres_index = ? WHERE id = ?"));
334   if (!statement)
335     return;
336 
337   statement.BindInt(0, index);
338   statement.BindInt64(1, segment_id);
339   if (!statement.Run())
340     NOTREACHED();
341   else
342     DCHECK_EQ(1, GetDB().GetLastChangeCount());
343 }
344 
DeleteSegmentForURL(URLID url_id)345 bool VisitSegmentDatabase::DeleteSegmentForURL(URLID url_id) {
346   sql::Statement select(GetDB().GetCachedStatement(SQL_FROM_HERE,
347       "SELECT id FROM segments WHERE url_id = ?"));
348   if (!select)
349     return false;
350 
351   sql::Statement delete_seg(GetDB().GetCachedStatement(SQL_FROM_HERE,
352       "DELETE FROM segments WHERE id = ?"));
353   if (!delete_seg)
354     return false;
355 
356   sql::Statement delete_usage(GetDB().GetCachedStatement(SQL_FROM_HERE,
357       "DELETE FROM segment_usage WHERE segment_id = ?"));
358   if (!delete_usage)
359     return false;
360 
361   bool r = true;
362   select.BindInt64(0, url_id);
363   // In theory there could not be more than one segment using that URL but we
364   // loop anyway to cleanup any inconsistency.
365   while (select.Step()) {
366     SegmentID segment_id = select.ColumnInt64(0);
367 
368     delete_usage.BindInt64(0, segment_id);
369     if (!delete_usage.Run()) {
370       NOTREACHED();
371       r = false;
372     }
373 
374     delete_seg.BindInt64(0, segment_id);
375     if (!delete_seg.Run()) {
376       NOTREACHED();
377       r = false;
378     }
379     delete_usage.Reset();
380     delete_seg.Reset();
381   }
382   return r;
383 }
384 
385 }  // namespace history
386