• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "app/sql/connection.h"
6 #include "base/file_path.h"
7 #include "base/file_util.h"
8 #include "base/message_loop.h"
9 #include "base/utf_string_conversions.h"
10 #include "chrome/browser/history/text_database_manager.h"
11 #include "chrome/browser/history/visit_database.h"
12 #include "testing/gtest/include/gtest/gtest.h"
13 
14 using base::Time;
15 using base::TimeDelta;
16 using base::TimeTicks;
17 
18 namespace history {
19 
20 namespace {
21 
// Fixture pages used by the tests below. Every body contains the token "FOO"
// and every title contains "Google". Pages 1 and 5 share identical body text.
// The pointers themselves are const so a test cannot accidentally reseat them.
const char* const kURL1 = "http://www.google.com/asdf";
const char* const kTitle1 = "Google A";
const char* const kBody1 = "FOO page one.";

const char* const kURL2 = "http://www.google.com/qwer";
const char* const kTitle2 = "Google B";
const char* const kBody2 = "FOO two.";

const char* const kURL3 = "http://www.google.com/zxcv";
const char* const kTitle3 = "Google C";
const char* const kBody3 = "FOO drei";

const char* const kURL4 = "http://www.google.com/hjkl";
const char* const kTitle4 = "Google D";
const char* const kBody4 = "FOO lalala four.";

const char* const kURL5 = "http://www.google.com/uiop";
const char* const kTitle5 = "Google cinq";
const char* const kBody5 = "FOO page one.";
41 
42 // This provides a simple implementation of a URL+VisitDatabase using an
43 // in-memory sqlite connection. The text database manager expects to be able to
44 // update the visit database to keep in sync.
45 class InMemDB : public URLDatabase, public VisitDatabase {
46  public:
InMemDB()47   InMemDB() {
48     EXPECT_TRUE(db_.OpenInMemory());
49     CreateURLTable(false);
50     InitVisitTable();
51   }
~InMemDB()52   ~InMemDB() {
53   }
54 
55  private:
GetDB()56   virtual sql::Connection& GetDB() { return db_; }
57 
58   sql::Connection db_;
59 
60   DISALLOW_COPY_AND_ASSIGN(InMemDB);
61 };
62 
63 // Adds all the pages once, and the first page once more in the next month.
64 // The times of all the pages will be filled into |*times|.
AddAllPages(TextDatabaseManager & manager,VisitDatabase * visit_db,std::vector<Time> * times)65 void AddAllPages(TextDatabaseManager& manager, VisitDatabase* visit_db,
66                  std::vector<Time>* times) {
67   Time::Exploded exploded;
68   memset(&exploded, 0, sizeof(Time::Exploded));
69 
70   // Put the visits in two different months so it will query across databases.
71   exploded.year = 2008;
72   exploded.month = 1;
73   exploded.day_of_month = 3;
74 
75   VisitRow visit_row;
76   visit_row.url_id = 1;
77   visit_row.visit_time = Time::FromUTCExploded(exploded);
78   visit_row.referring_visit = 0;
79   visit_row.transition = 0;
80   visit_row.segment_id = 0;
81   visit_row.is_indexed = false;
82   VisitID visit_id = visit_db->AddVisit(&visit_row, SOURCE_BROWSED);
83 
84   times->push_back(visit_row.visit_time);
85   manager.AddPageData(GURL(kURL1), visit_row.url_id, visit_row.visit_id,
86                       visit_row.visit_time, UTF8ToUTF16(kTitle1),
87                       UTF8ToUTF16(kBody1));
88 
89   exploded.day_of_month++;
90   visit_row.url_id = 2;
91   visit_row.visit_time = Time::FromUTCExploded(exploded);
92   visit_id = visit_db->AddVisit(&visit_row, SOURCE_BROWSED);
93   times->push_back(visit_row.visit_time);
94   manager.AddPageData(GURL(kURL2), visit_row.url_id, visit_row.visit_id,
95                       visit_row.visit_time, UTF8ToUTF16(kTitle2),
96                       UTF8ToUTF16(kBody2));
97 
98   exploded.day_of_month++;
99   visit_row.url_id = 2;
100   visit_row.visit_time = Time::FromUTCExploded(exploded);
101   visit_id = visit_db->AddVisit(&visit_row, SOURCE_BROWSED);
102   times->push_back(visit_row.visit_time);
103   manager.AddPageData(GURL(kURL3), visit_row.url_id, visit_row.visit_id,
104                       visit_row.visit_time, UTF8ToUTF16(kTitle3),
105                       UTF8ToUTF16(kBody3));
106 
107   // Put the next ones in the next month.
108   exploded.month++;
109   visit_row.url_id = 2;
110   visit_row.visit_time = Time::FromUTCExploded(exploded);
111   visit_id = visit_db->AddVisit(&visit_row, SOURCE_BROWSED);
112   times->push_back(visit_row.visit_time);
113   manager.AddPageData(GURL(kURL4), visit_row.url_id, visit_row.visit_id,
114                       visit_row.visit_time, UTF8ToUTF16(kTitle4),
115                       UTF8ToUTF16(kBody4));
116 
117   exploded.day_of_month++;
118   visit_row.url_id = 2;
119   visit_row.visit_time = Time::FromUTCExploded(exploded);
120   visit_id = visit_db->AddVisit(&visit_row, SOURCE_BROWSED);
121   times->push_back(visit_row.visit_time);
122   manager.AddPageData(GURL(kURL5), visit_row.url_id, visit_row.visit_id,
123                       visit_row.visit_time, UTF8ToUTF16(kTitle5),
124                       UTF8ToUTF16(kBody5));
125 
126   // Put the first one in again in the second month.
127   exploded.day_of_month++;
128   visit_row.url_id = 2;
129   visit_row.visit_time = Time::FromUTCExploded(exploded);
130   visit_id = visit_db->AddVisit(&visit_row, SOURCE_BROWSED);
131   times->push_back(visit_row.visit_time);
132   manager.AddPageData(GURL(kURL1), visit_row.url_id, visit_row.visit_id,
133                       visit_row.visit_time, UTF8ToUTF16(kTitle1),
134                       UTF8ToUTF16(kBody1));
135 }
136 
ResultsHaveURL(const std::vector<TextDatabase::Match> & results,const char * url)137 bool ResultsHaveURL(const std::vector<TextDatabase::Match>& results,
138                     const char* url) {
139   GURL gurl(url);
140   for (size_t i = 0; i < results.size(); i++) {
141     if (results[i].url == gurl)
142       return true;
143   }
144   return false;
145 }
146 
147 }  // namespace
148 
149 class TextDatabaseManagerTest : public testing::Test {
150  public:
151   // Called manually by the test so it can report failure to initialize.
Init()152   bool Init() {
153     return file_util::CreateNewTempDirectory(
154         FILE_PATH_LITERAL("TestSearchTest"), &dir_);
155   }
156 
157  protected:
SetUp()158   void SetUp() {
159   }
160 
TearDown()161   void TearDown() {
162     file_util::Delete(dir_, true);
163   }
164 
165   MessageLoop message_loop_;
166 
167   // Directory containing the databases.
168   FilePath dir_;
169 };
170 
171 // Tests basic querying.
TEST_F(TextDatabaseManagerTest,InsertQuery)172 TEST_F(TextDatabaseManagerTest, InsertQuery) {
173   ASSERT_TRUE(Init());
174   InMemDB visit_db;
175   TextDatabaseManager manager(dir_, &visit_db, &visit_db);
176   ASSERT_TRUE(manager.Init(NULL));
177 
178   std::vector<Time> times;
179   AddAllPages(manager, &visit_db, &times);
180 
181   QueryOptions options;
182   options.begin_time = times[0] - TimeDelta::FromDays(100);
183   options.end_time = times[times.size() - 1] + TimeDelta::FromDays(100);
184   std::vector<TextDatabase::Match> results;
185   Time first_time_searched;
186   manager.GetTextMatches(UTF8ToUTF16("FOO"), options,
187                          &results, &first_time_searched);
188 
189   // We should have matched every page.
190   EXPECT_EQ(6U, results.size());
191   EXPECT_TRUE(ResultsHaveURL(results, kURL1));
192   EXPECT_TRUE(ResultsHaveURL(results, kURL2));
193   EXPECT_TRUE(ResultsHaveURL(results, kURL3));
194   EXPECT_TRUE(ResultsHaveURL(results, kURL4));
195   EXPECT_TRUE(ResultsHaveURL(results, kURL5));
196 
197   // The first time searched should have been the first page's time or before
198   // (it could have eliminated some time for us).
199   EXPECT_TRUE(first_time_searched <= times[0]);
200 }
201 
202 // Tests that adding page components piecemeal will get them added properly.
203 // This does not supply a visit to update, this mode is used only by the unit
204 // tests right now, but we test it anyway.
TEST_F(TextDatabaseManagerTest,InsertCompleteNoVisit)205 TEST_F(TextDatabaseManagerTest, InsertCompleteNoVisit) {
206   ASSERT_TRUE(Init());
207   InMemDB visit_db;
208   TextDatabaseManager manager(dir_, &visit_db, &visit_db);
209   ASSERT_TRUE(manager.Init(NULL));
210 
211   // First add one without a visit.
212   const GURL url(kURL1);
213   manager.AddPageURL(url, 0, 0, Time::Now());
214   manager.AddPageTitle(url, UTF8ToUTF16(kTitle1));
215   manager.AddPageContents(url, UTF8ToUTF16(kBody1));
216 
217   // Check that the page got added.
218   QueryOptions options;
219   std::vector<TextDatabase::Match> results;
220   Time first_time_searched;
221 
222   manager.GetTextMatches(UTF8ToUTF16("FOO"), options,
223                          &results, &first_time_searched);
224   ASSERT_EQ(1U, results.size());
225   EXPECT_EQ(kTitle1, UTF16ToUTF8(results[0].title));
226 }
227 
228 // Like InsertCompleteNoVisit but specifies a visit to update. We check that the
229 // visit was updated properly.
TEST_F(TextDatabaseManagerTest,InsertCompleteVisit)230 TEST_F(TextDatabaseManagerTest, InsertCompleteVisit) {
231   ASSERT_TRUE(Init());
232   InMemDB visit_db;
233   TextDatabaseManager manager(dir_, &visit_db, &visit_db);
234   ASSERT_TRUE(manager.Init(NULL));
235 
236   // First add a visit to a page. We can just make up a URL ID since there is
237   // not actually any URL database around.
238   VisitRow visit;
239   visit.url_id = 1;
240   visit.visit_time = Time::Now();
241   visit.referring_visit = 0;
242   visit.transition = PageTransition::LINK;
243   visit.segment_id = 0;
244   visit.is_indexed = false;
245   visit_db.AddVisit(&visit, SOURCE_BROWSED);
246 
247   // Add a full text indexed entry for that visit.
248   const GURL url(kURL2);
249   manager.AddPageURL(url, visit.url_id, visit.visit_id, visit.visit_time);
250   manager.AddPageContents(url, UTF8ToUTF16(kBody2));
251   manager.AddPageTitle(url, UTF8ToUTF16(kTitle2));
252 
253   // Check that the page got added.
254   QueryOptions options;
255   std::vector<TextDatabase::Match> results;
256   Time first_time_searched;
257 
258   manager.GetTextMatches(UTF8ToUTF16("FOO"), options,
259                          &results, &first_time_searched);
260   ASSERT_EQ(1U, results.size());
261   EXPECT_EQ(kTitle2, UTF16ToUTF8(results[0].title));
262 
263   // Check that the visit got updated for its new indexed state.
264   VisitRow out_visit;
265   ASSERT_TRUE(visit_db.GetRowForVisit(visit.visit_id, &out_visit));
266   EXPECT_TRUE(out_visit.is_indexed);
267 }
268 
269 // Tests that partial inserts that expire are added to the database.
TEST_F(TextDatabaseManagerTest,InsertPartial)270 TEST_F(TextDatabaseManagerTest, InsertPartial) {
271   ASSERT_TRUE(Init());
272   InMemDB visit_db;
273   TextDatabaseManager manager(dir_, &visit_db, &visit_db);
274   ASSERT_TRUE(manager.Init(NULL));
275 
276   // Add the first one with just a URL.
277   GURL url1(kURL1);
278   manager.AddPageURL(url1, 0, 0, Time::Now());
279 
280   // Now add a second one with a URL and title.
281   GURL url2(kURL2);
282   manager.AddPageURL(url2, 0, 0, Time::Now());
283   manager.AddPageTitle(url2, UTF8ToUTF16(kTitle2));
284 
285   // The third one has a URL and body.
286   GURL url3(kURL3);
287   manager.AddPageURL(url3, 0, 0, Time::Now());
288   manager.AddPageContents(url3, UTF8ToUTF16(kBody3));
289 
290   // Expire stuff very fast. This assumes that the time between the first
291   // AddPageURL and this line is less than the expiration time (20 seconds).
292   TimeTicks added_time = TimeTicks::Now();
293   TimeTicks expire_time = added_time + TimeDelta::FromSeconds(5);
294   manager.FlushOldChangesForTime(expire_time);
295 
296   // Do a query, nothing should be added yet.
297   QueryOptions options;
298   std::vector<TextDatabase::Match> results;
299   Time first_time_searched;
300   manager.GetTextMatches(UTF8ToUTF16("google"), options,
301                          &results, &first_time_searched);
302   ASSERT_EQ(0U, results.size());
303 
304   // Compute a time threshold that will cause everything to be flushed, and
305   // poke at the manager's internals to cause this to happen.
306   expire_time = added_time + TimeDelta::FromDays(1);
307   manager.FlushOldChangesForTime(expire_time);
308 
309   // Now we should have all 3 URLs added.
310   manager.GetTextMatches(UTF8ToUTF16("google"), options,
311                          &results, &first_time_searched);
312   ASSERT_EQ(3U, results.size());
313   EXPECT_TRUE(ResultsHaveURL(results, kURL1));
314   EXPECT_TRUE(ResultsHaveURL(results, kURL2));
315   EXPECT_TRUE(ResultsHaveURL(results, kURL3));
316 }
317 
318 // Tests that partial inserts (due to timeouts) will still get updated if the
319 // data comes in later.
TEST_F(TextDatabaseManagerTest,PartialComplete)320 TEST_F(TextDatabaseManagerTest, PartialComplete) {
321   ASSERT_TRUE(Init());
322   InMemDB visit_db;
323   TextDatabaseManager manager(dir_, &visit_db, &visit_db);
324   ASSERT_TRUE(manager.Init(NULL));
325 
326   Time added_time = Time::Now();
327   GURL url(kURL1);
328 
329   // We have to have the URL in the URL and visit databases for this test to
330   // work.
331   URLRow url_row(url);
332   url_row.set_title(UTF8ToUTF16("chocolate"));
333   URLID url_id = visit_db.AddURL(url_row);
334   ASSERT_TRUE(url_id);
335   VisitRow visit_row;
336   visit_row.url_id = url_id;
337   visit_row.visit_time = added_time;
338   visit_db.AddVisit(&visit_row, SOURCE_BROWSED);
339 
340   // Add a URL with no title or body, and say that it expired.
341   manager.AddPageURL(url, 0, 0, added_time);
342   TimeTicks expire_time = TimeTicks::Now() + TimeDelta::FromDays(1);
343   manager.FlushOldChangesForTime(expire_time);
344 
345   // Add the title. We should be able to query based on that. The title in the
346   // URL row we set above should not come into the picture.
347   manager.AddPageTitle(url, UTF8ToUTF16("Some unique title"));
348   Time first_time_searched;
349   QueryOptions options;
350   std::vector<TextDatabase::Match> results;
351   manager.GetTextMatches(UTF8ToUTF16("unique"), options,
352                          &results, &first_time_searched);
353   EXPECT_EQ(1U, results.size());
354   manager.GetTextMatches(UTF8ToUTF16("chocolate"), options,
355                          &results, &first_time_searched);
356   EXPECT_EQ(0U, results.size());
357 
358   // Now add the body, which should be queryable.
359   manager.AddPageContents(url, UTF8ToUTF16("Very awesome body"));
360   manager.GetTextMatches(UTF8ToUTF16("awesome"), options, &results, &first_time_searched);
361   EXPECT_EQ(1U, results.size());
362 
363   // Adding the body will actually copy the title from the URL table rather
364   // than the previously indexed row (we made them not match above). This isn't
365   // necessarily what we want, but it's how it's implemented, and we don't want
366   // to regress it.
367   manager.GetTextMatches(UTF8ToUTF16("chocolate"), options, &results, &first_time_searched);
368   EXPECT_EQ(1U, results.size());
369 }
370 
371 // Tests that changes get properly committed to disk.
TEST_F(TextDatabaseManagerTest,Writing)372 TEST_F(TextDatabaseManagerTest, Writing) {
373   ASSERT_TRUE(Init());
374 
375   QueryOptions options;
376   std::vector<TextDatabase::Match> results;
377   Time first_time_searched;
378 
379   InMemDB visit_db;
380 
381   // Create the manager and write some stuff to it.
382   {
383     TextDatabaseManager manager(dir_, &visit_db, &visit_db);
384     ASSERT_TRUE(manager.Init(NULL));
385 
386     std::vector<Time> times;
387     AddAllPages(manager, &visit_db, &times);
388 
389     // We should have matched every page.
390     manager.GetTextMatches(UTF8ToUTF16("FOO"), options, &results, &first_time_searched);
391     EXPECT_EQ(6U, results.size());
392   }
393   results.clear();
394 
395   // Recreate the manager and make sure it finds the written stuff.
396   {
397     TextDatabaseManager manager(dir_, &visit_db, &visit_db);
398     ASSERT_TRUE(manager.Init(NULL));
399 
400     // We should have matched every page again.
401     manager.GetTextMatches(UTF8ToUTF16("FOO"), options,
402                            &results, &first_time_searched);
403     EXPECT_EQ(6U, results.size());
404   }
405 }
406 
407 // Tests that changes get properly committed to disk, as in the Writing test
408 // above, but when there is a transaction around the adds.
TEST_F(TextDatabaseManagerTest,WritingTransaction)409 TEST_F(TextDatabaseManagerTest, WritingTransaction) {
410   ASSERT_TRUE(Init());
411 
412   QueryOptions options;
413   std::vector<TextDatabase::Match> results;
414   Time first_time_searched;
415 
416   InMemDB visit_db;
417 
418   // Create the manager and write some stuff to it.
419   {
420     TextDatabaseManager manager(dir_, &visit_db, &visit_db);
421     ASSERT_TRUE(manager.Init(NULL));
422 
423     std::vector<Time> times;
424     manager.BeginTransaction();
425     AddAllPages(manager, &visit_db, &times);
426     // "Forget" to commit, it should be autocommittedd for us.
427 
428     // We should have matched every page.
429     manager.GetTextMatches(UTF8ToUTF16("FOO"), options,
430                            &results, &first_time_searched);
431     EXPECT_EQ(6U, results.size());
432   }
433   results.clear();
434 
435   // Recreate the manager and make sure it finds the written stuff.
436   {
437     TextDatabaseManager manager(dir_, &visit_db, &visit_db);
438     ASSERT_TRUE(manager.Init(NULL));
439 
440     // We should have matched every page again.
441     manager.GetTextMatches(UTF8ToUTF16("FOO"), options,
442                            &results, &first_time_searched);
443     EXPECT_EQ(6U, results.size());
444   }
445 }
446 
447 // Tests querying where the maximum number of items is met.
TEST_F(TextDatabaseManagerTest,QueryMax)448 TEST_F(TextDatabaseManagerTest, QueryMax) {
449   ASSERT_TRUE(Init());
450   InMemDB visit_db;
451   TextDatabaseManager manager(dir_, &visit_db, &visit_db);
452   ASSERT_TRUE(manager.Init(NULL));
453 
454   std::vector<Time> times;
455   AddAllPages(manager, &visit_db, &times);
456 
457   string16 foo = UTF8ToUTF16("FOO");
458 
459   QueryOptions options;
460   options.begin_time = times[0] - TimeDelta::FromDays(100);
461   options.end_time = times[times.size() - 1] + TimeDelta::FromDays(100);
462   options.max_count = 2;
463   std::vector<TextDatabase::Match> results;
464   Time first_time_searched;
465   manager.GetTextMatches(foo, options, &results, &first_time_searched);
466 
467   // We should have gotten the last two pages as results (the first page is
468   // also the last).
469   EXPECT_EQ(2U, results.size());
470   EXPECT_TRUE(first_time_searched <= times[4]);
471   EXPECT_TRUE(ResultsHaveURL(results, kURL5));
472   EXPECT_TRUE(ResultsHaveURL(results, kURL1));
473 
474   // Asking for 4 pages, the first one should be in another DB.
475   options.max_count = 4;
476   manager.GetTextMatches(foo, options, &results, &first_time_searched);
477 
478   EXPECT_EQ(4U, results.size());
479   EXPECT_TRUE(first_time_searched <= times[4]);
480   EXPECT_TRUE(ResultsHaveURL(results, kURL3));
481   EXPECT_TRUE(ResultsHaveURL(results, kURL4));
482   EXPECT_TRUE(ResultsHaveURL(results, kURL5));
483   EXPECT_TRUE(ResultsHaveURL(results, kURL1));
484 }
485 
486 // Tests querying backwards in time in chunks.
TEST_F(TextDatabaseManagerTest,QueryBackwards)487 TEST_F(TextDatabaseManagerTest, QueryBackwards) {
488   ASSERT_TRUE(Init());
489   InMemDB visit_db;
490   TextDatabaseManager manager(dir_, &visit_db, &visit_db);
491   ASSERT_TRUE(manager.Init(NULL));
492 
493   std::vector<Time> times;
494   AddAllPages(manager, &visit_db, &times);
495 
496   string16 foo = UTF8ToUTF16("FOO");
497 
498   // First do a query for all time, but with a max of 2. This will give us the
499   // last two results and will tell us where to start searching when we want
500   // to go back in time.
501   QueryOptions options;
502   options.begin_time = times[0] - TimeDelta::FromDays(100);
503   options.end_time = times[times.size() - 1] + TimeDelta::FromDays(100);
504   options.max_count = 2;
505   std::vector<TextDatabase::Match> results;
506   Time first_time_searched;
507   manager.GetTextMatches(foo, options, &results, &first_time_searched);
508 
509   // Check that we got the last two results.
510   EXPECT_EQ(2U, results.size());
511   EXPECT_TRUE(first_time_searched <= times[4]);
512   EXPECT_TRUE(ResultsHaveURL(results, kURL5));
513   EXPECT_TRUE(ResultsHaveURL(results, kURL1));
514 
515   // Query the previous two URLs and make sure we got the correct ones.
516   options.end_time = first_time_searched;
517   manager.GetTextMatches(foo, options, &results, &first_time_searched);
518   EXPECT_EQ(2U, results.size());
519   EXPECT_TRUE(first_time_searched <= times[2]);
520   EXPECT_TRUE(ResultsHaveURL(results, kURL3));
521   EXPECT_TRUE(ResultsHaveURL(results, kURL4));
522 
523   // Query the previous two URLs...
524   options.end_time = first_time_searched;
525   manager.GetTextMatches(foo, options, &results, &first_time_searched);
526   EXPECT_EQ(2U, results.size());
527   EXPECT_TRUE(first_time_searched <= times[0]);
528   EXPECT_TRUE(ResultsHaveURL(results, kURL2));
529   EXPECT_TRUE(ResultsHaveURL(results, kURL1));
530 
531   // Try to query some more, there should be no results.
532   options.end_time = first_time_searched;
533   manager.GetTextMatches(foo, options, &results, &first_time_searched);
534   EXPECT_EQ(0U, results.size());
535 }
536 
537 }  // namespace history
538