1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "app/sql/connection.h"
6 #include "base/file_path.h"
7 #include "base/file_util.h"
8 #include "base/message_loop.h"
9 #include "base/utf_string_conversions.h"
10 #include "chrome/browser/history/text_database_manager.h"
11 #include "chrome/browser/history/visit_database.h"
12 #include "testing/gtest/include/gtest/gtest.h"
13
14 using base::Time;
15 using base::TimeDelta;
16 using base::TimeTicks;
17
18 namespace history {
19
20 namespace {
21
// Fixture pages shared by the tests in this file: a URL, a title and a body
// for each of five pages. Every body contains the word "FOO" and every title
// contains "Google"; those are the terms the tests below search for. Page 5
// has the same body text as page 1.
//
// The pointers themselves are const so no test can accidentally repoint one of
// these shared fixtures.
const char* const kURL1 = "http://www.google.com/asdf";
const char* const kTitle1 = "Google A";
const char* const kBody1 = "FOO page one.";

const char* const kURL2 = "http://www.google.com/qwer";
const char* const kTitle2 = "Google B";
const char* const kBody2 = "FOO two.";

const char* const kURL3 = "http://www.google.com/zxcv";
const char* const kTitle3 = "Google C";
const char* const kBody3 = "FOO drei";

const char* const kURL4 = "http://www.google.com/hjkl";
const char* const kTitle4 = "Google D";
const char* const kBody4 = "FOO lalala four.";

const char* const kURL5 = "http://www.google.com/uiop";
const char* const kTitle5 = "Google cinq";
const char* const kBody5 = "FOO page one.";
41
42 // This provides a simple implementation of a URL+VisitDatabase using an
43 // in-memory sqlite connection. The text database manager expects to be able to
44 // update the visit database to keep in sync.
45 class InMemDB : public URLDatabase, public VisitDatabase {
46 public:
InMemDB()47 InMemDB() {
48 EXPECT_TRUE(db_.OpenInMemory());
49 CreateURLTable(false);
50 InitVisitTable();
51 }
~InMemDB()52 ~InMemDB() {
53 }
54
55 private:
GetDB()56 virtual sql::Connection& GetDB() { return db_; }
57
58 sql::Connection db_;
59
60 DISALLOW_COPY_AND_ASSIGN(InMemDB);
61 };
62
63 // Adds all the pages once, and the first page once more in the next month.
64 // The times of all the pages will be filled into |*times|.
AddAllPages(TextDatabaseManager & manager,VisitDatabase * visit_db,std::vector<Time> * times)65 void AddAllPages(TextDatabaseManager& manager, VisitDatabase* visit_db,
66 std::vector<Time>* times) {
67 Time::Exploded exploded;
68 memset(&exploded, 0, sizeof(Time::Exploded));
69
70 // Put the visits in two different months so it will query across databases.
71 exploded.year = 2008;
72 exploded.month = 1;
73 exploded.day_of_month = 3;
74
75 VisitRow visit_row;
76 visit_row.url_id = 1;
77 visit_row.visit_time = Time::FromUTCExploded(exploded);
78 visit_row.referring_visit = 0;
79 visit_row.transition = 0;
80 visit_row.segment_id = 0;
81 visit_row.is_indexed = false;
82 VisitID visit_id = visit_db->AddVisit(&visit_row, SOURCE_BROWSED);
83
84 times->push_back(visit_row.visit_time);
85 manager.AddPageData(GURL(kURL1), visit_row.url_id, visit_row.visit_id,
86 visit_row.visit_time, UTF8ToUTF16(kTitle1),
87 UTF8ToUTF16(kBody1));
88
89 exploded.day_of_month++;
90 visit_row.url_id = 2;
91 visit_row.visit_time = Time::FromUTCExploded(exploded);
92 visit_id = visit_db->AddVisit(&visit_row, SOURCE_BROWSED);
93 times->push_back(visit_row.visit_time);
94 manager.AddPageData(GURL(kURL2), visit_row.url_id, visit_row.visit_id,
95 visit_row.visit_time, UTF8ToUTF16(kTitle2),
96 UTF8ToUTF16(kBody2));
97
98 exploded.day_of_month++;
99 visit_row.url_id = 2;
100 visit_row.visit_time = Time::FromUTCExploded(exploded);
101 visit_id = visit_db->AddVisit(&visit_row, SOURCE_BROWSED);
102 times->push_back(visit_row.visit_time);
103 manager.AddPageData(GURL(kURL3), visit_row.url_id, visit_row.visit_id,
104 visit_row.visit_time, UTF8ToUTF16(kTitle3),
105 UTF8ToUTF16(kBody3));
106
107 // Put the next ones in the next month.
108 exploded.month++;
109 visit_row.url_id = 2;
110 visit_row.visit_time = Time::FromUTCExploded(exploded);
111 visit_id = visit_db->AddVisit(&visit_row, SOURCE_BROWSED);
112 times->push_back(visit_row.visit_time);
113 manager.AddPageData(GURL(kURL4), visit_row.url_id, visit_row.visit_id,
114 visit_row.visit_time, UTF8ToUTF16(kTitle4),
115 UTF8ToUTF16(kBody4));
116
117 exploded.day_of_month++;
118 visit_row.url_id = 2;
119 visit_row.visit_time = Time::FromUTCExploded(exploded);
120 visit_id = visit_db->AddVisit(&visit_row, SOURCE_BROWSED);
121 times->push_back(visit_row.visit_time);
122 manager.AddPageData(GURL(kURL5), visit_row.url_id, visit_row.visit_id,
123 visit_row.visit_time, UTF8ToUTF16(kTitle5),
124 UTF8ToUTF16(kBody5));
125
126 // Put the first one in again in the second month.
127 exploded.day_of_month++;
128 visit_row.url_id = 2;
129 visit_row.visit_time = Time::FromUTCExploded(exploded);
130 visit_id = visit_db->AddVisit(&visit_row, SOURCE_BROWSED);
131 times->push_back(visit_row.visit_time);
132 manager.AddPageData(GURL(kURL1), visit_row.url_id, visit_row.visit_id,
133 visit_row.visit_time, UTF8ToUTF16(kTitle1),
134 UTF8ToUTF16(kBody1));
135 }
136
ResultsHaveURL(const std::vector<TextDatabase::Match> & results,const char * url)137 bool ResultsHaveURL(const std::vector<TextDatabase::Match>& results,
138 const char* url) {
139 GURL gurl(url);
140 for (size_t i = 0; i < results.size(); i++) {
141 if (results[i].url == gurl)
142 return true;
143 }
144 return false;
145 }
146
147 } // namespace
148
149 class TextDatabaseManagerTest : public testing::Test {
150 public:
151 // Called manually by the test so it can report failure to initialize.
Init()152 bool Init() {
153 return file_util::CreateNewTempDirectory(
154 FILE_PATH_LITERAL("TestSearchTest"), &dir_);
155 }
156
157 protected:
SetUp()158 void SetUp() {
159 }
160
TearDown()161 void TearDown() {
162 file_util::Delete(dir_, true);
163 }
164
165 MessageLoop message_loop_;
166
167 // Directory containing the databases.
168 FilePath dir_;
169 };
170
171 // Tests basic querying.
TEST_F(TextDatabaseManagerTest,InsertQuery)172 TEST_F(TextDatabaseManagerTest, InsertQuery) {
173 ASSERT_TRUE(Init());
174 InMemDB visit_db;
175 TextDatabaseManager manager(dir_, &visit_db, &visit_db);
176 ASSERT_TRUE(manager.Init(NULL));
177
178 std::vector<Time> times;
179 AddAllPages(manager, &visit_db, ×);
180
181 QueryOptions options;
182 options.begin_time = times[0] - TimeDelta::FromDays(100);
183 options.end_time = times[times.size() - 1] + TimeDelta::FromDays(100);
184 std::vector<TextDatabase::Match> results;
185 Time first_time_searched;
186 manager.GetTextMatches(UTF8ToUTF16("FOO"), options,
187 &results, &first_time_searched);
188
189 // We should have matched every page.
190 EXPECT_EQ(6U, results.size());
191 EXPECT_TRUE(ResultsHaveURL(results, kURL1));
192 EXPECT_TRUE(ResultsHaveURL(results, kURL2));
193 EXPECT_TRUE(ResultsHaveURL(results, kURL3));
194 EXPECT_TRUE(ResultsHaveURL(results, kURL4));
195 EXPECT_TRUE(ResultsHaveURL(results, kURL5));
196
197 // The first time searched should have been the first page's time or before
198 // (it could have eliminated some time for us).
199 EXPECT_TRUE(first_time_searched <= times[0]);
200 }
201
202 // Tests that adding page components piecemeal will get them added properly.
203 // This does not supply a visit to update, this mode is used only by the unit
204 // tests right now, but we test it anyway.
TEST_F(TextDatabaseManagerTest,InsertCompleteNoVisit)205 TEST_F(TextDatabaseManagerTest, InsertCompleteNoVisit) {
206 ASSERT_TRUE(Init());
207 InMemDB visit_db;
208 TextDatabaseManager manager(dir_, &visit_db, &visit_db);
209 ASSERT_TRUE(manager.Init(NULL));
210
211 // First add one without a visit.
212 const GURL url(kURL1);
213 manager.AddPageURL(url, 0, 0, Time::Now());
214 manager.AddPageTitle(url, UTF8ToUTF16(kTitle1));
215 manager.AddPageContents(url, UTF8ToUTF16(kBody1));
216
217 // Check that the page got added.
218 QueryOptions options;
219 std::vector<TextDatabase::Match> results;
220 Time first_time_searched;
221
222 manager.GetTextMatches(UTF8ToUTF16("FOO"), options,
223 &results, &first_time_searched);
224 ASSERT_EQ(1U, results.size());
225 EXPECT_EQ(kTitle1, UTF16ToUTF8(results[0].title));
226 }
227
228 // Like InsertCompleteNoVisit but specifies a visit to update. We check that the
229 // visit was updated properly.
TEST_F(TextDatabaseManagerTest,InsertCompleteVisit)230 TEST_F(TextDatabaseManagerTest, InsertCompleteVisit) {
231 ASSERT_TRUE(Init());
232 InMemDB visit_db;
233 TextDatabaseManager manager(dir_, &visit_db, &visit_db);
234 ASSERT_TRUE(manager.Init(NULL));
235
236 // First add a visit to a page. We can just make up a URL ID since there is
237 // not actually any URL database around.
238 VisitRow visit;
239 visit.url_id = 1;
240 visit.visit_time = Time::Now();
241 visit.referring_visit = 0;
242 visit.transition = PageTransition::LINK;
243 visit.segment_id = 0;
244 visit.is_indexed = false;
245 visit_db.AddVisit(&visit, SOURCE_BROWSED);
246
247 // Add a full text indexed entry for that visit.
248 const GURL url(kURL2);
249 manager.AddPageURL(url, visit.url_id, visit.visit_id, visit.visit_time);
250 manager.AddPageContents(url, UTF8ToUTF16(kBody2));
251 manager.AddPageTitle(url, UTF8ToUTF16(kTitle2));
252
253 // Check that the page got added.
254 QueryOptions options;
255 std::vector<TextDatabase::Match> results;
256 Time first_time_searched;
257
258 manager.GetTextMatches(UTF8ToUTF16("FOO"), options,
259 &results, &first_time_searched);
260 ASSERT_EQ(1U, results.size());
261 EXPECT_EQ(kTitle2, UTF16ToUTF8(results[0].title));
262
263 // Check that the visit got updated for its new indexed state.
264 VisitRow out_visit;
265 ASSERT_TRUE(visit_db.GetRowForVisit(visit.visit_id, &out_visit));
266 EXPECT_TRUE(out_visit.is_indexed);
267 }
268
269 // Tests that partial inserts that expire are added to the database.
TEST_F(TextDatabaseManagerTest,InsertPartial)270 TEST_F(TextDatabaseManagerTest, InsertPartial) {
271 ASSERT_TRUE(Init());
272 InMemDB visit_db;
273 TextDatabaseManager manager(dir_, &visit_db, &visit_db);
274 ASSERT_TRUE(manager.Init(NULL));
275
276 // Add the first one with just a URL.
277 GURL url1(kURL1);
278 manager.AddPageURL(url1, 0, 0, Time::Now());
279
280 // Now add a second one with a URL and title.
281 GURL url2(kURL2);
282 manager.AddPageURL(url2, 0, 0, Time::Now());
283 manager.AddPageTitle(url2, UTF8ToUTF16(kTitle2));
284
285 // The third one has a URL and body.
286 GURL url3(kURL3);
287 manager.AddPageURL(url3, 0, 0, Time::Now());
288 manager.AddPageContents(url3, UTF8ToUTF16(kBody3));
289
290 // Expire stuff very fast. This assumes that the time between the first
291 // AddPageURL and this line is less than the expiration time (20 seconds).
292 TimeTicks added_time = TimeTicks::Now();
293 TimeTicks expire_time = added_time + TimeDelta::FromSeconds(5);
294 manager.FlushOldChangesForTime(expire_time);
295
296 // Do a query, nothing should be added yet.
297 QueryOptions options;
298 std::vector<TextDatabase::Match> results;
299 Time first_time_searched;
300 manager.GetTextMatches(UTF8ToUTF16("google"), options,
301 &results, &first_time_searched);
302 ASSERT_EQ(0U, results.size());
303
304 // Compute a time threshold that will cause everything to be flushed, and
305 // poke at the manager's internals to cause this to happen.
306 expire_time = added_time + TimeDelta::FromDays(1);
307 manager.FlushOldChangesForTime(expire_time);
308
309 // Now we should have all 3 URLs added.
310 manager.GetTextMatches(UTF8ToUTF16("google"), options,
311 &results, &first_time_searched);
312 ASSERT_EQ(3U, results.size());
313 EXPECT_TRUE(ResultsHaveURL(results, kURL1));
314 EXPECT_TRUE(ResultsHaveURL(results, kURL2));
315 EXPECT_TRUE(ResultsHaveURL(results, kURL3));
316 }
317
318 // Tests that partial inserts (due to timeouts) will still get updated if the
319 // data comes in later.
TEST_F(TextDatabaseManagerTest,PartialComplete)320 TEST_F(TextDatabaseManagerTest, PartialComplete) {
321 ASSERT_TRUE(Init());
322 InMemDB visit_db;
323 TextDatabaseManager manager(dir_, &visit_db, &visit_db);
324 ASSERT_TRUE(manager.Init(NULL));
325
326 Time added_time = Time::Now();
327 GURL url(kURL1);
328
329 // We have to have the URL in the URL and visit databases for this test to
330 // work.
331 URLRow url_row(url);
332 url_row.set_title(UTF8ToUTF16("chocolate"));
333 URLID url_id = visit_db.AddURL(url_row);
334 ASSERT_TRUE(url_id);
335 VisitRow visit_row;
336 visit_row.url_id = url_id;
337 visit_row.visit_time = added_time;
338 visit_db.AddVisit(&visit_row, SOURCE_BROWSED);
339
340 // Add a URL with no title or body, and say that it expired.
341 manager.AddPageURL(url, 0, 0, added_time);
342 TimeTicks expire_time = TimeTicks::Now() + TimeDelta::FromDays(1);
343 manager.FlushOldChangesForTime(expire_time);
344
345 // Add the title. We should be able to query based on that. The title in the
346 // URL row we set above should not come into the picture.
347 manager.AddPageTitle(url, UTF8ToUTF16("Some unique title"));
348 Time first_time_searched;
349 QueryOptions options;
350 std::vector<TextDatabase::Match> results;
351 manager.GetTextMatches(UTF8ToUTF16("unique"), options,
352 &results, &first_time_searched);
353 EXPECT_EQ(1U, results.size());
354 manager.GetTextMatches(UTF8ToUTF16("chocolate"), options,
355 &results, &first_time_searched);
356 EXPECT_EQ(0U, results.size());
357
358 // Now add the body, which should be queryable.
359 manager.AddPageContents(url, UTF8ToUTF16("Very awesome body"));
360 manager.GetTextMatches(UTF8ToUTF16("awesome"), options, &results, &first_time_searched);
361 EXPECT_EQ(1U, results.size());
362
363 // Adding the body will actually copy the title from the URL table rather
364 // than the previously indexed row (we made them not match above). This isn't
365 // necessarily what we want, but it's how it's implemented, and we don't want
366 // to regress it.
367 manager.GetTextMatches(UTF8ToUTF16("chocolate"), options, &results, &first_time_searched);
368 EXPECT_EQ(1U, results.size());
369 }
370
371 // Tests that changes get properly committed to disk.
TEST_F(TextDatabaseManagerTest,Writing)372 TEST_F(TextDatabaseManagerTest, Writing) {
373 ASSERT_TRUE(Init());
374
375 QueryOptions options;
376 std::vector<TextDatabase::Match> results;
377 Time first_time_searched;
378
379 InMemDB visit_db;
380
381 // Create the manager and write some stuff to it.
382 {
383 TextDatabaseManager manager(dir_, &visit_db, &visit_db);
384 ASSERT_TRUE(manager.Init(NULL));
385
386 std::vector<Time> times;
387 AddAllPages(manager, &visit_db, ×);
388
389 // We should have matched every page.
390 manager.GetTextMatches(UTF8ToUTF16("FOO"), options, &results, &first_time_searched);
391 EXPECT_EQ(6U, results.size());
392 }
393 results.clear();
394
395 // Recreate the manager and make sure it finds the written stuff.
396 {
397 TextDatabaseManager manager(dir_, &visit_db, &visit_db);
398 ASSERT_TRUE(manager.Init(NULL));
399
400 // We should have matched every page again.
401 manager.GetTextMatches(UTF8ToUTF16("FOO"), options,
402 &results, &first_time_searched);
403 EXPECT_EQ(6U, results.size());
404 }
405 }
406
407 // Tests that changes get properly committed to disk, as in the Writing test
408 // above, but when there is a transaction around the adds.
TEST_F(TextDatabaseManagerTest,WritingTransaction)409 TEST_F(TextDatabaseManagerTest, WritingTransaction) {
410 ASSERT_TRUE(Init());
411
412 QueryOptions options;
413 std::vector<TextDatabase::Match> results;
414 Time first_time_searched;
415
416 InMemDB visit_db;
417
418 // Create the manager and write some stuff to it.
419 {
420 TextDatabaseManager manager(dir_, &visit_db, &visit_db);
421 ASSERT_TRUE(manager.Init(NULL));
422
423 std::vector<Time> times;
424 manager.BeginTransaction();
425 AddAllPages(manager, &visit_db, ×);
426 // "Forget" to commit, it should be autocommittedd for us.
427
428 // We should have matched every page.
429 manager.GetTextMatches(UTF8ToUTF16("FOO"), options,
430 &results, &first_time_searched);
431 EXPECT_EQ(6U, results.size());
432 }
433 results.clear();
434
435 // Recreate the manager and make sure it finds the written stuff.
436 {
437 TextDatabaseManager manager(dir_, &visit_db, &visit_db);
438 ASSERT_TRUE(manager.Init(NULL));
439
440 // We should have matched every page again.
441 manager.GetTextMatches(UTF8ToUTF16("FOO"), options,
442 &results, &first_time_searched);
443 EXPECT_EQ(6U, results.size());
444 }
445 }
446
447 // Tests querying where the maximum number of items is met.
TEST_F(TextDatabaseManagerTest,QueryMax)448 TEST_F(TextDatabaseManagerTest, QueryMax) {
449 ASSERT_TRUE(Init());
450 InMemDB visit_db;
451 TextDatabaseManager manager(dir_, &visit_db, &visit_db);
452 ASSERT_TRUE(manager.Init(NULL));
453
454 std::vector<Time> times;
455 AddAllPages(manager, &visit_db, ×);
456
457 string16 foo = UTF8ToUTF16("FOO");
458
459 QueryOptions options;
460 options.begin_time = times[0] - TimeDelta::FromDays(100);
461 options.end_time = times[times.size() - 1] + TimeDelta::FromDays(100);
462 options.max_count = 2;
463 std::vector<TextDatabase::Match> results;
464 Time first_time_searched;
465 manager.GetTextMatches(foo, options, &results, &first_time_searched);
466
467 // We should have gotten the last two pages as results (the first page is
468 // also the last).
469 EXPECT_EQ(2U, results.size());
470 EXPECT_TRUE(first_time_searched <= times[4]);
471 EXPECT_TRUE(ResultsHaveURL(results, kURL5));
472 EXPECT_TRUE(ResultsHaveURL(results, kURL1));
473
474 // Asking for 4 pages, the first one should be in another DB.
475 options.max_count = 4;
476 manager.GetTextMatches(foo, options, &results, &first_time_searched);
477
478 EXPECT_EQ(4U, results.size());
479 EXPECT_TRUE(first_time_searched <= times[4]);
480 EXPECT_TRUE(ResultsHaveURL(results, kURL3));
481 EXPECT_TRUE(ResultsHaveURL(results, kURL4));
482 EXPECT_TRUE(ResultsHaveURL(results, kURL5));
483 EXPECT_TRUE(ResultsHaveURL(results, kURL1));
484 }
485
486 // Tests querying backwards in time in chunks.
TEST_F(TextDatabaseManagerTest,QueryBackwards)487 TEST_F(TextDatabaseManagerTest, QueryBackwards) {
488 ASSERT_TRUE(Init());
489 InMemDB visit_db;
490 TextDatabaseManager manager(dir_, &visit_db, &visit_db);
491 ASSERT_TRUE(manager.Init(NULL));
492
493 std::vector<Time> times;
494 AddAllPages(manager, &visit_db, ×);
495
496 string16 foo = UTF8ToUTF16("FOO");
497
498 // First do a query for all time, but with a max of 2. This will give us the
499 // last two results and will tell us where to start searching when we want
500 // to go back in time.
501 QueryOptions options;
502 options.begin_time = times[0] - TimeDelta::FromDays(100);
503 options.end_time = times[times.size() - 1] + TimeDelta::FromDays(100);
504 options.max_count = 2;
505 std::vector<TextDatabase::Match> results;
506 Time first_time_searched;
507 manager.GetTextMatches(foo, options, &results, &first_time_searched);
508
509 // Check that we got the last two results.
510 EXPECT_EQ(2U, results.size());
511 EXPECT_TRUE(first_time_searched <= times[4]);
512 EXPECT_TRUE(ResultsHaveURL(results, kURL5));
513 EXPECT_TRUE(ResultsHaveURL(results, kURL1));
514
515 // Query the previous two URLs and make sure we got the correct ones.
516 options.end_time = first_time_searched;
517 manager.GetTextMatches(foo, options, &results, &first_time_searched);
518 EXPECT_EQ(2U, results.size());
519 EXPECT_TRUE(first_time_searched <= times[2]);
520 EXPECT_TRUE(ResultsHaveURL(results, kURL3));
521 EXPECT_TRUE(ResultsHaveURL(results, kURL4));
522
523 // Query the previous two URLs...
524 options.end_time = first_time_searched;
525 manager.GetTextMatches(foo, options, &results, &first_time_searched);
526 EXPECT_EQ(2U, results.size());
527 EXPECT_TRUE(first_time_searched <= times[0]);
528 EXPECT_TRUE(ResultsHaveURL(results, kURL2));
529 EXPECT_TRUE(ResultsHaveURL(results, kURL1));
530
531 // Try to query some more, there should be no results.
532 options.end_time = first_time_searched;
533 manager.GetTextMatches(foo, options, &results, &first_time_searched);
534 EXPECT_EQ(0U, results.size());
535 }
536
537 } // namespace history
538