• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2009 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef CHROME_BROWSER_HISTORY_TEXT_DATABASE_H_
6 #define CHROME_BROWSER_HISTORY_TEXT_DATABASE_H_
7 #pragma once
8 
9 #include <set>
10 #include <vector>
11 
12 #include "app/sql/connection.h"
13 #include "app/sql/meta_table.h"
14 #include "base/basictypes.h"
15 #include "base/file_path.h"
16 #include "base/string16.h"
17 #include "chrome/browser/history/history_types.h"
18 #include "googleurl/src/gurl.h"
19 
20 namespace history {
21 
22 // Encapsulation of a full-text indexed database file.
23 class TextDatabase {
24  public:
25   typedef int DBIdent;
26 
27   typedef std::set<GURL> URLSet;
28 
29   // A single result returned from GetTextMatches().
30   struct Match {
31     Match();
32     ~Match();
33 
34     // URL of the match.
35     GURL url;
36 
37     // The title is returned because the title in the text database and the URL
38     // database may differ. This happens because we capture the title when the
39     // body is captured, and don't update it later.
40     string16 title;
41 
42     // Time the page that was returned was visited.
43     base::Time time;
44 
45     // Identifies any found matches in the title of the document. These are not
46     // included in the snippet.
47     Snippet::MatchPositions title_match_positions;
48 
49     // Snippet of the match we generated from the body.
50     Snippet snippet;
51   };
52 
53   // Note: Init() must be called, and must succeed, before using this class.
54   //
55   // (Queries made through GetTextMatches() compute the matches for the query,
56   // returning results in decreasing order of visit time.)
57   //
58   // NOTE(review): this constructor takes no connection, so the following may
59   // be stale: the new database is attached to the given connection, letting
60   // one sqlite3 object share many TextDatabases and thus one cache, which
61   // limits the total size that text indexing databases can take up.
62   //
63   // |path| locates the file on disk (presumably its directory; confirm).
64   //
65   // |id| is the identifier for the database. It should uniquely identify it
66   // among other databases on disk and in the sqlite connection.
67   //
68   // |allow_create| indicates if we want to allow creation of the file if it
69   // doesn't exist. For files associated with older time periods, we don't want
70   // to create them if they don't exist, so this flag would be false.
71   TextDatabase(const FilePath& path,
72                DBIdent id,
73                bool allow_create);
74   ~TextDatabase();
75 
76   // Initializes the database connection and creates the file if the class
77   // was created with |allow_create| set. If the file couldn't be opened or
78   // created, this returns false, and no other functions should be called on
79   // this object after such a failure.
80   bool Init();
81 
82   // Allows updates to be batched. This gives higher performance when multiple
83   // updates are happening because every insert doesn't require a sync to disk.
84   // Transactions can be nested, only the outermost one will actually count.
85   void BeginTransaction();
86   void CommitTransaction();
87 
88   // For testing, returns the file name of the database so it can be deleted
89   // after the test. This is valid even before Init() is called.
file_name()90   const FilePath& file_name() const { return file_name_; }
91 
92   // Returns a NULL-terminated string that is the base of history index files,
93   // which is the part before the database identifier. For example
94   // "History Index *". This is for finding existing database files.
95   static const FilePath::CharType* file_base();
96 
97   // Converts a filename on disk (optionally including a path) to a database
98   // identifier. If the filename doesn't have the correct format, returns 0.
99   static DBIdent FileNameToID(const FilePath& file_path);
100 
101   // Changing operations -------------------------------------------------------
102 
103   // Adds the given page data to the database. Returns true on success. The
104   // data should already be converted to UTF-8.
105   bool AddPageData(base::Time time,
106                    const std::string& url,
107                    const std::string& title,
108                    const std::string& contents);
109 
110   // Deletes the indexed data exactly matching the given URL/time pair.
111   void DeletePageData(base::Time time, const std::string& url);
112 
113   // Optimizes the tree inside the database. This will, in addition to making
114   // access faster, remove any deleted data from the database (normally it is
115   // added again as "removed" and it is manually cleaned up when it decides to
116   // optimize it naturally). It is bad for privacy if a user is deleting a
117   // page from history but it still exists in the full text database in some
118   // form. This function will clean that up.
119   void Optimize();
120 
121   // Querying ------------------------------------------------------------------
122 
123   // Executes the given query. See QueryOptions for more info on input.
124   //
125   // The results are appended to any existing ones in |*results|, and the first
126   // time considered for the output is returned in |*first_time_searched|
127   // (see QueryResults for more).
128   //
129   // Any URLs found will be added to |unique_urls|. If a URL is already in the
130   // set, additional results will not be added (giving the ability to uniquify
131   // URL results).
132   //
133   // Callers must run QueryParser on the user text and pass the results of the
134   // QueryParser to this method as the query string.
135   void GetTextMatches(const std::string& query,
136                       const QueryOptions& options,
137                       std::vector<Match>* results,
138                       URLSet* unique_urls,
139                       base::Time* first_time_searched);
140 
141   // Converts the given database identifier to a filename. This does not include
142   // the path, just the file and extension.
143   static FilePath IDToFileName(DBIdent id);
144 
145  private:
146   // Ensures that the tables and indices are created. Returns true on success.
147   bool CreateTables();
148 
149   // The sql database. Not valid until Init is called.
150   sql::Connection db_;
151 
      // Presumably the saved constructor arguments of the same names (confirm).
152   const FilePath path_;
153   const DBIdent ident_;
154   const bool allow_create_;
155 
156   // Full file name of the file on disk, computed in Init().
      // NOTE(review): file_name() is documented as valid before Init(); confirm.
157   FilePath file_name_;
158 
159   sql::MetaTable meta_table_;
160 
161   DISALLOW_COPY_AND_ASSIGN(TextDatabase);
162 };
163 
164 }  // namespace history
165 
166 #endif  // CHROME_BROWSER_HISTORY_TEXT_DATABASE_H_
167