• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef CHROME_BROWSER_VISITEDLINK_VISITEDLINK_MASTER_H_
6 #define CHROME_BROWSER_VISITEDLINK_VISITEDLINK_MASTER_H_
7 #pragma once
8 
9 #if defined(OS_WIN)
10 #include <windows.h>
11 #endif
12 #include <set>
13 #include <vector>
14 
15 #include "base/file_path.h"
16 #include "base/gtest_prod_util.h"
17 #include "base/memory/ref_counted.h"
18 #include "base/shared_memory.h"
19 #include "chrome/browser/history/history.h"
20 #include "chrome/common/visitedlink_common.h"
21 
22 class GURL;
23 class Profile;
24 
25 // Controls the link coloring database. The master controls all writing to the
26 // database as well as disk I/O. There should be only one master.
27 //
28 // This class will defer writing operations to the file thread. This means that
29 // class destruction, the file may still be open since operations are pending on
30 // another thread.
31 class VisitedLinkMaster : public VisitedLinkCommon {
32  public:
33   // Listens to the link coloring database events. The master is given this
34   // event as a constructor argument and dispatches events using it.
35   class Listener {
36    public:
~Listener()37     virtual ~Listener() {}
38 
39     // Called when link coloring database has been created or replaced. The
40     // argument is the new table handle.
41     virtual void NewTable(base::SharedMemory*) = 0;
42 
43     // Called when new link has been added. The argument is the fingerprint
44     // (hash) of the link.
45     virtual void Add(Fingerprint fingerprint) = 0;
46 
47     // Called when link coloring state has been reset. This may occur when
48     // entire or parts of history were deleted.
49     virtual void Reset() = 0;
50   };
51 
52   // The |listener| may not be NULL.
53   VisitedLinkMaster(Listener* listener, Profile* profile);
54 
55   // In unit test mode, we allow the caller to optionally specify the database
56   // filename so that it can be run from a unit test. The directory where this
57   // file resides must exist in this mode. You can also specify the default
58   // table size to test table resizing. If this parameter is 0, we will use the
59   // defaults.
60   //
61   // In the unit test mode, we also allow the caller to provide a history
62   // service pointer (the history service can't be fetched from the browser
63   // process when we're in unit test mode). This can be NULL to try to access
64   // the main version, which will probably fail (which can be good for testing
65   // this failure mode).
66   //
67   // When |suppress_rebuild| is set, we'll not attempt to load data from
68   // history if the file can't be loaded. This should generally be set for
69   // testing except when you want to test the rebuild process explicitly.
70   VisitedLinkMaster(Listener* listener,
71                     HistoryService* history_service,
72                     bool suppress_rebuild,
73                     const FilePath& filename,
74                     int32 default_table_size);
75   virtual ~VisitedLinkMaster();
76 
77   // Must be called immediately after object creation. Nothing else will work
78   // until this is called. Returns true on success, false means that this
79   // object won't work.
80   bool Init();
81 
shared_memory()82   base::SharedMemory* shared_memory() { return shared_memory_; }
83 
84   // Adds a URL to the table.
85   void AddURL(const GURL& url);
86 
87   // Adds a set of URLs to the table.
88   void AddURLs(const std::vector<GURL>& url);
89 
90   // Deletes the specified URLs from the table.
91   void DeleteURLs(const std::set<GURL>& urls);
92 
93   // Clears the visited links table by deleting the file from disk. Used as
94   // part of history clearing.
95   void DeleteAllURLs();
96 
97 #if defined(UNIT_TEST) || !defined(NDEBUG) || defined(PERF_TEST)
98   // This is a debugging function that can be called to double-check internal
99   // data structures. It will assert if the check fails.
100   void DebugValidate();
101 
102   // Sets a task to execute when the next rebuild from history is complete.
103   // This is used by unit tests to wait for the rebuild to complete before
104   // they continue. The pointer will be owned by this object after the call.
set_rebuild_complete_task(Task * task)105   void set_rebuild_complete_task(Task* task) {
106     DCHECK(!rebuild_complete_task_.get());
107     rebuild_complete_task_.reset(task);
108   }
109 
110   // returns the number of items in the table for testing verification
GetUsedCount()111   int32 GetUsedCount() const {
112     return used_items_;
113   }
114 
115   // Call to cause the entire database file to be re-written from scratch
116   // to disk. Used by the performance tester.
RewriteFile()117   bool RewriteFile() {
118     return WriteFullTable();
119   }
120 #endif
121 
122  private:
123   FRIEND_TEST_ALL_PREFIXES(VisitedLinkTest, Delete);
124   FRIEND_TEST_ALL_PREFIXES(VisitedLinkTest, BigDelete);
125   FRIEND_TEST_ALL_PREFIXES(VisitedLinkTest, BigImport);
126 
127   // Object to rebuild the table on the history thread (see the .cc file).
128   class TableBuilder;
129 
130   // Byte offsets of values in the header.
131   static const int32 kFileHeaderSignatureOffset;
132   static const int32 kFileHeaderVersionOffset;
133   static const int32 kFileHeaderLengthOffset;
134   static const int32 kFileHeaderUsedOffset;
135   static const int32 kFileHeaderSaltOffset;
136 
137   // The signature at the beginning of a file.
138   static const int32 kFileSignature;
139 
140   // version of the file format this module currently uses
141   static const int32 kFileCurrentVersion;
142 
143   // Bytes in the file header, including the salt.
144   static const size_t kFileHeaderSize;
145 
146   // When creating a fresh new table, we use this many entries.
147   static const unsigned kDefaultTableSize;
148 
149   // When the user is deleting a boatload of URLs, we don't really want to do
150   // individual writes for each of them. When the count exceeds this threshold,
151   // we will write the whole table to disk at once instead of individual items.
152   static const size_t kBigDeleteThreshold;
153 
154   // Backend for the constructors initializing the members.
155   void InitMembers(Listener* listener, Profile* profile);
156 
157   // If a rebuild is in progress, we save the URL in the temporary list.
158   // Otherwise, we add this to the table. Returns the index of the
159   // inserted fingerprint or null_hash_ on failure.
160   Hash TryToAddURL(const GURL& url);
161 
162   // File I/O functions
163   // ------------------
164 
165   // Writes the entire table to disk, returning true on success. It will leave
166   // the table file open and the handle to it in file_
167   bool WriteFullTable();
168 
169   // Try to load the table from the database file. If the file doesn't exist or
170   // is corrupt, this will return failure.
171   bool InitFromFile();
172 
173   // Reads the header of the link coloring database from disk. Assumes the
174   // file pointer is at the beginning of the file and that there are no pending
175   // asynchronous I/O operations.
176   //
177   // Returns true on success and places the size of the table in num_entries
178   // and the number of nonzero fingerprints in used_count. This will fail if
179   // the version of the file is not the current version of the database.
180   bool ReadFileHeader(FILE* hfile, int32* num_entries, int32* used_count,
181                       uint8 salt[LINK_SALT_LENGTH]);
182 
183   // Fills *filename with the name of the link database filename
184   bool GetDatabaseFileName(FilePath* filename);
185 
186   // Wrapper around Window's WriteFile using asynchronous I/O. This will proxy
187   // the write to a background thread.
188   void WriteToFile(FILE* hfile, off_t offset, void* data, int32 data_size);
189 
190   // Helper function to schedule and asynchronous write of the used count to
191   // disk (this is a common operation).
192   void WriteUsedItemCountToFile();
193 
194   // Helper function to schedule an asynchronous write of the given range of
195   // hash functions to disk. The range is inclusive on both ends. The range can
196   // wrap around at 0 and this function will handle it.
197   void WriteHashRangeToFile(Hash first_hash, Hash last_hash);
198 
199   // Synchronous read from the file. Assumes there are no pending asynchronous
200   // I/O functions. Returns true if the entire buffer was successfully filled.
201   bool ReadFromFile(FILE* hfile, off_t offset, void* data, size_t data_size);
202 
203   // General table handling
204   // ----------------------
205 
206   // Called to add a fingerprint to the table. If |send_notifications| is true
207   // and the item is added successfully, Listener::Add will be invoked.
208   // Returns the index of the inserted fingerprint or null_hash_ if there was a
209   // duplicate and this item was skippped.
210   Hash AddFingerprint(Fingerprint fingerprint, bool send_notifications);
211 
212   // Deletes all fingerprints from the given vector from the current hash table
213   // and syncs it to disk if there are changes. This does not update the
214   // deleted_since_rebuild_ list, the caller must update this itself if there
215   // is an update pending.
216   void DeleteFingerprintsFromCurrentTable(
217       const std::set<Fingerprint>& fingerprints);
218 
219   // Removes the indicated fingerprint from the table. If the update_file flag
220   // is set, the changes will also be written to disk. Returns true if the
221   // fingerprint was deleted, false if it was not in the table to delete.
222   bool DeleteFingerprint(Fingerprint fingerprint, bool update_file);
223 
224   // Creates a new empty table, call if InitFromFile() fails. Normally, when
225   // |suppress_rebuild| is false, the table will be rebuilt from history,
226   // keeping us in sync. When |suppress_rebuild| is true, the new table will be
227   // empty and we will not consult history. This is used when clearing the
228   // database and for unit tests.
229   bool InitFromScratch(bool suppress_rebuild);
230 
231   // Allocates the Fingerprint structure and length. When init_to_empty is set,
232   // the table will be filled with 0s and used_items_ will be set to 0 as well.
233   // If the flag is not set, these things are untouched and it is the
234   // responsibility of the caller to fill them (like when we are reading from
235   // a file).
236   bool CreateURLTable(int32 num_entries, bool init_to_empty);
237 
238   // A wrapper for CreateURLTable, this will allocate a new table, initialized
239   // to empty. The caller is responsible for saving the shared memory pointer
240   // and handles before this call (they will be replaced with new ones) and
241   // releasing them later. This is designed for callers that make a new table
242   // and then copy values from the old table to the new one, then release the
243   // old table.
244   //
245   // Returns true on success. On failure, the old table will be restored. The
246   // caller should not attemp to release the pointer/handle in this case.
247   bool BeginReplaceURLTable(int32 num_entries);
248 
249   // unallocates the Fingerprint table
250   void FreeURLTable();
251 
252   // For growing the table. ResizeTableIfNecessary will check to see if the
253   // table should be resized and calls ResizeTable if needed. Returns true if
254   // we decided to resize the table.
255   bool ResizeTableIfNecessary();
256 
257   // Resizes the table (growing or shrinking) as necessary to accomodate the
258   // current count.
259   void ResizeTable(int32 new_size);
260 
261   // Returns the desired table size for |item_count| URLs.
262   uint32 NewTableSizeForCount(int32 item_count) const;
263 
264   // Computes the table load as fraction. For example, if 1/4 of the entries are
265   // full, this value will be 0.25
ComputeTableLoad()266   float ComputeTableLoad() const {
267     return static_cast<float>(used_items_) / static_cast<float>(table_length_);
268   }
269 
270   // Initializes a rebuild of the visited link database based on the browser
271   // history. This will set table_builder_ while working, and there should not
272   // already be a rebuild in place when called. See the definition for more
273   // details on how this works.
274   //
275   // Returns true on success. Failure means we're not attempting to rebuild
276   // the database because something failed.
277   bool RebuildTableFromHistory();
278 
279   // Callback that the table rebuilder uses when the rebuild is complete.
280   // |success| is true if the fingerprint generation succeeded, in which case
281   // |fingerprints| will contain the computed fingerprints. On failure, there
282   // will be no fingerprints.
283   void OnTableRebuildComplete(bool success,
284                               const std::vector<Fingerprint>& fingerprints);
285 
286   // Increases or decreases the given hash value by one, wrapping around as
287   // necessary. Used for probing.
IncrementHash(Hash hash)288   inline Hash IncrementHash(Hash hash) {
289     if (hash >= table_length_ - 1)
290       return 0;  // Wrap around.
291     return hash + 1;
292   }
DecrementHash(Hash hash)293   inline Hash DecrementHash(Hash hash) {
294     if (hash <= 0)
295       return table_length_ - 1;  // Wrap around.
296     return hash - 1;
297   }
298 
299   Listener* listener_;
300 
301 #ifndef NDEBUG
302   // Indicates whether any asynchronous operation has ever been completed.
303   // We do some synchronous reads that require that no asynchronous operations
304   // are pending, yet we don't track whether they have been completed. This
305   // flag is a sanity check that these reads only happen before any
306   // asynchronous writes have been fired.
307   bool posted_asynchronous_operation_;
308 #endif
309 
310   // Reference to the user profile that this object belongs to
311   // (it knows the path to where the data is stored)
312   Profile* profile_;
313 
314   // When non-NULL, indicates we are in database rebuild mode and points to
315   // the class collecting fingerprint information from the history system.
316   // The pointer is owned by this class, but it must remain valid while the
317   // history query is running. We must only delete it when the query is done.
318   scoped_refptr<TableBuilder> table_builder_;
319 
320   // Indicates URLs added and deleted since we started rebuilding the table.
321   std::set<Fingerprint> added_since_rebuild_;
322   std::set<Fingerprint> deleted_since_rebuild_;
323 
324   // TODO(brettw) Support deletion, we need to track whether anything was
325   // deleted during the rebuild here. Then we should delete any of these
326   // entries from the complete table later.
327   // std::vector<Fingerprint> removed_since_rebuild_;
328 
329   // The currently open file with the table in it. This may be NULL if we're
330   // rebuilding and haven't written a new version yet. Writing to the file may
331   // be safely ignored in this case.
332   FILE* file_;
333 
334   // Shared memory consists of a SharedHeader followed by the table.
335   base::SharedMemory *shared_memory_;
336 
337   // When we generate new tables, we increment the serial number of the
338   // shared memory object.
339   int32 shared_memory_serial_;
340 
341   // Number of non-empty items in the table, used to compute fullness.
342   int32 used_items_;
343 
344   // Testing values -----------------------------------------------------------
345   //
346   // The following fields exist for testing purposes. They are not used in
347   // release builds. It'd be nice to eliminate them in release builds, but we
348   // don't want to change the signature of the object between the unit test and
349   // regular builds. Instead, we just have "default" values that these take
350   // in release builds that give "regular" behavior.
351 
352   // Overridden database file name for testing
353   FilePath database_name_override_;
354 
355   // When nonzero, overrides the table size for new databases for testing
356   int32 table_size_override_;
357 
358   // When non-NULL, overrides the history service to use rather than as the
359   // BrowserProcess. This is provided for unit tests.
360   HistoryService* history_service_override_;
361 
362   // When non-NULL, indicates the task that should be run after the next
363   // rebuild from history is complete.
364   scoped_ptr<Task> rebuild_complete_task_;
365 
366   // Set to prevent us from attempting to rebuild the database from global
367   // history if we have an error opening the file. This is used for testing,
368   // will be false in production.
369   bool suppress_rebuild_;
370 
371   DISALLOW_COPY_AND_ASSIGN(VisitedLinkMaster);
372 };
373 
374 // NOTE: These methods are defined inline here, so we can share the compilation
375 //       of visitedlink_master.cc between the browser and the unit/perf tests.
376 
377 #if defined(UNIT_TEST) || defined(PERF_TEST) || !defined(NDEBUG)
DebugValidate()378 inline void VisitedLinkMaster::DebugValidate() {
379   int32 used_count = 0;
380   for (int32 i = 0; i < table_length_; i++) {
381     if (hash_table_[i])
382       used_count++;
383   }
384   DCHECK_EQ(used_count, used_items_);
385 }
386 #endif
387 
388 #endif  // CHROME_BROWSER_VISITEDLINK_VISITEDLINK_MASTER_H_
389