• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef COMPONENTS_VISITEDLINK_BROWSER_VISITEDLINK_MASTER_H_
6 #define COMPONENTS_VISITEDLINK_BROWSER_VISITEDLINK_MASTER_H_
7 
8 #if defined(OS_WIN)
9 #include <windows.h>
10 #endif
11 #include <set>
12 #include <vector>
13 
14 #include "base/callback.h"
15 #include "base/callback_forward.h"
16 #include "base/files/file_path.h"
17 #include "base/gtest_prod_util.h"
18 #include "base/memory/shared_memory.h"
19 #include "base/threading/sequenced_worker_pool.h"
20 #include "components/visitedlink/common/visitedlink_common.h"
21 
22 #if defined(UNIT_TEST) || defined(PERF_TEST) || !defined(NDEBUG)
23 #include "base/logging.h"
24 #endif
25 
26 class GURL;
27 
28 namespace content {
29 class BrowserContext;
30 }
31 
32 namespace visitedlink {
33 
34 class VisitedLinkDelegate;
35 
36 // Controls the link coloring database. The master controls all writing to the
37 // database as well as disk I/O. There should be only one master.
38 //
39 // This class will defer writing operations to the file thread. This means that
40 // class destruction, the file may still be open since operations are pending on
41 // another thread.
42 class VisitedLinkMaster : public VisitedLinkCommon {
43  public:
44   // Listens to the link coloring database events. The master is given this
45   // event as a constructor argument and dispatches events using it.
46   class Listener {
47    public:
~Listener()48     virtual ~Listener() {}
49 
50     // Called when link coloring database has been created or replaced. The
51     // argument is the new table handle.
52     virtual void NewTable(base::SharedMemory*) = 0;
53 
54     // Called when new link has been added. The argument is the fingerprint
55     // (hash) of the link.
56     virtual void Add(Fingerprint fingerprint) = 0;
57 
58     // Called when link coloring state has been reset. This may occur when
59     // entire or parts of history were deleted.
60     virtual void Reset() = 0;
61   };
62 
63   VisitedLinkMaster(content::BrowserContext* browser_context,
64                     VisitedLinkDelegate* delegate,
65                     bool persist_to_disk);
66 
67   // In unit test mode, we allow the caller to optionally specify the database
68   // filename so that it can be run from a unit test. The directory where this
69   // file resides must exist in this mode. You can also specify the default
70   // table size to test table resizing. If this parameter is 0, we will use the
71   // defaults.
72   //
73   // In the unit test mode, we also allow the caller to provide a history
74   // service pointer (the history service can't be fetched from the browser
75   // process when we're in unit test mode). This can be NULL to try to access
76   // the main version, which will probably fail (which can be good for testing
77   // this failure mode).
78   //
79   // When |suppress_rebuild| is set, we'll not attempt to load data from
80   // history if the file can't be loaded. This should generally be set for
81   // testing except when you want to test the rebuild process explicitly.
82   VisitedLinkMaster(Listener* listener,
83                     VisitedLinkDelegate* delegate,
84                     bool persist_to_disk,
85                     bool suppress_rebuild,
86                     const base::FilePath& filename,
87                     int32 default_table_size);
88   virtual ~VisitedLinkMaster();
89 
90   // Must be called immediately after object creation. Nothing else will work
91   // until this is called. Returns true on success, false means that this
92   // object won't work.
93   bool Init();
94 
shared_memory()95   base::SharedMemory* shared_memory() { return shared_memory_; }
96 
97   // Adds a URL to the table.
98   void AddURL(const GURL& url);
99 
100   // Adds a set of URLs to the table.
101   void AddURLs(const std::vector<GURL>& url);
102 
103   // See DeleteURLs.
104   class URLIterator {
105    public:
106     // HasNextURL must return true when this is called. Returns the next URL
107     // then advances the iterator. Note that the returned reference is only
108     // valid until the next call of NextURL.
109     virtual const GURL& NextURL() = 0;
110 
111     // Returns true if still has URLs to be iterated.
112     virtual bool HasNextURL() const = 0;
113 
114    protected:
~URLIterator()115     virtual ~URLIterator() {}
116   };
117 
118   // Deletes the specified URLs from |rows| from the table.
119   void DeleteURLs(URLIterator* iterator);
120 
121   // Clears the visited links table by deleting the file from disk. Used as
122   // part of history clearing.
123   void DeleteAllURLs();
124 
125   // Returns the Delegate of this Master.
126   VisitedLinkDelegate* GetDelegate();
127 
128 #if defined(UNIT_TEST) || !defined(NDEBUG) || defined(PERF_TEST)
129   // This is a debugging function that can be called to double-check internal
130   // data structures. It will assert if the check fails.
131   void DebugValidate();
132 
133   // Sets a task to execute when the next rebuild from history is complete.
134   // This is used by unit tests to wait for the rebuild to complete before
135   // they continue. The pointer will be owned by this object after the call.
set_rebuild_complete_task(const base::Closure & task)136   void set_rebuild_complete_task(const base::Closure& task) {
137     DCHECK(rebuild_complete_task_.is_null());
138     rebuild_complete_task_ = task;
139   }
140 
141   // returns the number of items in the table for testing verification
GetUsedCount()142   int32 GetUsedCount() const {
143     return used_items_;
144   }
145 
146   // Returns the listener.
GetListener()147   VisitedLinkMaster::Listener* GetListener() const {
148     return listener_.get();
149   }
150 
151   // Call to cause the entire database file to be re-written from scratch
152   // to disk. Used by the performance tester.
RewriteFile()153   void RewriteFile() {
154     WriteFullTable();
155   }
156 #endif
157 
158  private:
159   FRIEND_TEST_ALL_PREFIXES(VisitedLinkTest, Delete);
160   FRIEND_TEST_ALL_PREFIXES(VisitedLinkTest, BigDelete);
161   FRIEND_TEST_ALL_PREFIXES(VisitedLinkTest, BigImport);
162 
163   // Object to rebuild the table on the history thread (see the .cc file).
164   class TableBuilder;
165 
166   // Byte offsets of values in the header.
167   static const int32 kFileHeaderSignatureOffset;
168   static const int32 kFileHeaderVersionOffset;
169   static const int32 kFileHeaderLengthOffset;
170   static const int32 kFileHeaderUsedOffset;
171   static const int32 kFileHeaderSaltOffset;
172 
173   // The signature at the beginning of a file.
174   static const int32 kFileSignature;
175 
176   // version of the file format this module currently uses
177   static const int32 kFileCurrentVersion;
178 
179   // Bytes in the file header, including the salt.
180   static const size_t kFileHeaderSize;
181 
182   // When creating a fresh new table, we use this many entries.
183   static const unsigned kDefaultTableSize;
184 
185   // When the user is deleting a boatload of URLs, we don't really want to do
186   // individual writes for each of them. When the count exceeds this threshold,
187   // we will write the whole table to disk at once instead of individual items.
188   static const size_t kBigDeleteThreshold;
189 
190   // Backend for the constructors initializing the members.
191   void InitMembers();
192 
193   // If a rebuild is in progress, we save the URL in the temporary list.
194   // Otherwise, we add this to the table. Returns the index of the
195   // inserted fingerprint or null_hash_ on failure.
196   Hash TryToAddURL(const GURL& url);
197 
198   // File I/O functions
199   // ------------------
200   // These functions are only called if |persist_to_disk_| is true.
201 
202   // Posts the given task to the blocking worker pool with our options.
203   void PostIOTask(const tracked_objects::Location& from_here,
204                   const base::Closure& task);
205 
206   // Writes the entire table to disk. It will leave the table file open and
207   // the handle to it will be stored in file_.
208   void WriteFullTable();
209 
210   // Try to load the table from the database file. If the file doesn't exist or
211   // is corrupt, this will return failure.
212   bool InitFromFile();
213 
214   // Reads the header of the link coloring database from disk. Assumes the
215   // file pointer is at the beginning of the file and that there are no pending
216   // asynchronous I/O operations.
217   //
218   // Returns true on success and places the size of the table in num_entries
219   // and the number of nonzero fingerprints in used_count. This will fail if
220   // the version of the file is not the current version of the database.
221   bool ReadFileHeader(FILE* hfile, int32* num_entries, int32* used_count,
222                       uint8 salt[LINK_SALT_LENGTH]);
223 
224   // Fills *filename with the name of the link database filename
225   bool GetDatabaseFileName(base::FilePath* filename);
226 
227   // Wrapper around Window's WriteFile using asynchronous I/O. This will proxy
228   // the write to a background thread.
229   void WriteToFile(FILE** hfile, off_t offset, void* data, int32 data_size);
230 
231   // Helper function to schedule and asynchronous write of the used count to
232   // disk (this is a common operation).
233   void WriteUsedItemCountToFile();
234 
235   // Helper function to schedule an asynchronous write of the given range of
236   // hash functions to disk. The range is inclusive on both ends. The range can
237   // wrap around at 0 and this function will handle it.
238   void WriteHashRangeToFile(Hash first_hash, Hash last_hash);
239 
240   // Synchronous read from the file. Assumes there are no pending asynchronous
241   // I/O functions. Returns true if the entire buffer was successfully filled.
242   bool ReadFromFile(FILE* hfile, off_t offset, void* data, size_t data_size);
243 
244   // General table handling
245   // ----------------------
246 
247   // Called to add a fingerprint to the table. If |send_notifications| is true
248   // and the item is added successfully, Listener::Add will be invoked.
249   // Returns the index of the inserted fingerprint or null_hash_ if there was a
250   // duplicate and this item was skippped.
251   Hash AddFingerprint(Fingerprint fingerprint, bool send_notifications);
252 
253   // Deletes all fingerprints from the given vector from the current hash table
254   // and syncs it to disk if there are changes. This does not update the
255   // deleted_since_rebuild_ list, the caller must update this itself if there
256   // is an update pending.
257   void DeleteFingerprintsFromCurrentTable(
258       const std::set<Fingerprint>& fingerprints);
259 
260   // Removes the indicated fingerprint from the table. If the update_file flag
261   // is set, the changes will also be written to disk. Returns true if the
262   // fingerprint was deleted, false if it was not in the table to delete.
263   bool DeleteFingerprint(Fingerprint fingerprint, bool update_file);
264 
265   // Creates a new empty table, call if InitFromFile() fails. Normally, when
266   // |suppress_rebuild| is false, the table will be rebuilt from history,
267   // keeping us in sync. When |suppress_rebuild| is true, the new table will be
268   // empty and we will not consult history. This is used when clearing the
269   // database and for unit tests.
270   bool InitFromScratch(bool suppress_rebuild);
271 
272   // Allocates the Fingerprint structure and length. When init_to_empty is set,
273   // the table will be filled with 0s and used_items_ will be set to 0 as well.
274   // If the flag is not set, these things are untouched and it is the
275   // responsibility of the caller to fill them (like when we are reading from
276   // a file).
277   bool CreateURLTable(int32 num_entries, bool init_to_empty);
278 
279   // A wrapper for CreateURLTable, this will allocate a new table, initialized
280   // to empty. The caller is responsible for saving the shared memory pointer
281   // and handles before this call (they will be replaced with new ones) and
282   // releasing them later. This is designed for callers that make a new table
283   // and then copy values from the old table to the new one, then release the
284   // old table.
285   //
286   // Returns true on success. On failure, the old table will be restored. The
287   // caller should not attemp to release the pointer/handle in this case.
288   bool BeginReplaceURLTable(int32 num_entries);
289 
290   // unallocates the Fingerprint table
291   void FreeURLTable();
292 
293   // For growing the table. ResizeTableIfNecessary will check to see if the
294   // table should be resized and calls ResizeTable if needed. Returns true if
295   // we decided to resize the table.
296   bool ResizeTableIfNecessary();
297 
298   // Resizes the table (growing or shrinking) as necessary to accomodate the
299   // current count.
300   void ResizeTable(int32 new_size);
301 
302   // Returns the desired table size for |item_count| URLs.
303   uint32 NewTableSizeForCount(int32 item_count) const;
304 
305   // Computes the table load as fraction. For example, if 1/4 of the entries are
306   // full, this value will be 0.25
ComputeTableLoad()307   float ComputeTableLoad() const {
308     return static_cast<float>(used_items_) / static_cast<float>(table_length_);
309   }
310 
311   // Initializes a rebuild of the visited link database based on the browser
312   // history. This will set table_builder_ while working, and there should not
313   // already be a rebuild in place when called. See the definition for more
314   // details on how this works.
315   //
316   // Returns true on success. Failure means we're not attempting to rebuild
317   // the database because something failed.
318   bool RebuildTableFromDelegate();
319 
320   // Callback that the table rebuilder uses when the rebuild is complete.
321   // |success| is true if the fingerprint generation succeeded, in which case
322   // |fingerprints| will contain the computed fingerprints. On failure, there
323   // will be no fingerprints.
324   void OnTableRebuildComplete(bool success,
325                               const std::vector<Fingerprint>& fingerprints);
326 
327   // Increases or decreases the given hash value by one, wrapping around as
328   // necessary. Used for probing.
IncrementHash(Hash hash)329   inline Hash IncrementHash(Hash hash) {
330     if (hash >= table_length_ - 1)
331       return 0;  // Wrap around.
332     return hash + 1;
333   }
DecrementHash(Hash hash)334   inline Hash DecrementHash(Hash hash) {
335     if (hash <= 0)
336       return table_length_ - 1;  // Wrap around.
337     return hash - 1;
338   }
339 
340 #ifndef NDEBUG
341   // Indicates whether any asynchronous operation has ever been completed.
342   // We do some synchronous reads that require that no asynchronous operations
343   // are pending, yet we don't track whether they have been completed. This
344   // flag is a sanity check that these reads only happen before any
345   // asynchronous writes have been fired.
346   bool posted_asynchronous_operation_;
347 #endif
348 
349   // Reference to the browser context that this object belongs to
350   // (it knows the path to where the data is stored)
351   content::BrowserContext* browser_context_;
352 
353   // Client owns the delegate and is responsible for it being valid through
354   // the life time this VisitedLinkMaster.
355   VisitedLinkDelegate* delegate_;
356 
357   // VisitedLinkEventListener to handle incoming events.
358   scoped_ptr<Listener> listener_;
359 
360   // Lazily initialized sequence token for posting file tasks.
361   base::SequencedWorkerPool::SequenceToken sequence_token_;
362 
363   // When non-NULL, indicates we are in database rebuild mode and points to
364   // the class collecting fingerprint information from the history system.
365   // The pointer is owned by this class, but it must remain valid while the
366   // history query is running. We must only delete it when the query is done.
367   scoped_refptr<TableBuilder> table_builder_;
368 
369   // Indicates URLs added and deleted since we started rebuilding the table.
370   std::set<Fingerprint> added_since_rebuild_;
371   std::set<Fingerprint> deleted_since_rebuild_;
372 
373   // TODO(brettw) Support deletion, we need to track whether anything was
374   // deleted during the rebuild here. Then we should delete any of these
375   // entries from the complete table later.
376   // std::vector<Fingerprint> removed_since_rebuild_;
377 
378   // The currently open file with the table in it. This may be NULL if we're
379   // rebuilding and haven't written a new version yet or if |persist_to_disk_|
380   // is false. Writing to the file may be safely ignored in this case. Also
381   // |file_| may be non-NULL but point to a NULL pointer. That would mean that
382   // opening of the file is already scheduled in a background thread and any
383   // writing to the file can also be scheduled to the background thread as it's
384   // guaranteed to be executed after the opening.
385   // The class owns both the |file_| pointer and the pointer pointed
386   // by |*file_|.
387   FILE** file_;
388 
389   // If true, will try to persist the hash table to disk. Will rebuild from
390   // VisitedLinkDelegate::RebuildTable if there are disk corruptions.
391   bool persist_to_disk_;
392 
393   // Shared memory consists of a SharedHeader followed by the table.
394   base::SharedMemory *shared_memory_;
395 
396   // When we generate new tables, we increment the serial number of the
397   // shared memory object.
398   int32 shared_memory_serial_;
399 
400   // Number of non-empty items in the table, used to compute fullness.
401   int32 used_items_;
402 
403   // Testing values -----------------------------------------------------------
404   //
405   // The following fields exist for testing purposes. They are not used in
406   // release builds. It'd be nice to eliminate them in release builds, but we
407   // don't want to change the signature of the object between the unit test and
408   // regular builds. Instead, we just have "default" values that these take
409   // in release builds that give "regular" behavior.
410 
411   // Overridden database file name for testing
412   base::FilePath database_name_override_;
413 
414   // When nonzero, overrides the table size for new databases for testing
415   int32 table_size_override_;
416 
417   // When set, indicates the task that should be run after the next rebuild from
418   // history is complete.
419   base::Closure rebuild_complete_task_;
420 
421   // Set to prevent us from attempting to rebuild the database from global
422   // history if we have an error opening the file. This is used for testing,
423   // will be false in production.
424   bool suppress_rebuild_;
425 
426   DISALLOW_COPY_AND_ASSIGN(VisitedLinkMaster);
427 };
428 
429 // NOTE: These methods are defined inline here, so we can share the compilation
430 //       of visitedlink_master.cc between the browser and the unit/perf tests.
431 
432 #if defined(UNIT_TEST) || defined(PERF_TEST) || !defined(NDEBUG)
DebugValidate()433 inline void VisitedLinkMaster::DebugValidate() {
434   int32 used_count = 0;
435   for (int32 i = 0; i < table_length_; i++) {
436     if (hash_table_[i])
437       used_count++;
438   }
439   DCHECK_EQ(used_count, used_items_);
440 }
441 #endif
442 
443 }  // namespace visitedlink
444 
445 #endif  // COMPONENTS_VISITEDLINK_BROWSER_VISITEDLINK_MASTER_H_
446