1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #ifndef COMPONENTS_VISITEDLINK_BROWSER_VISITEDLINK_MASTER_H_
6 #define COMPONENTS_VISITEDLINK_BROWSER_VISITEDLINK_MASTER_H_
7
8 #if defined(OS_WIN)
9 #include <windows.h>
10 #endif
11 #include <set>
12 #include <vector>
13
14 #include "base/callback.h"
15 #include "base/callback_forward.h"
16 #include "base/files/file_path.h"
17 #include "base/gtest_prod_util.h"
18 #include "base/memory/shared_memory.h"
19 #include "base/threading/sequenced_worker_pool.h"
20 #include "components/visitedlink/common/visitedlink_common.h"
21
22 #if defined(UNIT_TEST) || defined(PERF_TEST) || !defined(NDEBUG)
23 #include "base/logging.h"
24 #endif
25
26 class GURL;
27
28 namespace content {
29 class BrowserContext;
30 }
31
32 namespace visitedlink {
33
34 class VisitedLinkDelegate;
35
36 // Controls the link coloring database. The master controls all writing to the
37 // database as well as disk I/O. There should be only one master.
38 //
39 // This class will defer writing operations to the file thread. This means that
40 // class destruction, the file may still be open since operations are pending on
41 // another thread.
42 class VisitedLinkMaster : public VisitedLinkCommon {
43 public:
44 // Listens to the link coloring database events. The master is given this
45 // event as a constructor argument and dispatches events using it.
46 class Listener {
47 public:
~Listener()48 virtual ~Listener() {}
49
50 // Called when link coloring database has been created or replaced. The
51 // argument is the new table handle.
52 virtual void NewTable(base::SharedMemory*) = 0;
53
54 // Called when new link has been added. The argument is the fingerprint
55 // (hash) of the link.
56 virtual void Add(Fingerprint fingerprint) = 0;
57
58 // Called when link coloring state has been reset. This may occur when
59 // entire or parts of history were deleted.
60 virtual void Reset() = 0;
61 };
62
63 VisitedLinkMaster(content::BrowserContext* browser_context,
64 VisitedLinkDelegate* delegate,
65 bool persist_to_disk);
66
67 // In unit test mode, we allow the caller to optionally specify the database
68 // filename so that it can be run from a unit test. The directory where this
69 // file resides must exist in this mode. You can also specify the default
70 // table size to test table resizing. If this parameter is 0, we will use the
71 // defaults.
72 //
73 // In the unit test mode, we also allow the caller to provide a history
74 // service pointer (the history service can't be fetched from the browser
75 // process when we're in unit test mode). This can be NULL to try to access
76 // the main version, which will probably fail (which can be good for testing
77 // this failure mode).
78 //
79 // When |suppress_rebuild| is set, we'll not attempt to load data from
80 // history if the file can't be loaded. This should generally be set for
81 // testing except when you want to test the rebuild process explicitly.
82 VisitedLinkMaster(Listener* listener,
83 VisitedLinkDelegate* delegate,
84 bool persist_to_disk,
85 bool suppress_rebuild,
86 const base::FilePath& filename,
87 int32 default_table_size);
88 virtual ~VisitedLinkMaster();
89
90 // Must be called immediately after object creation. Nothing else will work
91 // until this is called. Returns true on success, false means that this
92 // object won't work.
93 bool Init();
94
shared_memory()95 base::SharedMemory* shared_memory() { return shared_memory_; }
96
97 // Adds a URL to the table.
98 void AddURL(const GURL& url);
99
100 // Adds a set of URLs to the table.
101 void AddURLs(const std::vector<GURL>& url);
102
103 // See DeleteURLs.
104 class URLIterator {
105 public:
106 // HasNextURL must return true when this is called. Returns the next URL
107 // then advances the iterator. Note that the returned reference is only
108 // valid until the next call of NextURL.
109 virtual const GURL& NextURL() = 0;
110
111 // Returns true if still has URLs to be iterated.
112 virtual bool HasNextURL() const = 0;
113
114 protected:
~URLIterator()115 virtual ~URLIterator() {}
116 };
117
118 // Deletes the specified URLs from |rows| from the table.
119 void DeleteURLs(URLIterator* iterator);
120
121 // Clears the visited links table by deleting the file from disk. Used as
122 // part of history clearing.
123 void DeleteAllURLs();
124
125 // Returns the Delegate of this Master.
126 VisitedLinkDelegate* GetDelegate();
127
128 #if defined(UNIT_TEST) || !defined(NDEBUG) || defined(PERF_TEST)
129 // This is a debugging function that can be called to double-check internal
130 // data structures. It will assert if the check fails.
131 void DebugValidate();
132
133 // Sets a task to execute when the next rebuild from history is complete.
134 // This is used by unit tests to wait for the rebuild to complete before
135 // they continue. The pointer will be owned by this object after the call.
set_rebuild_complete_task(const base::Closure & task)136 void set_rebuild_complete_task(const base::Closure& task) {
137 DCHECK(rebuild_complete_task_.is_null());
138 rebuild_complete_task_ = task;
139 }
140
141 // returns the number of items in the table for testing verification
GetUsedCount()142 int32 GetUsedCount() const {
143 return used_items_;
144 }
145
146 // Returns the listener.
GetListener()147 VisitedLinkMaster::Listener* GetListener() const {
148 return listener_.get();
149 }
150
151 // Call to cause the entire database file to be re-written from scratch
152 // to disk. Used by the performance tester.
RewriteFile()153 void RewriteFile() {
154 WriteFullTable();
155 }
156 #endif
157
158 private:
159 FRIEND_TEST_ALL_PREFIXES(VisitedLinkTest, Delete);
160 FRIEND_TEST_ALL_PREFIXES(VisitedLinkTest, BigDelete);
161 FRIEND_TEST_ALL_PREFIXES(VisitedLinkTest, BigImport);
162
163 // Object to rebuild the table on the history thread (see the .cc file).
164 class TableBuilder;
165
166 // Byte offsets of values in the header.
167 static const int32 kFileHeaderSignatureOffset;
168 static const int32 kFileHeaderVersionOffset;
169 static const int32 kFileHeaderLengthOffset;
170 static const int32 kFileHeaderUsedOffset;
171 static const int32 kFileHeaderSaltOffset;
172
173 // The signature at the beginning of a file.
174 static const int32 kFileSignature;
175
176 // version of the file format this module currently uses
177 static const int32 kFileCurrentVersion;
178
179 // Bytes in the file header, including the salt.
180 static const size_t kFileHeaderSize;
181
182 // When creating a fresh new table, we use this many entries.
183 static const unsigned kDefaultTableSize;
184
185 // When the user is deleting a boatload of URLs, we don't really want to do
186 // individual writes for each of them. When the count exceeds this threshold,
187 // we will write the whole table to disk at once instead of individual items.
188 static const size_t kBigDeleteThreshold;
189
190 // Backend for the constructors initializing the members.
191 void InitMembers();
192
193 // If a rebuild is in progress, we save the URL in the temporary list.
194 // Otherwise, we add this to the table. Returns the index of the
195 // inserted fingerprint or null_hash_ on failure.
196 Hash TryToAddURL(const GURL& url);
197
198 // File I/O functions
199 // ------------------
200 // These functions are only called if |persist_to_disk_| is true.
201
202 // Posts the given task to the blocking worker pool with our options.
203 void PostIOTask(const tracked_objects::Location& from_here,
204 const base::Closure& task);
205
206 // Writes the entire table to disk. It will leave the table file open and
207 // the handle to it will be stored in file_.
208 void WriteFullTable();
209
210 // Try to load the table from the database file. If the file doesn't exist or
211 // is corrupt, this will return failure.
212 bool InitFromFile();
213
214 // Reads the header of the link coloring database from disk. Assumes the
215 // file pointer is at the beginning of the file and that there are no pending
216 // asynchronous I/O operations.
217 //
218 // Returns true on success and places the size of the table in num_entries
219 // and the number of nonzero fingerprints in used_count. This will fail if
220 // the version of the file is not the current version of the database.
221 bool ReadFileHeader(FILE* hfile, int32* num_entries, int32* used_count,
222 uint8 salt[LINK_SALT_LENGTH]);
223
224 // Fills *filename with the name of the link database filename
225 bool GetDatabaseFileName(base::FilePath* filename);
226
227 // Wrapper around Window's WriteFile using asynchronous I/O. This will proxy
228 // the write to a background thread.
229 void WriteToFile(FILE** hfile, off_t offset, void* data, int32 data_size);
230
231 // Helper function to schedule and asynchronous write of the used count to
232 // disk (this is a common operation).
233 void WriteUsedItemCountToFile();
234
235 // Helper function to schedule an asynchronous write of the given range of
236 // hash functions to disk. The range is inclusive on both ends. The range can
237 // wrap around at 0 and this function will handle it.
238 void WriteHashRangeToFile(Hash first_hash, Hash last_hash);
239
240 // Synchronous read from the file. Assumes there are no pending asynchronous
241 // I/O functions. Returns true if the entire buffer was successfully filled.
242 bool ReadFromFile(FILE* hfile, off_t offset, void* data, size_t data_size);
243
244 // General table handling
245 // ----------------------
246
247 // Called to add a fingerprint to the table. If |send_notifications| is true
248 // and the item is added successfully, Listener::Add will be invoked.
249 // Returns the index of the inserted fingerprint or null_hash_ if there was a
250 // duplicate and this item was skippped.
251 Hash AddFingerprint(Fingerprint fingerprint, bool send_notifications);
252
253 // Deletes all fingerprints from the given vector from the current hash table
254 // and syncs it to disk if there are changes. This does not update the
255 // deleted_since_rebuild_ list, the caller must update this itself if there
256 // is an update pending.
257 void DeleteFingerprintsFromCurrentTable(
258 const std::set<Fingerprint>& fingerprints);
259
260 // Removes the indicated fingerprint from the table. If the update_file flag
261 // is set, the changes will also be written to disk. Returns true if the
262 // fingerprint was deleted, false if it was not in the table to delete.
263 bool DeleteFingerprint(Fingerprint fingerprint, bool update_file);
264
265 // Creates a new empty table, call if InitFromFile() fails. Normally, when
266 // |suppress_rebuild| is false, the table will be rebuilt from history,
267 // keeping us in sync. When |suppress_rebuild| is true, the new table will be
268 // empty and we will not consult history. This is used when clearing the
269 // database and for unit tests.
270 bool InitFromScratch(bool suppress_rebuild);
271
272 // Allocates the Fingerprint structure and length. When init_to_empty is set,
273 // the table will be filled with 0s and used_items_ will be set to 0 as well.
274 // If the flag is not set, these things are untouched and it is the
275 // responsibility of the caller to fill them (like when we are reading from
276 // a file).
277 bool CreateURLTable(int32 num_entries, bool init_to_empty);
278
279 // A wrapper for CreateURLTable, this will allocate a new table, initialized
280 // to empty. The caller is responsible for saving the shared memory pointer
281 // and handles before this call (they will be replaced with new ones) and
282 // releasing them later. This is designed for callers that make a new table
283 // and then copy values from the old table to the new one, then release the
284 // old table.
285 //
286 // Returns true on success. On failure, the old table will be restored. The
287 // caller should not attemp to release the pointer/handle in this case.
288 bool BeginReplaceURLTable(int32 num_entries);
289
290 // unallocates the Fingerprint table
291 void FreeURLTable();
292
293 // For growing the table. ResizeTableIfNecessary will check to see if the
294 // table should be resized and calls ResizeTable if needed. Returns true if
295 // we decided to resize the table.
296 bool ResizeTableIfNecessary();
297
298 // Resizes the table (growing or shrinking) as necessary to accomodate the
299 // current count.
300 void ResizeTable(int32 new_size);
301
302 // Returns the desired table size for |item_count| URLs.
303 uint32 NewTableSizeForCount(int32 item_count) const;
304
305 // Computes the table load as fraction. For example, if 1/4 of the entries are
306 // full, this value will be 0.25
ComputeTableLoad()307 float ComputeTableLoad() const {
308 return static_cast<float>(used_items_) / static_cast<float>(table_length_);
309 }
310
311 // Initializes a rebuild of the visited link database based on the browser
312 // history. This will set table_builder_ while working, and there should not
313 // already be a rebuild in place when called. See the definition for more
314 // details on how this works.
315 //
316 // Returns true on success. Failure means we're not attempting to rebuild
317 // the database because something failed.
318 bool RebuildTableFromDelegate();
319
320 // Callback that the table rebuilder uses when the rebuild is complete.
321 // |success| is true if the fingerprint generation succeeded, in which case
322 // |fingerprints| will contain the computed fingerprints. On failure, there
323 // will be no fingerprints.
324 void OnTableRebuildComplete(bool success,
325 const std::vector<Fingerprint>& fingerprints);
326
327 // Increases or decreases the given hash value by one, wrapping around as
328 // necessary. Used for probing.
IncrementHash(Hash hash)329 inline Hash IncrementHash(Hash hash) {
330 if (hash >= table_length_ - 1)
331 return 0; // Wrap around.
332 return hash + 1;
333 }
DecrementHash(Hash hash)334 inline Hash DecrementHash(Hash hash) {
335 if (hash <= 0)
336 return table_length_ - 1; // Wrap around.
337 return hash - 1;
338 }
339
340 #ifndef NDEBUG
341 // Indicates whether any asynchronous operation has ever been completed.
342 // We do some synchronous reads that require that no asynchronous operations
343 // are pending, yet we don't track whether they have been completed. This
344 // flag is a sanity check that these reads only happen before any
345 // asynchronous writes have been fired.
346 bool posted_asynchronous_operation_;
347 #endif
348
349 // Reference to the browser context that this object belongs to
350 // (it knows the path to where the data is stored)
351 content::BrowserContext* browser_context_;
352
353 // Client owns the delegate and is responsible for it being valid through
354 // the life time this VisitedLinkMaster.
355 VisitedLinkDelegate* delegate_;
356
357 // VisitedLinkEventListener to handle incoming events.
358 scoped_ptr<Listener> listener_;
359
360 // Lazily initialized sequence token for posting file tasks.
361 base::SequencedWorkerPool::SequenceToken sequence_token_;
362
363 // When non-NULL, indicates we are in database rebuild mode and points to
364 // the class collecting fingerprint information from the history system.
365 // The pointer is owned by this class, but it must remain valid while the
366 // history query is running. We must only delete it when the query is done.
367 scoped_refptr<TableBuilder> table_builder_;
368
369 // Indicates URLs added and deleted since we started rebuilding the table.
370 std::set<Fingerprint> added_since_rebuild_;
371 std::set<Fingerprint> deleted_since_rebuild_;
372
373 // TODO(brettw) Support deletion, we need to track whether anything was
374 // deleted during the rebuild here. Then we should delete any of these
375 // entries from the complete table later.
376 // std::vector<Fingerprint> removed_since_rebuild_;
377
378 // The currently open file with the table in it. This may be NULL if we're
379 // rebuilding and haven't written a new version yet or if |persist_to_disk_|
380 // is false. Writing to the file may be safely ignored in this case. Also
381 // |file_| may be non-NULL but point to a NULL pointer. That would mean that
382 // opening of the file is already scheduled in a background thread and any
383 // writing to the file can also be scheduled to the background thread as it's
384 // guaranteed to be executed after the opening.
385 // The class owns both the |file_| pointer and the pointer pointed
386 // by |*file_|.
387 FILE** file_;
388
389 // If true, will try to persist the hash table to disk. Will rebuild from
390 // VisitedLinkDelegate::RebuildTable if there are disk corruptions.
391 bool persist_to_disk_;
392
393 // Shared memory consists of a SharedHeader followed by the table.
394 base::SharedMemory *shared_memory_;
395
396 // When we generate new tables, we increment the serial number of the
397 // shared memory object.
398 int32 shared_memory_serial_;
399
400 // Number of non-empty items in the table, used to compute fullness.
401 int32 used_items_;
402
403 // Testing values -----------------------------------------------------------
404 //
405 // The following fields exist for testing purposes. They are not used in
406 // release builds. It'd be nice to eliminate them in release builds, but we
407 // don't want to change the signature of the object between the unit test and
408 // regular builds. Instead, we just have "default" values that these take
409 // in release builds that give "regular" behavior.
410
411 // Overridden database file name for testing
412 base::FilePath database_name_override_;
413
414 // When nonzero, overrides the table size for new databases for testing
415 int32 table_size_override_;
416
417 // When set, indicates the task that should be run after the next rebuild from
418 // history is complete.
419 base::Closure rebuild_complete_task_;
420
421 // Set to prevent us from attempting to rebuild the database from global
422 // history if we have an error opening the file. This is used for testing,
423 // will be false in production.
424 bool suppress_rebuild_;
425
426 DISALLOW_COPY_AND_ASSIGN(VisitedLinkMaster);
427 };
428
429 // NOTE: These methods are defined inline here, so we can share the compilation
430 // of visitedlink_master.cc between the browser and the unit/perf tests.
431
432 #if defined(UNIT_TEST) || defined(PERF_TEST) || !defined(NDEBUG)
DebugValidate()433 inline void VisitedLinkMaster::DebugValidate() {
434 int32 used_count = 0;
435 for (int32 i = 0; i < table_length_; i++) {
436 if (hash_table_[i])
437 used_count++;
438 }
439 DCHECK_EQ(used_count, used_items_);
440 }
441 #endif
442
443 } // namespace visitedlink
444
445 #endif // COMPONENTS_VISITEDLINK_BROWSER_VISITEDLINK_MASTER_H_
446