1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "components/visitedlink/browser/visitedlink_master.h"
6
7 #if defined(OS_WIN)
8 #include <windows.h>
9 #include <io.h>
10 #include <shlobj.h>
11 #endif // defined(OS_WIN)
12 #include <stdio.h>
13
14 #include <algorithm>
15
16 #include "base/bind.h"
17 #include "base/bind_helpers.h"
18 #include "base/containers/stack_container.h"
19 #include "base/files/file_util.h"
20 #include "base/files/scoped_file.h"
21 #include "base/logging.h"
22 #include "base/message_loop/message_loop.h"
23 #include "base/path_service.h"
24 #include "base/rand_util.h"
25 #include "base/strings/string_util.h"
26 #include "base/threading/thread_restrictions.h"
27 #include "components/visitedlink/browser/visitedlink_delegate.h"
28 #include "components/visitedlink/browser/visitedlink_event_listener.h"
29 #include "content/public/browser/browser_context.h"
30 #include "content/public/browser/browser_thread.h"
31 #include "url/gurl.h"
32
33 using content::BrowserThread;
34
35 namespace visitedlink {
36
37 const int32 VisitedLinkMaster::kFileHeaderSignatureOffset = 0;
38 const int32 VisitedLinkMaster::kFileHeaderVersionOffset = 4;
39 const int32 VisitedLinkMaster::kFileHeaderLengthOffset = 8;
40 const int32 VisitedLinkMaster::kFileHeaderUsedOffset = 12;
41 const int32 VisitedLinkMaster::kFileHeaderSaltOffset = 16;
42
43 const int32 VisitedLinkMaster::kFileCurrentVersion = 3;
44
45 // the signature at the beginning of the URL table = "VLnk" (visited links)
46 const int32 VisitedLinkMaster::kFileSignature = 0x6b6e4c56;
47 const size_t VisitedLinkMaster::kFileHeaderSize =
48 kFileHeaderSaltOffset + LINK_SALT_LENGTH;
49
50 // This value should also be the same as the smallest size in the lookup
51 // table in NewTableSizeForCount (prime number).
52 const unsigned VisitedLinkMaster::kDefaultTableSize = 16381;
53
54 const size_t VisitedLinkMaster::kBigDeleteThreshold = 64;
55
56 namespace {
57
58 // Fills the given salt structure with some quasi-random values
59 // It is not necessary to generate a cryptographically strong random string,
60 // only that it be reasonably different for different users.
GenerateSalt(uint8 salt[LINK_SALT_LENGTH])61 void GenerateSalt(uint8 salt[LINK_SALT_LENGTH]) {
62 DCHECK_EQ(LINK_SALT_LENGTH, 8) << "This code assumes the length of the salt";
63 uint64 randval = base::RandUint64();
64 memcpy(salt, &randval, 8);
65 }
66
67 // Opens file on a background thread to not block UI thread.
AsyncOpen(FILE ** file,const base::FilePath & filename)68 void AsyncOpen(FILE** file, const base::FilePath& filename) {
69 *file = base::OpenFile(filename, "wb+");
70 DLOG_IF(ERROR, !(*file)) << "Failed to open file " << filename.value();
71 }
72
73 // Returns true if the write was complete.
WriteToFile(FILE * file,off_t offset,const void * data,size_t data_len)74 static bool WriteToFile(FILE* file,
75 off_t offset,
76 const void* data,
77 size_t data_len) {
78 if (fseek(file, offset, SEEK_SET) != 0)
79 return false; // Don't write to an invalid part of the file.
80
81 size_t num_written = fwrite(data, 1, data_len, file);
82
83 // The write may not make it to the kernel (stdlib may buffer the write)
84 // until the next fseek/fclose call. If we crash, it's easy for our used
85 // item count to be out of sync with the number of hashes we write.
86 // Protect against this by calling fflush.
87 int ret = fflush(file);
88 DCHECK_EQ(0, ret);
89 return num_written == data_len;
90 }
91
92 // This task executes on a background thread and executes a write. This
93 // prevents us from blocking the UI thread doing I/O. Double pointer to FILE
94 // is used because file may still not be opened by the time of scheduling
95 // the task for execution.
AsyncWrite(FILE ** file,int32 offset,const std::string & data)96 void AsyncWrite(FILE** file, int32 offset, const std::string& data) {
97 if (*file)
98 WriteToFile(*file, offset, data.data(), data.size());
99 }
100
101 // Truncates the file to the current position asynchronously on a background
102 // thread. Double pointer to FILE is used because file may still not be opened
103 // by the time of scheduling the task for execution.
AsyncTruncate(FILE ** file)104 void AsyncTruncate(FILE** file) {
105 if (*file)
106 base::IgnoreResult(base::TruncateFile(*file));
107 }
108
109 // Closes the file on a background thread and releases memory used for storage
110 // of FILE* value. Double pointer to FILE is used because file may still not
111 // be opened by the time of scheduling the task for execution.
AsyncClose(FILE ** file)112 void AsyncClose(FILE** file) {
113 if (*file)
114 base::IgnoreResult(fclose(*file));
115 free(file);
116 }
117
118 } // namespace
119
120 // TableBuilder ---------------------------------------------------------------
121
122 // How rebuilding from history works
123 // ---------------------------------
124 //
125 // We mark that we're rebuilding from history by setting the table_builder_
126 // member in VisitedLinkMaster to the TableBuilder we create. This builder
127 // will be called on the history thread by the history system for every URL
128 // in the database.
129 //
130 // The builder will store the fingerprints for those URLs, and then marshalls
131 // back to the main thread where the VisitedLinkMaster will be notified. The
132 // master then replaces its table with a new table containing the computed
133 // fingerprints.
134 //
135 // The builder must remain active while the history system is using it.
136 // Sometimes, the master will be deleted before the rebuild is complete, in
137 // which case it notifies the builder via DisownMaster(). The builder will
138 // delete itself once rebuilding is complete, and not execute any callback.
139 class VisitedLinkMaster::TableBuilder
140 : public VisitedLinkDelegate::URLEnumerator {
141 public:
142 TableBuilder(VisitedLinkMaster* master,
143 const uint8 salt[LINK_SALT_LENGTH]);
144
145 // Called on the main thread when the master is being destroyed. This will
146 // prevent a crash when the query completes and the master is no longer
147 // around. We can not actually do anything but mark this fact, since the
148 // table will be being rebuilt simultaneously on the other thread.
149 void DisownMaster();
150
151 // VisitedLinkDelegate::URLEnumerator
152 virtual void OnURL(const GURL& url) OVERRIDE;
153 virtual void OnComplete(bool succeed) OVERRIDE;
154
155 private:
~TableBuilder()156 virtual ~TableBuilder() {}
157
158 // OnComplete mashals to this function on the main thread to do the
159 // notification.
160 void OnCompleteMainThread();
161
162 // Owner of this object. MAY ONLY BE ACCESSED ON THE MAIN THREAD!
163 VisitedLinkMaster* master_;
164
165 // Indicates whether the operation has failed or not.
166 bool success_;
167
168 // Salt for this new table.
169 uint8 salt_[LINK_SALT_LENGTH];
170
171 // Stores the fingerprints we computed on the background thread.
172 VisitedLinkCommon::Fingerprints fingerprints_;
173
174 DISALLOW_COPY_AND_ASSIGN(TableBuilder);
175 };
176
177 // VisitedLinkMaster ----------------------------------------------------------
178
VisitedLinkMaster(content::BrowserContext * browser_context,VisitedLinkDelegate * delegate,bool persist_to_disk)179 VisitedLinkMaster::VisitedLinkMaster(content::BrowserContext* browser_context,
180 VisitedLinkDelegate* delegate,
181 bool persist_to_disk)
182 : browser_context_(browser_context),
183 delegate_(delegate),
184 listener_(new VisitedLinkEventListener(this, browser_context)),
185 persist_to_disk_(persist_to_disk) {
186 InitMembers();
187 }
188
VisitedLinkMaster(Listener * listener,VisitedLinkDelegate * delegate,bool persist_to_disk,bool suppress_rebuild,const base::FilePath & filename,int32 default_table_size)189 VisitedLinkMaster::VisitedLinkMaster(Listener* listener,
190 VisitedLinkDelegate* delegate,
191 bool persist_to_disk,
192 bool suppress_rebuild,
193 const base::FilePath& filename,
194 int32 default_table_size)
195 : browser_context_(NULL),
196 delegate_(delegate),
197 persist_to_disk_(persist_to_disk) {
198 listener_.reset(listener);
199 DCHECK(listener_.get());
200 InitMembers();
201
202 database_name_override_ = filename;
203 table_size_override_ = default_table_size;
204 suppress_rebuild_ = suppress_rebuild;
205 }
206
~VisitedLinkMaster()207 VisitedLinkMaster::~VisitedLinkMaster() {
208 if (table_builder_.get()) {
209 // Prevent the table builder from calling us back now that we're being
210 // destroyed. Note that we DON'T delete the object, since the history
211 // system is still writing into it. When that is complete, the table
212 // builder will destroy itself when it finds we are gone.
213 table_builder_->DisownMaster();
214 }
215 FreeURLTable();
216 // FreeURLTable() will schedule closing of the file and deletion of |file_|.
217 // So nothing should be done here.
218 }
219
InitMembers()220 void VisitedLinkMaster::InitMembers() {
221 file_ = NULL;
222 shared_memory_ = NULL;
223 shared_memory_serial_ = 0;
224 used_items_ = 0;
225 table_size_override_ = 0;
226 suppress_rebuild_ = false;
227 sequence_token_ = BrowserThread::GetBlockingPool()->GetSequenceToken();
228
229 #ifndef NDEBUG
230 posted_asynchronous_operation_ = false;
231 #endif
232 }
233
Init()234 bool VisitedLinkMaster::Init() {
235 // We probably shouldn't be loading this from the UI thread,
236 // but it does need to happen early on in startup.
237 // http://code.google.com/p/chromium/issues/detail?id=24163
238 base::ThreadRestrictions::ScopedAllowIO allow_io;
239
240 if (persist_to_disk_) {
241 if (InitFromFile())
242 return true;
243 }
244 return InitFromScratch(suppress_rebuild_);
245 }
246
TryToAddURL(const GURL & url)247 VisitedLinkMaster::Hash VisitedLinkMaster::TryToAddURL(const GURL& url) {
248 // Extra check that we are not incognito. This should not happen.
249 // TODO(boliu): Move this check to HistoryService when IsOffTheRecord is
250 // removed from BrowserContext.
251 if (browser_context_ && browser_context_->IsOffTheRecord()) {
252 NOTREACHED();
253 return null_hash_;
254 }
255
256 if (!url.is_valid())
257 return null_hash_; // Don't add invalid URLs.
258
259 Fingerprint fingerprint = ComputeURLFingerprint(url.spec().data(),
260 url.spec().size(),
261 salt_);
262 if (table_builder_.get()) {
263 // If we have a pending delete for this fingerprint, cancel it.
264 std::set<Fingerprint>::iterator found =
265 deleted_since_rebuild_.find(fingerprint);
266 if (found != deleted_since_rebuild_.end())
267 deleted_since_rebuild_.erase(found);
268
269 // A rebuild is in progress, save this addition in the temporary list so
270 // it can be added once rebuild is complete.
271 added_since_rebuild_.insert(fingerprint);
272 }
273
274 // If the table is "full", we don't add URLs and just drop them on the floor.
275 // This can happen if we get thousands of new URLs and something causes
276 // the table resizing to fail. This check prevents a hang in that case. Note
277 // that this is *not* the resize limit, this is just a sanity check.
278 if (used_items_ / 8 > table_length_ / 10)
279 return null_hash_; // Table is more than 80% full.
280
281 return AddFingerprint(fingerprint, true);
282 }
283
PostIOTask(const tracked_objects::Location & from_here,const base::Closure & task)284 void VisitedLinkMaster::PostIOTask(const tracked_objects::Location& from_here,
285 const base::Closure& task) {
286 DCHECK(persist_to_disk_);
287 BrowserThread::GetBlockingPool()->PostSequencedWorkerTask(sequence_token_,
288 from_here, task);
289 }
290
AddURL(const GURL & url)291 void VisitedLinkMaster::AddURL(const GURL& url) {
292 Hash index = TryToAddURL(url);
293 if (!table_builder_.get() && index != null_hash_) {
294 // Not rebuilding, so we want to keep the file on disk up-to-date.
295 if (persist_to_disk_) {
296 WriteUsedItemCountToFile();
297 WriteHashRangeToFile(index, index);
298 }
299 ResizeTableIfNecessary();
300 }
301 }
302
AddURLs(const std::vector<GURL> & url)303 void VisitedLinkMaster::AddURLs(const std::vector<GURL>& url) {
304 for (std::vector<GURL>::const_iterator i = url.begin();
305 i != url.end(); ++i) {
306 Hash index = TryToAddURL(*i);
307 if (!table_builder_.get() && index != null_hash_)
308 ResizeTableIfNecessary();
309 }
310
311 // Keeps the file on disk up-to-date.
312 if (!table_builder_.get() && persist_to_disk_)
313 WriteFullTable();
314 }
315
DeleteAllURLs()316 void VisitedLinkMaster::DeleteAllURLs() {
317 // Any pending modifications are invalid.
318 added_since_rebuild_.clear();
319 deleted_since_rebuild_.clear();
320
321 // Clear the hash table.
322 used_items_ = 0;
323 memset(hash_table_, 0, this->table_length_ * sizeof(Fingerprint));
324
325 // Resize it if it is now too empty. Resize may write the new table out for
326 // us, otherwise, schedule writing the new table to disk ourselves.
327 if (!ResizeTableIfNecessary() && persist_to_disk_)
328 WriteFullTable();
329
330 listener_->Reset();
331 }
332
GetDelegate()333 VisitedLinkDelegate* VisitedLinkMaster::GetDelegate() {
334 return delegate_;
335 }
336
DeleteURLs(URLIterator * urls)337 void VisitedLinkMaster::DeleteURLs(URLIterator* urls) {
338 if (!urls->HasNextURL())
339 return;
340
341 listener_->Reset();
342
343 if (table_builder_.get()) {
344 // A rebuild is in progress, save this deletion in the temporary list so
345 // it can be added once rebuild is complete.
346 while (urls->HasNextURL()) {
347 const GURL& url(urls->NextURL());
348 if (!url.is_valid())
349 continue;
350
351 Fingerprint fingerprint =
352 ComputeURLFingerprint(url.spec().data(), url.spec().size(), salt_);
353 deleted_since_rebuild_.insert(fingerprint);
354
355 // If the URL was just added and now we're deleting it, it may be in the
356 // list of things added since the last rebuild. Delete it from that list.
357 std::set<Fingerprint>::iterator found =
358 added_since_rebuild_.find(fingerprint);
359 if (found != added_since_rebuild_.end())
360 added_since_rebuild_.erase(found);
361
362 // Delete the URLs from the in-memory table, but don't bother writing
363 // to disk since it will be replaced soon.
364 DeleteFingerprint(fingerprint, false);
365 }
366 return;
367 }
368
369 // Compute the deleted URLs' fingerprints and delete them
370 std::set<Fingerprint> deleted_fingerprints;
371 while (urls->HasNextURL()) {
372 const GURL& url(urls->NextURL());
373 if (!url.is_valid())
374 continue;
375 deleted_fingerprints.insert(
376 ComputeURLFingerprint(url.spec().data(), url.spec().size(), salt_));
377 }
378 DeleteFingerprintsFromCurrentTable(deleted_fingerprints);
379 }
380
381 // See VisitedLinkCommon::IsVisited which should be in sync with this algorithm
AddFingerprint(Fingerprint fingerprint,bool send_notifications)382 VisitedLinkMaster::Hash VisitedLinkMaster::AddFingerprint(
383 Fingerprint fingerprint,
384 bool send_notifications) {
385 if (!hash_table_ || table_length_ == 0) {
386 NOTREACHED(); // Not initialized.
387 return null_hash_;
388 }
389
390 Hash cur_hash = HashFingerprint(fingerprint);
391 Hash first_hash = cur_hash;
392 while (true) {
393 Fingerprint cur_fingerprint = FingerprintAt(cur_hash);
394 if (cur_fingerprint == fingerprint)
395 return null_hash_; // This fingerprint is already in there, do nothing.
396
397 if (cur_fingerprint == null_fingerprint_) {
398 // End of probe sequence found, insert here.
399 hash_table_[cur_hash] = fingerprint;
400 used_items_++;
401 // If allowed, notify listener that a new visited link was added.
402 if (send_notifications)
403 listener_->Add(fingerprint);
404 return cur_hash;
405 }
406
407 // Advance in the probe sequence.
408 cur_hash = IncrementHash(cur_hash);
409 if (cur_hash == first_hash) {
410 // This means that we've wrapped around and are about to go into an
411 // infinite loop. Something was wrong with the hashtable resizing
412 // logic, so stop here.
413 NOTREACHED();
414 return null_hash_;
415 }
416 }
417 }
418
DeleteFingerprintsFromCurrentTable(const std::set<Fingerprint> & fingerprints)419 void VisitedLinkMaster::DeleteFingerprintsFromCurrentTable(
420 const std::set<Fingerprint>& fingerprints) {
421 bool bulk_write = (fingerprints.size() > kBigDeleteThreshold);
422
423 // Delete the URLs from the table.
424 for (std::set<Fingerprint>::const_iterator i = fingerprints.begin();
425 i != fingerprints.end(); ++i)
426 DeleteFingerprint(*i, !bulk_write);
427
428 // These deleted fingerprints may make us shrink the table.
429 if (ResizeTableIfNecessary())
430 return; // The resize function wrote the new table to disk for us.
431
432 // Nobody wrote this out for us, write the full file to disk.
433 if (bulk_write && persist_to_disk_)
434 WriteFullTable();
435 }
436
DeleteFingerprint(Fingerprint fingerprint,bool update_file)437 bool VisitedLinkMaster::DeleteFingerprint(Fingerprint fingerprint,
438 bool update_file) {
439 if (!hash_table_ || table_length_ == 0) {
440 NOTREACHED(); // Not initialized.
441 return false;
442 }
443 if (!IsVisited(fingerprint))
444 return false; // Not in the database to delete.
445
446 // First update the header used count.
447 used_items_--;
448 if (update_file && persist_to_disk_)
449 WriteUsedItemCountToFile();
450
451 Hash deleted_hash = HashFingerprint(fingerprint);
452
453 // Find the range of "stuff" in the hash table that is adjacent to this
454 // fingerprint. These are things that could be affected by the change in
455 // the hash table. Since we use linear probing, anything after the deleted
456 // item up until an empty item could be affected.
457 Hash end_range = deleted_hash;
458 while (true) {
459 Hash next_hash = IncrementHash(end_range);
460 if (next_hash == deleted_hash)
461 break; // We wrapped around and the whole table is full.
462 if (!hash_table_[next_hash])
463 break; // Found the last spot.
464 end_range = next_hash;
465 }
466
467 // We could get all fancy and move the affected fingerprints around, but
468 // instead we just remove them all and re-add them (minus our deleted one).
469 // This will mean there's a small window of time where the affected links
470 // won't be marked visited.
471 base::StackVector<Fingerprint, 32> shuffled_fingerprints;
472 Hash stop_loop = IncrementHash(end_range); // The end range is inclusive.
473 for (Hash i = deleted_hash; i != stop_loop; i = IncrementHash(i)) {
474 if (hash_table_[i] != fingerprint) {
475 // Don't save the one we're deleting!
476 shuffled_fingerprints->push_back(hash_table_[i]);
477
478 // This will balance the increment of this value in AddFingerprint below
479 // so there is no net change.
480 used_items_--;
481 }
482 hash_table_[i] = null_fingerprint_;
483 }
484
485 if (!shuffled_fingerprints->empty()) {
486 // Need to add the new items back.
487 for (size_t i = 0; i < shuffled_fingerprints->size(); i++)
488 AddFingerprint(shuffled_fingerprints[i], false);
489 }
490
491 // Write the affected range to disk [deleted_hash, end_range].
492 if (update_file && persist_to_disk_)
493 WriteHashRangeToFile(deleted_hash, end_range);
494
495 return true;
496 }
497
WriteFullTable()498 void VisitedLinkMaster::WriteFullTable() {
499 // This function can get called when the file is open, for example, when we
500 // resize the table. We must handle this case and not try to reopen the file,
501 // since there may be write operations pending on the file I/O thread.
502 //
503 // Note that once we start writing, we do not delete on error. This means
504 // there can be a partial file, but the short file will be detected next time
505 // we start, and will be replaced.
506 //
507 // This might possibly get corrupted if we crash in the middle of writing.
508 // We should pick up the most common types of these failures when we notice
509 // that the file size is different when we load it back in, and then we will
510 // regenerate the table.
511 DCHECK(persist_to_disk_);
512
513 if (!file_) {
514 file_ = static_cast<FILE**>(calloc(1, sizeof(*file_)));
515 base::FilePath filename;
516 GetDatabaseFileName(&filename);
517 PostIOTask(FROM_HERE, base::Bind(&AsyncOpen, file_, filename));
518 }
519
520 // Write the new header.
521 int32 header[4];
522 header[0] = kFileSignature;
523 header[1] = kFileCurrentVersion;
524 header[2] = table_length_;
525 header[3] = used_items_;
526 WriteToFile(file_, 0, header, sizeof(header));
527 WriteToFile(file_, sizeof(header), salt_, LINK_SALT_LENGTH);
528
529 // Write the hash data.
530 WriteToFile(file_, kFileHeaderSize,
531 hash_table_, table_length_ * sizeof(Fingerprint));
532
533 // The hash table may have shrunk, so make sure this is the end.
534 PostIOTask(FROM_HERE, base::Bind(&AsyncTruncate, file_));
535 }
536
InitFromFile()537 bool VisitedLinkMaster::InitFromFile() {
538 DCHECK(file_ == NULL);
539 DCHECK(persist_to_disk_);
540
541 base::FilePath filename;
542 GetDatabaseFileName(&filename);
543 base::ScopedFILE file_closer(base::OpenFile(filename, "rb+"));
544 if (!file_closer.get())
545 return false;
546
547 int32 num_entries, used_count;
548 if (!ReadFileHeader(file_closer.get(), &num_entries, &used_count, salt_))
549 return false; // Header isn't valid.
550
551 // Allocate and read the table.
552 if (!CreateURLTable(num_entries, false))
553 return false;
554 if (!ReadFromFile(file_closer.get(), kFileHeaderSize,
555 hash_table_, num_entries * sizeof(Fingerprint))) {
556 FreeURLTable();
557 return false;
558 }
559 used_items_ = used_count;
560
561 #ifndef NDEBUG
562 DebugValidate();
563 #endif
564
565 file_ = static_cast<FILE**>(malloc(sizeof(*file_)));
566 *file_ = file_closer.release();
567 return true;
568 }
569
InitFromScratch(bool suppress_rebuild)570 bool VisitedLinkMaster::InitFromScratch(bool suppress_rebuild) {
571 int32 table_size = kDefaultTableSize;
572 if (table_size_override_)
573 table_size = table_size_override_;
574
575 // The salt must be generated before the table so that it can be copied to
576 // the shared memory.
577 GenerateSalt(salt_);
578 if (!CreateURLTable(table_size, true))
579 return false;
580
581 #ifndef NDEBUG
582 DebugValidate();
583 #endif
584
585 if (suppress_rebuild && persist_to_disk_) {
586 // When we disallow rebuilds (normally just unit tests), just use the
587 // current empty table.
588 WriteFullTable();
589 return true;
590 }
591
592 // This will build the table from history. On the first run, history will
593 // be empty, so this will be correct. This will also write the new table
594 // to disk. We don't want to save explicitly here, since the rebuild may
595 // not complete, leaving us with an empty but valid visited link database.
596 // In the future, we won't know we need to try rebuilding again.
597 return RebuildTableFromDelegate();
598 }
599
ReadFileHeader(FILE * file,int32 * num_entries,int32 * used_count,uint8 salt[LINK_SALT_LENGTH])600 bool VisitedLinkMaster::ReadFileHeader(FILE* file,
601 int32* num_entries,
602 int32* used_count,
603 uint8 salt[LINK_SALT_LENGTH]) {
604 DCHECK(persist_to_disk_);
605
606 // Get file size.
607 // Note that there is no need to seek back to the original location in the
608 // file since ReadFromFile() [which is the next call accessing the file]
609 // seeks before reading.
610 if (fseek(file, 0, SEEK_END) == -1)
611 return false;
612 size_t file_size = ftell(file);
613
614 if (file_size <= kFileHeaderSize)
615 return false;
616
617 uint8 header[kFileHeaderSize];
618 if (!ReadFromFile(file, 0, &header, kFileHeaderSize))
619 return false;
620
621 // Verify the signature.
622 int32 signature;
623 memcpy(&signature, &header[kFileHeaderSignatureOffset], sizeof(signature));
624 if (signature != kFileSignature)
625 return false;
626
627 // Verify the version is up-to-date. As with other read errors, a version
628 // mistmatch will trigger a rebuild of the database from history, which will
629 // have the effect of migrating the database.
630 int32 version;
631 memcpy(&version, &header[kFileHeaderVersionOffset], sizeof(version));
632 if (version != kFileCurrentVersion)
633 return false; // Bad version.
634
635 // Read the table size and make sure it matches the file size.
636 memcpy(num_entries, &header[kFileHeaderLengthOffset], sizeof(*num_entries));
637 if (*num_entries * sizeof(Fingerprint) + kFileHeaderSize != file_size)
638 return false; // Bad size.
639
640 // Read the used item count.
641 memcpy(used_count, &header[kFileHeaderUsedOffset], sizeof(*used_count));
642 if (*used_count > *num_entries)
643 return false; // Bad used item count;
644
645 // Read the salt.
646 memcpy(salt, &header[kFileHeaderSaltOffset], LINK_SALT_LENGTH);
647
648 // This file looks OK from the header's perspective.
649 return true;
650 }
651
GetDatabaseFileName(base::FilePath * filename)652 bool VisitedLinkMaster::GetDatabaseFileName(base::FilePath* filename) {
653 if (!database_name_override_.empty()) {
654 // use this filename, the directory must exist
655 *filename = database_name_override_;
656 return true;
657 }
658
659 if (!browser_context_ || browser_context_->GetPath().empty())
660 return false;
661
662 base::FilePath profile_dir = browser_context_->GetPath();
663 *filename = profile_dir.Append(FILE_PATH_LITERAL("Visited Links"));
664 return true;
665 }
666
667 // Initializes the shared memory structure. The salt should already be filled
668 // in so that it can be written to the shared memory
CreateURLTable(int32 num_entries,bool init_to_empty)669 bool VisitedLinkMaster::CreateURLTable(int32 num_entries, bool init_to_empty) {
670 // The table is the size of the table followed by the entries.
671 uint32 alloc_size = num_entries * sizeof(Fingerprint) + sizeof(SharedHeader);
672
673 // Create the shared memory object.
674 shared_memory_ = new base::SharedMemory();
675 if (!shared_memory_)
676 return false;
677
678 base::SharedMemoryCreateOptions options;
679 options.size = alloc_size;
680 options.share_read_only = true;
681
682 if (!shared_memory_->Create(options) || !shared_memory_->Map(alloc_size)) {
683 delete shared_memory_;
684 shared_memory_ = NULL;
685 return false;
686 }
687
688 if (init_to_empty) {
689 memset(shared_memory_->memory(), 0, alloc_size);
690 used_items_ = 0;
691 }
692 table_length_ = num_entries;
693
694 // Save the header for other processes to read.
695 SharedHeader* header = static_cast<SharedHeader*>(shared_memory_->memory());
696 header->length = table_length_;
697 memcpy(header->salt, salt_, LINK_SALT_LENGTH);
698
699 // Our table pointer is just the data immediately following the size.
700 hash_table_ = reinterpret_cast<Fingerprint*>(
701 static_cast<char*>(shared_memory_->memory()) + sizeof(SharedHeader));
702
703 return true;
704 }
705
BeginReplaceURLTable(int32 num_entries)706 bool VisitedLinkMaster::BeginReplaceURLTable(int32 num_entries) {
707 base::SharedMemory *old_shared_memory = shared_memory_;
708 Fingerprint* old_hash_table = hash_table_;
709 int32 old_table_length = table_length_;
710 if (!CreateURLTable(num_entries, true)) {
711 // Try to put back the old state.
712 shared_memory_ = old_shared_memory;
713 hash_table_ = old_hash_table;
714 table_length_ = old_table_length;
715 return false;
716 }
717
718 #ifndef NDEBUG
719 DebugValidate();
720 #endif
721
722 return true;
723 }
724
FreeURLTable()725 void VisitedLinkMaster::FreeURLTable() {
726 if (shared_memory_) {
727 delete shared_memory_;
728 shared_memory_ = NULL;
729 }
730 if (!persist_to_disk_ || !file_)
731 return;
732 PostIOTask(FROM_HERE, base::Bind(&AsyncClose, file_));
733 // AsyncClose() will close the file and free the memory pointed by |file_|.
734 file_ = NULL;
735 }
736
ResizeTableIfNecessary()737 bool VisitedLinkMaster::ResizeTableIfNecessary() {
738 DCHECK(table_length_ > 0) << "Must have a table";
739
740 // Load limits for good performance/space. We are pretty conservative about
741 // keeping the table not very full. This is because we use linear probing
742 // which increases the likelihood of clumps of entries which will reduce
743 // performance.
744 const float max_table_load = 0.5f; // Grow when we're > this full.
745 const float min_table_load = 0.2f; // Shrink when we're < this full.
746
747 float load = ComputeTableLoad();
748 if (load < max_table_load &&
749 (table_length_ <= static_cast<float>(kDefaultTableSize) ||
750 load > min_table_load))
751 return false;
752
753 // Table needs to grow or shrink.
754 int new_size = NewTableSizeForCount(used_items_);
755 DCHECK(new_size > used_items_);
756 DCHECK(load <= min_table_load || new_size > table_length_);
757 ResizeTable(new_size);
758 return true;
759 }
760
ResizeTable(int32 new_size)761 void VisitedLinkMaster::ResizeTable(int32 new_size) {
762 DCHECK(shared_memory_ && shared_memory_->memory() && hash_table_);
763 shared_memory_serial_++;
764
765 #ifndef NDEBUG
766 DebugValidate();
767 #endif
768
769 base::SharedMemory* old_shared_memory = shared_memory_;
770 Fingerprint* old_hash_table = hash_table_;
771 int32 old_table_length = table_length_;
772 if (!BeginReplaceURLTable(new_size))
773 return;
774
775 // Now we have two tables, our local copy which is the old one, and the new
776 // one loaded into this object where we need to copy the data.
777 for (int32 i = 0; i < old_table_length; i++) {
778 Fingerprint cur = old_hash_table[i];
779 if (cur)
780 AddFingerprint(cur, false);
781 }
782
783 // On error unmapping, just forget about it since we can't do anything
784 // else to release it.
785 delete old_shared_memory;
786
787 // Send an update notification to all child processes so they read the new
788 // table.
789 listener_->NewTable(shared_memory_);
790
791 #ifndef NDEBUG
792 DebugValidate();
793 #endif
794
795 // The new table needs to be written to disk.
796 if (persist_to_disk_)
797 WriteFullTable();
798 }
799
NewTableSizeForCount(int32 item_count) const800 uint32 VisitedLinkMaster::NewTableSizeForCount(int32 item_count) const {
801 // These table sizes are selected to be the maximum prime number less than
802 // a "convenient" multiple of 1K.
803 static const int table_sizes[] = {
804 16381, // 16K = 16384 <- don't shrink below this table size
805 // (should be == default_table_size)
806 32767, // 32K = 32768
807 65521, // 64K = 65536
808 130051, // 128K = 131072
809 262127, // 256K = 262144
810 524269, // 512K = 524288
811 1048549, // 1M = 1048576
812 2097143, // 2M = 2097152
813 4194301, // 4M = 4194304
814 8388571, // 8M = 8388608
815 16777199, // 16M = 16777216
816 33554347}; // 32M = 33554432
817
818 // Try to leave the table 33% full.
819 int desired = item_count * 3;
820
821 // Find the closest prime.
822 for (size_t i = 0; i < arraysize(table_sizes); i ++) {
823 if (table_sizes[i] > desired)
824 return table_sizes[i];
825 }
826
827 // Growing very big, just approximate a "good" number, not growing as much
828 // as normal.
829 return item_count * 2 - 1;
830 }
831
832 // See the TableBuilder definition in the header file for how this works.
RebuildTableFromDelegate()833 bool VisitedLinkMaster::RebuildTableFromDelegate() {
834 DCHECK(!table_builder_.get());
835
836 // TODO(brettw) make sure we have reasonable salt!
837 table_builder_ = new TableBuilder(this, salt_);
838 delegate_->RebuildTable(table_builder_);
839 return true;
840 }
841
842 // See the TableBuilder declaration above for how this works.
OnTableRebuildComplete(bool success,const std::vector<Fingerprint> & fingerprints)843 void VisitedLinkMaster::OnTableRebuildComplete(
844 bool success,
845 const std::vector<Fingerprint>& fingerprints) {
846 if (success) {
847 // Replace the old table with a new blank one.
848 shared_memory_serial_++;
849
850 // We are responsible for freeing it AFTER it has been replaced if
851 // replacement succeeds.
852 base::SharedMemory* old_shared_memory = shared_memory_;
853
854 int new_table_size = NewTableSizeForCount(
855 static_cast<int>(fingerprints.size() + added_since_rebuild_.size()));
856 if (BeginReplaceURLTable(new_table_size)) {
857 // Free the old table.
858 delete old_shared_memory;
859
860 // Add the stored fingerprints to the hash table.
861 for (size_t i = 0; i < fingerprints.size(); i++)
862 AddFingerprint(fingerprints[i], false);
863
864 // Also add anything that was added while we were asynchronously
865 // generating the new table.
866 for (std::set<Fingerprint>::iterator i = added_since_rebuild_.begin();
867 i != added_since_rebuild_.end(); ++i)
868 AddFingerprint(*i, false);
869 added_since_rebuild_.clear();
870
871 // Now handle deletions.
872 DeleteFingerprintsFromCurrentTable(deleted_since_rebuild_);
873 deleted_since_rebuild_.clear();
874
875 // Send an update notification to all child processes.
876 listener_->NewTable(shared_memory_);
877
878 if (persist_to_disk_)
879 WriteFullTable();
880 }
881 }
882 table_builder_ = NULL; // Will release our reference to the builder.
883
884 // Notify the unit test that the rebuild is complete (will be NULL in prod.)
885 if (!rebuild_complete_task_.is_null()) {
886 rebuild_complete_task_.Run();
887 rebuild_complete_task_.Reset();
888 }
889 }
890
WriteToFile(FILE ** file,off_t offset,void * data,int32 data_size)891 void VisitedLinkMaster::WriteToFile(FILE** file,
892 off_t offset,
893 void* data,
894 int32 data_size) {
895 DCHECK(persist_to_disk_);
896 #ifndef NDEBUG
897 posted_asynchronous_operation_ = true;
898 #endif
899 PostIOTask(FROM_HERE,
900 base::Bind(&AsyncWrite, file, offset,
901 std::string(static_cast<const char*>(data), data_size)));
902 }
903
WriteUsedItemCountToFile()904 void VisitedLinkMaster::WriteUsedItemCountToFile() {
905 DCHECK(persist_to_disk_);
906 if (!file_)
907 return; // See comment on the file_ variable for why this might happen.
908 WriteToFile(file_, kFileHeaderUsedOffset, &used_items_, sizeof(used_items_));
909 }
910
WriteHashRangeToFile(Hash first_hash,Hash last_hash)911 void VisitedLinkMaster::WriteHashRangeToFile(Hash first_hash, Hash last_hash) {
912 DCHECK(persist_to_disk_);
913
914 if (!file_)
915 return; // See comment on the file_ variable for why this might happen.
916 if (last_hash < first_hash) {
917 // Handle wraparound at 0. This first write is first_hash->EOF
918 WriteToFile(file_, first_hash * sizeof(Fingerprint) + kFileHeaderSize,
919 &hash_table_[first_hash],
920 (table_length_ - first_hash + 1) * sizeof(Fingerprint));
921
922 // Now do 0->last_lash.
923 WriteToFile(file_, kFileHeaderSize, hash_table_,
924 (last_hash + 1) * sizeof(Fingerprint));
925 } else {
926 // Normal case, just write the range.
927 WriteToFile(file_, first_hash * sizeof(Fingerprint) + kFileHeaderSize,
928 &hash_table_[first_hash],
929 (last_hash - first_hash + 1) * sizeof(Fingerprint));
930 }
931 }
932
ReadFromFile(FILE * file,off_t offset,void * data,size_t data_size)933 bool VisitedLinkMaster::ReadFromFile(FILE* file,
934 off_t offset,
935 void* data,
936 size_t data_size) {
937 DCHECK(persist_to_disk_);
938 #ifndef NDEBUG
939 // Since this function is synchronous, we require that no asynchronous
940 // operations could possibly be pending.
941 DCHECK(!posted_asynchronous_operation_);
942 #endif
943
944 if (fseek(file, offset, SEEK_SET) != 0)
945 return false;
946
947 size_t num_read = fread(data, 1, data_size, file);
948 return num_read == data_size;
949 }
950
951 // VisitedLinkTableBuilder ----------------------------------------------------
952
TableBuilder(VisitedLinkMaster * master,const uint8 salt[LINK_SALT_LENGTH])953 VisitedLinkMaster::TableBuilder::TableBuilder(
954 VisitedLinkMaster* master,
955 const uint8 salt[LINK_SALT_LENGTH])
956 : master_(master),
957 success_(true) {
958 fingerprints_.reserve(4096);
959 memcpy(salt_, salt, LINK_SALT_LENGTH * sizeof(uint8));
960 }
961
962 // TODO(brettw): Do we want to try to cancel the request if this happens? It
963 // could delay shutdown if there are a lot of URLs.
DisownMaster()964 void VisitedLinkMaster::TableBuilder::DisownMaster() {
965 master_ = NULL;
966 }
967
OnURL(const GURL & url)968 void VisitedLinkMaster::TableBuilder::OnURL(const GURL& url) {
969 if (!url.is_empty()) {
970 fingerprints_.push_back(VisitedLinkMaster::ComputeURLFingerprint(
971 url.spec().data(), url.spec().length(), salt_));
972 }
973 }
974
OnComplete(bool success)975 void VisitedLinkMaster::TableBuilder::OnComplete(bool success) {
976 success_ = success;
977 DLOG_IF(WARNING, !success) << "Unable to rebuild visited links";
978
979 // Marshal to the main thread to notify the VisitedLinkMaster that the
980 // rebuild is complete.
981 BrowserThread::PostTask(
982 BrowserThread::UI, FROM_HERE,
983 base::Bind(&TableBuilder::OnCompleteMainThread, this));
984 }
985
OnCompleteMainThread()986 void VisitedLinkMaster::TableBuilder::OnCompleteMainThread() {
987 if (master_)
988 master_->OnTableRebuildComplete(success_, fingerprints_);
989 }
990
991 } // namespace visitedlink
992