• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "components/visitedlink/browser/visitedlink_master.h"
6 
7 #if defined(OS_WIN)
8 #include <windows.h>
9 #include <io.h>
10 #include <shlobj.h>
11 #endif  // defined(OS_WIN)
12 #include <stdio.h>
13 
14 #include <algorithm>
15 
16 #include "base/bind.h"
17 #include "base/bind_helpers.h"
18 #include "base/containers/stack_container.h"
19 #include "base/files/file_util.h"
20 #include "base/files/scoped_file.h"
21 #include "base/logging.h"
22 #include "base/message_loop/message_loop.h"
23 #include "base/path_service.h"
24 #include "base/rand_util.h"
25 #include "base/strings/string_util.h"
26 #include "base/threading/thread_restrictions.h"
27 #include "components/visitedlink/browser/visitedlink_delegate.h"
28 #include "components/visitedlink/browser/visitedlink_event_listener.h"
29 #include "content/public/browser/browser_context.h"
30 #include "content/public/browser/browser_thread.h"
31 #include "url/gurl.h"
32 
33 using content::BrowserThread;
34 
35 namespace visitedlink {
36 
37 const int32 VisitedLinkMaster::kFileHeaderSignatureOffset = 0;
38 const int32 VisitedLinkMaster::kFileHeaderVersionOffset = 4;
39 const int32 VisitedLinkMaster::kFileHeaderLengthOffset = 8;
40 const int32 VisitedLinkMaster::kFileHeaderUsedOffset = 12;
41 const int32 VisitedLinkMaster::kFileHeaderSaltOffset = 16;
42 
43 const int32 VisitedLinkMaster::kFileCurrentVersion = 3;
44 
45 // the signature at the beginning of the URL table = "VLnk" (visited links)
46 const int32 VisitedLinkMaster::kFileSignature = 0x6b6e4c56;
47 const size_t VisitedLinkMaster::kFileHeaderSize =
48     kFileHeaderSaltOffset + LINK_SALT_LENGTH;
49 
50 // This value should also be the same as the smallest size in the lookup
51 // table in NewTableSizeForCount (prime number).
52 const unsigned VisitedLinkMaster::kDefaultTableSize = 16381;
53 
54 const size_t VisitedLinkMaster::kBigDeleteThreshold = 64;
55 
56 namespace {
57 
58 // Fills the given salt structure with some quasi-random values
59 // It is not necessary to generate a cryptographically strong random string,
60 // only that it be reasonably different for different users.
GenerateSalt(uint8 salt[LINK_SALT_LENGTH])61 void GenerateSalt(uint8 salt[LINK_SALT_LENGTH]) {
62   DCHECK_EQ(LINK_SALT_LENGTH, 8) << "This code assumes the length of the salt";
63   uint64 randval = base::RandUint64();
64   memcpy(salt, &randval, 8);
65 }
66 
67 // Opens file on a background thread to not block UI thread.
AsyncOpen(FILE ** file,const base::FilePath & filename)68 void AsyncOpen(FILE** file, const base::FilePath& filename) {
69   *file = base::OpenFile(filename, "wb+");
70   DLOG_IF(ERROR, !(*file)) << "Failed to open file " << filename.value();
71 }
72 
73 // Returns true if the write was complete.
WriteToFile(FILE * file,off_t offset,const void * data,size_t data_len)74 static bool WriteToFile(FILE* file,
75                         off_t offset,
76                         const void* data,
77                         size_t data_len) {
78   if (fseek(file, offset, SEEK_SET) != 0)
79     return false;  // Don't write to an invalid part of the file.
80 
81   size_t num_written = fwrite(data, 1, data_len, file);
82 
83   // The write may not make it to the kernel (stdlib may buffer the write)
84   // until the next fseek/fclose call.  If we crash, it's easy for our used
85   // item count to be out of sync with the number of hashes we write.
86   // Protect against this by calling fflush.
87   int ret = fflush(file);
88   DCHECK_EQ(0, ret);
89   return num_written == data_len;
90 }
91 
92 // This task executes on a background thread and executes a write. This
93 // prevents us from blocking the UI thread doing I/O. Double pointer to FILE
94 // is used because file may still not be opened by the time of scheduling
95 // the task for execution.
AsyncWrite(FILE ** file,int32 offset,const std::string & data)96 void AsyncWrite(FILE** file, int32 offset, const std::string& data) {
97   if (*file)
98     WriteToFile(*file, offset, data.data(), data.size());
99 }
100 
101 // Truncates the file to the current position asynchronously on a background
102 // thread. Double pointer to FILE is used because file may still not be opened
103 // by the time of scheduling the task for execution.
AsyncTruncate(FILE ** file)104 void AsyncTruncate(FILE** file) {
105   if (*file)
106     base::IgnoreResult(base::TruncateFile(*file));
107 }
108 
109 // Closes the file on a background thread and releases memory used for storage
110 // of FILE* value. Double pointer to FILE is used because file may still not
111 // be opened by the time of scheduling the task for execution.
AsyncClose(FILE ** file)112 void AsyncClose(FILE** file) {
113   if (*file)
114     base::IgnoreResult(fclose(*file));
115   free(file);
116 }
117 
118 }  // namespace
119 
120 // TableBuilder ---------------------------------------------------------------
121 
122 // How rebuilding from history works
123 // ---------------------------------
124 //
125 // We mark that we're rebuilding from history by setting the table_builder_
126 // member in VisitedLinkMaster to the TableBuilder we create. This builder
127 // will be called on the history thread by the history system for every URL
128 // in the database.
129 //
130 // The builder will store the fingerprints for those URLs, and then marshalls
131 // back to the main thread where the VisitedLinkMaster will be notified. The
132 // master then replaces its table with a new table containing the computed
133 // fingerprints.
134 //
135 // The builder must remain active while the history system is using it.
136 // Sometimes, the master will be deleted before the rebuild is complete, in
137 // which case it notifies the builder via DisownMaster(). The builder will
138 // delete itself once rebuilding is complete, and not execute any callback.
139 class VisitedLinkMaster::TableBuilder
140     : public VisitedLinkDelegate::URLEnumerator {
141  public:
142   TableBuilder(VisitedLinkMaster* master,
143                const uint8 salt[LINK_SALT_LENGTH]);
144 
145   // Called on the main thread when the master is being destroyed. This will
146   // prevent a crash when the query completes and the master is no longer
147   // around. We can not actually do anything but mark this fact, since the
148   // table will be being rebuilt simultaneously on the other thread.
149   void DisownMaster();
150 
151   // VisitedLinkDelegate::URLEnumerator
152   virtual void OnURL(const GURL& url) OVERRIDE;
153   virtual void OnComplete(bool succeed) OVERRIDE;
154 
155  private:
~TableBuilder()156   virtual ~TableBuilder() {}
157 
158   // OnComplete mashals to this function on the main thread to do the
159   // notification.
160   void OnCompleteMainThread();
161 
162   // Owner of this object. MAY ONLY BE ACCESSED ON THE MAIN THREAD!
163   VisitedLinkMaster* master_;
164 
165   // Indicates whether the operation has failed or not.
166   bool success_;
167 
168   // Salt for this new table.
169   uint8 salt_[LINK_SALT_LENGTH];
170 
171   // Stores the fingerprints we computed on the background thread.
172   VisitedLinkCommon::Fingerprints fingerprints_;
173 
174   DISALLOW_COPY_AND_ASSIGN(TableBuilder);
175 };
176 
177 // VisitedLinkMaster ----------------------------------------------------------
178 
VisitedLinkMaster(content::BrowserContext * browser_context,VisitedLinkDelegate * delegate,bool persist_to_disk)179 VisitedLinkMaster::VisitedLinkMaster(content::BrowserContext* browser_context,
180                                      VisitedLinkDelegate* delegate,
181                                      bool persist_to_disk)
182     : browser_context_(browser_context),
183       delegate_(delegate),
184       listener_(new VisitedLinkEventListener(this, browser_context)),
185       persist_to_disk_(persist_to_disk) {
186   InitMembers();
187 }
188 
VisitedLinkMaster(Listener * listener,VisitedLinkDelegate * delegate,bool persist_to_disk,bool suppress_rebuild,const base::FilePath & filename,int32 default_table_size)189 VisitedLinkMaster::VisitedLinkMaster(Listener* listener,
190                                      VisitedLinkDelegate* delegate,
191                                      bool persist_to_disk,
192                                      bool suppress_rebuild,
193                                      const base::FilePath& filename,
194                                      int32 default_table_size)
195     : browser_context_(NULL),
196       delegate_(delegate),
197       persist_to_disk_(persist_to_disk) {
198   listener_.reset(listener);
199   DCHECK(listener_.get());
200   InitMembers();
201 
202   database_name_override_ = filename;
203   table_size_override_ = default_table_size;
204   suppress_rebuild_ = suppress_rebuild;
205 }
206 
~VisitedLinkMaster()207 VisitedLinkMaster::~VisitedLinkMaster() {
208   if (table_builder_.get()) {
209     // Prevent the table builder from calling us back now that we're being
210     // destroyed. Note that we DON'T delete the object, since the history
211     // system is still writing into it. When that is complete, the table
212     // builder will destroy itself when it finds we are gone.
213     table_builder_->DisownMaster();
214   }
215   FreeURLTable();
216   // FreeURLTable() will schedule closing of the file and deletion of |file_|.
217   // So nothing should be done here.
218 }
219 
InitMembers()220 void VisitedLinkMaster::InitMembers() {
221   file_ = NULL;
222   shared_memory_ = NULL;
223   shared_memory_serial_ = 0;
224   used_items_ = 0;
225   table_size_override_ = 0;
226   suppress_rebuild_ = false;
227   sequence_token_ = BrowserThread::GetBlockingPool()->GetSequenceToken();
228 
229 #ifndef NDEBUG
230   posted_asynchronous_operation_ = false;
231 #endif
232 }
233 
Init()234 bool VisitedLinkMaster::Init() {
235   // We probably shouldn't be loading this from the UI thread,
236   // but it does need to happen early on in startup.
237   //   http://code.google.com/p/chromium/issues/detail?id=24163
238   base::ThreadRestrictions::ScopedAllowIO allow_io;
239 
240   if (persist_to_disk_) {
241     if (InitFromFile())
242       return true;
243   }
244   return InitFromScratch(suppress_rebuild_);
245 }
246 
TryToAddURL(const GURL & url)247 VisitedLinkMaster::Hash VisitedLinkMaster::TryToAddURL(const GURL& url) {
248   // Extra check that we are not incognito. This should not happen.
249   // TODO(boliu): Move this check to HistoryService when IsOffTheRecord is
250   // removed from BrowserContext.
251   if (browser_context_ && browser_context_->IsOffTheRecord()) {
252     NOTREACHED();
253     return null_hash_;
254   }
255 
256   if (!url.is_valid())
257     return null_hash_;  // Don't add invalid URLs.
258 
259   Fingerprint fingerprint = ComputeURLFingerprint(url.spec().data(),
260                                                   url.spec().size(),
261                                                   salt_);
262   if (table_builder_.get()) {
263     // If we have a pending delete for this fingerprint, cancel it.
264     std::set<Fingerprint>::iterator found =
265         deleted_since_rebuild_.find(fingerprint);
266     if (found != deleted_since_rebuild_.end())
267         deleted_since_rebuild_.erase(found);
268 
269     // A rebuild is in progress, save this addition in the temporary list so
270     // it can be added once rebuild is complete.
271     added_since_rebuild_.insert(fingerprint);
272   }
273 
274   // If the table is "full", we don't add URLs and just drop them on the floor.
275   // This can happen if we get thousands of new URLs and something causes
276   // the table resizing to fail. This check prevents a hang in that case. Note
277   // that this is *not* the resize limit, this is just a sanity check.
278   if (used_items_ / 8 > table_length_ / 10)
279     return null_hash_;  // Table is more than 80% full.
280 
281   return AddFingerprint(fingerprint, true);
282 }
283 
PostIOTask(const tracked_objects::Location & from_here,const base::Closure & task)284 void VisitedLinkMaster::PostIOTask(const tracked_objects::Location& from_here,
285                                    const base::Closure& task) {
286   DCHECK(persist_to_disk_);
287   BrowserThread::GetBlockingPool()->PostSequencedWorkerTask(sequence_token_,
288                                                             from_here, task);
289 }
290 
AddURL(const GURL & url)291 void VisitedLinkMaster::AddURL(const GURL& url) {
292   Hash index = TryToAddURL(url);
293   if (!table_builder_.get() && index != null_hash_) {
294     // Not rebuilding, so we want to keep the file on disk up-to-date.
295     if (persist_to_disk_) {
296       WriteUsedItemCountToFile();
297       WriteHashRangeToFile(index, index);
298     }
299     ResizeTableIfNecessary();
300   }
301 }
302 
AddURLs(const std::vector<GURL> & url)303 void VisitedLinkMaster::AddURLs(const std::vector<GURL>& url) {
304   for (std::vector<GURL>::const_iterator i = url.begin();
305        i != url.end(); ++i) {
306     Hash index = TryToAddURL(*i);
307     if (!table_builder_.get() && index != null_hash_)
308       ResizeTableIfNecessary();
309   }
310 
311   // Keeps the file on disk up-to-date.
312   if (!table_builder_.get() && persist_to_disk_)
313     WriteFullTable();
314 }
315 
DeleteAllURLs()316 void VisitedLinkMaster::DeleteAllURLs() {
317   // Any pending modifications are invalid.
318   added_since_rebuild_.clear();
319   deleted_since_rebuild_.clear();
320 
321   // Clear the hash table.
322   used_items_ = 0;
323   memset(hash_table_, 0, this->table_length_ * sizeof(Fingerprint));
324 
325   // Resize it if it is now too empty. Resize may write the new table out for
326   // us, otherwise, schedule writing the new table to disk ourselves.
327   if (!ResizeTableIfNecessary() && persist_to_disk_)
328     WriteFullTable();
329 
330   listener_->Reset();
331 }
332 
GetDelegate()333 VisitedLinkDelegate* VisitedLinkMaster::GetDelegate() {
334   return delegate_;
335 }
336 
DeleteURLs(URLIterator * urls)337 void VisitedLinkMaster::DeleteURLs(URLIterator* urls) {
338   if (!urls->HasNextURL())
339     return;
340 
341   listener_->Reset();
342 
343   if (table_builder_.get()) {
344     // A rebuild is in progress, save this deletion in the temporary list so
345     // it can be added once rebuild is complete.
346     while (urls->HasNextURL()) {
347       const GURL& url(urls->NextURL());
348       if (!url.is_valid())
349         continue;
350 
351       Fingerprint fingerprint =
352           ComputeURLFingerprint(url.spec().data(), url.spec().size(), salt_);
353       deleted_since_rebuild_.insert(fingerprint);
354 
355       // If the URL was just added and now we're deleting it, it may be in the
356       // list of things added since the last rebuild. Delete it from that list.
357       std::set<Fingerprint>::iterator found =
358           added_since_rebuild_.find(fingerprint);
359       if (found != added_since_rebuild_.end())
360         added_since_rebuild_.erase(found);
361 
362       // Delete the URLs from the in-memory table, but don't bother writing
363       // to disk since it will be replaced soon.
364       DeleteFingerprint(fingerprint, false);
365     }
366     return;
367   }
368 
369   // Compute the deleted URLs' fingerprints and delete them
370   std::set<Fingerprint> deleted_fingerprints;
371   while (urls->HasNextURL()) {
372     const GURL& url(urls->NextURL());
373     if (!url.is_valid())
374       continue;
375     deleted_fingerprints.insert(
376         ComputeURLFingerprint(url.spec().data(), url.spec().size(), salt_));
377   }
378   DeleteFingerprintsFromCurrentTable(deleted_fingerprints);
379 }
380 
381 // See VisitedLinkCommon::IsVisited which should be in sync with this algorithm
AddFingerprint(Fingerprint fingerprint,bool send_notifications)382 VisitedLinkMaster::Hash VisitedLinkMaster::AddFingerprint(
383     Fingerprint fingerprint,
384     bool send_notifications) {
385   if (!hash_table_ || table_length_ == 0) {
386     NOTREACHED();  // Not initialized.
387     return null_hash_;
388   }
389 
390   Hash cur_hash = HashFingerprint(fingerprint);
391   Hash first_hash = cur_hash;
392   while (true) {
393     Fingerprint cur_fingerprint = FingerprintAt(cur_hash);
394     if (cur_fingerprint == fingerprint)
395       return null_hash_;  // This fingerprint is already in there, do nothing.
396 
397     if (cur_fingerprint == null_fingerprint_) {
398       // End of probe sequence found, insert here.
399       hash_table_[cur_hash] = fingerprint;
400       used_items_++;
401       // If allowed, notify listener that a new visited link was added.
402       if (send_notifications)
403         listener_->Add(fingerprint);
404       return cur_hash;
405     }
406 
407     // Advance in the probe sequence.
408     cur_hash = IncrementHash(cur_hash);
409     if (cur_hash == first_hash) {
410       // This means that we've wrapped around and are about to go into an
411       // infinite loop. Something was wrong with the hashtable resizing
412       // logic, so stop here.
413       NOTREACHED();
414       return null_hash_;
415     }
416   }
417 }
418 
DeleteFingerprintsFromCurrentTable(const std::set<Fingerprint> & fingerprints)419 void VisitedLinkMaster::DeleteFingerprintsFromCurrentTable(
420     const std::set<Fingerprint>& fingerprints) {
421   bool bulk_write = (fingerprints.size() > kBigDeleteThreshold);
422 
423   // Delete the URLs from the table.
424   for (std::set<Fingerprint>::const_iterator i = fingerprints.begin();
425        i != fingerprints.end(); ++i)
426     DeleteFingerprint(*i, !bulk_write);
427 
428   // These deleted fingerprints may make us shrink the table.
429   if (ResizeTableIfNecessary())
430     return;  // The resize function wrote the new table to disk for us.
431 
432   // Nobody wrote this out for us, write the full file to disk.
433   if (bulk_write && persist_to_disk_)
434     WriteFullTable();
435 }
436 
DeleteFingerprint(Fingerprint fingerprint,bool update_file)437 bool VisitedLinkMaster::DeleteFingerprint(Fingerprint fingerprint,
438                                           bool update_file) {
439   if (!hash_table_ || table_length_ == 0) {
440     NOTREACHED();  // Not initialized.
441     return false;
442   }
443   if (!IsVisited(fingerprint))
444     return false;  // Not in the database to delete.
445 
446   // First update the header used count.
447   used_items_--;
448   if (update_file && persist_to_disk_)
449     WriteUsedItemCountToFile();
450 
451   Hash deleted_hash = HashFingerprint(fingerprint);
452 
453   // Find the range of "stuff" in the hash table that is adjacent to this
454   // fingerprint. These are things that could be affected by the change in
455   // the hash table. Since we use linear probing, anything after the deleted
456   // item up until an empty item could be affected.
457   Hash end_range = deleted_hash;
458   while (true) {
459     Hash next_hash = IncrementHash(end_range);
460     if (next_hash == deleted_hash)
461       break;  // We wrapped around and the whole table is full.
462     if (!hash_table_[next_hash])
463       break;  // Found the last spot.
464     end_range = next_hash;
465   }
466 
467   // We could get all fancy and move the affected fingerprints around, but
468   // instead we just remove them all and re-add them (minus our deleted one).
469   // This will mean there's a small window of time where the affected links
470   // won't be marked visited.
471   base::StackVector<Fingerprint, 32> shuffled_fingerprints;
472   Hash stop_loop = IncrementHash(end_range);  // The end range is inclusive.
473   for (Hash i = deleted_hash; i != stop_loop; i = IncrementHash(i)) {
474     if (hash_table_[i] != fingerprint) {
475       // Don't save the one we're deleting!
476       shuffled_fingerprints->push_back(hash_table_[i]);
477 
478       // This will balance the increment of this value in AddFingerprint below
479       // so there is no net change.
480       used_items_--;
481     }
482     hash_table_[i] = null_fingerprint_;
483   }
484 
485   if (!shuffled_fingerprints->empty()) {
486     // Need to add the new items back.
487     for (size_t i = 0; i < shuffled_fingerprints->size(); i++)
488       AddFingerprint(shuffled_fingerprints[i], false);
489   }
490 
491   // Write the affected range to disk [deleted_hash, end_range].
492   if (update_file && persist_to_disk_)
493     WriteHashRangeToFile(deleted_hash, end_range);
494 
495   return true;
496 }
497 
WriteFullTable()498 void VisitedLinkMaster::WriteFullTable() {
499   // This function can get called when the file is open, for example, when we
500   // resize the table. We must handle this case and not try to reopen the file,
501   // since there may be write operations pending on the file I/O thread.
502   //
503   // Note that once we start writing, we do not delete on error. This means
504   // there can be a partial file, but the short file will be detected next time
505   // we start, and will be replaced.
506   //
507   // This might possibly get corrupted if we crash in the middle of writing.
508   // We should pick up the most common types of these failures when we notice
509   // that the file size is different when we load it back in, and then we will
510   // regenerate the table.
511   DCHECK(persist_to_disk_);
512 
513   if (!file_) {
514     file_ = static_cast<FILE**>(calloc(1, sizeof(*file_)));
515     base::FilePath filename;
516     GetDatabaseFileName(&filename);
517     PostIOTask(FROM_HERE, base::Bind(&AsyncOpen, file_, filename));
518   }
519 
520   // Write the new header.
521   int32 header[4];
522   header[0] = kFileSignature;
523   header[1] = kFileCurrentVersion;
524   header[2] = table_length_;
525   header[3] = used_items_;
526   WriteToFile(file_, 0, header, sizeof(header));
527   WriteToFile(file_, sizeof(header), salt_, LINK_SALT_LENGTH);
528 
529   // Write the hash data.
530   WriteToFile(file_, kFileHeaderSize,
531               hash_table_, table_length_ * sizeof(Fingerprint));
532 
533   // The hash table may have shrunk, so make sure this is the end.
534   PostIOTask(FROM_HERE, base::Bind(&AsyncTruncate, file_));
535 }
536 
InitFromFile()537 bool VisitedLinkMaster::InitFromFile() {
538   DCHECK(file_ == NULL);
539   DCHECK(persist_to_disk_);
540 
541   base::FilePath filename;
542   GetDatabaseFileName(&filename);
543   base::ScopedFILE file_closer(base::OpenFile(filename, "rb+"));
544   if (!file_closer.get())
545     return false;
546 
547   int32 num_entries, used_count;
548   if (!ReadFileHeader(file_closer.get(), &num_entries, &used_count, salt_))
549     return false;  // Header isn't valid.
550 
551   // Allocate and read the table.
552   if (!CreateURLTable(num_entries, false))
553     return false;
554   if (!ReadFromFile(file_closer.get(), kFileHeaderSize,
555                     hash_table_, num_entries * sizeof(Fingerprint))) {
556     FreeURLTable();
557     return false;
558   }
559   used_items_ = used_count;
560 
561 #ifndef NDEBUG
562   DebugValidate();
563 #endif
564 
565   file_ = static_cast<FILE**>(malloc(sizeof(*file_)));
566   *file_ = file_closer.release();
567   return true;
568 }
569 
InitFromScratch(bool suppress_rebuild)570 bool VisitedLinkMaster::InitFromScratch(bool suppress_rebuild) {
571   int32 table_size = kDefaultTableSize;
572   if (table_size_override_)
573     table_size = table_size_override_;
574 
575   // The salt must be generated before the table so that it can be copied to
576   // the shared memory.
577   GenerateSalt(salt_);
578   if (!CreateURLTable(table_size, true))
579     return false;
580 
581 #ifndef NDEBUG
582   DebugValidate();
583 #endif
584 
585   if (suppress_rebuild && persist_to_disk_) {
586     // When we disallow rebuilds (normally just unit tests), just use the
587     // current empty table.
588     WriteFullTable();
589     return true;
590   }
591 
592   // This will build the table from history. On the first run, history will
593   // be empty, so this will be correct. This will also write the new table
594   // to disk. We don't want to save explicitly here, since the rebuild may
595   // not complete, leaving us with an empty but valid visited link database.
596   // In the future, we won't know we need to try rebuilding again.
597   return RebuildTableFromDelegate();
598 }
599 
ReadFileHeader(FILE * file,int32 * num_entries,int32 * used_count,uint8 salt[LINK_SALT_LENGTH])600 bool VisitedLinkMaster::ReadFileHeader(FILE* file,
601                                        int32* num_entries,
602                                        int32* used_count,
603                                        uint8 salt[LINK_SALT_LENGTH]) {
604   DCHECK(persist_to_disk_);
605 
606   // Get file size.
607   // Note that there is no need to seek back to the original location in the
608   // file since ReadFromFile() [which is the next call accessing the file]
609   // seeks before reading.
610   if (fseek(file, 0, SEEK_END) == -1)
611     return false;
612   size_t file_size = ftell(file);
613 
614   if (file_size <= kFileHeaderSize)
615     return false;
616 
617   uint8 header[kFileHeaderSize];
618   if (!ReadFromFile(file, 0, &header, kFileHeaderSize))
619     return false;
620 
621   // Verify the signature.
622   int32 signature;
623   memcpy(&signature, &header[kFileHeaderSignatureOffset], sizeof(signature));
624   if (signature != kFileSignature)
625     return false;
626 
627   // Verify the version is up-to-date. As with other read errors, a version
628   // mistmatch will trigger a rebuild of the database from history, which will
629   // have the effect of migrating the database.
630   int32 version;
631   memcpy(&version, &header[kFileHeaderVersionOffset], sizeof(version));
632   if (version != kFileCurrentVersion)
633     return false;  // Bad version.
634 
635   // Read the table size and make sure it matches the file size.
636   memcpy(num_entries, &header[kFileHeaderLengthOffset], sizeof(*num_entries));
637   if (*num_entries * sizeof(Fingerprint) + kFileHeaderSize != file_size)
638     return false;  // Bad size.
639 
640   // Read the used item count.
641   memcpy(used_count, &header[kFileHeaderUsedOffset], sizeof(*used_count));
642   if (*used_count > *num_entries)
643     return false;  // Bad used item count;
644 
645   // Read the salt.
646   memcpy(salt, &header[kFileHeaderSaltOffset], LINK_SALT_LENGTH);
647 
648   // This file looks OK from the header's perspective.
649   return true;
650 }
651 
GetDatabaseFileName(base::FilePath * filename)652 bool VisitedLinkMaster::GetDatabaseFileName(base::FilePath* filename) {
653   if (!database_name_override_.empty()) {
654     // use this filename, the directory must exist
655     *filename = database_name_override_;
656     return true;
657   }
658 
659   if (!browser_context_ || browser_context_->GetPath().empty())
660     return false;
661 
662   base::FilePath profile_dir = browser_context_->GetPath();
663   *filename = profile_dir.Append(FILE_PATH_LITERAL("Visited Links"));
664   return true;
665 }
666 
667 // Initializes the shared memory structure. The salt should already be filled
668 // in so that it can be written to the shared memory
CreateURLTable(int32 num_entries,bool init_to_empty)669 bool VisitedLinkMaster::CreateURLTable(int32 num_entries, bool init_to_empty) {
670   // The table is the size of the table followed by the entries.
671   uint32 alloc_size = num_entries * sizeof(Fingerprint) + sizeof(SharedHeader);
672 
673   // Create the shared memory object.
674   shared_memory_ = new base::SharedMemory();
675   if (!shared_memory_)
676     return false;
677 
678   base::SharedMemoryCreateOptions options;
679   options.size = alloc_size;
680   options.share_read_only = true;
681 
682   if (!shared_memory_->Create(options) || !shared_memory_->Map(alloc_size)) {
683     delete shared_memory_;
684     shared_memory_ = NULL;
685     return false;
686   }
687 
688   if (init_to_empty) {
689     memset(shared_memory_->memory(), 0, alloc_size);
690     used_items_ = 0;
691   }
692   table_length_ = num_entries;
693 
694   // Save the header for other processes to read.
695   SharedHeader* header = static_cast<SharedHeader*>(shared_memory_->memory());
696   header->length = table_length_;
697   memcpy(header->salt, salt_, LINK_SALT_LENGTH);
698 
699   // Our table pointer is just the data immediately following the size.
700   hash_table_ = reinterpret_cast<Fingerprint*>(
701       static_cast<char*>(shared_memory_->memory()) + sizeof(SharedHeader));
702 
703   return true;
704 }
705 
BeginReplaceURLTable(int32 num_entries)706 bool VisitedLinkMaster::BeginReplaceURLTable(int32 num_entries) {
707   base::SharedMemory *old_shared_memory = shared_memory_;
708   Fingerprint* old_hash_table = hash_table_;
709   int32 old_table_length = table_length_;
710   if (!CreateURLTable(num_entries, true)) {
711     // Try to put back the old state.
712     shared_memory_ = old_shared_memory;
713     hash_table_ = old_hash_table;
714     table_length_ = old_table_length;
715     return false;
716   }
717 
718 #ifndef NDEBUG
719   DebugValidate();
720 #endif
721 
722   return true;
723 }
724 
FreeURLTable()725 void VisitedLinkMaster::FreeURLTable() {
726   if (shared_memory_) {
727     delete shared_memory_;
728     shared_memory_ = NULL;
729   }
730   if (!persist_to_disk_ || !file_)
731     return;
732   PostIOTask(FROM_HERE, base::Bind(&AsyncClose, file_));
733   // AsyncClose() will close the file and free the memory pointed by |file_|.
734   file_ = NULL;
735 }
736 
ResizeTableIfNecessary()737 bool VisitedLinkMaster::ResizeTableIfNecessary() {
738   DCHECK(table_length_ > 0) << "Must have a table";
739 
740   // Load limits for good performance/space. We are pretty conservative about
741   // keeping the table not very full. This is because we use linear probing
742   // which increases the likelihood of clumps of entries which will reduce
743   // performance.
744   const float max_table_load = 0.5f;  // Grow when we're > this full.
745   const float min_table_load = 0.2f;  // Shrink when we're < this full.
746 
747   float load = ComputeTableLoad();
748   if (load < max_table_load &&
749       (table_length_ <= static_cast<float>(kDefaultTableSize) ||
750        load > min_table_load))
751     return false;
752 
753   // Table needs to grow or shrink.
754   int new_size = NewTableSizeForCount(used_items_);
755   DCHECK(new_size > used_items_);
756   DCHECK(load <= min_table_load || new_size > table_length_);
757   ResizeTable(new_size);
758   return true;
759 }
760 
ResizeTable(int32 new_size)761 void VisitedLinkMaster::ResizeTable(int32 new_size) {
762   DCHECK(shared_memory_ && shared_memory_->memory() && hash_table_);
763   shared_memory_serial_++;
764 
765 #ifndef NDEBUG
766   DebugValidate();
767 #endif
768 
769   base::SharedMemory* old_shared_memory = shared_memory_;
770   Fingerprint* old_hash_table = hash_table_;
771   int32 old_table_length = table_length_;
772   if (!BeginReplaceURLTable(new_size))
773     return;
774 
775   // Now we have two tables, our local copy which is the old one, and the new
776   // one loaded into this object where we need to copy the data.
777   for (int32 i = 0; i < old_table_length; i++) {
778     Fingerprint cur = old_hash_table[i];
779     if (cur)
780       AddFingerprint(cur, false);
781   }
782 
783   // On error unmapping, just forget about it since we can't do anything
784   // else to release it.
785   delete old_shared_memory;
786 
787   // Send an update notification to all child processes so they read the new
788   // table.
789   listener_->NewTable(shared_memory_);
790 
791 #ifndef NDEBUG
792   DebugValidate();
793 #endif
794 
795   // The new table needs to be written to disk.
796   if (persist_to_disk_)
797     WriteFullTable();
798 }
799 
NewTableSizeForCount(int32 item_count) const800 uint32 VisitedLinkMaster::NewTableSizeForCount(int32 item_count) const {
801   // These table sizes are selected to be the maximum prime number less than
802   // a "convenient" multiple of 1K.
803   static const int table_sizes[] = {
804       16381,    // 16K  = 16384   <- don't shrink below this table size
805                 //                   (should be == default_table_size)
806       32767,    // 32K  = 32768
807       65521,    // 64K  = 65536
808       130051,   // 128K = 131072
809       262127,   // 256K = 262144
810       524269,   // 512K = 524288
811       1048549,  // 1M   = 1048576
812       2097143,  // 2M   = 2097152
813       4194301,  // 4M   = 4194304
814       8388571,  // 8M   = 8388608
815       16777199,  // 16M  = 16777216
816       33554347};  // 32M  = 33554432
817 
818   // Try to leave the table 33% full.
819   int desired = item_count * 3;
820 
821   // Find the closest prime.
822   for (size_t i = 0; i < arraysize(table_sizes); i ++) {
823     if (table_sizes[i] > desired)
824       return table_sizes[i];
825   }
826 
827   // Growing very big, just approximate a "good" number, not growing as much
828   // as normal.
829   return item_count * 2 - 1;
830 }
831 
832 // See the TableBuilder definition in the header file for how this works.
RebuildTableFromDelegate()833 bool VisitedLinkMaster::RebuildTableFromDelegate() {
834   DCHECK(!table_builder_.get());
835 
836   // TODO(brettw) make sure we have reasonable salt!
837   table_builder_ = new TableBuilder(this, salt_);
838   delegate_->RebuildTable(table_builder_);
839   return true;
840 }
841 
842 // See the TableBuilder declaration above for how this works.
OnTableRebuildComplete(bool success,const std::vector<Fingerprint> & fingerprints)843 void VisitedLinkMaster::OnTableRebuildComplete(
844     bool success,
845     const std::vector<Fingerprint>& fingerprints) {
846   if (success) {
847     // Replace the old table with a new blank one.
848     shared_memory_serial_++;
849 
850     // We are responsible for freeing it AFTER it has been replaced if
851     // replacement succeeds.
852     base::SharedMemory* old_shared_memory = shared_memory_;
853 
854     int new_table_size = NewTableSizeForCount(
855         static_cast<int>(fingerprints.size() + added_since_rebuild_.size()));
856     if (BeginReplaceURLTable(new_table_size)) {
857       // Free the old table.
858       delete old_shared_memory;
859 
860       // Add the stored fingerprints to the hash table.
861       for (size_t i = 0; i < fingerprints.size(); i++)
862         AddFingerprint(fingerprints[i], false);
863 
864       // Also add anything that was added while we were asynchronously
865       // generating the new table.
866       for (std::set<Fingerprint>::iterator i = added_since_rebuild_.begin();
867            i != added_since_rebuild_.end(); ++i)
868         AddFingerprint(*i, false);
869       added_since_rebuild_.clear();
870 
871       // Now handle deletions.
872       DeleteFingerprintsFromCurrentTable(deleted_since_rebuild_);
873       deleted_since_rebuild_.clear();
874 
875       // Send an update notification to all child processes.
876       listener_->NewTable(shared_memory_);
877 
878       if (persist_to_disk_)
879         WriteFullTable();
880     }
881   }
882   table_builder_ = NULL;  // Will release our reference to the builder.
883 
884   // Notify the unit test that the rebuild is complete (will be NULL in prod.)
885   if (!rebuild_complete_task_.is_null()) {
886     rebuild_complete_task_.Run();
887     rebuild_complete_task_.Reset();
888   }
889 }
890 
WriteToFile(FILE ** file,off_t offset,void * data,int32 data_size)891 void VisitedLinkMaster::WriteToFile(FILE** file,
892                                     off_t offset,
893                                     void* data,
894                                     int32 data_size) {
895   DCHECK(persist_to_disk_);
896 #ifndef NDEBUG
897   posted_asynchronous_operation_ = true;
898 #endif
899   PostIOTask(FROM_HERE,
900       base::Bind(&AsyncWrite, file, offset,
901                  std::string(static_cast<const char*>(data), data_size)));
902 }
903 
WriteUsedItemCountToFile()904 void VisitedLinkMaster::WriteUsedItemCountToFile() {
905   DCHECK(persist_to_disk_);
906   if (!file_)
907     return;  // See comment on the file_ variable for why this might happen.
908   WriteToFile(file_, kFileHeaderUsedOffset, &used_items_, sizeof(used_items_));
909 }
910 
WriteHashRangeToFile(Hash first_hash,Hash last_hash)911 void VisitedLinkMaster::WriteHashRangeToFile(Hash first_hash, Hash last_hash) {
912   DCHECK(persist_to_disk_);
913 
914   if (!file_)
915     return;  // See comment on the file_ variable for why this might happen.
916   if (last_hash < first_hash) {
917     // Handle wraparound at 0. This first write is first_hash->EOF
918     WriteToFile(file_, first_hash * sizeof(Fingerprint) + kFileHeaderSize,
919                 &hash_table_[first_hash],
920                 (table_length_ - first_hash + 1) * sizeof(Fingerprint));
921 
922     // Now do 0->last_lash.
923     WriteToFile(file_, kFileHeaderSize, hash_table_,
924                 (last_hash + 1) * sizeof(Fingerprint));
925   } else {
926     // Normal case, just write the range.
927     WriteToFile(file_, first_hash * sizeof(Fingerprint) + kFileHeaderSize,
928                 &hash_table_[first_hash],
929                 (last_hash - first_hash + 1) * sizeof(Fingerprint));
930   }
931 }
932 
ReadFromFile(FILE * file,off_t offset,void * data,size_t data_size)933 bool VisitedLinkMaster::ReadFromFile(FILE* file,
934                                      off_t offset,
935                                      void* data,
936                                      size_t data_size) {
937   DCHECK(persist_to_disk_);
938 #ifndef NDEBUG
939   // Since this function is synchronous, we require that no asynchronous
940   // operations could possibly be pending.
941   DCHECK(!posted_asynchronous_operation_);
942 #endif
943 
944   if (fseek(file, offset, SEEK_SET) != 0)
945     return false;
946 
947   size_t num_read = fread(data, 1, data_size, file);
948   return num_read == data_size;
949 }
950 
951 // VisitedLinkTableBuilder ----------------------------------------------------
952 
TableBuilder(VisitedLinkMaster * master,const uint8 salt[LINK_SALT_LENGTH])953 VisitedLinkMaster::TableBuilder::TableBuilder(
954     VisitedLinkMaster* master,
955     const uint8 salt[LINK_SALT_LENGTH])
956     : master_(master),
957       success_(true) {
958   fingerprints_.reserve(4096);
959   memcpy(salt_, salt, LINK_SALT_LENGTH * sizeof(uint8));
960 }
961 
962 // TODO(brettw): Do we want to try to cancel the request if this happens? It
963 // could delay shutdown if there are a lot of URLs.
DisownMaster()964 void VisitedLinkMaster::TableBuilder::DisownMaster() {
965   master_ = NULL;
966 }
967 
OnURL(const GURL & url)968 void VisitedLinkMaster::TableBuilder::OnURL(const GURL& url) {
969   if (!url.is_empty()) {
970     fingerprints_.push_back(VisitedLinkMaster::ComputeURLFingerprint(
971         url.spec().data(), url.spec().length(), salt_));
972   }
973 }
974 
OnComplete(bool success)975 void VisitedLinkMaster::TableBuilder::OnComplete(bool success) {
976   success_ = success;
977   DLOG_IF(WARNING, !success) << "Unable to rebuild visited links";
978 
979   // Marshal to the main thread to notify the VisitedLinkMaster that the
980   // rebuild is complete.
981   BrowserThread::PostTask(
982       BrowserThread::UI, FROM_HERE,
983       base::Bind(&TableBuilder::OnCompleteMainThread, this));
984 }
985 
OnCompleteMainThread()986 void VisitedLinkMaster::TableBuilder::OnCompleteMainThread() {
987   if (master_)
988     master_->OnTableRebuildComplete(success_, fingerprints_);
989 }
990 
991 }  // namespace visitedlink
992