1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "chrome/browser/visitedlink/visitedlink_master.h"
6
7 #if defined(OS_WIN)
8 #include <windows.h>
9 #include <io.h>
10 #include <shlobj.h>
11 #endif // defined(OS_WIN)
12 #include <stdio.h>
13
14 #include <algorithm>
15
16 #include "base/file_util.h"
17 #include "base/logging.h"
18 #include "base/message_loop.h"
19 #include "base/path_service.h"
20 #include "base/process_util.h"
21 #include "base/rand_util.h"
22 #include "base/stack_container.h"
23 #include "base/string_util.h"
24 #include "base/threading/thread_restrictions.h"
25 #include "content/browser/browser_thread.h"
26 #include "chrome/browser/history/history.h"
27 #include "chrome/browser/profiles/profile.h"
28
29 using file_util::ScopedFILE;
30 using file_util::OpenFile;
31 using file_util::TruncateFile;
32
33 const int32 VisitedLinkMaster::kFileHeaderSignatureOffset = 0;
34 const int32 VisitedLinkMaster::kFileHeaderVersionOffset = 4;
35 const int32 VisitedLinkMaster::kFileHeaderLengthOffset = 8;
36 const int32 VisitedLinkMaster::kFileHeaderUsedOffset = 12;
37 const int32 VisitedLinkMaster::kFileHeaderSaltOffset = 16;
38
39 const int32 VisitedLinkMaster::kFileCurrentVersion = 2;
40
41 // the signature at the beginning of the URL table = "VLnk" (visited links)
42 const int32 VisitedLinkMaster::kFileSignature = 0x6b6e4c56;
43 const size_t VisitedLinkMaster::kFileHeaderSize =
44 kFileHeaderSaltOffset + LINK_SALT_LENGTH;
45
46 // This value should also be the same as the smallest size in the lookup
47 // table in NewTableSizeForCount (prime number).
48 const unsigned VisitedLinkMaster::kDefaultTableSize = 16381;
49
50 const size_t VisitedLinkMaster::kBigDeleteThreshold = 64;
51
52 namespace {
53
54 // Fills the given salt structure with some quasi-random values
55 // It is not necessary to generate a cryptographically strong random string,
56 // only that it be reasonably different for different users.
GenerateSalt(uint8 salt[LINK_SALT_LENGTH])57 void GenerateSalt(uint8 salt[LINK_SALT_LENGTH]) {
58 DCHECK_EQ(LINK_SALT_LENGTH, 8) << "This code assumes the length of the salt";
59 uint64 randval = base::RandUint64();
60 memcpy(salt, &randval, 8);
61 }
62
63 // AsyncWriter ----------------------------------------------------------------
64
65 // This task executes on a background thread and executes a write. This
66 // prevents us from blocking the UI thread doing I/O.
67 class AsyncWriter : public Task {
68 public:
AsyncWriter(FILE * file,int32 offset,const void * data,size_t data_len)69 AsyncWriter(FILE* file, int32 offset, const void* data, size_t data_len)
70 : file_(file),
71 offset_(offset) {
72 data_->resize(data_len);
73 memcpy(&*data_->begin(), data, data_len);
74 }
75
Run()76 virtual void Run() {
77 WriteToFile(file_, offset_,
78 &*data_->begin(), static_cast<int32>(data_->size()));
79 }
80
81 // Exposed as a static so it can be called directly from the Master to
82 // reduce the number of platform-specific I/O sites we have. Returns true if
83 // the write was complete.
WriteToFile(FILE * file,off_t offset,const void * data,size_t data_len)84 static bool WriteToFile(FILE* file,
85 off_t offset,
86 const void* data,
87 size_t data_len) {
88 if (fseek(file, offset, SEEK_SET) != 0)
89 return false; // Don't write to an invalid part of the file.
90
91 size_t num_written = fwrite(data, 1, data_len, file);
92
93 // The write may not make it to the kernel (stdlib may buffer the write)
94 // until the next fseek/fclose call. If we crash, it's easy for our used
95 // item count to be out of sync with the number of hashes we write.
96 // Protect against this by calling fflush.
97 int ret = fflush(file);
98 DCHECK_EQ(0, ret);
99 return num_written == data_len;
100 }
101
102 private:
103 // The data to write and where to write it.
104 FILE* file_;
105 int32 offset_; // Offset from the beginning of the file.
106
107 // Most writes are just a single fingerprint, so we reserve that much in this
108 // object to avoid mallocs in that case.
109 StackVector<char, sizeof(VisitedLinkCommon::Fingerprint)> data_;
110
111 DISALLOW_COPY_AND_ASSIGN(AsyncWriter);
112 };
113
114 // Used to asynchronously set the end of the file. This must be done on the
115 // same thread as the writing to keep things synchronized.
116 class AsyncSetEndOfFile : public Task {
117 public:
AsyncSetEndOfFile(FILE * file)118 explicit AsyncSetEndOfFile(FILE* file) : file_(file) {}
119
Run()120 virtual void Run() {
121 TruncateFile(file_);
122 }
123
124 private:
125 FILE* file_;
126 DISALLOW_COPY_AND_ASSIGN(AsyncSetEndOfFile);
127 };
128
129 // Used to asynchronously close a file. This must be done on the same thread as
130 // the writing to keep things synchronized.
131 class AsyncCloseHandle : public Task {
132 public:
AsyncCloseHandle(FILE * file)133 explicit AsyncCloseHandle(FILE* file) : file_(file) {}
134
Run()135 virtual void Run() {
136 fclose(file_);
137 }
138
139 private:
140 FILE* file_;
141 DISALLOW_COPY_AND_ASSIGN(AsyncCloseHandle);
142 };
143
144 } // namespace
145
146 // TableBuilder ---------------------------------------------------------------
147
148 // How rebuilding from history works
149 // ---------------------------------
150 //
151 // We mark that we're rebuilding from history by setting the table_builder_
152 // member in VisitedLinkMaster to the TableBuilder we create. This builder
153 // will be called on the history thread by the history system for every URL
154 // in the database.
155 //
156 // The builder will store the fingerprints for those URLs, and then marshalls
157 // back to the main thread where the VisitedLinkMaster will be notified. The
158 // master then replaces its table with a new table containing the computed
159 // fingerprints.
160 //
161 // The builder must remain active while the history system is using it.
162 // Sometimes, the master will be deleted before the rebuild is complete, in
163 // which case it notifies the builder via DisownMaster(). The builder will
164 // delete itself once rebuilding is complete, and not execute any callback.
165 class VisitedLinkMaster::TableBuilder
166 : public HistoryService::URLEnumerator,
167 public base::RefCountedThreadSafe<TableBuilder> {
168 public:
169 TableBuilder(VisitedLinkMaster* master,
170 const uint8 salt[LINK_SALT_LENGTH]);
171
172 // Called on the main thread when the master is being destroyed. This will
173 // prevent a crash when the query completes and the master is no longer
174 // around. We can not actually do anything but mark this fact, since the
175 // table will be being rebuilt simultaneously on the other thread.
176 void DisownMaster();
177
178 // HistoryService::URLEnumerator
179 virtual void OnURL(const GURL& url);
180 virtual void OnComplete(bool succeed);
181
182 private:
183 friend class base::RefCountedThreadSafe<TableBuilder>;
184
~TableBuilder()185 ~TableBuilder() {}
186
187 // OnComplete mashals to this function on the main thread to do the
188 // notification.
189 void OnCompleteMainThread();
190
191 // Owner of this object. MAY ONLY BE ACCESSED ON THE MAIN THREAD!
192 VisitedLinkMaster* master_;
193
194 // Indicates whether the operation has failed or not.
195 bool success_;
196
197 // Salt for this new table.
198 uint8 salt_[LINK_SALT_LENGTH];
199
200 // Stores the fingerprints we computed on the background thread.
201 VisitedLinkCommon::Fingerprints fingerprints_;
202 };
203
204 // VisitedLinkMaster ----------------------------------------------------------
205
VisitedLinkMaster(Listener * listener,Profile * profile)206 VisitedLinkMaster::VisitedLinkMaster(Listener* listener,
207 Profile* profile) {
208 InitMembers(listener, profile);
209 }
210
VisitedLinkMaster(Listener * listener,HistoryService * history_service,bool suppress_rebuild,const FilePath & filename,int32 default_table_size)211 VisitedLinkMaster::VisitedLinkMaster(Listener* listener,
212 HistoryService* history_service,
213 bool suppress_rebuild,
214 const FilePath& filename,
215 int32 default_table_size) {
216 InitMembers(listener, NULL);
217
218 database_name_override_ = filename;
219 table_size_override_ = default_table_size;
220 history_service_override_ = history_service;
221 suppress_rebuild_ = suppress_rebuild;
222 }
223
~VisitedLinkMaster()224 VisitedLinkMaster::~VisitedLinkMaster() {
225 if (table_builder_.get()) {
226 // Prevent the table builder from calling us back now that we're being
227 // destroyed. Note that we DON'T delete the object, since the history
228 // system is still writing into it. When that is complete, the table
229 // builder will destroy itself when it finds we are gone.
230 table_builder_->DisownMaster();
231 }
232 FreeURLTable();
233 }
234
InitMembers(Listener * listener,Profile * profile)235 void VisitedLinkMaster::InitMembers(Listener* listener, Profile* profile) {
236 DCHECK(listener);
237
238 listener_ = listener;
239 file_ = NULL;
240 shared_memory_ = NULL;
241 shared_memory_serial_ = 0;
242 used_items_ = 0;
243 table_size_override_ = 0;
244 history_service_override_ = NULL;
245 suppress_rebuild_ = false;
246 profile_ = profile;
247
248 #ifndef NDEBUG
249 posted_asynchronous_operation_ = false;
250 #endif
251 }
252
Init()253 bool VisitedLinkMaster::Init() {
254 // We probably shouldn't be loading this from the UI thread,
255 // but it does need to happen early on in startup.
256 // http://code.google.com/p/chromium/issues/detail?id=24163
257 base::ThreadRestrictions::ScopedAllowIO allow_io;
258 if (!InitFromFile())
259 return InitFromScratch(suppress_rebuild_);
260 return true;
261 }
262
TryToAddURL(const GURL & url)263 VisitedLinkMaster::Hash VisitedLinkMaster::TryToAddURL(const GURL& url) {
264 // Extra check that we are not incognito. This should not happen.
265 if (profile_ && profile_->IsOffTheRecord()) {
266 NOTREACHED();
267 return null_hash_;
268 }
269
270 if (!url.is_valid())
271 return null_hash_; // Don't add invalid URLs.
272
273 Fingerprint fingerprint = ComputeURLFingerprint(url.spec().data(),
274 url.spec().size(),
275 salt_);
276 if (table_builder_) {
277 // If we have a pending delete for this fingerprint, cancel it.
278 std::set<Fingerprint>::iterator found =
279 deleted_since_rebuild_.find(fingerprint);
280 if (found != deleted_since_rebuild_.end())
281 deleted_since_rebuild_.erase(found);
282
283 // A rebuild is in progress, save this addition in the temporary list so
284 // it can be added once rebuild is complete.
285 added_since_rebuild_.insert(fingerprint);
286 }
287
288 // If the table is "full", we don't add URLs and just drop them on the floor.
289 // This can happen if we get thousands of new URLs and something causes
290 // the table resizing to fail. This check prevents a hang in that case. Note
291 // that this is *not* the resize limit, this is just a sanity check.
292 if (used_items_ / 8 > table_length_ / 10)
293 return null_hash_; // Table is more than 80% full.
294
295 return AddFingerprint(fingerprint, true);
296 }
297
AddURL(const GURL & url)298 void VisitedLinkMaster::AddURL(const GURL& url) {
299 Hash index = TryToAddURL(url);
300 if (!table_builder_ && index != null_hash_) {
301 // Not rebuilding, so we want to keep the file on disk up-to-date.
302 WriteUsedItemCountToFile();
303 WriteHashRangeToFile(index, index);
304 ResizeTableIfNecessary();
305 }
306 }
307
AddURLs(const std::vector<GURL> & url)308 void VisitedLinkMaster::AddURLs(const std::vector<GURL>& url) {
309 for (std::vector<GURL>::const_iterator i = url.begin();
310 i != url.end(); ++i) {
311 Hash index = TryToAddURL(*i);
312 if (!table_builder_ && index != null_hash_)
313 ResizeTableIfNecessary();
314 }
315
316 // Keeps the file on disk up-to-date.
317 if (!table_builder_)
318 WriteFullTable();
319 }
320
DeleteAllURLs()321 void VisitedLinkMaster::DeleteAllURLs() {
322 // Any pending modifications are invalid.
323 added_since_rebuild_.clear();
324 deleted_since_rebuild_.clear();
325
326 // Clear the hash table.
327 used_items_ = 0;
328 memset(hash_table_, 0, this->table_length_ * sizeof(Fingerprint));
329
330 // Resize it if it is now too empty. Resize may write the new table out for
331 // us, otherwise, schedule writing the new table to disk ourselves.
332 if (!ResizeTableIfNecessary())
333 WriteFullTable();
334
335 listener_->Reset();
336 }
337
DeleteURLs(const std::set<GURL> & urls)338 void VisitedLinkMaster::DeleteURLs(const std::set<GURL>& urls) {
339 typedef std::set<GURL>::const_iterator SetIterator;
340
341 if (urls.empty())
342 return;
343
344 listener_->Reset();
345
346 if (table_builder_) {
347 // A rebuild is in progress, save this deletion in the temporary list so
348 // it can be added once rebuild is complete.
349 for (SetIterator i = urls.begin(); i != urls.end(); ++i) {
350 if (!i->is_valid())
351 continue;
352
353 Fingerprint fingerprint =
354 ComputeURLFingerprint(i->spec().data(), i->spec().size(), salt_);
355 deleted_since_rebuild_.insert(fingerprint);
356
357 // If the URL was just added and now we're deleting it, it may be in the
358 // list of things added since the last rebuild. Delete it from that list.
359 std::set<Fingerprint>::iterator found =
360 added_since_rebuild_.find(fingerprint);
361 if (found != added_since_rebuild_.end())
362 added_since_rebuild_.erase(found);
363
364 // Delete the URLs from the in-memory table, but don't bother writing
365 // to disk since it will be replaced soon.
366 DeleteFingerprint(fingerprint, false);
367 }
368 return;
369 }
370
371 // Compute the deleted URLs' fingerprints and delete them
372 std::set<Fingerprint> deleted_fingerprints;
373 for (SetIterator i = urls.begin(); i != urls.end(); ++i) {
374 if (!i->is_valid())
375 continue;
376 deleted_fingerprints.insert(
377 ComputeURLFingerprint(i->spec().data(), i->spec().size(), salt_));
378 }
379 DeleteFingerprintsFromCurrentTable(deleted_fingerprints);
380 }
381
382 // See VisitedLinkCommon::IsVisited which should be in sync with this algorithm
AddFingerprint(Fingerprint fingerprint,bool send_notifications)383 VisitedLinkMaster::Hash VisitedLinkMaster::AddFingerprint(
384 Fingerprint fingerprint,
385 bool send_notifications) {
386 if (!hash_table_ || table_length_ == 0) {
387 NOTREACHED(); // Not initialized.
388 return null_hash_;
389 }
390
391 Hash cur_hash = HashFingerprint(fingerprint);
392 Hash first_hash = cur_hash;
393 while (true) {
394 Fingerprint cur_fingerprint = FingerprintAt(cur_hash);
395 if (cur_fingerprint == fingerprint)
396 return null_hash_; // This fingerprint is already in there, do nothing.
397
398 if (cur_fingerprint == null_fingerprint_) {
399 // End of probe sequence found, insert here.
400 hash_table_[cur_hash] = fingerprint;
401 used_items_++;
402 // If allowed, notify listener that a new visited link was added.
403 if (send_notifications)
404 listener_->Add(fingerprint);
405 return cur_hash;
406 }
407
408 // Advance in the probe sequence.
409 cur_hash = IncrementHash(cur_hash);
410 if (cur_hash == first_hash) {
411 // This means that we've wrapped around and are about to go into an
412 // infinite loop. Something was wrong with the hashtable resizing
413 // logic, so stop here.
414 NOTREACHED();
415 return null_hash_;
416 }
417 }
418 }
419
DeleteFingerprintsFromCurrentTable(const std::set<Fingerprint> & fingerprints)420 void VisitedLinkMaster::DeleteFingerprintsFromCurrentTable(
421 const std::set<Fingerprint>& fingerprints) {
422 bool bulk_write = (fingerprints.size() > kBigDeleteThreshold);
423
424 // Delete the URLs from the table.
425 for (std::set<Fingerprint>::const_iterator i = fingerprints.begin();
426 i != fingerprints.end(); ++i)
427 DeleteFingerprint(*i, !bulk_write);
428
429 // These deleted fingerprints may make us shrink the table.
430 if (ResizeTableIfNecessary())
431 return; // The resize function wrote the new table to disk for us.
432
433 // Nobody wrote this out for us, write the full file to disk.
434 if (bulk_write)
435 WriteFullTable();
436 }
437
DeleteFingerprint(Fingerprint fingerprint,bool update_file)438 bool VisitedLinkMaster::DeleteFingerprint(Fingerprint fingerprint,
439 bool update_file) {
440 if (!hash_table_ || table_length_ == 0) {
441 NOTREACHED(); // Not initialized.
442 return false;
443 }
444 if (!IsVisited(fingerprint))
445 return false; // Not in the database to delete.
446
447 // First update the header used count.
448 used_items_--;
449 if (update_file)
450 WriteUsedItemCountToFile();
451
452 Hash deleted_hash = HashFingerprint(fingerprint);
453
454 // Find the range of "stuff" in the hash table that is adjacent to this
455 // fingerprint. These are things that could be affected by the change in
456 // the hash table. Since we use linear probing, anything after the deleted
457 // item up until an empty item could be affected.
458 Hash end_range = deleted_hash;
459 while (true) {
460 Hash next_hash = IncrementHash(end_range);
461 if (next_hash == deleted_hash)
462 break; // We wrapped around and the whole table is full.
463 if (!hash_table_[next_hash])
464 break; // Found the last spot.
465 end_range = next_hash;
466 }
467
468 // We could get all fancy and move the affected fingerprints around, but
469 // instead we just remove them all and re-add them (minus our deleted one).
470 // This will mean there's a small window of time where the affected links
471 // won't be marked visited.
472 StackVector<Fingerprint, 32> shuffled_fingerprints;
473 Hash stop_loop = IncrementHash(end_range); // The end range is inclusive.
474 for (Hash i = deleted_hash; i != stop_loop; i = IncrementHash(i)) {
475 if (hash_table_[i] != fingerprint) {
476 // Don't save the one we're deleting!
477 shuffled_fingerprints->push_back(hash_table_[i]);
478
479 // This will balance the increment of this value in AddFingerprint below
480 // so there is no net change.
481 used_items_--;
482 }
483 hash_table_[i] = null_fingerprint_;
484 }
485
486 if (!shuffled_fingerprints->empty()) {
487 // Need to add the new items back.
488 for (size_t i = 0; i < shuffled_fingerprints->size(); i++)
489 AddFingerprint(shuffled_fingerprints[i], false);
490 }
491
492 // Write the affected range to disk [deleted_hash, end_range].
493 if (update_file)
494 WriteHashRangeToFile(deleted_hash, end_range);
495
496 return true;
497 }
498
WriteFullTable()499 bool VisitedLinkMaster::WriteFullTable() {
500 // This function can get called when the file is open, for example, when we
501 // resize the table. We must handle this case and not try to reopen the file,
502 // since there may be write operations pending on the file I/O thread.
503 //
504 // Note that once we start writing, we do not delete on error. This means
505 // there can be a partial file, but the short file will be detected next time
506 // we start, and will be replaced.
507 //
508 // This might possibly get corrupted if we crash in the middle of writing.
509 // We should pick up the most common types of these failures when we notice
510 // that the file size is different when we load it back in, and then we will
511 // regenerate the table.
512 if (!file_) {
513 FilePath filename;
514 GetDatabaseFileName(&filename);
515 file_ = OpenFile(filename, "wb+");
516 if (!file_) {
517 DLOG(ERROR) << "Failed to open file " << filename.value();
518 return false;
519 }
520 }
521
522 // Write the new header.
523 int32 header[4];
524 header[0] = kFileSignature;
525 header[1] = kFileCurrentVersion;
526 header[2] = table_length_;
527 header[3] = used_items_;
528 WriteToFile(file_, 0, header, sizeof(header));
529 WriteToFile(file_, sizeof(header), salt_, LINK_SALT_LENGTH);
530
531 // Write the hash data.
532 WriteToFile(file_, kFileHeaderSize,
533 hash_table_, table_length_ * sizeof(Fingerprint));
534
535 // The hash table may have shrunk, so make sure this is the end.
536 BrowserThread::PostTask(
537 BrowserThread::FILE, FROM_HERE, new AsyncSetEndOfFile(file_));
538 return true;
539 }
540
InitFromFile()541 bool VisitedLinkMaster::InitFromFile() {
542 DCHECK(file_ == NULL);
543
544 FilePath filename;
545 GetDatabaseFileName(&filename);
546 ScopedFILE file_closer(OpenFile(filename, "rb+"));
547 if (!file_closer.get())
548 return false;
549
550 int32 num_entries, used_count;
551 if (!ReadFileHeader(file_closer.get(), &num_entries, &used_count, salt_))
552 return false; // Header isn't valid.
553
554 // Allocate and read the table.
555 if (!CreateURLTable(num_entries, false))
556 return false;
557 if (!ReadFromFile(file_closer.get(), kFileHeaderSize,
558 hash_table_, num_entries * sizeof(Fingerprint))) {
559 FreeURLTable();
560 return false;
561 }
562 used_items_ = used_count;
563
564 #ifndef NDEBUG
565 DebugValidate();
566 #endif
567
568 file_ = file_closer.release();
569 return true;
570 }
571
InitFromScratch(bool suppress_rebuild)572 bool VisitedLinkMaster::InitFromScratch(bool suppress_rebuild) {
573 int32 table_size = kDefaultTableSize;
574 if (table_size_override_)
575 table_size = table_size_override_;
576
577 // The salt must be generated before the table so that it can be copied to
578 // the shared memory.
579 GenerateSalt(salt_);
580 if (!CreateURLTable(table_size, true))
581 return false;
582
583 #ifndef NDEBUG
584 DebugValidate();
585 #endif
586
587 if (suppress_rebuild) {
588 // When we disallow rebuilds (normally just unit tests), just use the
589 // current empty table.
590 return WriteFullTable();
591 }
592
593 // This will build the table from history. On the first run, history will
594 // be empty, so this will be correct. This will also write the new table
595 // to disk. We don't want to save explicitly here, since the rebuild may
596 // not complete, leaving us with an empty but valid visited link database.
597 // In the future, we won't know we need to try rebuilding again.
598 return RebuildTableFromHistory();
599 }
600
ReadFileHeader(FILE * file,int32 * num_entries,int32 * used_count,uint8 salt[LINK_SALT_LENGTH])601 bool VisitedLinkMaster::ReadFileHeader(FILE* file,
602 int32* num_entries,
603 int32* used_count,
604 uint8 salt[LINK_SALT_LENGTH]) {
605 // Get file size.
606 // Note that there is no need to seek back to the original location in the
607 // file since ReadFromFile() [which is the next call accessing the file]
608 // seeks before reading.
609 if (fseek(file, 0, SEEK_END) == -1)
610 return false;
611 size_t file_size = ftell(file);
612
613 if (file_size <= kFileHeaderSize)
614 return false;
615
616 uint8 header[kFileHeaderSize];
617 if (!ReadFromFile(file, 0, &header, kFileHeaderSize))
618 return false;
619
620 // Verify the signature.
621 int32 signature;
622 memcpy(&signature, &header[kFileHeaderSignatureOffset], sizeof(signature));
623 if (signature != kFileSignature)
624 return false;
625
626 // Verify the version is up-to-date. As with other read errors, a version
627 // mistmatch will trigger a rebuild of the database from history, which will
628 // have the effect of migrating the database.
629 int32 version;
630 memcpy(&version, &header[kFileHeaderVersionOffset], sizeof(version));
631 if (version != kFileCurrentVersion)
632 return false; // Bad version.
633
634 // Read the table size and make sure it matches the file size.
635 memcpy(num_entries, &header[kFileHeaderLengthOffset], sizeof(*num_entries));
636 if (*num_entries * sizeof(Fingerprint) + kFileHeaderSize != file_size)
637 return false; // Bad size.
638
639 // Read the used item count.
640 memcpy(used_count, &header[kFileHeaderUsedOffset], sizeof(*used_count));
641 if (*used_count > *num_entries)
642 return false; // Bad used item count;
643
644 // Read the salt.
645 memcpy(salt, &header[kFileHeaderSaltOffset], LINK_SALT_LENGTH);
646
647 // This file looks OK from the header's perspective.
648 return true;
649 }
650
GetDatabaseFileName(FilePath * filename)651 bool VisitedLinkMaster::GetDatabaseFileName(FilePath* filename) {
652 if (!database_name_override_.empty()) {
653 // use this filename, the directory must exist
654 *filename = database_name_override_;
655 return true;
656 }
657
658 if (!profile_ || profile_->GetPath().empty())
659 return false;
660
661 FilePath profile_dir = profile_->GetPath();
662 *filename = profile_dir.Append(FILE_PATH_LITERAL("Visited Links"));
663 return true;
664 }
665
666 // Initializes the shared memory structure. The salt should already be filled
667 // in so that it can be written to the shared memory
CreateURLTable(int32 num_entries,bool init_to_empty)668 bool VisitedLinkMaster::CreateURLTable(int32 num_entries, bool init_to_empty) {
669 // The table is the size of the table followed by the entries.
670 uint32 alloc_size = num_entries * sizeof(Fingerprint) + sizeof(SharedHeader);
671
672 // Create the shared memory object.
673 shared_memory_ = new base::SharedMemory();
674 if (!shared_memory_)
675 return false;
676
677 if (!shared_memory_->CreateAndMapAnonymous(alloc_size)) {
678 delete shared_memory_;
679 shared_memory_ = NULL;
680 return false;
681 }
682
683 if (init_to_empty) {
684 memset(shared_memory_->memory(), 0, alloc_size);
685 used_items_ = 0;
686 }
687 table_length_ = num_entries;
688
689 // Save the header for other processes to read.
690 SharedHeader* header = static_cast<SharedHeader*>(shared_memory_->memory());
691 header->length = table_length_;
692 memcpy(header->salt, salt_, LINK_SALT_LENGTH);
693
694 // Our table pointer is just the data immediately following the size.
695 hash_table_ = reinterpret_cast<Fingerprint*>(
696 static_cast<char*>(shared_memory_->memory()) + sizeof(SharedHeader));
697
698 return true;
699 }
700
BeginReplaceURLTable(int32 num_entries)701 bool VisitedLinkMaster::BeginReplaceURLTable(int32 num_entries) {
702 base::SharedMemory *old_shared_memory = shared_memory_;
703 Fingerprint* old_hash_table = hash_table_;
704 int32 old_table_length = table_length_;
705 if (!CreateURLTable(num_entries, true)) {
706 // Try to put back the old state.
707 shared_memory_ = old_shared_memory;
708 hash_table_ = old_hash_table;
709 table_length_ = old_table_length;
710 return false;
711 }
712
713 #ifndef NDEBUG
714 DebugValidate();
715 #endif
716
717 return true;
718 }
719
FreeURLTable()720 void VisitedLinkMaster::FreeURLTable() {
721 if (shared_memory_) {
722 delete shared_memory_;
723 shared_memory_ = NULL;
724 }
725 if (!file_)
726 return;
727
728 BrowserThread::PostTask(
729 BrowserThread::FILE, FROM_HERE, new AsyncCloseHandle(file_));
730 }
731
ResizeTableIfNecessary()732 bool VisitedLinkMaster::ResizeTableIfNecessary() {
733 DCHECK(table_length_ > 0) << "Must have a table";
734
735 // Load limits for good performance/space. We are pretty conservative about
736 // keeping the table not very full. This is because we use linear probing
737 // which increases the likelihood of clumps of entries which will reduce
738 // performance.
739 const float max_table_load = 0.5f; // Grow when we're > this full.
740 const float min_table_load = 0.2f; // Shrink when we're < this full.
741
742 float load = ComputeTableLoad();
743 if (load < max_table_load &&
744 (table_length_ <= static_cast<float>(kDefaultTableSize) ||
745 load > min_table_load))
746 return false;
747
748 // Table needs to grow or shrink.
749 int new_size = NewTableSizeForCount(used_items_);
750 DCHECK(new_size > used_items_);
751 DCHECK(load <= min_table_load || new_size > table_length_);
752 ResizeTable(new_size);
753 return true;
754 }
755
ResizeTable(int32 new_size)756 void VisitedLinkMaster::ResizeTable(int32 new_size) {
757 DCHECK(shared_memory_ && shared_memory_->memory() && hash_table_);
758 shared_memory_serial_++;
759
760 #ifndef NDEBUG
761 DebugValidate();
762 #endif
763
764 base::SharedMemory* old_shared_memory = shared_memory_;
765 Fingerprint* old_hash_table = hash_table_;
766 int32 old_table_length = table_length_;
767 if (!BeginReplaceURLTable(new_size))
768 return;
769
770 // Now we have two tables, our local copy which is the old one, and the new
771 // one loaded into this object where we need to copy the data.
772 for (int32 i = 0; i < old_table_length; i++) {
773 Fingerprint cur = old_hash_table[i];
774 if (cur)
775 AddFingerprint(cur, false);
776 }
777
778 // On error unmapping, just forget about it since we can't do anything
779 // else to release it.
780 delete old_shared_memory;
781
782 // Send an update notification to all child processes so they read the new
783 // table.
784 listener_->NewTable(shared_memory_);
785
786 #ifndef NDEBUG
787 DebugValidate();
788 #endif
789
790 // The new table needs to be written to disk.
791 WriteFullTable();
792 }
793
NewTableSizeForCount(int32 item_count) const794 uint32 VisitedLinkMaster::NewTableSizeForCount(int32 item_count) const {
795 // These table sizes are selected to be the maximum prime number less than
796 // a "convenient" multiple of 1K.
797 static const int table_sizes[] = {
798 16381, // 16K = 16384 <- don't shrink below this table size
799 // (should be == default_table_size)
800 32767, // 32K = 32768
801 65521, // 64K = 65536
802 130051, // 128K = 131072
803 262127, // 256K = 262144
804 524269, // 512K = 524288
805 1048549, // 1M = 1048576
806 2097143, // 2M = 2097152
807 4194301, // 4M = 4194304
808 8388571, // 8M = 8388608
809 16777199, // 16M = 16777216
810 33554347}; // 32M = 33554432
811
812 // Try to leave the table 33% full.
813 int desired = item_count * 3;
814
815 // Find the closest prime.
816 for (size_t i = 0; i < arraysize(table_sizes); i ++) {
817 if (table_sizes[i] > desired)
818 return table_sizes[i];
819 }
820
821 // Growing very big, just approximate a "good" number, not growing as much
822 // as normal.
823 return item_count * 2 - 1;
824 }
825
826 // See the TableBuilder definition in the header file for how this works.
RebuildTableFromHistory()827 bool VisitedLinkMaster::RebuildTableFromHistory() {
828 DCHECK(!table_builder_);
829 if (table_builder_)
830 return false;
831
832 HistoryService* history_service = history_service_override_;
833 if (!history_service && profile_) {
834 history_service = profile_->GetHistoryService(Profile::EXPLICIT_ACCESS);
835 }
836
837 if (!history_service) {
838 DLOG(WARNING) << "Attempted to rebuild visited link table, but couldn't "
839 "obtain a HistoryService.";
840 return false;
841 }
842
843 // TODO(brettw) make sure we have reasonable salt!
844 table_builder_ = new TableBuilder(this, salt_);
845
846 // Make sure the table builder stays live during the call, even if the
847 // master is deleted. This is balanced in TableBuilder::OnCompleteMainThread.
848 table_builder_->AddRef();
849 history_service->IterateURLs(table_builder_);
850 return true;
851 }
852
853 // See the TableBuilder declaration above for how this works.
OnTableRebuildComplete(bool success,const std::vector<Fingerprint> & fingerprints)854 void VisitedLinkMaster::OnTableRebuildComplete(
855 bool success,
856 const std::vector<Fingerprint>& fingerprints) {
857 if (success) {
858 // Replace the old table with a new blank one.
859 shared_memory_serial_++;
860
861 // We are responsible for freeing it AFTER it has been replaced if
862 // replacement succeeds.
863 base::SharedMemory* old_shared_memory = shared_memory_;
864
865 int new_table_size = NewTableSizeForCount(
866 static_cast<int>(fingerprints.size() + added_since_rebuild_.size()));
867 if (BeginReplaceURLTable(new_table_size)) {
868 // Free the old table.
869 delete old_shared_memory;
870
871 // Add the stored fingerprints to the hash table.
872 for (size_t i = 0; i < fingerprints.size(); i++)
873 AddFingerprint(fingerprints[i], false);
874
875 // Also add anything that was added while we were asynchronously
876 // generating the new table.
877 for (std::set<Fingerprint>::iterator i = added_since_rebuild_.begin();
878 i != added_since_rebuild_.end(); ++i)
879 AddFingerprint(*i, false);
880 added_since_rebuild_.clear();
881
882 // Now handle deletions.
883 DeleteFingerprintsFromCurrentTable(deleted_since_rebuild_);
884 deleted_since_rebuild_.clear();
885
886 // Send an update notification to all child processes.
887 listener_->NewTable(shared_memory_);
888
889 // We shouldn't be writing the table from the main thread!
890 // http://code.google.com/p/chromium/issues/detail?id=24163
891 base::ThreadRestrictions::ScopedAllowIO allow_io;
892 WriteFullTable();
893 }
894 }
895 table_builder_ = NULL; // Will release our reference to the builder.
896
897 // Notify the unit test that the rebuild is complete (will be NULL in prod.)
898 if (rebuild_complete_task_.get()) {
899 rebuild_complete_task_->Run();
900 rebuild_complete_task_.reset(NULL);
901 }
902 }
903
WriteToFile(FILE * file,off_t offset,void * data,int32 data_size)904 void VisitedLinkMaster::WriteToFile(FILE* file,
905 off_t offset,
906 void* data,
907 int32 data_size) {
908 #ifndef NDEBUG
909 posted_asynchronous_operation_ = true;
910 #endif
911
912 BrowserThread::PostTask(
913 BrowserThread::FILE, FROM_HERE,
914 new AsyncWriter(file, offset, data, data_size));
915 }
916
WriteUsedItemCountToFile()917 void VisitedLinkMaster::WriteUsedItemCountToFile() {
918 if (!file_)
919 return; // See comment on the file_ variable for why this might happen.
920 WriteToFile(file_, kFileHeaderUsedOffset, &used_items_, sizeof(used_items_));
921 }
922
WriteHashRangeToFile(Hash first_hash,Hash last_hash)923 void VisitedLinkMaster::WriteHashRangeToFile(Hash first_hash, Hash last_hash) {
924 if (!file_)
925 return; // See comment on the file_ variable for why this might happen.
926 if (last_hash < first_hash) {
927 // Handle wraparound at 0. This first write is first_hash->EOF
928 WriteToFile(file_, first_hash * sizeof(Fingerprint) + kFileHeaderSize,
929 &hash_table_[first_hash],
930 (table_length_ - first_hash + 1) * sizeof(Fingerprint));
931
932 // Now do 0->last_lash.
933 WriteToFile(file_, kFileHeaderSize, hash_table_,
934 (last_hash + 1) * sizeof(Fingerprint));
935 } else {
936 // Normal case, just write the range.
937 WriteToFile(file_, first_hash * sizeof(Fingerprint) + kFileHeaderSize,
938 &hash_table_[first_hash],
939 (last_hash - first_hash + 1) * sizeof(Fingerprint));
940 }
941 }
942
ReadFromFile(FILE * file,off_t offset,void * data,size_t data_size)943 bool VisitedLinkMaster::ReadFromFile(FILE* file,
944 off_t offset,
945 void* data,
946 size_t data_size) {
947 #ifndef NDEBUG
948 // Since this function is synchronous, we require that no asynchronous
949 // operations could possibly be pending.
950 DCHECK(!posted_asynchronous_operation_);
951 #endif
952
953 if (fseek(file, offset, SEEK_SET) != 0)
954 return false;
955
956 size_t num_read = fread(data, 1, data_size, file);
957 return num_read == data_size;
958 }
959
960 // VisitedLinkTableBuilder ----------------------------------------------------
961
TableBuilder(VisitedLinkMaster * master,const uint8 salt[LINK_SALT_LENGTH])962 VisitedLinkMaster::TableBuilder::TableBuilder(
963 VisitedLinkMaster* master,
964 const uint8 salt[LINK_SALT_LENGTH])
965 : master_(master),
966 success_(true) {
967 fingerprints_.reserve(4096);
968 memcpy(salt_, salt, sizeof(salt));
969 }
970
971 // TODO(brettw): Do we want to try to cancel the request if this happens? It
972 // could delay shutdown if there are a lot of URLs.
DisownMaster()973 void VisitedLinkMaster::TableBuilder::DisownMaster() {
974 master_ = NULL;
975 }
976
OnURL(const GURL & url)977 void VisitedLinkMaster::TableBuilder::OnURL(const GURL& url) {
978 if (!url.is_empty()) {
979 fingerprints_.push_back(VisitedLinkMaster::ComputeURLFingerprint(
980 url.spec().data(), url.spec().length(), salt_));
981 }
982 }
983
OnComplete(bool success)984 void VisitedLinkMaster::TableBuilder::OnComplete(bool success) {
985 success_ = success;
986 DLOG_IF(WARNING, !success) << "Unable to rebuild visited links";
987
988 // Marshal to the main thread to notify the VisitedLinkMaster that the
989 // rebuild is complete.
990 BrowserThread::PostTask(
991 BrowserThread::UI, FROM_HERE,
992 NewRunnableMethod(this, &TableBuilder::OnCompleteMainThread));
993 }
994
OnCompleteMainThread()995 void VisitedLinkMaster::TableBuilder::OnCompleteMainThread() {
996 if (master_)
997 master_->OnTableRebuildComplete(success_, fingerprints_);
998
999 // WILL (generally) DELETE THIS! This balances the AddRef in
1000 // VisitedLinkMaster::RebuildTableFromHistory.
1001 Release();
1002 }
1003