1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef CHROME_COMMON_VISITEDLINK_COMMON_H__ 6 #define CHROME_COMMON_VISITEDLINK_COMMON_H__ 7 #pragma once 8 9 #include <vector> 10 11 #include "base/basictypes.h" 12 13 class GURL; 14 15 // number of bytes in the salt 16 #define LINK_SALT_LENGTH 8 17 18 // A multiprocess-safe database of the visited links for the browser. There 19 // should be exactly one process that has write access (implemented by 20 // VisitedLinkMaster), while all other processes should be read-only 21 // (implemented by VisitedLinkSlave). These other processes add links by calling 22 // the writer process to add them for it. The writer may also notify the readers 23 // to replace their table when the table is resized. 24 // 25 // IPC is not implemented in these classes. This is done through callback 26 // functions supplied by the creator of these objects to allow more flexibility, 27 // especially for testing. 28 // 29 // This class defines the common base for these others. We implement accessors 30 // for looking things up in the hash table, and for computing hash values and 31 // fingerprints. Both the master and the slave inherit from this, and add their 32 // own code to set up and change these values as their design requires. The 33 // slave pretty much just sets up the shared memory and saves the pointer. The 34 // master does a lot of work to manage the table, reading and writing it to and 35 // from disk, and resizing it when it gets too full. 36 // 37 // To ask whether a page is in history, we compute a 64-bit fingerprint of the 38 // URL. This URL is hashed and we see if it is in the URL hashtable. If it is, 39 // we consider it visited. Otherwise, it is unvisited. Note that it is possible 40 // to get collisions, which is the penalty for not storing all URL strings in 41 // memory (which could get to be more than we want to have in memory). We use 42 // a salt value for the links on one computer so that an attacker can not 43 // manually create a link that causes a collision. 44 class VisitedLinkCommon { 45 public: 46 // A number that identifies the URL. 47 typedef uint64 Fingerprint; 48 typedef std::vector<Fingerprint> Fingerprints; 49 50 // A hash value of a fingerprint 51 typedef int32 Hash; 52 53 // A fingerprint or hash value that does not exist 54 static const Fingerprint null_fingerprint_; 55 static const Hash null_hash_; 56 57 VisitedLinkCommon(); 58 virtual ~VisitedLinkCommon(); 59 60 // Returns the fingerprint for the given URL. ComputeURLFingerprint(const char * canonical_url,size_t url_len)61 Fingerprint ComputeURLFingerprint(const char* canonical_url, 62 size_t url_len) const { 63 return ComputeURLFingerprint(canonical_url, url_len, salt_); 64 } 65 66 // Looks up the given key in the table. The fingerprint for the URL is 67 // computed if you call one with the string argument. Returns true if found. 68 // Does not modify the hastable. 69 bool IsVisited(const char* canonical_url, size_t url_len) const; 70 bool IsVisited(const GURL& url) const; 71 bool IsVisited(Fingerprint fingerprint) const; 72 73 #ifdef UNIT_TEST 74 // Returns statistics about DB usage GetUsageStatistics(int32 * table_size,VisitedLinkCommon::Fingerprint ** fingerprints)75 void GetUsageStatistics(int32* table_size, 76 VisitedLinkCommon::Fingerprint** fingerprints) { 77 *table_size = table_length_; 78 *fingerprints = hash_table_; 79 } 80 #endif 81 82 protected: 83 // This structure is at the beginning of the shared memory so that the slaves 84 // can get stats on the table 85 struct SharedHeader { 86 // see goes into table_length_ 87 uint32 length; 88 89 // goes into salt_ 90 uint8 salt[LINK_SALT_LENGTH]; 91 }; 92 93 // Returns the fingerprint at the given index into the URL table. This 94 // function should be called instead of accessing the table directly to 95 // contain endian issues. FingerprintAt(int32 table_offset)96 Fingerprint FingerprintAt(int32 table_offset) const { 97 if (!hash_table_) 98 return null_fingerprint_; 99 return hash_table_[table_offset]; 100 } 101 102 // Computes the fingerprint of the given canonical URL. It is static so the 103 // same algorithm can be re-used by the table rebuilder, so you will have to 104 // pass the salt as a parameter. See the non-static version above if you 105 // want to use the current class' salt. 106 static Fingerprint ComputeURLFingerprint(const char* canonical_url, 107 size_t url_len, 108 const uint8 salt[LINK_SALT_LENGTH]); 109 110 // Computes the hash value of the given fingerprint, this is used as a lookup 111 // into the hashtable. HashFingerprint(Fingerprint fingerprint,int32 table_length)112 static Hash HashFingerprint(Fingerprint fingerprint, int32 table_length) { 113 if (table_length == 0) 114 return null_hash_; 115 return static_cast<Hash>(fingerprint % table_length); 116 } 117 // Uses the current hashtable. HashFingerprint(Fingerprint fingerprint)118 Hash HashFingerprint(Fingerprint fingerprint) const { 119 return HashFingerprint(fingerprint, table_length_); 120 } 121 122 // pointer to the first item 123 VisitedLinkCommon::Fingerprint* hash_table_; 124 125 // the number of items in the hash table 126 int32 table_length_; 127 128 // salt used for each URL when computing the fingerprint 129 uint8 salt_[LINK_SALT_LENGTH]; 130 131 private: 132 DISALLOW_COPY_AND_ASSIGN(VisitedLinkCommon); 133 }; 134 135 #endif // CHROME_COMMON_VISITEDLINK_COMMON_H_ 136