• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef COMPONENTS_VISITEDLINK_COMMON_VISITEDLINK_COMMON_H_
6 #define COMPONENTS_VISITEDLINK_COMMON_VISITEDLINK_COMMON_H_
7 
8 #include <vector>
9 
10 #include "base/basictypes.h"
11 
12 class GURL;
13 
14 namespace visitedlink {
15 
16 // number of bytes in the salt
17 #define LINK_SALT_LENGTH 8
18 
19 // A multiprocess-safe database of the visited links for the browser. There
20 // should be exactly one process that has write access (implemented by
21 // VisitedLinkMaster), while all other processes should be read-only
22 // (implemented by VisitedLinkSlave). These other processes add links by calling
23 // the writer process to add them for it. The writer may also notify the readers
24 // to replace their table when the table is resized.
25 //
26 // IPC is not implemented in these classes. This is done through callback
27 // functions supplied by the creator of these objects to allow more flexibility,
28 // especially for testing.
29 //
30 // This class defines the common base for these others. We implement accessors
31 // for looking things up in the hash table, and for computing hash values and
32 // fingerprints. Both the master and the slave inherit from this, and add their
33 // own code to set up and change these values as their design requires. The
34 // slave pretty much just sets up the shared memory and saves the pointer. The
35 // master does a lot of work to manage the table, reading and writing it to and
36 // from disk, and resizing it when it gets too full.
37 //
38 // To ask whether a page is in history, we compute a 64-bit fingerprint of the
39 // URL. This URL is hashed and we see if it is in the URL hashtable. If it is,
40 // we consider it visited. Otherwise, it is unvisited. Note that it is possible
41 // to get collisions, which is the penalty for not storing all URL strings in
42 // memory (which could get to be more than we want to have in memory). We use
43 // a salt value for the links on one computer so that an attacker can not
44 // manually create a link that causes a collision.
45 class VisitedLinkCommon {
46  public:
47   // A number that identifies the URL.
48   typedef uint64 Fingerprint;
49   typedef std::vector<Fingerprint> Fingerprints;
50 
51   // A hash value of a fingerprint
52   typedef int32 Hash;
53 
54   // A fingerprint or hash value that does not exist
55   static const Fingerprint null_fingerprint_;
56   static const Hash null_hash_;
57 
58   VisitedLinkCommon();
59   virtual ~VisitedLinkCommon();
60 
61   // Returns the fingerprint for the given URL.
ComputeURLFingerprint(const char * canonical_url,size_t url_len)62   Fingerprint ComputeURLFingerprint(const char* canonical_url,
63                                     size_t url_len) const {
64     return ComputeURLFingerprint(canonical_url, url_len, salt_);
65   }
66 
67   // Looks up the given key in the table. The fingerprint for the URL is
68   // computed if you call one with the string argument. Returns true if found.
69   // Does not modify the hastable.
70   bool IsVisited(const char* canonical_url, size_t url_len) const;
71   bool IsVisited(const GURL& url) const;
72   bool IsVisited(Fingerprint fingerprint) const;
73 
74 #ifdef UNIT_TEST
75   // Returns statistics about DB usage
GetUsageStatistics(int32 * table_size,VisitedLinkCommon::Fingerprint ** fingerprints)76   void GetUsageStatistics(int32* table_size,
77                           VisitedLinkCommon::Fingerprint** fingerprints) {
78     *table_size = table_length_;
79     *fingerprints = hash_table_;
80   }
81 #endif
82 
83  protected:
84   // This structure is at the beginning of the shared memory so that the slaves
85   // can get stats on the table
86   struct SharedHeader {
87     // see goes into table_length_
88     uint32 length;
89 
90     // goes into salt_
91     uint8 salt[LINK_SALT_LENGTH];
92   };
93 
94   // Returns the fingerprint at the given index into the URL table. This
95   // function should be called instead of accessing the table directly to
96   // contain endian issues.
FingerprintAt(int32 table_offset)97   Fingerprint FingerprintAt(int32 table_offset) const {
98     if (!hash_table_)
99       return null_fingerprint_;
100     return hash_table_[table_offset];
101   }
102 
103   // Computes the fingerprint of the given canonical URL. It is static so the
104   // same algorithm can be re-used by the table rebuilder, so you will have to
105   // pass the salt as a parameter. See the non-static version above if you
106   // want to use the current class' salt.
107   static Fingerprint ComputeURLFingerprint(const char* canonical_url,
108                                            size_t url_len,
109                                            const uint8 salt[LINK_SALT_LENGTH]);
110 
111   // Computes the hash value of the given fingerprint, this is used as a lookup
112   // into the hashtable.
HashFingerprint(Fingerprint fingerprint,int32 table_length)113   static Hash HashFingerprint(Fingerprint fingerprint, int32 table_length) {
114     if (table_length == 0)
115       return null_hash_;
116     return static_cast<Hash>(fingerprint % table_length);
117   }
118   // Uses the current hashtable.
HashFingerprint(Fingerprint fingerprint)119   Hash HashFingerprint(Fingerprint fingerprint) const {
120     return HashFingerprint(fingerprint, table_length_);
121   }
122 
123   // pointer to the first item
124   VisitedLinkCommon::Fingerprint* hash_table_;
125 
126   // the number of items in the hash table
127   int32 table_length_;
128 
129   // salt used for each URL when computing the fingerprint
130   uint8 salt_[LINK_SALT_LENGTH];
131 
132  private:
133   DISALLOW_COPY_AND_ASSIGN(VisitedLinkCommon);
134 };
135 
136 }  // namespace visitedlink
137 
138 #endif  // COMPONENTS_VISITEDLINK_COMMON_VISITEDLINK_COMMON_H_
139