• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef CHROME_COMMON_VISITEDLINK_COMMON_H__
6 #define CHROME_COMMON_VISITEDLINK_COMMON_H__
7 #pragma once
8 
9 #include <vector>
10 
11 #include "base/basictypes.h"
12 
13 class GURL;
14 
15 // number of bytes in the salt
16 #define LINK_SALT_LENGTH 8
17 
18 // A multiprocess-safe database of the visited links for the browser. There
19 // should be exactly one process that has write access (implemented by
20 // VisitedLinkMaster), while all other processes should be read-only
21 // (implemented by VisitedLinkSlave). These other processes add links by calling
22 // the writer process to add them for it. The writer may also notify the readers
23 // to replace their table when the table is resized.
24 //
25 // IPC is not implemented in these classes. This is done through callback
26 // functions supplied by the creator of these objects to allow more flexibility,
27 // especially for testing.
28 //
29 // This class defines the common base for these others. We implement accessors
30 // for looking things up in the hash table, and for computing hash values and
31 // fingerprints. Both the master and the slave inherit from this, and add their
32 // own code to set up and change these values as their design requires. The
33 // slave pretty much just sets up the shared memory and saves the pointer. The
34 // master does a lot of work to manage the table, reading and writing it to and
35 // from disk, and resizing it when it gets too full.
36 //
37 // To ask whether a page is in history, we compute a 64-bit fingerprint of the
38 // URL. This URL is hashed and we see if it is in the URL hashtable. If it is,
39 // we consider it visited. Otherwise, it is unvisited. Note that it is possible
40 // to get collisions, which is the penalty for not storing all URL strings in
41 // memory (which could get to be more than we want to have in memory). We use
42 // a salt value for the links on one computer so that an attacker can not
43 // manually create a link that causes a collision.
44 class VisitedLinkCommon {
45  public:
46   // A number that identifies the URL.
47   typedef uint64 Fingerprint;
48   typedef std::vector<Fingerprint> Fingerprints;
49 
50   // A hash value of a fingerprint
51   typedef int32 Hash;
52 
53   // A fingerprint or hash value that does not exist
54   static const Fingerprint null_fingerprint_;
55   static const Hash null_hash_;
56 
57   VisitedLinkCommon();
58   virtual ~VisitedLinkCommon();
59 
60   // Returns the fingerprint for the given URL.
ComputeURLFingerprint(const char * canonical_url,size_t url_len)61   Fingerprint ComputeURLFingerprint(const char* canonical_url,
62                                     size_t url_len) const {
63     return ComputeURLFingerprint(canonical_url, url_len, salt_);
64   }
65 
66   // Looks up the given key in the table. The fingerprint for the URL is
67   // computed if you call one with the string argument. Returns true if found.
68   // Does not modify the hastable.
69   bool IsVisited(const char* canonical_url, size_t url_len) const;
70   bool IsVisited(const GURL& url) const;
71   bool IsVisited(Fingerprint fingerprint) const;
72 
73 #ifdef UNIT_TEST
74   // Returns statistics about DB usage
GetUsageStatistics(int32 * table_size,VisitedLinkCommon::Fingerprint ** fingerprints)75   void GetUsageStatistics(int32* table_size,
76                           VisitedLinkCommon::Fingerprint** fingerprints) {
77     *table_size = table_length_;
78     *fingerprints = hash_table_;
79   }
80 #endif
81 
82  protected:
83   // This structure is at the beginning of the shared memory so that the slaves
84   // can get stats on the table
85   struct SharedHeader {
86     // see goes into table_length_
87     uint32 length;
88 
89     // goes into salt_
90     uint8 salt[LINK_SALT_LENGTH];
91   };
92 
93   // Returns the fingerprint at the given index into the URL table. This
94   // function should be called instead of accessing the table directly to
95   // contain endian issues.
FingerprintAt(int32 table_offset)96   Fingerprint FingerprintAt(int32 table_offset) const {
97     if (!hash_table_)
98       return null_fingerprint_;
99     return hash_table_[table_offset];
100   }
101 
102   // Computes the fingerprint of the given canonical URL. It is static so the
103   // same algorithm can be re-used by the table rebuilder, so you will have to
104   // pass the salt as a parameter. See the non-static version above if you
105   // want to use the current class' salt.
106   static Fingerprint ComputeURLFingerprint(const char* canonical_url,
107                                            size_t url_len,
108                                            const uint8 salt[LINK_SALT_LENGTH]);
109 
110   // Computes the hash value of the given fingerprint, this is used as a lookup
111   // into the hashtable.
HashFingerprint(Fingerprint fingerprint,int32 table_length)112   static Hash HashFingerprint(Fingerprint fingerprint, int32 table_length) {
113     if (table_length == 0)
114       return null_hash_;
115     return static_cast<Hash>(fingerprint % table_length);
116   }
117   // Uses the current hashtable.
HashFingerprint(Fingerprint fingerprint)118   Hash HashFingerprint(Fingerprint fingerprint) const {
119     return HashFingerprint(fingerprint, table_length_);
120   }
121 
122   // pointer to the first item
123   VisitedLinkCommon::Fingerprint* hash_table_;
124 
125   // the number of items in the hash table
126   int32 table_length_;
127 
128   // salt used for each URL when computing the fingerprint
129   uint8 salt_[LINK_SALT_LENGTH];
130 
131  private:
132   DISALLOW_COPY_AND_ASSIGN(VisitedLinkCommon);
133 };
134 
135 #endif  // CHROME_COMMON_VISITEDLINK_COMMON_H_
136