1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "chrome/common/visitedlink_common.h"
6
7 #include <string.h> // for memset()
8
9 #include "base/logging.h"
10 #include "base/md5.h"
11 #include "googleurl/src/gurl.h"
12
13 const VisitedLinkCommon::Fingerprint VisitedLinkCommon::null_fingerprint_ = 0;
14 const VisitedLinkCommon::Hash VisitedLinkCommon::null_hash_ = -1;
15
VisitedLinkCommon()16 VisitedLinkCommon::VisitedLinkCommon()
17 : hash_table_(NULL),
18 table_length_(0) {
19 memset(salt_, 0, sizeof(salt_));
20 }
21
~VisitedLinkCommon()22 VisitedLinkCommon::~VisitedLinkCommon() {
23 }
24
25 // FIXME: this uses linear probing, it should be replaced with quadratic
26 // probing or something better. See VisitedLinkMaster::AddFingerprint
IsVisited(const char * canonical_url,size_t url_len) const27 bool VisitedLinkCommon::IsVisited(const char* canonical_url,
28 size_t url_len) const {
29 if (url_len == 0)
30 return false;
31 if (!hash_table_ || table_length_ == 0)
32 return false;
33 return IsVisited(ComputeURLFingerprint(canonical_url, url_len));
34 }
35
IsVisited(const GURL & url) const36 bool VisitedLinkCommon::IsVisited(const GURL& url) const {
37 return IsVisited(url.spec().data(), url.spec().size());
38 }
39
IsVisited(Fingerprint fingerprint) const40 bool VisitedLinkCommon::IsVisited(Fingerprint fingerprint) const {
41 // Go through the table until we find the item or an empty spot (meaning it
42 // wasn't found). This loop will terminate as long as the table isn't full,
43 // which should be enforced by AddFingerprint.
44 Hash first_hash = HashFingerprint(fingerprint);
45 Hash cur_hash = first_hash;
46 while (true) {
47 Fingerprint cur_fingerprint = FingerprintAt(cur_hash);
48 if (cur_fingerprint == null_fingerprint_)
49 return false; // End of probe sequence found.
50 if (cur_fingerprint == fingerprint)
51 return true; // Found a match.
52
53 // This spot was taken, but not by the item we're looking for, search in
54 // the next position.
55 cur_hash++;
56 if (cur_hash == table_length_)
57 cur_hash = 0;
58 if (cur_hash == first_hash) {
59 // Wrapped around and didn't find an empty space, this means we're in an
60 // infinite loop because AddFingerprint didn't do its job resizing.
61 NOTREACHED();
62 return false;
63 }
64 }
65 }
66
67 // Uses the top 64 bits of the MD5 sum of the canonical URL as the fingerprint,
68 // this is as random as any other subset of the MD5SUM.
69 //
70 // FIXME: this uses the MD5SUM of the 16-bit character version. For systems
71 // where wchar_t is not 16 bits (Linux uses 32 bits, I think), this will not be
72 // compatable. We should define explicitly what should happen here across
73 // platforms, and convert if necessary (probably to UTF-16).
74
75 // static
ComputeURLFingerprint(const char * canonical_url,size_t url_len,const uint8 salt[LINK_SALT_LENGTH])76 VisitedLinkCommon::Fingerprint VisitedLinkCommon::ComputeURLFingerprint(
77 const char* canonical_url,
78 size_t url_len,
79 const uint8 salt[LINK_SALT_LENGTH]) {
80 DCHECK(url_len > 0) << "Canonical URLs should not be empty";
81
82 MD5Context ctx;
83 MD5Init(&ctx);
84 MD5Update(&ctx, salt, sizeof(salt));
85 MD5Update(&ctx, canonical_url, url_len * sizeof(char));
86
87 MD5Digest digest;
88 MD5Final(&digest, &ctx);
89
90 // This is the same as "return *(Fingerprint*)&digest.a;" but if we do that
91 // direct cast the alignment could be wrong, and we can't access a 64-bit int
92 // on arbitrary alignment on some processors. This reinterpret_casts it
93 // down to a char array of the same size as fingerprint, and then does the
94 // bit cast, which amounts to a memcpy. This does not handle endian issues.
95 return bit_cast<Fingerprint, uint8[8]>(
96 *reinterpret_cast<uint8(*)[8]>(&digest.a));
97 }
98