• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_STORE_H_
6 #define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_STORE_H_
7 #pragma once
8 
9 #include <set>
10 #include <vector>
11 
12 #include "base/basictypes.h"
13 #include "base/callback.h"
14 #include "base/hash_tables.h"
15 #include "base/task.h"
16 #include "base/time.h"
17 #include "chrome/browser/safe_browsing/safe_browsing_util.h"
18 
19 class FilePath;
20 
21 // SafeBrowsingStore provides a storage abstraction for the
22 // safe-browsing data used to build the bloom filter.  The items
23 // stored are:
24 //   The set of add and sub chunks seen.
25 //   List of SBAddPrefix (chunk_id and SBPrefix).
26 //   List of SBSubPrefix (chunk_id and the target SBAddPrefix).
27 //   List of SBAddFullHash (SBAddPrefix, time received and an SBFullHash).
28 //   List of SBSubFullHash (chunk_id, target SBAddPrefix, and an SBFullHash).
29 //
30 // The store is geared towards updating the data, not runtime access
31 // to the data (that is handled by SafeBrowsingDatabase).  Updates are
32 // handled similar to a SQL transaction cycle, with the new data being
33 // returned from FinishUpdate() (the COMMIT).  Data is not persistent
34 // until FinishUpdate() returns successfully.
35 //
36 // FinishUpdate() also handles dropping items who's chunk has been
37 // deleted, and netting out the add/sub lists (when a sub matches an
38 // add, both are dropped).
39 
40 // GetAddChunkId(), GetAddPrefix() and GetFullHash() are exposed so
41 // that these items can be generically compared with each other by
42 // SBAddPrefixLess() and SBAddPrefixHashLess().
43 
44 struct SBAddPrefix {
45   int32 chunk_id;
46   SBPrefix prefix;
47 
SBAddPrefixSBAddPrefix48   SBAddPrefix(int32 id, SBPrefix p) : chunk_id(id), prefix(p) {}
SBAddPrefixSBAddPrefix49   SBAddPrefix() : chunk_id(), prefix() {}
50 
GetAddChunkIdSBAddPrefix51   int32 GetAddChunkId() const { return chunk_id; }
GetAddPrefixSBAddPrefix52   SBPrefix GetAddPrefix() const { return prefix; }
53 };
54 
55 struct SBSubPrefix {
56   int32 chunk_id;
57   int32 add_chunk_id;
58   SBPrefix add_prefix;
59 
SBSubPrefixSBSubPrefix60   SBSubPrefix(int32 id, int32 add_id, int prefix)
61       : chunk_id(id), add_chunk_id(add_id), add_prefix(prefix) {}
SBSubPrefixSBSubPrefix62   SBSubPrefix() : chunk_id(), add_chunk_id(), add_prefix() {}
63 
GetAddChunkIdSBSubPrefix64   int32 GetAddChunkId() const { return add_chunk_id; }
GetAddPrefixSBSubPrefix65   SBPrefix GetAddPrefix() const { return add_prefix; }
66 };
67 
68 struct SBAddFullHash {
69   int32 chunk_id;
70   int32 received;
71   SBFullHash full_hash;
72 
SBAddFullHashSBAddFullHash73   SBAddFullHash(int32 id, base::Time r, const SBFullHash& h)
74       : chunk_id(id),
75         received(static_cast<int32>(r.ToTimeT())),
76         full_hash(h) {
77   }
78 
79   // Provided for ReadAddHashes() implementations, which already have
80   // an int32 for the time.
SBAddFullHashSBAddFullHash81   SBAddFullHash(int32 id, int32 r, const SBFullHash& h)
82       : chunk_id(id), received(r), full_hash(h) {}
83 
SBAddFullHashSBAddFullHash84   SBAddFullHash() : chunk_id(), received(), full_hash() {}
85 
GetAddChunkIdSBAddFullHash86   int32 GetAddChunkId() const { return chunk_id; }
GetAddPrefixSBAddFullHash87   SBPrefix GetAddPrefix() const { return full_hash.prefix; }
88 };
89 
90 struct SBSubFullHash {
91   int32 chunk_id;
92   int32 add_chunk_id;
93   SBFullHash full_hash;
94 
SBSubFullHashSBSubFullHash95   SBSubFullHash(int32 id, int32 add_id, const SBFullHash& h)
96       : chunk_id(id), add_chunk_id(add_id), full_hash(h) {}
SBSubFullHashSBSubFullHash97   SBSubFullHash() : chunk_id(), add_chunk_id(), full_hash() {}
98 
GetAddChunkIdSBSubFullHash99   int32 GetAddChunkId() const { return add_chunk_id; }
GetAddPrefixSBSubFullHash100   SBPrefix GetAddPrefix() const { return full_hash.prefix; }
101 };
102 
103 // Determine less-than based on add chunk and prefix.
104 template <class T, class U>
SBAddPrefixLess(const T & a,const U & b)105 bool SBAddPrefixLess(const T& a, const U& b) {
106   if (a.GetAddChunkId() != b.GetAddChunkId())
107     return a.GetAddChunkId() < b.GetAddChunkId();
108 
109   return a.GetAddPrefix() < b.GetAddPrefix();
110 }
111 
112 // Determine less-than based on add chunk, prefix, and full hash.
113 // Prefix can compare differently than hash due to byte ordering,
114 // so it must take precedence.
115 template <class T, class U>
SBAddPrefixHashLess(const T & a,const U & b)116 bool SBAddPrefixHashLess(const T& a, const U& b) {
117   if (SBAddPrefixLess(a, b))
118     return true;
119 
120   if (SBAddPrefixLess(b, a))
121     return false;
122 
123   return memcmp(a.full_hash.full_hash, b.full_hash.full_hash,
124                 sizeof(a.full_hash.full_hash)) < 0;
125 }
126 
127 // Process the lists for subs which knock out adds.  For any item in
128 // |sub_prefixes| which has a match in |add_prefixes|, knock out the
129 // matched items from all vectors.  Additionally remove items from
130 // deleted chunks.
131 //
132 // TODO(shess): Since the prefixes are uniformly-distributed hashes,
133 // there aren't many ways to organize the inputs for efficient
134 // processing.  For this reason, the vectors are sorted and processed
135 // in parallel.  At this time this code does the sorting internally,
136 // but it might make sense to make sorting an API requirement so that
137 // the storage can optimize for it.
138 //
139 // TODO(shess): The original code did not process |sub_full_hashes|
140 // for matches in |add_full_hashes|, so this code doesn't, either.  I
141 // think this is probably a bug.
142 void SBProcessSubs(std::vector<SBAddPrefix>* add_prefixes,
143                    std::vector<SBSubPrefix>* sub_prefixes,
144                    std::vector<SBAddFullHash>* add_full_hashes,
145                    std::vector<SBSubFullHash>* sub_full_hashes,
146                    const base::hash_set<int32>& add_chunks_deleted,
147                    const base::hash_set<int32>& sub_chunks_deleted);
148 
149 // Records a histogram of the number of items in |prefix_misses| which
150 // are not in |add_prefixes|.
151 void SBCheckPrefixMisses(const std::vector<SBAddPrefix>& add_prefixes,
152                          const std::set<SBPrefix>& prefix_misses);
153 
154 // TODO(shess): This uses int32 rather than int because it's writing
155 // specifically-sized items to files.  SBPrefix should likewise be
156 // explicitly sized.
157 
158 // Abstract interface for storing data.
159 class SafeBrowsingStore {
160  public:
SafeBrowsingStore()161   SafeBrowsingStore() {}
~SafeBrowsingStore()162   virtual ~SafeBrowsingStore() {}
163 
164   // Sets up the information for later use, but does not necessarily
165   // check whether the underlying file exists, or is valid.  If
166   // |curruption_callback| is non-NULL it will be called if corruption
167   // is detected, which could happen as part of any call other than
168   // Delete().  The appropriate action is to use Delete() to clear the
169   // store.
170   virtual void Init(const FilePath& filename,
171                     Callback0::Type* corruption_callback) = 0;
172 
173   // Deletes the files which back the store, returning true if
174   // successful.
175   virtual bool Delete() = 0;
176 
177   // Get all Add prefixes out from the store.
178   virtual bool GetAddPrefixes(std::vector<SBAddPrefix>* add_prefixes) = 0;
179 
180   // Get all add full-length hashes.
181   virtual bool GetAddFullHashes(
182       std::vector<SBAddFullHash>* add_full_hashes) = 0;
183 
184   // Start an update.  None of the following methods should be called
185   // unless this returns true.  If this returns true, the update
186   // should be terminated by FinishUpdate() or CancelUpdate().
187   virtual bool BeginUpdate() = 0;
188 
189   // Start a chunk of data.  None of the methods through FinishChunk()
190   // should be called unless this returns true.
191   // TODO(shess): Would it make sense for this to accept |chunk_id|?
192   // Possibly not, because of possible confusion between sub_chunk_id
193   // and add_chunk_id.
194   virtual bool BeginChunk() = 0;
195 
196   virtual bool WriteAddPrefix(int32 chunk_id, SBPrefix prefix) = 0;
197   virtual bool WriteAddHash(int32 chunk_id,
198                             base::Time receive_time,
199                             const SBFullHash& full_hash) = 0;
200   virtual bool WriteSubPrefix(int32 chunk_id,
201                               int32 add_chunk_id, SBPrefix prefix) = 0;
202   virtual bool WriteSubHash(int32 chunk_id, int32 add_chunk_id,
203                             const SBFullHash& full_hash) = 0;
204 
205   // Collect the chunk data and preferrably store it on disk to
206   // release memory.  Shoul not modify the data in-place.
207   virtual bool FinishChunk() = 0;
208 
209   // Track the chunks which have been seen.
210   virtual void SetAddChunk(int32 chunk_id) = 0;
211   virtual bool CheckAddChunk(int32 chunk_id) = 0;
212   virtual void GetAddChunks(std::vector<int32>* out) = 0;
213   virtual void SetSubChunk(int32 chunk_id) = 0;
214   virtual bool CheckSubChunk(int32 chunk_id) = 0;
215   virtual void GetSubChunks(std::vector<int32>* out) = 0;
216 
217   // Delete the indicated chunk_id.  The chunk will continue to be
218   // visible until the end of the transaction.
219   virtual void DeleteAddChunk(int32 chunk_id) = 0;
220   virtual void DeleteSubChunk(int32 chunk_id) = 0;
221 
222   // Pass the collected chunks through SBPRocessSubs() and commit to
223   // permanent storage.  The resulting add prefixes and hashes will be
224   // stored in |add_prefixes_result| and |add_full_hashes_result|.
225   // |pending_adds| is the set of full hashes which have been received
226   // since the previous update, and is provided as a convenience
227   // (could be written via WriteAddHash(), but that would flush the
228   // chunk to disk).  |prefix_misses| is the set of prefixes where the
229   // |GetHash()| request returned no full hashes, used for diagnostic
230   // purposes.
231   virtual bool FinishUpdate(
232       const std::vector<SBAddFullHash>& pending_adds,
233       const std::set<SBPrefix>& prefix_misses,
234       std::vector<SBAddPrefix>* add_prefixes_result,
235       std::vector<SBAddFullHash>* add_full_hashes_result) = 0;
236 
237   // Cancel the update in process and remove any temporary disk
238   // storage, leaving the original data unmodified.
239   virtual bool CancelUpdate() = 0;
240 
241  private:
242   DISALLOW_COPY_AND_ASSIGN(SafeBrowsingStore);
243 };
244 
245 #endif  // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_STORE_H_
246