// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_DATABASE_H_
#define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_DATABASE_H_
#pragma once

#include <set>
#include <string>
#include <vector>

#include "base/file_path.h"
#include "base/memory/scoped_ptr.h"
#include "base/synchronization/lock.h"
#include "base/task.h"
#include "chrome/browser/safe_browsing/safe_browsing_store.h"
#include "testing/gtest/include/gtest/gtest_prod.h"

// Forward declarations.  Every type below is used in this header only as a
// pointer, a reference, a template argument of a smart-pointer member, or a
// parameter of a function that is declared but not defined here, so the full
// definitions are not needed.
namespace base {
class Time;
}

namespace safe_browsing {
class PrefixSet;
}

class BloomFilter;
class GURL;
class MessageLoop;
class SafeBrowsingDatabase;

32 // Factory for creating SafeBrowsingDatabase. Tests implement this factory
33 // to create fake Databases for testing.
34 class SafeBrowsingDatabaseFactory {
35  public:
SafeBrowsingDatabaseFactory()36   SafeBrowsingDatabaseFactory() { }
~SafeBrowsingDatabaseFactory()37   virtual ~SafeBrowsingDatabaseFactory() { }
38   virtual SafeBrowsingDatabase* CreateSafeBrowsingDatabase(
39       bool enable_download_protection,
40       bool enable_client_side_whitelist) = 0;
41  private:
42   DISALLOW_COPY_AND_ASSIGN(SafeBrowsingDatabaseFactory);
43 };
44 
45 
46 // Encapsulates on-disk databases that for safebrowsing. There are
47 // three databases: browse, download and client-side detection (csd)
48 // whitelist databases. The browse database contains information
49 // about phishing and malware urls. The download database contains
50 // URLs for bad binaries (e.g: those containing virus) and hash of
51 // these downloaded contents. The csd whitelist database contains URLs
52 // that will never be considered as phishing by the client-side
53 // phishing detection. These on-disk databases are shared among all
54 // profiles, as it doesn't contain user-specific data. This object is
55 // not thread-safe, i.e. all its methods should be used on the same
56 // thread that it was created on.
57 class SafeBrowsingDatabase {
58  public:
59   // Factory method for obtaining a SafeBrowsingDatabase implementation.
60   // It is not thread safe.
61   // |enable_download_protection| is used to control the download database
62   // feature.
63   // |enable_client_side_whitelist| is used to control the csd whitelist
64   // database feature.
65   static SafeBrowsingDatabase* Create(bool enable_download_protection,
66                                       bool enable_client_side_whitelist);
67 
68   // Makes the passed |factory| the factory used to instantiate
69   // a SafeBrowsingDatabase. This is used for tests.
RegisterFactory(SafeBrowsingDatabaseFactory * factory)70   static void RegisterFactory(SafeBrowsingDatabaseFactory* factory) {
71     factory_ = factory;
72   }
73 
74   virtual ~SafeBrowsingDatabase();
75 
76   // Initializes the database with the given filename.
77   virtual void Init(const FilePath& filename) = 0;
78 
79   // Deletes the current database and creates a new one.
80   virtual bool ResetDatabase() = 0;
81 
82   // Returns false if |url| is not in the browse database.  If it
83   // returns true, then either |matching_list| is the name of the matching
84   // list, or |prefix_hits| and |full_hits| contains the matching hash
85   // prefixes.  This function is safe to call from threads other than
86   // the creation thread.
87   virtual bool ContainsBrowseUrl(const GURL& url,
88                                  std::string* matching_list,
89                                  std::vector<SBPrefix>* prefix_hits,
90                                  std::vector<SBFullHashResult>* full_hits,
91                                  base::Time last_update) = 0;
92 
93   // Returns false if none of |urls| are in Download database. If it returns
94   // true, |prefix_hits| should contain the prefixes for the URLs that were in
95   // the database.  This function could ONLY be accessed from creation thread.
96   virtual bool ContainsDownloadUrl(const std::vector<GURL>& urls,
97                                    std::vector<SBPrefix>* prefix_hits) = 0;
98 
99   // Returns false if |prefix| is not in Download database.
100   // This function could ONLY be accessed from creation thread.
101   virtual bool ContainsDownloadHashPrefix(const SBPrefix& prefix) = 0;
102 
103   // Returns false if |url| is not on the client-side phishing detection
104   // whitelist.  Otherwise, this function returns true.  Note: the whitelist
105   // only contains full-length hashes so we don't return any prefix hit.
106   // This function should only be called from the IO thread.
107   virtual bool ContainsCsdWhitelistedUrl(const GURL& url) = 0;
108 
109   // A database transaction should look like:
110   //
111   // std::vector<SBListChunkRanges> lists;
112   // if (db.UpdateStarted(&lists)) {
113   //   // Do something with |lists|.
114   //
115   //   // Process add/sub commands.
116   //   db.InsertChunks(list_name, chunks);
117   //
118   //   // Process adddel/subdel commands.
119   //   db.DeleteChunks(chunks_deletes);
120   //
121   //   // If passed true, processes the collected chunk info and
122   //   // rebuilds the bloom filter.  If passed false, rolls everything
123   //   // back.
124   //   db.UpdateFinished(success);
125   // }
126   //
127   // If UpdateStarted() returns true, the caller MUST eventually call
128   // UpdateFinished().  If it returns false, the caller MUST NOT call
129   // the other functions.
130   virtual bool UpdateStarted(std::vector<SBListChunkRanges>* lists) = 0;
131   virtual void InsertChunks(const std::string& list_name,
132                             const SBChunkList& chunks) = 0;
133   virtual void DeleteChunks(
134       const std::vector<SBChunkDelete>& chunk_deletes) = 0;
135   virtual void UpdateFinished(bool update_succeeded) = 0;
136 
137   // Store the results of a GetHash response. In the case of empty results, we
138   // cache the prefixes until the next update so that we don't have to issue
139   // further GetHash requests we know will be empty.
140   virtual void CacheHashResults(
141       const std::vector<SBPrefix>& prefixes,
142       const std::vector<SBFullHashResult>& full_hits) = 0;
143 
144   // The name of the bloom-filter file for the given database file.
145   static FilePath BloomFilterForFilename(const FilePath& db_filename);
146 
147   // Filename for malware and phishing URL database.
148   static FilePath BrowseDBFilename(const FilePath& db_base_filename);
149 
150   // Filename for download URL and download binary hash database.
151   static FilePath DownloadDBFilename(const FilePath& db_base_filename);
152 
153   // Filename for client-side phishing detection whitelist databsae.
154   static FilePath CsdWhitelistDBFilename(
155       const FilePath& csd_whitelist_base_filename);
156 
157   // Enumerate failures for histogramming purposes.  DO NOT CHANGE THE
158   // ORDERING OF THESE VALUES.
159   enum FailureType {
160     FAILURE_DATABASE_CORRUPT,
161     FAILURE_DATABASE_CORRUPT_HANDLER,
162     FAILURE_BROWSE_DATABASE_UPDATE_BEGIN,
163     FAILURE_BROWSE_DATABASE_UPDATE_FINISH,
164     FAILURE_DATABASE_FILTER_MISSING,
165     FAILURE_DATABASE_FILTER_READ,
166     FAILURE_DATABASE_FILTER_WRITE,
167     FAILURE_DATABASE_FILTER_DELETE,
168     FAILURE_DATABASE_STORE_MISSING,
169     FAILURE_DATABASE_STORE_DELETE,
170     FAILURE_DOWNLOAD_DATABASE_UPDATE_BEGIN,
171     FAILURE_DOWNLOAD_DATABASE_UPDATE_FINISH,
172     FAILURE_CSD_WHITELIST_DATABASE_UPDATE_BEGIN,
173     FAILURE_CSD_WHITELIST_DATABASE_UPDATE_FINISH,
174 
175     // Memory space for histograms is determined by the max.  ALWAYS
176     // ADD NEW VALUES BEFORE THIS ONE.
177     FAILURE_DATABASE_MAX
178   };
179 
180   static void RecordFailure(FailureType failure_type);
181 
182  private:
183   // The factory used to instantiate a SafeBrowsingDatabase object.
184   // Useful for tests, so they can provide their own implementation of
185   // SafeBrowsingDatabase.
186   static SafeBrowsingDatabaseFactory* factory_;
187 };
188 
189 class SafeBrowsingDatabaseNew : public SafeBrowsingDatabase {
190  public:
191   // Create a database with a browse store, download store and
192   // csd_whitelist_store. Takes ownership of browse_store, download_store and
193   // csd_whitelist_store. When |download_store| is NULL, the database
194   // will ignore any operations related download (url hashes and
195   // binary hashes).  Same for the |csd_whitelist_store|.
196   SafeBrowsingDatabaseNew(SafeBrowsingStore* browse_store,
197                           SafeBrowsingStore* download_store,
198                           SafeBrowsingStore* csd_whitelist_store);
199 
200   // Create a database with a browse store. This is a legacy interface that
201   // useds Sqlite.
202   SafeBrowsingDatabaseNew();
203 
204   virtual ~SafeBrowsingDatabaseNew();
205 
206   // Implement SafeBrowsingDatabase interface.
207   virtual void Init(const FilePath& filename);
208   virtual bool ResetDatabase();
209   virtual bool ContainsBrowseUrl(const GURL& url,
210                                  std::string* matching_list,
211                                  std::vector<SBPrefix>* prefix_hits,
212                                  std::vector<SBFullHashResult>* full_hits,
213                                  base::Time last_update);
214   virtual bool ContainsDownloadUrl(const std::vector<GURL>& urls,
215                                    std::vector<SBPrefix>* prefix_hits);
216   virtual bool ContainsDownloadHashPrefix(const SBPrefix& prefix);
217   virtual bool ContainsCsdWhitelistedUrl(const GURL& url);
218   virtual bool UpdateStarted(std::vector<SBListChunkRanges>* lists);
219   virtual void InsertChunks(const std::string& list_name,
220                             const SBChunkList& chunks);
221   virtual void DeleteChunks(const std::vector<SBChunkDelete>& chunk_deletes);
222   virtual void UpdateFinished(bool update_succeeded);
223   virtual void CacheHashResults(const std::vector<SBPrefix>& prefixes,
224                                 const std::vector<SBFullHashResult>& full_hits);
225 
226  private:
227   friend class SafeBrowsingDatabaseTest;
228   FRIEND_TEST(SafeBrowsingDatabaseTest, HashCaching);
229 
230   // Return the browse_store_, download_store_ or csd_whitelist_store_
231   // based on list_id.
232   SafeBrowsingStore* GetStore(int list_id);
233 
234     // Deletes the files on disk.
235   bool Delete();
236 
237   // Load the bloom filter off disk, or generates one if it doesn't exist.
238   void LoadBloomFilter();
239 
240   // Writes the current bloom filter to disk.
241   void WriteBloomFilter();
242 
243   // Loads the given full-length hashes to the csd whitelist.  If the number
244   // of hashes is too large or if the kill switch URL is on the whitelist
245   // we will whitelist all URLs.
246   void LoadCsdWhitelist(const std::vector<SBAddFullHash>& full_hashes);
247 
248   // Call this method if an error occured with the csd whitelist.  This will
249   // result in all calls to ContainsCsdWhitelistedUrl() to returning true.
250   void CsdWhitelistAllUrls();
251 
252   // Helpers for handling database corruption.
253   // |OnHandleCorruptDatabase()| runs |ResetDatabase()| and sets
254   // |corruption_detected_|, |HandleCorruptDatabase()| posts
255   // |OnHandleCorruptDatabase()| to the current thread, to be run
256   // after the current task completes.
257   // TODO(shess): Wire things up to entirely abort the update
258   // transaction when this happens.
259   void HandleCorruptDatabase();
260   void OnHandleCorruptDatabase();
261 
262   // Helpers for InsertChunks().
263   void InsertAdd(int chunk, SBPrefix host, const SBEntry* entry, int list_id);
264   void InsertAddChunks(int list_id, const SBChunkList& chunks);
265   void InsertSub(int chunk, SBPrefix host, const SBEntry* entry, int list_id);
266   void InsertSubChunks(int list_id, const SBChunkList& chunks);
267 
268   void UpdateDownloadStore();
269   void UpdateBrowseStore();
270   void UpdateCsdWhitelistStore();
271 
272   // Helper function to compare addprefixes in download_store_ with |prefixes|.
273   // The |list_bit| indicates which list (download url or download hash)
274   // to compare.
275   // Returns true if there is a match, |*prefix_hits| will contain the actual
276   // matching prefixes.
277   bool MatchDownloadAddPrefixes(int list_bit,
278                                 const std::vector<SBPrefix>& prefixes,
279                                 std::vector<SBPrefix>* prefix_hits);
280 
281   // Used to verify that various calls are made from the thread the
282   // object was created on.
283   MessageLoop* creation_loop_;
284 
285   // Lock for protecting access to variables that may be used on the
286   // IO thread.  This includes |browse_bloom_filter_|, |full_browse_hashes_|,
287   // |pending_browse_hashes_|, |prefix_miss_cache_|, |csd_whitelist_|, and
288   // |csd_whitelist_all_urls_|.
289   base::Lock lookup_lock_;
290 
291   // Underlying persistent store for chunk data.
292   // For browsing related (phishing and malware URLs) chunks and prefixes.
293   FilePath browse_filename_;
294   scoped_ptr<SafeBrowsingStore> browse_store_;
295 
296   // For download related (download URL and binary hash) chunks and prefixes.
297   FilePath download_filename_;
298   scoped_ptr<SafeBrowsingStore> download_store_;
299 
300   // For the client-side phishing detection whitelist chunks and full-length
301   // hashes.  This list only contains 256 bit hashes.
302   FilePath csd_whitelist_filename_;
303   scoped_ptr<SafeBrowsingStore> csd_whitelist_store_;
304 
305   // All the client-side phishing detection whitelist entries are loaded in
306   // a sorted vector.
307   std::vector<SBFullHash> csd_whitelist_;
308 
309   // If true, ContainsCsdWhitelistedUrl will always return true for all URLs.
310   // This is set to true if the csd whitelist is too large to be stored in
311   // memory, if the kill switch URL is on the csd whitelist or if there was
312   // an error during the most recent update.
313   bool csd_whitelist_all_urls_;
314 
315   // Bloom filter generated from the add-prefixes in |browse_store_|.
316   // Only browse_store_ requires the BloomFilter for fast query.
317   FilePath bloom_filter_filename_;
318   scoped_refptr<BloomFilter> browse_bloom_filter_;
319 
320   // Cached browse store related full-hash items, ordered by prefix for
321   // efficient scanning.
322   // |full_browse_hashes_| are items from |browse_store_|,
323   // |pending_browse_hashes_| are items from |CacheHashResults()|, which
324   // will be pushed to the store on the next update.
325   std::vector<SBAddFullHash> full_browse_hashes_;
326   std::vector<SBAddFullHash> pending_browse_hashes_;
327 
328   // Cache of prefixes that returned empty results (no full hash
329   // match) to |CacheHashResults()|.  Cached to prevent asking for
330   // them every time.  Cleared on next update.
331   std::set<SBPrefix> prefix_miss_cache_;
332 
333   // Used to schedule resetting the database because of corruption.
334   ScopedRunnableMethodFactory<SafeBrowsingDatabaseNew> reset_factory_;
335 
336   // Set if corruption is detected during the course of an update.
337   // Causes the update functions to fail with no side effects, until
338   // the next call to |UpdateStarted()|.
339   bool corruption_detected_;
340 
341   // Set to true if any chunks are added or deleted during an update.
342   // Used to optimize away database update.
343   bool change_detected_;
344 
345   // Used to check if a prefix was in the database.
346   scoped_ptr<safe_browsing::PrefixSet> prefix_set_;
347 };
348 
#endif  // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_DATABASE_H_