• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_DATABASE_H_
6 #define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_DATABASE_H_
7 
8 #include <map>
9 #include <set>
10 #include <string>
11 #include <vector>
12 
13 #include "base/containers/hash_tables.h"
14 #include "base/files/file_path.h"
15 #include "base/gtest_prod_util.h"
16 #include "base/memory/scoped_ptr.h"
17 #include "base/memory/weak_ptr.h"
18 #include "base/synchronization/lock.h"
19 #include "base/time/time.h"
20 #include "chrome/browser/safe_browsing/safe_browsing_store.h"
21 
22 namespace base {
23 class MessageLoop;
24 }
25 
26 namespace safe_browsing {
27 class PrefixSet;
28 }
29 
30 class GURL;
31 class SafeBrowsingDatabase;
32 
33 // Factory for creating SafeBrowsingDatabase. Tests implement this factory
34 // to create fake Databases for testing.
35 class SafeBrowsingDatabaseFactory {
36  public:
SafeBrowsingDatabaseFactory()37   SafeBrowsingDatabaseFactory() { }
     // Virtual so that concrete factories can be destroyed through a pointer to
     // this interface.
~SafeBrowsingDatabaseFactory()38   virtual ~SafeBrowsingDatabaseFactory() { }
     // Creates a SafeBrowsingDatabase.  The |enable_*| flags are listed in the
     // same order as the arguments of SafeBrowsingDatabase::Create().
     // NOTE(review): ownership of the returned pointer is not stated in this
     // header -- confirm against callers.
39   virtual SafeBrowsingDatabase* CreateSafeBrowsingDatabase(
40       bool enable_download_protection,
41       bool enable_client_side_whitelist,
42       bool enable_download_whitelist,
43       bool enable_extension_blacklist,
44       bool enable_side_effect_free_whitelist,
45       bool enable_ip_blacklist) = 0;
46  private:
     // Factories are not meant to be copied or assigned.
47   DISALLOW_COPY_AND_ASSIGN(SafeBrowsingDatabaseFactory);
48 };
49 
50 // Contains full_hash elements which are cached in memory.  Differs from
51 // SBAddFullHash in deriving |list_id| from |chunk_id|.  Differs from
52 // SBFullHashResult in adding |expire_after| for later expiration.
53 // TODO(shess): Remove/refactor this as part of converting to v2.3 caching
54 // semantics.
55 struct SBFullHashCached {
     // The cached full hash.
56   SBFullHash hash;
57   int list_id;  // TODO(shess): Use safe_browsing_util::ListType.
     // NOTE(review): presumably the time after which this entry must no longer
     // be served from the cache -- confirm against CacheHashResults().
58   base::Time expire_after;
59 };
60 
61 // Encapsulates the on-disk databases used for safebrowsing. There are
62 // four databases: browse, download, download whitelist and
63 // client-side detection (csd) whitelist databases. The browse database contains
64 // information about phishing and malware urls. The download database contains
65 // URLs for bad binaries (e.g. those containing viruses) and hashes of
66 // these downloaded contents. The download whitelist contains whitelisted
67 // download hosting sites as well as whitelisted binary signing certificates
68 // etc.  The csd whitelist database contains URLs that will never be considered
69 // as phishing by the client-side phishing detection. These on-disk databases
70 // are shared among all profiles, as they don't contain user-specific data. This
71 // object is not thread-safe, i.e. all its methods should be used on the same
72 // thread that it was created on, unless a method's comment says otherwise.
73 class SafeBrowsingDatabase {
74  public:
75   // Factory method for obtaining a SafeBrowsingDatabase implementation.
76   // It is not thread safe.
77   // |enable_download_protection| is used to control the download database
78   // feature.
79   // |enable_client_side_whitelist| is used to control the csd whitelist
80   // database feature.
81   // |enable_download_whitelist| is used to control the download whitelist
82   // database feature.
     // |enable_extension_blacklist| and |side_effect_free_whitelist| presumably
     // control the extension blacklist and side-effect free whitelist database
     // features, by analogy with the other flags (TODO confirm).
     // NOTE(review): |side_effect_free_whitelist| lacks the enable_ prefix that
     // SafeBrowsingDatabaseFactory::CreateSafeBrowsingDatabase() uses for the
     // argument in the same position.
83   // |enable_ip_blacklist| is used to control the csd malware IP blacklist
84   // database feature.
85   static SafeBrowsingDatabase* Create(bool enable_download_protection,
86                                       bool enable_client_side_whitelist,
87                                       bool enable_download_whitelist,
88                                       bool enable_extension_blacklist,
89                                       bool side_effect_free_whitelist,
90                                       bool enable_ip_blacklist);
91 
92   // Makes the passed |factory| the factory used to instantiate
93   // a SafeBrowsingDatabase. This is used for tests.
     // NOTE(review): |factory| is stored as a raw pointer in the static
     // |factory_|; this header does not show who owns it.
RegisterFactory(SafeBrowsingDatabaseFactory * factory)94   static void RegisterFactory(SafeBrowsingDatabaseFactory* factory) {
95     factory_ = factory;
96   }
97 
98   virtual ~SafeBrowsingDatabase();
99 
100   // Initializes the database with the given filename.
101   virtual void Init(const base::FilePath& filename) = 0;
102 
103   // Deletes the current database and creates a new one.
104   virtual bool ResetDatabase() = 0;
105 
106   // Returns false if |url| is not in the browse database.  If it returns true,
107   // then |prefix_hits| contains the list of prefix matches, and |cache_hits|
108   // contains the cached gethash results for those prefixes (if any).  This
109   // function is safe to call from threads other than the creation thread.
110   virtual bool ContainsBrowseUrl(
111       const GURL& url,
112       std::vector<SBPrefix>* prefix_hits,
113       std::vector<SBFullHashResult>* cache_hits) = 0;
114 
115   // Returns false if none of |urls| are in the download database. If it
116   // returns true, |prefix_hits| should contain the prefixes for the URLs that
117   // were in the database.  This function may ONLY be called from the creation
      // thread.
118   virtual bool ContainsDownloadUrl(const std::vector<GURL>& urls,
119                                    std::vector<SBPrefix>* prefix_hits) = 0;
120 
121   // Returns false if |url| is not on the client-side phishing detection
122   // whitelist.  Otherwise, this function returns true.  Note: the whitelist
123   // only contains full-length hashes so we don't return any prefix hit.
124   // This function should only be called from the IO thread.
125   virtual bool ContainsCsdWhitelistedUrl(const GURL& url) = 0;
126 
127   // The download whitelist is used for two purposes: a white-domain list of
128   // sites that are considered to host only harmless binaries as well as a
129   // whitelist of arbitrary strings such as hashed certificate authorities that
130   // are considered to be trusted.  The two methods below let you look up
131   // the whitelist either for a URL or an arbitrary string.  These methods will
132   // return false if no match is found and true otherwise.
133   // This function may ONLY be called from the IO thread.
134   virtual bool ContainsDownloadWhitelistedUrl(const GURL& url) = 0;
135   virtual bool ContainsDownloadWhitelistedString(const std::string& str) = 0;
136 
137   // Populates |prefix_hits| with any prefixes in |prefixes| that have matches
138   // in the database.
139   //
140   // This function may ONLY be called from the creation thread.
141   virtual bool ContainsExtensionPrefixes(
142       const std::vector<SBPrefix>& prefixes,
143       std::vector<SBPrefix>* prefix_hits) = 0;
144 
145   // Returns false unless the hash of |url| is on the side-effect free
146   // whitelist.
147   virtual bool ContainsSideEffectFreeWhitelistUrl(const GURL& url) = 0;
148 
149   // Returns true iff the given IP is currently on the csd malware IP blacklist.
150   virtual bool ContainsMalwareIP(const std::string& ip_address) = 0;
151 
152   // A database transaction should look like:
153   //
154   // std::vector<SBListChunkRanges> lists;
155   // if (db.UpdateStarted(&lists)) {
156   //   // Do something with |lists|.
157   //
158   //   // Process add/sub commands.
159   //   db.InsertChunks(list_name, chunks);
160   //
161   //   // Process adddel/subdel commands.
162   //   db.DeleteChunks(chunks_deletes);
163   //
164   //   // If passed true, processes the collected chunk info and
165   //   // rebuilds the filter.  If passed false, rolls everything
166   //   // back.
167   //   db.UpdateFinished(success);
168   // }
169   //
170   // If UpdateStarted() returns true, the caller MUST eventually call
171   // UpdateFinished().  If it returns false, the caller MUST NOT call
172   // the other functions.
173   virtual bool UpdateStarted(std::vector<SBListChunkRanges>* lists) = 0;
174   virtual void InsertChunks(const std::string& list_name,
175                             const std::vector<SBChunkData*>& chunks) = 0;
176   virtual void DeleteChunks(
177       const std::vector<SBChunkDelete>& chunk_deletes) = 0;
178   virtual void UpdateFinished(bool update_succeeded) = 0;
179 
180   // Store the results of a GetHash response. In the case of empty results, we
181   // cache the prefixes until the next update so that we don't have to issue
182   // further GetHash requests we know will be empty.
183   virtual void CacheHashResults(
184       const std::vector<SBPrefix>& prefixes,
185       const std::vector<SBFullHashResult>& full_hits,
186       const base::TimeDelta& cache_lifetime) = 0;
187 
188   // Returns true if the malware IP blacklisting killswitch URL is present
189   // in the csd whitelist.
190   virtual bool IsMalwareIPMatchKillSwitchOn() = 0;
191 
192   // Returns true if the whitelist killswitch URL is present in the csd
193   // whitelist.
194   virtual bool IsCsdWhitelistKillSwitchOn() = 0;
195 
196   // The name of the bloom-filter file for the given database file.
197   // NOTE(shess): OBSOLETE.  Present for deleting stale files.
198   static base::FilePath BloomFilterForFilename(
199       const base::FilePath& db_filename);
200 
201   // The name of the prefix set file for the given database file.
202   static base::FilePath PrefixSetForFilename(const base::FilePath& db_filename);
203 
204   // Filename for malware and phishing URL database.
205   static base::FilePath BrowseDBFilename(
206       const base::FilePath& db_base_filename);
207 
208   // Filename for download URL and download binary hash database.
209   static base::FilePath DownloadDBFilename(
210       const base::FilePath& db_base_filename);
211 
212   // Filename for client-side phishing detection whitelist database.
213   static base::FilePath CsdWhitelistDBFilename(
214       const base::FilePath& csd_whitelist_base_filename);
215 
216   // Filename for download whitelist database.
217   static base::FilePath DownloadWhitelistDBFilename(
218       const base::FilePath& download_whitelist_base_filename);
219 
220   // Filename for extension blacklist database.
221   static base::FilePath ExtensionBlacklistDBFilename(
222       const base::FilePath& extension_blacklist_base_filename);
223 
224   // Filename for side-effect free whitelist database.
225   static base::FilePath SideEffectFreeWhitelistDBFilename(
226       const base::FilePath& side_effect_free_whitelist_base_filename);
227 
228   // Filename for the csd malware IP blacklist database.
229   static base::FilePath IpBlacklistDBFilename(
230       const base::FilePath& ip_blacklist_base_filename);
231 
232   // Enumerate failures for histogramming purposes.  DO NOT CHANGE THE
233   // ORDERING OF THESE VALUES.
      // NOTE(review): entries suffixed _OBSOLETE are presumably retained only so
      // that the values after them keep their numbering -- confirm before
      // removing any.
234   enum FailureType {
235     FAILURE_DATABASE_CORRUPT,
236     FAILURE_DATABASE_CORRUPT_HANDLER,
237     FAILURE_BROWSE_DATABASE_UPDATE_BEGIN,
238     FAILURE_BROWSE_DATABASE_UPDATE_FINISH,
239     FAILURE_DATABASE_FILTER_MISSING_OBSOLETE,
240     FAILURE_DATABASE_FILTER_READ_OBSOLETE,
241     FAILURE_DATABASE_FILTER_WRITE_OBSOLETE,
242     FAILURE_DATABASE_FILTER_DELETE,
243     FAILURE_DATABASE_STORE_MISSING,
244     FAILURE_DATABASE_STORE_DELETE,
245     FAILURE_DOWNLOAD_DATABASE_UPDATE_BEGIN,
246     FAILURE_DOWNLOAD_DATABASE_UPDATE_FINISH,
247     FAILURE_WHITELIST_DATABASE_UPDATE_BEGIN,
248     FAILURE_WHITELIST_DATABASE_UPDATE_FINISH,
249     FAILURE_BROWSE_PREFIX_SET_MISSING,
250     FAILURE_BROWSE_PREFIX_SET_READ,
251     FAILURE_BROWSE_PREFIX_SET_WRITE,
252     FAILURE_BROWSE_PREFIX_SET_DELETE,
253     FAILURE_EXTENSION_BLACKLIST_UPDATE_BEGIN,
254     FAILURE_EXTENSION_BLACKLIST_UPDATE_FINISH,
255     FAILURE_EXTENSION_BLACKLIST_DELETE,
256     FAILURE_SIDE_EFFECT_FREE_WHITELIST_UPDATE_BEGIN,
257     FAILURE_SIDE_EFFECT_FREE_WHITELIST_UPDATE_FINISH,
258     FAILURE_SIDE_EFFECT_FREE_WHITELIST_DELETE,
259     FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_READ,
260     FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_WRITE,
261     FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_DELETE,
262     FAILURE_IP_BLACKLIST_UPDATE_BEGIN,
263     FAILURE_IP_BLACKLIST_UPDATE_FINISH,
264     FAILURE_IP_BLACKLIST_UPDATE_INVALID,
265     FAILURE_IP_BLACKLIST_DELETE,
266 
267     // Memory space for histograms is determined by the max.  ALWAYS
268     // ADD NEW VALUES BEFORE THIS ONE.
269     FAILURE_DATABASE_MAX
270   };
271 
      // Records |failure_type|; per the FailureType comment this feeds a
      // histogram (the implementation is not visible in this header).
272   static void RecordFailure(FailureType failure_type);
273 
274  private:
275   // The factory used to instantiate a SafeBrowsingDatabase object.
276   // Useful for tests, so they can provide their own implementation of
277   // SafeBrowsingDatabase.
278   static SafeBrowsingDatabaseFactory* factory_;
279 };
280 
281 class SafeBrowsingDatabaseNew : public SafeBrowsingDatabase {
282  public:
283   // Create a database with a browse, download, download whitelist and
284   // csd whitelist store objects. Takes ownership of all the store objects.
285   // When |download_store| is NULL, the database will ignore any operations
286   // related to downloads (url hashes and binary hashes).  The same is true for
287   // the |csd_whitelist_store|, |download_whitelist_store| and
288   // |ip_blacklist_store|.
289   SafeBrowsingDatabaseNew(SafeBrowsingStore* browse_store,
290                           SafeBrowsingStore* download_store,
291                           SafeBrowsingStore* csd_whitelist_store,
292                           SafeBrowsingStore* download_whitelist_store,
293                           SafeBrowsingStore* extension_blacklist_store,
294                           SafeBrowsingStore* side_effect_free_whitelist_store,
295                           SafeBrowsingStore* ip_blacklist_store);
296 
297   // Create a database with a browse store. This is a legacy interface that
298   // uses Sqlite.
299   SafeBrowsingDatabaseNew();
300 
301   virtual ~SafeBrowsingDatabaseNew();
302 
303   // Implement SafeBrowsingDatabase interface.
304   virtual void Init(const base::FilePath& filename) OVERRIDE;
305   virtual bool ResetDatabase() OVERRIDE;
306   virtual bool ContainsBrowseUrl(
307       const GURL& url,
308       std::vector<SBPrefix>* prefix_hits,
309       std::vector<SBFullHashResult>* cache_hits) OVERRIDE;
310   virtual bool ContainsDownloadUrl(const std::vector<GURL>& urls,
311                                    std::vector<SBPrefix>* prefix_hits) OVERRIDE;
312   virtual bool ContainsCsdWhitelistedUrl(const GURL& url) OVERRIDE;
313   virtual bool ContainsDownloadWhitelistedUrl(const GURL& url) OVERRIDE;
314   virtual bool ContainsDownloadWhitelistedString(
315       const std::string& str) OVERRIDE;
316   virtual bool ContainsExtensionPrefixes(
317       const std::vector<SBPrefix>& prefixes,
318       std::vector<SBPrefix>* prefix_hits) OVERRIDE;
319   virtual bool ContainsSideEffectFreeWhitelistUrl(const GURL& url)  OVERRIDE;
320   virtual bool ContainsMalwareIP(const std::string& ip_address) OVERRIDE;
321   virtual bool UpdateStarted(std::vector<SBListChunkRanges>* lists) OVERRIDE;
322   virtual void InsertChunks(const std::string& list_name,
323                             const std::vector<SBChunkData*>& chunks) OVERRIDE;
324   virtual void DeleteChunks(
325       const std::vector<SBChunkDelete>& chunk_deletes) OVERRIDE;
326   virtual void UpdateFinished(bool update_succeeded) OVERRIDE;
327   virtual void CacheHashResults(
328       const std::vector<SBPrefix>& prefixes,
329       const std::vector<SBFullHashResult>& full_hits,
330       const base::TimeDelta& cache_lifetime) OVERRIDE;
331 
332   // Returns whether the malware IP match kill switch is on.
      // NOTE(review): this comment used to refer to malware_kill_switch_, which
      // is not declared in this class -- confirm where the state lives.
333   virtual bool IsMalwareIPMatchKillSwitchOn() OVERRIDE;
334 
335   // Returns true if the CSD whitelist has everything whitelisted.
336   virtual bool IsCsdWhitelistKillSwitchOn() OVERRIDE;
337 
338  private:
339   friend class SafeBrowsingDatabaseTest;
340   FRIEND_TEST_ALL_PREFIXES(SafeBrowsingDatabaseTest, HashCaching);
341 
342   // A SafeBrowsing whitelist contains a list of whitelisted full-hashes (stored
343   // in a sorted vector) as well as a boolean flag indicating whether all
344   // lookups in the whitelist should be considered matches for safety.
345   typedef std::pair<std::vector<SBFullHash>, bool> SBWhitelist;
346 
347   // This map holds a csd malware IP blacklist which maps a prefix mask
348   // to a set of hashed blacklisted IP prefixes.  Each IP prefix is a hashed
349   // IPv6 IP prefix using SHA-1.
350   typedef std::map<std::string, base::hash_set<std::string> > IPBlacklist;
351 
352   // Returns true if the whitelist is disabled or if any of the given hashes
353   // matches the whitelist.
354   bool ContainsWhitelistedHashes(const SBWhitelist& whitelist,
355                                  const std::vector<SBFullHash>& hashes);
356 
357   // Return the browse_store_, download_store_, download_whitelist_store_ or
358   // csd_whitelist_store_ based on list_id.
      // NOTE(review): the other *_store_ members are not mentioned here; check
      // the implementation for which list ids are handled.
359   SafeBrowsingStore* GetStore(int list_id);
360 
361   // Deletes the files on disk.
362   bool Delete();
363 
364   // Load the prefix set off disk, if available.
365   void LoadPrefixSet();
366 
367   // Writes the current prefix set to disk.
368   void WritePrefixSet();
369 
370   // Loads the given full-length hashes to the given whitelist.  If the number
371   // of hashes is too large or if the kill switch URL is on the whitelist
372   // we will whitelist everything.
373   void LoadWhitelist(const std::vector<SBAddFullHash>& full_hashes,
374                      SBWhitelist* whitelist);
375 
376   // Call this method if an error occurred with the given whitelist.  This will
377   // result in all lookups to the whitelist returning true.
378   void WhitelistEverything(SBWhitelist* whitelist);
379 
380   // Parses the IP blacklist from the given full-length hashes.
381   void LoadIpBlacklist(const std::vector<SBAddFullHash>& full_hashes);
382 
383   // Helpers for handling database corruption.
384   // |OnHandleCorruptDatabase()| runs |ResetDatabase()| and sets
385   // |corruption_detected_|, |HandleCorruptDatabase()| posts
386   // |OnHandleCorruptDatabase()| to the current thread, to be run
387   // after the current task completes.
388   // TODO(shess): Wire things up to entirely abort the update
389   // transaction when this happens.
390   void HandleCorruptDatabase();
391   void OnHandleCorruptDatabase();
392 
393   // Helpers for InsertChunks().
394   void InsertAddChunk(SafeBrowsingStore* store,
395                       safe_browsing_util::ListType list_id,
396                       const SBChunkData& chunk);
397   void InsertSubChunk(SafeBrowsingStore* store,
398                       safe_browsing_util::ListType list_id,
399                       const SBChunkData& chunk);
400 
401   // Returns the size in bytes of the store after the update.
402   int64 UpdateHashPrefixStore(const base::FilePath& store_filename,
403                                SafeBrowsingStore* store,
404                                FailureType failure_type);
405   void UpdateBrowseStore();
406   void UpdateSideEffectFreeWhitelistStore();
407   void UpdateWhitelistStore(const base::FilePath& store_filename,
408                             SafeBrowsingStore* store,
409                             SBWhitelist* whitelist);
410   void UpdateIpBlacklistStore();
411 
412   // Used to verify that various calls are made from the thread the
413   // object was created on.
414   base::MessageLoop* creation_loop_;
415 
416   // Lock for protecting access to variables that may be used on the
417   // IO thread.  This includes |browse_prefix_set_|, |cached_browse_hashes_|,
418   // |prefix_miss_cache_|, |csd_whitelist_|.
      // NOTE(review): this list used to name |prefix_set_|, which is not a
      // member of this class; |browse_prefix_set_| is assumed to be the one
      // meant -- confirm against the implementation.
419   base::Lock lookup_lock_;
420 
421   // The base filename passed to Init(), used to generate the store and prefix
422   // set filenames used to store data on disk.
423   base::FilePath filename_base_;
424 
425   // Underlying persistent store for chunk data.
426   // For browsing related (phishing and malware URLs) chunks and prefixes.
427   scoped_ptr<SafeBrowsingStore> browse_store_;
428 
429   // For download related (download URL and binary hash) chunks and prefixes.
430   scoped_ptr<SafeBrowsingStore> download_store_;
431 
432   // For the client-side phishing detection whitelist chunks and full-length
433   // hashes.  This list only contains 256 bit hashes.
434   scoped_ptr<SafeBrowsingStore> csd_whitelist_store_;
435 
436   // For the download whitelist chunks and full-length hashes.  This list only
437   // contains 256 bit hashes.
438   scoped_ptr<SafeBrowsingStore> download_whitelist_store_;
439 
440   // For extension IDs.
441   scoped_ptr<SafeBrowsingStore> extension_blacklist_store_;
442 
443   // For side-effect free whitelist.
444   scoped_ptr<SafeBrowsingStore> side_effect_free_whitelist_store_;
445 
446   // For IP blacklist.
447   scoped_ptr<SafeBrowsingStore> ip_blacklist_store_;
448 
      // In-memory whitelist state; see the SBWhitelist typedef above for the
      // meaning of the pair.
      // NOTE(review): |extension_blacklist_| is a blacklist stored in the
      // SBWhitelist type -- confirm how its bool flag is interpreted.
449   SBWhitelist csd_whitelist_;
450   SBWhitelist download_whitelist_;
451   SBWhitelist extension_blacklist_;
452 
453   // The IP blacklist should be small.  At most a couple hundred IPs.
454   IPBlacklist ip_blacklist_;
455 
456   // Store items from CacheHashResults(), ordered by hash for efficient
457   // scanning.  Discarded on next update.
458   std::vector<SBFullHashCached> cached_browse_hashes_;
459 
460   // Cache of prefixes that returned empty results (no full hash
461   // match) to |CacheHashResults()|.  Cached to prevent asking for
462   // them every time.  Cleared on next update.
463   std::set<SBPrefix> prefix_miss_cache_;
464 
465   // Used to schedule resetting the database because of corruption.
466   base::WeakPtrFactory<SafeBrowsingDatabaseNew> reset_factory_;
467 
468   // Set if corruption is detected during the course of an update.
469   // Causes the update functions to fail with no side effects, until
470   // the next call to |UpdateStarted()|.
471   bool corruption_detected_;
472 
473   // Set to true if any chunks are added or deleted during an update.
474   // Used to optimize away database update.
475   bool change_detected_;
476 
477   // Used to check if a prefix was in the browse database.
478   scoped_ptr<safe_browsing::PrefixSet> browse_prefix_set_;
479 
480   // Used to check if a prefix was in the side-effect free whitelist database.
      // NOTE(review): the previous comment was a copy of the browse one above;
      // the wording here follows the member name -- confirm.
481   scoped_ptr<safe_browsing::PrefixSet> side_effect_free_whitelist_prefix_set_;
482 };
483 
484 #endif  // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_DATABASE_H_
485