• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 //
5 // The Safe Browsing service is responsible for downloading anti-phishing and
6 // anti-malware tables and checking urls against them.
7 
8 #ifndef CHROME_BROWSER_SAFE_BROWSING_DATABASE_MANAGER_H_
9 #define CHROME_BROWSER_SAFE_BROWSING_DATABASE_MANAGER_H_
10 
11 #include <deque>
12 #include <map>
13 #include <set>
14 #include <string>
15 #include <vector>
16 
17 #include "base/callback.h"
18 #include "base/containers/hash_tables.h"
19 #include "base/memory/ref_counted.h"
20 #include "base/memory/scoped_ptr.h"
21 #include "base/synchronization/lock.h"
22 #include "base/time/time.h"
23 #include "chrome/browser/safe_browsing/protocol_manager.h"
24 #include "chrome/browser/safe_browsing/safe_browsing_util.h"
25 #include "url/gurl.h"
26 
27 class SafeBrowsingService;
28 class SafeBrowsingDatabase;
29 
30 namespace base {
31 class Thread;
32 }
33 
34 namespace net {
35 class URLRequestContext;
36 class URLRequestContextGetter;
37 }
38 
39 namespace safe_browsing {
40 class ClientSideDetectionService;
41 class DownloadProtectionService;
42 }
43 
44 // Construction needs to happen on the main thread.
45 class SafeBrowsingDatabaseManager
46     : public base::RefCountedThreadSafe<SafeBrowsingDatabaseManager>,
47       public SafeBrowsingProtocolManagerDelegate {
48  public:
49   class Client;
50 
51   // Bundle of SafeBrowsing state while performing a URL or hash prefix check.
     // One instance describes a single in-flight check: the items being looked
     // up, the per-item results, the Client to report to, and bookkeeping for
     // the timeout.  Instances are non-copyable (DISALLOW_COPY_AND_ASSIGN).
52   struct SafeBrowsingCheck {
53     // |check_type| should correspond to the type of item that is being
54     // checked, either a URL or a binary hash/URL. We store this for two
55     // purposes: to know which of Client's methods to call when a result is
56     // known, and for logging purposes. It *isn't* used to predict the response
57     // list type; that is information that the server gives us.
58     SafeBrowsingCheck(const std::vector<GURL>& urls,
59                       const std::vector<SBFullHash>& full_hashes,
60                       Client* client,
61                       safe_browsing_util::ListType check_type,
62                       const std::vector<SBThreatType>& expected_threats);
63     ~SafeBrowsingCheck();
64 
65     // Either |urls| or |full_hashes| is used to look up the database.
66     // |*_results| are parallel vectors containing the results. They are
67     // initialized to contain SB_THREAT_TYPE_SAFE.
68     std::vector<GURL> urls;
69     std::vector<SBThreatType> url_results;
       // NOTE(review): presumably parallel to |urls| as well (the browse-URL
       // callback on Client takes a metadata string) -- confirm in the .cc.
70     std::vector<std::string> url_metadata;
71     std::vector<SBFullHash> full_hashes;
72     std::vector<SBThreatType> full_hash_results;
73 
       // Receiver of the result (see the constructor comment).  Held as a raw
       // pointer; ownership is not expressed in this header.
74     Client* client;
       // NOTE(review): presumably set when a full-hash (GetHash) request is
       // needed to resolve this check -- confirm against the implementation.
75     bool need_get_hash;
76     base::TimeTicks start;  // When check was sent to SB service.
77     safe_browsing_util::ListType check_type;  // See comment in constructor.
       // Copied from the constructor argument of the same name.
       // NOTE(review): presumably the threat types this check is allowed to
       // report -- confirm.
78     std::vector<SBThreatType> expected_threats;
       // NOTE(review): presumably the database prefix matches and the cached
       // full-hash results found for this check -- confirm.
79     std::vector<SBPrefix> prefix_hits;
80     std::vector<SBFullHashResult> cache_hits;
81 
82     // Vends weak pointers for TimeoutCallback().  If the response is
83     // received before the timeout fires, the factory is destructed and
84     // the timeout won't be fired.
85     // TODO(lzheng): We should consider using this timeout check
86     // for browsing too (instead of implementing it in
87     // safe_browsing_resource_handler.cc).
88     scoped_ptr<base::WeakPtrFactory<
89         SafeBrowsingDatabaseManager> > timeout_factory_;
90 
91    private:
92     DISALLOW_COPY_AND_ASSIGN(SafeBrowsingCheck);
93   };
94 
95   class Client {
       // Callback interface for receiving the outcome of a SafeBrowsing check.
       // Pointers to Client are passed to the Check*() methods and to
       // CancelCheck() of SafeBrowsingDatabaseManager.  Each result callback
       // has an empty default body, so an implementation only overrides the
       // callbacks for the kinds of checks it issues.
96    public:
       // Delivers the result of a finished |check| to this client.
       // NOTE(review): not virtual and defined elsewhere; presumably it
       // dispatches to one of the On*Result() callbacks below based on
       // |check.check_type| (see the SafeBrowsingCheck constructor comment)
       // -- confirm in the .cc.
97     void OnSafeBrowsingResult(const SafeBrowsingCheck& check);
98 
99    protected:
        // Protected so that code outside Client and its subclasses cannot
        // delete an object through a Client pointer.
~Client()100     virtual ~Client() {}
101 
102     // Called when the result of checking a browse URL is known.
        // Default implementation does nothing.
OnCheckBrowseUrlResult(const GURL & url,SBThreatType threat_type,const std::string & metadata)103     virtual void OnCheckBrowseUrlResult(const GURL& url,
104                                         SBThreatType threat_type,
105                                         const std::string& metadata) {}
106 
107     // Called when the result of checking a download URL is known.
        // Default implementation does nothing.
OnCheckDownloadUrlResult(const std::vector<GURL> & url_chain,SBThreatType threat_type)108     virtual void OnCheckDownloadUrlResult(const std::vector<GURL>& url_chain,
109                                           SBThreatType threat_type) {}
110 
111     // Called when the result of checking a set of extensions is known.
        // Default implementation does nothing.
OnCheckExtensionsResult(const std::set<std::string> & threats)112     virtual void OnCheckExtensionsResult(
113         const std::set<std::string>& threats) {}
114   };
115 
116   // Creates the database manager.  Need to initialize before using.
117   explicit SafeBrowsingDatabaseManager(
118       const scoped_refptr<SafeBrowsingService>& service);
119 
120   // Returns true if the url's scheme can be checked.
121   bool CanCheckUrl(const GURL& url) const;
122 
123   // Returns whether download protection is enabled, i.e. the current value
      // of |enable_download_protection_|.
download_protection_enabled()124   bool download_protection_enabled() const {
125     return enable_download_protection_;
126   }
127 
128   // Called on the IO thread to check if the given url is safe or not.  If we
129   // can synchronously determine that the url is safe, CheckBrowseUrl returns
130   // true.  Otherwise it returns false, and |client| is called asynchronously
131   // with the result when it is ready.
132   virtual bool CheckBrowseUrl(const GURL& url, Client* client);
133 
134   // Check if the prefix for |url| is in safebrowsing download add lists.
135   // Result will be passed to callback in |client|.
136   virtual bool CheckDownloadUrl(const std::vector<GURL>& url_chain,
137                                 Client* client);
138 
139   // Check which prefixes in |extension_ids| are in the safebrowsing blacklist.
140   // Returns true if not, false if further checks need to be made in which case
141   // the result will be passed to |client|.
142   virtual bool CheckExtensionIDs(const std::set<std::string>& extension_ids,
143                                  Client* client);
144 
145   // Check if the given url is on the side-effect free whitelist.
146   // Can be called on any thread. Returns false if the check cannot be performed
147   // (e.g. because we are disabled or because of an invalid scheme in the URL).
148   // Otherwise, returns true if the URL is on the whitelist based on matching
149   // the hash prefix only (so there may be false positives).
150   virtual bool CheckSideEffectFreeWhitelistUrl(const GURL& url);
151 
152   // Check if the |url| matches any of the full-length hashes from the
153   // client-side phishing detection whitelist.  Returns true if there was a
154   // match and false otherwise.  To make sure we are conservative we will return
155   // true if an error occurs. This method is expected to be called on the IO
156   // thread.
157   virtual bool MatchCsdWhitelistUrl(const GURL& url);
158 
159   // Check if the given IP address (either IPv4 or IPv6) matches the malware
160   // IP blacklist.
161   virtual bool MatchMalwareIP(const std::string& ip_address);
162 
163   // Check if the |url| matches any of the full-length hashes from the
164   // download whitelist.  Returns true if there was a match and false otherwise.
165   // To make sure we are conservative we will return true if an error occurs.
166   // This method is expected to be called on the IO thread.
167   virtual bool MatchDownloadWhitelistUrl(const GURL& url);
168 
169   // Check if |str| matches any of the full-length hashes from the download
170   // whitelist.  Returns true if there was a match and false otherwise.
171   // To make sure we are conservative we will return true if an error occurs.
172   // This method is expected to be called on the IO thread.
173   virtual bool MatchDownloadWhitelistString(const std::string& str);
174 
175   // Check if the CSD malware IP matching kill switch is turned on.
176   virtual bool IsMalwareKillSwitchOn();
177 
178   // Check if the CSD whitelist kill switch is turned on.
179   virtual bool IsCsdWhitelistKillSwitchOn();
180 
181   // Called on the IO thread to cancel a pending check if the result is no
182   // longer needed.
183   void CancelCheck(Client* client);
184 
185   // Called on the IO thread when the SafeBrowsingProtocolManager has received
186   // the full hash results for prefix hits detected in the database.
187   void HandleGetHashResults(SafeBrowsingCheck* check,
188                             const std::vector<SBFullHashResult>& full_hashes,
189                             const base::TimeDelta& cache_lifetime);
190 
191   // Log the user perceived delay caused by SafeBrowsing. This delay is the time
192   // delta starting from when we would have started reading data from the
193   // network, and ending when the SafeBrowsing check completes indicating that
194   // the current page is 'safe'.
195   void LogPauseDelay(base::TimeDelta time);
196 
197   // Called to initialize objects that are used on the io_thread.  This may be
198   // called multiple times during the life of the DatabaseManager. Should be
199   // called on IO thread.
200   void StartOnIOThread();
201 
202   // Called to stop or shutdown operations on the io_thread. This may be called
203   // multiple times during the life of the DatabaseManager. Should be called
204   // on IO thread. If shutdown is true, the manager is disabled permanently.
205   void StopOnIOThread(bool shutdown);
206 
207  protected:
208   virtual ~SafeBrowsingDatabaseManager();
209 
210   // protected for tests.
211   void NotifyDatabaseUpdateFinished(bool update_succeeded);
212 
213  private:
214   friend class base::RefCountedThreadSafe<SafeBrowsingDatabaseManager>;
215   friend class SafeBrowsingServerTest;
216   friend class SafeBrowsingServiceTest;
217   friend class SafeBrowsingServiceTestHelper;
218   friend class SafeBrowsingDatabaseManagerTest;
219   FRIEND_TEST_ALL_PREFIXES(SafeBrowsingDatabaseManagerTest, GetUrlThreatType);
220 
221   typedef std::set<SafeBrowsingCheck*> CurrentChecks;
222   typedef std::vector<SafeBrowsingCheck*> GetHashRequestors;
223   typedef base::hash_map<SBPrefix, GetHashRequestors> GetHashRequests;
224 
225   // Clients that we've queued up for checking later once the database is ready.
      // Entries are stored by value in |queued_checks_|; per the comment on
      // DatabaseLoadComplete(), queued checks are run once the database has
      // finished loading.
226   struct QueuedCheck {
        // The members below have the same names and types as these arguments.
        // NOTE(review): presumably each argument is copied into the matching
        // member -- confirm in the .cc.
227     QueuedCheck(const safe_browsing_util::ListType check_type,
228                 Client* client,
229                 const GURL& url,
230                 const std::vector<SBThreatType>& expected_threats,
231                 const base::TimeTicks& start);
232     ~QueuedCheck();
233     safe_browsing_util::ListType check_type;
        // Raw pointer; ownership is not expressed in this header.
234     Client* client;
235     GURL url;
236     std::vector<SBThreatType> expected_threats;
237     base::TimeTicks start;  // When check was queued.
238   };
239 
240   // Return the threat type from the first result in |full_hashes| which matches
241   // |hash|, or SAFE if none match.
242   static SBThreatType GetHashThreatType(
243       const SBFullHash& hash,
244       const std::vector<SBFullHashResult>& full_hashes);
245 
246   // Given a URL, compare all the possible host + path full hashes to the set of
247   // provided full hashes.  Returns the threat type of the matching result from
248   // |full_hashes|, or SAFE if none match.
249   static SBThreatType GetUrlThreatType(
250       const GURL& url,
251       const std::vector<SBFullHashResult>& full_hashes,
252       size_t* index);
253 
254   // Called to stop operations on the io_thread. This may be called multiple
255   // times during the life of the DatabaseManager. Should be called on IO
256   // thread.
257   void DoStopOnIOThread();
258 
259   // Returns whether |database_| exists and is accessible.
260   bool DatabaseAvailable() const;
261 
262   // Called on the IO thread.  If the database does not exist, queues up a call
263   // on the db thread to create it.  Returns whether the database is available.
264   //
265   // Note that this is only needed outside the db thread, since functions on the
266   // db thread can call GetDatabase() directly.
267   bool MakeDatabaseAvailable();
268 
269   // Should only be called on db thread as SafeBrowsingDatabase is not
270   // threadsafe.
271   SafeBrowsingDatabase* GetDatabase();
272 
273   // Called on the IO thread with the check result.
274   void OnCheckDone(SafeBrowsingCheck* info);
275 
276   // Called on the database thread to retrieve chunks.
277   void GetAllChunksFromDatabase(GetChunksCallback callback);
278 
279   // Called on the IO thread with the results of all chunks.
280   void OnGetAllChunksFromDatabase(const std::vector<SBListChunkRanges>& lists,
281                                   bool database_error,
282                                   GetChunksCallback callback);
283 
284   // Called on the IO thread after the database reports that it added a chunk.
285   void OnAddChunksComplete(AddChunksCallback callback);
286 
287   // Notification that the database is done loading its bloom filter.  We may
288   // have had to queue checks until the database is ready, and if so, this
289   // checks them.
290   void DatabaseLoadComplete();
291 
292   // Called on the database thread to add/remove chunks and host keys.
293   void AddDatabaseChunks(const std::string& list,
294                          scoped_ptr<ScopedVector<SBChunkData> > chunks,
295                          AddChunksCallback callback);
296 
297   void DeleteDatabaseChunks(
298       scoped_ptr<std::vector<SBChunkDelete> > chunk_deletes);
299 
300   void NotifyClientBlockingComplete(Client* client, bool proceed);
301 
302   void DatabaseUpdateFinished(bool update_succeeded);
303 
304   // Called on the db thread to close the database.  See CloseDatabase().
305   void OnCloseDatabase();
306 
307   // Runs on the db thread to reset the database. We assume that resetting the
308   // database is a synchronous operation.
309   void OnResetDatabase();
310 
311   // Internal worker function for processing full hashes.
312   void OnHandleGetHashResults(SafeBrowsingCheck* check,
313                               const std::vector<SBFullHashResult>& full_hashes);
314 
315   // Run one check against |full_hashes|.  Returns |true| if the check
316   // finds a match in |full_hashes|.
317   bool HandleOneCheck(SafeBrowsingCheck* check,
318                       const std::vector<SBFullHashResult>& full_hashes);
319 
320   // Invoked by CheckDownloadUrl. It checks the download URL on
321   // safe_browsing_thread_.
322   void CheckDownloadUrlOnSBThread(SafeBrowsingCheck* check);
323 
324   // The callback function when a safebrowsing check is timed out. Client will
325   // be notified that the safebrowsing check is SAFE when this happens.
326   void TimeoutCallback(SafeBrowsingCheck* check);
327 
328   // Calls the Client's callback on IO thread after CheckDownloadUrl finishes.
329   void CheckDownloadUrlDone(SafeBrowsingCheck* check);
330 
331   // Checks all extension ID hashes on safe_browsing_thread_.
332   void CheckExtensionIDsOnSBThread(SafeBrowsingCheck* check);
333 
334   // Helper function that calls safe browsing client and cleans up |checks_|.
335   void SafeBrowsingCheckDone(SafeBrowsingCheck* check);
336 
337   // Helper function to set |check| with default values and start a safe
338   // browsing check with a timeout (presumably |check_timeout_|). |task| will
339   // be called on success, otherwise TimeoutCallback will be called.
340   void StartSafeBrowsingCheck(SafeBrowsingCheck* check,
341                               const base::Closure& task);
342 
343   // SafeBrowsingProtocolManagerDelegate overrides.
344   virtual void ResetDatabase() OVERRIDE;
345   virtual void UpdateStarted() OVERRIDE;
346   virtual void UpdateFinished(bool success) OVERRIDE;
347   virtual void GetChunks(GetChunksCallback callback) OVERRIDE;
348   virtual void AddChunks(const std::string& list,
349                          scoped_ptr<ScopedVector<SBChunkData> > chunks,
350                          AddChunksCallback callback) OVERRIDE;
351   virtual void DeleteChunks(
352       scoped_ptr<std::vector<SBChunkDelete> > chunk_deletes) OVERRIDE;
353 
354   scoped_refptr<SafeBrowsingService> sb_service_;
355 
356   CurrentChecks checks_;
357 
358   // Used for issuing only one GetHash request for a given prefix.
359   GetHashRequests gethash_requests_;
360 
361   // The persistent database.  We don't use a scoped_ptr because it
362   // needs to be destroyed on a different thread than this object.
363   SafeBrowsingDatabase* database_;
364 
365   // Lock used to prevent possible data races due to compiler optimizations.
366   mutable base::Lock database_lock_;
367 
368   // Whether the service is running. 'enabled_' is used by the
369   // SafeBrowsingDatabaseManager on the IO thread during normal operations.
370   bool enabled_;
371 
372   // Indicate if download_protection is enabled by command switch
373   // so we allow this feature to be exercised.
374   bool enable_download_protection_;
375 
376   // Indicate if client-side phishing detection whitelist should be enabled
377   // or not.
378   bool enable_csd_whitelist_;
379 
380   // Indicate if the download whitelist should be enabled or not.
381   bool enable_download_whitelist_;
382 
383   // Indicate if the extension blacklist should be enabled.
384   bool enable_extension_blacklist_;
385 
386   // Indicate if the side effect free whitelist should be enabled.
387   bool enable_side_effect_free_whitelist_;
388 
389   // Indicate if the csd malware IP blacklist should be enabled.
390   bool enable_ip_blacklist_;
391 
392   // The SafeBrowsing thread that runs database operations.
393   //
394   // Note: Functions that run on this thread should run synchronously and return
395   // to the IO thread, not post additional tasks back to this thread, lest we
396   // cause a race condition at shutdown time that leads to a database leak.
397   scoped_ptr<base::Thread> safe_browsing_thread_;
398 
399   // Indicates if we're currently in an update cycle.
400   bool update_in_progress_;
401 
402   // When true, newly fetched chunks may not be in the database yet since the
403   // database is still updating.
404   bool database_update_in_progress_;
405 
406   // Indicates if we're in the midst of trying to close the database.  If this
407   // is true, nothing on the IO thread should access the database.
408   bool closing_database_;
409 
410   std::deque<QueuedCheck> queued_checks_;
411 
412   // Timeout to use for safe browsing checks.
413   base::TimeDelta check_timeout_;
414 
415   DISALLOW_COPY_AND_ASSIGN(SafeBrowsingDatabaseManager);
416 };
417 
418 #endif  // CHROME_BROWSER_SAFE_BROWSING_DATABASE_MANAGER_H_
419