• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 //
5 // Helper class which handles communication with the SafeBrowsing backends for
6 // client-side phishing detection.  This class can be used to get a file
7 // descriptor to the client-side phishing model and also to send a ping back to
8 // Google to verify if a particular site is really phishing or not.
9 //
10 // This class is not thread-safe and expects all calls to GetModelFile() and
11 // SendClientReportPhishingRequest() to be made on the UI thread.  We also
12 // expect that the calling thread runs a message loop and that there is a FILE
13 // thread running to execute asynchronous file operations.
14 
15 #ifndef CHROME_BROWSER_SAFE_BROWSING_CLIENT_SIDE_DETECTION_SERVICE_H_
16 #define CHROME_BROWSER_SAFE_BROWSING_CLIENT_SIDE_DETECTION_SERVICE_H_
17 #pragma once
18 
19 #include <map>
20 #include <queue>
21 #include <string>
22 #include <utility>
23 #include <vector>
24 
25 #include "base/basictypes.h"
26 #include "base/callback.h"
27 #include "base/file_path.h"
28 #include "base/gtest_prod_util.h"
29 #include "base/memory/linked_ptr.h"
30 #include "base/memory/ref_counted.h"
31 #include "base/memory/scoped_callback_factory.h"
32 #include "base/memory/scoped_ptr.h"
33 #include "base/platform_file.h"
34 #include "base/task.h"
35 #include "base/time.h"
36 #include "chrome/common/net/url_fetcher.h"
37 #include "googleurl/src/gurl.h"
38 #include "net/base/net_util.h"
39 
40 namespace net {
41 class URLRequestContextGetter;
42 class URLRequestStatus;
43 }  // namespace net
44 
45 namespace safe_browsing {
46 class ClientPhishingRequest;
47 
48 class ClientSideDetectionService : public URLFetcher::Delegate {
49  public:
50   typedef Callback1<base::PlatformFile>::Type OpenModelDoneCallback;
51 
52   typedef Callback2<GURL /* phishing URL */, bool /* is phishing */>::Type
53       ClientReportPhishingRequestCallback;
54 
55   virtual ~ClientSideDetectionService();
56 
57   // Creates a client-side detection service and starts fetching the client-side
58   // detection model if necessary.  The model will be stored in |model_path|.
59   // The caller takes ownership of the object.  This function may return NULL.
60   static ClientSideDetectionService* Create(
61       const FilePath& model_path,
62       net::URLRequestContextGetter* request_context_getter);
63 
64   // From the URLFetcher::Delegate interface.
65   virtual void OnURLFetchComplete(const URLFetcher* source,
66                                   const GURL& url,
67                                   const net::URLRequestStatus& status,
68                                   int response_code,
69                                   const ResponseCookies& cookies,
70                                   const std::string& data);
71 
72   // Gets the model file descriptor once the model is ready and stored
73   // on disk.  If there was an error the callback is called and the
74   // platform file is set to kInvalidPlatformFileValue. The
75   // ClientSideDetectionService takes ownership of the |callback|.
76   // The callback is always called after GetModelFile() returns and on the
77   // same thread as GetModelFile() was called.
78   void GetModelFile(OpenModelDoneCallback* callback);
79 
80   // Sends a request to the SafeBrowsing servers with the ClientPhishingRequest.
81   // The URL scheme of the |url()| in the request should be HTTP.  This method
82   // takes ownership of the |verdict| as well as the |callback| and calls the
83   // callback once the result has come back from the server or if an error
84   // occurs during the fetch.  If an error occurs the phishing verdict will
85   // always be false.  The callback is always called after
86   // SendClientReportPhishingRequest() returns and on the same thread as
87   // SendClientReportPhishingRequest() was called.
88   virtual void SendClientReportPhishingRequest(
89       ClientPhishingRequest* verdict,
90       ClientReportPhishingRequestCallback* callback);
91 
92   // Returns true if the given IP address string falls within a private
93   // (unroutable) network block.  Pages which are hosted on these IP addresses
94   // are exempt from client-side phishing detection.  This is called by the
95   // ClientSideDetectionHost prior to sending the renderer a
96   // SafeBrowsingMsg_StartPhishingDetection IPC.
97   //
98   // |ip_address| should be a dotted IPv4 address, or an unbracketed IPv6
99   // address.
100   virtual bool IsPrivateIPAddress(const std::string& ip_address) const;
101 
102   // Returns true and sets |is_phishing| if |url| is in the cache and valid.
103   virtual bool GetValidCachedResult(const GURL& url, bool* is_phishing);
104 
105   // Returns true if |url| is in the cache.
106   virtual bool IsInCache(const GURL& url);
107 
108   // Returns true if we have sent more than kMaxReportsPerInterval in the last
109   // kReportsInterval.
110   virtual bool OverReportLimit();
111 
112  protected:
113   // Use the Create() method to create an instance of this object.
114   ClientSideDetectionService(
115       const FilePath& model_path,
116       net::URLRequestContextGetter* request_context_getter);
117 
118  private:
119   friend class ClientSideDetectionServiceTest;  // Grants test access.
120 
121   enum ModelStatus {
122     // It's unclear whether or not the model was already fetched.
123     UNKNOWN_STATUS,
124     // Model is fetched and is stored on disk.
125     READY_STATUS,
126     // Error occurred during fetching or writing.
127     ERROR_STATUS,
128   };
129 
130   // CacheState holds all information necessary to respond to a caller without
131   // actually making an HTTP request.
132   struct CacheState {
133     bool is_phishing;
134     base::Time timestamp;
135 
136     CacheState(bool phish, base::Time time);
137   };
138   typedef std::map<GURL, linked_ptr<CacheState> > PhishingCache;
139 
140   // A tuple of (IP address block, prefix size) representing a private
141   // IP address range.
142   typedef std::pair<net::IPAddressNumber, size_t> AddressRange;
143 
144   static const char kClientReportPhishingUrl[];
145   static const char kClientModelUrl[];
146   static const int kMaxReportsPerInterval;
147   static const base::TimeDelta kReportsInterval;
148   static const base::TimeDelta kNegativeCacheInterval;
149   static const base::TimeDelta kPositiveCacheInterval;
150 
151   // Sets the model status and invokes all the pending callbacks in
152   // |open_callbacks_| with the current |model_file_| as parameter.
153   void SetModelStatus(ModelStatus status);
154 
155   // Called once the initial open() of the model file is done.  If the file
156   // exists we're done and we can call all the pending callbacks.  If the
157   // file doesn't exist this method will asynchronously fetch the model
158   // from the server by invoking StartFetchingModel().
159   void OpenModelFileDone(base::PlatformFileError error_code,
160                          base::PassPlatformFile file,
161                          bool created);
162 
163   // Callback that is invoked once the attempt to create the model
164   // file on disk is done.  If the file was created successfully we
165   // start writing the model to disk (asynchronously).  Otherwise, we
166   // give up and send an invalid platform file to all the pending callbacks.
167   void CreateModelFileDone(base::PlatformFileError error_code,
168                            base::PassPlatformFile file,
169                            bool created);
170 
171   // Callback is invoked once we're done writing the model file to disk.
172   // If everything went well then |model_file_| is a valid platform file
173   // that can be sent to all the pending callbacks.  If an error occurs
174   // we give up and send an invalid platform file to all the pending callbacks.
175   void WriteModelFileDone(base::PlatformFileError error_code,
176                           int bytes_written);
177 
178   // Helper function which closes the |model_file_| if necessary.
179   void CloseModelFile();
180 
181   // Starts sending the request to the client-side detection frontends.
182   // This method takes ownership of both pointers.
183   void StartClientReportPhishingRequest(
184       ClientPhishingRequest* verdict,
185       ClientReportPhishingRequestCallback* callback);
186 
187   // Starts getting the model file.
188   void StartGetModelFile(OpenModelDoneCallback* callback);
189 
190   // Called by OnURLFetchComplete to handle the response from fetching the
191   // model.
192   void HandleModelResponse(const URLFetcher* source,
193                            const GURL& url,
194                            const net::URLRequestStatus& status,
195                            int response_code,
196                            const ResponseCookies& cookies,
197                            const std::string& data);
198 
199   // Called by OnURLFetchComplete to handle the server response from
200   // sending the client-side phishing request.
201   void HandlePhishingVerdict(const URLFetcher* source,
202                              const GURL& url,
203                              const net::URLRequestStatus& status,
204                              int response_code,
205                              const ResponseCookies& cookies,
206                              const std::string& data);
207 
208   // Invalidate cache results which are no longer useful.
209   void UpdateCache();
210 
211   // Get the number of phishing reports that we have sent over kReportsInterval.
212   int GetNumReports();
213 
214   // Initializes the |private_networks_| vector with the network blocks
215   // that we consider non-public IP addresses.  Returns true on success.
216   bool InitializePrivateNetworks();
217 
218   FilePath model_path_;  // Presumably the |model_path| given to Create().
219   ModelStatus model_status_;  // See ModelStatus above.
220   base::PlatformFile model_file_;  // Passed to the pending callbacks.
221   scoped_ptr<URLFetcher> model_fetcher_;  // Presumably the model download.
222   scoped_ptr<std::string> tmp_model_string_;  // Presumably the fetched model.
223   std::vector<OpenModelDoneCallback*> open_callbacks_;  // Pending callbacks.
224 
225   // Map of client report phishing request to the corresponding callback that
226   // has to be invoked when the request is done.
227   struct ClientReportInfo;
228   std::map<const URLFetcher*, ClientReportInfo*> client_phishing_reports_;
229 
230   // Cache of completed requests.  Used to answer repeat requests for the same
231   // URL while it is still inside the caching window (kNegativeCacheInterval /
232   // kPositiveCacheInterval).  Size is bounded by the report limit times the
233   // window length.  NOTE(review): this comment used to name kMaxReportsPerDay,
234   // which is not declared here; presumably kMaxReportsPerInterval -- confirm.
235   // TODO(gcasto): Serialize this so that it doesn't reset on browser restart.
236   PhishingCache cache_;
237 
238   // Timestamps of the phishing requests we sent.  Used to limit the number of
239   // phishing requests that we send (presumably per kReportsInterval).
240   // TODO(gcasto): Serialize this so that it doesn't reset on browser restart.
241   std::queue<base::Time> phishing_report_times_;
242 
243   // Used to asynchronously call the callbacks for GetModelFile and
244   // SendClientReportPhishingRequest.
245   ScopedRunnableMethodFactory<ClientSideDetectionService> method_factory_;
246 
247   // The client-side detection service object (this) might go away before some
248   // of the callbacks are done (e.g., asynchronous file operations).  The
249   // callback factory will revoke all pending callbacks if this goes away to
250   // avoid a crash.
251   base::ScopedCallbackFactory<ClientSideDetectionService> callback_factory_;
252 
253   // The context we use to issue network requests.
254   scoped_refptr<net::URLRequestContextGetter> request_context_getter_;
255 
256   // The network blocks that we consider private IP address ranges.
257   std::vector<AddressRange> private_networks_;
258 
259   DISALLOW_COPY_AND_ASSIGN(ClientSideDetectionService);
260 };
261 
262 }  // namespace safe_browsing
263 
264 #endif  // CHROME_BROWSER_SAFE_BROWSING_CLIENT_SIDE_DETECTION_SERVICE_H_
265