• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "chrome/browser/safe_browsing/client_side_detection_host.h"
6 
7 #include <vector>
8 
9 #include "base/command_line.h"
10 #include "base/logging.h"
11 #include "base/memory/ref_counted.h"
12 #include "base/memory/scoped_ptr.h"
13 #include "base/metrics/histogram.h"
14 #include "base/task.h"
15 #include "chrome/browser/browser_process.h"
16 #include "chrome/browser/profiles/profile.h"
17 #include "chrome/browser/safe_browsing/client_side_detection_service.h"
18 #include "chrome/browser/safe_browsing/safe_browsing_service.h"
19 #include "chrome/common/chrome_switches.h"
20 #include "chrome/common/safe_browsing/csd.pb.h"
21 #include "chrome/common/safe_browsing/safebrowsing_messages.h"
22 #include "content/browser/browser_thread.h"
23 #include "content/browser/renderer_host/render_process_host.h"
24 #include "content/browser/renderer_host/render_view_host.h"
25 #include "content/browser/renderer_host/resource_dispatcher_host.h"
26 #include "content/browser/tab_contents/navigation_controller.h"
27 #include "content/browser/tab_contents/tab_contents.h"
28 #include "content/common/notification_service.h"
29 #include "content/common/notification_type.h"
30 #include "content/common/view_messages.h"
31 #include "googleurl/src/gurl.h"
32 
33 namespace safe_browsing {
34 
35 // This class is instantiated each time a new toplevel URL loads, and
36 // asynchronously checks whether the phishing classifier should run for this
37 // URL.  If so, it notifies the renderer with a StartPhishingDetection IPC.
38 // Objects of this class are ref-counted and will be destroyed once nobody
39 // uses it anymore.  If |tab_contents|, |csd_service| or |host| go away you need
40 // to call Cancel().  We keep the |sb_service| alive in a ref pointer for as
41 // long as it takes.
42 class ClientSideDetectionHost::ShouldClassifyUrlRequest
43     : public base::RefCountedThreadSafe<
44           ClientSideDetectionHost::ShouldClassifyUrlRequest> {
45  public:
ShouldClassifyUrlRequest(const ViewHostMsg_FrameNavigate_Params & params,TabContents * tab_contents,ClientSideDetectionService * csd_service,SafeBrowsingService * sb_service,ClientSideDetectionHost * host)46   ShouldClassifyUrlRequest(const ViewHostMsg_FrameNavigate_Params& params,
47                            TabContents* tab_contents,
48                            ClientSideDetectionService* csd_service,
49                            SafeBrowsingService* sb_service,
50                            ClientSideDetectionHost* host)
51       : canceled_(false),
52         params_(params),
53         tab_contents_(tab_contents),
54         csd_service_(csd_service),
55         sb_service_(sb_service),
56         host_(host) {
57     DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
58     DCHECK(tab_contents_);
59     DCHECK(csd_service_);
60     DCHECK(sb_service_);
61     DCHECK(host_);
62   }
63 
Start()64   void Start() {
65     DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
66 
67     // We start by doing some simple checks that can run on the UI thread.
68     UMA_HISTOGRAM_COUNTS("SBClientPhishing.ClassificationStart", 1);
69 
70     // Only classify [X]HTML documents.
71     if (params_.contents_mime_type != "text/html" &&
72         params_.contents_mime_type != "application/xhtml+xml") {
73       VLOG(1) << "Skipping phishing classification for URL: " << params_.url
74               << " because it has an unsupported MIME type: "
75               << params_.contents_mime_type;
76       UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail",
77                                 NO_CLASSIFY_UNSUPPORTED_MIME_TYPE,
78                                 NO_CLASSIFY_MAX);
79       return;
80     }
81 
82     // Don't run the phishing classifier if the URL came from a private
83     // network, since we don't want to ping back in this case.  We also need
84     // to check whether the connection was proxied -- if so, we won't have the
85     // correct remote IP address, and will skip phishing classification.
86     if (params_.was_fetched_via_proxy) {
87       VLOG(1) << "Skipping phishing classification for URL: " << params_.url
88               << " because it was fetched via a proxy.";
89       UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail",
90                                 NO_CLASSIFY_PROXY_FETCH,
91                                 NO_CLASSIFY_MAX);
92       return;
93     }
94     if (csd_service_->IsPrivateIPAddress(params_.socket_address.host())) {
95       VLOG(1) << "Skipping phishing classification for URL: " << params_.url
96               << " because of hosting on private IP: "
97               << params_.socket_address.host();
98       UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail",
99                                 NO_CLASSIFY_PRIVATE_IP,
100                                 NO_CLASSIFY_MAX);
101       return;
102     }
103 
104     // Don't run the phishing classifier if the tab is incognito.
105     if (tab_contents_->profile()->IsOffTheRecord()) {
106       VLOG(1) << "Skipping phishing classification for URL: " << params_.url
107               << " because we're browsing incognito.";
108       UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail",
109                                 NO_CLASSIFY_OFF_THE_RECORD,
110                                 NO_CLASSIFY_MAX);
111 
112       return;
113     }
114 
115     // We lookup the csd-whitelist before we lookup the cache because
116     // a URL may have recently been whitelisted.  If the URL matches
117     // the csd-whitelist we won't start classification.  The
118     // csd-whitelist check has to be done on the IO thread because it
119     // uses the SafeBrowsing service class.
120     BrowserThread::PostTask(
121         BrowserThread::IO,
122         FROM_HERE,
123         NewRunnableMethod(this,
124                           &ShouldClassifyUrlRequest::CheckCsdWhitelist,
125                           params_.url));
126   }
127 
Cancel()128   void Cancel() {
129     canceled_ = true;
130     // Just to make sure we don't do anything stupid we reset all these
131     // pointers except for the safebrowsing service class which may be
132     // accessed by CheckCsdWhitelist().
133     tab_contents_ = NULL;
134     csd_service_ = NULL;
135     host_ = NULL;
136   }
137 
138  private:
139   friend class base::RefCountedThreadSafe<
140       ClientSideDetectionHost::ShouldClassifyUrlRequest>;
141 
142   // Enum used to keep stats about why the pre-classification check failed.
143   enum PreClassificationCheckFailures {
144     NO_CLASSIFY_PROXY_FETCH,
145     NO_CLASSIFY_PRIVATE_IP,
146     NO_CLASSIFY_OFF_THE_RECORD,
147     NO_CLASSIFY_MATCH_CSD_WHITELIST,
148     NO_CLASSIFY_TOO_MANY_REPORTS,
149     NO_CLASSIFY_UNSUPPORTED_MIME_TYPE,
150 
151     NO_CLASSIFY_MAX  // Always add new values before this one.
152   };
153 
154   // The destructor can be called either from the UI or the IO thread.
~ShouldClassifyUrlRequest()155   virtual ~ShouldClassifyUrlRequest() { }
156 
CheckCsdWhitelist(const GURL & url)157   void CheckCsdWhitelist(const GURL& url) {
158     DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
159     if (!sb_service_ || sb_service_->MatchCsdWhitelistUrl(url)) {
160       // We're done.  There is no point in going back to the UI thread.
161       UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail",
162                                 NO_CLASSIFY_MATCH_CSD_WHITELIST,
163                                 NO_CLASSIFY_MAX);
164       return;
165     }
166 
167     BrowserThread::PostTask(
168         BrowserThread::UI,
169         FROM_HERE,
170         NewRunnableMethod(this,
171                           &ShouldClassifyUrlRequest::CheckCache));
172   }
173 
CheckCache()174   void CheckCache() {
175     DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
176     if (canceled_) {
177       return;
178     }
179 
180     // If result is cached, we don't want to run classification again
181     bool is_phishing;
182     if (csd_service_->GetValidCachedResult(params_.url, &is_phishing)) {
183       VLOG(1) << "Satisfying request for " << params_.url << " from cache";
184       UMA_HISTOGRAM_COUNTS("SBClientPhishing.RequestSatisfiedFromCache", 1);
185       // Since we are already on the UI thread, this is safe.
186       host_->MaybeShowPhishingWarning(params_.url, is_phishing);
187       return;
188     }
189 
190     // We want to limit the number of requests, though we will ignore the
191     // limit for urls in the cache.  We don't want to start classifying
192     // too many pages as phishing, but for those that we already think are
193     // phishing we want to give ourselves a chance to fix false positives.
194     if (csd_service_->IsInCache(params_.url)) {
195       VLOG(1) << "Reporting limit skipped for " << params_.url
196               << " as it was in the cache.";
197       UMA_HISTOGRAM_COUNTS("SBClientPhishing.ReportLimitSkipped", 1);
198     } else if (csd_service_->OverReportLimit()) {
199       VLOG(1) << "Too many report phishing requests sent recently, "
200               << "not running classification for " << params_.url;
201       UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail",
202                                 NO_CLASSIFY_TOO_MANY_REPORTS,
203                                 NO_CLASSIFY_MAX);
204       return;
205     }
206 
207     // Everything checks out, so start classification.
208     // |tab_contents_| is safe to call as we will be destructed
209     // before it is.
210     RenderViewHost* rvh = tab_contents_->render_view_host();
211     rvh->Send(new SafeBrowsingMsg_StartPhishingDetection(
212         rvh->routing_id(), params_.url));
213   }
214 
215   // No need to protect |canceled_| with a lock because it is only read and
216   // written by the UI thread.
217   bool canceled_;
218   ViewHostMsg_FrameNavigate_Params params_;
219   TabContents* tab_contents_;
220   ClientSideDetectionService* csd_service_;
221   // We keep a ref pointer here just to make sure the service class stays alive
222   // long enough.
223   scoped_refptr<SafeBrowsingService> sb_service_;
224   ClientSideDetectionHost* host_;
225 
226   DISALLOW_COPY_AND_ASSIGN(ShouldClassifyUrlRequest);
227 };
228 
229 // This class is used to display the phishing interstitial.
230 class CsdClient : public SafeBrowsingService::Client {
231  public:
CsdClient()232   CsdClient() {}
233 
234   // Method from SafeBrowsingService::Client.  This method is called on the
235   // IO thread once the interstitial is going away.  This method simply deletes
236   // the CsdClient object.
OnBlockingPageComplete(bool proceed)237   virtual void OnBlockingPageComplete(bool proceed) {
238     DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
239     // Delete this on the UI thread since it was created there.
240     BrowserThread::PostTask(BrowserThread::UI,
241                             FROM_HERE,
242                             new DeleteTask<CsdClient>(this));
243   }
244 
245  private:
246   friend class DeleteTask<CsdClient>;  // Calls the private destructor.
247 
248   // We're taking care of deleting this object.  No-one else should delete
249   // this object.
~CsdClient()250   virtual ~CsdClient() {}
251 
252   DISALLOW_COPY_AND_ASSIGN(CsdClient);
253 };
254 
ClientSideDetectionHost(TabContents * tab)255 ClientSideDetectionHost::ClientSideDetectionHost(TabContents* tab)
256     : TabContentsObserver(tab),
257       csd_service_(g_browser_process->safe_browsing_detection_service()),
258       cb_factory_(ALLOW_THIS_IN_INITIALIZER_LIST(this)) {
259   DCHECK(tab);
260   // Note: csd_service_ and sb_service_ might be NULL.
261   ResourceDispatcherHost* resource =
262       g_browser_process->resource_dispatcher_host();
263   if (resource) {
264     sb_service_ = resource->safe_browsing_service();
265   }
266 }
267 
~ClientSideDetectionHost()268 ClientSideDetectionHost::~ClientSideDetectionHost() {
269   // Tell any pending classification request that it is being canceled.
270   if (classification_request_.get()) {
271     classification_request_->Cancel();
272   }
273 }
274 
OnMessageReceived(const IPC::Message & message)275 bool ClientSideDetectionHost::OnMessageReceived(const IPC::Message& message) {
276   bool handled = true;
277   IPC_BEGIN_MESSAGE_MAP(ClientSideDetectionHost, message)
278     IPC_MESSAGE_HANDLER(SafeBrowsingHostMsg_DetectedPhishingSite,
279                         OnDetectedPhishingSite)
280     IPC_MESSAGE_UNHANDLED(handled = false)
281   IPC_END_MESSAGE_MAP()
282   return handled;
283 }
284 
DidNavigateMainFramePostCommit(const NavigationController::LoadCommittedDetails & details,const ViewHostMsg_FrameNavigate_Params & params)285 void ClientSideDetectionHost::DidNavigateMainFramePostCommit(
286     const NavigationController::LoadCommittedDetails& details,
287     const ViewHostMsg_FrameNavigate_Params& params) {
288   // TODO(noelutz): move this DCHECK to TabContents and fix all the unit tests
289   // that don't call this method on the UI thread.
290   // DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
291 
292   if (details.is_in_page) {
293     // If the navigation is within the same page, the user isn't really
294     // navigating away.  We don't need to cancel a pending callback or
295     // begin a new classification.
296     return;
297   }
298 
299   // If we navigate away and there currently is a pending phishing
300   // report request we have to cancel it to make sure we don't display
301   // an interstitial for the wrong page.  Note that this won't cancel
302   // the server ping back but only cancel the showing of the
303   // interstial.
304   cb_factory_.RevokeAll();
305 
306   if (csd_service_) {
307     // Cancel any pending classification request.
308     if (classification_request_.get()) {
309       classification_request_->Cancel();
310     }
311 
312     // Notify the renderer if it should classify this URL.
313     classification_request_ = new ShouldClassifyUrlRequest(params,
314                                                            tab_contents(),
315                                                            csd_service_,
316                                                            sb_service_,
317                                                            this);
318     classification_request_->Start();
319   }
320 }
321 
OnDetectedPhishingSite(const std::string & verdict_str)322 void ClientSideDetectionHost::OnDetectedPhishingSite(
323     const std::string& verdict_str) {
324   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
325   // There is something seriously wrong if there is no service class but
326   // this method is called.  The renderer should not start phishing detection
327   // if there isn't any service class in the browser.
328   DCHECK(csd_service_);
329   // We parse the protocol buffer here.  If we're unable to parse it we won't
330   // send the verdict further.
331   scoped_ptr<ClientPhishingRequest> verdict(new ClientPhishingRequest);
332   if (csd_service_ &&
333       verdict->ParseFromString(verdict_str) &&
334       verdict->IsInitialized()) {
335     // There shouldn't be any pending requests because we revoke them everytime
336     // we navigate away.
337     DCHECK(!cb_factory_.HasPendingCallbacks());
338     csd_service_->SendClientReportPhishingRequest(
339         verdict.release(),  // The service takes ownership of the verdict.
340         cb_factory_.NewCallback(
341             &ClientSideDetectionHost::MaybeShowPhishingWarning));
342   }
343 }
344 
MaybeShowPhishingWarning(GURL phishing_url,bool is_phishing)345 void ClientSideDetectionHost::MaybeShowPhishingWarning(GURL phishing_url,
346                                                        bool is_phishing) {
347   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
348   if (is_phishing &&
349       CommandLine::ForCurrentProcess()->HasSwitch(
350           switches::kEnableClientSidePhishingInterstitial)) {
351     DCHECK(tab_contents());
352     // TODO(noelutz): this is not perfect.  It's still possible that the
353     // user browses away before the interstitial is shown.  Maybe we should
354     // stop all pending navigations?
355     if (sb_service_) {
356       // TODO(noelutz): refactor the SafeBrowsing service class and the
357       // SafeBrowsing blocking page class so that we don't need to depend
358       // on the SafeBrowsingService here and so that we don't need to go
359       // through the IO message loop.
360       std::vector<GURL> redirect_urls;
361       BrowserThread::PostTask(
362           BrowserThread::IO,
363           FROM_HERE,
364           NewRunnableMethod(sb_service_.get(),
365                             &SafeBrowsingService::DisplayBlockingPage,
366                             phishing_url, phishing_url,
367                             redirect_urls,
368                             // We only classify the main frame URL.
369                             ResourceType::MAIN_FRAME,
370                             // TODO(noelutz): create a separate threat type
371                             // for client-side phishing detection.
372                             SafeBrowsingService::URL_PHISHING,
373                             new CsdClient() /* will delete itself */,
374                             tab_contents()->GetRenderProcessHost()->id(),
375                             tab_contents()->render_view_host()->routing_id()));
376     }
377   }
378 }
379 
set_client_side_detection_service(ClientSideDetectionService * service)380 void ClientSideDetectionHost::set_client_side_detection_service(
381     ClientSideDetectionService* service) {
382   csd_service_ = service;
383 }
384 
set_safe_browsing_service(SafeBrowsingService * service)385 void ClientSideDetectionHost::set_safe_browsing_service(
386     SafeBrowsingService* service) {
387   sb_service_ = service;
388 }
389 
390 }  // namespace safe_browsing
391