1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "chrome/browser/safe_browsing/client_side_detection_host.h"
6
7 #include <vector>
8
9 #include "base/command_line.h"
10 #include "base/logging.h"
11 #include "base/memory/ref_counted.h"
12 #include "base/memory/scoped_ptr.h"
13 #include "base/metrics/histogram.h"
14 #include "base/task.h"
15 #include "chrome/browser/browser_process.h"
16 #include "chrome/browser/profiles/profile.h"
17 #include "chrome/browser/safe_browsing/client_side_detection_service.h"
18 #include "chrome/browser/safe_browsing/safe_browsing_service.h"
19 #include "chrome/common/chrome_switches.h"
20 #include "chrome/common/safe_browsing/csd.pb.h"
21 #include "chrome/common/safe_browsing/safebrowsing_messages.h"
22 #include "content/browser/browser_thread.h"
23 #include "content/browser/renderer_host/render_process_host.h"
24 #include "content/browser/renderer_host/render_view_host.h"
25 #include "content/browser/renderer_host/resource_dispatcher_host.h"
26 #include "content/browser/tab_contents/navigation_controller.h"
27 #include "content/browser/tab_contents/tab_contents.h"
28 #include "content/common/notification_service.h"
29 #include "content/common/notification_type.h"
30 #include "content/common/view_messages.h"
31 #include "googleurl/src/gurl.h"
32
33 namespace safe_browsing {
34
35 // This class is instantiated each time a new toplevel URL loads, and
36 // asynchronously checks whether the phishing classifier should run for this
37 // URL. If so, it notifies the renderer with a StartPhishingDetection IPC.
38 // Objects of this class are ref-counted and will be destroyed once nobody
39 // uses it anymore. If |tab_contents|, |csd_service| or |host| go away you need
40 // to call Cancel(). We keep the |sb_service| alive in a ref pointer for as
41 // long as it takes.
42 class ClientSideDetectionHost::ShouldClassifyUrlRequest
43 : public base::RefCountedThreadSafe<
44 ClientSideDetectionHost::ShouldClassifyUrlRequest> {
45 public:
ShouldClassifyUrlRequest(const ViewHostMsg_FrameNavigate_Params & params,TabContents * tab_contents,ClientSideDetectionService * csd_service,SafeBrowsingService * sb_service,ClientSideDetectionHost * host)46 ShouldClassifyUrlRequest(const ViewHostMsg_FrameNavigate_Params& params,
47 TabContents* tab_contents,
48 ClientSideDetectionService* csd_service,
49 SafeBrowsingService* sb_service,
50 ClientSideDetectionHost* host)
51 : canceled_(false),
52 params_(params),
53 tab_contents_(tab_contents),
54 csd_service_(csd_service),
55 sb_service_(sb_service),
56 host_(host) {
57 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
58 DCHECK(tab_contents_);
59 DCHECK(csd_service_);
60 DCHECK(sb_service_);
61 DCHECK(host_);
62 }
63
Start()64 void Start() {
65 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
66
67 // We start by doing some simple checks that can run on the UI thread.
68 UMA_HISTOGRAM_COUNTS("SBClientPhishing.ClassificationStart", 1);
69
70 // Only classify [X]HTML documents.
71 if (params_.contents_mime_type != "text/html" &&
72 params_.contents_mime_type != "application/xhtml+xml") {
73 VLOG(1) << "Skipping phishing classification for URL: " << params_.url
74 << " because it has an unsupported MIME type: "
75 << params_.contents_mime_type;
76 UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail",
77 NO_CLASSIFY_UNSUPPORTED_MIME_TYPE,
78 NO_CLASSIFY_MAX);
79 return;
80 }
81
82 // Don't run the phishing classifier if the URL came from a private
83 // network, since we don't want to ping back in this case. We also need
84 // to check whether the connection was proxied -- if so, we won't have the
85 // correct remote IP address, and will skip phishing classification.
86 if (params_.was_fetched_via_proxy) {
87 VLOG(1) << "Skipping phishing classification for URL: " << params_.url
88 << " because it was fetched via a proxy.";
89 UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail",
90 NO_CLASSIFY_PROXY_FETCH,
91 NO_CLASSIFY_MAX);
92 return;
93 }
94 if (csd_service_->IsPrivateIPAddress(params_.socket_address.host())) {
95 VLOG(1) << "Skipping phishing classification for URL: " << params_.url
96 << " because of hosting on private IP: "
97 << params_.socket_address.host();
98 UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail",
99 NO_CLASSIFY_PRIVATE_IP,
100 NO_CLASSIFY_MAX);
101 return;
102 }
103
104 // Don't run the phishing classifier if the tab is incognito.
105 if (tab_contents_->profile()->IsOffTheRecord()) {
106 VLOG(1) << "Skipping phishing classification for URL: " << params_.url
107 << " because we're browsing incognito.";
108 UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail",
109 NO_CLASSIFY_OFF_THE_RECORD,
110 NO_CLASSIFY_MAX);
111
112 return;
113 }
114
115 // We lookup the csd-whitelist before we lookup the cache because
116 // a URL may have recently been whitelisted. If the URL matches
117 // the csd-whitelist we won't start classification. The
118 // csd-whitelist check has to be done on the IO thread because it
119 // uses the SafeBrowsing service class.
120 BrowserThread::PostTask(
121 BrowserThread::IO,
122 FROM_HERE,
123 NewRunnableMethod(this,
124 &ShouldClassifyUrlRequest::CheckCsdWhitelist,
125 params_.url));
126 }
127
Cancel()128 void Cancel() {
129 canceled_ = true;
130 // Just to make sure we don't do anything stupid we reset all these
131 // pointers except for the safebrowsing service class which may be
132 // accessed by CheckCsdWhitelist().
133 tab_contents_ = NULL;
134 csd_service_ = NULL;
135 host_ = NULL;
136 }
137
138 private:
139 friend class base::RefCountedThreadSafe<
140 ClientSideDetectionHost::ShouldClassifyUrlRequest>;
141
142 // Enum used to keep stats about why the pre-classification check failed.
143 enum PreClassificationCheckFailures {
144 NO_CLASSIFY_PROXY_FETCH,
145 NO_CLASSIFY_PRIVATE_IP,
146 NO_CLASSIFY_OFF_THE_RECORD,
147 NO_CLASSIFY_MATCH_CSD_WHITELIST,
148 NO_CLASSIFY_TOO_MANY_REPORTS,
149 NO_CLASSIFY_UNSUPPORTED_MIME_TYPE,
150
151 NO_CLASSIFY_MAX // Always add new values before this one.
152 };
153
154 // The destructor can be called either from the UI or the IO thread.
~ShouldClassifyUrlRequest()155 virtual ~ShouldClassifyUrlRequest() { }
156
CheckCsdWhitelist(const GURL & url)157 void CheckCsdWhitelist(const GURL& url) {
158 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
159 if (!sb_service_ || sb_service_->MatchCsdWhitelistUrl(url)) {
160 // We're done. There is no point in going back to the UI thread.
161 UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail",
162 NO_CLASSIFY_MATCH_CSD_WHITELIST,
163 NO_CLASSIFY_MAX);
164 return;
165 }
166
167 BrowserThread::PostTask(
168 BrowserThread::UI,
169 FROM_HERE,
170 NewRunnableMethod(this,
171 &ShouldClassifyUrlRequest::CheckCache));
172 }
173
CheckCache()174 void CheckCache() {
175 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
176 if (canceled_) {
177 return;
178 }
179
180 // If result is cached, we don't want to run classification again
181 bool is_phishing;
182 if (csd_service_->GetValidCachedResult(params_.url, &is_phishing)) {
183 VLOG(1) << "Satisfying request for " << params_.url << " from cache";
184 UMA_HISTOGRAM_COUNTS("SBClientPhishing.RequestSatisfiedFromCache", 1);
185 // Since we are already on the UI thread, this is safe.
186 host_->MaybeShowPhishingWarning(params_.url, is_phishing);
187 return;
188 }
189
190 // We want to limit the number of requests, though we will ignore the
191 // limit for urls in the cache. We don't want to start classifying
192 // too many pages as phishing, but for those that we already think are
193 // phishing we want to give ourselves a chance to fix false positives.
194 if (csd_service_->IsInCache(params_.url)) {
195 VLOG(1) << "Reporting limit skipped for " << params_.url
196 << " as it was in the cache.";
197 UMA_HISTOGRAM_COUNTS("SBClientPhishing.ReportLimitSkipped", 1);
198 } else if (csd_service_->OverReportLimit()) {
199 VLOG(1) << "Too many report phishing requests sent recently, "
200 << "not running classification for " << params_.url;
201 UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail",
202 NO_CLASSIFY_TOO_MANY_REPORTS,
203 NO_CLASSIFY_MAX);
204 return;
205 }
206
207 // Everything checks out, so start classification.
208 // |tab_contents_| is safe to call as we will be destructed
209 // before it is.
210 RenderViewHost* rvh = tab_contents_->render_view_host();
211 rvh->Send(new SafeBrowsingMsg_StartPhishingDetection(
212 rvh->routing_id(), params_.url));
213 }
214
215 // No need to protect |canceled_| with a lock because it is only read and
216 // written by the UI thread.
217 bool canceled_;
218 ViewHostMsg_FrameNavigate_Params params_;
219 TabContents* tab_contents_;
220 ClientSideDetectionService* csd_service_;
221 // We keep a ref pointer here just to make sure the service class stays alive
222 // long enough.
223 scoped_refptr<SafeBrowsingService> sb_service_;
224 ClientSideDetectionHost* host_;
225
226 DISALLOW_COPY_AND_ASSIGN(ShouldClassifyUrlRequest);
227 };
228
229 // This class is used to display the phishing interstitial.
230 class CsdClient : public SafeBrowsingService::Client {
231 public:
CsdClient()232 CsdClient() {}
233
234 // Method from SafeBrowsingService::Client. This method is called on the
235 // IO thread once the interstitial is going away. This method simply deletes
236 // the CsdClient object.
OnBlockingPageComplete(bool proceed)237 virtual void OnBlockingPageComplete(bool proceed) {
238 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
239 // Delete this on the UI thread since it was created there.
240 BrowserThread::PostTask(BrowserThread::UI,
241 FROM_HERE,
242 new DeleteTask<CsdClient>(this));
243 }
244
245 private:
246 friend class DeleteTask<CsdClient>; // Calls the private destructor.
247
248 // We're taking care of deleting this object. No-one else should delete
249 // this object.
~CsdClient()250 virtual ~CsdClient() {}
251
252 DISALLOW_COPY_AND_ASSIGN(CsdClient);
253 };
254
ClientSideDetectionHost(TabContents * tab)255 ClientSideDetectionHost::ClientSideDetectionHost(TabContents* tab)
256 : TabContentsObserver(tab),
257 csd_service_(g_browser_process->safe_browsing_detection_service()),
258 cb_factory_(ALLOW_THIS_IN_INITIALIZER_LIST(this)) {
259 DCHECK(tab);
260 // Note: csd_service_ and sb_service_ might be NULL.
261 ResourceDispatcherHost* resource =
262 g_browser_process->resource_dispatcher_host();
263 if (resource) {
264 sb_service_ = resource->safe_browsing_service();
265 }
266 }
267
~ClientSideDetectionHost()268 ClientSideDetectionHost::~ClientSideDetectionHost() {
269 // Tell any pending classification request that it is being canceled.
270 if (classification_request_.get()) {
271 classification_request_->Cancel();
272 }
273 }
274
OnMessageReceived(const IPC::Message & message)275 bool ClientSideDetectionHost::OnMessageReceived(const IPC::Message& message) {
276 bool handled = true;
277 IPC_BEGIN_MESSAGE_MAP(ClientSideDetectionHost, message)
278 IPC_MESSAGE_HANDLER(SafeBrowsingHostMsg_DetectedPhishingSite,
279 OnDetectedPhishingSite)
280 IPC_MESSAGE_UNHANDLED(handled = false)
281 IPC_END_MESSAGE_MAP()
282 return handled;
283 }
284
DidNavigateMainFramePostCommit(const NavigationController::LoadCommittedDetails & details,const ViewHostMsg_FrameNavigate_Params & params)285 void ClientSideDetectionHost::DidNavigateMainFramePostCommit(
286 const NavigationController::LoadCommittedDetails& details,
287 const ViewHostMsg_FrameNavigate_Params& params) {
288 // TODO(noelutz): move this DCHECK to TabContents and fix all the unit tests
289 // that don't call this method on the UI thread.
290 // DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
291
292 if (details.is_in_page) {
293 // If the navigation is within the same page, the user isn't really
294 // navigating away. We don't need to cancel a pending callback or
295 // begin a new classification.
296 return;
297 }
298
299 // If we navigate away and there currently is a pending phishing
300 // report request we have to cancel it to make sure we don't display
301 // an interstitial for the wrong page. Note that this won't cancel
302 // the server ping back but only cancel the showing of the
303 // interstial.
304 cb_factory_.RevokeAll();
305
306 if (csd_service_) {
307 // Cancel any pending classification request.
308 if (classification_request_.get()) {
309 classification_request_->Cancel();
310 }
311
312 // Notify the renderer if it should classify this URL.
313 classification_request_ = new ShouldClassifyUrlRequest(params,
314 tab_contents(),
315 csd_service_,
316 sb_service_,
317 this);
318 classification_request_->Start();
319 }
320 }
321
OnDetectedPhishingSite(const std::string & verdict_str)322 void ClientSideDetectionHost::OnDetectedPhishingSite(
323 const std::string& verdict_str) {
324 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
325 // There is something seriously wrong if there is no service class but
326 // this method is called. The renderer should not start phishing detection
327 // if there isn't any service class in the browser.
328 DCHECK(csd_service_);
329 // We parse the protocol buffer here. If we're unable to parse it we won't
330 // send the verdict further.
331 scoped_ptr<ClientPhishingRequest> verdict(new ClientPhishingRequest);
332 if (csd_service_ &&
333 verdict->ParseFromString(verdict_str) &&
334 verdict->IsInitialized()) {
335 // There shouldn't be any pending requests because we revoke them everytime
336 // we navigate away.
337 DCHECK(!cb_factory_.HasPendingCallbacks());
338 csd_service_->SendClientReportPhishingRequest(
339 verdict.release(), // The service takes ownership of the verdict.
340 cb_factory_.NewCallback(
341 &ClientSideDetectionHost::MaybeShowPhishingWarning));
342 }
343 }
344
MaybeShowPhishingWarning(GURL phishing_url,bool is_phishing)345 void ClientSideDetectionHost::MaybeShowPhishingWarning(GURL phishing_url,
346 bool is_phishing) {
347 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
348 if (is_phishing &&
349 CommandLine::ForCurrentProcess()->HasSwitch(
350 switches::kEnableClientSidePhishingInterstitial)) {
351 DCHECK(tab_contents());
352 // TODO(noelutz): this is not perfect. It's still possible that the
353 // user browses away before the interstitial is shown. Maybe we should
354 // stop all pending navigations?
355 if (sb_service_) {
356 // TODO(noelutz): refactor the SafeBrowsing service class and the
357 // SafeBrowsing blocking page class so that we don't need to depend
358 // on the SafeBrowsingService here and so that we don't need to go
359 // through the IO message loop.
360 std::vector<GURL> redirect_urls;
361 BrowserThread::PostTask(
362 BrowserThread::IO,
363 FROM_HERE,
364 NewRunnableMethod(sb_service_.get(),
365 &SafeBrowsingService::DisplayBlockingPage,
366 phishing_url, phishing_url,
367 redirect_urls,
368 // We only classify the main frame URL.
369 ResourceType::MAIN_FRAME,
370 // TODO(noelutz): create a separate threat type
371 // for client-side phishing detection.
372 SafeBrowsingService::URL_PHISHING,
373 new CsdClient() /* will delete itself */,
374 tab_contents()->GetRenderProcessHost()->id(),
375 tab_contents()->render_view_host()->routing_id()));
376 }
377 }
378 }
379
set_client_side_detection_service(ClientSideDetectionService * service)380 void ClientSideDetectionHost::set_client_side_detection_service(
381 ClientSideDetectionService* service) {
382 csd_service_ = service;
383 }
384
set_safe_browsing_service(SafeBrowsingService * service)385 void ClientSideDetectionHost::set_safe_browsing_service(
386 SafeBrowsingService* service) {
387 sb_service_ = service;
388 }
389
390 } // namespace safe_browsing
391