• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "chrome/renderer/safe_browsing/phishing_classifier.h"
6 
7 #include <string>
8 
9 #include "base/bind.h"
10 #include "base/command_line.h"
11 #include "base/memory/scoped_ptr.h"
12 #include "base/strings/string16.h"
13 #include "base/strings/utf_string_conversions.h"
14 #include "chrome/common/chrome_switches.h"
15 #include "chrome/common/safe_browsing/client_model.pb.h"
16 #include "chrome/common/safe_browsing/csd.pb.h"
17 #include "chrome/renderer/safe_browsing/features.h"
18 #include "chrome/renderer/safe_browsing/mock_feature_extractor_clock.h"
19 #include "chrome/renderer/safe_browsing/murmurhash3_util.h"
20 #include "chrome/renderer/safe_browsing/scorer.h"
21 #include "chrome/test/base/in_process_browser_test.h"
22 #include "chrome/test/base/ui_test_utils.h"
23 #include "content/public/renderer/render_view.h"
24 #include "crypto/sha2.h"
25 #include "net/dns/mock_host_resolver.h"
26 #include "net/test/embedded_test_server/embedded_test_server.h"
27 #include "net/test/embedded_test_server/http_response.h"
28 #include "testing/gmock/include/gmock/gmock.h"
29 #include "url/gurl.h"
30 
31 using ::testing::AllOf;
32 using ::testing::Contains;
33 using ::testing::Not;
34 using ::testing::Pair;
35 
namespace {

// Routing ID of the RenderView the classifier under test is attached to.
// Routing ID 1 goes to the first RenderFrame, so the first RenderView gets 2.
const int kRenderViewRoutingId = 2;

}  // namespace
42 
43 namespace safe_browsing {
44 
45 class PhishingClassifierTest : public InProcessBrowserTest {
46  protected:
PhishingClassifierTest()47   PhishingClassifierTest()
48       : url_tld_token_net_(features::kUrlTldToken + std::string("net")),
49         page_link_domain_phishing_(features::kPageLinkDomain +
50                                    std::string("phishing.com")),
51         page_term_login_(features::kPageTerm + std::string("login")) {
52   }
53 
SetUpCommandLine(CommandLine * command_line)54   virtual void SetUpCommandLine(CommandLine* command_line) OVERRIDE {
55     command_line->AppendSwitch(switches::kSingleProcess);
56 #if defined(OS_WIN)
57     // Don't want to try to create a GPU process.
58     command_line->AppendSwitch(switches::kDisableGpu);
59 #endif
60   }
61 
SetUpOnMainThread()62   virtual void SetUpOnMainThread() OVERRIDE {
63     // Construct a model to test with.  We include one feature from each of
64     // the feature extractors, which allows us to verify that they all ran.
65     ClientSideModel model;
66 
67     model.add_hashes(crypto::SHA256HashString(url_tld_token_net_));
68     model.add_hashes(crypto::SHA256HashString(page_link_domain_phishing_));
69     model.add_hashes(crypto::SHA256HashString(page_term_login_));
70     model.add_hashes(crypto::SHA256HashString("login"));
71     model.add_hashes(crypto::SHA256HashString(features::kUrlTldToken +
72                                               std::string("net")));
73     model.add_hashes(crypto::SHA256HashString(features::kPageLinkDomain +
74                                               std::string("phishing.com")));
75     model.add_hashes(crypto::SHA256HashString(features::kPageTerm +
76                                               std::string("login")));
77     model.add_hashes(crypto::SHA256HashString("login"));
78 
79     // Add a default rule with a non-phishy weight.
80     ClientSideModel::Rule* rule = model.add_rule();
81     rule->set_weight(-1.0);
82 
83     // To give a phishy score, the total weight needs to be >= 0
84     // (0.5 when converted to a probability).  This will only happen
85     // if all of the listed features are present.
86     rule = model.add_rule();
87     rule->add_feature(0);
88     rule->add_feature(1);
89     rule->add_feature(2);
90     rule->set_weight(1.0);
91 
92     model.add_page_term(3);
93     model.set_murmur_hash_seed(2777808611U);
94     model.add_page_word(MurmurHash3String("login", model.murmur_hash_seed()));
95     model.set_max_words_per_term(1);
96     model.set_max_shingles_per_page(100);
97     model.set_shingle_size(3);
98 
99     clock_ = new MockFeatureExtractorClock;
100     scorer_.reset(Scorer::Create(model.SerializeAsString()));
101     ASSERT_TRUE(scorer_.get());
102 
103     classifier_.reset(new PhishingClassifier(
104         content::RenderView::FromRoutingID(kRenderViewRoutingId),
105         clock_));
106   }
107 
TearDownOnMainThread()108   virtual void TearDownOnMainThread() OVERRIDE {
109     content::RunAllPendingInMessageLoop();
110   }
111 
112   // Helper method to start phishing classification and wait for it to
113   // complete.  Returns the true if the page is classified as phishy and
114   // false otherwise.
RunPhishingClassifier(const base::string16 * page_text,float * phishy_score,FeatureMap * features)115   bool RunPhishingClassifier(const base::string16* page_text,
116                              float* phishy_score,
117                              FeatureMap* features) {
118     ClientPhishingRequest verdict;
119     // The classifier accesses the RenderView and must run in the RenderThread.
120     PostTaskToInProcessRendererAndWait(
121         base::Bind(&PhishingClassifierTest::DoRunPhishingClassifier,
122                    base::Unretained(this),
123                    page_text, phishy_score, features, &verdict));
124     return verdict.is_phishing();
125   }
126 
DoRunPhishingClassifier(const base::string16 * page_text,float * phishy_score,FeatureMap * features,ClientPhishingRequest * verdict)127   void DoRunPhishingClassifier(const base::string16* page_text,
128                                float* phishy_score,
129                                FeatureMap* features,
130                                ClientPhishingRequest* verdict) {
131     *phishy_score = PhishingClassifier::kInvalidScore;
132     features->Clear();
133 
134     // Force synchronous behavior for ease of unittesting.
135     base::RunLoop run_loop;
136     classifier_->BeginClassification(
137         page_text,
138         base::Bind(&PhishingClassifierTest::ClassificationFinished,
139                    base::Unretained(this), &run_loop, verdict));
140     content::RunThisRunLoop(&run_loop);
141 
142     *phishy_score = verdict->client_score();
143     for (int i = 0; i < verdict->feature_map_size(); ++i) {
144       features->AddRealFeature(verdict->feature_map(i).name(),
145                                verdict->feature_map(i).value());
146     }
147   }
148 
149   // Completion callback for classification.
ClassificationFinished(base::RunLoop * run_loop,ClientPhishingRequest * verdict_out,const ClientPhishingRequest & verdict)150   void ClassificationFinished(base::RunLoop* run_loop,
151                               ClientPhishingRequest* verdict_out,
152                               const ClientPhishingRequest& verdict) {
153     *verdict_out = verdict;  // Copy the verdict.
154     run_loop->Quit();
155   }
156 
157   scoped_ptr<net::test_server::EmbeddedTestServer> embedded_test_server_;
embedded_test_server()158   net::test_server::EmbeddedTestServer* embedded_test_server() {
159     // TODO(ajwong): Merge this into BrowserTestBase.
160     if (!embedded_test_server_) {
161       embedded_test_server_.reset(new net::test_server::EmbeddedTestServer());
162       embedded_test_server_->RegisterRequestHandler(
163           base::Bind(&PhishingClassifierTest::HandleRequest,
164                      base::Unretained(this)));
165       CHECK(embedded_test_server_->InitializeAndWaitUntilReady());
166     }
167     return embedded_test_server_.get();
168   }
169 
LoadHtml(const std::string & host,const std::string & content)170   void LoadHtml(const std::string& host, const std::string& content) {
171     GURL::Replacements replace_host;
172     replace_host.SetHostStr(host);
173     response_content_ = content;
174     ui_test_utils::NavigateToURL(
175         browser(),
176         embedded_test_server()->base_url().ReplaceComponents(replace_host));
177   }
178 
LoadHtmlPost(const std::string & host,const std::string & content)179   void LoadHtmlPost(const std::string& host, const std::string& content) {
180     GURL::Replacements replace_host;
181     replace_host.SetHostStr(host);
182     response_content_ = content;
183     ui_test_utils::NavigateToURLWithPost(
184         browser(),
185         embedded_test_server()->base_url().ReplaceComponents(replace_host));
186   }
187 
188   scoped_ptr<net::test_server::HttpResponse>
HandleRequest(const net::test_server::HttpRequest & request)189       HandleRequest(const net::test_server::HttpRequest& request) {
190     scoped_ptr<net::test_server::BasicHttpResponse> http_response(
191         new net::test_server::BasicHttpResponse());
192     http_response->set_code(net::HTTP_OK);
193     http_response->set_content_type("text/html");
194     http_response->set_content(response_content_);
195     return http_response.PassAs<net::test_server::HttpResponse>();
196   }
197 
198   std::string response_content_;
199   scoped_ptr<Scorer> scorer_;
200   scoped_ptr<PhishingClassifier> classifier_;
201   MockFeatureExtractorClock* clock_;  // Owned by classifier_.
202 
203   // Features that are in the model.
204   const std::string url_tld_token_net_;
205   const std::string page_link_domain_phishing_;
206   const std::string page_term_login_;
207 };
208 
209 // This test flakes on Mac with force compositing mode.
210 // http://crbug.com/316709
211 #if defined(OS_MACOSX)
212 #define MAYBE_TestClassification DISABLED_TestClassification
213 #else
214 #define MAYBE_TestClassification TestClassification
215 #endif
IN_PROC_BROWSER_TEST_F(PhishingClassifierTest,MAYBE_TestClassification)216 IN_PROC_BROWSER_TEST_F(PhishingClassifierTest, MAYBE_TestClassification) {
217   host_resolver()->AddRule("*", "127.0.0.1");
218 
219   // No scorer yet, so the classifier is not ready.
220   ASSERT_FALSE(classifier_->is_ready());
221 
222   // Now set the scorer.
223   classifier_->set_phishing_scorer(scorer_.get());
224   ASSERT_TRUE(classifier_->is_ready());
225 
226   // This test doesn't exercise the extraction timing.
227   EXPECT_CALL(*clock_, Now())
228       .WillRepeatedly(::testing::Return(base::TimeTicks::Now()));
229 
230   base::string16 page_text = base::ASCIIToUTF16("login");
231   float phishy_score;
232   FeatureMap features;
233 
234   LoadHtml("host.net",
235       "<html><body><a href=\"http://phishing.com/\">login</a></body></html>");
236   EXPECT_TRUE(RunPhishingClassifier(&page_text, &phishy_score, &features));
237   // Note: features.features() might contain other features that simply aren't
238   // in the model.
239   EXPECT_THAT(features.features(),
240               AllOf(Contains(Pair(url_tld_token_net_, 1.0)),
241                     Contains(Pair(page_link_domain_phishing_, 1.0)),
242                     Contains(Pair(page_term_login_, 1.0))));
243   EXPECT_FLOAT_EQ(0.5, phishy_score);
244 
245   // Change the link domain to something non-phishy.
246   LoadHtml("host.net",
247            "<html><body><a href=\"http://safe.com/\">login</a></body></html>");
248   EXPECT_FALSE(RunPhishingClassifier(&page_text, &phishy_score, &features));
249   EXPECT_THAT(features.features(),
250               AllOf(Contains(Pair(url_tld_token_net_, 1.0)),
251                     Contains(Pair(page_term_login_, 1.0))));
252   EXPECT_THAT(features.features(),
253               Not(Contains(Pair(page_link_domain_phishing_, 1.0))));
254   EXPECT_GE(phishy_score, 0.0);
255   EXPECT_LT(phishy_score, 0.5);
256 
257   // Extraction should fail for this case since there is no TLD.
258   LoadHtml("localhost", "<html><body>content</body></html>");
259   EXPECT_FALSE(RunPhishingClassifier(&page_text, &phishy_score, &features));
260   EXPECT_EQ(0U, features.features().size());
261   EXPECT_EQ(PhishingClassifier::kInvalidScore, phishy_score);
262 
263   // Extraction should also fail for this case because the URL is not http.
264   net::SpawnedTestServer https_server(
265       net::SpawnedTestServer::TYPE_HTTPS,
266       net::SpawnedTestServer::kLocalhost,
267       base::FilePath(FILE_PATH_LITERAL("chrome/test/data")));
268   ASSERT_TRUE(https_server.Start());
269   std::string host_str("host.net");  // Must outlive replace_host.
270   GURL::Replacements replace_host;
271   replace_host.SetHostStr(host_str);
272   GURL test_url = https_server.GetURL("/files/title1.html");
273   ui_test_utils::NavigateToURL(browser(),
274                                test_url.ReplaceComponents(replace_host));
275   EXPECT_FALSE(RunPhishingClassifier(&page_text, &phishy_score, &features));
276   EXPECT_EQ(0U, features.features().size());
277   EXPECT_EQ(PhishingClassifier::kInvalidScore, phishy_score);
278 
279   // Extraction should fail for this case because the URL is a POST request.
280   LoadHtmlPost("host.net", "<html><body>content</body></html>");
281   EXPECT_FALSE(RunPhishingClassifier(&page_text, &phishy_score, &features));
282   EXPECT_EQ(0U, features.features().size());
283   EXPECT_EQ(PhishingClassifier::kInvalidScore, phishy_score);
284 }
285 
286 // Test flakes with LSAN enabled. See http://crbug.com/373155.
287 #if defined(LEAK_SANITIZER)
288 #define MAYBE_DisableDetection DISABLED_DisableDetection
289 #else
290 #define MAYBE_DisableDetection DisableDetection
291 #endif
IN_PROC_BROWSER_TEST_F(PhishingClassifierTest,MAYBE_DisableDetection)292 IN_PROC_BROWSER_TEST_F(PhishingClassifierTest, MAYBE_DisableDetection) {
293   // No scorer yet, so the classifier is not ready.
294   EXPECT_FALSE(classifier_->is_ready());
295 
296   // Now set the scorer.
297   classifier_->set_phishing_scorer(scorer_.get());
298   EXPECT_TRUE(classifier_->is_ready());
299 
300   // Set a NULL scorer, which turns detection back off.
301   classifier_->set_phishing_scorer(NULL);
302   EXPECT_FALSE(classifier_->is_ready());
303 }
304 
305 }  // namespace safe_browsing
306