// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4 
#include <string.h>

#include <algorithm>
#include <string>
#include <vector>

#include "base/string_util.h"
#include "chrome/browser/safe_browsing/safe_browsing_util.h"
#include "crypto/sha2.h"
#include "googleurl/src/gurl.h"
#include "testing/gtest/include/gtest/gtest.h"
12 
namespace {

// Returns true if |str| compares equal to at least one element of |data|,
// and false otherwise (including when |data| is empty).
bool VectorContains(const std::vector<std::string>& data,
                    const std::string& str) {
  return std::find(data.begin(), data.end(), str) != data.end();
}

}  // namespace
21 
22 // Tests that we generate the required host/path combinations for testing
23 // according to the Safe Browsing spec.
24 // See section 6.2 in
25 // http://code.google.com/p/google-safe-browsing/wiki/Protocolv2Spec.
TEST(SafeBrowsingUtilTest,UrlParsing)26 TEST(SafeBrowsingUtilTest, UrlParsing) {
27   std::vector<std::string> hosts, paths;
28 
29   GURL url("http://a.b.c/1/2.html?param=1");
30   safe_browsing_util::GenerateHostsToCheck(url, &hosts);
31   safe_browsing_util::GeneratePathsToCheck(url, &paths);
32   EXPECT_EQ(hosts.size(), static_cast<size_t>(2));
33   EXPECT_EQ(paths.size(), static_cast<size_t>(4));
34   EXPECT_EQ(hosts[0], "b.c");
35   EXPECT_EQ(hosts[1], "a.b.c");
36 
37   EXPECT_TRUE(VectorContains(paths, "/1/2.html?param=1"));
38   EXPECT_TRUE(VectorContains(paths, "/1/2.html"));
39   EXPECT_TRUE(VectorContains(paths, "/1/"));
40   EXPECT_TRUE(VectorContains(paths, "/"));
41 
42   url = GURL("http://a.b.c.d.e.f.g/1.html");
43   safe_browsing_util::GenerateHostsToCheck(url, &hosts);
44   safe_browsing_util::GeneratePathsToCheck(url, &paths);
45   EXPECT_EQ(hosts.size(), static_cast<size_t>(5));
46   EXPECT_EQ(paths.size(), static_cast<size_t>(2));
47   EXPECT_EQ(hosts[0], "f.g");
48   EXPECT_EQ(hosts[1], "e.f.g");
49   EXPECT_EQ(hosts[2], "d.e.f.g");
50   EXPECT_EQ(hosts[3], "c.d.e.f.g");
51   EXPECT_EQ(hosts[4], "a.b.c.d.e.f.g");
52   EXPECT_TRUE(VectorContains(paths, "/1.html"));
53   EXPECT_TRUE(VectorContains(paths, "/"));
54 
55   url = GURL("http://a.b/saw-cgi/eBayISAPI.dll/");
56   safe_browsing_util::GeneratePathsToCheck(url, &paths);
57   EXPECT_EQ(paths.size(), static_cast<size_t>(3));
58   EXPECT_TRUE(VectorContains(paths, "/saw-cgi/eBayISAPI.dll/"));
59   EXPECT_TRUE(VectorContains(paths, "/saw-cgi/"));
60   EXPECT_TRUE(VectorContains(paths, "/"));
61 }
62 
63 // Tests the url canonicalization according to the Safe Browsing spec.
64 // See section 6.1 in
65 // http://code.google.com/p/google-safe-browsing/wiki/Protocolv2Spec.
TEST(SafeBrowsingUtilTest,CanonicalizeUrl)66 TEST(SafeBrowsingUtilTest, CanonicalizeUrl) {
67   struct {
68     const char* input_url;
69     const char* expected_canonicalized_hostname;
70     const char* expected_canonicalized_path;
71     const char* expected_canonicalized_query;
72   } tests[] = {
73     {
74       "http://host/%25%32%35",
75       "host",
76       "/%25",
77       ""
78     }, {
79       "http://host/%25%32%35%25%32%35",
80       "host",
81       "/%25%25",
82       ""
83     }, {
84       "http://host/%2525252525252525",
85       "host",
86       "/%25",
87       ""
88     }, {
89       "http://host/asdf%25%32%35asd",
90       "host",
91       "/asdf%25asd",
92       ""
93     }, {
94       "http://host/%%%25%32%35asd%%",
95       "host",
96       "/%25%25%25asd%25%25",
97       ""
98     }, {
99       "http://host/%%%25%32%35asd%%",
100       "host",
101       "/%25%25%25asd%25%25",
102       ""
103     }, {
104       "http://www.google.com/",
105       "www.google.com",
106       "/",
107       ""
108     }, {
109       "http://%31%36%38%2e%31%38%38%2e%39%39%2e%32%36/%2E%73%65%63%75%72%65/%77"
110           "%77%77%2E%65%62%61%79%2E%63%6F%6D/",
111       "168.188.99.26",
112       "/.secure/www.ebay.com/",
113       ""
114     }, {
115       "http://195.127.0.11/uploads/%20%20%20%20/.verify/.eBaysecure=updateuserd"
116           "ataxplimnbqmn-xplmvalidateinfoswqpcmlx=hgplmcx/",
117       "195.127.0.11",
118       "/uploads/%20%20%20%20/.verify/.eBaysecure=updateuserdataxplimnbqmn-xplmv"
119           "alidateinfoswqpcmlx=hgplmcx/",
120       ""
121     }, {
122       "http://host.com/%257Ea%2521b%2540c%2523d%2524e%25f%255E00%252611%252A"
123           "22%252833%252944_55%252B",
124       "host.com",
125       "/~a!b@c%23d$e%25f^00&11*22(33)44_55+",
126       ""
127     }, {
128       "http://3279880203/blah",
129       "195.127.0.11",
130       "/blah",
131       ""
132     }, {
133       "http://www.google.com/blah/..",
134       "www.google.com",
135       "/",
136       ""
137     }, {
138       "http://www.google.com/blah#fraq",
139       "www.google.com",
140       "/blah",
141       ""
142     }, {
143       "http://www.GOOgle.com/",
144       "www.google.com",
145       "/",
146       ""
147     }, {
148       "http://www.google.com.../",
149       "www.google.com",
150       "/",
151       ""
152     }, {
153       "http://www.google.com/q?",
154       "www.google.com",
155       "/q",
156       ""
157     }, {
158       "http://www.google.com/q?r?",
159       "www.google.com",
160       "/q",
161       "r?"
162     }, {
163       "http://www.google.com/q?r?s",
164       "www.google.com",
165       "/q",
166       "r?s"
167     }, {
168       "http://evil.com/foo#bar#baz",
169       "evil.com",
170       "/foo",
171       ""
172     }, {
173       "http://evil.com/foo;",
174       "evil.com",
175       "/foo;",
176       ""
177     }, {
178       "http://evil.com/foo?bar;",
179       "evil.com",
180       "/foo",
181       "bar;"
182     }, {
183       "http://notrailingslash.com",
184       "notrailingslash.com",
185       "/",
186       ""
187     }, {
188       "http://www.gotaport.com:1234/",
189       "www.gotaport.com",
190       "/",
191       ""
192     }, {
193       "  http://www.google.com/  ",
194       "www.google.com",
195       "/",
196       ""
197     }, {
198       "http:// leadingspace.com/",
199       "%20leadingspace.com",
200       "/",
201       ""
202     }, {
203       "http://%20leadingspace.com/",
204       "%20leadingspace.com",
205       "/",
206       ""
207     }, {
208       "https://www.securesite.com/",
209       "www.securesite.com",
210       "/",
211       ""
212     }, {
213       "http://host.com/ab%23cd",
214       "host.com",
215       "/ab%23cd",
216       ""
217     }, {
218       "http://host%3e.com//twoslashes?more//slashes",
219       "host>.com",
220       "/twoslashes",
221       "more//slashes"
222     }, {
223       "http://host.com/abc?val=xyz#anything",
224       "host.com",
225       "/abc",
226       "val=xyz"
227     }, {
228       "http://abc:def@host.com/xyz",
229       "host.com",
230       "/xyz",
231       ""
232     }, {
233       "http://host%3e.com/abc/%2e%2e%2fdef",
234       "host>.com",
235       "/def",
236       ""
237     }, {
238       "http://.......host...com.....//abc/////def%2F%2F%2Fxyz",
239       "host.com",
240       "/abc/def/xyz",
241       ""
242     }, {
243       "ftp://host.com/foo?bar",
244       "host.com",
245       "/foo",
246       "bar"
247     }, {
248       "data:text/html;charset=utf-8,%0D%0A",
249       "",
250       "",
251       ""
252     }, {
253       "javascript:alert()",
254       "",
255       "",
256       ""
257     }, {
258       "mailto:abc@example.com",
259       "",
260       "",
261       ""
262     },
263   };
264   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(tests); ++i) {
265     SCOPED_TRACE(StringPrintf("Test: %s", tests[i].input_url));
266     GURL url(tests[i].input_url);
267 
268     std::string canonicalized_hostname;
269     std::string canonicalized_path;
270     std::string canonicalized_query;
271     safe_browsing_util::CanonicalizeUrl(url, &canonicalized_hostname,
272         &canonicalized_path, &canonicalized_query);
273 
274     EXPECT_EQ(tests[i].expected_canonicalized_hostname,
275               canonicalized_hostname);
276     EXPECT_EQ(tests[i].expected_canonicalized_path,
277               canonicalized_path);
278     EXPECT_EQ(tests[i].expected_canonicalized_query,
279               canonicalized_query);
280   }
281 }
282 
// Tests that GetUrlHashIndex() reports index 0 for a URL whose host + path
// hash is the only entry in the list, and -1 for a URL with another path.
TEST(SafeBrowsingUtilTest, GetUrlHashIndex) {
  GURL url("http://www.evil.com/phish.html");
  SBFullHashResult full_hash;
  // Hash the host immediately followed by the path; the scheme is not part of
  // the hashed string.
  crypto::SHA256HashString(url.host() + url.path(),
                           &full_hash.hash,
                           sizeof(SBFullHash));
  std::vector<SBFullHashResult> full_hashes;
  full_hashes.push_back(full_hash);

  EXPECT_EQ(0, safe_browsing_util::GetUrlHashIndex(url, full_hashes));

  // Same host, different path: no entry in |full_hashes| should match.
  url = GURL("http://www.evil.com/okay_path.html");
  EXPECT_EQ(-1, safe_browsing_util::GetUrlHashIndex(url, full_hashes));
}
297 
// Tests that GetListName() and GetListId() are inverses of each other for
// every known list id, and that GetListName() fails for INVALID.
TEST(SafeBrowsingUtilTest, ListIdListNameConversion) {
  std::string list_name;
  EXPECT_FALSE(safe_browsing_util::GetListName(safe_browsing_util::INVALID,
                                               &list_name));

  EXPECT_TRUE(safe_browsing_util::GetListName(safe_browsing_util::MALWARE,
                                              &list_name));
  EXPECT_EQ(std::string(safe_browsing_util::kMalwareList), list_name);
  EXPECT_EQ(safe_browsing_util::MALWARE,
            safe_browsing_util::GetListId(list_name));

  EXPECT_TRUE(safe_browsing_util::GetListName(safe_browsing_util::PHISH,
                                              &list_name));
  EXPECT_EQ(std::string(safe_browsing_util::kPhishingList), list_name);
  EXPECT_EQ(safe_browsing_util::PHISH,
            safe_browsing_util::GetListId(list_name));

  EXPECT_TRUE(safe_browsing_util::GetListName(safe_browsing_util::BINURL,
                                              &list_name));
  EXPECT_EQ(std::string(safe_browsing_util::kBinUrlList), list_name);
  EXPECT_EQ(safe_browsing_util::BINURL,
            safe_browsing_util::GetListId(list_name));

  EXPECT_TRUE(safe_browsing_util::GetListName(safe_browsing_util::BINHASH,
                                              &list_name));
  EXPECT_EQ(std::string(safe_browsing_util::kBinHashList), list_name);
  EXPECT_EQ(safe_browsing_util::BINHASH,
            safe_browsing_util::GetListId(list_name));
}
327 
328 // Since the ids are saved in file, we need to make sure they don't change.
329 // Since only the last bit of each id is saved in file together with
330 // chunkids, this checks only last bit.
TEST(SafeBrowsingUtilTest,ListIdVerification)331 TEST(SafeBrowsingUtilTest, ListIdVerification) {
332   EXPECT_EQ(0, safe_browsing_util::MALWARE % 2);
333   EXPECT_EQ(1, safe_browsing_util::PHISH % 2);
334   EXPECT_EQ(0, safe_browsing_util::BINURL %2);
335   EXPECT_EQ(1, safe_browsing_util::BINHASH % 2);
336 }
337 
// Tests that converting a string to an SBFullHash and back again yields the
// original string, and that the hash holds the string's bytes unchanged.
TEST(SafeBrowsingUtilTest, StringToSBFullHashAndSBFullHashToString) {
  // 32 characters, all of which are compared against |full_hash| below.
  const std::string hash_in = "12345678902234567890323456789012";
  // memcmp() below reads sizeof(SBFullHash) bytes from |hash_in|; stop here
  // if the sizes differ rather than reading past the end of the string.
  ASSERT_EQ(sizeof(SBFullHash), hash_in.size());

  SBFullHash hash_out;
  safe_browsing_util::StringToSBFullHash(hash_in, &hash_out);
  // 0x34333231 is the leading bytes "1234" (0x31 0x32 0x33 0x34) read as a
  // little-endian 32-bit value.
  // NOTE(review): this expectation looks host-endianness dependent - confirm.
  EXPECT_EQ(0x34333231, hash_out.prefix);
  EXPECT_EQ(0, memcmp(hash_in.data(), hash_out.full_hash, sizeof(SBFullHash)));

  std::string hash_final = safe_browsing_util::SBFullHashToString(hash_out);
  EXPECT_EQ(hash_in, hash_final);
}
349