1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include <algorithm>
6
7 #include "base/string_util.h"
8 #include "crypto/sha2.h"
9 #include "chrome/browser/safe_browsing/safe_browsing_util.h"
10 #include "googleurl/src/gurl.h"
11 #include "testing/gtest/include/gtest/gtest.h"
12
namespace {

// Returns true if |str| compares equal to at least one element of |data|,
// false otherwise (including when |data| is empty).
bool VectorContains(const std::vector<std::string>& data,
                    const std::string& str) {
  typedef std::vector<std::string>::const_iterator Iter;
  for (Iter it = data.begin(); it != data.end(); ++it) {
    if (*it == str)
      return true;
  }
  return false;
}

}  // namespace
21
22 // Tests that we generate the required host/path combinations for testing
23 // according to the Safe Browsing spec.
24 // See section 6.2 in
25 // http://code.google.com/p/google-safe-browsing/wiki/Protocolv2Spec.
TEST(SafeBrowsingUtilTest, UrlParsing) {
  // The same two vectors are reused for every URL below. The expected counts
  // only hold if each Generate*ToCheck() call replaces the previous contents
  // rather than appending to them.
  std::vector<std::string> hosts;
  std::vector<std::string> paths;

  // Three-component host with a query string: expects the exact host plus one
  // shorter suffix, and the path with/without the query plus each directory
  // prefix down to "/".
  GURL url("http://a.b.c/1/2.html?param=1");
  safe_browsing_util::GenerateHostsToCheck(url, &hosts);
  safe_browsing_util::GeneratePathsToCheck(url, &paths);
  // ASSERT (fatal) rather than EXPECT: the indexed reads of |hosts| below
  // would run past the end of the vector if fewer entries were generated.
  ASSERT_EQ(static_cast<size_t>(2), hosts.size());
  EXPECT_EQ(static_cast<size_t>(4), paths.size());
  EXPECT_EQ("b.c", hosts[0]);
  EXPECT_EQ("a.b.c", hosts[1]);

  EXPECT_TRUE(VectorContains(paths, "/1/2.html?param=1"));
  EXPECT_TRUE(VectorContains(paths, "/1/2.html"));
  EXPECT_TRUE(VectorContains(paths, "/1/"));
  EXPECT_TRUE(VectorContains(paths, "/"));

  // Seven-component host: expects four suffixes (two to five trailing
  // components) followed by the exact host; "b.c.d.e.f.g" is not generated.
  url = GURL("http://a.b.c.d.e.f.g/1.html");
  safe_browsing_util::GenerateHostsToCheck(url, &hosts);
  safe_browsing_util::GeneratePathsToCheck(url, &paths);
  ASSERT_EQ(static_cast<size_t>(5), hosts.size());
  EXPECT_EQ(static_cast<size_t>(2), paths.size());
  EXPECT_EQ("f.g", hosts[0]);
  EXPECT_EQ("e.f.g", hosts[1]);
  EXPECT_EQ("d.e.f.g", hosts[2]);
  EXPECT_EQ("c.d.e.f.g", hosts[3]);
  EXPECT_EQ("a.b.c.d.e.f.g", hosts[4]);
  EXPECT_TRUE(VectorContains(paths, "/1.html"));
  EXPECT_TRUE(VectorContains(paths, "/"));

  // Path that already ends in '/': only the path prefixes are checked here,
  // |hosts| is left untouched from the previous case.
  url = GURL("http://a.b/saw-cgi/eBayISAPI.dll/");
  safe_browsing_util::GeneratePathsToCheck(url, &paths);
  EXPECT_EQ(static_cast<size_t>(3), paths.size());
  EXPECT_TRUE(VectorContains(paths, "/saw-cgi/eBayISAPI.dll/"));
  EXPECT_TRUE(VectorContains(paths, "/saw-cgi/"));
  EXPECT_TRUE(VectorContains(paths, "/"));
}
62
63 // Tests the url canonicalization according to the Safe Browsing spec.
64 // See section 6.1 in
65 // http://code.google.com/p/google-safe-browsing/wiki/Protocolv2Spec.
TEST(SafeBrowsingUtilTest, CanonicalizeUrl) {
  // Each entry pairs an input URL with the hostname, path and query that
  // CanonicalizeUrl() is expected to write to its three output strings.
  const struct {
    const char* input_url;
    const char* expected_canonicalized_hostname;
    const char* expected_canonicalized_path;
    const char* expected_canonicalized_query;
  } tests[] = {
    // Repeatedly percent-escaped paths collapse to one level of escaping.
    {
      "http://host/%25%32%35",
      "host",
      "/%25",
      ""
    }, {
      "http://host/%25%32%35%25%32%35",
      "host",
      "/%25%25",
      ""
    }, {
      "http://host/%2525252525252525",
      "host",
      "/%25",
      ""
    }, {
      "http://host/asdf%25%32%35asd",
      "host",
      "/asdf%25asd",
      ""
    }, {
      // Bare '%' characters come out escaped as %25.
      "http://host/%%%25%32%35asd%%",
      "host",
      "/%25%25%25asd%25%25",
      ""
    }, {
      "http://www.google.com/",
      "www.google.com",
      "/",
      ""
    }, {
      // Escaped characters in both the host and the path are unescaped.
      "http://%31%36%38%2e%31%38%38%2e%39%39%2e%32%36/%2E%73%65%63%75%72%65/%77"
      "%77%77%2E%65%62%61%79%2E%63%6F%6D/",
      "168.188.99.26",
      "/.secure/www.ebay.com/",
      ""
    }, {
      // Escaped spaces in the path stay escaped.
      "http://195.127.0.11/uploads/%20%20%20%20/.verify/.eBaysecure=updateuserd"
      "ataxplimnbqmn-xplmvalidateinfoswqpcmlx=hgplmcx/",
      "195.127.0.11",
      "/uploads/%20%20%20%20/.verify/.eBaysecure=updateuserdataxplimnbqmn-xplmv"
      "alidateinfoswqpcmlx=hgplmcx/",
      ""
    }, {
      // Doubly-escaped punctuation: most characters end up literal, while '#'
      // and '%' remain escaped (%23, %25).
      "http://host.com/%257Ea%2521b%2540c%2523d%2524e%25f%255E00%252611%252A"
      "22%252833%252944_55%252B",
      "host.com",
      "/~a!b@c%23d$e%25f^00&11*22(33)44_55+",
      ""
    }, {
      // A single decimal integer host becomes a dotted-quad IP address.
      "http://3279880203/blah",
      "195.127.0.11",
      "/blah",
      ""
    }, {
      // ".." path segments are resolved.
      "http://www.google.com/blah/..",
      "www.google.com",
      "/",
      ""
    }, {
      // Fragments are dropped.
      "http://www.google.com/blah#fraq",
      "www.google.com",
      "/blah",
      ""
    }, {
      // Hostnames are lower-cased.
      "http://www.GOOgle.com/",
      "www.google.com",
      "/",
      ""
    }, {
      // Trailing dots on the hostname are removed.
      "http://www.google.com.../",
      "www.google.com",
      "/",
      ""
    }, {
      // Query handling: an empty query yields "", later '?' characters are
      // kept as part of the query.
      "http://www.google.com/q?",
      "www.google.com",
      "/q",
      ""
    }, {
      "http://www.google.com/q?r?",
      "www.google.com",
      "/q",
      "r?"
    }, {
      "http://www.google.com/q?r?s",
      "www.google.com",
      "/q",
      "r?s"
    }, {
      "http://evil.com/foo#bar#baz",
      "evil.com",
      "/foo",
      ""
    }, {
      // ';' is preserved in both path and query.
      "http://evil.com/foo;",
      "evil.com",
      "/foo;",
      ""
    }, {
      "http://evil.com/foo?bar;",
      "evil.com",
      "/foo",
      "bar;"
    }, {
      // A missing path becomes "/".
      "http://notrailingslash.com",
      "notrailingslash.com",
      "/",
      ""
    }, {
      // The port is not part of the canonicalized hostname.
      "http://www.gotaport.com:1234/",
      "www.gotaport.com",
      "/",
      ""
    }, {
      // Whitespace around the whole URL is ignored...
      " http://www.google.com/ ",
      "www.google.com",
      "/",
      ""
    }, {
      // ...but a space inside the host is kept, escaped as %20.
      "http:// leadingspace.com/",
      "%20leadingspace.com",
      "/",
      ""
    }, {
      "http://%20leadingspace.com/",
      "%20leadingspace.com",
      "/",
      ""
    }, {
      "https://www.securesite.com/",
      "www.securesite.com",
      "/",
      ""
    }, {
      "http://host.com/ab%23cd",
      "host.com",
      "/ab%23cd",
      ""
    }, {
      // Duplicate slashes are collapsed in the path but not in the query.
      "http://host%3e.com//twoslashes?more//slashes",
      "host>.com",
      "/twoslashes",
      "more//slashes"
    }, {
      "http://host.com/abc?val=xyz#anything",
      "host.com",
      "/abc",
      "val=xyz"
    }, {
      // Username and password are dropped.
      "http://abc:def@host.com/xyz",
      "host.com",
      "/xyz",
      ""
    }, {
      // Escaped "../" is unescaped and then resolved.
      "http://host%3e.com/abc/%2e%2e%2fdef",
      "host>.com",
      "/def",
      ""
    }, {
      // Leading, trailing and repeated dots in the host are cleaned up;
      // escaped and literal runs of '/' are collapsed.
      "http://.......host...com.....//abc/////def%2F%2F%2Fxyz",
      "host.com",
      "/abc/def/xyz",
      ""
    }, {
      "ftp://host.com/foo?bar",
      "host.com",
      "/foo",
      "bar"
    }, {
      // These schemes are expected to leave all three outputs empty.
      "data:text/html;charset=utf-8,%0D%0A",
      "",
      "",
      ""
    }, {
      "javascript:alert()",
      "",
      "",
      ""
    }, {
      "mailto:abc@example.com",
      "",
      "",
      ""
    },
  };
  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(tests); ++i) {
    // Tag any failure below with the input URL that produced it.
    SCOPED_TRACE(StringPrintf("Test: %s", tests[i].input_url));
    GURL url(tests[i].input_url);

    std::string canonicalized_hostname;
    std::string canonicalized_path;
    std::string canonicalized_query;
    safe_browsing_util::CanonicalizeUrl(url, &canonicalized_hostname,
        &canonicalized_path, &canonicalized_query);

    EXPECT_EQ(tests[i].expected_canonicalized_hostname,
              canonicalized_hostname);
    EXPECT_EQ(tests[i].expected_canonicalized_path,
              canonicalized_path);
    EXPECT_EQ(tests[i].expected_canonicalized_query,
              canonicalized_query);
  }
}
282
// Tests that GetUrlHashIndex() returns the index of the full hash matching a
// URL, and -1 when none of the supplied hashes match.
TEST(SafeBrowsingUtilTest, GetUrlHashIndex) {
  GURL url("http://www.evil.com/phish.html");

  // Build a one-element hash list whose only entry is the SHA-256 of the
  // URL's host concatenated with its path.
  SBFullHashResult full_hash;
  crypto::SHA256HashString(url.host() + url.path(),
                           &full_hash.hash,
                           sizeof(SBFullHash));
  std::vector<SBFullHashResult> full_hashes;
  full_hashes.push_back(full_hash);

  // The hashed URL is found at position 0.
  EXPECT_EQ(0, safe_browsing_util::GetUrlHashIndex(url, full_hashes));

  // Same host, different path: no entry matches.
  url = GURL("http://www.evil.com/okay_path.html");
  EXPECT_EQ(-1, safe_browsing_util::GetUrlHashIndex(url, full_hashes));
}
297
// Tests that GetListName() and GetListId() are inverses of each other for
// every known list id, and that GetListName() rejects INVALID.
TEST(SafeBrowsingUtilTest, ListIdListNameConversion) {
  std::string list_name;

  // No name exists for the INVALID id.
  EXPECT_FALSE(safe_browsing_util::GetListName(safe_browsing_util::INVALID,
                                               &list_name));

  // For each valid id: id -> name yields the matching k*List constant, and
  // name -> id yields the original id again.
  EXPECT_TRUE(safe_browsing_util::GetListName(safe_browsing_util::MALWARE,
                                              &list_name));
  EXPECT_EQ(std::string(safe_browsing_util::kMalwareList), list_name);
  EXPECT_EQ(safe_browsing_util::MALWARE,
            safe_browsing_util::GetListId(list_name));

  EXPECT_TRUE(safe_browsing_util::GetListName(safe_browsing_util::PHISH,
                                              &list_name));
  EXPECT_EQ(std::string(safe_browsing_util::kPhishingList), list_name);
  EXPECT_EQ(safe_browsing_util::PHISH,
            safe_browsing_util::GetListId(list_name));

  EXPECT_TRUE(safe_browsing_util::GetListName(safe_browsing_util::BINURL,
                                              &list_name));
  EXPECT_EQ(std::string(safe_browsing_util::kBinUrlList), list_name);
  EXPECT_EQ(safe_browsing_util::BINURL,
            safe_browsing_util::GetListId(list_name));

  EXPECT_TRUE(safe_browsing_util::GetListName(safe_browsing_util::BINHASH,
                                              &list_name));
  EXPECT_EQ(std::string(safe_browsing_util::kBinHashList), list_name);
  EXPECT_EQ(safe_browsing_util::BINHASH,
            safe_browsing_util::GetListId(list_name));
}
327
328 // Since the ids are saved in file, we need to make sure they don't change.
329 // Since only the last bit of each id is saved in file together with
330 // chunkids, this checks only last bit.
TEST(SafeBrowsingUtilTest, ListIdVerification) {
  // Parity (lowest bit) of each list id.
  const int malware_low_bit = safe_browsing_util::MALWARE % 2;
  const int phish_low_bit = safe_browsing_util::PHISH % 2;
  const int binurl_low_bit = safe_browsing_util::BINURL % 2;
  const int binhash_low_bit = safe_browsing_util::BINHASH % 2;

  // MALWARE and BINURL must stay even; PHISH and BINHASH must stay odd.
  EXPECT_EQ(0, malware_low_bit);
  EXPECT_EQ(1, phish_low_bit);
  EXPECT_EQ(0, binurl_low_bit);
  EXPECT_EQ(1, binhash_low_bit);
}
337
// Tests that a string converted with StringToSBFullHash() exposes the
// expected prefix and bytes, and that SBFullHashToString() converts it back
// to the original string.
TEST(SafeBrowsingUtilTest, StringToSBFullHashAndSBFullHashToString) {
  // 32 characters of hash data (the literal's terminating \0 is not part of
  // the std::string contents).
  const std::string original = "12345678902234567890323456789012";

  SBFullHash parsed;
  safe_browsing_util::StringToSBFullHash(original, &parsed);

  // 0x34333231 is the leading "1234" read as a little-endian 32-bit value;
  // presumably |prefix| overlays the first four hash bytes -- confirm
  // against the SBFullHash definition.
  EXPECT_EQ(0x34333231, parsed.prefix);
  EXPECT_EQ(0, memcmp(original.data(), parsed.full_hash, sizeof(SBFullHash)));

  const std::string round_tripped =
      safe_browsing_util::SBFullHashToString(parsed);
  EXPECT_EQ(original, round_tripped);
}
349