1 // Copyright 2013 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "net/base/url_util.h"
6
7 #include <optional>
8 #include <ostream>
9
10 #include "base/format_macros.h"
11 #include "base/strings/utf_string_conversions.h"
12 #include "base/test/scoped_feature_list.h"
13 #include "testing/gtest/include/gtest/gtest.h"
14 #include "url/gurl.h"
15 #include "url/scheme_host_port.h"
16 #include "url/url_features.h"
17 #include "url/url_util.h"
18
19 using base::ASCIIToUTF16;
20 using base::WideToUTF16;
21
22 namespace net {
23 namespace {
24
// Checks that AppendQueryParameter() adds a `name=value` pair to a URL's
// query, both when the URL has no query yet and when it already has one.
TEST(UrlUtilTest, AppendQueryParameter) {
  const GURL without_query("http://example.com/path");
  const GURL with_query("http://example.com/path?existing=one");

  // No query component yet: the pair becomes the whole query.
  EXPECT_EQ("http://example.com/path?name=value",
            AppendQueryParameter(without_query, "name", "value").spec());

  // An existing query is kept as-is and the new pair is joined with '&'.
  EXPECT_EQ("http://example.com/path?existing=one&name=value",
            AppendQueryParameter(with_query, "name", "value").spec());

  // Unsafe characters in the name or value are escaped: the space becomes
  // '+' and the '=' becomes "%3D".
  EXPECT_EQ("http://example.com/path?existing=one&na+me=v.alue%3D",
            AppendQueryParameter(with_query, "na me", "v.alue=").spec());
}
45
// Checks AppendOrReplaceQueryParameter() in its three modes: appending a new
// pair, replacing the value of an existing key, and removing a key when the
// value is std::nullopt.
TEST(UrlUtilTest, AppendOrReplaceQueryParameter) {
  // Sets `name` to `value` on `url` and returns the resulting spec.
  const auto set_param = [](const char* url, const char* name,
                            const char* value) {
    return AppendOrReplaceQueryParameter(GURL(url), name, value).spec();
  };
  // Removes `name` from `url` (nullopt value) and returns the resulting spec.
  const auto remove_param = [](const char* url, const char* name) {
    return AppendOrReplaceQueryParameter(GURL(url), name, std::nullopt).spec();
  };

  // ----- Appending -----

  // URL without a query component.
  EXPECT_EQ("http://example.com/path?name=value",
            set_param("http://example.com/path", "name", "value"));

  // URL with a query component: the existing part is preserved and the new
  // pair is appended with '&'.
  EXPECT_EQ(
      "http://example.com/path?existing=one&name=value",
      set_param("http://example.com/path?existing=one", "name", "value"));

  // Unsafe characters in the appended pair are escaped.
  EXPECT_EQ(
      "http://example.com/path?existing=one&na+me=v.alue%3D",
      set_param("http://example.com/path?existing=one", "na me", "v.alue="));

  // ----- Replacing -----

  // Replace the value of an existing parameter.
  EXPECT_EQ("http://example.com/path?existing=one&name=new",
            set_param("http://example.com/path?existing=one&name=old", "name",
                      "new"));

  // Replace a pair whose name and value contain unsafe characters; they are
  // escaped in the output.
  EXPECT_EQ("http://example.com/path?na+me=n.ew%3D&existing=one",
            set_param("http://example.com/path?na+me=old&existing=one",
                      "na me", "n.ew="));

  // Only the first parameter with the given name is replaced.
  EXPECT_EQ(
      "http://example.com/path?name=new&existing=one&name=old",
      set_param("http://example.com/path?name=old&existing=one&name=old",
                "name", "new"));

  // Malformed or unusual parameters elsewhere in the query are carried over
  // verbatim.
  EXPECT_EQ(
      "http://example.com/path?bar&name=new&left=&"
      "=right&=&&name=again",
      set_param("http://example.com/path?bar&name=old&left=&"
                "=right&=&&name=again",
                "name", "new"));

  // ----- Removing the key using nullopt value -----

  // The pair is removed and the other query parameters are preserved.
  EXPECT_EQ("http://example.com/path?abc=xyz",
            remove_param("http://example.com/path?name=value&abc=xyz", "name"));

  // Removing the only pair leaves an empty query ("?" remains).
  EXPECT_EQ("http://example.com/path?",
            remove_param("http://example.com/path?existing=one", "existing"));

  // Remove the first pair.
  EXPECT_EQ("http://example.com/path?c=d&e=f",
            remove_param("http://example.com/path?a=b&c=d&e=f", "a"));

  // Remove a pair that sits between two others.
  EXPECT_EQ(
      "http://example.com/path?existing=one&hello=world",
      remove_param(
          "http://example.com/path?existing=one&replace=sure&hello=world",
          "replace"));

  // Remove the last pair.
  EXPECT_EQ("http://example.com/path?existing=one",
            remove_param("http://example.com/path?existing=one&replace=sure",
                         "replace"));

  // Remove a pair whose key contains unsafe characters: the unescaped key
  // "na me" matches the escaped "na+me" in the URL.
  EXPECT_EQ("http://example.com/path?existing=one&hello=world",
            remove_param("http://example.com/"
                         "path?existing=one&na+me=v.alue%3D&hello=world",
                         "na me"));

  // Nothing changes when the key does not exist ("old" is a value here, not
  // a key).
  EXPECT_EQ(
      "http://example.com/path?existing=one&name=old",
      remove_param("http://example.com/path?existing=one&name=old", "old"));

  // Only the first parameter with the given name is removed.
  EXPECT_EQ(
      "http://example.com/path?existing=one&name=old",
      remove_param(
          "http://example.com/path?name=something&existing=one&name=old",
          "name"));

  // Malformed or unusual parameters elsewhere in the query are carried over
  // verbatim.
  EXPECT_EQ(
      "http://example.com/path?bar&left=&"
      "=right&=&&name=again",
      remove_param("http://example.com/path?bar&name=old&left=&"
                   "=right&=&&name=again",
                   "name"));
}
167
// Checks that AppendOrReplaceRef() sets the ref (fragment) of a URL, whether
// or not the URL already had one, and leaves the path and query in place.
TEST(UrlUtilTest, AppendOrReplaceRef) {
  // Setting a new ref should append it.
  EXPECT_EQ("http://example.com/path#ref",
            AppendOrReplaceRef(GURL("http://example.com/path"), "ref").spec());

  // Setting a ref over an existing one should replace it.
  EXPECT_EQ("http://example.com/path#ref",
            AppendOrReplaceRef(GURL("http://example.com/path#old_ref"), "ref")
                .spec());

  // Setting a ref on a url with existing query parameters (and no ref yet)
  // should simply append it at the end. The input deliberately has no ref, so
  // this case exercises the append path after a query.
  EXPECT_EQ(
      "http://example.com/path?query=value#ref",
      AppendOrReplaceRef(GURL("http://example.com/path?query=value"), "ref")
          .spec());

  // Setting a ref identical to the one already present, on a url with query
  // parameters, leaves the spec unchanged.
  EXPECT_EQ(
      "http://example.com/path?query=value#ref",
      AppendOrReplaceRef(GURL("http://example.com/path?query=value#ref"), "ref")
          .spec());

  // Setting a ref on a url with existing query parameters and with special
  // encoded characters: `special-chars?query=value#ref chars%\";'`
  EXPECT_EQ(
      "http://example.com/special-chars?query=value#ref%20chars%%22;'",
      AppendOrReplaceRef(GURL("http://example.com/special-chars?query=value"),
                         "ref chars%\";'")
          .spec());

  // Testing adding a ref to a URL with specially encoded characters.
  // `special chars%\";'?query=value#ref`
  EXPECT_EQ(
      "http://example.com/special%20chars%%22;'?query=value#ref",
      AppendOrReplaceRef(
          GURL("http://example.com/special chars%\";'?query=value"), "ref")
          .spec());
}
201
// Checks GetValueForKeyInQuery() lookups against a URL whose query holds a
// plain pair, a key without a value, and an escaped nested URL.
TEST(UrlUtilTest, GetValueForKeyInQuery) {
  const GURL url(
      "http://example.com/path?name=value&boolParam&"
      "url=http://test.com/q?n1%3Dv1%26n2");
  std::string found;

  // A key that is absent from the query reports failure.
  EXPECT_FALSE(GetValueForKeyInQuery(url, "non-exist", &found));

  // A regular key yields its value.
  EXPECT_TRUE(GetValueForKeyInQuery(url, "name", &found));
  EXPECT_EQ("value", found);

  // A key present without '=' is found, with an empty value.
  EXPECT_TRUE(GetValueForKeyInQuery(url, "boolParam", &found));
  EXPECT_EQ("", found);

  // The returned value is unescaped ("%3D" -> '=', "%26" -> '&').
  EXPECT_TRUE(GetValueForKeyInQuery(url, "url", &found));
  EXPECT_EQ("http://test.com/q?n1=v1&n2", found);
}
220
// Checks that GetValueForKeyInQuery() reports failure for this URL, which the
// test treats as invalid, even though the requested key appears in its query.
TEST(UrlUtilTest, GetValueForKeyInQueryInvalidURL) {
  const GURL invalid_url("http://%01/?test");
  std::string unused_value;

  EXPECT_FALSE(GetValueForKeyInQuery(invalid_url, "test", &unused_value));
}
228
// Walks a QueryIterator over a query string and checks, pair by pair, the
// key, the raw value and the unescaped value, then checks that the iterator
// reports the end.
TEST(UrlUtilTest, ParseQuery) {
  const GURL url(
      "http://example.com/path?name=value&boolParam&"
      "url=http://test.com/q?n1%3Dv1%26n2&"
      "multikey=value1&multikey=value2&multikey");

  // Expected pairs, in the order they appear in the query. Repeated keys are
  // reported once per occurrence; keys without '=' have empty values.
  const struct {
    const char* key;
    const char* value;
    const char* unescaped_value;
  } kExpectedPairs[] = {
      {"name", "value", "value"},
      {"boolParam", "", ""},
      {"url", "http://test.com/q?n1%3Dv1%26n2", "http://test.com/q?n1=v1&n2"},
      {"multikey", "value1", "value1"},
      {"multikey", "value2", "value2"},
      {"multikey", "", ""},
  };

  QueryIterator it(url);
  for (const auto& expected : kExpectedPairs) {
    // Stop immediately if the iterator runs out early; the accessors below
    // would otherwise be called on an exhausted iterator.
    ASSERT_FALSE(it.IsAtEnd());
    EXPECT_EQ(expected.key, it.GetKey());
    EXPECT_EQ(expected.value, it.GetValue());
    EXPECT_EQ(expected.unescaped_value, it.GetUnescapedValue());
    it.Advance();
  }

  EXPECT_TRUE(it.IsAtEnd());
}
273
// Checks that a QueryIterator built over this URL, which the test treats as
// invalid, yields no pairs even though the URL text contains a query.
TEST(UrlUtilTest, ParseQueryInvalidURL) {
  const GURL invalid_url("http://%01/?test");
  QueryIterator query_it(invalid_url);

  EXPECT_TRUE(query_it.IsAtEnd());
}
279
// Table-driven check of ParseHostAndPort(): for each input, whether parsing
// succeeds and, if so, which host and port are produced. An expected port of
// -1 means the input carried no port.
TEST(UrlUtilTest, ParseHostAndPort) {
  const struct {
    const char* const input;
    bool success;
    const char* const expected_host;
    int expected_port;
  } tests[] = {
      // Valid inputs:
      {"foo:10", true, "foo", 10},
      {"foo", true, "foo", -1},
      // Bracketed IPv6 literals; the brackets are not part of the host.
      {"[1080:0:0:0:8:800:200C:4171]:11", true, "1080:0:0:0:8:800:200C:4171",
       11},
      {"[1080:0:0:0:8:800:200C:4171]", true, "1080:0:0:0:8:800:200C:4171", -1},

      // Because no validation is done on the host, the following are accepted,
      // even though they are invalid names.
      {"]", true, "]", -1},
      {"::1", true, ":", 1},

      // Invalid inputs:
      {"foo:bar", false, "", -1},
      {"foo:", false, "", -1},
      {":", false, "", -1},
      {":80", false, "", -1},
      {"", false, "", -1},
      {"porttoolong:300000", false, "", -1},
      {"usrname@host", false, "", -1},
      {"usrname:password@host", false, "", -1},
      {":password@host", false, "", -1},
      {":password@host:80", false, "", -1},
      {"@host", false, "", -1},
      {"[", false, "", -1},
      {"[]", false, "", -1},
  };

  for (const auto& test : tests) {
    // Name the input in any failure below; the success check compares two
    // bools and would otherwise not identify the failing row.
    SCOPED_TRACE(test.input);

    std::string host;
    // Initialized so that a parse which unexpectedly fails does not leave an
    // indeterminate value to be read by the expectation below.
    int port = 0;
    const bool ok = ParseHostAndPort(test.input, &host, &port);
    EXPECT_EQ(test.success, ok);

    // The outputs are only checked for inputs that are expected to parse.
    if (test.success) {
      EXPECT_EQ(test.expected_host, host);
      EXPECT_EQ(test.expected_port, port);
    }
  }
}
// Checks that GetHostAndPort() always returns "host:port", using the port
// shown in the expectations (80 for these http URLs) when the URL has none.
TEST(UrlUtilTest, GetHostAndPort) {
  const struct {
    GURL url;
    const char* const expected_host_and_port;
  } kCases[] = {
      {GURL("http://www.foo.com/x"), "www.foo.com:80"},
      {GURL("http://www.foo.com:21/x"), "www.foo.com:21"},

      // IPv6 literals keep their brackets in the output.
      {GURL("http://[1::2]/x"), "[1::2]:80"},
      {GURL("http://[::a]:33/x"), "[::a]:33"},
  };

  for (const auto& test_case : kCases) {
    const std::string expected(test_case.expected_host_and_port);
    EXPECT_EQ(expected, GetHostAndPort(test_case.url));
  }
}
353
// Checks that GetHostAndOptionalPort() includes the port only when the URL
// specifies one that the expectations show as non-default for its scheme, for
// both the GURL and the url::SchemeHostPort overloads.
TEST(UrlUtilTest, GetHostAndOptionalPort) {
  const struct {
    GURL url;
    const char* const expected_host_and_port;
  } kCases[] = {
      {GURL("http://www.foo.com/x"), "www.foo.com"},
      {GURL("http://www.foo.com:21/x"), "www.foo.com:21"},
      {GURL("http://www.foo.com:443/x"), "www.foo.com:443"},

      {GURL("https://www.foo.com/x"), "www.foo.com"},
      {GURL("https://www.foo.com:80/x"), "www.foo.com:80"},

      // IPv6 literals keep their brackets in the output.
      {GURL("http://[1::2]/x"), "[1::2]"},
      {GURL("http://[::a]:33/x"), "[::a]:33"},
  };

  for (const auto& test_case : kCases) {
    // GURL overload.
    EXPECT_EQ(test_case.expected_host_and_port,
              GetHostAndOptionalPort(test_case.url));

    // SchemeHostPort overload must agree with the GURL one.
    const url::SchemeHostPort scheme_host_port(test_case.url);
    EXPECT_EQ(test_case.expected_host_and_port,
              GetHostAndOptionalPort(scheme_host_port));
  }
}
377
// Checks that GetHostOrSpecFromURL() yields the host (without a trailing dot)
// for the http URLs below, and the whole spec for the file URL.
TEST(UrlUtilTest, GetHostOrSpecFromURL) {
  const GURL plain_host("http://example.com/test");
  EXPECT_EQ("example.com", GetHostOrSpecFromURL(plain_host));

  // The trailing dot of the host is dropped.
  const GURL trailing_dot_host("http://example.com./test");
  EXPECT_EQ("example.com", GetHostOrSpecFromURL(trailing_dot_host));

  // A file URL with an empty host yields the full spec.
  const GURL file_url("file:///tmp/test.html");
  EXPECT_EQ("file:///tmp/test.html", GetHostOrSpecFromURL(file_url));
}
386
// Table-driven check of GetSuperdomain(): each domain maps to the text after
// its first dot, or to the empty string when there is nothing after it.
TEST(UrlUtilTest, GetSuperdomain) {
  struct {
    const char* const domain;
    const char* const expected_superdomain;
  } kCases[] = {
      // Basic cases
      {"foo.bar.example", "bar.example"},
      {"bar.example", "example"},
      {"example", ""},

      // The result may itself be an eTLD.
      {"google.com", "com"},
      {"google.co.uk", "co.uk"},

      // Weird cases: empty input, leading/trailing dots, and an IP address,
      // which is treated like any other dotted string.
      {"", ""},
      {"has.trailing.dot.", "trailing.dot."},
      {"dot.", ""},
      {".has.leading.dot", "has.leading.dot"},
      {".", ""},
      {"..", "."},
      {"127.0.0.1", "0.0.1"},
  };

  for (const auto& [domain, expected_superdomain] : kCases) {
    EXPECT_EQ(expected_superdomain, GetSuperdomain(domain));
  }
}
415
// Table-driven check of IsSubdomainOf(subdomain, superdomain). The rows show
// that matching is on whole labels, that a domain counts as a subdomain of
// itself, and that a trailing dot on the superdomain prevents a match.
TEST(UrlUtilTest, IsSubdomainOf) {
  struct {
    const char* subdomain;
    const char* superdomain;
    bool is_subdomain;
  } tests[] = {
      {"bar.foo.com", "foo.com", true},
      {"barfoo.com", "foo.com", false},
      {"bar.foo.com", "com", true},
      {"bar.foo.com", "other.com", false},
      {"bar.foo.com", "bar.foo.com", true},
      {"bar.foo.com", "baz.foo.com", false},
      {"bar.foo.com", "baz.bar.foo.com", false},
      {"bar.foo.com", "ar.foo.com", false},
      {"foo.com", "foo.com.", false},
      {"bar.foo.com", "foo.com.", false},
      {"", "", true},
      {"a", "", false},
      {"", "a", false},
      {"127.0.0.1", "0.0.1", true},  // Don't do this...
  };

  for (const auto& test : tests) {
    // The expectation compares two bools, so stream the inputs to make a
    // failure identify its row.
    EXPECT_EQ(test.is_subdomain,
              IsSubdomainOf(test.subdomain, test.superdomain))
        << "subdomain: \"" << test.subdomain << "\", superdomain: \""
        << test.superdomain << "\"";
  }
}
443
// Table-driven check of IsCanonicalizedHostCompliant(): each host string is
// paired with whether it is expected to be accepted. The rows cover allowed
// characters, empty labels, and the per-label and whole-host length limits.
TEST(UrlUtilTest, CompliantHost) {
  struct {
    const char* const host;
    bool compliant;
  } kCases[] = {
      {"", false},
      {"a", true},
      {"-", false},
      {"_", false},
      {".", false},
      {"9", true},
      {"9a", true},
      {"9_", true},
      {"a.", true},
      {".a", false},
      {"a.a", true},
      {"9.a", true},
      {"a.9", true},
      {"_9a", false},
      {"-9a", false},
      {"a.a9", true},
      {"_.9a", true},
      {"a.-a9", false},
      {"a+9a", false},
      {"-a.a9", true},
      {"a_.a9", true},
      {"1-.a-b", true},
      {"1_.a-b", true},
      {"1-2.a_b", true},
      {"a.b.c.d.e", true},
      {"1.2.3.4.5", true},
      {"1.2.3..4.5", false},
      {"1.2.3.4.5.", true},
      {"1.2.3.4.5..", false},
      {"%20%20noodles.blorg", false},
      {"noo dles.blorg ", false},
      {"noo dles.blorg. ", false},
      {"^noodles.blorg", false},
      {"noodles^.blorg", false},
      {"noo&dles.blorg", false},
      {"noodles.blorg`", false},
      {"www.noodles.blorg", true},
      {"1www.noodles.blorg", true},
      {"www.2noodles.blorg", true},
      {"www.n--oodles.blorg", true},
      {"www.noodl_es.blorg", true},
      {"www.no-_odles.blorg", true},
      {"www_.noodles.blorg", true},
      {"www.noodles.blorg.", true},
      {"_privet._tcp.local", true},

      // A 63-character label is accepted, with or without a trailing dot.
      {"z23456789a123456789a123456789a123456789a123456789a123456789a123", true},
      {"z23456789a123456789a123456789a123456789a123456789a123456789a123.",
       true},

      // A 64-character label is rejected, with or without a trailing dot.
      {"123456789a123456789a123456789a123456789a123456789a123456789a1234",
       false},
      {"123456789a123456789a123456789a123456789a123456789a123456789a1234.",
       false},

      // A 253-character host is accepted.
      {"abcdefghi.abcdefghi.abcdefghi.abcdefghi.abcdefghi.abcdefghi.abcdefghi."
       "abcdefghi.abcdefghi.abcdefghi.abcdefghi.abcdefghi.abcdefghi.abcdefghi."
       "abcdefghi.abcdefghi.abcdefghi.abcdefghi.abcdefghi.abcdefghi.abcdefghi."
       "abcdefghi.abcdefghi.abcdefghi.abcdefghi.abc",
       true},

      // A 253-character host followed by a trailing dot is accepted.
      {"abcdefghi.abcdefghi.abcdefghi.abcdefghi.abcdefghi.abcdefghi.abcdefghi."
       "abcdefghi.abcdefghi.abcdefghi.abcdefghi.abcdefghi.abcdefghi.abcdefghi."
       "abcdefghi.abcdefghi.abcdefghi.abcdefghi.abcdefghi.abcdefghi.abcdefghi."
       "abcdefghi.abcdefghi.abcdefghi.abcdefghi.abc.",
       true},

      // A 254-character host is rejected.
      {"123456789.123456789.123456789.123456789.123456789.123456789.123456789."
       "123456789.123456789.123456789.123456789.123456789.123456789.123456789."
       "123456789.123456789.123456789.123456789.123456789.123456789.123456789."
       "123456789.123456789.123456789.123456789.1234",
       false},

      // A 254-character host followed by a trailing dot is rejected.
      {"123456789.123456789.123456789.123456789.123456789.123456789.123456789."
       "123456789.123456789.123456789.123456789.123456789.123456789.123456789."
       "123456789.123456789.123456789.123456789.123456789.123456789.123456789."
       "123456789.123456789.123456789.123456789.1234.",
       false},
  };

  for (const auto& test_case : kCases) {
    const bool actual = IsCanonicalizedHostCompliant(test_case.host);
    // Stream the host so a failing row can be identified.
    EXPECT_EQ(test_case.compliant, actual) << test_case.host;
  }
}
535
536 struct NonUniqueNameTestData {
537 bool is_unique;
538 const char* const hostname;
539 };
540
541 // Google Test pretty-printer.
PrintTo(const NonUniqueNameTestData & data,std::ostream * os)542 void PrintTo(const NonUniqueNameTestData& data, std::ostream* os) {
543 ASSERT_TRUE(data.hostname);
544 *os << " hostname: " << testing::PrintToString(data.hostname)
545 << "; is_unique: " << testing::PrintToString(data.is_unique);
546 }
547
548 const NonUniqueNameTestData kNonUniqueNameTestData[] = {
549 // eTLDs
550 {true, "com"},
551 {true, "com."},
552 {true, ".com"},
553 {true, "co.uk"},
554 {true, "co.uk."},
555 {true, ".co.uk"},
556 {false, "notarealtld"},
557 {false, ".notarealtld"},
558 {false, "notarealtld."},
559 // Domains under ICANN-assigned domains.
560 {true, "google.com"},
561 {true, "google.co.uk"},
562 // Domains under private registries.
563 {true, "appspot.com"},
564 {true, "test.appspot.com"},
565 // Unreserved IPv4 addresses (in various forms).
566 {true, "8.8.8.8"},
567 {true, "99.64.0.0"},
568 {true, "212.15.0.0"},
569 {true, "212.15"},
570 {true, "212.15.0"},
571 {true, "3557752832"},
572 // Reserved IPv4 addresses (in various forms).
573 {false, "192.168.0.0"},
574 {false, "192.168.0.6"},
575 {false, "10.0.0.5"},
576 {false, "10.0"},
577 {false, "10.0.0"},
578 {false, "3232235526"},
579 // Unreserved IPv6 addresses.
580 {true, "FFC0:ba98:7654:3210:FEDC:BA98:7654:3210"},
581 {true, "2000:ba98:7654:2301:EFCD:BA98:7654:3210"},
582 // Reserved IPv6 addresses.
583 {false, "::192.9.5.5"},
584 {false, "FEED::BEEF"},
585 {false, "FEC0:ba98:7654:3210:FEDC:BA98:7654:3210"},
586 // 'internal'/non-IANA assigned domains.
587 {false, "intranet"},
588 {false, "intranet."},
589 {false, "intranet.example"},
590 {false, "host.intranet.example"},
591 // gTLDs under discussion, but not yet assigned.
592 {false, "intranet.corp"},
593 {false, "intranet.internal"},
594 // Invalid host names are treated as unique - but expected to be
595 // filtered out before then.
596 {true, "junk)(£)$*!@~#"},
597 {true, "w$w.example.com"},
598 {true, "nocolonsallowed:example"},
599 {true, "[::4.5.6.9]"},
600 };
601
602 class UrlUtilNonUniqueNameTest
603 : public testing::TestWithParam<NonUniqueNameTestData> {
604 public:
605 ~UrlUtilNonUniqueNameTest() override = default;
606
607 protected:
IsUnique(const std::string & hostname)608 bool IsUnique(const std::string& hostname) {
609 return !IsHostnameNonUnique(hostname);
610 }
611 };
612
613 // Test that internal/non-unique names are properly identified as such, but
614 // that IP addresses and hosts beneath registry-controlled domains are flagged
615 // as unique names.
TEST_P(UrlUtilNonUniqueNameTest,IsHostnameNonUnique)616 TEST_P(UrlUtilNonUniqueNameTest, IsHostnameNonUnique) {
617 const NonUniqueNameTestData& test_data = GetParam();
618
619 EXPECT_EQ(test_data.is_unique, IsUnique(test_data.hostname));
620 }
621
622 INSTANTIATE_TEST_SUITE_P(All,
623 UrlUtilNonUniqueNameTest,
624 testing::ValuesIn(kNonUniqueNameTestData));
625
// Checks HostStringIsLocalhost() against host strings that must and must not
// be recognized as localhost, then checks IsLocalhost() on a URL with a
// bracketed IPv6 loopback host.
TEST(UrlUtilTest, IsLocalhost) {
  // Recognized: "localhost" and names ending in ".localhost" (any case, with
  // or without a trailing dot), addresses in 127.0.0.0/8, and the IPv6
  // loopback address.
  static constexpr const char* kLocalhostHosts[] = {
      "localhost",
      "localHosT",
      "localhost.",
      "localHost.",
      "127.0.0.1",
      "127.0.1.0",
      "127.1.0.0",
      "127.0.0.255",
      "127.0.255.0",
      "127.255.0.0",
      "::1",
      "0:0:0:0:0:0:0:1",
      "foo.localhost",
      "foo.localhost.",
      "foo.localhoST",
      "foo.localhoST.",
  };
  for (const char* host : kLocalhostHosts) {
    EXPECT_TRUE(HostStringIsLocalhost(host)) << host;
  }

  // Not recognized: "localdomain"/"localhost6" style names, near-miss
  // spellings, malformed or non-loopback addresses, and a bracketed IPv6
  // literal (brackets are not accepted in a host string).
  static constexpr const char* kNonLocalhostHosts[] = {
      "localhost.localdomain",
      "localhost.localDOMain",
      "localhost.localdomain.",
      "localhost6",
      "localhost6.",
      "localhost6.localdomain6",
      "localhost6.localdomain6.",

      "localhostx",
      "localhost.x",
      "foo.localdomain",
      "foo.localdomain.x",
      "localhost6x",
      "localhost.localdomain6",
      "localhost6.localdomain",
      "127.0.0.1.1",
      ".127.0.0.255",
      "::2",
      "::1:1",
      "0:0:0:0:1:0:0:1",
      "0:0:0:0:0:0:0:0:1",
      "foo.localhost.com",
      "foo.localhoste",
      "foo.localhos",
      "[::1]",
  };
  for (const char* host : kNonLocalhostHosts) {
    EXPECT_FALSE(HostStringIsLocalhost(host)) << host;
  }

  // The GURL-based check does accept the bracketed IPv6 loopback literal.
  const GURL localhost6("http://[::1]/");
  EXPECT_TRUE(IsLocalhost(localhost6));
}
674
675 class UrlUtilTypedTest : public ::testing::TestWithParam<bool> {
676 public:
UrlUtilTypedTest()677 UrlUtilTypedTest()
678 : use_standard_compliant_non_special_scheme_url_parsing_(GetParam()) {
679 if (use_standard_compliant_non_special_scheme_url_parsing_) {
680 scoped_feature_list_.InitAndEnableFeature(
681 url::kStandardCompliantNonSpecialSchemeURLParsing);
682 } else {
683 scoped_feature_list_.InitAndDisableFeature(
684 url::kStandardCompliantNonSpecialSchemeURLParsing);
685 }
686 }
687
688 protected:
689 bool use_standard_compliant_non_special_scheme_url_parsing_;
690
691 private:
692 base::test::ScopedFeatureList scoped_feature_list_;
693 };
694
695 INSTANTIATE_TEST_SUITE_P(All, UrlUtilTypedTest, ::testing::Bool());
696
// Table-driven check of SimplifyUrlForRequest() for http, https and ftp URLs:
// the reference (fragment) and the username/password are removed.
TEST(UrlUtilTest, SimplifyUrlForRequest) {
  struct {
    const char* const input_url;
    const char* const expected_simplified_url;
  } kCases[] = {
      {
          // The reference section is stripped.
          "http://www.google.com:78/foobar?query=1#hash",
          "http://www.google.com:78/foobar?query=1",
      },
      {
          // The reference section may itself contain '#'.
          "http://192.168.0.1?query=1#hash#10#11#13#14",
          "http://192.168.0.1?query=1",
      },
      {
          // The username/password is stripped.
          "http://user:pass@google.com",
          "http://google.com/",
      },
      {
          // Both the reference and the username/password are stripped.
          "http://user:pass@google.com:80/sup?yo#X#X",
          "http://google.com/sup?yo",
      },
      {
          // HTTPS URL: both the reference and the username/password are
          // stripped.
          "https://user:pass@google.com:80/sup?yo#X#X",
          "https://google.com:80/sup?yo",
      },
      {
          // FTP URL: both the reference and the username/password are
          // stripped.
          "ftp://user:pass@google.com:80/sup?yo#X#X",
          "ftp://google.com:80/sup?yo",
      },
  };

  for (const auto& test_case : kCases) {
    SCOPED_TRACE(test_case.input_url);
    const GURL input(test_case.input_url);
    const GURL expected(test_case.expected_simplified_url);
    const GURL simplified = SimplifyUrlForRequest(input);
    EXPECT_EQ(expected, simplified);
  }
}
736
// Checks SimplifyUrlForRequest() on a non-special-scheme URL. The expected
// result depends on the fixture's feature state: with standard-compliant
// parsing the credentials are removed, without it they are kept. The
// reference is removed in both cases.
TEST_P(UrlUtilTypedTest, SimplifyUrlForRequest) {
  static constexpr struct {
    const char* const input_url;
    const char* const expected_when_compliant;
    const char* const expected_when_non_compliant;
  } kCases[] = {
      {
          // A non-special URL.
          "foobar://user:pass@google.com:80/sup?yo#X#X",
          "foobar://google.com:80/sup?yo",
          "foobar://user:pass@google.com:80/sup?yo",
      },
  };

  for (const auto& test_case : kCases) {
    SCOPED_TRACE(test_case.input_url);
    const GURL simplified = SimplifyUrlForRequest(GURL(test_case.input_url));

    // Pick the expectation that matches the feature state of this run.
    const char* const expected_spec =
        use_standard_compliant_non_special_scheme_url_parsing_
            ? test_case.expected_when_compliant
            : test_case.expected_when_non_compliant;
    EXPECT_EQ(simplified, GURL(expected_spec));
  }
}
761
// Checks that ChangeWebSocketSchemeToHttpScheme() maps ws -> http and
// wss -> https, leaving host, port, path and query unchanged.
TEST(UrlUtilTest, ChangeWebSocketSchemeToHttpScheme) {
  struct {
    const char* const input_url;
    const char* const expected_output_url;
  } kCases[] = {
      {"ws://google.com:78/path?query=1", "http://google.com:78/path?query=1"},
      {"wss://google.com:441/path?q=1", "https://google.com:441/path?q=1"},
  };

  for (const auto& test_case : kCases) {
    const GURL websocket_url(test_case.input_url);
    const GURL expected(test_case.expected_output_url);
    const GURL converted = ChangeWebSocketSchemeToHttpScheme(websocket_url);
    EXPECT_EQ(expected, converted);
  }
}
776
// Checks IsStandardSchemeWithNetworkHost() for built-in schemes and for
// custom schemes registered here with each url::SchemeType.
TEST(UrlUtilTest, SchemeHasNetworkHost) {
  const char kCustomSchemeWithHostPortAndUserInformation[] = "foo";
  const char kCustomSchemeWithHostAndPort[] = "bar";
  const char kCustomSchemeWithHost[] = "baz";
  const char kCustomSchemeWithoutAuthority[] = "qux";
  const char kNonStandardScheme[] = "not-registered";

  // Declared before the registrations below so they are made while this
  // object is alive. kNonStandardScheme is intentionally never registered.
  url::ScopedSchemeRegistryForTests scheme_registry;
  AddStandardScheme(kCustomSchemeWithHostPortAndUserInformation,
                    url::SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION);
  AddStandardScheme(kCustomSchemeWithHostAndPort,
                    url::SCHEME_WITH_HOST_AND_PORT);
  AddStandardScheme(kCustomSchemeWithHost, url::SCHEME_WITH_HOST);
  AddStandardScheme(kCustomSchemeWithoutAuthority,
                    url::SCHEME_WITHOUT_AUTHORITY);

  // Schemes expected to be reported as having a network host.
  const char* const kNetworkHostSchemes[] = {
      url::kHttpScheme,
      url::kHttpsScheme,
      url::kWsScheme,
      url::kWssScheme,
      url::kFtpScheme,
      url::kFileScheme,
      kCustomSchemeWithHostPortAndUserInformation,
      kCustomSchemeWithHostAndPort,
  };
  for (const char* scheme : kNetworkHostSchemes) {
    EXPECT_TRUE(IsStandardSchemeWithNetworkHost(scheme)) << scheme;
  }

  // Schemes expected NOT to be reported as having a network host.
  const char* const kOtherSchemes[] = {
      url::kFileSystemScheme,
      kCustomSchemeWithHost,
      kCustomSchemeWithoutAuthority,
      kNonStandardScheme,
  };
  for (const char* scheme : kOtherSchemes) {
    EXPECT_FALSE(IsStandardSchemeWithNetworkHost(scheme)) << scheme;
  }
}
808
// Table-driven check of GetIdentityFromURL(): for each URL, the username and
// password it extracts, compared as UTF-16 against UTF-8 expectations.
TEST(UrlUtilTest, GetIdentityFromURL) {
  struct {
    const char* const input_url;
    const char* const expected_username;
    const char* const expected_password;
  } kCases[] = {
      {
          "http://username:password@google.com",
          "username",
          "password",
      },
      {
          // Regression case for http://crbug.com/19200: '@' in the password.
          "http://username:p@ssword@google.com",
          "username",
          "p@ssword",
      },
      {
          // Escaped special URL characters are unescaped.
          "http://username:p%3fa%26s%2fs%23@google.com",
          "username",
          "p?a&s/s#",
      },
      {
          // A space in the username and an escaped '%' in the password.
          "http://use rname:password%25@google.com",
          "use rname",
          "password%",
      },
      {
          // Escaped forward slash / backslash in username and password.
          "http://username%2F:password%5C@google.com",
          "username/",
          "password\\",
      },
      {
          // %00 and %01 are kept as-is, and when they are present the other
          // escaped characters are left escaped too.
          "http://use%00rname%20:pass%01word%25@google.com",
          "use%00rname%20",
          "pass%01word%25",
      },
      {
          // Escaped CR and LF are kept as-is.
          "http://use%0Arname:pass%0Dword@google.com",
          "use%0Arname",
          "pass%0Dword",
      },
      {
          // A '+' in the username is preserved.
          "http://use+rname:password@google.com",
          "use+rname",
          "password",
      },
      {
          // A '&' in the password is preserved.
          "http://username:p&ssword@google.com",
          "username",
          "p&ssword",
      },
      {
          // These UTF-8 characters are considered unsafe to unescape by
          // UnescapeURLComponent, but raise no special concerns as part of
          // the identity portion of a URL.
          "http://%F0%9F%94%92:%E2%80%82@google.com",
          "\xF0\x9F\x94\x92",
          "\xE2\x80\x82",
      },
      {
          // Invalid UTF-8 is left alone, and so are valid UTF-8 characters
          // when the same string also contains an invalid one: strings are
          // never partially unescaped.
          "http://%81:%E2%80%82%E2%80@google.com",
          "%81",
          "%E2%80%82%E2%80",
      },
  };

  for (const auto& test_case : kCases) {
    SCOPED_TRACE(test_case.input_url);
    const GURL url(test_case.input_url);

    std::u16string actual_username;
    std::u16string actual_password;
    GetIdentityFromURL(url, &actual_username, &actual_password);

    EXPECT_EQ(base::UTF8ToUTF16(test_case.expected_username), actual_username);
    EXPECT_EQ(base::UTF8ToUTF16(test_case.expected_password), actual_password);
  }
}
897
898 // Try extracting a username which was encoded with UTF8.
TEST(UrlUtilTest,GetIdentityFromURL_UTF8)899 TEST(UrlUtilTest, GetIdentityFromURL_UTF8) {
900 GURL url(u"http://foo:\x4f60\x597d@blah.com");
901
902 EXPECT_EQ("foo", url.username());
903 EXPECT_EQ("%E4%BD%A0%E5%A5%BD", url.password());
904
905 // Extract the unescaped identity.
906 std::u16string username, password;
907 GetIdentityFromURL(url, &username, &password);
908
909 // Verify that it was decoded as UTF8.
910 EXPECT_EQ(u"foo", username);
911 EXPECT_EQ(u"\x4f60\x597d", password);
912 }
913
// Table-driven check of HasGoogleHost(). Per the rows below, a URL matches
// when its host ends in one of the listed Google domains preceded by a dot;
// the bare domains themselves and look-alike hosts do not match.
TEST(UrlUtilTest, GoogleHost) {
  struct {
    GURL url;
    bool expected_output;
  } google_host_cases[] = {
      // Hosts consisting of a leading dot plus a Google domain.
      {GURL("http://.google.com"), true},
      {GURL("http://.youtube.com"), true},
      {GURL("http://.gmail.com"), true},
      {GURL("http://.doubleclick.net"), true},
      {GURL("http://.gstatic.com"), true},
      {GURL("http://.googlevideo.com"), true},
      {GURL("http://.googleusercontent.com"), true},
      {GURL("http://.googlesyndication.com"), true},
      {GURL("http://.google-analytics.com"), true},
      {GURL("http://.googleadservices.com"), true},
      {GURL("http://.googleapis.com"), true},
      // Subdomains of a Google domain.
      {GURL("http://a.google.com"), true},
      {GURL("http://b.youtube.com"), true},
      {GURL("http://c.gmail.com"), true},
      // Bare domains and look-alikes.
      {GURL("http://google.com"), false},
      {GURL("http://youtube.com"), false},
      {GURL("http://gmail.com"), false},
      {GURL("http://google.coma"), false},
      {GURL("http://agoogle.com"), false},
      {GURL("http://oogle.com"), false},
      {GURL("http://google.co"), false},
      {GURL("http://oggole.com"), false},
  };

  for (const auto& host : google_host_cases) {
    // The expectation compares two bools, so stream the URL to make a failure
    // identify its row. possibly_invalid_spec() is used so that printing
    // never depends on the URL being valid.
    EXPECT_EQ(host.expected_output, HasGoogleHost(host.url))
        << host.url.possibly_invalid_spec();
  }
}
947
// Checks IsLocalHostname(): "localhost" and names ending in ".localhost" are
// accepted in any case and with an optional trailing dot; names that merely
// contain "localhost" are rejected.
TEST(UrlUtilTest, IsLocalHostname) {
  static constexpr const char* kLocalNames[] = {
      "localhost",
      "localhost.",
      "LOCALhost",
      "LOCALhost.",
      "abc.localhost",
      "abc.localhost.",
      "abc.LOCALhost",
      "abc.LOCALhost.",
      "abc.def.localhost",
  };
  for (const char* name : kLocalNames) {
    EXPECT_TRUE(IsLocalHostname(name)) << name;
  }

  static constexpr const char* kNonLocalNames[] = {
      "localhost.actuallynot",
      "notlocalhost",
      "notlocalhost.",
      "still.notlocalhost",
      "localhostjustkidding",
  };
  for (const char* name : kNonLocalNames) {
    EXPECT_FALSE(IsLocalHostname(name)) << name;
  }
}
965
// Table-driven check of IsGoogleHostWithAlpnH3(): google.com and
// www.google.com match regardless of letter case, while the other Google
// country/TLD variants listed below do not.
TEST(UrlUtilTest, GoogleHostWithAlpnH3) {
  struct {
    std::string_view host;
    bool expected_output;
  } test_cases[] = {
      {"google.com", true},        {"www.google.com", true},
      {"google.CoM", true},        {"www.Google.cOm", true},
      {"www.google.cat", false},   {"www.google.co.in", false},
      {"www.google.co.jp", false},
  };

  for (const auto& host : test_cases) {
    // The expectation compares two bools, so stream the host to make a
    // failure identify its row.
    EXPECT_EQ(host.expected_output, IsGoogleHostWithAlpnH3(host.host))
        << host.host;
  }
}
981
982 } // namespace
983 } // namespace net
984