• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2013 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "net/base/url_util.h"
6 
7 #include <optional>
8 #include <ostream>
9 
10 #include "base/format_macros.h"
11 #include "base/strings/utf_string_conversions.h"
12 #include "base/test/scoped_feature_list.h"
13 #include "testing/gtest/include/gtest/gtest.h"
14 #include "url/gurl.h"
15 #include "url/scheme_host_port.h"
16 #include "url/url_features.h"
17 #include "url/url_util.h"
18 
19 using base::ASCIIToUTF16;
20 using base::WideToUTF16;
21 
22 namespace net {
23 namespace {
24 
// Checks that AppendQueryParameter() always adds a new "name=value" pair to
// the query component and escapes unsafe characters in the name and value.
TEST(UrlUtilTest, AppendQueryParameter) {
  // Appending a name-value pair to a URL without a query component.
  EXPECT_EQ("http://example.com/path?name=value",
            AppendQueryParameter(GURL("http://example.com/path"), "name",
                                 "value")
                .spec());

  // Appending a name-value pair to a URL with a query component.
  // The original component should be preserved, and the new pair should be
  // appended with '&'.
  EXPECT_EQ("http://example.com/path?existing=one&name=value",
            AppendQueryParameter(GURL("http://example.com/path?existing=one"),
                                 "name", "value")
                .spec());

  // Appending a name-value pair with unsafe characters included. The
  // unsafe characters should be escaped.
  EXPECT_EQ("http://example.com/path?existing=one&na+me=v.alue%3D",
            AppendQueryParameter(GURL("http://example.com/path?existing=one"),
                                 "na me", "v.alue=")
                .spec());
}
45 
TEST(UrlUtilTest,AppendOrReplaceQueryParameter)46 TEST(UrlUtilTest, AppendOrReplaceQueryParameter) {
47   // Appending a name-value pair to a URL without a query component.
48   EXPECT_EQ("http://example.com/path?name=value",
49             AppendOrReplaceQueryParameter(GURL("http://example.com/path"),
50                                  "name", "value").spec());
51 
52   // Appending a name-value pair to a URL with a query component.
53   // The original component should be preserved, and the new pair should be
54   // appended with '&'.
55   EXPECT_EQ("http://example.com/path?existing=one&name=value",
56       AppendOrReplaceQueryParameter(
57           GURL("http://example.com/path?existing=one"),
58           "name", "value").spec());
59 
60   // Appending a name-value pair with unsafe characters included. The
61   // unsafe characters should be escaped.
62   EXPECT_EQ("http://example.com/path?existing=one&na+me=v.alue%3D",
63       AppendOrReplaceQueryParameter(
64           GURL("http://example.com/path?existing=one"),
65           "na me", "v.alue=").spec());
66 
67   // Replace value of an existing paramater.
68   EXPECT_EQ("http://example.com/path?existing=one&name=new",
69       AppendOrReplaceQueryParameter(
70           GURL("http://example.com/path?existing=one&name=old"),
71           "name", "new").spec());
72 
73   // Replace a name-value pair with unsafe characters included. The
74   // unsafe characters should be escaped.
75   EXPECT_EQ("http://example.com/path?na+me=n.ew%3D&existing=one",
76       AppendOrReplaceQueryParameter(
77           GURL("http://example.com/path?na+me=old&existing=one"),
78           "na me", "n.ew=").spec());
79 
80   // Replace the value of first parameter with this name only.
81   EXPECT_EQ("http://example.com/path?name=new&existing=one&name=old",
82       AppendOrReplaceQueryParameter(
83           GURL("http://example.com/path?name=old&existing=one&name=old"),
84           "name", "new").spec());
85 
86   // Preserve the content of the original params regardless of our failure to
87   // interpret them correctly.
88   EXPECT_EQ("http://example.com/path?bar&name=new&left=&"
89             "=right&=&&name=again",
90       AppendOrReplaceQueryParameter(
91           GURL("http://example.com/path?bar&name=old&left=&"
92                 "=right&=&&name=again"),
93           "name", "new").spec());
94 
95   // ----- Removing the key using nullopt value -----
96 
97   // Removes the name-value pair from the URL preserving other query parameters.
98   EXPECT_EQ("http://example.com/path?abc=xyz",
99             AppendOrReplaceQueryParameter(
100                 GURL("http://example.com/path?name=value&abc=xyz"), "name",
101                 std::nullopt)
102                 .spec());
103 
104   // Removes the name-value pair from the URL.
105   EXPECT_EQ("http://example.com/path?",
106             AppendOrReplaceQueryParameter(
107                 GURL("http://example.com/path?existing=one"), "existing",
108                 std::nullopt)
109                 .spec());
110 
111   // Removes the first name-value pair.
112   EXPECT_EQ("http://example.com/path?c=d&e=f",
113             AppendOrReplaceQueryParameter(
114                 GURL("http://example.com/path?a=b&c=d&e=f"), "a", std::nullopt)
115                 .spec());
116 
117   // Removes a name-value pair in between two query params.
118   EXPECT_EQ(
119       "http://example.com/path?existing=one&hello=world",
120       AppendOrReplaceQueryParameter(
121           GURL("http://example.com/path?existing=one&replace=sure&hello=world"),
122           "replace", std::nullopt)
123           .spec());
124 
125   // Removes the last name-value pair.
126   EXPECT_EQ("http://example.com/path?existing=one",
127             AppendOrReplaceQueryParameter(
128                 GURL("http://example.com/path?existing=one&replace=sure"),
129                 "replace", std::nullopt)
130                 .spec());
131 
132   // Removing a name-value pair with unsafe characters included. The
133   // unsafe characters should be escaped.
134   EXPECT_EQ("http://example.com/path?existing=one&hello=world",
135             AppendOrReplaceQueryParameter(
136                 GURL("http://example.com/"
137                      "path?existing=one&na+me=v.alue%3D&hello=world"),
138                 "na me", std::nullopt)
139                 .spec());
140 
141   // Does nothing if the provided query param key does not exist.
142   EXPECT_EQ("http://example.com/path?existing=one&name=old",
143             AppendOrReplaceQueryParameter(
144                 GURL("http://example.com/path?existing=one&name=old"), "old",
145                 std::nullopt)
146                 .spec());
147 
148   // Remove the value of first parameter with this name only.
149   EXPECT_EQ(
150       "http://example.com/path?existing=one&name=old",
151       AppendOrReplaceQueryParameter(
152           GURL("http://example.com/path?name=something&existing=one&name=old"),
153           "name", std::nullopt)
154           .spec());
155 
156   // Preserve the content of the original params regardless of our failure to
157   // interpret them correctly.
158   EXPECT_EQ(
159       "http://example.com/path?bar&left=&"
160       "=right&=&&name=again",
161       AppendOrReplaceQueryParameter(
162           GURL("http://example.com/path?bar&name=old&left=&"
163                "=right&=&&name=again"),
164           "name", std::nullopt)
165           .spec());
166 }
167 
// Checks that AppendOrReplaceRef() sets the fragment ("ref") of a URL,
// whether or not one was already present, and leaves the path and query
// untouched.
TEST(UrlUtilTest, AppendOrReplaceRef) {
  // Setting a new ref should append it.
  EXPECT_EQ("http://example.com/path#ref",
            AppendOrReplaceRef(GURL("http://example.com/path"), "ref").spec());

  // Setting a ref over an existing one should replace it.
  EXPECT_EQ("http://example.com/path#ref",
            AppendOrReplaceRef(GURL("http://example.com/path#old_ref"), "ref")
                .spec());

  // Setting a ref on a url with existing query parameters should simply append
  // it at the end. The input deliberately has no ref, so that the append path
  // (rather than a same-value replace) is what gets exercised.
  EXPECT_EQ(
      "http://example.com/path?query=value#ref",
      AppendOrReplaceRef(GURL("http://example.com/path?query=value"), "ref")
          .spec());

  // Setting a ref on a url with existing query parameters and with special
  // encoded characters: `special-chars?query=value#ref chars%\";'`
  EXPECT_EQ(
      "http://example.com/special-chars?query=value#ref%20chars%%22;'",
      AppendOrReplaceRef(GURL("http://example.com/special-chars?query=value"),
                         "ref chars%\";'")
          .spec());

  // Testing adding a ref to a URL with specially encoded characters.
  // `special chars%\";'?query=value#ref`
  EXPECT_EQ(
      "http://example.com/special%20chars%%22;'?query=value#ref",
      AppendOrReplaceRef(
          GURL("http://example.com/special chars%\";'?query=value"), "ref")
          .spec());
}
201 
// Checks that GetValueForKeyInQuery() reports whether a key is present and
// writes the unescaped value into the output string.
TEST(UrlUtilTest, GetValueForKeyInQuery) {
  const GURL url(
      "http://example.com/path?name=value&boolParam&"
      "url=http://test.com/q?n1%3Dv1%26n2");
  std::string value;

  // False when getting a non-existent query param.
  EXPECT_FALSE(GetValueForKeyInQuery(url, "non-exist", &value));

  // True when the query param exists.
  EXPECT_TRUE(GetValueForKeyInQuery(url, "name", &value));
  EXPECT_EQ("value", value);

  // A key without '=' is present and has an empty value.
  EXPECT_TRUE(GetValueForKeyInQuery(url, "boolParam", &value));
  EXPECT_EQ("", value);

  // Escaped characters in the value are unescaped.
  EXPECT_TRUE(GetValueForKeyInQuery(url, "url", &value));
  EXPECT_EQ("http://test.com/q?n1=v1&n2", value);
}
220 
// Checks that GetValueForKeyInQuery() finds nothing in an invalid URL, even
// when the requested key textually appears in its query.
TEST(UrlUtilTest, GetValueForKeyInQueryInvalidURL) {
  const GURL url("http://%01/?test");
  std::string value;

  // Always false when parsing an invalid URL.
  EXPECT_FALSE(GetValueForKeyInQuery(url, "test", &value));
}
228 
// Walks a QueryIterator over a query string and checks, in order, the key,
// the raw value and the unescaped value of every parameter, including
// value-less keys and repeated keys.
TEST(UrlUtilTest, ParseQuery) {
  const GURL url(
      "http://example.com/path?name=value&boolParam&"
      "url=http://test.com/q?n1%3Dv1%26n2&"
      "multikey=value1&multikey=value2&multikey");

  // Expected parameters, in the order they appear in `url`.
  const struct {
    const char* const key;
    const char* const value;
    const char* const unescaped_value;
  } expected_params[] = {
      {"name", "value", "value"},
      {"boolParam", "", ""},
      {"url", "http://test.com/q?n1%3Dv1%26n2", "http://test.com/q?n1=v1&n2"},
      {"multikey", "value1", "value1"},
      {"multikey", "value2", "value2"},
      {"multikey", "", ""},
  };

  QueryIterator it(url);
  for (const auto& expected : expected_params) {
    SCOPED_TRACE(expected.key);
    // Fatal: reading from an exhausted iterator would make the remaining
    // expectations meaningless.
    ASSERT_FALSE(it.IsAtEnd());
    EXPECT_EQ(expected.key, it.GetKey());
    EXPECT_EQ(expected.value, it.GetValue());
    EXPECT_EQ(expected.unescaped_value, it.GetUnescapedValue());
    it.Advance();
  }

  // No parameters beyond the expected ones.
  EXPECT_TRUE(it.IsAtEnd());
}
273 
// Checks that a QueryIterator built from an invalid URL yields no parameters.
TEST(UrlUtilTest, ParseQueryInvalidURL) {
  const GURL url("http://%01/?test");
  QueryIterator it(url);
  EXPECT_TRUE(it.IsAtEnd());
}
279 
// Table-driven check of ParseHostAndPort(): for each input, whether parsing
// succeeds and, if so, which host and port are produced. A port of -1 is the
// expected output when the input carries no port.
TEST(UrlUtilTest, ParseHostAndPort) {
  const struct {
    const char* const input;
    bool success;
    const char* const expected_host;
    int expected_port;
  } tests[] = {
      // Valid inputs:
      {"foo:10", true, "foo", 10},
      {"foo", true, "foo", -1},
      {"[1080:0:0:0:8:800:200C:4171]:11", true, "1080:0:0:0:8:800:200C:4171",
       11},
      {"[1080:0:0:0:8:800:200C:4171]", true, "1080:0:0:0:8:800:200C:4171", -1},

      // Because no validation is done on the host, the following are accepted,
      // even though they are invalid names.
      {"]", true, "]", -1},
      {"::1", true, ":", 1},

      // Invalid inputs:
      {"foo:bar", false, "", -1},
      {"foo:", false, "", -1},
      {":", false, "", -1},
      {":80", false, "", -1},
      {"", false, "", -1},
      {"porttoolong:300000", false, "", -1},
      {"usrname@host", false, "", -1},
      {"usrname:password@host", false, "", -1},
      {":password@host", false, "", -1},
      {":password@host:80", false, "", -1},
      {"@host", false, "", -1},
      {"[", false, "", -1},
      {"[]", false, "", -1},
  };

  for (const auto& test : tests) {
    // Identify the failing row; the expectations below do not mention it.
    SCOPED_TRACE(test.input);

    std::string host;
    int port = -1;
    const bool ok = ParseHostAndPort(test.input, &host, &port);
    EXPECT_EQ(test.success, ok);

    // The outputs are only checked when parsing is expected to succeed.
    if (test.success) {
      EXPECT_EQ(test.expected_host, host);
      EXPECT_EQ(test.expected_port, port);
    }
  }
}
// Checks that GetHostAndPort() always yields "host:port", filling in the
// scheme's default port when the URL does not specify one.
TEST(UrlUtilTest, GetHostAndPort) {
  const struct {
    GURL url;
    const char* const expected_host_and_port;
  } tests[] = {
      {GURL("http://www.foo.com/x"), "www.foo.com:80"},
      {GURL("http://www.foo.com:21/x"), "www.foo.com:21"},

      // IPv6 literals should always include the brackets.
      {GURL("http://[1::2]/x"), "[1::2]:80"},
      {GURL("http://[::a]:33/x"), "[::a]:33"},
  };

  for (const auto& test : tests) {
    const std::string host_and_port = GetHostAndPort(test.url);
    EXPECT_EQ(std::string(test.expected_host_and_port), host_and_port)
        << "url: " << test.url.spec();
  }
}
353 
// Checks that GetHostAndOptionalPort() omits the port when the URL does not
// specify one and keeps an explicitly specified non-default port. Both the
// GURL and the url::SchemeHostPort overloads are exercised.
TEST(UrlUtilTest, GetHostAndOptionalPort) {
  const struct {
    GURL url;
    const char* const expected_host_and_port;
  } tests[] = {
      {GURL("http://www.foo.com/x"), "www.foo.com"},
      {GURL("http://www.foo.com:21/x"), "www.foo.com:21"},
      {GURL("http://www.foo.com:443/x"), "www.foo.com:443"},

      {GURL("https://www.foo.com/x"), "www.foo.com"},
      {GURL("https://www.foo.com:80/x"), "www.foo.com:80"},

      // IPv6 literals should always include the brackets.
      {GURL("http://[1::2]/x"), "[1::2]"},
      {GURL("http://[::a]:33/x"), "[::a]:33"},
  };

  for (const auto& test : tests) {
    SCOPED_TRACE(test.url.spec());
    EXPECT_EQ(test.expected_host_and_port, GetHostAndOptionalPort(test.url));
    // Also test the SchemeHostPort variant.
    EXPECT_EQ(test.expected_host_and_port,
              GetHostAndOptionalPort(url::SchemeHostPort(test.url)));
  }
}
377 
// Checks that GetHostOrSpecFromURL() returns the host (without a trailing
// dot) when the URL has one, and the full spec otherwise.
TEST(UrlUtilTest, GetHostOrSpecFromURL) {
  EXPECT_EQ("example.com",
            GetHostOrSpecFromURL(GURL("http://example.com/test")));

  // A trailing dot on the host is dropped.
  EXPECT_EQ("example.com",
            GetHostOrSpecFromURL(GURL("http://example.com./test")));

  // A URL with no host yields its whole spec.
  EXPECT_EQ("file:///tmp/test.html",
            GetHostOrSpecFromURL(GURL("file:///tmp/test.html")));
}
386 
// Table-driven check of GetSuperdomain(): the expected result is everything
// after the first dot of the input, or the empty string when nothing
// follows it.
TEST(UrlUtilTest, GetSuperdomain) {
  const struct {
    const char* const domain;
    const char* const expected_superdomain;
  } tests[] = {
      // Basic cases
      {"foo.bar.example", "bar.example"},
      {"bar.example", "example"},
      {"example", ""},

      // Returned value may be an eTLD.
      {"google.com", "com"},
      {"google.co.uk", "co.uk"},

      // Weird cases.
      {"", ""},
      {"has.trailing.dot.", "trailing.dot."},
      {"dot.", ""},
      {".has.leading.dot", "has.leading.dot"},
      {".", ""},
      {"..", "."},
      {"127.0.0.1", "0.0.1"},
  };

  for (const auto& test : tests) {
    EXPECT_EQ(test.expected_superdomain, GetSuperdomain(test.domain))
        << "domain: \"" << test.domain << "\"";
  }
}
415 
// Table-driven check of IsSubdomainOf(subdomain, superdomain). The rows
// expect a match only at a label boundary, and expect a domain to count as a
// subdomain of itself.
TEST(UrlUtilTest, IsSubdomainOf) {
  const struct {
    const char* subdomain;
    const char* superdomain;
    bool is_subdomain;
  } tests[] = {
      {"bar.foo.com", "foo.com", true},
      {"barfoo.com", "foo.com", false},
      {"bar.foo.com", "com", true},
      {"bar.foo.com", "other.com", false},
      {"bar.foo.com", "bar.foo.com", true},
      {"bar.foo.com", "baz.foo.com", false},
      {"bar.foo.com", "baz.bar.foo.com", false},
      {"bar.foo.com", "ar.foo.com", false},
      {"foo.com", "foo.com.", false},
      {"bar.foo.com", "foo.com.", false},
      {"", "", true},
      {"a", "", false},
      {"", "a", false},
      {"127.0.0.1", "0.0.1", true},  // Don't do this...
  };

  for (const auto& test : tests) {
    EXPECT_EQ(test.is_subdomain,
              IsSubdomainOf(test.subdomain, test.superdomain))
        << "subdomain: \"" << test.subdomain << "\", superdomain: \""
        << test.superdomain << "\"";
  }
}
443 
TEST(UrlUtilTest,CompliantHost)444 TEST(UrlUtilTest, CompliantHost) {
445   struct {
446     const char* const host;
447     bool expected_output;
448   } compliant_host_cases[] = {
449       {"", false},
450       {"a", true},
451       {"-", false},
452       {"_", false},
453       {".", false},
454       {"9", true},
455       {"9a", true},
456       {"9_", true},
457       {"a.", true},
458       {".a", false},
459       {"a.a", true},
460       {"9.a", true},
461       {"a.9", true},
462       {"_9a", false},
463       {"-9a", false},
464       {"a.a9", true},
465       {"_.9a", true},
466       {"a.-a9", false},
467       {"a+9a", false},
468       {"-a.a9", true},
469       {"a_.a9", true},
470       {"1-.a-b", true},
471       {"1_.a-b", true},
472       {"1-2.a_b", true},
473       {"a.b.c.d.e", true},
474       {"1.2.3.4.5", true},
475       {"1.2.3..4.5", false},
476       {"1.2.3.4.5.", true},
477       {"1.2.3.4.5..", false},
478       {"%20%20noodles.blorg", false},
479       {"noo dles.blorg ", false},
480       {"noo dles.blorg. ", false},
481       {"^noodles.blorg", false},
482       {"noodles^.blorg", false},
483       {"noo&dles.blorg", false},
484       {"noodles.blorg`", false},
485       {"www.noodles.blorg", true},
486       {"1www.noodles.blorg", true},
487       {"www.2noodles.blorg", true},
488       {"www.n--oodles.blorg", true},
489       {"www.noodl_es.blorg", true},
490       {"www.no-_odles.blorg", true},
491       {"www_.noodles.blorg", true},
492       {"www.noodles.blorg.", true},
493       {"_privet._tcp.local", true},
494       // 63-char label (before or without dot) allowed
495       {"z23456789a123456789a123456789a123456789a123456789a123456789a123", true},
496       {"z23456789a123456789a123456789a123456789a123456789a123456789a123.",
497        true},
498       // 64-char label (before or without dot) disallowed
499       {"123456789a123456789a123456789a123456789a123456789a123456789a1234",
500        false},
501       {"123456789a123456789a123456789a123456789a123456789a123456789a1234.",
502        false},
503       // 253-char host allowed
504       {"abcdefghi.abcdefghi.abcdefghi.abcdefghi.abcdefghi.abcdefghi.abcdefghi."
505        "abcdefghi.abcdefghi.abcdefghi.abcdefghi.abcdefghi.abcdefghi.abcdefghi."
506        "abcdefghi.abcdefghi.abcdefghi.abcdefghi.abcdefghi.abcdefghi.abcdefghi."
507        "abcdefghi.abcdefghi.abcdefghi.abcdefghi.abc",
508        true},
509       // 253-char+dot host allowed
510       {"abcdefghi.abcdefghi.abcdefghi.abcdefghi.abcdefghi.abcdefghi.abcdefghi."
511        "abcdefghi.abcdefghi.abcdefghi.abcdefghi.abcdefghi.abcdefghi.abcdefghi."
512        "abcdefghi.abcdefghi.abcdefghi.abcdefghi.abcdefghi.abcdefghi.abcdefghi."
513        "abcdefghi.abcdefghi.abcdefghi.abcdefghi.abc.",
514        true},
515       // 254-char host disallowed
516       {"123456789.123456789.123456789.123456789.123456789.123456789.123456789."
517        "123456789.123456789.123456789.123456789.123456789.123456789.123456789."
518        "123456789.123456789.123456789.123456789.123456789.123456789.123456789."
519        "123456789.123456789.123456789.123456789.1234",
520        false},
521       // 254-char+dot host disallowed
522       {"123456789.123456789.123456789.123456789.123456789.123456789.123456789."
523        "123456789.123456789.123456789.123456789.123456789.123456789.123456789."
524        "123456789.123456789.123456789.123456789.123456789.123456789.123456789."
525        "123456789.123456789.123456789.123456789.1234.",
526        false},
527   };
528 
529   for (const auto& compliant_host : compliant_host_cases) {
530     EXPECT_EQ(compliant_host.expected_output,
531               IsCanonicalizedHostCompliant(compliant_host.host))
532         << compliant_host.host;
533   }
534 }
535 
// One parameter of the UrlUtilNonUniqueNameTest suite.
struct NonUniqueNameTestData {
  // Expected classification: true when `hostname` should be treated as a
  // unique name, i.e. when IsHostnameNonUnique() should return false for it.
  bool is_unique;
  // Hostname (or IP literal) to classify.
  const char* const hostname;
};
540 
541 // Google Test pretty-printer.
PrintTo(const NonUniqueNameTestData & data,std::ostream * os)542 void PrintTo(const NonUniqueNameTestData& data, std::ostream* os) {
543   ASSERT_TRUE(data.hostname);
544   *os << " hostname: " << testing::PrintToString(data.hostname)
545       << "; is_unique: " << testing::PrintToString(data.is_unique);
546 }
547 
548 const NonUniqueNameTestData kNonUniqueNameTestData[] = {
549     // eTLDs
550     {true, "com"},
551     {true, "com."},
552     {true, ".com"},
553     {true, "co.uk"},
554     {true, "co.uk."},
555     {true, ".co.uk"},
556     {false, "notarealtld"},
557     {false, ".notarealtld"},
558     {false, "notarealtld."},
559     // Domains under ICANN-assigned domains.
560     {true, "google.com"},
561     {true, "google.co.uk"},
562     // Domains under private registries.
563     {true, "appspot.com"},
564     {true, "test.appspot.com"},
565     // Unreserved IPv4 addresses (in various forms).
566     {true, "8.8.8.8"},
567     {true, "99.64.0.0"},
568     {true, "212.15.0.0"},
569     {true, "212.15"},
570     {true, "212.15.0"},
571     {true, "3557752832"},
572     // Reserved IPv4 addresses (in various forms).
573     {false, "192.168.0.0"},
574     {false, "192.168.0.6"},
575     {false, "10.0.0.5"},
576     {false, "10.0"},
577     {false, "10.0.0"},
578     {false, "3232235526"},
579     // Unreserved IPv6 addresses.
580     {true, "FFC0:ba98:7654:3210:FEDC:BA98:7654:3210"},
581     {true, "2000:ba98:7654:2301:EFCD:BA98:7654:3210"},
582     // Reserved IPv6 addresses.
583     {false, "::192.9.5.5"},
584     {false, "FEED::BEEF"},
585     {false, "FEC0:ba98:7654:3210:FEDC:BA98:7654:3210"},
586     // 'internal'/non-IANA assigned domains.
587     {false, "intranet"},
588     {false, "intranet."},
589     {false, "intranet.example"},
590     {false, "host.intranet.example"},
591     // gTLDs under discussion, but not yet assigned.
592     {false, "intranet.corp"},
593     {false, "intranet.internal"},
594     // Invalid host names are treated as unique - but expected to be
595     // filtered out before then.
596     {true, "junk)(£)$*!@~#"},
597     {true, "w$w.example.com"},
598     {true, "nocolonsallowed:example"},
599     {true, "[::4.5.6.9]"},
600 };
601 
602 class UrlUtilNonUniqueNameTest
603     : public testing::TestWithParam<NonUniqueNameTestData> {
604  public:
605   ~UrlUtilNonUniqueNameTest() override = default;
606 
607  protected:
IsUnique(const std::string & hostname)608   bool IsUnique(const std::string& hostname) {
609     return !IsHostnameNonUnique(hostname);
610   }
611 };
612 
613 // Test that internal/non-unique names are properly identified as such, but
614 // that IP addresses and hosts beneath registry-controlled domains are flagged
615 // as unique names.
TEST_P(UrlUtilNonUniqueNameTest,IsHostnameNonUnique)616 TEST_P(UrlUtilNonUniqueNameTest, IsHostnameNonUnique) {
617   const NonUniqueNameTestData& test_data = GetParam();
618 
619   EXPECT_EQ(test_data.is_unique, IsUnique(test_data.hostname));
620 }
621 
622 INSTANTIATE_TEST_SUITE_P(All,
623                          UrlUtilNonUniqueNameTest,
624                          testing::ValuesIn(kNonUniqueNameTestData));
625 
// Checks which host strings HostStringIsLocalhost() accepts, and that
// IsLocalhost() accepts a URL whose host is the bracketed IPv6 loopback.
TEST(UrlUtilTest, IsLocalhost) {
  // "localhost", case-insensitively and with an optional trailing dot.
  EXPECT_TRUE(HostStringIsLocalhost("localhost"));
  EXPECT_TRUE(HostStringIsLocalhost("localHosT"));
  EXPECT_TRUE(HostStringIsLocalhost("localhost."));
  EXPECT_TRUE(HostStringIsLocalhost("localHost."));
  // IPv4 addresses under 127.
  EXPECT_TRUE(HostStringIsLocalhost("127.0.0.1"));
  EXPECT_TRUE(HostStringIsLocalhost("127.0.1.0"));
  EXPECT_TRUE(HostStringIsLocalhost("127.1.0.0"));
  EXPECT_TRUE(HostStringIsLocalhost("127.0.0.255"));
  EXPECT_TRUE(HostStringIsLocalhost("127.0.255.0"));
  EXPECT_TRUE(HostStringIsLocalhost("127.255.0.0"));
  // The IPv6 loopback address, compressed and expanded.
  EXPECT_TRUE(HostStringIsLocalhost("::1"));
  EXPECT_TRUE(HostStringIsLocalhost("0:0:0:0:0:0:0:1"));
  // Names directly under "localhost".
  EXPECT_TRUE(HostStringIsLocalhost("foo.localhost"));
  EXPECT_TRUE(HostStringIsLocalhost("foo.localhost."));
  EXPECT_TRUE(HostStringIsLocalhost("foo.localhoST"));
  EXPECT_TRUE(HostStringIsLocalhost("foo.localhoST."));

  // "localdomain" and "localhost6" spellings are not treated as localhost.
  EXPECT_FALSE(HostStringIsLocalhost("localhost.localdomain"));
  EXPECT_FALSE(HostStringIsLocalhost("localhost.localDOMain"));
  EXPECT_FALSE(HostStringIsLocalhost("localhost.localdomain."));
  EXPECT_FALSE(HostStringIsLocalhost("localhost6"));
  EXPECT_FALSE(HostStringIsLocalhost("localhost6."));
  EXPECT_FALSE(HostStringIsLocalhost("localhost6.localdomain6"));
  EXPECT_FALSE(HostStringIsLocalhost("localhost6.localdomain6."));

  // Near misses.
  EXPECT_FALSE(HostStringIsLocalhost("localhostx"));
  EXPECT_FALSE(HostStringIsLocalhost("localhost.x"));
  EXPECT_FALSE(HostStringIsLocalhost("foo.localdomain"));
  EXPECT_FALSE(HostStringIsLocalhost("foo.localdomain.x"));
  EXPECT_FALSE(HostStringIsLocalhost("localhost6x"));
  EXPECT_FALSE(HostStringIsLocalhost("localhost.localdomain6"));
  EXPECT_FALSE(HostStringIsLocalhost("localhost6.localdomain"));
  EXPECT_FALSE(HostStringIsLocalhost("127.0.0.1.1"));
  EXPECT_FALSE(HostStringIsLocalhost(".127.0.0.255"));
  EXPECT_FALSE(HostStringIsLocalhost("::2"));
  EXPECT_FALSE(HostStringIsLocalhost("::1:1"));
  EXPECT_FALSE(HostStringIsLocalhost("0:0:0:0:1:0:0:1"));
  EXPECT_FALSE(HostStringIsLocalhost("0:0:0:0:0:0:0:0:1"));
  EXPECT_FALSE(HostStringIsLocalhost("foo.localhost.com"));
  EXPECT_FALSE(HostStringIsLocalhost("foo.localhoste"));
  EXPECT_FALSE(HostStringIsLocalhost("foo.localhos"));
  // The bracketed form is rejected as a bare host string...
  EXPECT_FALSE(HostStringIsLocalhost("[::1]"));

  // ...but is accepted as the host of a URL.
  const GURL localhost6("http://[::1]/");
  EXPECT_TRUE(IsLocalhost(localhost6));
}
674 
675 class UrlUtilTypedTest : public ::testing::TestWithParam<bool> {
676  public:
UrlUtilTypedTest()677   UrlUtilTypedTest()
678       : use_standard_compliant_non_special_scheme_url_parsing_(GetParam()) {
679     if (use_standard_compliant_non_special_scheme_url_parsing_) {
680       scoped_feature_list_.InitAndEnableFeature(
681           url::kStandardCompliantNonSpecialSchemeURLParsing);
682     } else {
683       scoped_feature_list_.InitAndDisableFeature(
684           url::kStandardCompliantNonSpecialSchemeURLParsing);
685     }
686   }
687 
688  protected:
689   bool use_standard_compliant_non_special_scheme_url_parsing_;
690 
691  private:
692   base::test::ScopedFeatureList scoped_feature_list_;
693 };
694 
695 INSTANTIATE_TEST_SUITE_P(All, UrlUtilTypedTest, ::testing::Bool());
696 
// Checks that SimplifyUrlForRequest() strips the reference and the
// username/password from URLs with http, https and ftp schemes.
TEST(UrlUtilTest, SimplifyUrlForRequest) {
  const struct {
    const char* const input_url;
    const char* const expected_simplified_url;
  } tests[] = {
      {
          // Reference section should be stripped.
          "http://www.google.com:78/foobar?query=1#hash",
          "http://www.google.com:78/foobar?query=1",
      },
      {
          // Reference section can itself contain #.
          "http://192.168.0.1?query=1#hash#10#11#13#14",
          "http://192.168.0.1?query=1",
      },
      {
          // Strip username/password.
          "http://user:pass@google.com",
          "http://google.com/",
      },
      {
          // Strip both the reference and the username/password.
          "http://user:pass@google.com:80/sup?yo#X#X",
          "http://google.com/sup?yo",
      },
      {
          // Try an HTTPS URL -- strip both the reference and the
          // username/password.
          "https://user:pass@google.com:80/sup?yo#X#X",
          "https://google.com:80/sup?yo",
      },
      {
          // Try an FTP URL -- strip both the reference and the
          // username/password.
          "ftp://user:pass@google.com:80/sup?yo#X#X",
          "ftp://google.com:80/sup?yo",
      },
  };

  for (const auto& test : tests) {
    SCOPED_TRACE(test.input_url);
    // Both sides are compared as GURLs, so the expected string goes through
    // the same canonicalization as the simplified result.
    const GURL input_url(test.input_url);
    const GURL expected_url(test.expected_simplified_url);
    EXPECT_EQ(expected_url, SimplifyUrlForRequest(input_url));
  }
}
736 
// Checks SimplifyUrlForRequest() on a URL with a non-special scheme, where
// the expected result depends on whether standard-compliant non-special
// scheme URL parsing is enabled: the credentials are expected to be stripped
// only when it is.
TEST_P(UrlUtilTypedTest, SimplifyUrlForRequest) {
  static constexpr struct {
    const char* const input_url;
    const char* const expected_when_compliant;
    const char* const expected_when_non_compliant;
  } tests[] = {
      {
          // Try a non-special URL
          "foobar://user:pass@google.com:80/sup?yo#X#X",
          "foobar://google.com:80/sup?yo",
          "foobar://user:pass@google.com:80/sup?yo",
      },
  };

  for (const auto& test : tests) {
    SCOPED_TRACE(test.input_url);
    // Pick the expectation that matches the feature state of this run.
    const char* const expected_url =
        use_standard_compliant_non_special_scheme_url_parsing_
            ? test.expected_when_compliant
            : test.expected_when_non_compliant;
    EXPECT_EQ(GURL(expected_url), SimplifyUrlForRequest(GURL(test.input_url)));
  }
}
761 
// Checks that ChangeWebSocketSchemeToHttpScheme() maps ws -> http and
// wss -> https while leaving host, port, path and query unchanged.
TEST(UrlUtilTest, ChangeWebSocketSchemeToHttpScheme) {
  const struct {
    const char* const input_url;
    const char* const expected_output_url;
  } tests[] = {
      {"ws://google.com:78/path?query=1", "http://google.com:78/path?query=1"},
      {"wss://google.com:441/path?q=1", "https://google.com:441/path?q=1"},
  };

  for (const auto& test : tests) {
    SCOPED_TRACE(test.input_url);
    const GURL input_url(test.input_url);
    const GURL expected_output_url(test.expected_output_url);
    EXPECT_EQ(expected_output_url,
              ChangeWebSocketSchemeToHttpScheme(input_url));
  }
}
776 
// Checks IsStandardSchemeWithNetworkHost() against built-in schemes and four
// custom schemes, one registered with each url::SchemeType used below. Among
// the custom schemes, only the two registered with a host-and-port type are
// expected to qualify.
TEST(UrlUtilTest, SchemeHasNetworkHost) {
  const char kCustomSchemeWithHostPortAndUserInformation[] = "foo";
  const char kCustomSchemeWithHostAndPort[] = "bar";
  const char kCustomSchemeWithHost[] = "baz";
  const char kCustomSchemeWithoutAuthority[] = "qux";
  // Intentionally never registered.
  const char kNonStandardScheme[] = "not-registered";

  // Declared before any registration so that the custom schemes added below
  // fall within its lifetime.
  url::ScopedSchemeRegistryForTests scheme_registry;
  AddStandardScheme(kCustomSchemeWithHostPortAndUserInformation,
                    url::SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION);
  AddStandardScheme(kCustomSchemeWithHostAndPort,
                    url::SCHEME_WITH_HOST_AND_PORT);
  AddStandardScheme(kCustomSchemeWithHost, url::SCHEME_WITH_HOST);
  AddStandardScheme(kCustomSchemeWithoutAuthority,
                    url::SCHEME_WITHOUT_AUTHORITY);

  EXPECT_TRUE(IsStandardSchemeWithNetworkHost(url::kHttpScheme));
  EXPECT_TRUE(IsStandardSchemeWithNetworkHost(url::kHttpsScheme));
  EXPECT_TRUE(IsStandardSchemeWithNetworkHost(url::kWsScheme));
  EXPECT_TRUE(IsStandardSchemeWithNetworkHost(url::kWssScheme));
  EXPECT_TRUE(IsStandardSchemeWithNetworkHost(url::kFtpScheme));
  EXPECT_TRUE(IsStandardSchemeWithNetworkHost(url::kFileScheme));
  EXPECT_TRUE(IsStandardSchemeWithNetworkHost(
      kCustomSchemeWithHostPortAndUserInformation));
  EXPECT_TRUE(IsStandardSchemeWithNetworkHost(kCustomSchemeWithHostAndPort));

  EXPECT_FALSE(IsStandardSchemeWithNetworkHost(url::kFileSystemScheme));
  EXPECT_FALSE(IsStandardSchemeWithNetworkHost(kCustomSchemeWithHost));
  EXPECT_FALSE(IsStandardSchemeWithNetworkHost(kCustomSchemeWithoutAuthority));
  EXPECT_FALSE(IsStandardSchemeWithNetworkHost(kNonStandardScheme));
}
808 
TEST(UrlUtilTest,GetIdentityFromURL)809 TEST(UrlUtilTest, GetIdentityFromURL) {
810   struct {
811     const char* const input_url;
812     const char* const expected_username;
813     const char* const expected_password;
814   } tests[] = {
815       {
816           "http://username:password@google.com",
817           "username",
818           "password",
819       },
820       {
821           // Test for http://crbug.com/19200
822           "http://username:p@ssword@google.com",
823           "username",
824           "p@ssword",
825       },
826       {
827           // Special URL characters should be unescaped.
828           "http://username:p%3fa%26s%2fs%23@google.com",
829           "username",
830           "p?a&s/s#",
831       },
832       {
833           // Username contains %20, password %25.
834           "http://use rname:password%25@google.com",
835           "use rname",
836           "password%",
837       },
838       {
839           // Username and password contain forward / backward slashes.
840           "http://username%2F:password%5C@google.com",
841           "username/",
842           "password\\",
843       },
844       {
845           // Keep %00 and %01 as-is, and ignore other escaped characters when
846           // present.
847           "http://use%00rname%20:pass%01word%25@google.com",
848           "use%00rname%20",
849           "pass%01word%25",
850       },
851       {
852           // Keep CR and LF as-is.
853           "http://use%0Arname:pass%0Dword@google.com",
854           "use%0Arname",
855           "pass%0Dword",
856       },
857       {
858           // Use a '+' in the username.
859           "http://use+rname:password@google.com",
860           "use+rname",
861           "password",
862       },
863       {
864           // Use a '&' in the password.
865           "http://username:p&ssword@google.com",
866           "username",
867           "p&ssword",
868       },
869       {
870           // These UTF-8 characters are considered unsafe to unescape by
871           // UnescapeURLComponent, but raise no special concerns as part of the
872           // identity portion of a URL.
873           "http://%F0%9F%94%92:%E2%80%82@google.com",
874           "\xF0\x9F\x94\x92",
875           "\xE2\x80\x82",
876       },
877       {
878           // Leave invalid UTF-8 alone, and leave valid UTF-8 characters alone
879           // if there's also an invalid character in the string - strings should
880           // not be partially unescaped.
881           "http://%81:%E2%80%82%E2%80@google.com",
882           "%81",
883           "%E2%80%82%E2%80",
884       },
885   };
886   for (const auto& test : tests) {
887     SCOPED_TRACE(test.input_url);
888     GURL url(test.input_url);
889 
890     std::u16string username, password;
891     GetIdentityFromURL(url, &username, &password);
892 
893     EXPECT_EQ(base::UTF8ToUTF16(test.expected_username), username);
894     EXPECT_EQ(base::UTF8ToUTF16(test.expected_password), password);
895   }
896 }
897 
898 // Try extracting a username which was encoded with UTF8.
TEST(UrlUtilTest,GetIdentityFromURL_UTF8)899 TEST(UrlUtilTest, GetIdentityFromURL_UTF8) {
900   GURL url(u"http://foo:\x4f60\x597d@blah.com");
901 
902   EXPECT_EQ("foo", url.username());
903   EXPECT_EQ("%E4%BD%A0%E5%A5%BD", url.password());
904 
905   // Extract the unescaped identity.
906   std::u16string username, password;
907   GetIdentityFromURL(url, &username, &password);
908 
909   // Verify that it was decoded as UTF8.
910   EXPECT_EQ(u"foo", username);
911   EXPECT_EQ(u"\x4f60\x597d", password);
912 }
913 
// Table-driven check of HasGoogleHost(). Per the table, hosts that end in one
// of the listed Google domain suffixes preceded by a '.' (e.g. "a.google.com",
// ".google.com") are expected to match, while the bare domains and look-alike
// names are not.
TEST(UrlUtilTest, GoogleHost) {
  const struct {
    const char* const url;
    const bool expected_output;
  } google_host_cases[] = {
      {"http://.google.com", true},
      {"http://.youtube.com", true},
      {"http://.gmail.com", true},
      {"http://.doubleclick.net", true},
      {"http://.gstatic.com", true},
      {"http://.googlevideo.com", true},
      {"http://.googleusercontent.com", true},
      {"http://.googlesyndication.com", true},
      {"http://.google-analytics.com", true},
      {"http://.googleadservices.com", true},
      {"http://.googleapis.com", true},
      {"http://a.google.com", true},
      {"http://b.youtube.com", true},
      {"http://c.gmail.com", true},
      {"http://google.com", false},
      {"http://youtube.com", false},
      {"http://gmail.com", false},
      {"http://google.coma", false},
      {"http://agoogle.com", false},
      {"http://oogle.com", false},
      {"http://google.co", false},
      {"http://oggole.com", false},
  };

  for (const auto& test : google_host_cases) {
    // Identify the failing table entry in the test output.
    SCOPED_TRACE(test.url);
    EXPECT_EQ(test.expected_output, HasGoogleHost(GURL(test.url)));
  }
}
947 
// Checks IsLocalHostname() against names that are expected to count as
// localhost (any letter case, optional trailing dot, optional subdomains) and
// against names that merely contain the word "localhost".
TEST(UrlUtilTest, IsLocalHostname) {
  static constexpr const char* kLocalNames[] = {
      "localhost",         "localhost.",     "LOCALhost",
      "LOCALhost.",        "abc.localhost",  "abc.localhost.",
      "abc.LOCALhost",     "abc.LOCALhost.", "abc.def.localhost",
  };
  for (const char* name : kLocalNames) {
    EXPECT_TRUE(IsLocalHostname(name)) << name;
  }

  static constexpr const char* kNonLocalNames[] = {
      "localhost.actuallynot", "notlocalhost",         "notlocalhost.",
      "still.notlocalhost",    "localhostjustkidding",
  };
  for (const char* name : kNonLocalNames) {
    EXPECT_FALSE(IsLocalHostname(name)) << name;
  }
}
965 
// Table-driven check of IsGoogleHostWithAlpnH3(). Per the table, google.com
// and www.google.com are expected to match in any letter case, while the
// other Google country/TLD variants listed are not.
TEST(UrlUtilTest, GoogleHostWithAlpnH3) {
  const struct {
    std::string_view host;
    bool expected_output;
  } test_cases[] = {
      {"google.com", true},        {"www.google.com", true},
      {"google.CoM", true},        {"www.Google.cOm", true},
      {"www.google.cat", false},   {"www.google.co.in", false},
      {"www.google.co.jp", false},
  };

  for (const auto& test : test_cases) {
    // Identify the failing table entry in the test output.
    SCOPED_TRACE(test.host);
    EXPECT_EQ(test.expected_output, IsGoogleHostWithAlpnH3(test.host));
  }
}
981 
982 }  // namespace
983 }  // namespace net
984