• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2013 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "net/base/url_util.h"
6 
7 #include <optional>
8 #include <ostream>
9 
10 #include "base/format_macros.h"
11 #include "base/strings/utf_string_conversions.h"
12 #include "testing/gtest/include/gtest/gtest.h"
13 #include "url/gurl.h"
14 #include "url/scheme_host_port.h"
15 #include "url/url_util.h"
16 
17 using base::ASCIIToUTF16;
18 using base::WideToUTF16;
19 
20 namespace net {
21 namespace {
22 
// Verifies that AppendQueryParameter() adds an escaped name=value pair to the
// query component, creating the query if the URL has none.
TEST(UrlUtilTest, AppendQueryParameter) {
  // Appends |name|=|value| to |url| and returns the spec of the result.
  const auto append = [](const char* url, const char* name,
                         const char* value) {
    return AppendQueryParameter(GURL(url), name, value).spec();
  };

  // No query component yet: the pair becomes the whole query.
  EXPECT_EQ("http://example.com/path?name=value",
            append("http://example.com/path", "name", "value"));

  // Existing query component: it is kept intact and the new pair is joined
  // to it with '&'.
  EXPECT_EQ("http://example.com/path?existing=one&name=value",
            append("http://example.com/path?existing=one", "name", "value"));

  // Unsafe characters in the name or the value are escaped.
  EXPECT_EQ("http://example.com/path?existing=one&na+me=v.alue%3D",
            append("http://example.com/path?existing=one", "na me",
                   "v.alue="));
}
43 
TEST(UrlUtilTest,AppendOrReplaceQueryParameter)44 TEST(UrlUtilTest, AppendOrReplaceQueryParameter) {
45   // Appending a name-value pair to a URL without a query component.
46   EXPECT_EQ("http://example.com/path?name=value",
47             AppendOrReplaceQueryParameter(GURL("http://example.com/path"),
48                                  "name", "value").spec());
49 
50   // Appending a name-value pair to a URL with a query component.
51   // The original component should be preserved, and the new pair should be
52   // appended with '&'.
53   EXPECT_EQ("http://example.com/path?existing=one&name=value",
54       AppendOrReplaceQueryParameter(
55           GURL("http://example.com/path?existing=one"),
56           "name", "value").spec());
57 
58   // Appending a name-value pair with unsafe characters included. The
59   // unsafe characters should be escaped.
60   EXPECT_EQ("http://example.com/path?existing=one&na+me=v.alue%3D",
61       AppendOrReplaceQueryParameter(
62           GURL("http://example.com/path?existing=one"),
63           "na me", "v.alue=").spec());
64 
65   // Replace value of an existing paramater.
66   EXPECT_EQ("http://example.com/path?existing=one&name=new",
67       AppendOrReplaceQueryParameter(
68           GURL("http://example.com/path?existing=one&name=old"),
69           "name", "new").spec());
70 
71   // Replace a name-value pair with unsafe characters included. The
72   // unsafe characters should be escaped.
73   EXPECT_EQ("http://example.com/path?na+me=n.ew%3D&existing=one",
74       AppendOrReplaceQueryParameter(
75           GURL("http://example.com/path?na+me=old&existing=one"),
76           "na me", "n.ew=").spec());
77 
78   // Replace the value of first parameter with this name only.
79   EXPECT_EQ("http://example.com/path?name=new&existing=one&name=old",
80       AppendOrReplaceQueryParameter(
81           GURL("http://example.com/path?name=old&existing=one&name=old"),
82           "name", "new").spec());
83 
84   // Preserve the content of the original params regardless of our failure to
85   // interpret them correctly.
86   EXPECT_EQ("http://example.com/path?bar&name=new&left=&"
87             "=right&=&&name=again",
88       AppendOrReplaceQueryParameter(
89           GURL("http://example.com/path?bar&name=old&left=&"
90                 "=right&=&&name=again"),
91           "name", "new").spec());
92 
93   // ----- Removing the key using nullopt value -----
94 
95   // Removes the name-value pair from the URL preserving other query parameters.
96   EXPECT_EQ("http://example.com/path?abc=xyz",
97             AppendOrReplaceQueryParameter(
98                 GURL("http://example.com/path?name=value&abc=xyz"), "name",
99                 std::nullopt)
100                 .spec());
101 
102   // Removes the name-value pair from the URL.
103   EXPECT_EQ("http://example.com/path?",
104             AppendOrReplaceQueryParameter(
105                 GURL("http://example.com/path?existing=one"), "existing",
106                 std::nullopt)
107                 .spec());
108 
109   // Removes the first name-value pair.
110   EXPECT_EQ("http://example.com/path?c=d&e=f",
111             AppendOrReplaceQueryParameter(
112                 GURL("http://example.com/path?a=b&c=d&e=f"), "a", std::nullopt)
113                 .spec());
114 
115   // Removes a name-value pair in between two query params.
116   EXPECT_EQ(
117       "http://example.com/path?existing=one&hello=world",
118       AppendOrReplaceQueryParameter(
119           GURL("http://example.com/path?existing=one&replace=sure&hello=world"),
120           "replace", std::nullopt)
121           .spec());
122 
123   // Removes the last name-value pair.
124   EXPECT_EQ("http://example.com/path?existing=one",
125             AppendOrReplaceQueryParameter(
126                 GURL("http://example.com/path?existing=one&replace=sure"),
127                 "replace", std::nullopt)
128                 .spec());
129 
130   // Removing a name-value pair with unsafe characters included. The
131   // unsafe characters should be escaped.
132   EXPECT_EQ("http://example.com/path?existing=one&hello=world",
133             AppendOrReplaceQueryParameter(
134                 GURL("http://example.com/"
135                      "path?existing=one&na+me=v.alue%3D&hello=world"),
136                 "na me", std::nullopt)
137                 .spec());
138 
139   // Does nothing if the provided query param key does not exist.
140   EXPECT_EQ("http://example.com/path?existing=one&name=old",
141             AppendOrReplaceQueryParameter(
142                 GURL("http://example.com/path?existing=one&name=old"), "old",
143                 std::nullopt)
144                 .spec());
145 
146   // Remove the value of first parameter with this name only.
147   EXPECT_EQ(
148       "http://example.com/path?existing=one&name=old",
149       AppendOrReplaceQueryParameter(
150           GURL("http://example.com/path?name=something&existing=one&name=old"),
151           "name", std::nullopt)
152           .spec());
153 
154   // Preserve the content of the original params regardless of our failure to
155   // interpret them correctly.
156   EXPECT_EQ(
157       "http://example.com/path?bar&left=&"
158       "=right&=&&name=again",
159       AppendOrReplaceQueryParameter(
160           GURL("http://example.com/path?bar&name=old&left=&"
161                "=right&=&&name=again"),
162           "name", std::nullopt)
163           .spec());
164 }
165 
// Verifies that AppendOrReplaceRef() sets the fragment ("ref") of a URL,
// whether or not the URL already has one, and that the result is escaped.
TEST(UrlUtilTest, AppendOrReplaceRef) {
  // Setting a new ref should append it.
  EXPECT_EQ("http://example.com/path#ref",
            AppendOrReplaceRef(GURL("http://example.com/path"), "ref").spec());

  // Setting a ref over an existing one should replace it.
  EXPECT_EQ("http://example.com/path#ref",
            AppendOrReplaceRef(GURL("http://example.com/path#old_ref"), "ref")
                .spec());

  // Setting a ref on a url with existing query parameters should simply append
  // it at the end. The input deliberately has no ref, so that this exercises
  // the append path rather than replacing an identical ref.
  EXPECT_EQ(
      "http://example.com/path?query=value#ref",
      AppendOrReplaceRef(GURL("http://example.com/path?query=value"), "ref")
          .spec());

  // Setting a ref that contains special characters on a url with existing
  // query parameters: `special-chars?query=value#ref chars%\";'`
  EXPECT_EQ(
      "http://example.com/special-chars?query=value#ref%20chars%%22;'",
      AppendOrReplaceRef(GURL("http://example.com/special-chars?query=value"),
                         "ref chars%\";'")
          .spec());

  // Adding a ref to a URL whose path contains special characters:
  // `special chars%\";'?query=value#ref`
  EXPECT_EQ(
      "http://example.com/special%20chars%%22;'?query=value#ref",
      AppendOrReplaceRef(
          GURL("http://example.com/special chars%\";'?query=value"), "ref")
          .spec());
}
199 
// Verifies that GetValueForKeyInQuery() reports whether a key is present in
// the query and hands back its unescaped value.
TEST(UrlUtilTest, GetValueForKeyInQuery) {
  const GURL kUrl(
      "http://example.com/path?name=value&boolParam&"
      "url=http://test.com/q?n1%3Dv1%26n2");
  std::string found;

  // A key that is absent from the query yields false.
  EXPECT_FALSE(GetValueForKeyInQuery(kUrl, "non-exist", &found));

  // A regular key=value pair yields true and the value.
  EXPECT_TRUE(GetValueForKeyInQuery(kUrl, "name", &found));
  EXPECT_EQ("value", found);

  // A key without '=' yields true and an empty value.
  EXPECT_TRUE(GetValueForKeyInQuery(kUrl, "boolParam", &found));
  EXPECT_EQ("", found);

  // An escaped value is returned unescaped.
  EXPECT_TRUE(GetValueForKeyInQuery(kUrl, "url", &found));
  EXPECT_EQ("http://test.com/q?n1=v1&n2", found);
}
218 
// Verifies that GetValueForKeyInQuery() finds nothing in an invalid URL, even
// for a key that appears in its query text.
TEST(UrlUtilTest, GetValueForKeyInQueryInvalidURL) {
  const GURL kInvalidUrl("http://%01/?test");
  std::string found;

  // Lookups on an invalid URL always report failure.
  EXPECT_FALSE(GetValueForKeyInQuery(kInvalidUrl, "test", &found));
}
226 
// Verifies that QueryIterator walks the query parameters in order, exposing
// the key, the raw value and the unescaped value of each one, including
// repeated keys and keys without a value.
TEST(UrlUtilTest, ParseQuery) {
  const GURL url(
      "http://example.com/path?name=value&boolParam&"
      "url=http://test.com/q?n1%3Dv1%26n2&"
      "multikey=value1&multikey=value2&multikey");

  // The parameters the iterator is expected to yield, in order.
  const struct {
    const char* key;
    const char* value;
    const char* unescaped_value;
  } kExpectedParams[] = {
      {"name", "value", "value"},
      {"boolParam", "", ""},
      {"url", "http://test.com/q?n1%3Dv1%26n2", "http://test.com/q?n1=v1&n2"},
      {"multikey", "value1", "value1"},
      {"multikey", "value2", "value2"},
      {"multikey", "", ""},
  };

  QueryIterator it(url);
  for (const auto& expected : kExpectedParams) {
    // Stop immediately if the iterator runs out early; the getters below must
    // not be called on an exhausted iterator.
    ASSERT_FALSE(it.IsAtEnd());
    EXPECT_EQ(expected.key, it.GetKey());
    EXPECT_EQ(expected.value, it.GetValue());
    EXPECT_EQ(expected.unescaped_value, it.GetUnescapedValue());
    it.Advance();
  }

  // Nothing may be left once every expected parameter has been consumed.
  EXPECT_TRUE(it.IsAtEnd());
}
271 
// Verifies that a QueryIterator over an invalid URL yields no parameters.
TEST(UrlUtilTest, ParseQueryInvalidURL) {
  const GURL kInvalidUrl("http://%01/?test");
  QueryIterator params(kInvalidUrl);
  // The iterator starts out exhausted.
  EXPECT_TRUE(params.IsAtEnd());
}
277 
// Verifies that ParseHostAndPort() splits "host[:port]" strings (including
// bracketed IPv6 literals), reports -1 when no port is given, and rejects
// malformed input and input that carries user information.
TEST(UrlUtilTest, ParseHostAndPort) {
  const struct {
    const char* const input;
    bool success;
    const char* const expected_host;
    int expected_port;
  } tests[] = {
      // Valid inputs:
      {"foo:10", true, "foo", 10},
      {"foo", true, "foo", -1},
      {"[1080:0:0:0:8:800:200C:4171]:11", true, "1080:0:0:0:8:800:200C:4171",
       11},
      {"[1080:0:0:0:8:800:200C:4171]", true, "1080:0:0:0:8:800:200C:4171", -1},

      // Because no validation is done on the host, the following are accepted,
      // even though they are invalid names.
      {"]", true, "]", -1},
      {"::1", true, ":", 1},

      // Invalid inputs:
      {"foo:bar", false, "", -1},
      {"foo:", false, "", -1},
      {":", false, "", -1},
      {":80", false, "", -1},
      {"", false, "", -1},
      {"porttoolong:300000", false, "", -1},
      {"usrname@host", false, "", -1},
      {"usrname:password@host", false, "", -1},
      {":password@host", false, "", -1},
      {":password@host:80", false, "", -1},
      {"@host", false, "", -1},
      {"[", false, "", -1},
      {"[]", false, "", -1},
  };

  for (const auto& test : tests) {
    // Identify the failing input in any failure reported below.
    SCOPED_TRACE(test.input);

    std::string host;
    // Initialized so a failure path never leaves an indeterminate value here.
    int port = -1;
    const bool ok = ParseHostAndPort(test.input, &host, &port);
    EXPECT_EQ(test.success, ok);

    // The outputs are only checked when parsing is expected to succeed.
    if (test.success) {
      EXPECT_EQ(test.expected_host, host);
      EXPECT_EQ(test.expected_port, port);
    }
  }
}
// Verifies that GetHostAndPort() always includes a port (the scheme default
// when the URL has none) and keeps the brackets around IPv6 literals.
TEST(UrlUtilTest, GetHostAndPort) {
  const struct {
    GURL url;
    const char* const expected_host_and_port;
  } tests[] = {
      {GURL("http://www.foo.com/x"), "www.foo.com:80"},
      {GURL("http://www.foo.com:21/x"), "www.foo.com:21"},

      // For IPv6 literals should always include the brackets.
      {GURL("http://[1::2]/x"), "[1::2]:80"},
      {GURL("http://[::a]:33/x"), "[::a]:33"},
  };
  for (const auto& test : tests) {
    // Identify the failing URL in any failure reported below.
    SCOPED_TRACE(test.url.spec());
    EXPECT_EQ(test.expected_host_and_port, GetHostAndPort(test.url));
  }
}
351 
// Verifies that GetHostAndOptionalPort() includes the port only when the URL
// specifies one that is not the scheme's default, for both the GURL and the
// url::SchemeHostPort overloads.
TEST(UrlUtilTest, GetHostAndOptionalPort) {
  const struct {
    GURL url;
    const char* const expected_host_and_port;
  } tests[] = {
      {GURL("http://www.foo.com/x"), "www.foo.com"},
      {GURL("http://www.foo.com:21/x"), "www.foo.com:21"},
      {GURL("http://www.foo.com:443/x"), "www.foo.com:443"},

      {GURL("https://www.foo.com/x"), "www.foo.com"},
      {GURL("https://www.foo.com:80/x"), "www.foo.com:80"},

      // For IPv6 literals should always include the brackets.
      {GURL("http://[1::2]/x"), "[1::2]"},
      {GURL("http://[::a]:33/x"), "[::a]:33"},
  };
  for (const auto& test : tests) {
    // Identify the failing URL in any failure reported below.
    SCOPED_TRACE(test.url.spec());
    EXPECT_EQ(test.expected_host_and_port, GetHostAndOptionalPort(test.url));
    // Also test the SchemeHostPort variant.
    EXPECT_EQ(test.expected_host_and_port,
              GetHostAndOptionalPort(url::SchemeHostPort(test.url)));
  }
}
375 
// Verifies that GetHostOrSpecFromURL() returns the host (without a trailing
// dot) when the URL has one, and the full spec otherwise.
TEST(UrlUtilTest, GetHostOrSpecFromURL) {
  const GURL kPlainHost("http://example.com/test");
  const GURL kTrailingDotHost("http://example.com./test");
  const GURL kFileUrl("file:///tmp/test.html");

  EXPECT_EQ("example.com", GetHostOrSpecFromURL(kPlainHost));
  // The trailing dot of the host is dropped.
  EXPECT_EQ("example.com", GetHostOrSpecFromURL(kTrailingDotHost));
  // No host available: the whole spec comes back.
  EXPECT_EQ("file:///tmp/test.html", GetHostOrSpecFromURL(kFileUrl));
}
384 
// Verifies that GetSuperdomain() strips the leftmost label (everything up to
// and including the first dot) and returns an empty string when no dot is
// followed by further text.
TEST(UrlUtilTest, GetSuperdomain) {
  struct {
    const char* const domain;
    const char* const expected_superdomain;
  } tests[] = {
      // Basic cases
      {"foo.bar.example", "bar.example"},
      {"bar.example", "example"},
      {"example", ""},

      // Returned value may be an eTLD.
      {"google.com", "com"},
      {"google.co.uk", "co.uk"},

      // Weird cases.
      {"", ""},
      {"has.trailing.dot.", "trailing.dot."},
      {"dot.", ""},
      {".has.leading.dot", "has.leading.dot"},
      {".", ""},
      {"..", "."},
      {"127.0.0.1", "0.0.1"},
  };

  for (const auto& test : tests) {
    // Identify the failing domain in any failure reported below.
    SCOPED_TRACE(test.domain);
    EXPECT_EQ(test.expected_superdomain, GetSuperdomain(test.domain));
  }
}
413 
// Verifies that IsSubdomainOf() matches only on whole-label suffixes (or exact
// equality), so "barfoo.com" is not a subdomain of "foo.com".
TEST(UrlUtilTest, IsSubdomainOf) {
  struct {
    const char* subdomain;
    const char* superdomain;
    bool is_subdomain;
  } tests[] = {
      {"bar.foo.com", "foo.com", true},
      {"barfoo.com", "foo.com", false},
      {"bar.foo.com", "com", true},
      {"bar.foo.com", "other.com", false},
      {"bar.foo.com", "bar.foo.com", true},
      {"bar.foo.com", "baz.foo.com", false},
      {"bar.foo.com", "baz.bar.foo.com", false},
      {"bar.foo.com", "ar.foo.com", false},
      {"foo.com", "foo.com.", false},
      {"bar.foo.com", "foo.com.", false},
      {"", "", true},
      {"a", "", false},
      {"", "a", false},
      {"127.0.0.1", "0.0.1", true},  // Don't do this...
  };

  for (const auto& test : tests) {
    // Name both operands so a failing row can be found in the table.
    EXPECT_EQ(test.is_subdomain,
              IsSubdomainOf(test.subdomain, test.superdomain))
        << "subdomain: \"" << test.subdomain << "\", superdomain: \""
        << test.superdomain << "\"";
  }
}
441 
TEST(UrlUtilTest,CompliantHost)442 TEST(UrlUtilTest, CompliantHost) {
443   struct {
444     const char* const host;
445     bool expected_output;
446   } compliant_host_cases[] = {
447       {"", false},
448       {"a", true},
449       {"-", false},
450       {"_", false},
451       {".", false},
452       {"9", true},
453       {"9a", true},
454       {"9_", true},
455       {"a.", true},
456       {".a", false},
457       {"a.a", true},
458       {"9.a", true},
459       {"a.9", true},
460       {"_9a", false},
461       {"-9a", false},
462       {"a.a9", true},
463       {"_.9a", true},
464       {"a.-a9", false},
465       {"a+9a", false},
466       {"-a.a9", true},
467       {"a_.a9", true},
468       {"1-.a-b", true},
469       {"1_.a-b", true},
470       {"1-2.a_b", true},
471       {"a.b.c.d.e", true},
472       {"1.2.3.4.5", true},
473       {"1.2.3..4.5", false},
474       {"1.2.3.4.5.", true},
475       {"1.2.3.4.5..", false},
476       {"%20%20noodles.blorg", false},
477       {"noo dles.blorg ", false},
478       {"noo dles.blorg. ", false},
479       {"^noodles.blorg", false},
480       {"noodles^.blorg", false},
481       {"noo&dles.blorg", false},
482       {"noodles.blorg`", false},
483       {"www.noodles.blorg", true},
484       {"1www.noodles.blorg", true},
485       {"www.2noodles.blorg", true},
486       {"www.n--oodles.blorg", true},
487       {"www.noodl_es.blorg", true},
488       {"www.no-_odles.blorg", true},
489       {"www_.noodles.blorg", true},
490       {"www.noodles.blorg.", true},
491       {"_privet._tcp.local", true},
492       // 63-char label (before or without dot) allowed
493       {"z23456789a123456789a123456789a123456789a123456789a123456789a123", true},
494       {"z23456789a123456789a123456789a123456789a123456789a123456789a123.",
495        true},
496       // 64-char label (before or without dot) disallowed
497       {"123456789a123456789a123456789a123456789a123456789a123456789a1234",
498        false},
499       {"123456789a123456789a123456789a123456789a123456789a123456789a1234.",
500        false},
501       // 253-char host allowed
502       {"abcdefghi.abcdefghi.abcdefghi.abcdefghi.abcdefghi.abcdefghi.abcdefghi."
503        "abcdefghi.abcdefghi.abcdefghi.abcdefghi.abcdefghi.abcdefghi.abcdefghi."
504        "abcdefghi.abcdefghi.abcdefghi.abcdefghi.abcdefghi.abcdefghi.abcdefghi."
505        "abcdefghi.abcdefghi.abcdefghi.abcdefghi.abc",
506        true},
507       // 253-char+dot host allowed
508       {"abcdefghi.abcdefghi.abcdefghi.abcdefghi.abcdefghi.abcdefghi.abcdefghi."
509        "abcdefghi.abcdefghi.abcdefghi.abcdefghi.abcdefghi.abcdefghi.abcdefghi."
510        "abcdefghi.abcdefghi.abcdefghi.abcdefghi.abcdefghi.abcdefghi.abcdefghi."
511        "abcdefghi.abcdefghi.abcdefghi.abcdefghi.abc.",
512        true},
513       // 254-char host disallowed
514       {"123456789.123456789.123456789.123456789.123456789.123456789.123456789."
515        "123456789.123456789.123456789.123456789.123456789.123456789.123456789."
516        "123456789.123456789.123456789.123456789.123456789.123456789.123456789."
517        "123456789.123456789.123456789.123456789.1234",
518        false},
519       // 254-char+dot host disallowed
520       {"123456789.123456789.123456789.123456789.123456789.123456789.123456789."
521        "123456789.123456789.123456789.123456789.123456789.123456789.123456789."
522        "123456789.123456789.123456789.123456789.123456789.123456789.123456789."
523        "123456789.123456789.123456789.123456789.1234.",
524        false},
525   };
526 
527   for (const auto& compliant_host : compliant_host_cases) {
528     EXPECT_EQ(compliant_host.expected_output,
529               IsCanonicalizedHostCompliant(compliant_host.host))
530         << compliant_host.host;
531   }
532 }
533 
// One parameterized test case for UrlUtilNonUniqueNameTest: a hostname paired
// with whether it is expected to be treated as unique.
struct NonUniqueNameTestData {
  // Expected value of !IsHostnameNonUnique(hostname).
  bool is_unique;
  // Hostname under test; PrintTo() asserts that it is non-null.
  const char* const hostname;
};
538 
539 // Google Test pretty-printer.
PrintTo(const NonUniqueNameTestData & data,std::ostream * os)540 void PrintTo(const NonUniqueNameTestData& data, std::ostream* os) {
541   ASSERT_TRUE(data.hostname);
542   *os << " hostname: " << testing::PrintToString(data.hostname)
543       << "; is_unique: " << testing::PrintToString(data.is_unique);
544 }
545 
// Parameters for UrlUtilNonUniqueNameTest. Each entry is
// {expected is_unique, hostname}; the test checks that
// !IsHostnameNonUnique(hostname) matches the expected flag.
const NonUniqueNameTestData kNonUniqueNameTestData[] = {
    // Domains under ICANN-assigned domains.
    { true, "google.com" },
    { true, "google.co.uk" },
    // Domains under private registries.
    { true, "appspot.com" },
    { true, "test.appspot.com" },
    // Unreserved IPv4 addresses (in various forms).
    { true, "8.8.8.8" },
    { true, "99.64.0.0" },
    { true, "212.15.0.0" },
    { true, "212.15" },
    { true, "212.15.0" },
    { true, "3557752832" },
    // Reserved IPv4 addresses (in various forms).
    { false, "192.168.0.0" },
    { false, "192.168.0.6" },
    { false, "10.0.0.5" },
    { false, "10.0" },
    { false, "10.0.0" },
    { false, "3232235526" },
    // Unreserved IPv6 addresses.
    { true, "FFC0:ba98:7654:3210:FEDC:BA98:7654:3210" },
    { true, "2000:ba98:7654:2301:EFCD:BA98:7654:3210" },
    // Reserved IPv6 addresses.
    { false, "::192.9.5.5" },
    { false, "FEED::BEEF" },
    { false, "FEC0:ba98:7654:3210:FEDC:BA98:7654:3210" },
    // 'internal'/non-IANA assigned domains.
    { false, "intranet" },
    { false, "intranet." },
    { false, "intranet.example" },
    { false, "host.intranet.example" },
    // gTLDs under discussion, but not yet assigned.
    { false, "intranet.corp" },
    { false, "intranet.internal" },
    // Invalid host names are treated as unique - but expected to be
    // filtered out before then.
    { true, "junk)(£)$*!@~#" },
    { true, "w$w.example.com" },
    { true, "nocolonsallowed:example" },
    { true, "[::4.5.6.9]" },
};
589 
590 class UrlUtilNonUniqueNameTest
591     : public testing::TestWithParam<NonUniqueNameTestData> {
592  public:
593   ~UrlUtilNonUniqueNameTest() override = default;
594 
595  protected:
IsUnique(const std::string & hostname)596   bool IsUnique(const std::string& hostname) {
597     return !IsHostnameNonUnique(hostname);
598   }
599 };
600 
601 // Test that internal/non-unique names are properly identified as such, but
602 // that IP addresses and hosts beneath registry-controlled domains are flagged
603 // as unique names.
TEST_P(UrlUtilNonUniqueNameTest, IsHostnameNonUnique) {
  // The current parameter carries both the hostname and the expected verdict.
  const NonUniqueNameTestData& param = GetParam();
  const bool expected_unique = param.is_unique;

  EXPECT_EQ(expected_unique, IsUnique(param.hostname));
}
609 
// Instantiates UrlUtilNonUniqueNameTest once per entry of
// kNonUniqueNameTestData, under the instantiation name "All".
INSTANTIATE_TEST_SUITE_P(All,
                         UrlUtilNonUniqueNameTest,
                         testing::ValuesIn(kNonUniqueNameTestData));
613 
// Verifies which host strings HostStringIsLocalhost() accepts: "localhost"
// and its subdomains (case-insensitively, with an optional trailing dot), the
// IPv4 127.0.0.0/8 range and the IPv6 loopback address. Also verifies that
// IsLocalhost() accepts a URL whose host is the bracketed IPv6 loopback.
TEST(UrlUtilTest, IsLocalhost) {
  // Host strings that must be recognized as localhost.
  static constexpr const char* kLocalhostHosts[] = {
      "localhost",
      "localHosT",
      "localhost.",
      "localHost.",
      "127.0.0.1",
      "127.0.1.0",
      "127.1.0.0",
      "127.0.0.255",
      "127.0.255.0",
      "127.255.0.0",
      "::1",
      "0:0:0:0:0:0:0:1",
      "foo.localhost",
      "foo.localhost.",
      "foo.localhoST",
      "foo.localhoST.",
  };
  for (const char* host : kLocalhostHosts) {
    EXPECT_TRUE(HostStringIsLocalhost(host)) << host;
  }

  // Host strings that must NOT be recognized as localhost.
  static constexpr const char* kNonLocalhostHosts[] = {
      "localhost.localdomain",
      "localhost.localDOMain",
      "localhost.localdomain.",
      "localhost6",
      "localhost6.",
      "localhost6.localdomain6",
      "localhost6.localdomain6.",

      "localhostx",
      "localhost.x",
      "foo.localdomain",
      "foo.localdomain.x",
      "localhost6x",
      "localhost.localdomain6",
      "localhost6.localdomain",
      "127.0.0.1.1",
      ".127.0.0.255",
      "::2",
      "::1:1",
      "0:0:0:0:1:0:0:1",
      "0:0:0:0:0:0:0:0:1",
      "foo.localhost.com",
      "foo.localhoste",
      "foo.localhos",
      // Brackets are not part of a host string.
      "[::1]",
  };
  for (const char* host : kNonLocalhostHosts) {
    EXPECT_FALSE(HostStringIsLocalhost(host)) << host;
  }

  // In a URL, the IPv6 loopback literal is written with brackets.
  GURL localhost6("http://[::1]/");
  EXPECT_TRUE(IsLocalhost(localhost6));
}
662 
// Verifies that SimplifyUrlForRequest() drops the reference section and, for
// standard schemes, the username/password, while leaving a non-standard
// scheme's credentials untouched.
TEST(UrlUtilTest, SimplifyUrlForRequest) {
  struct SimplifyCase {
    const char* const input_url;
    const char* const expected_simplified_url;
  };
  const SimplifyCase kCases[] = {
      // Reference section should be stripped.
      {"http://www.google.com:78/foobar?query=1#hash",
       "http://www.google.com:78/foobar?query=1"},
      // Reference section can itself contain #.
      {"http://192.168.0.1?query=1#hash#10#11#13#14",
       "http://192.168.0.1?query=1"},
      // Strip username/password.
      {"http://user:pass@google.com", "http://google.com/"},
      // Strip both the reference and the username/password.
      {"http://user:pass@google.com:80/sup?yo#X#X",
       "http://google.com/sup?yo"},
      // Try an HTTPS URL -- strip both the reference and the
      // username/password.
      {"https://user:pass@google.com:80/sup?yo#X#X",
       "https://google.com:80/sup?yo"},
      // Try an FTP URL -- strip both the reference and the username/password.
      {"ftp://user:pass@google.com:80/sup?yo#X#X",
       "ftp://google.com:80/sup?yo"},
      // Try a nonstandard URL: only the reference is stripped.
      {"foobar://user:pass@google.com:80/sup?yo#X#X",
       "foobar://user:pass@google.com:80/sup?yo"},
  };

  for (const SimplifyCase& simplify_case : kCases) {
    SCOPED_TRACE(simplify_case.input_url);
    const GURL original(simplify_case.input_url);
    const GURL expected(simplify_case.expected_simplified_url);
    EXPECT_EQ(expected, SimplifyUrlForRequest(original));
  }
}
706 
// Verifies that ChangeWebSocketSchemeToHttpScheme() maps ws -> http and
// wss -> https while leaving the rest of the URL unchanged.
TEST(UrlUtilTest, ChangeWebSocketSchemeToHttpScheme) {
  struct {
    const char* const input_url;
    const char* const expected_output_url;
  } tests[] = {
      {"ws://google.com:78/path?query=1", "http://google.com:78/path?query=1"},
      {"wss://google.com:441/path?q=1", "https://google.com:441/path?q=1"}};
  for (const auto& test : tests) {
    // Identify the failing input in any failure reported below.
    SCOPED_TRACE(test.input_url);
    GURL input_url(test.input_url);
    GURL expected_output_url(test.expected_output_url);
    EXPECT_EQ(expected_output_url,
              ChangeWebSocketSchemeToHttpScheme(input_url));
  }
}
721 
// Verifies IsStandardSchemeWithNetworkHost() for built-in schemes and for
// custom schemes registered with each url::SchemeType.
TEST(UrlUtilTest, SchemeHasNetworkHost) {
  // Custom schemes, one per registered scheme type, plus one that is never
  // registered.
  static constexpr char kHostPortUserInfoScheme[] = "foo";
  static constexpr char kHostPortScheme[] = "bar";
  static constexpr char kHostOnlyScheme[] = "baz";
  static constexpr char kNoAuthorityScheme[] = "qux";
  static constexpr char kUnregisteredScheme[] = "not-registered";

  // Scoped so the registrations below do not outlive this test.
  url::ScopedSchemeRegistryForTests scheme_registry;
  AddStandardScheme(kHostPortUserInfoScheme,
                    url::SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION);
  AddStandardScheme(kHostPortScheme, url::SCHEME_WITH_HOST_AND_PORT);
  AddStandardScheme(kHostOnlyScheme, url::SCHEME_WITH_HOST);
  AddStandardScheme(kNoAuthorityScheme, url::SCHEME_WITHOUT_AUTHORITY);

  // Built-in schemes expected to have a network host.
  EXPECT_TRUE(IsStandardSchemeWithNetworkHost(url::kHttpScheme));
  EXPECT_TRUE(IsStandardSchemeWithNetworkHost(url::kHttpsScheme));
  EXPECT_TRUE(IsStandardSchemeWithNetworkHost(url::kWsScheme));
  EXPECT_TRUE(IsStandardSchemeWithNetworkHost(url::kWssScheme));
  EXPECT_TRUE(IsStandardSchemeWithNetworkHost(url::kFtpScheme));
  EXPECT_TRUE(IsStandardSchemeWithNetworkHost(url::kFileScheme));
  // Custom schemes registered with a host and a port qualify as well.
  EXPECT_TRUE(IsStandardSchemeWithNetworkHost(kHostPortUserInfoScheme));
  EXPECT_TRUE(IsStandardSchemeWithNetworkHost(kHostPortScheme));

  // Everything else does not qualify.
  EXPECT_FALSE(IsStandardSchemeWithNetworkHost(url::kFileSystemScheme));
  EXPECT_FALSE(IsStandardSchemeWithNetworkHost(kHostOnlyScheme));
  EXPECT_FALSE(IsStandardSchemeWithNetworkHost(kNoAuthorityScheme));
  EXPECT_FALSE(IsStandardSchemeWithNetworkHost(kUnregisteredScheme));
}
753 
TEST(UrlUtilTest,GetIdentityFromURL)754 TEST(UrlUtilTest, GetIdentityFromURL) {
755   struct {
756     const char* const input_url;
757     const char* const expected_username;
758     const char* const expected_password;
759   } tests[] = {
760       {
761           "http://username:password@google.com",
762           "username",
763           "password",
764       },
765       {
766           // Test for http://crbug.com/19200
767           "http://username:p@ssword@google.com",
768           "username",
769           "p@ssword",
770       },
771       {
772           // Special URL characters should be unescaped.
773           "http://username:p%3fa%26s%2fs%23@google.com",
774           "username",
775           "p?a&s/s#",
776       },
777       {
778           // Username contains %20, password %25.
779           "http://use rname:password%25@google.com",
780           "use rname",
781           "password%",
782       },
783       {
784           // Username and password contain forward / backward slashes.
785           "http://username%2F:password%5C@google.com",
786           "username/",
787           "password\\",
788       },
789       {
790           // Keep %00 and %01 as-is, and ignore other escaped characters when
791           // present.
792           "http://use%00rname%20:pass%01word%25@google.com",
793           "use%00rname%20",
794           "pass%01word%25",
795       },
796       {
797           // Keep CR and LF as-is.
798           "http://use%0Arname:pass%0Dword@google.com",
799           "use%0Arname",
800           "pass%0Dword",
801       },
802       {
803           // Use a '+' in the username.
804           "http://use+rname:password@google.com",
805           "use+rname",
806           "password",
807       },
808       {
809           // Use a '&' in the password.
810           "http://username:p&ssword@google.com",
811           "username",
812           "p&ssword",
813       },
814       {
815           // These UTF-8 characters are considered unsafe to unescape by
816           // UnescapeURLComponent, but raise no special concerns as part of the
817           // identity portion of a URL.
818           "http://%F0%9F%94%92:%E2%80%82@google.com",
819           "\xF0\x9F\x94\x92",
820           "\xE2\x80\x82",
821       },
822       {
823           // Leave invalid UTF-8 alone, and leave valid UTF-8 characters alone
824           // if there's also an invalid character in the string - strings should
825           // not be partially unescaped.
826           "http://%81:%E2%80%82%E2%80@google.com",
827           "%81",
828           "%E2%80%82%E2%80",
829       },
830   };
831   for (const auto& test : tests) {
832     SCOPED_TRACE(test.input_url);
833     GURL url(test.input_url);
834 
835     std::u16string username, password;
836     GetIdentityFromURL(url, &username, &password);
837 
838     EXPECT_EQ(base::UTF8ToUTF16(test.expected_username), username);
839     EXPECT_EQ(base::UTF8ToUTF16(test.expected_password), password);
840   }
841 }
842 
843 // Try extracting a username which was encoded with UTF8.
TEST(UrlUtilTest,GetIdentityFromURL_UTF8)844 TEST(UrlUtilTest, GetIdentityFromURL_UTF8) {
845   GURL url(u"http://foo:\x4f60\x597d@blah.com");
846 
847   EXPECT_EQ("foo", url.username());
848   EXPECT_EQ("%E4%BD%A0%E5%A5%BD", url.password());
849 
850   // Extract the unescaped identity.
851   std::u16string username, password;
852   GetIdentityFromURL(url, &username, &password);
853 
854   // Verify that it was decoded as UTF8.
855   EXPECT_EQ(u"foo", username);
856   EXPECT_EQ(u"\x4f60\x597d", password);
857 }
858 
// Checks HasGoogleHost() against a table of URLs. Per the table, hosts that
// end in ".<google-domain>" (including the bare leading-dot form) are expected
// to match, while the bare domain itself and look-alike names are not.
TEST(UrlUtilTest, GoogleHost) {
  struct {
    GURL url;
    bool expected_output;
  } google_host_cases[] = {
      // Leading-dot form of each recognized domain.
      {GURL("http://.google.com"), true},
      {GURL("http://.youtube.com"), true},
      {GURL("http://.gmail.com"), true},
      {GURL("http://.doubleclick.net"), true},
      {GURL("http://.gstatic.com"), true},
      {GURL("http://.googlevideo.com"), true},
      {GURL("http://.googleusercontent.com"), true},
      {GURL("http://.googlesyndication.com"), true},
      {GURL("http://.google-analytics.com"), true},
      {GURL("http://.googleadservices.com"), true},
      {GURL("http://.googleapis.com"), true},
      // Ordinary subdomains.
      {GURL("http://a.google.com"), true},
      {GURL("http://b.youtube.com"), true},
      {GURL("http://c.gmail.com"), true},
      // Bare domains without a subdomain label.
      {GURL("http://google.com"), false},
      {GURL("http://youtube.com"), false},
      {GURL("http://gmail.com"), false},
      // Near misses and look-alikes.
      {GURL("http://google.coma"), false},
      {GURL("http://agoogle.com"), false},
      {GURL("http://oogle.com"), false},
      {GURL("http://google.co"), false},
      {GURL("http://oggole.com"), false},
  };

  for (const auto& host : google_host_cases) {
    // Without a trace a failure would only print true/false, giving no hint
    // as to which table entry is wrong. possibly_invalid_spec() is used so the
    // trace itself never depends on the URL being valid.
    SCOPED_TRACE(host.url.possibly_invalid_spec());
    EXPECT_EQ(host.expected_output, HasGoogleHost(host.url));
  }
}
892 
// Exercises IsLocalHostname() with names that are "localhost" or one of its
// subdomains (any letter case, optional trailing dot), and with names that
// merely contain the word.
TEST(UrlUtilTest, IsLocalHostname) {
  static constexpr const char* kLocalNames[] = {
      "localhost",      "localhost.",      "LOCALhost",
      "LOCALhost.",     "abc.localhost",   "abc.localhost.",
      "abc.LOCALhost",  "abc.LOCALhost.",  "abc.def.localhost",
  };
  static constexpr const char* kNonLocalNames[] = {
      "localhost.actuallynot", "notlocalhost",         "notlocalhost.",
      "still.notlocalhost",    "localhostjustkidding",
  };

  for (const char* name : kLocalNames) {
    // Keep the offending hostname visible in any failure message.
    SCOPED_TRACE(name);
    EXPECT_TRUE(IsLocalHostname(name));
  }

  for (const char* name : kNonLocalNames) {
    SCOPED_TRACE(name);
    EXPECT_FALSE(IsLocalHostname(name));
  }
}
910 
// Checks IsGoogleHostWithAlpnH3() against a table of hostnames. Per the
// table, google.com and www.google.com are expected to match regardless of
// letter case, while the listed country-specific Google hosts are not.
TEST(UrlUtilTest, GoogleHostWithAlpnH3) {
  struct {
    std::string_view host;
    bool expected_output;
  } test_cases[] = {
      {"google.com", true},        {"www.google.com", true},
      {"google.CoM", true},        {"www.Google.cOm", true},
      {"www.google.cat", false},   {"www.google.co.in", false},
      {"www.google.co.jp", false},
  };

  for (const auto& test_case : test_cases) {
    // Without a trace a failure would only print true/false, giving no hint
    // as to which hostname in the table is wrong.
    SCOPED_TRACE(test_case.host);
    EXPECT_EQ(test_case.expected_output,
              IsGoogleHostWithAlpnH3(test_case.host));
  }
}
926 
927 }  // namespace
928 }  // namespace net
929