• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2007 Google Inc. All Rights Reserved.
2 // Author: brettw@google.com (Brett Wilson)
3 
4 #include "googleurl/src/gurl.h"
5 #include "googleurl/src/url_canon.h"
6 #include "googleurl/src/url_test_utils.h"
7 #include "testing/gtest/include/gtest/gtest.h"
8 
9 // Some implementations of base/basictypes.h may define ARRAYSIZE.
10 // If it's not defined, we define it to the ARRAYSIZE_UNSAFE macro
11 // which is in our version of basictypes.h.
12 #ifndef ARRAYSIZE
13 #define ARRAYSIZE ARRAYSIZE_UNSAFE
14 #endif
15 
16 using url_test_utils::WStringToUTF16;
17 using url_test_utils::ConvertUTF8ToUTF16;
18 
19 namespace {
20 
21 template<typename CHAR>
SetupReplacement(void (url_canon::Replacements<CHAR>::* func)(const CHAR *,const url_parse::Component &),url_canon::Replacements<CHAR> * replacements,const CHAR * str)22 void SetupReplacement(void (url_canon::Replacements<CHAR>::*func)(const CHAR*,
23                           const url_parse::Component&),
24                       url_canon::Replacements<CHAR>* replacements,
25                       const CHAR* str) {
26   if (str) {
27     url_parse::Component comp;
28     if (str[0])
29       comp.len = static_cast<int>(strlen(str));
30     (replacements->*func)(str, comp);
31   }
32 }
33 
34 // Returns the canonicalized string for the given URL string for the
35 // GURLTest.Types test.
TypesTestCase(const char * src)36 std::string TypesTestCase(const char* src) {
37   GURL gurl(src);
38   return gurl.possibly_invalid_spec();
39 }
40 
41 }  // namespace
42 
43 // Different types of URLs should be handled differently by url_util, and
44 // handed off to different canonicalizers.
TEST(GURLTest,Types)45 TEST(GURLTest, Types) {
46   // URLs with unknown schemes should be treated as path URLs, even when they
47   // have things like "://".
48   EXPECT_EQ("something:///HOSTNAME.com/",
49             TypesTestCase("something:///HOSTNAME.com/"));
50 
51   // In the reverse, known schemes should always trigger standard URL handling.
52   EXPECT_EQ("http://hostname.com/", TypesTestCase("http:HOSTNAME.com"));
53   EXPECT_EQ("http://hostname.com/", TypesTestCase("http:/HOSTNAME.com"));
54   EXPECT_EQ("http://hostname.com/", TypesTestCase("http://HOSTNAME.com"));
55   EXPECT_EQ("http://hostname.com/", TypesTestCase("http:///HOSTNAME.com"));
56 
57 #ifdef WIN32
58   // URLs that look like absolute Windows drive specs.
59   EXPECT_EQ("file:///C:/foo.txt", TypesTestCase("c:\\foo.txt"));
60   EXPECT_EQ("file:///Z:/foo.txt", TypesTestCase("Z|foo.txt"));
61   EXPECT_EQ("file://server/foo.txt", TypesTestCase("\\\\server\\foo.txt"));
62   EXPECT_EQ("file://server/foo.txt", TypesTestCase("//server/foo.txt"));
63 #endif
64 }
65 
66 // Test the basic creation and querying of components in a GURL. We assume
67 // the parser is already tested and works, so we are mostly interested if the
68 // object does the right thing with the results.
TEST(GURLTest,Components)69 TEST(GURLTest, Components) {
70   GURL url(WStringToUTF16(L"http://user:pass@google.com:99/foo;bar?q=a#ref"));
71   EXPECT_TRUE(url.is_valid());
72   EXPECT_TRUE(url.SchemeIs("http"));
73   EXPECT_FALSE(url.SchemeIsFile());
74 
75   // This is the narrow version of the URL, which should match the wide input.
76   EXPECT_EQ("http://user:pass@google.com:99/foo;bar?q=a#ref", url.spec());
77 
78   EXPECT_EQ("http", url.scheme());
79   EXPECT_EQ("user", url.username());
80   EXPECT_EQ("pass", url.password());
81   EXPECT_EQ("google.com", url.host());
82   EXPECT_EQ("99", url.port());
83   EXPECT_EQ(99, url.IntPort());
84   EXPECT_EQ("/foo;bar", url.path());
85   EXPECT_EQ("q=a", url.query());
86   EXPECT_EQ("ref", url.ref());
87 }
88 
// A default-constructed GURL is invalid and every accessor returns an empty
// string (or PORT_UNSPECIFIED for the integer port).
TEST(GURLTest, Empty) {
  GURL blank;

  EXPECT_FALSE(blank.is_valid());
  EXPECT_EQ("", blank.spec());

  EXPECT_EQ("", blank.scheme());
  EXPECT_EQ("", blank.username());
  EXPECT_EQ("", blank.password());
  EXPECT_EQ("", blank.host());
  EXPECT_EQ("", blank.port());
  EXPECT_EQ(url_parse::PORT_UNSPECIFIED, blank.IntPort());
  EXPECT_EQ("", blank.path());
  EXPECT_EQ("", blank.query());
  EXPECT_EQ("", blank.ref());
}
104 
// Copy construction must preserve validity, the spec and every component,
// for both a valid and a default-constructed (invalid) source URL.
TEST(GURLTest, Copy) {
  // Valid source.
  GURL original(
      WStringToUTF16(L"http://user:pass@google.com:99/foo;bar?q=a#ref"));
  GURL duplicate(original);

  EXPECT_TRUE(duplicate.is_valid());
  EXPECT_EQ("http://user:pass@google.com:99/foo;bar?q=a#ref",
            duplicate.spec());
  EXPECT_EQ("http", duplicate.scheme());
  EXPECT_EQ("user", duplicate.username());
  EXPECT_EQ("pass", duplicate.password());
  EXPECT_EQ("google.com", duplicate.host());
  EXPECT_EQ("99", duplicate.port());
  EXPECT_EQ(99, duplicate.IntPort());
  EXPECT_EQ("/foo;bar", duplicate.path());
  EXPECT_EQ("q=a", duplicate.query());
  EXPECT_EQ("ref", duplicate.ref());

  // Invalid source: the copy must be invalid and empty as well.
  GURL empty_source;
  GURL empty_copy(empty_source);

  EXPECT_FALSE(empty_copy.is_valid());
  EXPECT_EQ("", empty_copy.spec());
  EXPECT_EQ("", empty_copy.scheme());
  EXPECT_EQ("", empty_copy.username());
  EXPECT_EQ("", empty_copy.password());
  EXPECT_EQ("", empty_copy.host());
  EXPECT_EQ("", empty_copy.port());
  EXPECT_EQ(url_parse::PORT_UNSPECIFIED, empty_copy.IntPort());
  EXPECT_EQ("", empty_copy.path());
  EXPECT_EQ("", empty_copy.query());
  EXPECT_EQ("", empty_copy.ref());
}
137 
138 // Given an invalid URL, we should still get most of the components.
TEST(GURLTest,Invalid)139 TEST(GURLTest, Invalid) {
140   GURL url("http:google.com:foo");
141   EXPECT_FALSE(url.is_valid());
142   EXPECT_EQ("http://google.com:foo/", url.possibly_invalid_spec());
143 
144   EXPECT_EQ("http", url.scheme());
145   EXPECT_EQ("", url.username());
146   EXPECT_EQ("", url.password());
147   EXPECT_EQ("google.com", url.host());
148   EXPECT_EQ("foo", url.port());
149   EXPECT_EQ(url_parse::PORT_INVALID, url.IntPort());
150   EXPECT_EQ("/", url.path());
151   EXPECT_EQ("", url.query());
152   EXPECT_EQ("", url.ref());
153 }
154 
// Verifies that GURL::Resolve is wired up correctly for both the 8-bit and
// the UTF-16 entry points.
TEST(GURLTest, Resolve) {
  // The tricky cases for relative URL resolving are tested in the
  // canonicalizer unit test. Here, we just test that the GURL integration
  // works properly.
  struct ResolveCase {
    const char* base;      // URL the relative reference is resolved against.
    const char* relative;  // Reference passed to Resolve().
    bool expected_valid;   // Expected is_valid() of the result.
    const char* expected;  // Expected spec() of the result.
  } resolve_cases[] = {
    {"http://www.google.com/", "foo.html", true, "http://www.google.com/foo.html"},
    {"http://www.google.com/", "http://images.google.com/foo.html", true, "http://images.google.com/foo.html"},
    {"http://www.google.com/blah/bloo?c#d", "../../../hello/./world.html?a#b", true, "http://www.google.com/hello/world.html?a#b"},
    {"http://www.google.com/foo#bar", "#com", true, "http://www.google.com/foo#com"},
    {"http://www.google.com/", "Https:images.google.com", true, "https://images.google.com/"},
      // Unknown schemes are not standard.
    {"data:blahblah", "http://google.com/", true, "http://google.com/"},
    {"data:blahblah", "http:google.com", true, "http://google.com/"},
    {"data:/blahblah", "file.html", false, ""},
  };

  for (size_t i = 0; i < ARRAYSIZE(resolve_cases); i++) {
    // 8-bit code path.
    GURL input(resolve_cases[i].base);
    GURL output = input.Resolve(resolve_cases[i].relative);
    EXPECT_EQ(resolve_cases[i].expected_valid, output.is_valid()) << i;
    EXPECT_EQ(resolve_cases[i].expected, output.spec()) << i;

    // Wide code path. Resolve against the base that was itself built from
    // UTF-16 so that both the wide constructor and the wide Resolve overload
    // are exercised (previously the 8-bit |input| was reused here and
    // |inputw| was never used).
    GURL inputw(ConvertUTF8ToUTF16(resolve_cases[i].base));
    GURL outputw =
        inputw.Resolve(ConvertUTF8ToUTF16(resolve_cases[i].relative));
    EXPECT_EQ(resolve_cases[i].expected_valid, outputw.is_valid()) << i;
    EXPECT_EQ(resolve_cases[i].expected, outputw.spec()) << i;
  }
}
191 
// Table-driven check of GURL::GetOrigin(). Per the expectations below, the
// result keeps scheme, host and port, drops user info, path and ref, and is
// empty for a javascript: URL.
TEST(GURLTest, GetOrigin) {
  struct TestCase {
    const char* input;     // URL to take the origin of.
    const char* expected;  // Expected spec() of GetOrigin().
  } cases[] = {
    {"http://www.google.com", "http://www.google.com/"},
    {"javascript:window.alert(\"hello,world\");", ""},
    {"http://user:pass@www.google.com:21/blah#baz", "http://www.google.com:21/"},
    {"http://user@www.google.com", "http://www.google.com/"},
    {"http://:pass@www.google.com", "http://www.google.com/"},
    {"http://:@www.google.com", "http://www.google.com/"},
  };
  for (size_t i = 0; i < ARRAYSIZE(cases); i++) {
    GURL url(cases[i].input);
    GURL origin = url.GetOrigin();
    // Stream the case so a failure identifies which table row broke.
    EXPECT_EQ(cases[i].expected, origin.spec())
        << "case " << i << ": " << cases[i].input;
  }
}
210 
// Table-driven check of GURL::GetWithEmptyPath(). Per the expectations below,
// path and query are replaced by "/" for http URLs and the result is empty
// for a javascript: URL.
TEST(GURLTest, GetWithEmptyPath) {
  struct TestCase {
    const char* input;     // URL to strip the path from.
    const char* expected;  // Expected spec() of GetWithEmptyPath().
  } cases[] = {
    {"http://www.google.com", "http://www.google.com/"},
    {"javascript:window.alert(\"hello, world\");", ""},
    {"http://www.google.com/foo/bar.html?baz=22", "http://www.google.com/"},
  };

  for (size_t i = 0; i < ARRAYSIZE(cases); i++) {
    GURL url(cases[i].input);
    GURL empty_path = url.GetWithEmptyPath();
    // Stream the case so a failure identifies which table row broke.
    EXPECT_EQ(cases[i].expected, empty_path.spec())
        << "case " << i << ": " << cases[i].input;
  }
}
227 
// Table-driven check of GURL::ReplaceComponents().
TEST(GURLTest, Replacements) {
  // The url canonicalizer replacement test will handle most of these case.
  // The most important thing to do here is to check that the proper
  // canonicalizer gets called based on the scheme of the input.
  //
  // For each component column: NULL means "do not override" (SetupReplacement
  // skips NULL strings), "" is passed with a default-constructed Component,
  // and any other string is passed with its full length.
  struct ReplaceCase {
    const char* base;      // URL the replacements are applied to.
    const char* scheme;
    const char* username;
    const char* password;
    const char* host;
    const char* port;
    const char* path;
    const char* query;
    const char* ref;
    const char* expected;  // Expected spec() after ReplaceComponents().
  } replace_cases[] = {
    {"http://www.google.com/foo/bar.html?foo#bar", NULL, NULL, NULL, NULL, NULL, "/", "", "", "http://www.google.com/"},
    {"http://www.google.com/foo/bar.html?foo#bar", "javascript", "", "", "", "", "window.open('foo');", "", "", "javascript:window.open('foo');"},
    {"file:///C:/foo/bar.txt", "http", NULL, NULL, "www.google.com", "99", "/foo","search", "ref", "http://www.google.com:99/foo?search#ref"},
#ifdef WIN32
    {"http://www.google.com/foo/bar.html?foo#bar", "file", "", "", "", "", "c:\\", "", "", "file:///C:/"},
#endif
  };

  for (size_t i = 0; i < ARRAYSIZE(replace_cases); i++) {
    const ReplaceCase& cur = replace_cases[i];
    GURL url(cur.base);
    GURL::Replacements repl;
    SetupReplacement(&GURL::Replacements::SetScheme, &repl, cur.scheme);
    SetupReplacement(&GURL::Replacements::SetUsername, &repl, cur.username);
    SetupReplacement(&GURL::Replacements::SetPassword, &repl, cur.password);
    SetupReplacement(&GURL::Replacements::SetHost, &repl, cur.host);
    SetupReplacement(&GURL::Replacements::SetPort, &repl, cur.port);
    SetupReplacement(&GURL::Replacements::SetPath, &repl, cur.path);
    SetupReplacement(&GURL::Replacements::SetQuery, &repl, cur.query);
    SetupReplacement(&GURL::Replacements::SetRef, &repl, cur.ref);
    GURL output = url.ReplaceComponents(repl);

    // Stream the case so a failure identifies which table row broke.
    EXPECT_EQ(cur.expected, output.spec())
        << "case " << i << ": " << cur.base;
  }
}
269 
// Table-driven check of GURL::PathForRequest(). Per the expectations below,
// the result is the path plus any "?query", never the "#ref", and "/" when
// the input has no path.
TEST(GURLTest, PathForRequest) {
  struct TestCase {
    const char* input;     // URL to query.
    const char* expected;  // Expected PathForRequest() result.
  } cases[] = {
    {"http://www.google.com", "/"},
    {"http://www.google.com/", "/"},
    {"http://www.google.com/foo/bar.html?baz=22", "/foo/bar.html?baz=22"},
    {"http://www.google.com/foo/bar.html#ref", "/foo/bar.html"},
    {"http://www.google.com/foo/bar.html?query#ref", "/foo/bar.html?query"},
  };

  for (size_t i = 0; i < ARRAYSIZE(cases); i++) {
    GURL url(cases[i].input);
    std::string path_request = url.PathForRequest();
    // Stream the case so a failure identifies which table row broke.
    EXPECT_EQ(cases[i].expected, path_request)
        << "case " << i << ": " << cases[i].input;
  }
}
288 
// Table-driven check of GURL::EffectiveIntPort(): an explicit port wins,
// otherwise the scheme's default is expected; file: and data: URLs are
// expected to report PORT_UNSPECIFIED.
TEST(GURLTest, EffectiveIntPort) {
  struct PortTest {
    const char* spec;       // URL to query.
    int expected_int_port;  // Expected EffectiveIntPort() result.
  } port_tests[] = {
    // http
    {"http://www.google.com/", 80},
    {"http://www.google.com:80/", 80},
    {"http://www.google.com:443/", 443},

    // https
    {"https://www.google.com/", 443},
    {"https://www.google.com:443/", 443},
    {"https://www.google.com:80/", 80},

    // ftp
    {"ftp://www.google.com/", 21},
    {"ftp://www.google.com:21/", 21},
    {"ftp://www.google.com:80/", 80},

    // gopher
    {"gopher://www.google.com/", 70},
    {"gopher://www.google.com:70/", 70},
    {"gopher://www.google.com:80/", 80},

    // file - no port
    {"file://www.google.com/", url_parse::PORT_UNSPECIFIED},
    {"file://www.google.com:443/", url_parse::PORT_UNSPECIFIED},

    // data - no port
    {"data:www.google.com:90", url_parse::PORT_UNSPECIFIED},
    {"data:www.google.com", url_parse::PORT_UNSPECIFIED},
  };

  for (size_t i = 0; i < ARRAYSIZE(port_tests); i++) {
    GURL url(port_tests[i].spec);
    // Stream the case so a failure identifies which table row broke.
    EXPECT_EQ(port_tests[i].expected_int_port, url.EffectiveIntPort())
        << "case " << i << ": " << port_tests[i].spec;
  }
}
328 
// Table-driven check of GURL::HostIsIPAddress() for hostnames, IPv4
// literals, bracketed and unbracketed IPv6 literals, and non-URL input.
TEST(GURLTest, IPAddress) {
  struct IPTest {
    const char* spec;  // URL (or arbitrary input) to query.
    bool expected_ip;  // Expected HostIsIPAddress() result.
  } ip_tests[] = {
    {"http://www.google.com/", false},
    {"http://192.168.9.1/", true},
    {"http://192.168.9.1.2/", false},
    {"http://192.168.m.1/", false},
    {"http://2001:db8::1/", false},
    {"http://[2001:db8::1]/", true},
    {"", false},
    {"some random input!", false},
  };

  for (size_t i = 0; i < ARRAYSIZE(ip_tests); i++) {
    GURL url(ip_tests[i].spec);
    // Stream the case so a failure identifies which table row broke.
    EXPECT_EQ(ip_tests[i].expected_ip, url.HostIsIPAddress())
        << "case " << i << ": " << ip_tests[i].spec;
  }
}
349 
// Table-driven check that host() keeps the brackets around an IPv6 literal
// while HostNoBrackets() strips a matching "[...]" pair.
TEST(GURLTest, HostNoBrackets) {
  struct TestCase {
    const char* input;               // URL to query.
    const char* expected_host;       // Expected host() result.
    const char* expected_plainhost;  // Expected HostNoBrackets() result.
  } cases[] = {
    {"http://www.google.com", "www.google.com", "www.google.com"},
    {"http://[2001:db8::1]/", "[2001:db8::1]", "2001:db8::1"},
    {"http://[::]/", "[::]", "::"},

    // Don't require a valid URL, but don't crash either.
    {"http://[]/", "[]", ""},
    {"http://[x]/", "[x]", "x"},
    {"http://[x/", "[x", "[x"},
    {"http://x]/", "x]", "x]"},
    {"http://[/", "[", "["},
    {"http://]/", "]", "]"},
    {"", "", ""},
  };
  for (size_t i = 0; i < ARRAYSIZE(cases); i++) {
    GURL url(cases[i].input);
    // Stream the case so a failure identifies which table row broke.
    EXPECT_EQ(cases[i].expected_host, url.host())
        << "case " << i << ": " << cases[i].input;
    EXPECT_EQ(cases[i].expected_plainhost, url.HostNoBrackets())
        << "case " << i << ": " << cases[i].input;
  }
}
375 
// Table-driven check of GURL::DomainIs(): each row gives a URL, the domain
// string to test it against, and whether a match is expected.
TEST(GURLTest, DomainIs) {
  struct DomainCase {
    const char* url;
    const char* domain;
    bool expected_match;
  } domain_cases[] = {
    // Matches on a subdomain, on the exact host, and on a host with a
    // trailing dot.
    {"http://www.google.com:99/foo", "google.com", true},
    {"http://google.com:99/foo", "google.com", true},
    {"http://google.com./foo", "google.com", true},

    // A trailing dot in the queried domain must also be present in the host.
    {"http://google.com/foo", "google.com.", false},
    {"http://google.com./foo", "google.com.", true},
    {"http://www.google.com./foo", ".com.", true},

    // Unrelated hosts, longer TLD chains and mere suffix matches fail.
    {"http://www.balabala.com/foo", "google.com", false},
    {"http://www.google.com.cn/foo", "google.com", false},
    {"http://www.iamnotgoogle.com/foo", "google.com", false},

    // Two trailing dots in the host do not match ".com".
    {"http://www.iamnotgoogle.com../foo", ".com", false},
  };

  for (size_t i = 0; i < ARRAYSIZE(domain_cases); i++) {
    const DomainCase& cur = domain_cases[i];
    GURL candidate(cur.url);
    EXPECT_EQ(cur.expected_match, candidate.DomainIs(cur.domain));
  }
}
409 
410 // Newlines should be stripped from inputs.
TEST(GURLTest,Newlines)411 TEST(GURLTest, Newlines) {
412   // Constructor.
413   GURL url_1(" \t ht\ntp://\twww.goo\rgle.com/as\ndf \n ");
414   EXPECT_EQ("http://www.google.com/asdf", url_1.spec());
415 
416   // Relative path resolver.
417   GURL url_2 = url_1.Resolve(" \n /fo\to\r ");
418   EXPECT_EQ("http://www.google.com/foo", url_2.spec());
419 
420   // Note that newlines are NOT stripped from ReplaceComponents.
421 }
422 
// IsStandard() is expected to be true for a known scheme and false for an
// unknown one, whether or not the unknown-scheme URL contains "://".
TEST(GURLTest, IsStandard) {
  GURL known_scheme("http:foo/bar");
  GURL unknown_scheme("foo:bar/baz");
  GURL unknown_scheme_with_authority("foo://bar/baz");

  EXPECT_TRUE(known_scheme.IsStandard());
  EXPECT_FALSE(unknown_scheme.IsStandard());
  EXPECT_FALSE(unknown_scheme_with_authority.IsStandard());
}
433