• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "net/base/net_util.h"
6 
7 #include <algorithm>
8 
9 #include "base/file_path.h"
10 #include "base/format_macros.h"
11 #include "base/string_number_conversions.h"
12 #include "base/string_util.h"
13 #include "base/stringprintf.h"
14 #include "base/sys_string_conversions.h"
15 #include "base/test/test_file_util.h"
16 #include "base/time.h"
17 #include "base/utf_string_conversions.h"
18 #include "googleurl/src/gurl.h"
19 #include "net/base/sys_addrinfo.h"
20 #include "testing/gtest/include/gtest/gtest.h"
21 
22 namespace net {
23 
24 namespace {
25 
26 static const size_t kNpos = string16::npos;
27 
// Pairs a file system path with a file: URL for the FileURLConversion test.
struct FileCase {
  // File system path, as a wide string.
  const wchar_t* file;
  // The file: URL paired with |file|.
  const char* url;
};
32 
// One header lookup expectation, as used by the GetSpecificHeader test.
struct HeaderCase {
  // Header name handed to GetSpecificHeader().
  const wchar_t* header_name;
  // Header value the lookup is expected to produce.
  const wchar_t* expected;
};
37 
// One header parameter expectation, as used by the GetHeaderParamValue test.
struct HeaderParamCase {
  // Header whose value is fetched with GetSpecificHeader().
  const wchar_t* header_name;
  // Parameter looked up inside that header value.
  const wchar_t* param_name;
  // Parameter value the lookup is expected to produce.
  const wchar_t* expected;
};
43 
// One expectation for the GetFileNameFromCD test.
struct FileNameCDCase {
  // Complete Content-Disposition header line.
  const char* header_field;
  // Charset of the referrer; the test table leaves it empty for most cases.
  const char* referrer_charset;
  // File name expected to be extracted from |header_field|.
  const wchar_t* expected;
};
49 
// Language lists used by the IDN test data. The order of entries is
// significant: IDNTestCase::unicode_allowed holds arraysize(kLanguages)
// flags, presumably index-aligned with this table.
const wchar_t* kLanguages[] = {
  // Indices 0-4.
  L"", L"en", L"zh-CN", L"ja", L"ko",
  // Indices 5-9.
  L"he", L"ar", L"ru", L"el", L"fr",
  // Indices 10-14.
  L"de", L"pt", L"sv", L"th", L"hi",
  // Indices 15-19.
  L"de,en", L"el,en", L"zh-TW,en", L"ko,ja", L"he,ru,en",
  // Index 20.
  L"zh,ru,en"
};
57 
58 struct IDNTestCase {
59   const char* input;
60   const wchar_t* unicode_output;
61   const bool unicode_allowed[arraysize(kLanguages)];
62 };
63 
64 // TODO(jungshik) This is just a random sample of languages and is far
65 // from exhaustive.  We may have to generate all the combinations
66 // of languages (powerset of a set of all the languages).
67 const IDNTestCase idn_cases[] = {
68   // No IDN
69   {"www.google.com", L"www.google.com",
70    {true,  true,  true,  true,  true,
71     true,  true,  true,  true,  true,
72     true,  true,  true,  true,  true,
73     true,  true,  true,  true,  true,
74     true}},
75   {"www.google.com.", L"www.google.com.",
76    {true,  true,  true,  true,  true,
77     true,  true,  true,  true,  true,
78     true,  true,  true,  true,  true,
79     true,  true,  true,  true,  true,
80     true}},
81   {".", L".",
82    {true,  true,  true,  true,  true,
83     true,  true,  true,  true,  true,
84     true,  true,  true,  true,  true,
85     true,  true,  true,  true,  true,
86     true}},
87   {"", L"",
88    {true,  true,  true,  true,  true,
89     true,  true,  true,  true,  true,
90     true,  true,  true,  true,  true,
91     true,  true,  true,  true,  true,
92     true}},
93   // IDN
94   // Hanzi (Traditional Chinese)
95   {"xn--1lq90ic7f1rc.cn", L"\x5317\x4eac\x5927\x5b78.cn",
96    {true,  false, true,  true,  false,
97     false, false, false, false, false,
98     false, false, false, false, false,
99     false, false, true,  true,  false,
100     true}},
101   // Hanzi ('video' in Simplified Chinese : will pass only in zh-CN,zh)
102   {"xn--cy2a840a.com", L"\x89c6\x9891.com",
103    {true,  false, true,  false,  false,
104     false, false, false, false, false,
105     false, false, false, false, false,
106     false, false, false, false,  false,
107     true}},
108   // Hanzi + '123'
109   {"www.xn--123-p18d.com", L"www.\x4e00" L"123.com",
110    {true,  false, true,  true,  false,
111     false, false, false, false, false,
112     false, false, false, false, false,
113     false, false, true,  true,  false,
114     true}},
115   // Hanzi + Latin : U+56FD is simplified and is regarded
116   // as not supported in zh-TW.
117   {"www.xn--hello-9n1hm04c.com", L"www.hello\x4e2d\x56fd.com",
118    {false, false, true,  true,  false,
119     false, false, false, false, false,
120     false, false, false, false, false,
121     false, false, false, true,  false,
122     true}},
123   // Kanji + Kana (Japanese)
124   {"xn--l8jvb1ey91xtjb.jp", L"\x671d\x65e5\x3042\x3055\x3072.jp",
125    {true,  false, false, true,  false,
126     false, false, false, false, false,
127     false, false, false, false, false,
128     false, false, false, true,  false,
129     false}},
130   // Katakana including U+30FC
131   {"xn--tckm4i2e.jp", L"\x30b3\x30de\x30fc\x30b9.jp",
132    {true, false, false, true,  false,
133     false, false, false, false, false,
134     false, false, false, false, false,
135     false, false, false, true, false,
136     }},
137   {"xn--3ck7a7g.jp", L"\u30ce\u30f3\u30bd.jp",
138    {true, false, false, true,  false,
139     false, false, false, false, false,
140     false, false, false, false, false,
141     false, false, false, true, false,
142     }},
143   // Katakana + Latin (Japanese)
144   // TODO(jungshik): Change 'false' in the first element to 'true'
145   // after upgrading to ICU 4.2.1 to use new uspoof_* APIs instead
146   // of our IsIDNComponentInSingleScript().
147   {"xn--e-efusa1mzf.jp", L"e\x30b3\x30de\x30fc\x30b9.jp",
148    {false, false, false, true,  false,
149     false, false, false, false, false,
150     false, false, false, false, false,
151     false, false, false, true, false,
152     }},
153   {"xn--3bkxe.jp", L"\x30c8\x309a.jp",
154    {false, false, false, true,  false,
155     false, false, false, false, false,
156     false, false, false, false, false,
157     false, false, false, true, false,
158     }},
159   // Hangul (Korean)
160   {"www.xn--or3b17p6jjc.kr", L"www.\xc804\xc790\xc815\xbd80.kr",
161    {true,  false, false, false, true,
162     false, false, false, false, false,
163     false, false, false, false, false,
164     false, false, false, true,  false,
165     false}},
166   // b<u-umlaut>cher (German)
167   {"xn--bcher-kva.de", L"b\x00fc" L"cher.de",
168    {true,  false, false, false, false,
169     false, false, false, false, true,
170     true,  false,  false, false, false,
171     true,  false, false, false, false,
172     false}},
173   // a with diaeresis
174   {"www.xn--frgbolaget-q5a.se", L"www.f\x00e4rgbolaget.se",
175    {true,  false, false, false, false,
176     false, false, false, false, false,
177     true,  false, true, false, false,
178     true,  false, false, false, false,
179     false}},
180   // c-cedilla (French)
181   {"www.xn--alliancefranaise-npb.fr", L"www.alliancefran\x00e7" L"aise.fr",
182    {true,  false, false, false, false,
183     false, false, false, false, true,
184     false, true,  false, false, false,
185     false, false, false, false, false,
186     false}},
187   // caf'e with acute accent' (French)
188   {"xn--caf-dma.fr", L"caf\x00e9.fr",
189    {true,  false, false, false, false,
190     false, false, false, false, true,
191     false, true,  true,  false, false,
192     false, false, false, false, false,
193     false}},
194   // c-cedillla and a with tilde (Portuguese)
195   {"xn--poema-9qae5a.com.br", L"p\x00e3oema\x00e7\x00e3.com.br",
196    {true,  false, false, false, false,
197     false, false, false, false, false,
198     false, true,  false, false, false,
199     false, false, false, false, false,
200     false}},
201   // s with caron
202   {"xn--achy-f6a.com", L"\x0161" L"achy.com",
203    {true,  false, false, false, false,
204     false, false, false, false, false,
205     false, false, false, false, false,
206     false, false, false, false, false,
207     false}},
208   // TODO(jungshik) : Add examples with Cyrillic letters
209   // only used in some languages written in Cyrillic.
210   // Eutopia (Greek)
211   {"xn--kxae4bafwg.gr", L"\x03bf\x03c5\x03c4\x03bf\x03c0\x03af\x03b1.gr",
212    {true,  false, false, false, false,
213     false, false, false, true,  false,
214     false, false, false, false, false,
215     false, true,  false, false, false,
216     false}},
217   // Eutopia + 123 (Greek)
218   {"xn---123-pldm0haj2bk.gr",
219    L"\x03bf\x03c5\x03c4\x03bf\x03c0\x03af\x03b1-123.gr",
220    {true,  false, false, false, false,
221     false, false, false, true,  false,
222     false, false, false, false, false,
223     false, true,  false, false, false,
224     false}},
225   // Cyrillic (Russian)
226   {"xn--n1aeec9b.ru", L"\x0442\x043e\x0440\x0442\x044b.ru",
227    {true,  false, false, false, false,
228     false, false, true,  false, false,
229     false, false, false, false, false,
230     false, false, false, false, true,
231     true}},
232   // Cyrillic + 123 (Russian)
233   {"xn---123-45dmmc5f.ru", L"\x0442\x043e\x0440\x0442\x044b-123.ru",
234    {true,  false, false, false, false,
235     false, false, true,  false, false,
236     false, false, false, false, false,
237     false, false, false, false, true,
238     true}},
239   // Arabic
240   {"xn--mgba1fmg.ar", L"\x0627\x0641\x0644\x0627\x0645.ar",
241    {true,  false, false, false, false,
242     false, true,  false, false, false,
243     false, false, false, false, false,
244     false, false, false, false, false,
245     false}},
246   // Hebrew
247   {"xn--4dbib.he", L"\x05d5\x05d0\x05d4.he",
248    {true,  false, false, false, false,
249     true,  false, false, false, false,
250     false, false, false, false, false,
251     false, false, false, false, true,
252     false}},
253   // Thai
254   {"xn--12c2cc4ag3b4ccu.th",
255    L"\x0e2a\x0e32\x0e22\x0e01\x0e32\x0e23\x0e1a\x0e34\x0e19.th",
256    {true,  false, false, false, false,
257     false, false, false, false, false,
258     false, false, false, true,  false,
259     false, false, false, false, false,
260     false}},
261   // Devangari (Hindi)
262   {"www.xn--l1b6a9e1b7c.in", L"www.\x0905\x0915\x094b\x0932\x093e.in",
263    {true,  false, false, false, false,
264     false, false, false, false, false,
265     false, false, false, false, true,
266     false, false, false, false, false,
267     false}},
268   // Invalid IDN
269   {"xn--hello?world.com", NULL,
270    {false, false, false, false, false,
271     false, false, false, false, false,
272     false, false, false, false, false,
273     false, false, false, false, false,
274     false}},
275   // Unsafe IDNs
276   // "payp<alpha>l.com"
277   {"www.xn--paypl-g9d.com", L"payp\x03b1l.com",
278    {false, false, false, false, false,
279     false, false, false, false, false,
280     false, false, false, false, false,
281     false, false, false, false, false,
282     false}},
283   // google.gr with Greek omicron and epsilon
284   {"xn--ggl-6xc1ca.gr", L"g\x03bf\x03bfgl\x03b5.gr",
285    {false, false, false, false, false,
286     false, false, false, false, false,
287     false, false, false, false, false,
288     false, false, false, false, false,
289     false}},
290   // google.ru with Cyrillic o
291   {"xn--ggl-tdd6ba.ru", L"g\x043e\x043egl\x0435.ru",
292    {false, false, false, false, false,
293     false, false, false, false, false,
294     false, false, false, false, false,
295     false, false, false, false, false,
296     false}},
297   // h<e with acute>llo<China in Han>.cn
298   {"xn--hllo-bpa7979ih5m.cn", L"h\x00e9llo\x4e2d\x56fd.cn",
299    {false, false, false, false, false,
300     false, false, false, false, false,
301     false, false, false, false, false,
302     false, false, false, false, false,
303     false}},
304   // <Greek rho><Cyrillic a><Cyrillic u>.ru
305   {"xn--2xa6t2b.ru", L"\x03c1\x0430\x0443.ru",
306    {false, false, false, false, false,
307     false, false, false, false, false,
308     false, false, false, false, false,
309     false, false, false, false, false,
310     false}},
311   // One that's really long that will force a buffer realloc
312   {"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
313        "aaaaaaa",
314    L"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
315        L"aaaaaaaa",
316    {true,  true,  true,  true,  true,
317     true,  true,  true,  true,  true,
318     true,  true,  true,  true,  true,
319     true,  true,  true,  true,  true,
320     true}},
321   // Test cases for characters we blacklisted although allowed in IDN.
322   // Embedded spaces will be turned to %20 in the display.
323   // TODO(jungshik): We need to have more cases. This is a typical
324   // data-driven trap. The following test cases need to be separated
325   // and tested only for a couple of languages.
326   {"xn--osd3820f24c.kr", L"\xac00\xb098\x115f.kr",
327     {false, false, false, false, false,
328      false, false, false, false, false,
329      false, false, false, false, false,
330      false, false, false, false, false,
331      false}},
332   {"www.xn--google-ho0coa.com", L"www.\x2039google\x203a.com",
333     {false, false, false, false, false,
334      false, false, false, false, false,
335      false, false, false, false, false,
336      false, false, false, false, false,
337   }},
338   {"google.xn--comabc-k8d", L"google.com\x0338" L"abc",
339     {false, false, false, false, false,
340      false, false, false, false, false,
341      false, false, false, false, false,
342      false, false, false, false, false,
343   }},
344   {"google.xn--com-oh4ba.evil.jp", L"google.com\x309a\x309a.evil.jp",
345     {false, false, false, false, false,
346      false, false, false, false, false,
347      false, false, false, false, false,
348      false, false, false, false, false,
349   }},
350   {"google.xn--comevil-v04f.jp", L"google.com\x30ce" L"evil.jp",
351     {false, false, false, false, false,
352      false, false, false, false, false,
353      false, false, false, false, false,
354      false, false, false, false, false,
355   }},
356 #if 0
357   // These two cases are special. We need a separate test.
358   // U+3000 and U+3002 are normalized to ASCII space and dot.
359   {"xn-- -kq6ay5z.cn", L"\x4e2d\x56fd\x3000.cn",
360     {false, false, true,  false, false,
361      false, false, false, false, false,
362      false, false, false, false, false,
363      false, false, true,  false, false,
364      true}},
365   {"xn--fiqs8s.cn", L"\x4e2d\x56fd\x3002" L"cn",
366     {false, false, true,  false, false,
367      false, false, false, false, false,
368      false, false, false, false, false,
369      false, false, true,  false, false,
370      true}},
371 #endif
372 };
373 
// Pairs an input offset with an output offset.
// NOTE(review): the tests that consume this struct are outside this excerpt;
// presumably |output_offset| is the expected adjusted value -- confirm there.
struct AdjustOffsetCase {
  // Offset supplied to the code under test.
  size_t input_offset;
  // Offset paired with |input_offset|.
  size_t output_offset;
};
378 
// One host-compliance expectation.
// NOTE(review): the consuming test is outside this excerpt; field meanings
// below are read off the names only -- confirm there.
struct CompliantHostCase {
  // Host name under test.
  const char* host;
  // TLD supplied together with |host|.
  const char* desired_tld;
  // Result the check is expected to return.
  bool expected_output;
};
384 
// One suggested-filename expectation.
// NOTE(review): the consuming test is outside this excerpt; field meanings
// below are read off the names only -- confirm there.
struct SuggestedFilenameCase {
  // URL of the resource.
  const char* url;
  // Content-Disposition header accompanying |url|.
  const char* content_disp_header;
  // Charset of the referrer.
  const char* referrer_charset;
  // Default file name supplied to the code under test.
  const wchar_t* default_filename;
  // File name the code under test is expected to suggest.
  const wchar_t* expected_filename;
};
392 
393 struct UrlTestData {
394   const char* description;
395   const char* input;
396   const char* languages;
397   FormatUrlTypes format_types;
398   UnescapeRule::Type escape_rules;
399   const wchar_t* output;  // Use |wchar_t| to handle Unicode constants easily.
400   size_t prefix_len;
401 };
402 
403 // Returns an addrinfo for the given 32-bit address (IPv4.)
404 // The result lives in static storage, so don't delete it.
405 // |bytes| should be an array of length 4.
GetIPv4Address(const uint8 * bytes,int port)406 const struct addrinfo* GetIPv4Address(const uint8* bytes, int port) {
407   static struct addrinfo static_ai;
408   static struct sockaddr_in static_addr4;
409 
410   struct addrinfo* ai = &static_ai;
411   ai->ai_socktype = SOCK_STREAM;
412   memset(ai, 0, sizeof(static_ai));
413 
414   ai->ai_family = AF_INET;
415   ai->ai_addrlen = sizeof(static_addr4);
416 
417   struct sockaddr_in* addr4 = &static_addr4;
418   memset(addr4, 0, sizeof(static_addr4));
419   addr4->sin_port = htons(port);
420   addr4->sin_family = ai->ai_family;
421   memcpy(&addr4->sin_addr, bytes, 4);
422 
423   ai->ai_addr = (sockaddr*)addr4;
424   return ai;
425 }
426 
427 // Returns a addrinfo for the given 128-bit address (IPv6.)
428 // The result lives in static storage, so don't delete it.
429 // |bytes| should be an array of length 16.
GetIPv6Address(const uint8 * bytes,int port)430 const struct addrinfo* GetIPv6Address(const uint8* bytes, int port) {
431   static struct addrinfo static_ai;
432   static struct sockaddr_in6 static_addr6;
433 
434   struct addrinfo* ai = &static_ai;
435   ai->ai_socktype = SOCK_STREAM;
436   memset(ai, 0, sizeof(static_ai));
437 
438   ai->ai_family = AF_INET6;
439   ai->ai_addrlen = sizeof(static_addr6);
440 
441   struct sockaddr_in6* addr6 = &static_addr6;
442   memset(addr6, 0, sizeof(static_addr6));
443   addr6->sin6_port = htons(port);
444   addr6->sin6_family = ai->ai_family;
445   memcpy(&addr6->sin6_addr, bytes, 16);
446 
447   ai->ai_addr = (sockaddr*)addr6;
448   return ai;
449 }
450 
// A helper for IDN*{Fast,Slow}.
// Tags both |expected| and |actual| with the same "::<language list>" suffix
// so that a failing sub-case can be identified without debugging.
void AppendLanguagesToOutputs(const wchar_t* languages,
                              std::wstring* expected,
                              std::wstring* actual) {
  // Build the suffix once, then attach it to both strings.
  std::wstring tag(L"::");
  tag += languages;
  *expected += tag;
  *actual += tag;
}
462 
463 // Helper to strignize an IP number (used to define expectations).
DumpIPNumber(const IPAddressNumber & v)464 std::string DumpIPNumber(const IPAddressNumber& v) {
465   std::string out;
466   for (size_t i = 0; i < v.size(); ++i) {
467     if (i != 0)
468       out.append(",");
469     out.append(base::IntToString(static_cast<int>(v[i])));
470   }
471   return out;
472 }
473 
474 }  // anonymous namespace
475 
TEST(NetUtilTest,FileURLConversion)476 TEST(NetUtilTest, FileURLConversion) {
477   // a list of test file names and the corresponding URLs
478   const FileCase round_trip_cases[] = {
479 #if defined(OS_WIN)
480     {L"C:\\foo\\bar.txt", "file:///C:/foo/bar.txt"},
481     {L"\\\\some computer\\foo\\bar.txt",
482      "file://some%20computer/foo/bar.txt"}, // UNC
483     {L"D:\\Name;with%some symbols*#",
484      "file:///D:/Name%3Bwith%25some%20symbols*%23"},
485     // issue 14153: To be tested with the OS default codepage other than 1252.
486     {L"D:\\latin1\\caf\x00E9\x00DD.txt",
487      "file:///D:/latin1/caf%C3%A9%C3%9D.txt"},
488     {L"D:\\otherlatin\\caf\x0119.txt",
489      "file:///D:/otherlatin/caf%C4%99.txt"},
490     {L"D:\\greek\\\x03B1\x03B2\x03B3.txt",
491      "file:///D:/greek/%CE%B1%CE%B2%CE%B3.txt"},
492     {L"D:\\Chinese\\\x6240\x6709\x4e2d\x6587\x7f51\x9875.doc",
493      "file:///D:/Chinese/%E6%89%80%E6%9C%89%E4%B8%AD%E6%96%87%E7%BD%91"
494          "%E9%A1%B5.doc"},
495     {L"D:\\plane1\\\xD835\xDC00\xD835\xDC01.txt",  // Math alphabet "AB"
496      "file:///D:/plane1/%F0%9D%90%80%F0%9D%90%81.txt"},
497 #elif defined(OS_POSIX)
498     {L"/foo/bar.txt", "file:///foo/bar.txt"},
499     {L"/foo/BAR.txt", "file:///foo/BAR.txt"},
500     {L"/C:/foo/bar.txt", "file:///C:/foo/bar.txt"},
501     {L"/some computer/foo/bar.txt", "file:///some%20computer/foo/bar.txt"},
502     {L"/Name;with%some symbols*#", "file:///Name%3Bwith%25some%20symbols*%23"},
503     {L"/latin1/caf\x00E9\x00DD.txt", "file:///latin1/caf%C3%A9%C3%9D.txt"},
504     {L"/otherlatin/caf\x0119.txt", "file:///otherlatin/caf%C4%99.txt"},
505     {L"/greek/\x03B1\x03B2\x03B3.txt", "file:///greek/%CE%B1%CE%B2%CE%B3.txt"},
506     {L"/Chinese/\x6240\x6709\x4e2d\x6587\x7f51\x9875.doc",
507      "file:///Chinese/%E6%89%80%E6%9C%89%E4%B8%AD%E6%96%87%E7%BD"
508          "%91%E9%A1%B5.doc"},
509     {L"/plane1/\x1D400\x1D401.txt",  // Math alphabet "AB"
510      "file:///plane1/%F0%9D%90%80%F0%9D%90%81.txt"},
511 #endif
512   };
513 
514   // First, we'll test that we can round-trip all of the above cases of URLs
515   FilePath output;
516   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(round_trip_cases); i++) {
517     // convert to the file URL
518     GURL file_url(FilePathToFileURL(
519                       file_util::WStringAsFilePath(round_trip_cases[i].file)));
520     EXPECT_EQ(round_trip_cases[i].url, file_url.spec());
521 
522     // Back to the filename.
523     EXPECT_TRUE(FileURLToFilePath(file_url, &output));
524     EXPECT_EQ(round_trip_cases[i].file, file_util::FilePathAsWString(output));
525   }
526 
527   // Test that various file: URLs get decoded into the correct file type
528   FileCase url_cases[] = {
529 #if defined(OS_WIN)
530     {L"C:\\foo\\bar.txt", "file:c|/foo\\bar.txt"},
531     {L"C:\\foo\\bar.txt", "file:/c:/foo/bar.txt"},
532     {L"\\\\foo\\bar.txt", "file://foo\\bar.txt"},
533     {L"C:\\foo\\bar.txt", "file:///c:/foo/bar.txt"},
534     {L"\\\\foo\\bar.txt", "file:////foo\\bar.txt"},
535     {L"\\\\foo\\bar.txt", "file:/foo/bar.txt"},
536     {L"\\\\foo\\bar.txt", "file://foo\\bar.txt"},
537     {L"C:\\foo\\bar.txt", "file:\\\\\\c:/foo/bar.txt"},
538 #elif defined(OS_POSIX)
539     {L"/c:/foo/bar.txt", "file:/c:/foo/bar.txt"},
540     {L"/c:/foo/bar.txt", "file:///c:/foo/bar.txt"},
541     {L"/foo/bar.txt", "file:/foo/bar.txt"},
542     {L"/c:/foo/bar.txt", "file:\\\\\\c:/foo/bar.txt"},
543     {L"/foo/bar.txt", "file:foo/bar.txt"},
544     {L"/bar.txt", "file://foo/bar.txt"},
545     {L"/foo/bar.txt", "file:///foo/bar.txt"},
546     {L"/foo/bar.txt", "file:////foo/bar.txt"},
547     {L"/foo/bar.txt", "file:////foo//bar.txt"},
548     {L"/foo/bar.txt", "file:////foo///bar.txt"},
549     {L"/foo/bar.txt", "file:////foo////bar.txt"},
550     {L"/c:/foo/bar.txt", "file:\\\\\\c:/foo/bar.txt"},
551     {L"/c:/foo/bar.txt", "file:c:/foo/bar.txt"},
552     // We get these wrong because GURL turns back slashes into forward
553     // slashes.
554     //{L"/foo%5Cbar.txt", "file://foo\\bar.txt"},
555     //{L"/c|/foo%5Cbar.txt", "file:c|/foo\\bar.txt"},
556     //{L"/foo%5Cbar.txt", "file://foo\\bar.txt"},
557     //{L"/foo%5Cbar.txt", "file:////foo\\bar.txt"},
558     //{L"/foo%5Cbar.txt", "file://foo\\bar.txt"},
559 #endif
560   };
561   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(url_cases); i++) {
562     FileURLToFilePath(GURL(url_cases[i].url), &output);
563     EXPECT_EQ(url_cases[i].file, file_util::FilePathAsWString(output));
564   }
565 
566   // Unfortunately, UTF8ToWide discards invalid UTF8 input.
567 #ifdef BUG_878908_IS_FIXED
568   // Test that no conversion happens if the UTF-8 input is invalid, and that
569   // the input is preserved in UTF-8
570   const char invalid_utf8[] = "file:///d:/Blah/\xff.doc";
571   const wchar_t invalid_wide[] = L"D:\\Blah\\\xff.doc";
572   EXPECT_TRUE(FileURLToFilePath(
573       GURL(std::string(invalid_utf8)), &output));
574   EXPECT_EQ(std::wstring(invalid_wide), output);
575 #endif
576 
577   // Test that if a file URL is malformed, we get a failure
578   EXPECT_FALSE(FileURLToFilePath(GURL("filefoobar"), &output));
579 }
580 
// Verifies the username/password pair that GetIdentityFromURL() extracts
// from URLs with embedded credentials.
TEST(NetUtilTest, GetIdentityFromURL) {
  struct IdentityCase {
    const char* input_url;
    const char* expected_username;
    const char* expected_password;
  };
  IdentityCase identity_cases[] = {
    {
      "http://username:password@google.com",
      "username",
      "password",
    },
    { // Test for http://crbug.com/19200
      "http://username:p@ssword@google.com",
      "username",
      "p@ssword",
    },
    { // Special URL characters should be unescaped.
      "http://username:p%3fa%26s%2fs%23@google.com",
      "username",
      "p?a&s/s#",
    },
    { // Username contains %20.
      "http://use rname:password@google.com",
      "use rname",
      "password",
    },
    { // Keep %00 as is.
      "http://use%00rname:password@google.com",
      "use%00rname",
      "password",
    },
    { // Use a '+' in the username.
      "http://use+rname:password@google.com",
      "use+rname",
      "password",
    },
    { // Use a '&' in the password.
      "http://username:p&ssword@google.com",
      "username",
      "p&ssword",
    },
  };

  for (size_t index = 0; index < ARRAYSIZE_UNSAFE(identity_cases); ++index) {
    const IdentityCase& current = identity_cases[index];
    SCOPED_TRACE(base::StringPrintf("Test[%" PRIuS "]: %s", index,
                                    current.input_url));

    // Extract the identity from the parsed URL.
    GURL parsed_url(current.input_url);
    string16 actual_username;
    string16 actual_password;
    GetIdentityFromURL(parsed_url, &actual_username, &actual_password);

    EXPECT_EQ(ASCIIToUTF16(current.expected_username), actual_username);
    EXPECT_EQ(ASCIIToUTF16(current.expected_password), actual_password);
  }
}
635 
636 // Try extracting a username which was encoded with UTF8.
TEST(NetUtilTest,GetIdentityFromURL_UTF8)637 TEST(NetUtilTest, GetIdentityFromURL_UTF8) {
638   GURL url(WideToUTF16(L"http://foo:\x4f60\x597d@blah.com"));
639 
640   EXPECT_EQ("foo", url.username());
641   EXPECT_EQ("%E4%BD%A0%E5%A5%BD", url.password());
642 
643   // Extract the unescaped identity.
644   string16 username, password;
645   GetIdentityFromURL(url, &username, &password);
646 
647   // Verify that it was decoded as UTF8.
648   EXPECT_EQ(ASCIIToUTF16("foo"), username);
649   EXPECT_EQ(WideToUTF16(L"\x4f60\x597d"), password);
650 }
651 
// Just a bunch of fake headers, one per line and separated by '\n'; the last
// header has no trailing newline. Includes repeated headers and two
// deliberately malformed ones for the header-parsing tests.
const wchar_t* google_headers =
    L"HTTP/1.1 200 OK\n"
    L"Content-TYPE: text/html; charset=utf-8\n"
    L"Content-disposition: attachment; filename=\"download.pdf\"\n"
    L"Content-Length: 378557\n"
    L"X-Google-Google1: 314159265\n"
    L"X-Google-Google2: aaaa2:7783,bbb21:9441\n"
    L"X-Google-Google4: home\n"
    L"Transfer-Encoding: chunked\n"
    L"Set-Cookie: HEHE_AT=6666x66beef666x6-66xx6666x66; Path=/mail\n"
    L"Set-Cookie: HEHE_HELP=owned:0;Path=/\n"
    // One long cookie line, split across literals to fit the line limit.
    L"Set-Cookie: S=gmail=Xxx-beefbeefbeef_beefb:"
    L"gmail_yj=beefbeef000beefbeefbee:"
    L"gmproxy=bee-fbeefbe; Domain=.google.com; Path=/\n"
    L"X-Google-Google2: /one/two/three/four/five/six/seven-height/nine:9411\n"
    L"Server: GFE/1.3\n"
    L"Transfer-Encoding: chunked\n"
    L"Date: Mon, 13 Nov 2006 21:38:09 GMT\n"
    L"Expires: Tue, 14 Nov 2006 19:23:58 GMT\n"
    L"X-Malformed: bla; arg=test\"\n"
    L"X-Malformed2: bla; arg=\n"
    L"X-Test: bla; arg1=val1; arg2=val2";
674 
// Looks up individual headers in |google_headers| with GetSpecificHeader().
// The table expects name matching to ignore case and unknown or empty names
// to produce an empty result. The same lookups against an empty header block
// must all come back empty.
TEST(NetUtilTest, GetSpecificHeader) {
  const HeaderCase tests[] = {
    {L"content-type", L"text/html; charset=utf-8"},
    {L"CONTENT-LENGTH", L"378557"},
    {L"Date", L"Mon, 13 Nov 2006 21:38:09 GMT"},
    {L"Bad-Header", L""},
    {L"", L""},
  };

  // Test first with google_headers.
  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(tests); ++i) {
    std::wstring result = GetSpecificHeader(google_headers,
                                            tests[i].header_name);
    // Expected value goes first, matching the rest of this file, so that
    // gtest failure output labels the two sides correctly.
    EXPECT_EQ(tests[i].expected, result);
  }

  // Test again with empty headers.
  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(tests); ++i) {
    std::wstring result = GetSpecificHeader(L"", tests[i].header_name);
    EXPECT_EQ(std::wstring(), result);
  }
}
697 
// Fetches a header from |google_headers| with GetSpecificHeader(), then
// extracts one of its parameters with GetHeaderParamValue() using
// QuoteRule::REMOVE_OUTER_QUOTES. Unknown headers or parameters are expected
// to produce an empty result. The same lookups against an empty header block
// must all come back empty.
TEST(NetUtilTest, GetHeaderParamValue) {
  const HeaderParamCase tests[] = {
    {L"Content-type", L"charset", L"utf-8"},
    {L"content-disposition", L"filename", L"download.pdf"},
    {L"Content-Type", L"badparam", L""},
    {L"X-Malformed", L"arg", L"test\""},
    {L"X-Malformed2", L"arg", L""},
    {L"X-Test", L"arg1", L"val1"},
    {L"X-Test", L"arg2", L"val2"},
    {L"Bad-Header", L"badparam", L""},
    {L"Bad-Header", L"", L""},
    {L"", L"badparam", L""},
    {L"", L"", L""},
  };
  // TODO(mpcomplete): add tests for other formats of headers.

  // Test first with google_headers.
  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(tests); ++i) {
    std::wstring header_value =
        GetSpecificHeader(google_headers, tests[i].header_name);
    std::wstring result =
        GetHeaderParamValue(header_value, tests[i].param_name,
                            QuoteRule::REMOVE_OUTER_QUOTES);
    // Expected value goes first, matching the rest of this file, so that
    // gtest failure output labels the two sides correctly.
    EXPECT_EQ(tests[i].expected, result);
  }

  // Test again with empty headers.
  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(tests); ++i) {
    std::wstring header_value =
        GetSpecificHeader(L"", tests[i].header_name);
    std::wstring result =
        GetHeaderParamValue(header_value, tests[i].param_name,
                            QuoteRule::REMOVE_OUTER_QUOTES);
    EXPECT_EQ(std::wstring(), result);
  }
}
732 
// Checks how GetHeaderParamValue() treats quotes around a "filename"
// parameter under both quote rules: KEEP_OUTER_QUOTES and
// REMOVE_OUTER_QUOTES.
TEST(NetUtilTest, GetHeaderParamValueQuotes) {
  struct QuoteCase {
    const char* header;
    const char* expected_with_quotes;
    const char* expected_without_quotes;
  };
  QuoteCase quote_cases[] = {
    {"filename=foo", "foo", "foo"},
    {"filename=\"foo\"", "\"foo\"", "foo"},
    {"filename=foo\"", "foo\"", "foo\""},
    {"filename=fo\"o", "fo\"o", "fo\"o"},
  };

  for (size_t index = 0; index < ARRAYSIZE_UNSAFE(quote_cases); ++index) {
    const QuoteCase& current = quote_cases[index];

    // Run both extractions first, then compare.
    std::string kept = GetHeaderParamValue(
        current.header, "filename", QuoteRule::KEEP_OUTER_QUOTES);
    std::string stripped = GetHeaderParamValue(
        current.header, "filename", QuoteRule::REMOVE_OUTER_QUOTES);

    EXPECT_EQ(current.expected_with_quotes, kept)
        << "Failed while processing: " << current.header;
    EXPECT_EQ(current.expected_without_quotes, stripped)
        << "Failed while processing: " << current.header;
  }
}
758 
TEST(NetUtilTest,GetFileNameFromCD)759 TEST(NetUtilTest, GetFileNameFromCD) {
760   const FileNameCDCase tests[] = {
761     // Test various forms of C-D header fields emitted by web servers.
762     {"content-disposition: inline; filename=\"abcde.pdf\"", "", L"abcde.pdf"},
763     {"content-disposition: inline; name=\"abcde.pdf\"", "", L"abcde.pdf"},
764     {"content-disposition: attachment; filename=abcde.pdf", "", L"abcde.pdf"},
765     {"content-disposition: attachment; name=abcde.pdf", "", L"abcde.pdf"},
766     {"content-disposition: attachment; filename=abc,de.pdf", "", L"abc,de.pdf"},
767     {"content-disposition: filename=abcde.pdf", "", L"abcde.pdf"},
768     {"content-disposition: filename= abcde.pdf", "", L"abcde.pdf"},
769     {"content-disposition: filename =abcde.pdf", "", L"abcde.pdf"},
770     {"content-disposition: filename = abcde.pdf", "", L"abcde.pdf"},
771     {"content-disposition: filename\t=abcde.pdf", "", L"abcde.pdf"},
772     {"content-disposition: filename \t\t  =abcde.pdf", "", L"abcde.pdf"},
773     {"content-disposition: name=abcde.pdf", "", L"abcde.pdf"},
774     {"content-disposition: inline; filename=\"abc%20de.pdf\"", "",
775      L"abc de.pdf"},
776     // Unbalanced quotation mark
777     {"content-disposition: filename=\"abcdef.pdf", "", L"abcdef.pdf"},
778     // Whitespaces are converted to a space.
779     {"content-disposition: inline; filename=\"abc  \t\nde.pdf\"", "",
780      L"abc    de.pdf"},
781     // %-escaped UTF-8
782     {"Content-Disposition: attachment; filename=\"%EC%98%88%EC%88%A0%20"
783      "%EC%98%88%EC%88%A0.jpg\"", "", L"\xc608\xc220 \xc608\xc220.jpg"},
784     {"Content-Disposition: attachment; filename=\"%F0%90%8C%B0%F0%90%8C%B1"
785      "abc.jpg\"", "", L"\U00010330\U00010331abc.jpg"},
786     {"Content-Disposition: attachment; filename=\"%EC%98%88%EC%88%A0 \n"
787      "%EC%98%88%EC%88%A0.jpg\"", "", L"\xc608\xc220  \xc608\xc220.jpg"},
788     // RFC 2047 with various charsets and Q/B encodings
789     {"Content-Disposition: attachment; filename=\"=?EUC-JP?Q?=B7=DD=BD="
790      "D13=2Epng?=\"", "", L"\x82b8\x8853" L"3.png"},
791     {"Content-Disposition: attachment; filename==?eUc-Kr?b?v7m8+iAzLnBuZw==?=",
792      "", L"\xc608\xc220 3.png"},
793     {"Content-Disposition: attachment; filename==?utf-8?Q?=E8=8A=B8=E8"
794      "=A1=93_3=2Epng?=", "", L"\x82b8\x8853 3.png"},
795     {"Content-Disposition: attachment; filename==?utf-8?Q?=F0=90=8C=B0"
796      "_3=2Epng?=", "", L"\U00010330 3.png"},
797     {"Content-Disposition: inline; filename=\"=?iso88591?Q?caf=e9_=2epng?=\"",
798      "", L"caf\x00e9 .png"},
799     // Space after an encoded word should be removed.
800     {"Content-Disposition: inline; filename=\"=?iso88591?Q?caf=E9_?= .png\"",
801      "", L"caf\x00e9 .png"},
802     // Two encoded words with different charsets (not very likely to be emitted
803     // by web servers in the wild). Spaces between them are removed.
804     {"Content-Disposition: inline; filename=\"=?euc-kr?b?v7m8+iAz?="
805      " =?ksc5601?q?=BF=B9=BC=FA=2Epng?=\"", "",
806      L"\xc608\xc220 3\xc608\xc220.png"},
807     {"Content-Disposition: attachment; filename=\"=?windows-1252?Q?caf=E9?="
808      "  =?iso-8859-7?b?4eI=?= .png\"", "", L"caf\x00e9\x03b1\x03b2.png"},
809     // Non-ASCII string is passed through and treated as UTF-8 as long as
810     // it's valid as UTF-8 and regardless of |referrer_charset|.
811     {"Content-Disposition: attachment; filename=caf\xc3\xa9.png",
812      "iso-8859-1", L"caf\x00e9.png"},
813     {"Content-Disposition: attachment; filename=caf\xc3\xa9.png",
814      "", L"caf\x00e9.png"},
815     // Non-ASCII/Non-UTF-8 string. Fall back to the referrer charset.
816     {"Content-Disposition: attachment; filename=caf\xe5.png",
817      "windows-1253", L"caf\x03b5.png"},
818 #if 0
819     // Non-ASCII/Non-UTF-8 string. Fall back to the native codepage.
820     // TODO(jungshik): We need to set the OS default codepage
821     // to a specific value before testing. On Windows, we can use
822     // SetThreadLocale().
823     {"Content-Disposition: attachment; filename=\xb0\xa1\xb0\xa2.png",
824      "", L"\xac00\xac01.png"},
825 #endif
826     // Failure cases
827     // Invalid hex-digit "G"
828     {"Content-Disposition: attachment; filename==?iiso88591?Q?caf=EG?=", "",
829      L""},
830     // Incomplete RFC 2047 encoded-word (missing '='' at the end)
831     {"Content-Disposition: attachment; filename==?iso88591?Q?caf=E3?", "", L""},
832     // Extra character at the end of an encoded word
833     {"Content-Disposition: attachment; filename==?iso88591?Q?caf=E3?==",
834      "", L""},
835     // Extra token at the end of an encoded word
836     {"Content-Disposition: attachment; filename==?iso88591?Q?caf=E3?=?",
837      "", L""},
838     {"Content-Disposition: attachment; filename==?iso88591?Q?caf=E3?=?=",
839      "",  L""},
840     // Incomplete hex-escaped chars
841     {"Content-Disposition: attachment; filename==?windows-1252?Q?=63=61=E?=",
842      "", L""},
843     {"Content-Disposition: attachment; filename=%EC%98%88%EC%88%A", "", L""},
844     // %-escaped non-UTF-8 encoding is an "error"
845     {"Content-Disposition: attachment; filename=%B7%DD%BD%D1.png", "", L""},
846     // Two RFC 2047 encoded words in a row without a space is an error.
847     {"Content-Disposition: attachment; filename==?windows-1252?Q?caf=E3?="
848      "=?iso-8859-7?b?4eIucG5nCg==?=", "", L""},
849 
850     // RFC 5987 tests with Filename*  : see http://tools.ietf.org/html/rfc5987
851     {"Content-Disposition: attachment; filename*=foo.html", "", L""},
852     {"Content-Disposition: attachment; filename*=foo'.html", "", L""},
853     {"Content-Disposition: attachment; filename*=''foo'.html", "", L""},
854     {"Content-Disposition: attachment; filename*=''foo.html'", "", L""},
855     {"Content-Disposition: attachment; filename*=''f\"oo\".html'", "", L""},
856     {"Content-Disposition: attachment; filename*=bogus_charset''foo.html'",
857      "", L""},
858     {"Content-Disposition: attachment; filename*='en'foo.html'", "", L""},
859     {"Content-Disposition: attachment; filename*=iso-8859-1'en'foo.html", "",
860       L"foo.html"},
861     {"Content-Disposition: attachment; filename*=utf-8'en'foo.html", "",
862       L"foo.html"},
863     // charset cannot be omitted.
864     {"Content-Disposition: attachment; filename*='es'f\xfa.html'", "", L""},
865     // Non-ASCII bytes are not allowed.
866     {"Content-Disposition: attachment; filename*=iso-8859-1'es'f\xfa.html", "",
867       L""},
868     {"Content-Disposition: attachment; filename*=utf-8'es'f\xce\xba.html", "",
869       L""},
870     // TODO(jshin): Space should be %-encoded, but currently, we allow
871     // spaces.
872     {"Content-Disposition: inline; filename*=iso88591''cafe foo.png", "",
873       L"cafe foo.png"},
874 
875     // Filename* tests converted from Q-encoded tests above.
876     {"Content-Disposition: attachment; filename*=EUC-JP''%B7%DD%BD%D13%2Epng",
877      "", L"\x82b8\x8853" L"3.png"},
878     {"Content-Disposition: attachment; filename*=utf-8''"
879       "%E8%8A%B8%E8%A1%93%203%2Epng", "", L"\x82b8\x8853 3.png"},
880     {"Content-Disposition: attachment; filename*=utf-8''%F0%90%8C%B0 3.png", "",
881       L"\U00010330 3.png"},
882     {"Content-Disposition: inline; filename*=Euc-Kr'ko'%BF%B9%BC%FA%2Epng", "",
883      L"\xc608\xc220.png"},
884     {"Content-Disposition: attachment; filename*=windows-1252''caf%E9.png", "",
885       L"caf\x00e9.png"},
886 
887     // http://greenbytes.de/tech/tc2231/ filename* test cases.
888     // attwithisofn2231iso
889     {"Content-Disposition: attachment; filename*=iso-8859-1''foo-%E4.html", "",
890       L"foo-\xe4.html"},
891     // attwithfn2231utf8
892     {"Content-Disposition: attachment; filename*="
893       "UTF-8''foo-%c3%a4-%e2%82%ac.html", "", L"foo-\xe4-\x20ac.html"},
894     // attwithfn2231noc : no encoding specified but UTF-8 is used.
895     {"Content-Disposition: attachment; filename*=''foo-%c3%a4-%e2%82%ac.html",
896       "", L""},
897     // attwithfn2231utf8comp
898     {"Content-Disposition: attachment; filename*=UTF-8''foo-a%cc%88.html", "",
899       L"foo-\xe4.html"},
900 #ifdef ICU_SHOULD_FAIL_CONVERSION_ON_INVALID_CHARACTER
901     // This does not work because we treat ISO-8859-1 synonymous with
902     // Windows-1252 per HTML5. For HTTP, in theory, we're not
903     // supposed to.
904     // attwithfn2231utf8-bad
905     {"Content-Disposition: attachment; filename*="
906       "iso-8859-1''foo-%c3%a4-%e2%82%ac.html", "", L""},
907 #endif
908     // attwithfn2231ws1
909     {"Content-Disposition: attachment; filename *=UTF-8''foo-%c3%a4.html", "",
910       L""},
911     // attwithfn2231ws2
912     {"Content-Disposition: attachment; filename*= UTF-8''foo-%c3%a4.html", "",
913       L"foo-\xe4.html"},
914     // attwithfn2231ws3
915     {"Content-Disposition: attachment; filename* =UTF-8''foo-%c3%a4.html", "",
916       L"foo-\xe4.html"},
917     // attwithfn2231quot
918     {"Content-Disposition: attachment; filename*=\"UTF-8''foo-%c3%a4.html\"",
919       "", L""},
920     // attfnboth
921     {"Content-Disposition: attachment; filename=\"foo-ae.html\"; "
922       "filename*=UTF-8''foo-%c3%a4.html", "", L"foo-\xe4.html"},
923     // attfnboth2
924     {"Content-Disposition: attachment; filename*=UTF-8''foo-%c3%a4.html; "
925       "filename=\"foo-ae.html\"", "", L"foo-\xe4.html"},
926     // attnewandfn
927     {"Content-Disposition: attachment; foobar=x; filename=\"foo.html\"", "",
928       L"foo.html"},
929   };
930   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(tests); ++i) {
931     EXPECT_EQ(tests[i].expected,
932               UTF8ToWide(GetFileNameFromCD(tests[i].header_field,
933                                            tests[i].referrer_charset)))
934         << "Failed on input: " << tests[i].header_field;
935   }
936 }
937 
// Runs every entry of idn_cases through IDNToUnicode() for each language
// setting in kLanguages, except the three settings that the
// IDNToUnicodeSlow test handles.
TEST(NetUtilTest, IDNToUnicodeFast) {
  for (size_t case_index = 0; case_index < ARRAYSIZE_UNSAFE(idn_cases);
       ++case_index) {
    const IDNTestCase& test_case = idn_cases[case_index];
    for (size_t lang_index = 0; lang_index < arraysize(kLanguages);
         ++lang_index) {
      // ja || zh-TW,en || ko,ja -> IDNToUnicodeSlow
      const bool covered_by_slow_test =
          lang_index == 3 || lang_index == 17 || lang_index == 18;
      if (covered_by_slow_test)
        continue;

      std::wstring output(IDNToUnicode(test_case.input,
                                       strlen(test_case.input),
                                       kLanguages[lang_index], NULL));

      // When Unicode display is not allowed for this language setting, the
      // input is expected to come back unchanged.
      std::wstring expected;
      if (test_case.unicode_allowed[lang_index])
        expected = test_case.unicode_output;
      else
        expected = ASCIIToWide(test_case.input);

      AppendLanguagesToOutputs(kLanguages[lang_index], &expected, &output);
      EXPECT_EQ(expected, output);
    }
  }
}
953 
// Runs every entry of idn_cases through IDNToUnicode() for only the three
// language settings that the IDNToUnicodeFast test skips.
TEST(NetUtilTest, IDNToUnicodeSlow) {
  for (size_t case_index = 0; case_index < ARRAYSIZE_UNSAFE(idn_cases);
       ++case_index) {
    const IDNTestCase& test_case = idn_cases[case_index];
    for (size_t lang_index = 0; lang_index < arraysize(kLanguages);
         ++lang_index) {
      // !(ja || zh-TW,en || ko,ja) -> IDNToUnicodeFast
      if (lang_index != 3 && lang_index != 17 && lang_index != 18)
        continue;

      std::wstring output(IDNToUnicode(test_case.input,
                                       strlen(test_case.input),
                                       kLanguages[lang_index], NULL));

      // When Unicode display is not allowed for this language setting, the
      // input is expected to come back unchanged.
      std::wstring expected;
      if (test_case.unicode_allowed[lang_index])
        expected = test_case.unicode_output;
      else
        expected = ASCIIToWide(test_case.input);

      AppendLanguagesToOutputs(kLanguages[lang_index], &expected, &output);
      EXPECT_EQ(expected, output);
    }
  }
}
969 
// Checks the offset adjustment reported by IDNToUnicode() (single offset) and
// IDNToUnicodeWithOffsets() (vector of offsets) for one punycode host.
TEST(NetUtilTest, IDNToUnicodeAdjustOffset) {
  // Unicode form: "test.\x89c6\x9891.\x5317\x4eac\x5927\x5b78.test"
  const char* const kHost = "test.xn--cy2a840a.xn--1lq90ic7f1rc.test";
  const size_t kHostLength = 39;

  // Single-offset variant: {offset passed in, offset expected back}.
  const AdjustOffsetCase adjust_cases[] = {
    {0, 0},
    {2, 2},
    {4, 4},
    {5, 5},
    {6, kNpos},
    {16, kNpos},
    {17, 7},
    {18, 8},
    {19, kNpos},
    {25, kNpos},
    {34, 12},
    {35, 13},
    {38, 16},
    {39, kNpos},
    {kNpos, kNpos},
  };
  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(adjust_cases); ++i) {
    size_t adjusted = adjust_cases[i].input_offset;
    IDNToUnicode(kHost, kHostLength, L"zh-CN", &adjusted);
    EXPECT_EQ(adjust_cases[i].output_offset, adjusted);
  }

  // Multi-offset variant: feed every offset 0..39 at once.
  const size_t kOffsetCount = 40;
  std::vector<size_t> offsets;
  for (size_t i = 0; i < kOffsetCount; ++i)
    offsets.push_back(i);
  IDNToUnicodeWithOffsets(kHost, kHostLength, L"zh-CN", &offsets);

  // Ten entries per row, so a row starts at offset 0, 10, 20 and 30.
  size_t expected[] = {
    0,     1,     2,     3,     4,     5,     kNpos, kNpos, kNpos, kNpos,
    kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 7,     8,     kNpos,
    kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos,
    kNpos, kNpos, kNpos, kNpos, 12,    13,    14,    15,    16,    kNpos,
  };
  ASSERT_EQ(kOffsetCount, arraysize(expected));
  for (size_t i = 0; i < kOffsetCount; ++i)
    EXPECT_EQ(expected[i], offsets[i]);
}
1010 
// Table-driven check of IsCanonicalizedHostCompliant(). Each row is
// {host, desired_tld, expected result}.
TEST(NetUtilTest, CompliantHost) {
  const CompliantHostCase compliant_host_cases[] = {
    {"", "", false},
    {"a", "", true},
    {"-", "", false},
    {".", "", false},
    {"9", "", false},
    {"9", "a", true},
    {"9a", "", false},
    {"9a", "a", true},
    {"a.", "", true},
    {"a.a", "", true},
    {"9.a", "", true},
    {"a.9", "", false},
    {"_9a", "", false},
    {"a.a9", "", true},
    {"a.9a", "", false},
    {"a+9a", "", false},
    {"1-.a-b", "", false},
    {"1-2.a_b", "", true},
    {"a.b.c.d.e", "", true},
    {"1.2.3.4.e", "", true},
    {"a.b.c.d.5", "", false},
    {"1.2.3.4.e.", "", true},
    {"a.b.c.d.5.", "", false},
  };

  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(compliant_host_cases); ++i) {
    // Both sides of the comparison are bools, so without the streamed
    // message a failure would not say which row produced it.
    EXPECT_EQ(compliant_host_cases[i].expected_output,
              IsCanonicalizedHostCompliant(
                  compliant_host_cases[i].host,
                  compliant_host_cases[i].desired_tld))
        << "Failed on host: \"" << compliant_host_cases[i].host
        << "\", desired_tld: \"" << compliant_host_cases[i].desired_tld
        << "\"";
  }
}
1044 
// StripWWW() is expected to drop a leading "www." and leave other input
// unchanged.
TEST(NetUtilTest, StripWWW) {
  const string16 empty;
  const string16 blah(ASCIIToUTF16("blah"));

  EXPECT_EQ(empty, StripWWW(empty));
  EXPECT_EQ(empty, StripWWW(ASCIIToUTF16("www.")));
  EXPECT_EQ(blah, StripWWW(ASCIIToUTF16("www.blah")));
  EXPECT_EQ(blah, StripWWW(blah));
}
1051 
TEST(NetUtilTest,GetSuggestedFilename)1052 TEST(NetUtilTest, GetSuggestedFilename) {
1053   const SuggestedFilenameCase test_cases[] = {
1054     {"http://www.google.com/",
1055      "Content-disposition: attachment; filename=test.html",
1056      "",
1057      L"",
1058      L"test.html"},
1059     {"http://www.google.com/",
1060      "Content-disposition: attachment; filename=\"test.html\"",
1061      "",
1062      L"",
1063      L"test.html"},
1064     {"http://www.google.com/path/test.html",
1065      "Content-disposition: attachment",
1066      "",
1067      L"",
1068      L"test.html"},
1069     {"http://www.google.com/path/test.html",
1070      "Content-disposition: attachment;",
1071      "",
1072      L"",
1073      L"test.html"},
1074     {"http://www.google.com/",
1075      "",
1076      "",
1077      L"",
1078      L"www.google.com"},
1079     {"http://www.google.com/test.html",
1080      "",
1081      "",
1082      L"",
1083      L"test.html"},
1084     // Now that we use googleurl's ExtractFileName, this case falls back
1085     // to the hostname. If this behavior is not desirable, we'd better
1086     // change ExtractFileName (in url_parse).
1087     {"http://www.google.com/path/",
1088      "",
1089      "",
1090      L"",
1091      L"www.google.com"},
1092     {"http://www.google.com/path",
1093      "",
1094      "",
1095      L"",
1096      L"path"},
1097     {"file:///",
1098      "",
1099      "",
1100      L"",
1101      L"download"},
1102     {"non-standard-scheme:",
1103      "",
1104      "",
1105      L"",
1106      L"download"},
1107     {"http://www.google.com/",
1108      "Content-disposition: attachment; filename =\"test.html\"",
1109      "",
1110      L"download",
1111      L"test.html"},
1112     {"http://www.google.com/",
1113      "",
1114      "",
1115      L"download",
1116      L"download"},
1117     {"http://www.google.com/",
1118      "Content-disposition: attachment; filename=\"../test.html\"",
1119      "",
1120      L"",
1121      L"_test.html"},
1122     {"http://www.google.com/",
1123      "Content-disposition: attachment; filename=\"..\\test.html\"",
1124      "",
1125      L"",
1126      L"_test.html"},
1127     {"http://www.google.com/",
1128      "Content-disposition: attachment; filename=\"..\"",
1129      "",
1130      L"download",
1131      L"download"},
1132     {"http://www.google.com/test.html",
1133      "Content-disposition: attachment; filename=\"..\"",
1134      "",
1135      L"download",
1136      L"test.html"},
1137     // Below is a small subset of cases taken from GetFileNameFromCD test above.
1138     {"http://www.google.com/",
1139      "Content-Disposition: attachment; filename=\"%EC%98%88%EC%88%A0%20"
1140      "%EC%98%88%EC%88%A0.jpg\"",
1141      "",
1142      L"",
1143      L"\uc608\uc220 \uc608\uc220.jpg"},
1144     {"http://www.google.com/%EC%98%88%EC%88%A0%20%EC%98%88%EC%88%A0.jpg",
1145      "",
1146      "",
1147      L"download",
1148      L"\uc608\uc220 \uc608\uc220.jpg"},
1149     {"http://www.google.com/",
1150      "Content-disposition: attachment;",
1151      "",
1152      L"\uB2E4\uC6B4\uB85C\uB4DC",
1153      L"\uB2E4\uC6B4\uB85C\uB4DC"},
1154     {"http://www.google.com/",
1155      "Content-Disposition: attachment; filename=\"=?EUC-JP?Q?=B7=DD=BD="
1156      "D13=2Epng?=\"",
1157      "",
1158      L"download",
1159      L"\u82b8\u88533.png"},
1160     {"http://www.example.com/images?id=3",
1161      "Content-Disposition: attachment; filename=caf\xc3\xa9.png",
1162      "iso-8859-1",
1163      L"",
1164      L"caf\u00e9.png"},
1165     {"http://www.example.com/images?id=3",
1166      "Content-Disposition: attachment; filename=caf\xe5.png",
1167      "windows-1253",
1168      L"",
1169      L"caf\u03b5.png"},
1170     {"http://www.example.com/file?id=3",
1171      "Content-Disposition: attachment; name=\xcf\xc2\xd4\xd8.zip",
1172      "GBK",
1173      L"",
1174      L"\u4e0b\u8f7d.zip"},
1175     // Invalid C-D header. Extracts filename from url.
1176     {"http://www.google.com/test.html",
1177      "Content-Disposition: attachment; filename==?iiso88591?Q?caf=EG?=",
1178      "",
1179      L"",
1180      L"test.html"},
1181     // about: and data: URLs
1182     {"about:chrome",
1183      "",
1184      "",
1185      L"",
1186      L"download"},
1187     {"data:,looks/like/a.path",
1188      "",
1189      "",
1190      L"",
1191      L"download"},
1192     {"data:text/plain;base64,VG8gYmUgb3Igbm90IHRvIGJlLg=",
1193      "",
1194      "",
1195      L"",
1196      L"download"},
1197     {"data:,looks/like/a.path",
1198      "",
1199      "",
1200      L"default_filename_is_given",
1201      L"default_filename_is_given"},
1202     {"data:,looks/like/a.path",
1203      "",
1204      "",
1205      L"\u65e5\u672c\u8a9e",  // Japanese Kanji.
1206      L"\u65e5\u672c\u8a9e"},
1207     // Dotfiles. Ensures preceeding period(s) stripped.
1208     {"http://www.google.com/.test.html",
1209     "",
1210     "",
1211     L"",
1212     L"test.html"},
1213     {"http://www.google.com/.test",
1214     "",
1215     "",
1216     L"",
1217     L"test"},
1218     {"http://www.google.com/..test",
1219     "",
1220     "",
1221     L"",
1222     L"test"},
1223     // The filename encoding is specified by the referrer charset.
1224     {"http://example.com/V%FDvojov%E1%20psychologie.doc",
1225      "",
1226      "iso-8859-1",
1227      L"",
1228      L"V\u00fdvojov\u00e1 psychologie.doc"},
1229     // The filename encoding doesn't match the referrer charset, the
1230     // system charset, or UTF-8.
1231     // TODO(jshin): we need to handle this case.
1232 #if 0
1233     {"http://example.com/V%FDvojov%E1%20psychologie.doc",
1234      "",
1235      "utf-8",
1236      L"",
1237      L"V\u00fdvojov\u00e1 psychologie.doc",
1238     },
1239 #endif
1240   };
1241   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(test_cases); ++i) {
1242     std::wstring default_name = test_cases[i].default_filename;
1243     string16 filename = GetSuggestedFilename(
1244         GURL(test_cases[i].url), test_cases[i].content_disp_header,
1245         test_cases[i].referrer_charset, WideToUTF16(default_name));
1246     EXPECT_EQ(std::wstring(test_cases[i].expected_filename),
1247               UTF16ToWide(filename))
1248       << "Iteration " << i << ": " << test_cases[i].url;
1249   }
1250 }
1251 
// This is currently a windows specific function.
#if defined(OS_WIN)
namespace {

// One GetDirectoryListingEntry() scenario. The first five members are passed
// to the function in declaration order (|name| after conversion to UTF-16);
// |expected| is the string it should return.
struct GetDirectoryListingEntryCase {
  const wchar_t* name;
  const char* raw_bytes;
  bool is_dir;
  int64 filesize;
  base::Time time;
  const char* expected;
};

}  // namespace

TEST(NetUtilTest, GetDirectoryListingEntry) {
  const GetDirectoryListingEntryCase test_cases[] = {
    {L"Foo",
     "",
     false,
     10000,
     base::Time(),
     "<script>addRow(\"Foo\",\"Foo\",0,\"9.8 kB\",\"\");</script>\n"},
    {L"quo\"tes",
     "",
     false,
     10000,
     base::Time(),
     "<script>addRow(\"quo\\\"tes\",\"quo%22tes\",0,\"9.8 kB\",\"\");</script>"
         "\n"},
    {L"quo\"tes",
     "quo\"tes",
     false,
     10000,
     base::Time(),
     "<script>addRow(\"quo\\\"tes\",\"quo%22tes\",0,\"9.8 kB\",\"\");</script>"
         "\n"},
    // U+D55C U+AE00. raw_bytes is empty (either a local file with
    // UTF-8/UTF-16 encoding or a remote file on an ftp server using UTF-8).
    {L"\xD55C\xAE00.txt",
     "",
     false,
     10000,
     base::Time(),
     "<script>addRow(\"\\uD55C\\uAE00.txt\",\"%ED%95%9C%EA%B8%80.txt\""
         ",0,\"9.8 kB\",\"\");</script>\n"},
    // U+D55C U+AE00. raw_bytes is the corresponding EUC-KR sequence:
    // a local or remote file in EUC-KR.
    {L"\xD55C\xAE00.txt",
     "\xC7\xD1\xB1\xDB.txt",
     false,
     10000,
     base::Time(),
     "<script>addRow(\"\\uD55C\\uAE00.txt\",\"%C7%D1%B1%DB.txt\""
         ",0,\"9.8 kB\",\"\");</script>\n"},
  };

  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(test_cases); ++i) {
    const GetDirectoryListingEntryCase& test_case = test_cases[i];
    const std::string actual = GetDirectoryListingEntry(
        WideToUTF16(test_case.name),
        test_case.raw_bytes,
        test_case.is_dir,
        test_case.filesize,
        test_case.time);
    EXPECT_EQ(test_case.expected, actual);
  }
}

#endif  // defined(OS_WIN)
1320 
// Table-driven check of ParseHostAndPort(). For inputs expected to parse,
// the resulting host and port are verified too; for inputs expected to be
// rejected, only the return value is checked.
TEST(NetUtilTest, ParseHostAndPort) {
  const struct {
    const char* input;
    bool success;
    const char* expected_host;
    int expected_port;
  } tests[] = {
    // Valid inputs:
    {"foo:10", true, "foo", 10},
    {"foo", true, "foo", -1},
    {
      "[1080:0:0:0:8:800:200C:4171]:11",
      true,
      "[1080:0:0:0:8:800:200C:4171]",
      11,
    },
    // Invalid inputs:
    {"foo:bar", false, "", -1},
    {"foo:", false, "", -1},
    {":", false, "", -1},
    {":80", false, "", -1},
    {"", false, "", -1},
    {"porttoolong:300000", false, "", -1},
    {"usrname@host", false, "", -1},
    {"usrname:password@host", false, "", -1},
    {":password@host", false, "", -1},
    {":password@host:80", false, "", -1},
    {"@host", false, "", -1},
  };

  // Starting value for |port| that matches no expected_port above, so a
  // successful parse that never writes the port is reported as a mismatch
  // instead of reading an uninitialized int.
  const int kUnsetPort = -12345;

  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(tests); ++i) {
    std::string host;
    int port = kUnsetPort;
    bool ok = ParseHostAndPort(tests[i].input, &host, &port);

    EXPECT_EQ(tests[i].success, ok)
        << "Failed on input: \"" << tests[i].input << "\"";

    if (tests[i].success) {
      EXPECT_EQ(tests[i].expected_host, host)
          << "Failed on input: \"" << tests[i].input << "\"";
      EXPECT_EQ(tests[i].expected_port, port)
          << "Failed on input: \"" << tests[i].input << "\"";
    }
  }
}
1365 
// GetHostAndPort() results for URLs with and without an explicit port.
TEST(NetUtilTest, GetHostAndPort) {
  const struct {
    GURL url;
    const char* expected_host_and_port;
  } tests[] = {
    { GURL("http://www.foo.com/x"), "www.foo.com:80"},
    { GURL("http://www.foo.com:21/x"), "www.foo.com:21"},

    // IPv6 literals should always keep their brackets.
    { GURL("http://[1::2]/x"), "[1::2]:80"},
    { GURL("http://[::a]:33/x"), "[::a]:33"},
  };

  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(tests); ++i) {
    const std::string expected(tests[i].expected_host_and_port);
    EXPECT_EQ(expected, GetHostAndPort(tests[i].url));
  }
}
1383 
// GetHostAndOptionalPort() results for URLs with and without an explicit
// port.
TEST(NetUtilTest, GetHostAndOptionalPort) {
  const struct {
    GURL url;
    const char* expected_host_and_port;
  } tests[] = {
    { GURL("http://www.foo.com/x"), "www.foo.com"},
    { GURL("http://www.foo.com:21/x"), "www.foo.com:21"},

    // IPv6 literals should always keep their brackets.
    { GURL("http://[1::2]/x"), "[1::2]"},
    { GURL("http://[::a]:33/x"), "[::a]:33"},
  };

  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(tests); ++i) {
    const std::string expected(tests[i].expected_host_and_port);
    EXPECT_EQ(expected, GetHostAndOptionalPort(tests[i].url));
  }
}
1401 
1402 
// NetAddressToString() applied to IPv4 addresses built with
// GetIPv4Address().
TEST(NetUtilTest, NetAddressToString_IPv4) {
  const struct {
    uint8 addr[4];
    const char* result;
  } tests[] = {
    {{0, 0, 0, 0}, "0.0.0.0"},
    {{127, 0, 0, 1}, "127.0.0.1"},
    {{192, 168, 0, 1}, "192.168.0.1"},
  };

  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(tests); ++i) {
    const std::string expected(tests[i].result);
    const addrinfo* address = GetIPv4Address(tests[i].addr, 80);
    EXPECT_EQ(expected, NetAddressToString(address));
  }
}
1419 
// NetAddressToString() applied to an IPv6 address built with
// GetIPv6Address().
TEST(NetUtilTest, NetAddressToString_IPv6) {
  const struct {
    uint8 addr[16];
    const char* result;
  } tests[] = {
    {{0xFE, 0xDC, 0xBA, 0x98, 0x76, 0x54, 0x32, 0x10, 0xFE, 0xDC, 0xBA,
      0x98, 0x76, 0x54, 0x32, 0x10},
     "fedc:ba98:7654:3210:fedc:ba98:7654:3210"},
  };

  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(tests); ++i) {
    const addrinfo* address = GetIPv6Address(tests[i].addr, 80);
    const std::string actual = NetAddressToString(address);
    // Allow NetAddressToString() to fail, in case the system doesn't
    // support IPv6.
    if (actual.empty())
      continue;
    EXPECT_EQ(std::string(tests[i].result), actual);
  }
}
1439 
// NetAddressToStringWithPort() on 127.0.0.1 with port 166.
TEST(NetUtilTest, NetAddressToStringWithPort_IPv4) {
  uint8 loopback[] = {127, 0, 0, 1};
  const addrinfo* address = GetIPv4Address(loopback, 166);
  const std::string formatted = NetAddressToStringWithPort(address);
  EXPECT_EQ("127.0.0.1:166", formatted);
}
1446 
// NetAddressToStringWithPort() on an IPv6 address with port 361.
TEST(NetUtilTest, NetAddressToStringWithPort_IPv6) {
  uint8 bytes[] = {
      0xFE, 0xDC, 0xBA, 0x98, 0x76, 0x54, 0x32, 0x10, 0xFE, 0xDC, 0xBA,
      0x98, 0x76, 0x54, 0x32, 0x10
  };
  const addrinfo* address = GetIPv6Address(bytes, 361);
  const std::string formatted = NetAddressToStringWithPort(address);

  // May fail on systems that don't support IPv6, in which case the result is
  // empty and there is nothing to compare.
  if (!formatted.empty()) {
    EXPECT_EQ("[fedc:ba98:7654:3210:fedc:ba98:7654:3210]:361", formatted);
  }
}
1459 
// The value returned by GetHostName() differs from machine to machine, so it
// cannot be compared against a fixed string. This only exercises the code
// path and checks that the result "looks about right", i.e. is non-empty.
TEST(NetUtilTest, GetHostName) {
  const std::string local_hostname = GetHostName();
  EXPECT_FALSE(local_hostname.empty());
}
1467 
TEST(NetUtilTest,FormatUrl)1468 TEST(NetUtilTest, FormatUrl) {
1469   FormatUrlTypes default_format_type = kFormatUrlOmitUsernamePassword;
1470   const UrlTestData tests[] = {
1471     {"Empty URL", "", "", default_format_type, UnescapeRule::NORMAL, L"", 0},
1472 
1473     {"Simple URL",
1474      "http://www.google.com/", "", default_format_type, UnescapeRule::NORMAL,
1475      L"http://www.google.com/", 7},
1476 
1477     {"With a port number and a reference",
1478      "http://www.google.com:8080/#\xE3\x82\xB0", "", default_format_type,
1479      UnescapeRule::NORMAL,
1480      L"http://www.google.com:8080/#\x30B0", 7},
1481 
1482     // -------- IDN tests --------
1483     {"Japanese IDN with ja",
1484      "http://xn--l8jvb1ey91xtjb.jp", "ja", default_format_type,
1485      UnescapeRule::NORMAL, L"http://\x671d\x65e5\x3042\x3055\x3072.jp/", 7},
1486 
1487     {"Japanese IDN with en",
1488      "http://xn--l8jvb1ey91xtjb.jp", "en", default_format_type,
1489      UnescapeRule::NORMAL, L"http://xn--l8jvb1ey91xtjb.jp/", 7},
1490 
1491     {"Japanese IDN without any languages",
1492      "http://xn--l8jvb1ey91xtjb.jp", "", default_format_type,
1493      UnescapeRule::NORMAL,
1494      // Single script is safe for empty languages.
1495      L"http://\x671d\x65e5\x3042\x3055\x3072.jp/", 7},
1496 
1497     {"mailto: with Japanese IDN",
1498      "mailto:foo@xn--l8jvb1ey91xtjb.jp", "ja", default_format_type,
1499      UnescapeRule::NORMAL,
1500      // GURL doesn't assume an email address's domain part as a host name.
1501      L"mailto:foo@xn--l8jvb1ey91xtjb.jp", 7},
1502 
1503     {"file: with Japanese IDN",
1504      "file://xn--l8jvb1ey91xtjb.jp/config.sys", "ja", default_format_type,
1505      UnescapeRule::NORMAL,
1506      L"file://\x671d\x65e5\x3042\x3055\x3072.jp/config.sys", 7},
1507 
1508     {"ftp: with Japanese IDN",
1509      "ftp://xn--l8jvb1ey91xtjb.jp/config.sys", "ja", default_format_type,
1510      UnescapeRule::NORMAL,
1511      L"ftp://\x671d\x65e5\x3042\x3055\x3072.jp/config.sys", 6},
1512 
1513     // -------- omit_username_password flag tests --------
1514     {"With username and password, omit_username_password=false",
1515      "http://user:passwd@example.com/foo", "",
1516      kFormatUrlOmitNothing, UnescapeRule::NORMAL,
1517      L"http://user:passwd@example.com/foo", 19},
1518 
1519     {"With username and password, omit_username_password=true",
1520      "http://user:passwd@example.com/foo", "", default_format_type,
1521      UnescapeRule::NORMAL, L"http://example.com/foo", 7},
1522 
1523     {"With username and no password",
1524      "http://user@example.com/foo", "", default_format_type,
1525      UnescapeRule::NORMAL, L"http://example.com/foo", 7},
1526 
1527     {"Just '@' without username and password",
1528      "http://@example.com/foo", "", default_format_type, UnescapeRule::NORMAL,
1529      L"http://example.com/foo", 7},
1530 
1531     // GURL doesn't think local-part of an email address is username for URL.
1532     {"mailto:, omit_username_password=true",
1533      "mailto:foo@example.com", "", default_format_type, UnescapeRule::NORMAL,
1534      L"mailto:foo@example.com", 7},
1535 
1536     // -------- unescape flag tests --------
1537     {"Do not unescape",
1538      "http://%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB.jp/"
1539      "%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB"
1540      "?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", "en", default_format_type,
1541      UnescapeRule::NONE,
1542      // GURL parses %-encoded hostnames into Punycode.
1543      L"http://xn--qcka1pmc.jp/%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB"
1544      L"?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", 7},
1545 
1546     {"Unescape normally",
1547      "http://%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB.jp/"
1548      "%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB"
1549      "?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", "en", default_format_type,
1550      UnescapeRule::NORMAL,
1551      L"http://xn--qcka1pmc.jp/\x30B0\x30FC\x30B0\x30EB"
1552      L"?q=\x30B0\x30FC\x30B0\x30EB", 7},
1553 
1554     {"Unescape normally including unescape spaces",
1555      "http://www.google.com/search?q=Hello%20World", "en", default_format_type,
1556      UnescapeRule::SPACES, L"http://www.google.com/search?q=Hello World", 7},
1557 
1558     /*
1559     {"unescape=true with some special characters",
1560     "http://user%3A:%40passwd@example.com/foo%3Fbar?q=b%26z", "",
1561     kFormatUrlOmitNothing, UnescapeRule::NORMAL,
1562     L"http://user%3A:%40passwd@example.com/foo%3Fbar?q=b%26z", 25},
1563     */
1564     // Disabled: the resultant URL becomes "...user%253A:%2540passwd...".
1565 
1566     // -------- omit http: --------
1567     {"omit http with user name",
1568      "http://user@example.com/foo", "", kFormatUrlOmitAll,
1569      UnescapeRule::NORMAL, L"example.com/foo", 0},
1570 
1571     {"omit http",
1572      "http://www.google.com/", "en", kFormatUrlOmitHTTP,
1573      UnescapeRule::NORMAL, L"www.google.com/",
1574      0},
1575 
1576     {"omit http with https",
1577      "https://www.google.com/", "en", kFormatUrlOmitHTTP,
1578      UnescapeRule::NORMAL, L"https://www.google.com/",
1579      8},
1580 
1581     {"omit http starts with ftp.",
1582      "http://ftp.google.com/", "en", kFormatUrlOmitHTTP,
1583      UnescapeRule::NORMAL, L"http://ftp.google.com/",
1584      7},
1585 
1586     // -------- omit trailing slash on bare hostname --------
1587     {"omit slash when it's the entire path",
1588      "http://www.google.com/", "en",
1589      kFormatUrlOmitTrailingSlashOnBareHostname, UnescapeRule::NORMAL,
1590      L"http://www.google.com", 7},
1591     {"omit slash when there's a ref",
1592      "http://www.google.com/#ref", "en",
1593      kFormatUrlOmitTrailingSlashOnBareHostname, UnescapeRule::NORMAL,
1594      L"http://www.google.com/#ref", 7},
1595     {"omit slash when there's a query",
1596      "http://www.google.com/?", "en",
1597      kFormatUrlOmitTrailingSlashOnBareHostname, UnescapeRule::NORMAL,
1598      L"http://www.google.com/?", 7},
1599     {"omit slash when it's not the entire path",
1600      "http://www.google.com/foo", "en",
1601      kFormatUrlOmitTrailingSlashOnBareHostname, UnescapeRule::NORMAL,
1602      L"http://www.google.com/foo", 7},
1603     {"omit slash for nonstandard URLs",
1604      "data:/", "en", kFormatUrlOmitTrailingSlashOnBareHostname,
1605      UnescapeRule::NORMAL, L"data:/", 5},
1606     {"omit slash for file URLs",
1607      "file:///", "en", kFormatUrlOmitTrailingSlashOnBareHostname,
1608      UnescapeRule::NORMAL, L"file:///", 7},
1609 
1610     // -------- view-source: --------
1611     {"view-source",
1612      "view-source:http://xn--qcka1pmc.jp/", "ja", default_format_type,
1613      UnescapeRule::NORMAL, L"view-source:http://\x30B0\x30FC\x30B0\x30EB.jp/",
1614      19},
1615 
1616     {"view-source of view-source",
1617      "view-source:view-source:http://xn--qcka1pmc.jp/", "ja",
1618      default_format_type, UnescapeRule::NORMAL,
1619      L"view-source:view-source:http://xn--qcka1pmc.jp/", 12},
1620 
1621     // view-source should omit http and trailing slash where non-view-source
1622     // would.
1623     {"view-source omit http",
1624      "view-source:http://a.b/c", "en", kFormatUrlOmitAll,
1625      UnescapeRule::NORMAL, L"view-source:a.b/c",
1626      12},
1627     {"view-source omit http starts with ftp.",
1628      "view-source:http://ftp.b/c", "en", kFormatUrlOmitAll,
1629      UnescapeRule::NORMAL, L"view-source:http://ftp.b/c",
1630      19},
1631     {"view-source omit slash when it's the entire path",
1632      "view-source:http://a.b/", "en", kFormatUrlOmitAll,
1633      UnescapeRule::NORMAL, L"view-source:a.b",
1634      12},
1635   };
1636 
1637   for (size_t i = 0; i < arraysize(tests); ++i) {
1638     size_t prefix_len;
1639     string16 formatted = FormatUrl(
1640         GURL(tests[i].input), tests[i].languages, tests[i].format_types,
1641         tests[i].escape_rules, NULL, &prefix_len, NULL);
1642     EXPECT_EQ(WideToUTF16(tests[i].output), formatted) << tests[i].description;
1643     EXPECT_EQ(tests[i].prefix_len, prefix_len) << tests[i].description;
1644   }
1645 }
1646 
TEST(NetUtilTest,FormatUrlParsed)1647 TEST(NetUtilTest, FormatUrlParsed) {
1648   // No unescape case.
1649   url_parse::Parsed parsed;
1650   string16 formatted = FormatUrl(
1651       GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/"
1652            "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"),
1653       "ja", kFormatUrlOmitNothing, UnescapeRule::NONE, &parsed, NULL,
1654       NULL);
1655   EXPECT_EQ(WideToUTF16(
1656       L"http://%E3%82%B0:%E3%83%BC@\x30B0\x30FC\x30B0\x30EB.jp:8080"
1657       L"/%E3%82%B0/?q=%E3%82%B0#\x30B0"), formatted);
1658   EXPECT_EQ(WideToUTF16(L"%E3%82%B0"),
1659       formatted.substr(parsed.username.begin, parsed.username.len));
1660   EXPECT_EQ(WideToUTF16(L"%E3%83%BC"),
1661       formatted.substr(parsed.password.begin, parsed.password.len));
1662   EXPECT_EQ(WideToUTF16(L"\x30B0\x30FC\x30B0\x30EB.jp"),
1663       formatted.substr(parsed.host.begin, parsed.host.len));
1664   EXPECT_EQ(WideToUTF16(L"8080"),
1665       formatted.substr(parsed.port.begin, parsed.port.len));
1666   EXPECT_EQ(WideToUTF16(L"/%E3%82%B0/"),
1667       formatted.substr(parsed.path.begin, parsed.path.len));
1668   EXPECT_EQ(WideToUTF16(L"q=%E3%82%B0"),
1669       formatted.substr(parsed.query.begin, parsed.query.len));
1670   EXPECT_EQ(WideToUTF16(L"\x30B0"),
1671       formatted.substr(parsed.ref.begin, parsed.ref.len));
1672 
1673   // Unescape case.
1674   formatted = FormatUrl(
1675       GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/"
1676            "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"),
1677       "ja", kFormatUrlOmitNothing, UnescapeRule::NORMAL, &parsed, NULL,
1678       NULL);
1679   EXPECT_EQ(WideToUTF16(L"http://\x30B0:\x30FC@\x30B0\x30FC\x30B0\x30EB.jp:8080"
1680       L"/\x30B0/?q=\x30B0#\x30B0"), formatted);
1681   EXPECT_EQ(WideToUTF16(L"\x30B0"),
1682       formatted.substr(parsed.username.begin, parsed.username.len));
1683   EXPECT_EQ(WideToUTF16(L"\x30FC"),
1684       formatted.substr(parsed.password.begin, parsed.password.len));
1685   EXPECT_EQ(WideToUTF16(L"\x30B0\x30FC\x30B0\x30EB.jp"),
1686       formatted.substr(parsed.host.begin, parsed.host.len));
1687   EXPECT_EQ(WideToUTF16(L"8080"),
1688       formatted.substr(parsed.port.begin, parsed.port.len));
1689   EXPECT_EQ(WideToUTF16(L"/\x30B0/"),
1690       formatted.substr(parsed.path.begin, parsed.path.len));
1691   EXPECT_EQ(WideToUTF16(L"q=\x30B0"),
1692       formatted.substr(parsed.query.begin, parsed.query.len));
1693   EXPECT_EQ(WideToUTF16(L"\x30B0"),
1694       formatted.substr(parsed.ref.begin, parsed.ref.len));
1695 
1696   // Omit_username_password + unescape case.
1697   formatted = FormatUrl(
1698       GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/"
1699            "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"),
1700       "ja", kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL, &parsed,
1701       NULL, NULL);
1702   EXPECT_EQ(WideToUTF16(L"http://\x30B0\x30FC\x30B0\x30EB.jp:8080"
1703       L"/\x30B0/?q=\x30B0#\x30B0"), formatted);
1704   EXPECT_FALSE(parsed.username.is_valid());
1705   EXPECT_FALSE(parsed.password.is_valid());
1706   EXPECT_EQ(WideToUTF16(L"\x30B0\x30FC\x30B0\x30EB.jp"),
1707       formatted.substr(parsed.host.begin, parsed.host.len));
1708   EXPECT_EQ(WideToUTF16(L"8080"),
1709       formatted.substr(parsed.port.begin, parsed.port.len));
1710   EXPECT_EQ(WideToUTF16(L"/\x30B0/"),
1711       formatted.substr(parsed.path.begin, parsed.path.len));
1712   EXPECT_EQ(WideToUTF16(L"q=\x30B0"),
1713       formatted.substr(parsed.query.begin, parsed.query.len));
1714   EXPECT_EQ(WideToUTF16(L"\x30B0"),
1715       formatted.substr(parsed.ref.begin, parsed.ref.len));
1716 
1717   // View-source case.
1718   formatted = FormatUrl(
1719       GURL("view-source:http://user:passwd@host:81/path?query#ref"),
1720       "", kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL, &parsed,
1721       NULL, NULL);
1722   EXPECT_EQ(WideToUTF16(L"view-source:http://host:81/path?query#ref"),
1723       formatted);
1724   EXPECT_EQ(WideToUTF16(L"view-source:http"),
1725       formatted.substr(parsed.scheme.begin, parsed.scheme.len));
1726   EXPECT_FALSE(parsed.username.is_valid());
1727   EXPECT_FALSE(parsed.password.is_valid());
1728   EXPECT_EQ(WideToUTF16(L"host"),
1729       formatted.substr(parsed.host.begin, parsed.host.len));
1730   EXPECT_EQ(WideToUTF16(L"81"),
1731       formatted.substr(parsed.port.begin, parsed.port.len));
1732   EXPECT_EQ(WideToUTF16(L"/path"),
1733       formatted.substr(parsed.path.begin, parsed.path.len));
1734   EXPECT_EQ(WideToUTF16(L"query"),
1735       formatted.substr(parsed.query.begin, parsed.query.len));
1736   EXPECT_EQ(WideToUTF16(L"ref"),
1737       formatted.substr(parsed.ref.begin, parsed.ref.len));
1738 
1739   // omit http case.
1740   formatted = FormatUrl(
1741       GURL("http://host:8000/a?b=c#d"),
1742       "", kFormatUrlOmitHTTP, UnescapeRule::NORMAL, &parsed, NULL, NULL);
1743   EXPECT_EQ(WideToUTF16(L"host:8000/a?b=c#d"), formatted);
1744   EXPECT_FALSE(parsed.scheme.is_valid());
1745   EXPECT_FALSE(parsed.username.is_valid());
1746   EXPECT_FALSE(parsed.password.is_valid());
1747   EXPECT_EQ(WideToUTF16(L"host"),
1748       formatted.substr(parsed.host.begin, parsed.host.len));
1749   EXPECT_EQ(WideToUTF16(L"8000"),
1750       formatted.substr(parsed.port.begin, parsed.port.len));
1751   EXPECT_EQ(WideToUTF16(L"/a"),
1752       formatted.substr(parsed.path.begin, parsed.path.len));
1753   EXPECT_EQ(WideToUTF16(L"b=c"),
1754       formatted.substr(parsed.query.begin, parsed.query.len));
1755   EXPECT_EQ(WideToUTF16(L"d"),
1756       formatted.substr(parsed.ref.begin, parsed.ref.len));
1757 
1758   // omit http starts with ftp case.
1759   formatted = FormatUrl(
1760       GURL("http://ftp.host:8000/a?b=c#d"),
1761       "", kFormatUrlOmitHTTP, UnescapeRule::NORMAL, &parsed, NULL, NULL);
1762   EXPECT_EQ(WideToUTF16(L"http://ftp.host:8000/a?b=c#d"), formatted);
1763   EXPECT_TRUE(parsed.scheme.is_valid());
1764   EXPECT_FALSE(parsed.username.is_valid());
1765   EXPECT_FALSE(parsed.password.is_valid());
1766   EXPECT_EQ(WideToUTF16(L"http"),
1767       formatted.substr(parsed.scheme.begin, parsed.scheme.len));
1768   EXPECT_EQ(WideToUTF16(L"ftp.host"),
1769       formatted.substr(parsed.host.begin, parsed.host.len));
1770   EXPECT_EQ(WideToUTF16(L"8000"),
1771       formatted.substr(parsed.port.begin, parsed.port.len));
1772   EXPECT_EQ(WideToUTF16(L"/a"),
1773       formatted.substr(parsed.path.begin, parsed.path.len));
1774   EXPECT_EQ(WideToUTF16(L"b=c"),
1775       formatted.substr(parsed.query.begin, parsed.query.len));
1776   EXPECT_EQ(WideToUTF16(L"d"),
1777       formatted.substr(parsed.ref.begin, parsed.ref.len));
1778 
1779   // omit http starts with 'f' case.
1780   formatted = FormatUrl(
1781       GURL("http://f/"),
1782       "", kFormatUrlOmitHTTP, UnescapeRule::NORMAL, &parsed, NULL, NULL);
1783   EXPECT_EQ(WideToUTF16(L"f/"), formatted);
1784   EXPECT_FALSE(parsed.scheme.is_valid());
1785   EXPECT_FALSE(parsed.username.is_valid());
1786   EXPECT_FALSE(parsed.password.is_valid());
1787   EXPECT_FALSE(parsed.port.is_valid());
1788   EXPECT_TRUE(parsed.path.is_valid());
1789   EXPECT_FALSE(parsed.query.is_valid());
1790   EXPECT_FALSE(parsed.ref.is_valid());
1791   EXPECT_EQ(WideToUTF16(L"f"),
1792       formatted.substr(parsed.host.begin, parsed.host.len));
1793   EXPECT_EQ(WideToUTF16(L"/"),
1794       formatted.substr(parsed.path.begin, parsed.path.len));
1795 }
1796 
TEST(NetUtilTest,FormatUrlAdjustOffset)1797 TEST(NetUtilTest, FormatUrlAdjustOffset) {
1798   const AdjustOffsetCase basic_cases[] = {
1799     {0, 0},
1800     {3, 3},
1801     {5, 5},
1802     {6, 6},
1803     {13, 13},
1804     {21, 21},
1805     {22, 22},
1806     {23, 23},
1807     {25, 25},
1808     {26, string16::npos},
1809     {500000, string16::npos},
1810     {string16::npos, string16::npos},
1811   };
1812   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(basic_cases); ++i) {
1813     size_t offset = basic_cases[i].input_offset;
1814     FormatUrl(GURL("http://www.google.com/foo/"), "en",
1815                    kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL,
1816                    NULL, NULL, &offset);
1817     EXPECT_EQ(basic_cases[i].output_offset, offset);
1818   }
1819 
1820   size_t url_size = 26;
1821   std::vector<size_t> offsets;
1822   for (size_t i = 0; i < url_size + 1; ++i)
1823     offsets.push_back(i);
1824   FormatUrlWithOffsets(GURL("http://www.google.com/foo/"), "en",
1825                        kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL,
1826                        NULL, NULL, &offsets);
1827   for (size_t i = 0; i < url_size; ++i)
1828     EXPECT_EQ(i, offsets[i]);
1829   EXPECT_EQ(kNpos, offsets[url_size]);
1830 
1831   const struct {
1832     const char* input_url;
1833     size_t input_offset;
1834     size_t output_offset;
1835   } omit_auth_cases[] = {
1836     {"http://foo:bar@www.google.com/", 6, 6},
1837     {"http://foo:bar@www.google.com/", 7, string16::npos},
1838     {"http://foo:bar@www.google.com/", 8, string16::npos},
1839     {"http://foo:bar@www.google.com/", 10, string16::npos},
1840     {"http://foo:bar@www.google.com/", 11, string16::npos},
1841     {"http://foo:bar@www.google.com/", 14, string16::npos},
1842     {"http://foo:bar@www.google.com/", 15, 7},
1843     {"http://foo:bar@www.google.com/", 25, 17},
1844     {"http://foo@www.google.com/", 9, string16::npos},
1845     {"http://foo@www.google.com/", 11, 7},
1846   };
1847   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(omit_auth_cases); ++i) {
1848     size_t offset = omit_auth_cases[i].input_offset;
1849     FormatUrl(GURL(omit_auth_cases[i].input_url), "en",
1850                    kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL,
1851                    NULL, NULL, &offset);
1852     EXPECT_EQ(omit_auth_cases[i].output_offset, offset);
1853   }
1854 
1855   url_size = 30;
1856   offsets.clear();
1857   for (size_t i = 0; i < url_size; ++i)
1858     offsets.push_back(i);
1859   FormatUrlWithOffsets(GURL("http://foo:bar@www.google.com/"), "en",
1860                        kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL,
1861                        NULL, NULL, &offsets);
1862   for (size_t i = 0; i < 7; ++i)
1863     EXPECT_EQ(i, offsets[i]);
1864   for (size_t i = 7; i < 15; ++i)
1865     EXPECT_EQ(kNpos, offsets[i]);
1866   for (size_t i = 16; i < url_size; ++i)
1867     EXPECT_EQ(i - 8 , offsets[i]);
1868 
1869   const AdjustOffsetCase view_source_cases[] = {
1870     {0, 0},
1871     {3, 3},
1872     {11, 11},
1873     {12, 12},
1874     {13, 13},
1875     {18, 18},
1876     {19, string16::npos},
1877     {20, string16::npos},
1878     {23, 19},
1879     {26, 22},
1880     {string16::npos, string16::npos},
1881   };
1882   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(view_source_cases); ++i) {
1883     size_t offset = view_source_cases[i].input_offset;
1884     FormatUrl(GURL("view-source:http://foo@www.google.com/"), "en",
1885                    kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL,
1886                    NULL, NULL, &offset);
1887     EXPECT_EQ(view_source_cases[i].output_offset, offset);
1888   }
1889 
1890   url_size = 38;
1891   offsets.clear();
1892   for (size_t i = 0; i < url_size; ++i)
1893     offsets.push_back(i);
1894   FormatUrlWithOffsets(GURL("view-source:http://foo@www.google.com/"), "en",
1895                        kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL,
1896                        NULL, NULL, &offsets);
1897   size_t expected[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
1898                        17, 18, kNpos, kNpos, kNpos, kNpos, 19, 20, 21, 22, 23,
1899                        24, 25, 26, 27, 28, 29, 30, 31, 32, 33};
1900   ASSERT_EQ(url_size, arraysize(expected));
1901   for (size_t i = 0; i < url_size; ++i)
1902     EXPECT_EQ(expected[i], offsets[i]);
1903 
1904   const AdjustOffsetCase idn_hostname_cases[] = {
1905     {8, string16::npos},
1906     {16, string16::npos},
1907     {24, string16::npos},
1908     {25, 12},
1909     {30, 17},
1910   };
1911   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(idn_hostname_cases); ++i) {
1912     size_t offset = idn_hostname_cases[i].input_offset;
1913     // "http://\x671d\x65e5\x3042\x3055\x3072.jp/foo/"
1914     FormatUrl(GURL("http://xn--l8jvb1ey91xtjb.jp/foo/"), "ja",
1915                    kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL,
1916                    NULL, NULL, &offset);
1917     EXPECT_EQ(idn_hostname_cases[i].output_offset, offset);
1918   }
1919 
1920   url_size = 33;
1921   offsets.clear();
1922   for (size_t i = 0; i < url_size; ++i)
1923     offsets.push_back(i);
1924   FormatUrlWithOffsets(GURL("http://xn--l8jvb1ey91xtjb.jp/foo/"), "ja",
1925                        kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL,
1926                        NULL, NULL, &offsets);
1927   size_t expected_1[] = {0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, kNpos, kNpos,
1928                          kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos,
1929                          kNpos, kNpos, kNpos, kNpos, kNpos, 12, 13, 14, 15, 16,
1930                          17, 18, 19};
1931   ASSERT_EQ(url_size, arraysize(expected_1));
1932   for (size_t i = 0; i < url_size; ++i)
1933     EXPECT_EQ(expected_1[i], offsets[i]);
1934 
1935   const AdjustOffsetCase unescape_cases[] = {
1936     {25, 25},
1937     {26, string16::npos},
1938     {27, string16::npos},
1939     {28, 26},
1940     {35, string16::npos},
1941     {41, 31},
1942     {59, 33},
1943     {60, string16::npos},
1944     {67, string16::npos},
1945     {68, string16::npos},
1946   };
1947   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(unescape_cases); ++i) {
1948     size_t offset = unescape_cases[i].input_offset;
1949     // "http://www.google.com/foo bar/\x30B0\x30FC\x30B0\x30EB"
1950     FormatUrl(GURL(
1951         "http://www.google.com/foo%20bar/%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB"),
1952         "en", kFormatUrlOmitUsernamePassword, UnescapeRule::SPACES, NULL,
1953         NULL, &offset);
1954     EXPECT_EQ(unescape_cases[i].output_offset, offset);
1955   }
1956 
1957   url_size = 68;
1958   offsets.clear();
1959   for (size_t i = 0; i < url_size; ++i)
1960     offsets.push_back(i);
1961   FormatUrlWithOffsets(GURL(
1962       "http://www.google.com/foo%20bar/%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB"),
1963       "en", kFormatUrlOmitUsernamePassword, UnescapeRule::SPACES, NULL, NULL,
1964       &offsets);
1965   size_t expected_2[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
1966                          16, 17, 18, 19, 20, 21, 22, 23, 24, 25, kNpos, kNpos,
1967                          26, 27, 28, 29, 30, kNpos, kNpos, kNpos, kNpos, kNpos,
1968                          kNpos, kNpos, kNpos, 31, kNpos, kNpos, kNpos, kNpos,
1969                          kNpos, kNpos, kNpos, kNpos, 32, kNpos, kNpos, kNpos,
1970                          kNpos, kNpos, kNpos, kNpos, kNpos, 33, kNpos, kNpos,
1971                          kNpos, kNpos, kNpos, kNpos, kNpos, kNpos};
1972   ASSERT_EQ(url_size, arraysize(expected_2));
1973   for (size_t i = 0; i < url_size; ++i)
1974     EXPECT_EQ(expected_2[i], offsets[i]);
1975 
1976   const AdjustOffsetCase ref_cases[] = {
1977     {30, 30},
1978     {31, 31},
1979     {32, string16::npos},
1980     {34, 32},
1981     {35, string16::npos},
1982     {37, 33},
1983     {38, string16::npos},
1984   };
1985   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(ref_cases); ++i) {
1986     size_t offset = ref_cases[i].input_offset;
1987     // "http://www.google.com/foo.html#\x30B0\x30B0z"
1988     FormatUrl(GURL(
1989         "http://www.google.com/foo.html#\xE3\x82\xB0\xE3\x82\xB0z"), "en",
1990         kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL, NULL, NULL,
1991         &offset);
1992     EXPECT_EQ(ref_cases[i].output_offset, offset);
1993   }
1994 
1995   url_size = 38;
1996   offsets.clear();
1997   for (size_t i = 0; i < url_size; ++i)
1998     offsets.push_back(i);
1999   // "http://www.google.com/foo.html#\x30B0\x30B0z"
2000   FormatUrlWithOffsets(GURL(
2001       "http://www.google.com/foo.html#\xE3\x82\xB0\xE3\x82\xB0z"), "en",
2002       kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL, NULL, NULL,
2003       &offsets);
2004   size_t expected_3[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
2005                          16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
2006                          30, 31, kNpos, kNpos, 32, kNpos, kNpos, 33};
2007   ASSERT_EQ(url_size, arraysize(expected_3));
2008   for (size_t i = 0; i < url_size; ++i)
2009     EXPECT_EQ(expected_3[i], offsets[i]);
2010 
2011   const AdjustOffsetCase omit_http_cases[] = {
2012     {0, string16::npos},
2013     {3, string16::npos},
2014     {7, 0},
2015     {8, 1},
2016   };
2017   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(omit_http_cases); ++i) {
2018     size_t offset = omit_http_cases[i].input_offset;
2019     FormatUrl(GURL("http://www.google.com"), "en",
2020         kFormatUrlOmitHTTP, UnescapeRule::NORMAL, NULL, NULL, &offset);
2021     EXPECT_EQ(omit_http_cases[i].output_offset, offset);
2022   }
2023 
2024   url_size = 23;
2025   offsets.clear();
2026   for (size_t i = 0; i < url_size; ++i)
2027     offsets.push_back(i);
2028   FormatUrlWithOffsets(GURL("http://www.google.com"), "en",
2029       kFormatUrlOmitHTTP, UnescapeRule::NORMAL, NULL, NULL, &offsets);
2030   size_t expected_4[] = {kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 0, 1,
2031                          2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, kNpos};
2032   ASSERT_EQ(url_size, arraysize(expected_4));
2033   for (size_t i = 0; i < url_size; ++i)
2034     EXPECT_EQ(expected_4[i], offsets[i]);
2035 
2036   const AdjustOffsetCase omit_http_start_with_ftp[] = {
2037     {0, 0},
2038     {3, 3},
2039     {8, 8},
2040   };
2041   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(omit_http_start_with_ftp); ++i) {
2042     size_t offset = omit_http_start_with_ftp[i].input_offset;
2043     FormatUrl(GURL("http://ftp.google.com"), "en",
2044         kFormatUrlOmitHTTP, UnescapeRule::NORMAL, NULL, NULL, &offset);
2045     EXPECT_EQ(omit_http_start_with_ftp[i].output_offset, offset);
2046   }
2047 
2048   url_size = 23;
2049   offsets.clear();
2050   for (size_t i = 0; i < url_size; ++i)
2051     offsets.push_back(i);
2052   FormatUrlWithOffsets(GURL("http://ftp.google.com"), "en",
2053       kFormatUrlOmitHTTP, UnescapeRule::NORMAL, NULL, NULL, &offsets);
2054   size_t expected_5[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
2055                          16, 17, 18, 19, 20, 21, kNpos};
2056   ASSERT_EQ(url_size, arraysize(expected_5));
2057   for (size_t i = 0; i < url_size; ++i)
2058     EXPECT_EQ(expected_5[i], offsets[i]);
2059 
2060   const AdjustOffsetCase omit_all_cases[] = {
2061     {12, 0},
2062     {13, 1},
2063     {0, string16::npos},
2064     {3, string16::npos},
2065   };
2066   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(omit_all_cases); ++i) {
2067     size_t offset = omit_all_cases[i].input_offset;
2068     FormatUrl(GURL("http://user@foo.com/"), "en", kFormatUrlOmitAll,
2069                    UnescapeRule::NORMAL, NULL, NULL, &offset);
2070     EXPECT_EQ(omit_all_cases[i].output_offset, offset);
2071   }
2072 
2073   url_size = 21;
2074   offsets.clear();
2075   for (size_t i = 0; i < url_size; ++i)
2076     offsets.push_back(i);
2077   FormatUrlWithOffsets(GURL("http://user@foo.com/"), "en", kFormatUrlOmitAll,
2078                        UnescapeRule::NORMAL, NULL, NULL, &offsets);
2079   size_t expected_6[] = {kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos,
2080                          kNpos, kNpos, kNpos, kNpos, 0, 1, 2, 3, 4, 5, 6, 7,
2081                          kNpos};
2082   ASSERT_EQ(url_size, arraysize(expected_6));
2083   for (size_t i = 0; i < url_size; ++i)
2084     EXPECT_EQ(expected_6[i], offsets[i]);
2085 }
2086 
// Feeds a table of URLs through SimplifyUrlForRequest() and compares each
// result, as a GURL, with the expected simplified URL.
TEST(NetUtilTest, SimplifyUrlForRequest) {
  struct SimplifyCase {
    const char* input_url;
    const char* expected_simplified_url;
  };
  const SimplifyCase kCases[] = {
    // Reference section should be stripped.
    { "http://www.google.com:78/foobar?query=1#hash",
      "http://www.google.com:78/foobar?query=1" },
    // Reference section can itself contain #.
    { "http://192.168.0.1?query=1#hash#10#11#13#14",
      "http://192.168.0.1?query=1" },
    // Strip username/password.
    { "http://user:pass@google.com",
      "http://google.com/" },
    // Strip both the reference and the username/password.
    { "http://user:pass@google.com:80/sup?yo#X#X",
      "http://google.com/sup?yo" },
    // Try an HTTPS URL -- strip both the reference and the username/password.
    { "https://user:pass@google.com:80/sup?yo#X#X",
      "https://google.com:80/sup?yo" },
    // Try an FTP URL -- strip both the reference and the username/password.
    { "ftp://user:pass@google.com:80/sup?yo#X#X",
      "ftp://google.com:80/sup?yo" },
    // Try a nonstandard URL; it is expected to come back unchanged.
    { "foobar://user:pass@google.com:80/sup?yo#X#X",
      "foobar://user:pass@google.com:80/sup?yo#X#X" },
  };

  for (size_t index = 0; index < ARRAYSIZE_UNSAFE(kCases); ++index) {
    const char* const raw_input = kCases[index].input_url;
    // Identify the failing row in any assertion output below.
    SCOPED_TRACE(base::StringPrintf("Test[%" PRIuS "]: %s", index, raw_input));

    const GURL wanted(kCases[index].expected_simplified_url);
    const GURL simplified = SimplifyUrlForRequest(GURL(raw_input));
    EXPECT_EQ(wanted, simplified);
  }
}
2131 
// Exercises SetExplicitlyAllowedPorts() with malformed and well-formed port
// lists, checking the size of explicitly_allowed_ports after every call.
TEST(NetUtilTest, SetExplicitlyAllowedPortsTest) {
  // Lists for which the allowed-port set is expected to stay empty.
  const char* const kMalformed[] = {
    "1,2,a", "'1','2'", "1, 2, 3", "1 0,11,12"
  };
  // Ordered so that the entry at index N names exactly N ports; the second
  // loop relies on this.
  const char* const kWellFormed[] = {
    "", "1", "1,2", "1,2,3", "10,11,12,13"
  };

  for (size_t n = 0; n < ARRAYSIZE_UNSAFE(kMalformed); ++n) {
    SetExplicitlyAllowedPorts(kMalformed[n]);
    EXPECT_EQ(0, static_cast<int>(explicitly_allowed_ports.size()));
  }

  for (size_t n = 0; n < ARRAYSIZE_UNSAFE(kWellFormed); ++n) {
    SetExplicitlyAllowedPorts(kWellFormed[n]);
    EXPECT_EQ(n, explicitly_allowed_ports.size());
  }
}
2146 
// Checks GetHostOrSpecFromURL() against a small table of URLs: the two http
// URLs are expected to yield "example.com" (with or without a trailing dot on
// the host), and the file URL is expected to yield its full spec.
TEST(NetUtilTest, GetHostOrSpecFromURL) {
  const struct {
    const char* url;
    const char* expected;
  } kCases[] = {
    { "http://example.com/test", "example.com" },
    { "http://example.com./test", "example.com" },
    { "file:///tmp/test.html", "file:///tmp/test.html" },
  };

  for (size_t n = 0; n < ARRAYSIZE_UNSAFE(kCases); ++n) {
    EXPECT_EQ(kCases[n].expected, GetHostOrSpecFromURL(GURL(kCases[n].url)))
        << kCases[n].url;
  }
}
2155 
2156 // Test that invalid IP literals fail to parse.
TEST(NetUtilTest,ParseIPLiteralToNumber_FailParse)2157 TEST(NetUtilTest, ParseIPLiteralToNumber_FailParse) {
2158   IPAddressNumber number;
2159 
2160   EXPECT_FALSE(ParseIPLiteralToNumber("bad value", &number));
2161   EXPECT_FALSE(ParseIPLiteralToNumber("bad:value", &number));
2162   EXPECT_FALSE(ParseIPLiteralToNumber("", &number));
2163   EXPECT_FALSE(ParseIPLiteralToNumber("192.168.0.1:30", &number));
2164   EXPECT_FALSE(ParseIPLiteralToNumber("  192.168.0.1  ", &number));
2165   EXPECT_FALSE(ParseIPLiteralToNumber("[::1]", &number));
2166 }
2167 
2168 // Test parsing an IPv4 literal.
TEST(NetUtilTest,ParseIPLiteralToNumber_IPv4)2169 TEST(NetUtilTest, ParseIPLiteralToNumber_IPv4) {
2170   IPAddressNumber number;
2171   EXPECT_TRUE(ParseIPLiteralToNumber("192.168.0.1", &number));
2172   EXPECT_EQ("192,168,0,1", DumpIPNumber(number));
2173 }
2174 
2175 // Test parsing an IPv6 literal.
TEST(NetUtilTest,ParseIPLiteralToNumber_IPv6)2176 TEST(NetUtilTest, ParseIPLiteralToNumber_IPv6) {
2177   IPAddressNumber number;
2178   EXPECT_TRUE(ParseIPLiteralToNumber("1:abcd::3:4:ff", &number));
2179   EXPECT_EQ("0,1,171,205,0,0,0,0,0,0,0,3,0,4,0,255", DumpIPNumber(number));
2180 }
2181 
2182 // Test mapping an IPv4 address to an IPv6 address.
TEST(NetUtilTest,ConvertIPv4NumberToIPv6Number)2183 TEST(NetUtilTest, ConvertIPv4NumberToIPv6Number) {
2184   IPAddressNumber ipv4_number;
2185   EXPECT_TRUE(ParseIPLiteralToNumber("192.168.0.1", &ipv4_number));
2186 
2187   IPAddressNumber ipv6_number =
2188       ConvertIPv4NumberToIPv6Number(ipv4_number);
2189 
2190   // ::ffff:192.168.1.1
2191   EXPECT_EQ("0,0,0,0,0,0,0,0,0,0,255,255,192,168,0,1",
2192             DumpIPNumber(ipv6_number));
2193 }
2194 
2195 // Test parsing invalid CIDR notation literals.
TEST(NetUtilTest,ParseCIDRBlock_Invalid)2196 TEST(NetUtilTest, ParseCIDRBlock_Invalid) {
2197   const char* bad_literals[] = {
2198       "foobar",
2199       "",
2200       "192.168.0.1",
2201       "::1",
2202       "/",
2203       "/1",
2204       "1",
2205       "192.168.1.1/-1",
2206       "192.168.1.1/33",
2207       "::1/-3",
2208       "a::3/129",
2209       "::1/x",
2210       "192.168.0.1//11"
2211   };
2212 
2213   for (size_t i = 0; i < arraysize(bad_literals); ++i) {
2214     IPAddressNumber ip_number;
2215     size_t prefix_length_in_bits;
2216 
2217     EXPECT_FALSE(ParseCIDRBlock(bad_literals[i],
2218                                      &ip_number,
2219                                      &prefix_length_in_bits));
2220   }
2221 }
2222 
2223 // Test parsing a valid CIDR notation literal.
TEST(NetUtilTest,ParseCIDRBlock_Valid)2224 TEST(NetUtilTest, ParseCIDRBlock_Valid) {
2225   IPAddressNumber ip_number;
2226   size_t prefix_length_in_bits;
2227 
2228   EXPECT_TRUE(ParseCIDRBlock("192.168.0.1/11",
2229                                   &ip_number,
2230                                   &prefix_length_in_bits));
2231 
2232   EXPECT_EQ("192,168,0,1", DumpIPNumber(ip_number));
2233   EXPECT_EQ(11u, prefix_length_in_bits);
2234 }
2235 
TEST(NetUtilTest,IPNumberMatchesPrefix)2236 TEST(NetUtilTest, IPNumberMatchesPrefix) {
2237   struct {
2238     const char* cidr_literal;
2239     const char* ip_literal;
2240     bool expected_to_match;
2241   } tests[] = {
2242     // IPv4 prefix with IPv4 inputs.
2243     {
2244       "10.10.1.32/27",
2245       "10.10.1.44",
2246       true
2247     },
2248     {
2249       "10.10.1.32/27",
2250       "10.10.1.90",
2251       false
2252     },
2253     {
2254       "10.10.1.32/27",
2255       "10.10.1.90",
2256       false
2257     },
2258 
2259     // IPv6 prefix with IPv6 inputs.
2260     {
2261       "2001:db8::/32",
2262       "2001:DB8:3:4::5",
2263       true
2264     },
2265     {
2266       "2001:db8::/32",
2267       "2001:c8::",
2268       false
2269     },
2270 
2271     // IPv6 prefix with IPv4 inputs.
2272     {
2273       "2001:db8::/33",
2274       "192.168.0.1",
2275       false
2276     },
2277     {
2278       "::ffff:192.168.0.1/112",
2279       "192.168.33.77",
2280       true
2281     },
2282 
2283     // IPv4 prefix with IPv6 inputs.
2284     {
2285       "10.11.33.44/16",
2286       "::ffff:0a0b:89",
2287       true
2288     },
2289     {
2290       "10.11.33.44/16",
2291       "::ffff:10.12.33.44",
2292       false
2293     },
2294   };
2295   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(tests); ++i) {
2296     SCOPED_TRACE(base::StringPrintf("Test[%" PRIuS "]: %s, %s", i,
2297                                     tests[i].cidr_literal,
2298                                     tests[i].ip_literal));
2299 
2300     IPAddressNumber ip_number;
2301     EXPECT_TRUE(ParseIPLiteralToNumber(tests[i].ip_literal, &ip_number));
2302 
2303     IPAddressNumber ip_prefix;
2304     size_t prefix_length_in_bits;
2305 
2306     EXPECT_TRUE(ParseCIDRBlock(tests[i].cidr_literal,
2307                                &ip_prefix,
2308                                &prefix_length_in_bits));
2309 
2310     EXPECT_EQ(tests[i].expected_to_match,
2311               IPNumberMatchesPrefix(ip_number,
2312                                     ip_prefix,
2313                                     prefix_length_in_bits));
2314   }
2315 }
2316 
// Checks net::IsLocalhost() against one table of names/addresses that must be
// accepted and one table that must be rejected.
TEST(NetUtilTest, IsLocalhost) {
  const char* const kAccepted[] = {
    "localhost",
    "localhost.localdomain",
    "localhost6",
    "localhost6.localdomain6",
    "127.0.0.1",
    "127.0.1.0",
    "127.1.0.0",
    "127.0.0.255",
    "127.0.255.0",
    "127.255.0.0",
    "::1",
    "0:0:0:0:0:0:0:1",
  };
  const char* const kRejected[] = {
    "localhostx",
    "foo.localdomain",
    "localhost6x",
    "localhost.localdomain6",
    "localhost6.localdomain",
    "127.0.0.1.1",
    ".127.0.0.255",
    "::2",
    "::1:1",
    "0:0:0:0:1:0:0:1",
    "::1:1",
    "0:0:0:0:0:0:0:0:1",
  };

  for (size_t n = 0; n < arraysize(kAccepted); ++n)
    EXPECT_TRUE(net::IsLocalhost(kAccepted[n])) << kAccepted[n];

  for (size_t n = 0; n < arraysize(kRejected); ++n)
    EXPECT_FALSE(net::IsLocalhost(kRejected[n])) << kRejected[n];
}
2344 
2345 // Verify GetNetworkList().
TEST(NetUtilTest,GetNetworkList)2346 TEST(NetUtilTest, GetNetworkList) {
2347   NetworkInterfaceList list;
2348   ASSERT_TRUE(GetNetworkList(&list));
2349 
2350   for (NetworkInterfaceList::iterator it = list.begin();
2351        it != list.end(); ++it) {
2352     // Verify that the name is not empty.
2353     EXPECT_FALSE(it->name.empty());
2354 
2355     // Verify that the address is correct.
2356     EXPECT_TRUE(it->address.size() == kIPv4AddressSize ||
2357                 it->address.size() == kIPv6AddressSize)
2358         << "Invalid address of size " << it->address.size();
2359     bool all_zeroes = true;
2360     for (size_t i = 0; i < it->address.size(); ++i) {
2361       if (it->address[i] != 0) {
2362         all_zeroes = false;
2363         break;
2364       }
2365     }
2366     EXPECT_FALSE(all_zeroes);
2367   }
2368 }
2369 
// Applies the ClampComponentOffset(5) functor to the offsets 0..9 via
// std::transform and checks the result: offsets below 5 are expected to become
// kNpos, and offsets from 5 upwards are expected to be unchanged.
TEST(NetUtilTest, AdjustComponentOffset) {
  std::vector<size_t> old_offsets;
  for (size_t i = 0; i < 10; ++i)
    old_offsets.push_back(i);
  std::vector<size_t> new_offsets;
  std::transform(old_offsets.begin(),
                 old_offsets.end(),
                 std::back_inserter(new_offsets),
                 ClampComponentOffset(5));
  const size_t expected_1[] = {kNpos, kNpos, kNpos, kNpos, kNpos,
                               5, 6, 7, 8, 9};
  // Fatal on mismatch: the loop below indexes |new_offsets| up to the expected
  // size, which would read out of bounds if the vector were shorter.
  ASSERT_EQ(arraysize(expected_1), new_offsets.size());
  EXPECT_EQ(old_offsets.size(), new_offsets.size());
  for (size_t i = 0; i < arraysize(expected_1); ++i)
    EXPECT_EQ(expected_1[i], new_offsets[i]);
}
2385 
2386 }  // namespace net
2387