1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "net/base/net_util.h"
6
7 #include <algorithm>
8
9 #include "base/file_path.h"
10 #include "base/format_macros.h"
11 #include "base/string_number_conversions.h"
12 #include "base/string_util.h"
13 #include "base/stringprintf.h"
14 #include "base/sys_string_conversions.h"
15 #include "base/test/test_file_util.h"
16 #include "base/time.h"
17 #include "base/utf_string_conversions.h"
18 #include "googleurl/src/gurl.h"
19 #include "net/base/sys_addrinfo.h"
20 #include "testing/gtest/include/gtest/gtest.h"
21
22 namespace net {
23
24 namespace {
25
26 static const size_t kNpos = string16::npos;
27
// One file-path / file-URL pairing used by the FileURLConversion test.
struct FileCase {
  const wchar_t* file;  // File system path, as a wide string.
  const char* url;      // file: URL paired with |file|.
};
32
// A header name and the value GetSpecificHeader() is expected to return for
// it (see the GetSpecificHeader test).
struct HeaderCase {
  const wchar_t* header_name;  // Name passed to GetSpecificHeader().
  const wchar_t* expected;     // Expected header value.
};
37
// A header name plus a parameter name, and the parameter value
// GetHeaderParamValue() is expected to extract (see the GetHeaderParamValue
// test).
struct HeaderParamCase {
  const wchar_t* header_name;  // Header to fetch with GetSpecificHeader().
  const wchar_t* param_name;   // Parameter looked up inside that header value.
  const wchar_t* expected;     // Expected parameter value.
};
43
// One Content-Disposition test case, used by the GetFileNameFromCD test.
// NOTE(review): the code consuming these fields is outside this part of the
// file; the field notes below follow the table entries - confirm there.
struct FileNameCDCase {
  const char* header_field;      // Full "Content-Disposition: ..." line.
  const char* referrer_charset;  // Charset name such as "iso-8859-1", or "".
  const wchar_t* expected;       // Expected file name; L"" in failure cases.
};
49
// Language lists used by the IDN test table. The number of entries here also
// fixes the length of IDNTestCase::unicode_allowed.
const wchar_t* kLanguages[] = {
  // Empty list and single languages.
  L"",          //  0
  L"en",        //  1
  L"zh-CN",     //  2
  L"ja",        //  3
  L"ko",        //  4
  L"he",        //  5
  L"ar",        //  6
  L"ru",        //  7
  L"el",        //  8
  L"fr",        //  9
  L"de",        // 10
  L"pt",        // 11
  L"sv",        // 12
  L"th",        // 13
  L"hi",        // 14
  // Comma-separated combinations.
  L"de,en",     // 15
  L"el,en",     // 16
  L"zh-TW,en",  // 17
  L"ko,ja",     // 18
  L"he,ru,en",  // 19
  L"zh,ru,en"   // 20
};
57
// One row of the IDN test table (see idn_cases).
struct IDNTestCase {
  const char* input;              // Host name in ASCII / punycode form.
  const wchar_t* unicode_output;  // Unicode form of |input| (NULL if invalid).
  // One flag per entry of kLanguages.
  // NOTE(review): presumably unicode_allowed[j] tells whether the Unicode
  // form is expected for language list kLanguages[j] - confirm against the
  // IDN tests, which are not in this part of the file.
  const bool unicode_allowed[arraysize(kLanguages)];
};
63
// Table of IDN test hosts. Each row gives the ASCII/punycode |input|, its
// Unicode form |unicode_output| (NULL for the invalid-IDN row) and the
// per-language-list |unicode_allowed| flags (one per kLanguages entry).
// Rows that spell out only 20 flags leave the last one (the slot matching
// the final kLanguages entry) value-initialized, i.e. false.
// TODO(jungshik) This is just a random sample of languages and is far
// from exhaustive.  We may have to generate all the combinations
// of languages (powerset of a set of all the languages).
const IDNTestCase idn_cases[] = {
  // No IDN
  {"www.google.com", L"www.google.com",
   {true, true, true, true, true,
    true, true, true, true, true,
    true, true, true, true, true,
    true, true, true, true, true,
    true}},
  {"www.google.com.", L"www.google.com.",
   {true, true, true, true, true,
    true, true, true, true, true,
    true, true, true, true, true,
    true, true, true, true, true,
    true}},
  {".", L".",
   {true, true, true, true, true,
    true, true, true, true, true,
    true, true, true, true, true,
    true, true, true, true, true,
    true}},
  {"", L"",
   {true, true, true, true, true,
    true, true, true, true, true,
    true, true, true, true, true,
    true, true, true, true, true,
    true}},
  // IDN
  // Hanzi (Traditional Chinese)
  {"xn--1lq90ic7f1rc.cn", L"\x5317\x4eac\x5927\x5b78.cn",
   {true, false, true, true, false,
    false, false, false, false, false,
    false, false, false, false, false,
    false, false, true, true, false,
    true}},
  // Hanzi ('video' in Simplified Chinese : will pass only in zh-CN,zh)
  {"xn--cy2a840a.com", L"\x89c6\x9891.com",
   {true, false, true, false, false,
    false, false, false, false, false,
    false, false, false, false, false,
    false, false, false, false, false,
    true}},
  // Hanzi + '123'
  {"www.xn--123-p18d.com", L"www.\x4e00" L"123.com",
   {true, false, true, true, false,
    false, false, false, false, false,
    false, false, false, false, false,
    false, false, true, true, false,
    true}},
  // Hanzi + Latin : U+56FD is simplified and is regarded
  // as not supported in zh-TW.
  {"www.xn--hello-9n1hm04c.com", L"www.hello\x4e2d\x56fd.com",
   {false, false, true, true, false,
    false, false, false, false, false,
    false, false, false, false, false,
    false, false, false, true, false,
    true}},
  // Kanji + Kana (Japanese)
  {"xn--l8jvb1ey91xtjb.jp", L"\x671d\x65e5\x3042\x3055\x3072.jp",
   {true, false, false, true, false,
    false, false, false, false, false,
    false, false, false, false, false,
    false, false, false, true, false,
    false}},
  // Katakana including U+30FC
  {"xn--tckm4i2e.jp", L"\x30b3\x30de\x30fc\x30b9.jp",
   {true, false, false, true, false,
    false, false, false, false, false,
    false, false, false, false, false,
    false, false, false, true, false,
    }},
  {"xn--3ck7a7g.jp", L"\u30ce\u30f3\u30bd.jp",
   {true, false, false, true, false,
    false, false, false, false, false,
    false, false, false, false, false,
    false, false, false, true, false,
    }},
  // Katakana + Latin (Japanese)
  // TODO(jungshik): Change 'false' in the first element to 'true'
  // after upgrading to ICU 4.2.1 to use new uspoof_* APIs instead
  // of our IsIDNComponentInSingleScript().
  {"xn--e-efusa1mzf.jp", L"e\x30b3\x30de\x30fc\x30b9.jp",
   {false, false, false, true, false,
    false, false, false, false, false,
    false, false, false, false, false,
    false, false, false, true, false,
    }},
  {"xn--3bkxe.jp", L"\x30c8\x309a.jp",
   {false, false, false, true, false,
    false, false, false, false, false,
    false, false, false, false, false,
    false, false, false, true, false,
    }},
  // Hangul (Korean)
  {"www.xn--or3b17p6jjc.kr", L"www.\xc804\xc790\xc815\xbd80.kr",
   {true, false, false, false, true,
    false, false, false, false, false,
    false, false, false, false, false,
    false, false, false, true, false,
    false}},
  // b<u-umlaut>cher (German)
  {"xn--bcher-kva.de", L"b\x00fc" L"cher.de",
   {true, false, false, false, false,
    false, false, false, false, true,
    true, false, false, false, false,
    true, false, false, false, false,
    false}},
  // a with diaeresis
  {"www.xn--frgbolaget-q5a.se", L"www.f\x00e4rgbolaget.se",
   {true, false, false, false, false,
    false, false, false, false, false,
    true, false, true, false, false,
    true, false, false, false, false,
    false}},
  // c-cedilla (French)
  {"www.xn--alliancefranaise-npb.fr", L"www.alliancefran\x00e7" L"aise.fr",
   {true, false, false, false, false,
    false, false, false, false, true,
    false, true, false, false, false,
    false, false, false, false, false,
    false}},
  // caf<e with acute accent> (French)
  {"xn--caf-dma.fr", L"caf\x00e9.fr",
   {true, false, false, false, false,
    false, false, false, false, true,
    false, true, true, false, false,
    false, false, false, false, false,
    false}},
  // c-cedilla and a with tilde (Portuguese)
  {"xn--poema-9qae5a.com.br", L"p\x00e3oema\x00e7\x00e3.com.br",
   {true, false, false, false, false,
    false, false, false, false, false,
    false, true, false, false, false,
    false, false, false, false, false,
    false}},
  // s with caron
  {"xn--achy-f6a.com", L"\x0161" L"achy.com",
   {true, false, false, false, false,
    false, false, false, false, false,
    false, false, false, false, false,
    false, false, false, false, false,
    false}},
  // TODO(jungshik) : Add examples with Cyrillic letters
  // only used in some languages written in Cyrillic.
  // Eutopia (Greek)
  {"xn--kxae4bafwg.gr", L"\x03bf\x03c5\x03c4\x03bf\x03c0\x03af\x03b1.gr",
   {true, false, false, false, false,
    false, false, false, true, false,
    false, false, false, false, false,
    false, true, false, false, false,
    false}},
  // Eutopia + 123 (Greek)
  {"xn---123-pldm0haj2bk.gr",
   L"\x03bf\x03c5\x03c4\x03bf\x03c0\x03af\x03b1-123.gr",
   {true, false, false, false, false,
    false, false, false, true, false,
    false, false, false, false, false,
    false, true, false, false, false,
    false}},
  // Cyrillic (Russian)
  {"xn--n1aeec9b.ru", L"\x0442\x043e\x0440\x0442\x044b.ru",
   {true, false, false, false, false,
    false, false, true, false, false,
    false, false, false, false, false,
    false, false, false, false, true,
    true}},
  // Cyrillic + 123 (Russian)
  {"xn---123-45dmmc5f.ru", L"\x0442\x043e\x0440\x0442\x044b-123.ru",
   {true, false, false, false, false,
    false, false, true, false, false,
    false, false, false, false, false,
    false, false, false, false, true,
    true}},
  // Arabic
  {"xn--mgba1fmg.ar", L"\x0627\x0641\x0644\x0627\x0645.ar",
   {true, false, false, false, false,
    false, true, false, false, false,
    false, false, false, false, false,
    false, false, false, false, false,
    false}},
  // Hebrew
  {"xn--4dbib.he", L"\x05d5\x05d0\x05d4.he",
   {true, false, false, false, false,
    true, false, false, false, false,
    false, false, false, false, false,
    false, false, false, false, true,
    false}},
  // Thai
  {"xn--12c2cc4ag3b4ccu.th",
   L"\x0e2a\x0e32\x0e22\x0e01\x0e32\x0e23\x0e1a\x0e34\x0e19.th",
   {true, false, false, false, false,
    false, false, false, false, false,
    false, false, false, true, false,
    false, false, false, false, false,
    false}},
  // Devanagari (Hindi)
  {"www.xn--l1b6a9e1b7c.in", L"www.\x0905\x0915\x094b\x0932\x093e.in",
   {true, false, false, false, false,
    false, false, false, false, false,
    false, false, false, false, true,
    false, false, false, false, false,
    false}},
  // Invalid IDN
  {"xn--hello?world.com", NULL,
   {false, false, false, false, false,
    false, false, false, false, false,
    false, false, false, false, false,
    false, false, false, false, false,
    false}},
  // Unsafe IDNs
  // "payp<alpha>l.com"
  {"www.xn--paypl-g9d.com", L"payp\x03b1l.com",
   {false, false, false, false, false,
    false, false, false, false, false,
    false, false, false, false, false,
    false, false, false, false, false,
    false}},
  // google.gr with Greek omicron and epsilon
  {"xn--ggl-6xc1ca.gr", L"g\x03bf\x03bfgl\x03b5.gr",
   {false, false, false, false, false,
    false, false, false, false, false,
    false, false, false, false, false,
    false, false, false, false, false,
    false}},
  // google.ru with Cyrillic o
  {"xn--ggl-tdd6ba.ru", L"g\x043e\x043egl\x0435.ru",
   {false, false, false, false, false,
    false, false, false, false, false,
    false, false, false, false, false,
    false, false, false, false, false,
    false}},
  // h<e with acute>llo<China in Han>.cn
  {"xn--hllo-bpa7979ih5m.cn", L"h\x00e9llo\x4e2d\x56fd.cn",
   {false, false, false, false, false,
    false, false, false, false, false,
    false, false, false, false, false,
    false, false, false, false, false,
    false}},
  // <Greek rho><Cyrillic a><Cyrillic u>.ru
  {"xn--2xa6t2b.ru", L"\x03c1\x0430\x0443.ru",
   {false, false, false, false, false,
    false, false, false, false, false,
    false, false, false, false, false,
    false, false, false, false, false,
    false}},
  // One that's really long that will force a buffer realloc
  {"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
       "aaaaaaa",
   L"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
       L"aaaaaaaa",
   {true, true, true, true, true,
    true, true, true, true, true,
    true, true, true, true, true,
    true, true, true, true, true,
    true}},
  // Test cases for characters we blacklisted although allowed in IDN.
  // Embedded spaces will be turned to %20 in the display.
  // TODO(jungshik): We need to have more cases. This is a typical
  // data-driven trap. The following test cases need to be separated
  // and tested only for a couple of languages.
  {"xn--osd3820f24c.kr", L"\xac00\xb098\x115f.kr",
   {false, false, false, false, false,
    false, false, false, false, false,
    false, false, false, false, false,
    false, false, false, false, false,
    false}},
  {"www.xn--google-ho0coa.com", L"www.\x2039google\x203a.com",
   {false, false, false, false, false,
    false, false, false, false, false,
    false, false, false, false, false,
    false, false, false, false, false,
    }},
  {"google.xn--comabc-k8d", L"google.com\x0338" L"abc",
   {false, false, false, false, false,
    false, false, false, false, false,
    false, false, false, false, false,
    false, false, false, false, false,
    }},
  {"google.xn--com-oh4ba.evil.jp", L"google.com\x309a\x309a.evil.jp",
   {false, false, false, false, false,
    false, false, false, false, false,
    false, false, false, false, false,
    false, false, false, false, false,
    }},
  {"google.xn--comevil-v04f.jp", L"google.com\x30ce" L"evil.jp",
   {false, false, false, false, false,
    false, false, false, false, false,
    false, false, false, false, false,
    false, false, false, false, false,
    }},
#if 0
  // These two cases are special. We need a separate test.
  // U+3000 and U+3002 are normalized to ASCII space and dot.
  {"xn-- -kq6ay5z.cn", L"\x4e2d\x56fd\x3000.cn",
   {false, false, true, false, false,
    false, false, false, false, false,
    false, false, false, false, false,
    false, false, true, false, false,
    true}},
  {"xn--fiqs8s.cn", L"\x4e2d\x56fd\x3002" L"cn",
   {false, false, true, false, false,
    false, false, false, false, false,
    false, false, false, false, false,
    false, false, true, false, false,
    true}},
#endif
};
373
// A pair of string offsets for offset-adjustment tests.
// NOTE(review): the tests using this struct are outside this part of the
// file; presumably |output_offset| is the expected result for
// |input_offset| - confirm against those tests.
struct AdjustOffsetCase {
  size_t input_offset;
  size_t output_offset;
};
378
// A host / desired-TLD pair together with an expected boolean result.
// NOTE(review): the test using this struct is outside this part of the
// file - confirm which function produces |expected_output|.
struct CompliantHostCase {
  const char* host;
  const char* desired_tld;
  bool expected_output;
};
384
// Inputs and expected result for a suggested-file-name test case.
// NOTE(review): the test using this struct is outside this part of the
// file; field roles below are read off the field names - confirm there.
struct SuggestedFilenameCase {
  const char* url;
  const char* content_disp_header;  // presumably a Content-Disposition value
  const char* referrer_charset;
  const wchar_t* default_filename;
  const wchar_t* expected_filename;
};
392
// Inputs and expected output for a URL-formatting test case.
// NOTE(review): the test using this struct is outside this part of the
// file; confirm the exact meaning of each field (in particular
// |prefix_len|) against it.
struct UrlTestData {
  const char* description;
  const char* input;
  const char* languages;
  FormatUrlTypes format_types;
  UnescapeRule::Type escape_rules;
  const wchar_t* output;  // Use |wchar_t| to handle Unicode constants easily.
  size_t prefix_len;
};
402
// Returns an addrinfo for the given 32-bit address (IPv4.)
// The result lives in static storage, so don't delete it; every call
// overwrites the structures handed out by the previous call.
// |bytes| should be an array of length 4; it is copied verbatim into
// sin_addr. |port| is converted with htons() before being stored.
const struct addrinfo* GetIPv4Address(const uint8* bytes, int port) {
  static struct addrinfo static_ai;
  static struct sockaddr_in static_addr4;

  // Zero the addrinfo first and only then fill in fields, so that nothing
  // assigned below (notably ai_socktype) is wiped out again.
  struct addrinfo* ai = &static_ai;
  memset(ai, 0, sizeof(static_ai));
  ai->ai_socktype = SOCK_STREAM;
  ai->ai_family = AF_INET;
  ai->ai_addrlen = sizeof(static_addr4);

  struct sockaddr_in* addr4 = &static_addr4;
  memset(addr4, 0, sizeof(static_addr4));
  addr4->sin_port = htons(port);
  addr4->sin_family = ai->ai_family;
  memcpy(&addr4->sin_addr, bytes, 4);

  ai->ai_addr = reinterpret_cast<sockaddr*>(addr4);
  return ai;
}
426
// Returns an addrinfo for the given 128-bit address (IPv6.)
// The result lives in static storage, so don't delete it; every call
// overwrites the structures handed out by the previous call.
// |bytes| should be an array of length 16; it is copied verbatim into
// sin6_addr. |port| is converted with htons() before being stored.
const struct addrinfo* GetIPv6Address(const uint8* bytes, int port) {
  static struct addrinfo static_ai;
  static struct sockaddr_in6 static_addr6;

  // Zero the addrinfo first and only then fill in fields, so that nothing
  // assigned below (notably ai_socktype) is wiped out again.
  struct addrinfo* ai = &static_ai;
  memset(ai, 0, sizeof(static_ai));
  ai->ai_socktype = SOCK_STREAM;
  ai->ai_family = AF_INET6;
  ai->ai_addrlen = sizeof(static_addr6);

  struct sockaddr_in6* addr6 = &static_addr6;
  memset(addr6, 0, sizeof(static_addr6));
  addr6->sin6_port = htons(port);
  addr6->sin6_family = ai->ai_family;
  memcpy(&addr6->sin6_addr, bytes, 16);

  ai->ai_addr = reinterpret_cast<sockaddr*>(addr6);
  return ai;
}
450
// A helper for IDN*{Fast,Slow}.
// Tags both |expected| and |actual| with the suffix "::<language list>" so
// that a failing sub-case can be identified from the test output alone.
void AppendLanguagesToOutputs(const wchar_t* languages,
                              std::wstring* expected,
                              std::wstring* actual) {
  // Build the suffix once, then attach it to each string.
  const std::wstring suffix = std::wstring(L"::") + languages;
  *expected += suffix;
  *actual += suffix;
}
462
463 // Helper to strignize an IP number (used to define expectations).
DumpIPNumber(const IPAddressNumber & v)464 std::string DumpIPNumber(const IPAddressNumber& v) {
465 std::string out;
466 for (size_t i = 0; i < v.size(); ++i) {
467 if (i != 0)
468 out.append(",");
469 out.append(base::IntToString(static_cast<int>(v[i])));
470 }
471 return out;
472 }
473
474 } // anonymous namespace
475
// Checks FilePathToFileURL() / FileURLToFilePath(): round-trips a set of
// platform-specific paths, decodes assorted file: URL spellings, and verifies
// that a malformed URL is rejected.
TEST(NetUtilTest, FileURLConversion) {
  // a list of test file names and the corresponding URLs
  const FileCase round_trip_cases[] = {
#if defined(OS_WIN)
    {L"C:\\foo\\bar.txt", "file:///C:/foo/bar.txt"},
    {L"\\\\some computer\\foo\\bar.txt",
     "file://some%20computer/foo/bar.txt"}, // UNC
    {L"D:\\Name;with%some symbols*#",
     "file:///D:/Name%3Bwith%25some%20symbols*%23"},
    // issue 14153: To be tested with the OS default codepage other than 1252.
    {L"D:\\latin1\\caf\x00E9\x00DD.txt",
     "file:///D:/latin1/caf%C3%A9%C3%9D.txt"},
    {L"D:\\otherlatin\\caf\x0119.txt",
     "file:///D:/otherlatin/caf%C4%99.txt"},
    {L"D:\\greek\\\x03B1\x03B2\x03B3.txt",
     "file:///D:/greek/%CE%B1%CE%B2%CE%B3.txt"},
    {L"D:\\Chinese\\\x6240\x6709\x4e2d\x6587\x7f51\x9875.doc",
     "file:///D:/Chinese/%E6%89%80%E6%9C%89%E4%B8%AD%E6%96%87%E7%BD%91"
         "%E9%A1%B5.doc"},
    {L"D:\\plane1\\\xD835\xDC00\xD835\xDC01.txt",  // Math alphabet "AB"
     "file:///D:/plane1/%F0%9D%90%80%F0%9D%90%81.txt"},
#elif defined(OS_POSIX)
    {L"/foo/bar.txt", "file:///foo/bar.txt"},
    {L"/foo/BAR.txt", "file:///foo/BAR.txt"},
    {L"/C:/foo/bar.txt", "file:///C:/foo/bar.txt"},
    {L"/some computer/foo/bar.txt", "file:///some%20computer/foo/bar.txt"},
    {L"/Name;with%some symbols*#", "file:///Name%3Bwith%25some%20symbols*%23"},
    {L"/latin1/caf\x00E9\x00DD.txt", "file:///latin1/caf%C3%A9%C3%9D.txt"},
    {L"/otherlatin/caf\x0119.txt", "file:///otherlatin/caf%C4%99.txt"},
    {L"/greek/\x03B1\x03B2\x03B3.txt", "file:///greek/%CE%B1%CE%B2%CE%B3.txt"},
    {L"/Chinese/\x6240\x6709\x4e2d\x6587\x7f51\x9875.doc",
     "file:///Chinese/%E6%89%80%E6%9C%89%E4%B8%AD%E6%96%87%E7%BD"
         "%91%E9%A1%B5.doc"},
    {L"/plane1/\x1D400\x1D401.txt",  // Math alphabet "AB"
     "file:///plane1/%F0%9D%90%80%F0%9D%90%81.txt"},
#endif
  };

  // Part 1: path -> URL -> path must reproduce every entry above.
  FilePath output;
  for (size_t case_index = 0;
       case_index < ARRAYSIZE_UNSAFE(round_trip_cases);
       case_index++) {
    const FileCase& round_trip = round_trip_cases[case_index];

    // Path to file: URL.
    const FilePath source_path = file_util::WStringAsFilePath(round_trip.file);
    const GURL file_url(FilePathToFileURL(source_path));
    EXPECT_EQ(round_trip.url, file_url.spec());

    // And back to the path again.
    EXPECT_TRUE(FileURLToFilePath(file_url, &output));
    EXPECT_EQ(round_trip.file, file_util::FilePathAsWString(output));
  }

  // Part 2: various spellings of file: URLs must decode to the right path.
  const FileCase url_cases[] = {
#if defined(OS_WIN)
    {L"C:\\foo\\bar.txt", "file:c|/foo\\bar.txt"},
    {L"C:\\foo\\bar.txt", "file:/c:/foo/bar.txt"},
    {L"\\\\foo\\bar.txt", "file://foo\\bar.txt"},
    {L"C:\\foo\\bar.txt", "file:///c:/foo/bar.txt"},
    {L"\\\\foo\\bar.txt", "file:////foo\\bar.txt"},
    {L"\\\\foo\\bar.txt", "file:/foo/bar.txt"},
    {L"\\\\foo\\bar.txt", "file://foo\\bar.txt"},
    {L"C:\\foo\\bar.txt", "file:\\\\\\c:/foo/bar.txt"},
#elif defined(OS_POSIX)
    {L"/c:/foo/bar.txt", "file:/c:/foo/bar.txt"},
    {L"/c:/foo/bar.txt", "file:///c:/foo/bar.txt"},
    {L"/foo/bar.txt", "file:/foo/bar.txt"},
    {L"/c:/foo/bar.txt", "file:\\\\\\c:/foo/bar.txt"},
    {L"/foo/bar.txt", "file:foo/bar.txt"},
    {L"/bar.txt", "file://foo/bar.txt"},
    {L"/foo/bar.txt", "file:///foo/bar.txt"},
    {L"/foo/bar.txt", "file:////foo/bar.txt"},
    {L"/foo/bar.txt", "file:////foo//bar.txt"},
    {L"/foo/bar.txt", "file:////foo///bar.txt"},
    {L"/foo/bar.txt", "file:////foo////bar.txt"},
    {L"/c:/foo/bar.txt", "file:\\\\\\c:/foo/bar.txt"},
    {L"/c:/foo/bar.txt", "file:c:/foo/bar.txt"},
    // We get these wrong because GURL turns back slashes into forward
    // slashes.
    //{L"/foo%5Cbar.txt", "file://foo\\bar.txt"},
    //{L"/c|/foo%5Cbar.txt", "file:c|/foo\\bar.txt"},
    //{L"/foo%5Cbar.txt", "file://foo\\bar.txt"},
    //{L"/foo%5Cbar.txt", "file:////foo\\bar.txt"},
    //{L"/foo%5Cbar.txt", "file://foo\\bar.txt"},
#endif
  };
  for (size_t case_index = 0;
       case_index < ARRAYSIZE_UNSAFE(url_cases);
       case_index++) {
    const FileCase& decode_case = url_cases[case_index];
    const GURL url_to_decode(decode_case.url);
    FileURLToFilePath(url_to_decode, &output);
    EXPECT_EQ(decode_case.file, file_util::FilePathAsWString(output));
  }

  // Unfortunately, UTF8ToWide discards invalid UTF8 input.
#ifdef BUG_878908_IS_FIXED
  // Test that no conversion happens if the UTF-8 input is invalid, and that
  // the input is preserved in UTF-8
  const char invalid_utf8[] = "file:///d:/Blah/\xff.doc";
  const wchar_t invalid_wide[] = L"D:\\Blah\\\xff.doc";
  EXPECT_TRUE(FileURLToFilePath(
      GURL(std::string(invalid_utf8)), &output));
  EXPECT_EQ(std::wstring(invalid_wide), output);
#endif

  // Part 3: a malformed file URL must be reported as a failure.
  EXPECT_FALSE(FileURLToFilePath(GURL("filefoobar"), &output));
}
580
// Checks that GetIdentityFromURL() extracts the expected username and
// password from a set of URLs with embedded credentials.
TEST(NetUtilTest, GetIdentityFromURL) {
  struct {
    const char* input_url;
    const char* expected_username;
    const char* expected_password;
  } tests[] = {
    {
      "http://username:password@google.com",
      "username",
      "password",
    },
    { // Test for http://crbug.com/19200
      "http://username:p@ssword@google.com",
      "username",
      "p@ssword",
    },
    { // Special URL characters should be unescaped.
      "http://username:p%3fa%26s%2fs%23@google.com",
      "username",
      "p?a&s/s#",
    },
    { // Username contains %20.
      "http://use rname:password@google.com",
      "use rname",
      "password",
    },
    { // Keep %00 as is.
      "http://use%00rname:password@google.com",
      "use%00rname",
      "password",
    },
    { // Use a '+' in the username.
      "http://use+rname:password@google.com",
      "use+rname",
      "password",
    },
    { // Use a '&' in the password.
      "http://username:p&ssword@google.com",
      "username",
      "p&ssword",
    },
  };
  for (size_t index = 0; index < ARRAYSIZE_UNSAFE(tests); ++index) {
    const char* const input_url = tests[index].input_url;
    // Identify the failing row in the test output.
    SCOPED_TRACE(base::StringPrintf("Test[%" PRIuS "]: %s", index,
                                    input_url));
    const GURL url(input_url);

    string16 actual_username;
    string16 actual_password;
    GetIdentityFromURL(url, &actual_username, &actual_password);

    const string16 wanted_username =
        ASCIIToUTF16(tests[index].expected_username);
    const string16 wanted_password =
        ASCIIToUTF16(tests[index].expected_password);
    EXPECT_EQ(wanted_username, actual_username);
    EXPECT_EQ(wanted_password, actual_password);
  }
}
635
636 // Try extracting a username which was encoded with UTF8.
TEST(NetUtilTest,GetIdentityFromURL_UTF8)637 TEST(NetUtilTest, GetIdentityFromURL_UTF8) {
638 GURL url(WideToUTF16(L"http://foo:\x4f60\x597d@blah.com"));
639
640 EXPECT_EQ("foo", url.username());
641 EXPECT_EQ("%E4%BD%A0%E5%A5%BD", url.password());
642
643 // Extract the unescaped identity.
644 string16 username, password;
645 GetIdentityFromURL(url, &username, &password);
646
647 // Verify that it was decoded as UTF8.
648 EXPECT_EQ(ASCIIToUTF16("foo"), username);
649 EXPECT_EQ(WideToUTF16(L"\x4f60\x597d"), password);
650 }
651
// Just a bunch of fake headers: a status line followed by '\n'-separated
// "Name: value" lines. Used as input by the GetSpecificHeader and
// GetHeaderParamValue tests. Some names appear more than once
// (Transfer-Encoding, Set-Cookie, X-Google-Google2), and the X-Malformed*
// entries deliberately carry a stray quote / an empty parameter value.
const wchar_t* google_headers =
    L"HTTP/1.1 200 OK\n"
    L"Content-TYPE: text/html; charset=utf-8\n"
    L"Content-disposition: attachment; filename=\"download.pdf\"\n"
    L"Content-Length: 378557\n"
    L"X-Google-Google1: 314159265\n"
    L"X-Google-Google2: aaaa2:7783,bbb21:9441\n"
    L"X-Google-Google4: home\n"
    L"Transfer-Encoding: chunked\n"
    L"Set-Cookie: HEHE_AT=6666x66beef666x6-66xx6666x66; Path=/mail\n"
    L"Set-Cookie: HEHE_HELP=owned:0;Path=/\n"
    L"Set-Cookie: S=gmail=Xxx-beefbeefbeef_beefb:gmail_yj=beefbeef000beefbee"
       L"fbee:gmproxy=bee-fbeefbe; Domain=.google.com; Path=/\n"
    L"X-Google-Google2: /one/two/three/four/five/six/seven-height/nine:9411\n"
    L"Server: GFE/1.3\n"
    L"Transfer-Encoding: chunked\n"
    L"Date: Mon, 13 Nov 2006 21:38:09 GMT\n"
    L"Expires: Tue, 14 Nov 2006 19:23:58 GMT\n"
    L"X-Malformed: bla; arg=test\"\n"
    L"X-Malformed2: bla; arg=\n"
    L"X-Test: bla; arg1=val1; arg2=val2";
674
// Checks GetSpecificHeader() against |google_headers| (header names are
// given in assorted letter cases, plus an unknown and an empty name) and
// against an empty header block, where every lookup must come back empty.
TEST(NetUtilTest, GetSpecificHeader) {
  const HeaderCase tests[] = {
    {L"content-type", L"text/html; charset=utf-8"},
    {L"CONTENT-LENGTH", L"378557"},
    {L"Date", L"Mon, 13 Nov 2006 21:38:09 GMT"},
    {L"Bad-Header", L""},
    {L"", L""},
  };

  // Test first with google_headers.
  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(tests); ++i) {
    std::wstring result = GetSpecificHeader(google_headers,
                                            tests[i].header_name);
    // Expected value first, as in the other tests of this file, so that a
    // failure report labels the two values correctly.
    EXPECT_EQ(tests[i].expected, result);
  }

  // Test again with empty headers.
  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(tests); ++i) {
    std::wstring result = GetSpecificHeader(L"", tests[i].header_name);
    EXPECT_EQ(std::wstring(), result);
  }
}
697
// Checks GetHeaderParamValue() (with outer quotes removed) on header values
// fetched from |google_headers|, then on values fetched from an empty header
// block, where every result must be empty.
TEST(NetUtilTest, GetHeaderParamValue) {
  const HeaderParamCase tests[] = {
    {L"Content-type", L"charset", L"utf-8"},
    {L"content-disposition", L"filename", L"download.pdf"},
    {L"Content-Type", L"badparam", L""},
    {L"X-Malformed", L"arg", L"test\""},
    {L"X-Malformed2", L"arg", L""},
    {L"X-Test", L"arg1", L"val1"},
    {L"X-Test", L"arg2", L"val2"},
    {L"Bad-Header", L"badparam", L""},
    {L"Bad-Header", L"", L""},
    {L"", L"badparam", L""},
    {L"", L"", L""},
  };
  // TODO(mpcomplete): add tests for other formats of headers.

  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(tests); ++i) {
    std::wstring header_value =
        GetSpecificHeader(google_headers, tests[i].header_name);
    std::wstring result =
        GetHeaderParamValue(header_value, tests[i].param_name,
                            QuoteRule::REMOVE_OUTER_QUOTES);
    // Expected value first, as in the other tests of this file, so that a
    // failure report labels the two values correctly.
    EXPECT_EQ(tests[i].expected, result);
  }

  // With no headers at all, every parameter lookup yields an empty string.
  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(tests); ++i) {
    std::wstring header_value =
        GetSpecificHeader(L"", tests[i].header_name);
    std::wstring result =
        GetHeaderParamValue(header_value, tests[i].param_name,
                            QuoteRule::REMOVE_OUTER_QUOTES);
    EXPECT_EQ(std::wstring(), result);
  }
}
732
// Checks how GetHeaderParamValue() treats quotes around a parameter value
// under both quote rules: KEEP_OUTER_QUOTES and REMOVE_OUTER_QUOTES.
TEST(NetUtilTest, GetHeaderParamValueQuotes) {
  struct {
    const char* header;
    const char* expected_with_quotes;
    const char* expected_without_quotes;
  } tests[] = {
    {"filename=foo", "foo", "foo"},
    {"filename=\"foo\"", "\"foo\"", "foo"},
    {"filename=foo\"", "foo\"", "foo\""},
    {"filename=fo\"o", "fo\"o", "fo\"o"},
  };

  for (size_t index = 0; index < ARRAYSIZE_UNSAFE(tests); ++index) {
    const char* const header = tests[index].header;

    // Same header, once per quote rule.
    const std::string kept =
        GetHeaderParamValue(header, "filename", QuoteRule::KEEP_OUTER_QUOTES);
    const std::string stripped =
        GetHeaderParamValue(header, "filename",
                            QuoteRule::REMOVE_OUTER_QUOTES);

    EXPECT_EQ(tests[index].expected_with_quotes, kept)
        << "Failed while processing: " << header;
    EXPECT_EQ(tests[index].expected_without_quotes, stripped)
        << "Failed while processing: " << header;
  }
}
758
TEST(NetUtilTest,GetFileNameFromCD)759 TEST(NetUtilTest, GetFileNameFromCD) {
760 const FileNameCDCase tests[] = {
761 // Test various forms of C-D header fields emitted by web servers.
762 {"content-disposition: inline; filename=\"abcde.pdf\"", "", L"abcde.pdf"},
763 {"content-disposition: inline; name=\"abcde.pdf\"", "", L"abcde.pdf"},
764 {"content-disposition: attachment; filename=abcde.pdf", "", L"abcde.pdf"},
765 {"content-disposition: attachment; name=abcde.pdf", "", L"abcde.pdf"},
766 {"content-disposition: attachment; filename=abc,de.pdf", "", L"abc,de.pdf"},
767 {"content-disposition: filename=abcde.pdf", "", L"abcde.pdf"},
768 {"content-disposition: filename= abcde.pdf", "", L"abcde.pdf"},
769 {"content-disposition: filename =abcde.pdf", "", L"abcde.pdf"},
770 {"content-disposition: filename = abcde.pdf", "", L"abcde.pdf"},
771 {"content-disposition: filename\t=abcde.pdf", "", L"abcde.pdf"},
772 {"content-disposition: filename \t\t =abcde.pdf", "", L"abcde.pdf"},
773 {"content-disposition: name=abcde.pdf", "", L"abcde.pdf"},
774 {"content-disposition: inline; filename=\"abc%20de.pdf\"", "",
775 L"abc de.pdf"},
776 // Unbalanced quotation mark
777 {"content-disposition: filename=\"abcdef.pdf", "", L"abcdef.pdf"},
778 // Whitespaces are converted to a space.
779 {"content-disposition: inline; filename=\"abc \t\nde.pdf\"", "",
780 L"abc de.pdf"},
781 // %-escaped UTF-8
782 {"Content-Disposition: attachment; filename=\"%EC%98%88%EC%88%A0%20"
783 "%EC%98%88%EC%88%A0.jpg\"", "", L"\xc608\xc220 \xc608\xc220.jpg"},
784 {"Content-Disposition: attachment; filename=\"%F0%90%8C%B0%F0%90%8C%B1"
785 "abc.jpg\"", "", L"\U00010330\U00010331abc.jpg"},
786 {"Content-Disposition: attachment; filename=\"%EC%98%88%EC%88%A0 \n"
787 "%EC%98%88%EC%88%A0.jpg\"", "", L"\xc608\xc220 \xc608\xc220.jpg"},
788 // RFC 2047 with various charsets and Q/B encodings
789 {"Content-Disposition: attachment; filename=\"=?EUC-JP?Q?=B7=DD=BD="
790 "D13=2Epng?=\"", "", L"\x82b8\x8853" L"3.png"},
791 {"Content-Disposition: attachment; filename==?eUc-Kr?b?v7m8+iAzLnBuZw==?=",
792 "", L"\xc608\xc220 3.png"},
793 {"Content-Disposition: attachment; filename==?utf-8?Q?=E8=8A=B8=E8"
794 "=A1=93_3=2Epng?=", "", L"\x82b8\x8853 3.png"},
795 {"Content-Disposition: attachment; filename==?utf-8?Q?=F0=90=8C=B0"
796 "_3=2Epng?=", "", L"\U00010330 3.png"},
797 {"Content-Disposition: inline; filename=\"=?iso88591?Q?caf=e9_=2epng?=\"",
798 "", L"caf\x00e9 .png"},
799 // Space after an encoded word should be removed.
800 {"Content-Disposition: inline; filename=\"=?iso88591?Q?caf=E9_?= .png\"",
801 "", L"caf\x00e9 .png"},
802 // Two encoded words with different charsets (not very likely to be emitted
803 // by web servers in the wild). Spaces between them are removed.
804 {"Content-Disposition: inline; filename=\"=?euc-kr?b?v7m8+iAz?="
805 " =?ksc5601?q?=BF=B9=BC=FA=2Epng?=\"", "",
806 L"\xc608\xc220 3\xc608\xc220.png"},
807 {"Content-Disposition: attachment; filename=\"=?windows-1252?Q?caf=E9?="
808 " =?iso-8859-7?b?4eI=?= .png\"", "", L"caf\x00e9\x03b1\x03b2.png"},
809 // Non-ASCII string is passed through and treated as UTF-8 as long as
810 // it's valid as UTF-8 and regardless of |referrer_charset|.
811 {"Content-Disposition: attachment; filename=caf\xc3\xa9.png",
812 "iso-8859-1", L"caf\x00e9.png"},
813 {"Content-Disposition: attachment; filename=caf\xc3\xa9.png",
814 "", L"caf\x00e9.png"},
815 // Non-ASCII/Non-UTF-8 string. Fall back to the referrer charset.
816 {"Content-Disposition: attachment; filename=caf\xe5.png",
817 "windows-1253", L"caf\x03b5.png"},
818 #if 0
819 // Non-ASCII/Non-UTF-8 string. Fall back to the native codepage.
820 // TODO(jungshik): We need to set the OS default codepage
821 // to a specific value before testing. On Windows, we can use
822 // SetThreadLocale().
823 {"Content-Disposition: attachment; filename=\xb0\xa1\xb0\xa2.png",
824 "", L"\xac00\xac01.png"},
825 #endif
826 // Failure cases
827 // Invalid hex-digit "G"
828 {"Content-Disposition: attachment; filename==?iiso88591?Q?caf=EG?=", "",
829 L""},
    // Incomplete RFC 2047 encoded-word (missing '=' at the end)
831 {"Content-Disposition: attachment; filename==?iso88591?Q?caf=E3?", "", L""},
832 // Extra character at the end of an encoded word
833 {"Content-Disposition: attachment; filename==?iso88591?Q?caf=E3?==",
834 "", L""},
835 // Extra token at the end of an encoded word
836 {"Content-Disposition: attachment; filename==?iso88591?Q?caf=E3?=?",
837 "", L""},
838 {"Content-Disposition: attachment; filename==?iso88591?Q?caf=E3?=?=",
839 "", L""},
840 // Incomplete hex-escaped chars
841 {"Content-Disposition: attachment; filename==?windows-1252?Q?=63=61=E?=",
842 "", L""},
843 {"Content-Disposition: attachment; filename=%EC%98%88%EC%88%A", "", L""},
844 // %-escaped non-UTF-8 encoding is an "error"
845 {"Content-Disposition: attachment; filename=%B7%DD%BD%D1.png", "", L""},
846 // Two RFC 2047 encoded words in a row without a space is an error.
847 {"Content-Disposition: attachment; filename==?windows-1252?Q?caf=E3?="
848 "=?iso-8859-7?b?4eIucG5nCg==?=", "", L""},
849
850 // RFC 5987 tests with Filename* : see http://tools.ietf.org/html/rfc5987
851 {"Content-Disposition: attachment; filename*=foo.html", "", L""},
852 {"Content-Disposition: attachment; filename*=foo'.html", "", L""},
853 {"Content-Disposition: attachment; filename*=''foo'.html", "", L""},
854 {"Content-Disposition: attachment; filename*=''foo.html'", "", L""},
855 {"Content-Disposition: attachment; filename*=''f\"oo\".html'", "", L""},
856 {"Content-Disposition: attachment; filename*=bogus_charset''foo.html'",
857 "", L""},
858 {"Content-Disposition: attachment; filename*='en'foo.html'", "", L""},
859 {"Content-Disposition: attachment; filename*=iso-8859-1'en'foo.html", "",
860 L"foo.html"},
861 {"Content-Disposition: attachment; filename*=utf-8'en'foo.html", "",
862 L"foo.html"},
863 // charset cannot be omitted.
864 {"Content-Disposition: attachment; filename*='es'f\xfa.html'", "", L""},
865 // Non-ASCII bytes are not allowed.
866 {"Content-Disposition: attachment; filename*=iso-8859-1'es'f\xfa.html", "",
867 L""},
868 {"Content-Disposition: attachment; filename*=utf-8'es'f\xce\xba.html", "",
869 L""},
870 // TODO(jshin): Space should be %-encoded, but currently, we allow
871 // spaces.
872 {"Content-Disposition: inline; filename*=iso88591''cafe foo.png", "",
873 L"cafe foo.png"},
874
875 // Filename* tests converted from Q-encoded tests above.
876 {"Content-Disposition: attachment; filename*=EUC-JP''%B7%DD%BD%D13%2Epng",
877 "", L"\x82b8\x8853" L"3.png"},
878 {"Content-Disposition: attachment; filename*=utf-8''"
879 "%E8%8A%B8%E8%A1%93%203%2Epng", "", L"\x82b8\x8853 3.png"},
880 {"Content-Disposition: attachment; filename*=utf-8''%F0%90%8C%B0 3.png", "",
881 L"\U00010330 3.png"},
882 {"Content-Disposition: inline; filename*=Euc-Kr'ko'%BF%B9%BC%FA%2Epng", "",
883 L"\xc608\xc220.png"},
884 {"Content-Disposition: attachment; filename*=windows-1252''caf%E9.png", "",
885 L"caf\x00e9.png"},
886
887 // http://greenbytes.de/tech/tc2231/ filename* test cases.
888 // attwithisofn2231iso
889 {"Content-Disposition: attachment; filename*=iso-8859-1''foo-%E4.html", "",
890 L"foo-\xe4.html"},
891 // attwithfn2231utf8
892 {"Content-Disposition: attachment; filename*="
893 "UTF-8''foo-%c3%a4-%e2%82%ac.html", "", L"foo-\xe4-\x20ac.html"},
894 // attwithfn2231noc : no encoding specified but UTF-8 is used.
895 {"Content-Disposition: attachment; filename*=''foo-%c3%a4-%e2%82%ac.html",
896 "", L""},
897 // attwithfn2231utf8comp
898 {"Content-Disposition: attachment; filename*=UTF-8''foo-a%cc%88.html", "",
899 L"foo-\xe4.html"},
900 #ifdef ICU_SHOULD_FAIL_CONVERSION_ON_INVALID_CHARACTER
901 // This does not work because we treat ISO-8859-1 synonymous with
902 // Windows-1252 per HTML5. For HTTP, in theory, we're not
903 // supposed to.
904 // attwithfn2231utf8-bad
905 {"Content-Disposition: attachment; filename*="
906 "iso-8859-1''foo-%c3%a4-%e2%82%ac.html", "", L""},
907 #endif
908 // attwithfn2231ws1
909 {"Content-Disposition: attachment; filename *=UTF-8''foo-%c3%a4.html", "",
910 L""},
911 // attwithfn2231ws2
912 {"Content-Disposition: attachment; filename*= UTF-8''foo-%c3%a4.html", "",
913 L"foo-\xe4.html"},
914 // attwithfn2231ws3
915 {"Content-Disposition: attachment; filename* =UTF-8''foo-%c3%a4.html", "",
916 L"foo-\xe4.html"},
917 // attwithfn2231quot
918 {"Content-Disposition: attachment; filename*=\"UTF-8''foo-%c3%a4.html\"",
919 "", L""},
920 // attfnboth
921 {"Content-Disposition: attachment; filename=\"foo-ae.html\"; "
922 "filename*=UTF-8''foo-%c3%a4.html", "", L"foo-\xe4.html"},
923 // attfnboth2
924 {"Content-Disposition: attachment; filename*=UTF-8''foo-%c3%a4.html; "
925 "filename=\"foo-ae.html\"", "", L"foo-\xe4.html"},
926 // attnewandfn
927 {"Content-Disposition: attachment; foobar=x; filename=\"foo.html\"", "",
928 L"foo.html"},
929 };
930 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(tests); ++i) {
931 EXPECT_EQ(tests[i].expected,
932 UTF8ToWide(GetFileNameFromCD(tests[i].header_field,
933 tests[i].referrer_charset)))
934 << "Failed on input: " << tests[i].header_field;
935 }
936 }
937
// Runs every entry of idn_cases through IDNToUnicode() for each language list
// in kLanguages, except the three lists that IDNToUnicodeSlow covers.
TEST(NetUtilTest, IDNToUnicodeFast) {
  for (size_t case_index = 0; case_index < ARRAYSIZE_UNSAFE(idn_cases);
       ++case_index) {
    const IDNTestCase& idn_case = idn_cases[case_index];
    for (size_t lang_index = 0; lang_index < arraysize(kLanguages);
         ++lang_index) {
      // kLanguages[3] ("ja"), [17] ("zh-TW,en") and [18] ("ko,ja") are left
      // to IDNToUnicodeSlow.
      const bool covered_by_slow_test =
          lang_index == 3 || lang_index == 17 || lang_index == 18;
      if (covered_by_slow_test)
        continue;

      const wchar_t* languages = kLanguages[lang_index];
      std::wstring actual(IDNToUnicode(idn_case.input, strlen(idn_case.input),
                                       languages, NULL));

      // If Unicode output is not allowed for this language list, the input is
      // expected to come back unchanged.
      std::wstring wanted;
      if (idn_case.unicode_allowed[lang_index])
        wanted = idn_case.unicode_output;
      else
        wanted = ASCIIToWide(idn_case.input);

      AppendLanguagesToOutputs(languages, &wanted, &actual);
      EXPECT_EQ(wanted, actual);
    }
  }
}
953
// Counterpart of IDNToUnicodeFast: runs every entry of idn_cases through
// IDNToUnicode() for only the three language lists that test skips.
TEST(NetUtilTest, IDNToUnicodeSlow) {
  for (size_t case_index = 0; case_index < ARRAYSIZE_UNSAFE(idn_cases);
       ++case_index) {
    const IDNTestCase& idn_case = idn_cases[case_index];
    for (size_t lang_index = 0; lang_index < arraysize(kLanguages);
         ++lang_index) {
      // Only kLanguages[3] ("ja"), [17] ("zh-TW,en") and [18] ("ko,ja") are
      // handled here; everything else belongs to IDNToUnicodeFast.
      const bool handled_here =
          lang_index == 3 || lang_index == 17 || lang_index == 18;
      if (!handled_here)
        continue;

      const wchar_t* languages = kLanguages[lang_index];
      std::wstring actual(IDNToUnicode(idn_case.input, strlen(idn_case.input),
                                       languages, NULL));

      // If Unicode output is not allowed for this language list, the input is
      // expected to come back unchanged.
      std::wstring wanted;
      if (idn_case.unicode_allowed[lang_index])
        wanted = idn_case.unicode_output;
      else
        wanted = ASCIIToWide(idn_case.input);

      AppendLanguagesToOutputs(languages, &wanted, &actual);
      EXPECT_EQ(wanted, actual);
    }
  }
}
969
// Checks the offset adjustment performed by IDNToUnicode() (single offset)
// and IDNToUnicodeWithOffsets() (vector of offsets) when a punycode host is
// converted with the "zh-CN" language list.
TEST(NetUtilTest, IDNToUnicodeAdjustOffset) {
  // Converts to "test.\x89c6\x9891.\x5317\x4eac\x5927\x5b78.test".
  const char kHost[] = "test.xn--cy2a840a.xn--1lq90ic7f1rc.test";
  // Length of |kHost| without the terminating NUL (39 characters).
  const size_t kHostLength = arraysize(kHost) - 1;

  const AdjustOffsetCase adjust_cases[] = {
    {0, 0},
    {2, 2},
    {4, 4},
    {5, 5},
    {6, string16::npos},
    {16, string16::npos},
    {17, 7},
    {18, 8},
    {19, string16::npos},
    {25, string16::npos},
    {34, 12},
    {35, 13},
    {38, 16},
    {39, string16::npos},
    {string16::npos, string16::npos},
  };
  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(adjust_cases); ++i) {
    size_t offset = adjust_cases[i].input_offset;
    IDNToUnicode(kHost, kHostLength, L"zh-CN", &offset);
    EXPECT_EQ(adjust_cases[i].output_offset, offset)
        << "Input offset: " << adjust_cases[i].input_offset;
  }

  // Expected adjusted value for every input offset 0..kHostLength inclusive.
  const size_t expected[] = {0, 1, 2, 3, 4, 5, kNpos, kNpos, kNpos, kNpos,
                             kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos,
                             7, 8, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos,
                             kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos,
                             kNpos, kNpos, 12, 13, 14, 15, 16, kNpos};
  ASSERT_EQ(kHostLength + 1, arraysize(expected));

  std::vector<size_t> offsets;
  for (size_t i = 0; i < arraysize(expected); ++i)
    offsets.push_back(i);
  IDNToUnicodeWithOffsets(kHost, kHostLength, L"zh-CN", &offsets);

  // Guard the indexing below in case the call changed the vector's size.
  ASSERT_EQ(arraysize(expected), offsets.size());
  for (size_t i = 0; i < arraysize(expected); ++i)
    EXPECT_EQ(expected[i], offsets[i]) << "Input offset: " << i;
}
1010
// Table-driven test: each row supplies a host and a desired TLD for
// IsCanonicalizedHostCompliant() together with the expected result.
TEST(NetUtilTest, CompliantHost) {
  const CompliantHostCase compliant_host_cases[] = {
    {"", "", false},
    {"a", "", true},
    {"-", "", false},
    {".", "", false},
    {"9", "", false},
    {"9", "a", true},
    {"9a", "", false},
    {"9a", "a", true},
    {"a.", "", true},
    {"a.a", "", true},
    {"9.a", "", true},
    {"a.9", "", false},
    {"_9a", "", false},
    {"a.a9", "", true},
    {"a.9a", "", false},
    {"a+9a", "", false},
    {"1-.a-b", "", false},
    {"1-2.a_b", "", true},
    {"a.b.c.d.e", "", true},
    {"1.2.3.4.e", "", true},
    {"a.b.c.d.5", "", false},
    {"1.2.3.4.e.", "", true},
    {"a.b.c.d.5.", "", false},
  };

  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(compliant_host_cases); ++i) {
    const CompliantHostCase& test_case = compliant_host_cases[i];
    // Name the failing row; the bare bool comparison does not identify it.
    EXPECT_EQ(test_case.expected_output,
              IsCanonicalizedHostCompliant(test_case.host,
                                           test_case.desired_tld))
        << "Iteration " << i << ": host \"" << test_case.host
        << "\", desired_tld \"" << test_case.desired_tld << "\"";
  }
}
1044
// StripWWW() is expected to remove a leading "www." and to return any other
// input (including the empty string) unchanged.
TEST(NetUtilTest, StripWWW) {
  const string16 empty = string16();
  const string16 blah = ASCIIToUTF16("blah");

  EXPECT_EQ(empty, StripWWW(empty));
  EXPECT_EQ(empty, StripWWW(ASCIIToUTF16("www.")));
  EXPECT_EQ(blah, StripWWW(ASCIIToUTF16("www.blah")));
  EXPECT_EQ(blah, StripWWW(blah));
}
1051
// Table-driven test of GetSuggestedFilename(). Each row lists, in order: the
// URL, the Content-Disposition header, the referrer charset, the default
// filename, and the filename the call is expected to return.
TEST(NetUtilTest, GetSuggestedFilename) {
  const SuggestedFilenameCase test_cases[] = {
    {"http://www.google.com/",
     "Content-disposition: attachment; filename=test.html",
     "",
     L"",
     L"test.html"},
    {"http://www.google.com/",
     "Content-disposition: attachment; filename=\"test.html\"",
     "",
     L"",
     L"test.html"},
    {"http://www.google.com/path/test.html",
     "Content-disposition: attachment",
     "",
     L"",
     L"test.html"},
    {"http://www.google.com/path/test.html",
     "Content-disposition: attachment;",
     "",
     L"",
     L"test.html"},
    {"http://www.google.com/",
     "",
     "",
     L"",
     L"www.google.com"},
    {"http://www.google.com/test.html",
     "",
     "",
     L"",
     L"test.html"},
    // Now that we use googleurl's ExtractFileName, this case falls back
    // to the hostname. If this behavior is not desirable, we'd better
    // change ExtractFileName (in url_parse).
    {"http://www.google.com/path/",
     "",
     "",
     L"",
     L"www.google.com"},
    {"http://www.google.com/path",
     "",
     "",
     L"",
     L"path"},
    {"file:///",
     "",
     "",
     L"",
     L"download"},
    {"non-standard-scheme:",
     "",
     "",
     L"",
     L"download"},
    {"http://www.google.com/",
     "Content-disposition: attachment; filename =\"test.html\"",
     "",
     L"download",
     L"test.html"},
    {"http://www.google.com/",
     "",
     "",
     L"download",
     L"download"},
    {"http://www.google.com/",
     "Content-disposition: attachment; filename=\"../test.html\"",
     "",
     L"",
     L"_test.html"},
    {"http://www.google.com/",
     "Content-disposition: attachment; filename=\"..\\test.html\"",
     "",
     L"",
     L"_test.html"},
    {"http://www.google.com/",
     "Content-disposition: attachment; filename=\"..\"",
     "",
     L"download",
     L"download"},
    {"http://www.google.com/test.html",
     "Content-disposition: attachment; filename=\"..\"",
     "",
     L"download",
     L"test.html"},
    // Below is a small subset of cases taken from GetFileNameFromCD test above.
    {"http://www.google.com/",
     "Content-Disposition: attachment; filename=\"%EC%98%88%EC%88%A0%20"
     "%EC%98%88%EC%88%A0.jpg\"",
     "",
     L"",
     L"\uc608\uc220 \uc608\uc220.jpg"},
    {"http://www.google.com/%EC%98%88%EC%88%A0%20%EC%98%88%EC%88%A0.jpg",
     "",
     "",
     L"download",
     L"\uc608\uc220 \uc608\uc220.jpg"},
    {"http://www.google.com/",
     "Content-disposition: attachment;",
     "",
     L"\uB2E4\uC6B4\uB85C\uB4DC",
     L"\uB2E4\uC6B4\uB85C\uB4DC"},
    {"http://www.google.com/",
     "Content-Disposition: attachment; filename=\"=?EUC-JP?Q?=B7=DD=BD="
     "D13=2Epng?=\"",
     "",
     L"download",
     L"\u82b8\u88533.png"},
    {"http://www.example.com/images?id=3",
     "Content-Disposition: attachment; filename=caf\xc3\xa9.png",
     "iso-8859-1",
     L"",
     L"caf\u00e9.png"},
    {"http://www.example.com/images?id=3",
     "Content-Disposition: attachment; filename=caf\xe5.png",
     "windows-1253",
     L"",
     L"caf\u03b5.png"},
    {"http://www.example.com/file?id=3",
     "Content-Disposition: attachment; name=\xcf\xc2\xd4\xd8.zip",
     "GBK",
     L"",
     L"\u4e0b\u8f7d.zip"},
    // Invalid C-D header. Extracts filename from url.
    {"http://www.google.com/test.html",
     "Content-Disposition: attachment; filename==?iiso88591?Q?caf=EG?=",
     "",
     L"",
     L"test.html"},
    // about: and data: URLs
    {"about:chrome",
     "",
     "",
     L"",
     L"download"},
    {"data:,looks/like/a.path",
     "",
     "",
     L"",
     L"download"},
    {"data:text/plain;base64,VG8gYmUgb3Igbm90IHRvIGJlLg=",
     "",
     "",
     L"",
     L"download"},
    {"data:,looks/like/a.path",
     "",
     "",
     L"default_filename_is_given",
     L"default_filename_is_given"},
    {"data:,looks/like/a.path",
     "",
     "",
     L"\u65e5\u672c\u8a9e",  // Japanese Kanji.
     L"\u65e5\u672c\u8a9e"},
    // Dotfiles. Ensures preceding period(s) stripped.
    {"http://www.google.com/.test.html",
     "",
     "",
     L"",
     L"test.html"},
    {"http://www.google.com/.test",
     "",
     "",
     L"",
     L"test"},
    {"http://www.google.com/..test",
     "",
     "",
     L"",
     L"test"},
    // The filename encoding is specified by the referrer charset.
    {"http://example.com/V%FDvojov%E1%20psychologie.doc",
     "",
     "iso-8859-1",
     L"",
     L"V\u00fdvojov\u00e1 psychologie.doc"},
    // The filename encoding doesn't match the referrer charset, the
    // system charset, or UTF-8.
    // TODO(jshin): we need to handle this case.
#if 0
    {"http://example.com/V%FDvojov%E1%20psychologie.doc",
     "",
     "utf-8",
     L"",
     L"V\u00fdvojov\u00e1 psychologie.doc",
    },
#endif
  };

  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(test_cases); ++i) {
    const SuggestedFilenameCase& current = test_cases[i];
    std::wstring default_name = current.default_filename;
    string16 suggested = GetSuggestedFilename(GURL(current.url),
                                              current.content_disp_header,
                                              current.referrer_charset,
                                              WideToUTF16(default_name));
    // The message identifies the failing row by index and URL.
    EXPECT_EQ(std::wstring(current.expected_filename),
              UTF16ToWide(suggested))
        << "Iteration " << i << ": " << current.url;
  }
}
1251
// This is currently a Windows-specific function.
1253 #if defined(OS_WIN)
namespace {

// One row of the table used by the GetDirectoryListingEntry test that follows:
// the five arguments passed to GetDirectoryListingEntry(), in order, plus the
// string the call is expected to return.
struct GetDirectoryListingEntryCase {
  // Entry name; the test converts it with WideToUTF16() before passing it.
  const wchar_t* name;
  // Passed as the second argument; several table rows leave it empty.
  const char* raw_bytes;
  // Passed through unchanged as the third, fourth and fifth arguments.
  bool is_dir;
  int64 filesize;
  base::Time time;
  // Exact string the call is expected to return.
  const char* expected;
};

} // namespace
// Table-driven test: feeds each row's name, raw bytes, directory flag, size
// and time to GetDirectoryListingEntry() and compares the returned string
// with the row's expected text.
TEST(NetUtilTest, GetDirectoryListingEntry) {
  const GetDirectoryListingEntryCase test_cases[] = {
    {L"Foo",
     "",
     false,
     10000,
     base::Time(),
     "<script>addRow(\"Foo\",\"Foo\",0,\"9.8 kB\",\"\");</script>\n"},
    {L"quo\"tes",
     "",
     false,
     10000,
     base::Time(),
     "<script>addRow(\"quo\\\"tes\",\"quo%22tes\",0,\"9.8 kB\",\"\");</script>"
     "\n"},
    {L"quo\"tes",
     "quo\"tes",
     false,
     10000,
     base::Time(),
     "<script>addRow(\"quo\\\"tes\",\"quo%22tes\",0,\"9.8 kB\",\"\");</script>"
     "\n"},
    // U+D55C U+AE00. raw_bytes is empty (either a local file with
    // UTF-8/UTF-16 encoding or a remote file on an ftp server using UTF-8).
    {L"\xD55C\xAE00.txt",
     "",
     false,
     10000,
     base::Time(),
     "<script>addRow(\"\\uD55C\\uAE00.txt\",\"%ED%95%9C%EA%B8%80.txt\""
     ",0,\"9.8 kB\",\"\");</script>\n"},
    // U+D55C U+AE00. raw_bytes is the corresponding EUC-KR sequence:
    // a local or remote file in EUC-KR.
    {L"\xD55C\xAE00.txt",
     "\xC7\xD1\xB1\xDB.txt",
     false,
     10000,
     base::Time(),
     "<script>addRow(\"\\uD55C\\uAE00.txt\",\"%C7%D1%B1%DB.txt\""
     ",0,\"9.8 kB\",\"\");</script>\n"},
  };

  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(test_cases); ++i) {
    const std::string results = GetDirectoryListingEntry(
        WideToUTF16(test_cases[i].name),
        test_cases[i].raw_bytes,
        test_cases[i].is_dir,
        test_cases[i].filesize,
        test_cases[i].time);
    // Report the row index; the name is a wide string and is not streamed.
    EXPECT_EQ(test_cases[i].expected, results) << "Iteration " << i;
  }
}
1318
1319 #endif
1320
// Table-driven test of ParseHostAndPort(): rows marked as successful must
// parse and yield the listed host and port; all other rows must be rejected.
TEST(NetUtilTest, ParseHostAndPort) {
  const struct {
    const char* input;
    bool success;
    const char* expected_host;
    int expected_port;
  } tests[] = {
    // Valid inputs:
    {"foo:10", true, "foo", 10},
    {"foo", true, "foo", -1},
    {
      "[1080:0:0:0:8:800:200C:4171]:11",
      true,
      "[1080:0:0:0:8:800:200C:4171]",
      11,
    },
    // Invalid inputs:
    {"foo:bar", false, "", -1},
    {"foo:", false, "", -1},
    {":", false, "", -1},
    {":80", false, "", -1},
    {"", false, "", -1},
    {"porttoolong:300000", false, "", -1},
    {"usrname@host", false, "", -1},
    {"usrname:password@host", false, "", -1},
    {":password@host", false, "", -1},
    {":password@host:80", false, "", -1},
    {"@host", false, "", -1},
  };

  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(tests); ++i) {
    std::string host;
    // Give |port| a defined value so it is never read while indeterminate.
    int port = -1;
    bool ok = ParseHostAndPort(tests[i].input, &host, &port);

    EXPECT_EQ(tests[i].success, ok) << "Input: \"" << tests[i].input << "\"";

    // Check the outputs only when the parse was expected to succeed and did;
    // a mismatch in |ok| has already been reported above.
    if (tests[i].success && ok) {
      EXPECT_EQ(tests[i].expected_host, host)
          << "Input: \"" << tests[i].input << "\"";
      EXPECT_EQ(tests[i].expected_port, port)
          << "Input: \"" << tests[i].input << "\"";
    }
  }
}
1365
// GetHostAndPort() is expected to return "host:port" for each URL below; the
// rows without an explicit port expect ":80".
TEST(NetUtilTest, GetHostAndPort) {
  const struct {
    const char* url_spec;
    const char* expected_host_and_port;
  } tests[] = {
    {"http://www.foo.com/x", "www.foo.com:80"},
    {"http://www.foo.com:21/x", "www.foo.com:21"},

    // For IPv6 literals should always include the brackets.
    {"http://[1::2]/x", "[1::2]:80"},
    {"http://[::a]:33/x", "[::a]:33"},
  };

  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(tests); ++i) {
    const GURL url(tests[i].url_spec);
    const std::string expected(tests[i].expected_host_and_port);
    EXPECT_EQ(expected, GetHostAndPort(url));
  }
}
1383
// GetHostAndOptionalPort() is expected to return the host alone for the URLs
// without an explicit port, and "host:port" for the URLs that carry one.
TEST(NetUtilTest, GetHostAndOptionalPort) {
  const struct {
    const char* url_spec;
    const char* expected_host_and_port;
  } tests[] = {
    {"http://www.foo.com/x", "www.foo.com"},
    {"http://www.foo.com:21/x", "www.foo.com:21"},

    // For IPv6 literals should always include the brackets.
    {"http://[1::2]/x", "[1::2]"},
    {"http://[::a]:33/x", "[::a]:33"},
  };

  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(tests); ++i) {
    const GURL url(tests[i].url_spec);
    const std::string expected(tests[i].expected_host_and_port);
    EXPECT_EQ(expected, GetHostAndOptionalPort(url));
  }
}
1401
1402
// Passes the addrinfo returned by GetIPv4Address() for each 4-byte address to
// NetAddressToString() and compares the result with the dotted-quad text.
TEST(NetUtilTest, NetAddressToString_IPv4) {
  const struct {
    uint8 bytes[4];
    const char* expected_text;
  } tests[] = {
    {{0, 0, 0, 0}, "0.0.0.0"},
    {{127, 0, 0, 1}, "127.0.0.1"},
    {{192, 168, 0, 1}, "192.168.0.1"},
  };

  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(tests); ++i) {
    const addrinfo* address = GetIPv4Address(tests[i].bytes, 80);
    const std::string actual_text = NetAddressToString(address);
    EXPECT_EQ(std::string(tests[i].expected_text), actual_text);
  }
}
1419
// Passes the addrinfo returned by GetIPv6Address() for a 16-byte address to
// NetAddressToString(); an empty result is tolerated.
TEST(NetUtilTest, NetAddressToString_IPv6) {
  const struct {
    uint8 bytes[16];
    const char* expected_text;
  } tests[] = {
    {{0xFE, 0xDC, 0xBA, 0x98, 0x76, 0x54, 0x32, 0x10, 0xFE, 0xDC, 0xBA,
      0x98, 0x76, 0x54, 0x32, 0x10},
     "fedc:ba98:7654:3210:fedc:ba98:7654:3210"},
  };

  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(tests); ++i) {
    const addrinfo* address = GetIPv6Address(tests[i].bytes, 80);
    const std::string actual_text = NetAddressToString(address);
    // Allow NetAddressToString() to fail, in case the system doesn't
    // support IPv6.
    if (actual_text.empty())
      continue;
    EXPECT_EQ(std::string(tests[i].expected_text), actual_text);
  }
}
1439
// 127.0.0.1 with port 166 is expected to be rendered as "127.0.0.1:166".
TEST(NetUtilTest, NetAddressToStringWithPort_IPv4) {
  const uint8 loopback_bytes[] = {127, 0, 0, 1};
  const addrinfo* address = GetIPv4Address(loopback_bytes, 166);
  const std::string text = NetAddressToStringWithPort(address);
  EXPECT_EQ("127.0.0.1:166", text);
}
1446
// An IPv6 address with port 361 is expected to be rendered in bracketed form
// followed by ":361"; an empty result is tolerated.
TEST(NetUtilTest, NetAddressToStringWithPort_IPv6) {
  const uint8 address_bytes[] = {
    0xFE, 0xDC, 0xBA, 0x98, 0x76, 0x54, 0x32, 0x10, 0xFE, 0xDC, 0xBA,
    0x98, 0x76, 0x54, 0x32, 0x10
  };
  const addrinfo* address = GetIPv6Address(address_bytes, 361);
  const std::string text = NetAddressToStringWithPort(address);

  // May fail on systems that don't support IPv6.
  if (text.empty())
    return;
  EXPECT_EQ("[fedc:ba98:7654:3210:fedc:ba98:7654:3210]:361", text);
}
1459
// The value returned by GetHostName() differs from machine to machine, so it
// cannot be compared against a fixed string. This test only exercises the
// code path and checks that something non-empty comes back.
TEST(NetUtilTest, GetHostName) {
  const std::string local_hostname = GetHostName();
  EXPECT_FALSE(local_hostname.empty());
}
1467
// Table-driven test of FormatUrl(). Each row lists, in order: a description
// used in failure messages, the input URL, the language list, the format
// flags, the unescape rule, the expected formatted output and the expected
// prefix length.
TEST(NetUtilTest, FormatUrl) {
  FormatUrlTypes default_format_type = kFormatUrlOmitUsernamePassword;
  const UrlTestData tests[] = {
    {"Empty URL", "", "", default_format_type, UnescapeRule::NORMAL, L"", 0},

    {"Simple URL",
     "http://www.google.com/", "", default_format_type, UnescapeRule::NORMAL,
     L"http://www.google.com/", 7},

    {"With a port number and a reference",
     "http://www.google.com:8080/#\xE3\x82\xB0", "", default_format_type,
     UnescapeRule::NORMAL,
     L"http://www.google.com:8080/#\x30B0", 7},

    // -------- IDN tests --------
    {"Japanese IDN with ja",
     "http://xn--l8jvb1ey91xtjb.jp", "ja", default_format_type,
     UnescapeRule::NORMAL, L"http://\x671d\x65e5\x3042\x3055\x3072.jp/", 7},

    {"Japanese IDN with en",
     "http://xn--l8jvb1ey91xtjb.jp", "en", default_format_type,
     UnescapeRule::NORMAL, L"http://xn--l8jvb1ey91xtjb.jp/", 7},

    {"Japanese IDN without any languages",
     "http://xn--l8jvb1ey91xtjb.jp", "", default_format_type,
     UnescapeRule::NORMAL,
     // Single script is safe for empty languages.
     L"http://\x671d\x65e5\x3042\x3055\x3072.jp/", 7},

    {"mailto: with Japanese IDN",
     "mailto:foo@xn--l8jvb1ey91xtjb.jp", "ja", default_format_type,
     UnescapeRule::NORMAL,
     // GURL doesn't assume an email address's domain part as a host name.
     L"mailto:foo@xn--l8jvb1ey91xtjb.jp", 7},

    {"file: with Japanese IDN",
     "file://xn--l8jvb1ey91xtjb.jp/config.sys", "ja", default_format_type,
     UnescapeRule::NORMAL,
     L"file://\x671d\x65e5\x3042\x3055\x3072.jp/config.sys", 7},

    {"ftp: with Japanese IDN",
     "ftp://xn--l8jvb1ey91xtjb.jp/config.sys", "ja", default_format_type,
     UnescapeRule::NORMAL,
     L"ftp://\x671d\x65e5\x3042\x3055\x3072.jp/config.sys", 6},

    // -------- omit_username_password flag tests --------
    {"With username and password, omit_username_password=false",
     "http://user:passwd@example.com/foo", "",
     kFormatUrlOmitNothing, UnescapeRule::NORMAL,
     L"http://user:passwd@example.com/foo", 19},

    {"With username and password, omit_username_password=true",
     "http://user:passwd@example.com/foo", "", default_format_type,
     UnescapeRule::NORMAL, L"http://example.com/foo", 7},

    {"With username and no password",
     "http://user@example.com/foo", "", default_format_type,
     UnescapeRule::NORMAL, L"http://example.com/foo", 7},

    {"Just '@' without username and password",
     "http://@example.com/foo", "", default_format_type, UnescapeRule::NORMAL,
     L"http://example.com/foo", 7},

    // GURL doesn't think local-part of an email address is username for URL.
    {"mailto:, omit_username_password=true",
     "mailto:foo@example.com", "", default_format_type, UnescapeRule::NORMAL,
     L"mailto:foo@example.com", 7},

    // -------- unescape flag tests --------
    {"Do not unescape",
     "http://%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB.jp/"
     "%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB"
     "?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", "en", default_format_type,
     UnescapeRule::NONE,
     // GURL parses %-encoded hostnames into Punycode.
     L"http://xn--qcka1pmc.jp/%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB"
     L"?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", 7},

    {"Unescape normally",
     "http://%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB.jp/"
     "%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB"
     "?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", "en", default_format_type,
     UnescapeRule::NORMAL,
     L"http://xn--qcka1pmc.jp/\x30B0\x30FC\x30B0\x30EB"
     L"?q=\x30B0\x30FC\x30B0\x30EB", 7},

    {"Unescape normally including unescape spaces",
     "http://www.google.com/search?q=Hello%20World", "en", default_format_type,
     UnescapeRule::SPACES, L"http://www.google.com/search?q=Hello World", 7},

    /*
    {"unescape=true with some special characters",
    "http://user%3A:%40passwd@example.com/foo%3Fbar?q=b%26z", "",
    kFormatUrlOmitNothing, UnescapeRule::NORMAL,
    L"http://user%3A:%40passwd@example.com/foo%3Fbar?q=b%26z", 25},
    */
    // Disabled: the resultant URL becomes "...user%253A:%2540passwd...".

    // -------- omit http: --------
    {"omit http with user name",
     "http://user@example.com/foo", "", kFormatUrlOmitAll,
     UnescapeRule::NORMAL, L"example.com/foo", 0},

    {"omit http",
     "http://www.google.com/", "en", kFormatUrlOmitHTTP,
     UnescapeRule::NORMAL, L"www.google.com/",
     0},

    {"omit http with https",
     "https://www.google.com/", "en", kFormatUrlOmitHTTP,
     UnescapeRule::NORMAL, L"https://www.google.com/",
     8},

    {"omit http starts with ftp.",
     "http://ftp.google.com/", "en", kFormatUrlOmitHTTP,
     UnescapeRule::NORMAL, L"http://ftp.google.com/",
     7},

    // -------- omit trailing slash on bare hostname --------
    {"omit slash when it's the entire path",
     "http://www.google.com/", "en",
     kFormatUrlOmitTrailingSlashOnBareHostname, UnescapeRule::NORMAL,
     L"http://www.google.com", 7},
    {"omit slash when there's a ref",
     "http://www.google.com/#ref", "en",
     kFormatUrlOmitTrailingSlashOnBareHostname, UnescapeRule::NORMAL,
     L"http://www.google.com/#ref", 7},
    {"omit slash when there's a query",
     "http://www.google.com/?", "en",
     kFormatUrlOmitTrailingSlashOnBareHostname, UnescapeRule::NORMAL,
     L"http://www.google.com/?", 7},
    {"omit slash when it's not the entire path",
     "http://www.google.com/foo", "en",
     kFormatUrlOmitTrailingSlashOnBareHostname, UnescapeRule::NORMAL,
     L"http://www.google.com/foo", 7},
    {"omit slash for nonstandard URLs",
     "data:/", "en", kFormatUrlOmitTrailingSlashOnBareHostname,
     UnescapeRule::NORMAL, L"data:/", 5},
    {"omit slash for file URLs",
     "file:///", "en", kFormatUrlOmitTrailingSlashOnBareHostname,
     UnescapeRule::NORMAL, L"file:///", 7},

    // -------- view-source: --------
    {"view-source",
     "view-source:http://xn--qcka1pmc.jp/", "ja", default_format_type,
     UnescapeRule::NORMAL, L"view-source:http://\x30B0\x30FC\x30B0\x30EB.jp/",
     19},

    {"view-source of view-source",
     "view-source:view-source:http://xn--qcka1pmc.jp/", "ja",
     default_format_type, UnescapeRule::NORMAL,
     L"view-source:view-source:http://xn--qcka1pmc.jp/", 12},

    // view-source should omit http and trailing slash where non-view-source
    // would.
    {"view-source omit http",
     "view-source:http://a.b/c", "en", kFormatUrlOmitAll,
     UnescapeRule::NORMAL, L"view-source:a.b/c",
     12},
    {"view-source omit http starts with ftp.",
     "view-source:http://ftp.b/c", "en", kFormatUrlOmitAll,
     UnescapeRule::NORMAL, L"view-source:http://ftp.b/c",
     19},
    {"view-source omit slash when it's the entire path",
     "view-source:http://a.b/", "en", kFormatUrlOmitAll,
     UnescapeRule::NORMAL, L"view-source:a.b",
     12},
  };

  for (size_t i = 0; i < arraysize(tests); ++i) {
    const UrlTestData& current = tests[i];
    size_t actual_prefix_len;
    // Only the prefix length is requested; the other two out-parameters are
    // passed as NULL.
    string16 actual_output = FormatUrl(GURL(current.input),
                                       current.languages,
                                       current.format_types,
                                       current.escape_rules,
                                       NULL,
                                       &actual_prefix_len,
                                       NULL);
    EXPECT_EQ(WideToUTF16(current.output), actual_output)
        << current.description;
    EXPECT_EQ(current.prefix_len, actual_prefix_len) << current.description;
  }
}
1646
TEST(NetUtilTest,FormatUrlParsed)1647 TEST(NetUtilTest, FormatUrlParsed) {
1648 // No unescape case.
1649 url_parse::Parsed parsed;
1650 string16 formatted = FormatUrl(
1651 GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/"
1652 "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"),
1653 "ja", kFormatUrlOmitNothing, UnescapeRule::NONE, &parsed, NULL,
1654 NULL);
1655 EXPECT_EQ(WideToUTF16(
1656 L"http://%E3%82%B0:%E3%83%BC@\x30B0\x30FC\x30B0\x30EB.jp:8080"
1657 L"/%E3%82%B0/?q=%E3%82%B0#\x30B0"), formatted);
1658 EXPECT_EQ(WideToUTF16(L"%E3%82%B0"),
1659 formatted.substr(parsed.username.begin, parsed.username.len));
1660 EXPECT_EQ(WideToUTF16(L"%E3%83%BC"),
1661 formatted.substr(parsed.password.begin, parsed.password.len));
1662 EXPECT_EQ(WideToUTF16(L"\x30B0\x30FC\x30B0\x30EB.jp"),
1663 formatted.substr(parsed.host.begin, parsed.host.len));
1664 EXPECT_EQ(WideToUTF16(L"8080"),
1665 formatted.substr(parsed.port.begin, parsed.port.len));
1666 EXPECT_EQ(WideToUTF16(L"/%E3%82%B0/"),
1667 formatted.substr(parsed.path.begin, parsed.path.len));
1668 EXPECT_EQ(WideToUTF16(L"q=%E3%82%B0"),
1669 formatted.substr(parsed.query.begin, parsed.query.len));
1670 EXPECT_EQ(WideToUTF16(L"\x30B0"),
1671 formatted.substr(parsed.ref.begin, parsed.ref.len));
1672
1673 // Unescape case.
1674 formatted = FormatUrl(
1675 GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/"
1676 "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"),
1677 "ja", kFormatUrlOmitNothing, UnescapeRule::NORMAL, &parsed, NULL,
1678 NULL);
1679 EXPECT_EQ(WideToUTF16(L"http://\x30B0:\x30FC@\x30B0\x30FC\x30B0\x30EB.jp:8080"
1680 L"/\x30B0/?q=\x30B0#\x30B0"), formatted);
1681 EXPECT_EQ(WideToUTF16(L"\x30B0"),
1682 formatted.substr(parsed.username.begin, parsed.username.len));
1683 EXPECT_EQ(WideToUTF16(L"\x30FC"),
1684 formatted.substr(parsed.password.begin, parsed.password.len));
1685 EXPECT_EQ(WideToUTF16(L"\x30B0\x30FC\x30B0\x30EB.jp"),
1686 formatted.substr(parsed.host.begin, parsed.host.len));
1687 EXPECT_EQ(WideToUTF16(L"8080"),
1688 formatted.substr(parsed.port.begin, parsed.port.len));
1689 EXPECT_EQ(WideToUTF16(L"/\x30B0/"),
1690 formatted.substr(parsed.path.begin, parsed.path.len));
1691 EXPECT_EQ(WideToUTF16(L"q=\x30B0"),
1692 formatted.substr(parsed.query.begin, parsed.query.len));
1693 EXPECT_EQ(WideToUTF16(L"\x30B0"),
1694 formatted.substr(parsed.ref.begin, parsed.ref.len));
1695
1696 // Omit_username_password + unescape case.
1697 formatted = FormatUrl(
1698 GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/"
1699 "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"),
1700 "ja", kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL, &parsed,
1701 NULL, NULL);
1702 EXPECT_EQ(WideToUTF16(L"http://\x30B0\x30FC\x30B0\x30EB.jp:8080"
1703 L"/\x30B0/?q=\x30B0#\x30B0"), formatted);
1704 EXPECT_FALSE(parsed.username.is_valid());
1705 EXPECT_FALSE(parsed.password.is_valid());
1706 EXPECT_EQ(WideToUTF16(L"\x30B0\x30FC\x30B0\x30EB.jp"),
1707 formatted.substr(parsed.host.begin, parsed.host.len));
1708 EXPECT_EQ(WideToUTF16(L"8080"),
1709 formatted.substr(parsed.port.begin, parsed.port.len));
1710 EXPECT_EQ(WideToUTF16(L"/\x30B0/"),
1711 formatted.substr(parsed.path.begin, parsed.path.len));
1712 EXPECT_EQ(WideToUTF16(L"q=\x30B0"),
1713 formatted.substr(parsed.query.begin, parsed.query.len));
1714 EXPECT_EQ(WideToUTF16(L"\x30B0"),
1715 formatted.substr(parsed.ref.begin, parsed.ref.len));
1716
1717 // View-source case.
1718 formatted = FormatUrl(
1719 GURL("view-source:http://user:passwd@host:81/path?query#ref"),
1720 "", kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL, &parsed,
1721 NULL, NULL);
1722 EXPECT_EQ(WideToUTF16(L"view-source:http://host:81/path?query#ref"),
1723 formatted);
1724 EXPECT_EQ(WideToUTF16(L"view-source:http"),
1725 formatted.substr(parsed.scheme.begin, parsed.scheme.len));
1726 EXPECT_FALSE(parsed.username.is_valid());
1727 EXPECT_FALSE(parsed.password.is_valid());
1728 EXPECT_EQ(WideToUTF16(L"host"),
1729 formatted.substr(parsed.host.begin, parsed.host.len));
1730 EXPECT_EQ(WideToUTF16(L"81"),
1731 formatted.substr(parsed.port.begin, parsed.port.len));
1732 EXPECT_EQ(WideToUTF16(L"/path"),
1733 formatted.substr(parsed.path.begin, parsed.path.len));
1734 EXPECT_EQ(WideToUTF16(L"query"),
1735 formatted.substr(parsed.query.begin, parsed.query.len));
1736 EXPECT_EQ(WideToUTF16(L"ref"),
1737 formatted.substr(parsed.ref.begin, parsed.ref.len));
1738
1739 // omit http case.
1740 formatted = FormatUrl(
1741 GURL("http://host:8000/a?b=c#d"),
1742 "", kFormatUrlOmitHTTP, UnescapeRule::NORMAL, &parsed, NULL, NULL);
1743 EXPECT_EQ(WideToUTF16(L"host:8000/a?b=c#d"), formatted);
1744 EXPECT_FALSE(parsed.scheme.is_valid());
1745 EXPECT_FALSE(parsed.username.is_valid());
1746 EXPECT_FALSE(parsed.password.is_valid());
1747 EXPECT_EQ(WideToUTF16(L"host"),
1748 formatted.substr(parsed.host.begin, parsed.host.len));
1749 EXPECT_EQ(WideToUTF16(L"8000"),
1750 formatted.substr(parsed.port.begin, parsed.port.len));
1751 EXPECT_EQ(WideToUTF16(L"/a"),
1752 formatted.substr(parsed.path.begin, parsed.path.len));
1753 EXPECT_EQ(WideToUTF16(L"b=c"),
1754 formatted.substr(parsed.query.begin, parsed.query.len));
1755 EXPECT_EQ(WideToUTF16(L"d"),
1756 formatted.substr(parsed.ref.begin, parsed.ref.len));
1757
1758 // omit http starts with ftp case.
1759 formatted = FormatUrl(
1760 GURL("http://ftp.host:8000/a?b=c#d"),
1761 "", kFormatUrlOmitHTTP, UnescapeRule::NORMAL, &parsed, NULL, NULL);
1762 EXPECT_EQ(WideToUTF16(L"http://ftp.host:8000/a?b=c#d"), formatted);
1763 EXPECT_TRUE(parsed.scheme.is_valid());
1764 EXPECT_FALSE(parsed.username.is_valid());
1765 EXPECT_FALSE(parsed.password.is_valid());
1766 EXPECT_EQ(WideToUTF16(L"http"),
1767 formatted.substr(parsed.scheme.begin, parsed.scheme.len));
1768 EXPECT_EQ(WideToUTF16(L"ftp.host"),
1769 formatted.substr(parsed.host.begin, parsed.host.len));
1770 EXPECT_EQ(WideToUTF16(L"8000"),
1771 formatted.substr(parsed.port.begin, parsed.port.len));
1772 EXPECT_EQ(WideToUTF16(L"/a"),
1773 formatted.substr(parsed.path.begin, parsed.path.len));
1774 EXPECT_EQ(WideToUTF16(L"b=c"),
1775 formatted.substr(parsed.query.begin, parsed.query.len));
1776 EXPECT_EQ(WideToUTF16(L"d"),
1777 formatted.substr(parsed.ref.begin, parsed.ref.len));
1778
1779 // omit http starts with 'f' case.
1780 formatted = FormatUrl(
1781 GURL("http://f/"),
1782 "", kFormatUrlOmitHTTP, UnescapeRule::NORMAL, &parsed, NULL, NULL);
1783 EXPECT_EQ(WideToUTF16(L"f/"), formatted);
1784 EXPECT_FALSE(parsed.scheme.is_valid());
1785 EXPECT_FALSE(parsed.username.is_valid());
1786 EXPECT_FALSE(parsed.password.is_valid());
1787 EXPECT_FALSE(parsed.port.is_valid());
1788 EXPECT_TRUE(parsed.path.is_valid());
1789 EXPECT_FALSE(parsed.query.is_valid());
1790 EXPECT_FALSE(parsed.ref.is_valid());
1791 EXPECT_EQ(WideToUTF16(L"f"),
1792 formatted.substr(parsed.host.begin, parsed.host.len));
1793 EXPECT_EQ(WideToUTF16(L"/"),
1794 formatted.substr(parsed.path.begin, parsed.path.len));
1795 }
1796
// Checks how FormatUrl() and FormatUrlWithOffsets() remap offsets into the
// input spec onto the formatted string. Most scenarios are verified twice:
// one offset at a time through FormatUrl(), then for a whole vector of
// consecutive offsets through FormatUrlWithOffsets(). Offsets that the test
// expects to have no counterpart in the output are string16::npos (kNpos).
TEST(NetUtilTest, FormatUrlAdjustOffset) {
  // Plain URL: in-range offsets are expected to come back unchanged, and
  // offsets of 26 (the spec length) or more to come back as npos.
  const AdjustOffsetCase basic_cases[] = {
    {0, 0},
    {3, 3},
    {5, 5},
    {6, 6},
    {13, 13},
    {21, 21},
    {22, 22},
    {23, 23},
    {25, 25},
    {26, string16::npos},
    {500000, string16::npos},
    {string16::npos, string16::npos},
  };
  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(basic_cases); ++i) {
    size_t offset = basic_cases[i].input_offset;
    FormatUrl(GURL("http://www.google.com/foo/"), "en",
              kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL,
              NULL, NULL, &offset);
    EXPECT_EQ(basic_cases[i].output_offset, offset);
  }

  // Same URL, all offsets at once. One extra offset (== url_size) is pushed
  // so the end-of-string position is covered as well.
  size_t url_size = 26;
  std::vector<size_t> offsets;
  for (size_t i = 0; i < url_size + 1; ++i)
    offsets.push_back(i);
  FormatUrlWithOffsets(GURL("http://www.google.com/foo/"), "en",
                       kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL,
                       NULL, NULL, &offsets);
  for (size_t i = 0; i < url_size; ++i)
    EXPECT_EQ(i, offsets[i]);
  EXPECT_EQ(kNpos, offsets[url_size]);

  // Username/password omitted: offsets inside the removed credentials are
  // expected to become npos, and later offsets to shift left by the removed
  // length.
  const struct {
    const char* input_url;
    size_t input_offset;
    size_t output_offset;
  } omit_auth_cases[] = {
    {"http://foo:bar@www.google.com/", 6, 6},
    {"http://foo:bar@www.google.com/", 7, string16::npos},
    {"http://foo:bar@www.google.com/", 8, string16::npos},
    {"http://foo:bar@www.google.com/", 10, string16::npos},
    {"http://foo:bar@www.google.com/", 11, string16::npos},
    {"http://foo:bar@www.google.com/", 14, string16::npos},
    {"http://foo:bar@www.google.com/", 15, 7},
    {"http://foo:bar@www.google.com/", 25, 17},
    {"http://foo@www.google.com/", 9, string16::npos},
    {"http://foo@www.google.com/", 11, 7},
  };
  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(omit_auth_cases); ++i) {
    size_t offset = omit_auth_cases[i].input_offset;
    FormatUrl(GURL(omit_auth_cases[i].input_url), "en",
              kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL,
              NULL, NULL, &offset);
    EXPECT_EQ(omit_auth_cases[i].output_offset, offset);
  }

  url_size = 30;
  offsets.clear();
  for (size_t i = 0; i < url_size; ++i)
    offsets.push_back(i);
  FormatUrlWithOffsets(GURL("http://foo:bar@www.google.com/"), "en",
                       kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL,
                       NULL, NULL, &offsets);
  for (size_t i = 0; i < 7; ++i)
    EXPECT_EQ(i, offsets[i]);
  for (size_t i = 7; i < 15; ++i)
    EXPECT_EQ(kNpos, offsets[i]);
  // Starts at 15 so that the first host character is verified too; it maps
  // to 7 (15 - 8), the same expectation as the {15, 7} single-offset case
  // above. Starting at 16 would leave offsets[15] unchecked.
  for (size_t i = 15; i < url_size; ++i)
    EXPECT_EQ(i - 8, offsets[i]);

  // view-source: URL with a username. Offsets 19-22 ("foo@") are expected to
  // become npos and later offsets to shift left by 4.
  const AdjustOffsetCase view_source_cases[] = {
    {0, 0},
    {3, 3},
    {11, 11},
    {12, 12},
    {13, 13},
    {18, 18},
    {19, string16::npos},
    {20, string16::npos},
    {23, 19},
    {26, 22},
    {string16::npos, string16::npos},
  };
  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(view_source_cases); ++i) {
    size_t offset = view_source_cases[i].input_offset;
    FormatUrl(GURL("view-source:http://foo@www.google.com/"), "en",
              kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL,
              NULL, NULL, &offset);
    EXPECT_EQ(view_source_cases[i].output_offset, offset);
  }

  url_size = 38;
  offsets.clear();
  for (size_t i = 0; i < url_size; ++i)
    offsets.push_back(i);
  FormatUrlWithOffsets(GURL("view-source:http://foo@www.google.com/"), "en",
                       kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL,
                       NULL, NULL, &offsets);
  size_t expected[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
                       17, 18, kNpos, kNpos, kNpos, kNpos, 19, 20, 21, 22, 23,
                       24, 25, 26, 27, 28, 29, 30, 31, 32, 33};
  ASSERT_EQ(url_size, arraysize(expected));
  for (size_t i = 0; i < url_size; ++i)
    EXPECT_EQ(expected[i], offsets[i]);

  // IDN hostname: offsets inside the punycode label are expected to become
  // npos; offsets from ".jp" onward map onto the shorter formatted host.
  const AdjustOffsetCase idn_hostname_cases[] = {
    {8, string16::npos},
    {16, string16::npos},
    {24, string16::npos},
    {25, 12},
    {30, 17},
  };
  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(idn_hostname_cases); ++i) {
    size_t offset = idn_hostname_cases[i].input_offset;
    // "http://\x671d\x65e5\x3042\x3055\x3072.jp/foo/"
    FormatUrl(GURL("http://xn--l8jvb1ey91xtjb.jp/foo/"), "ja",
              kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL,
              NULL, NULL, &offset);
    EXPECT_EQ(idn_hostname_cases[i].output_offset, offset);
  }

  url_size = 33;
  offsets.clear();
  for (size_t i = 0; i < url_size; ++i)
    offsets.push_back(i);
  FormatUrlWithOffsets(GURL("http://xn--l8jvb1ey91xtjb.jp/foo/"), "ja",
                       kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL,
                       NULL, NULL, &offsets);
  size_t expected_1[] = {0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, kNpos, kNpos,
                         kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos,
                         kNpos, kNpos, kNpos, kNpos, kNpos, 12, 13, 14, 15, 16,
                         17, 18, 19};
  ASSERT_EQ(url_size, arraysize(expected_1));
  for (size_t i = 0; i < url_size; ++i)
    EXPECT_EQ(expected_1[i], offsets[i]);

  // Unescaping: only the offset at the start of each escape sequence is
  // expected to survive; offsets inside a sequence become npos.
  const AdjustOffsetCase unescape_cases[] = {
    {25, 25},
    {26, string16::npos},
    {27, string16::npos},
    {28, 26},
    {35, string16::npos},
    {41, 31},
    {59, 33},
    {60, string16::npos},
    {67, string16::npos},
    {68, string16::npos},
  };
  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(unescape_cases); ++i) {
    size_t offset = unescape_cases[i].input_offset;
    // "http://www.google.com/foo bar/\x30B0\x30FC\x30B0\x30EB"
    FormatUrl(GURL(
        "http://www.google.com/foo%20bar/%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB"),
        "en", kFormatUrlOmitUsernamePassword, UnescapeRule::SPACES, NULL,
        NULL, &offset);
    EXPECT_EQ(unescape_cases[i].output_offset, offset);
  }

  url_size = 68;
  offsets.clear();
  for (size_t i = 0; i < url_size; ++i)
    offsets.push_back(i);
  FormatUrlWithOffsets(GURL(
      "http://www.google.com/foo%20bar/%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB"),
      "en", kFormatUrlOmitUsernamePassword, UnescapeRule::SPACES, NULL, NULL,
      &offsets);
  size_t expected_2[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                         16, 17, 18, 19, 20, 21, 22, 23, 24, 25, kNpos, kNpos,
                         26, 27, 28, 29, 30, kNpos, kNpos, kNpos, kNpos, kNpos,
                         kNpos, kNpos, kNpos, 31, kNpos, kNpos, kNpos, kNpos,
                         kNpos, kNpos, kNpos, kNpos, 32, kNpos, kNpos, kNpos,
                         kNpos, kNpos, kNpos, kNpos, kNpos, 33, kNpos, kNpos,
                         kNpos, kNpos, kNpos, kNpos, kNpos, kNpos};
  ASSERT_EQ(url_size, arraysize(expected_2));
  for (size_t i = 0; i < url_size; ++i)
    EXPECT_EQ(expected_2[i], offsets[i]);

  // Non-ASCII reference: offsets in the middle of a multi-byte UTF-8
  // character are expected to become npos.
  const AdjustOffsetCase ref_cases[] = {
    {30, 30},
    {31, 31},
    {32, string16::npos},
    {34, 32},
    {35, string16::npos},
    {37, 33},
    {38, string16::npos},
  };
  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(ref_cases); ++i) {
    size_t offset = ref_cases[i].input_offset;
    // "http://www.google.com/foo.html#\x30B0\x30B0z"
    FormatUrl(GURL(
        "http://www.google.com/foo.html#\xE3\x82\xB0\xE3\x82\xB0z"), "en",
        kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL, NULL, NULL,
        &offset);
    EXPECT_EQ(ref_cases[i].output_offset, offset);
  }

  url_size = 38;
  offsets.clear();
  for (size_t i = 0; i < url_size; ++i)
    offsets.push_back(i);
  // "http://www.google.com/foo.html#\x30B0\x30B0z"
  FormatUrlWithOffsets(GURL(
      "http://www.google.com/foo.html#\xE3\x82\xB0\xE3\x82\xB0z"), "en",
      kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL, NULL, NULL,
      &offsets);
  size_t expected_3[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                         16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
                         30, 31, kNpos, kNpos, 32, kNpos, kNpos, 33};
  ASSERT_EQ(url_size, arraysize(expected_3));
  for (size_t i = 0; i < url_size; ++i)
    EXPECT_EQ(expected_3[i], offsets[i]);

  // kFormatUrlOmitHTTP: offsets inside the dropped "http://" prefix are
  // expected to become npos; the rest shift left by 7.
  const AdjustOffsetCase omit_http_cases[] = {
    {0, string16::npos},
    {3, string16::npos},
    {7, 0},
    {8, 1},
  };
  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(omit_http_cases); ++i) {
    size_t offset = omit_http_cases[i].input_offset;
    FormatUrl(GURL("http://www.google.com"), "en",
              kFormatUrlOmitHTTP, UnescapeRule::NORMAL, NULL, NULL, &offset);
    EXPECT_EQ(omit_http_cases[i].output_offset, offset);
  }

  url_size = 23;
  offsets.clear();
  for (size_t i = 0; i < url_size; ++i)
    offsets.push_back(i);
  FormatUrlWithOffsets(GURL("http://www.google.com"), "en",
      kFormatUrlOmitHTTP, UnescapeRule::NORMAL, NULL, NULL, &offsets);
  size_t expected_4[] = {kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 0, 1,
                         2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, kNpos};
  ASSERT_EQ(url_size, arraysize(expected_4));
  for (size_t i = 0; i < url_size; ++i)
    EXPECT_EQ(expected_4[i], offsets[i]);

  // kFormatUrlOmitHTTP with a host starting with "ftp.": the expectations
  // below treat the scheme as retained, so offsets are unchanged.
  const AdjustOffsetCase omit_http_start_with_ftp[] = {
    {0, 0},
    {3, 3},
    {8, 8},
  };
  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(omit_http_start_with_ftp); ++i) {
    size_t offset = omit_http_start_with_ftp[i].input_offset;
    FormatUrl(GURL("http://ftp.google.com"), "en",
              kFormatUrlOmitHTTP, UnescapeRule::NORMAL, NULL, NULL, &offset);
    EXPECT_EQ(omit_http_start_with_ftp[i].output_offset, offset);
  }

  url_size = 23;
  offsets.clear();
  for (size_t i = 0; i < url_size; ++i)
    offsets.push_back(i);
  FormatUrlWithOffsets(GURL("http://ftp.google.com"), "en",
      kFormatUrlOmitHTTP, UnescapeRule::NORMAL, NULL, NULL, &offsets);
  size_t expected_5[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                         16, 17, 18, 19, 20, 21, kNpos};
  ASSERT_EQ(url_size, arraysize(expected_5));
  for (size_t i = 0; i < url_size; ++i)
    EXPECT_EQ(expected_5[i], offsets[i]);

  // kFormatUrlOmitAll: offsets inside the leading "http://user@" (12
  // characters) are expected to become npos; the rest shift left by 12.
  const AdjustOffsetCase omit_all_cases[] = {
    {12, 0},
    {13, 1},
    {0, string16::npos},
    {3, string16::npos},
  };
  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(omit_all_cases); ++i) {
    size_t offset = omit_all_cases[i].input_offset;
    FormatUrl(GURL("http://user@foo.com/"), "en", kFormatUrlOmitAll,
              UnescapeRule::NORMAL, NULL, NULL, &offset);
    EXPECT_EQ(omit_all_cases[i].output_offset, offset);
  }

  url_size = 21;
  offsets.clear();
  for (size_t i = 0; i < url_size; ++i)
    offsets.push_back(i);
  FormatUrlWithOffsets(GURL("http://user@foo.com/"), "en", kFormatUrlOmitAll,
                       UnescapeRule::NORMAL, NULL, NULL, &offsets);
  size_t expected_6[] = {kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos,
                         kNpos, kNpos, kNpos, kNpos, 0, 1, 2, 3, 4, 5, 6, 7,
                         kNpos};
  ASSERT_EQ(url_size, arraysize(expected_6));
  for (size_t i = 0; i < url_size; ++i)
    EXPECT_EQ(expected_6[i], offsets[i]);
}
2086
// Table-driven check of SimplifyUrlForRequest(): each row pairs an input URL
// with the URL the function is expected to return for it.
TEST(NetUtilTest, SimplifyUrlForRequest) {
  struct {
    const char* input;
    const char* expected;
  } kCases[] = {
    // The reference section should be stripped.
    { "http://www.google.com:78/foobar?query=1#hash",
      "http://www.google.com:78/foobar?query=1" },
    // The reference section can itself contain '#'.
    { "http://192.168.0.1?query=1#hash#10#11#13#14",
      "http://192.168.0.1?query=1" },
    // Username/password should be stripped.
    { "http://user:pass@google.com",
      "http://google.com/" },
    // Both the reference and the username/password should be stripped.
    { "http://user:pass@google.com:80/sup?yo#X#X",
      "http://google.com/sup?yo" },
    // HTTPS URL: strip both the reference and the username/password.
    { "https://user:pass@google.com:80/sup?yo#X#X",
      "https://google.com:80/sup?yo" },
    // FTP URL: strip both the reference and the username/password.
    { "ftp://user:pass@google.com:80/sup?yo#X#X",
      "ftp://google.com:80/sup?yo" },
    // A nonstandard URL is expected to come back unchanged.
    { "foobar://user:pass@google.com:80/sup?yo#X#X",
      "foobar://user:pass@google.com:80/sup?yo#X#X" },
  };

  const size_t case_count = ARRAYSIZE_UNSAFE(kCases);
  for (size_t n = 0; n != case_count; ++n) {
    SCOPED_TRACE(base::StringPrintf("Test[%" PRIuS "]: %s", n,
                                    kCases[n].input));
    const GURL original(kCases[n].input);
    const GURL wanted(kCases[n].expected);
    EXPECT_EQ(wanted, SimplifyUrlForRequest(original));
  }
}
2131
// Feeds malformed and well-formed port lists to SetExplicitlyAllowedPorts()
// and checks the resulting size of explicitly_allowed_ports.
// NOTE(review): the expectations appear to assume the set is empty when the
// test starts -- confirm against the implementation.
TEST(NetUtilTest, SetExplicitlyAllowedPortsTest) {
  // After each of these specs the set is expected to hold no ports.
  const std::string kMalformedSpecs[] = {
    "1,2,a", "'1','2'", "1, 2, 3", "1 0,11,12"
  };
  // The spec at index k names exactly k ports, so the index doubles as the
  // expected set size.
  const std::string kWellFormedSpecs[] = {
    "", "1", "1,2", "1,2,3", "10,11,12,13"
  };

  for (size_t k = 0; k < ARRAYSIZE_UNSAFE(kMalformedSpecs); ++k) {
    SetExplicitlyAllowedPorts(kMalformedSpecs[k]);
    EXPECT_EQ(0u, explicitly_allowed_ports.size());
  }

  for (size_t k = 0; k < ARRAYSIZE_UNSAFE(kWellFormedSpecs); ++k) {
    SetExplicitlyAllowedPorts(kWellFormedSpecs[k]);
    EXPECT_EQ(k, explicitly_allowed_ports.size());
  }
}
2146
// GetHostOrSpecFromURL() is expected to yield the host (without a trailing
// dot) for URLs that have one, and the full spec for a file: URL with no host.
TEST(NetUtilTest, GetHostOrSpecFromURL) {
  const GURL plain_host("http://example.com/test");
  EXPECT_EQ("example.com", GetHostOrSpecFromURL(plain_host));

  const GURL trailing_dot_host("http://example.com./test");
  EXPECT_EQ("example.com", GetHostOrSpecFromURL(trailing_dot_host));

  const GURL hostless_file("file:///tmp/test.html");
  EXPECT_EQ("file:///tmp/test.html", GetHostOrSpecFromURL(hostless_file));
}
2155
2156 // Test that invalid IP literals fail to parse.
TEST(NetUtilTest,ParseIPLiteralToNumber_FailParse)2157 TEST(NetUtilTest, ParseIPLiteralToNumber_FailParse) {
2158 IPAddressNumber number;
2159
2160 EXPECT_FALSE(ParseIPLiteralToNumber("bad value", &number));
2161 EXPECT_FALSE(ParseIPLiteralToNumber("bad:value", &number));
2162 EXPECT_FALSE(ParseIPLiteralToNumber("", &number));
2163 EXPECT_FALSE(ParseIPLiteralToNumber("192.168.0.1:30", &number));
2164 EXPECT_FALSE(ParseIPLiteralToNumber(" 192.168.0.1 ", &number));
2165 EXPECT_FALSE(ParseIPLiteralToNumber("[::1]", &number));
2166 }
2167
2168 // Test parsing an IPv4 literal.
TEST(NetUtilTest,ParseIPLiteralToNumber_IPv4)2169 TEST(NetUtilTest, ParseIPLiteralToNumber_IPv4) {
2170 IPAddressNumber number;
2171 EXPECT_TRUE(ParseIPLiteralToNumber("192.168.0.1", &number));
2172 EXPECT_EQ("192,168,0,1", DumpIPNumber(number));
2173 }
2174
2175 // Test parsing an IPv6 literal.
TEST(NetUtilTest,ParseIPLiteralToNumber_IPv6)2176 TEST(NetUtilTest, ParseIPLiteralToNumber_IPv6) {
2177 IPAddressNumber number;
2178 EXPECT_TRUE(ParseIPLiteralToNumber("1:abcd::3:4:ff", &number));
2179 EXPECT_EQ("0,1,171,205,0,0,0,0,0,0,0,3,0,4,0,255", DumpIPNumber(number));
2180 }
2181
2182 // Test mapping an IPv4 address to an IPv6 address.
TEST(NetUtilTest,ConvertIPv4NumberToIPv6Number)2183 TEST(NetUtilTest, ConvertIPv4NumberToIPv6Number) {
2184 IPAddressNumber ipv4_number;
2185 EXPECT_TRUE(ParseIPLiteralToNumber("192.168.0.1", &ipv4_number));
2186
2187 IPAddressNumber ipv6_number =
2188 ConvertIPv4NumberToIPv6Number(ipv4_number);
2189
2190 // ::ffff:192.168.1.1
2191 EXPECT_EQ("0,0,0,0,0,0,0,0,0,0,255,255,192,168,0,1",
2192 DumpIPNumber(ipv6_number));
2193 }
2194
2195 // Test parsing invalid CIDR notation literals.
TEST(NetUtilTest,ParseCIDRBlock_Invalid)2196 TEST(NetUtilTest, ParseCIDRBlock_Invalid) {
2197 const char* bad_literals[] = {
2198 "foobar",
2199 "",
2200 "192.168.0.1",
2201 "::1",
2202 "/",
2203 "/1",
2204 "1",
2205 "192.168.1.1/-1",
2206 "192.168.1.1/33",
2207 "::1/-3",
2208 "a::3/129",
2209 "::1/x",
2210 "192.168.0.1//11"
2211 };
2212
2213 for (size_t i = 0; i < arraysize(bad_literals); ++i) {
2214 IPAddressNumber ip_number;
2215 size_t prefix_length_in_bits;
2216
2217 EXPECT_FALSE(ParseCIDRBlock(bad_literals[i],
2218 &ip_number,
2219 &prefix_length_in_bits));
2220 }
2221 }
2222
2223 // Test parsing a valid CIDR notation literal.
TEST(NetUtilTest,ParseCIDRBlock_Valid)2224 TEST(NetUtilTest, ParseCIDRBlock_Valid) {
2225 IPAddressNumber ip_number;
2226 size_t prefix_length_in_bits;
2227
2228 EXPECT_TRUE(ParseCIDRBlock("192.168.0.1/11",
2229 &ip_number,
2230 &prefix_length_in_bits));
2231
2232 EXPECT_EQ("192,168,0,1", DumpIPNumber(ip_number));
2233 EXPECT_EQ(11u, prefix_length_in_bits);
2234 }
2235
// Table-driven check of IPNumberMatchesPrefix(): each row gives a CIDR block,
// an IP literal, and whether the literal is expected to fall inside the
// block. Rows cover same-family and mixed IPv4/IPv6 combinations.
TEST(NetUtilTest, IPNumberMatchesPrefix) {
  struct {
    const char* cidr_literal;
    const char* ip_literal;
    bool expected_to_match;
  } tests[] = {
    // IPv4 prefix with IPv4 inputs.
    {
      "10.10.1.32/27",
      "10.10.1.44",
      true
    },
    {
      // Last address of the block: a /27 starting at .32 spans .32 - .63.
      "10.10.1.32/27",
      "10.10.1.63",
      true
    },
    {
      "10.10.1.32/27",
      "10.10.1.90",
      false
    },

    // IPv6 prefix with IPv6 inputs.
    {
      "2001:db8::/32",
      "2001:DB8:3:4::5",
      true
    },
    {
      "2001:db8::/32",
      "2001:c8::",
      false
    },

    // IPv6 prefix with IPv4 inputs.
    {
      "2001:db8::/33",
      "192.168.0.1",
      false
    },
    {
      "::ffff:192.168.0.1/112",
      "192.168.33.77",
      true
    },

    // IPv4 prefix with IPv6 inputs.
    {
      "10.11.33.44/16",
      "::ffff:0a0b:89",
      true
    },
    {
      "10.11.33.44/16",
      "::ffff:10.12.33.44",
      false
    },
  };
  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(tests); ++i) {
    SCOPED_TRACE(base::StringPrintf("Test[%" PRIuS "]: %s, %s", i,
                                    tests[i].cidr_literal,
                                    tests[i].ip_literal));

    // Both parses are preconditions for the match check, so they are fatal:
    // a failed parse would otherwise pass an unset prefix length on to
    // IPNumberMatchesPrefix().
    IPAddressNumber ip_number;
    ASSERT_TRUE(ParseIPLiteralToNumber(tests[i].ip_literal, &ip_number));

    IPAddressNumber ip_prefix;
    size_t prefix_length_in_bits;

    ASSERT_TRUE(ParseCIDRBlock(tests[i].cidr_literal,
                               &ip_prefix,
                               &prefix_length_in_bits));

    EXPECT_EQ(tests[i].expected_to_match,
              IPNumberMatchesPrefix(ip_number,
                                    ip_prefix,
                                    prefix_length_in_bits));
  }
}
2316
// Checks net::IsLocalhost() against a list of names/addresses that must be
// accepted as localhost and a list of near-misses that must be rejected. The
// host under test is appended to the failure message so a failing row can be
// identified.
TEST(NetUtilTest, IsLocalhost) {
  const char* const kLocalHosts[] = {
    "localhost",
    "localhost.localdomain",
    "localhost6",
    "localhost6.localdomain6",
    "127.0.0.1",
    "127.0.1.0",
    "127.1.0.0",
    "127.0.0.255",
    "127.0.255.0",
    "127.255.0.0",
    "::1",
    "0:0:0:0:0:0:0:1",
  };
  // "::1:1" was previously asserted twice; it is listed once here.
  const char* const kNonLocalHosts[] = {
    "localhostx",
    "foo.localdomain",
    "localhost6x",
    "localhost.localdomain6",
    "localhost6.localdomain",
    "127.0.0.1.1",
    ".127.0.0.255",
    "::2",
    "::1:1",
    "0:0:0:0:1:0:0:1",
    "0:0:0:0:0:0:0:0:1",
  };

  for (size_t i = 0; i < arraysize(kLocalHosts); ++i)
    EXPECT_TRUE(net::IsLocalhost(kLocalHosts[i])) << kLocalHosts[i];

  for (size_t i = 0; i < arraysize(kNonLocalHosts); ++i)
    EXPECT_FALSE(net::IsLocalhost(kNonLocalHosts[i])) << kNonLocalHosts[i];
}
2344
2345 // Verify GetNetworkList().
TEST(NetUtilTest,GetNetworkList)2346 TEST(NetUtilTest, GetNetworkList) {
2347 NetworkInterfaceList list;
2348 ASSERT_TRUE(GetNetworkList(&list));
2349
2350 for (NetworkInterfaceList::iterator it = list.begin();
2351 it != list.end(); ++it) {
2352 // Verify that the name is not empty.
2353 EXPECT_FALSE(it->name.empty());
2354
2355 // Verify that the address is correct.
2356 EXPECT_TRUE(it->address.size() == kIPv4AddressSize ||
2357 it->address.size() == kIPv6AddressSize)
2358 << "Invalid address of size " << it->address.size();
2359 bool all_zeroes = true;
2360 for (size_t i = 0; i < it->address.size(); ++i) {
2361 if (it->address[i] != 0) {
2362 all_zeroes = false;
2363 break;
2364 }
2365 }
2366 EXPECT_FALSE(all_zeroes);
2367 }
2368 }
2369
// Applies ClampComponentOffset(5) to the offsets 0..9 and checks that offsets
// below 5 come back as npos while the others are unchanged.
TEST(NetUtilTest, AdjustComponentOffset) {
  std::vector<size_t> old_offsets;
  for (size_t i = 0; i < 10; ++i)
    old_offsets.push_back(i);
  std::vector<size_t> new_offsets;
  std::transform(old_offsets.begin(),
                 old_offsets.end(),
                 std::back_inserter(new_offsets),
                 ClampComponentOffset(5));
  size_t expected_1[] = {kNpos, kNpos, kNpos, kNpos, kNpos, 5, 6, 7, 8, 9};
  // Fatal on mismatch: the loop below indexes new_offsets with the bounds of
  // expected_1, so it must not run if the sizes differ.
  ASSERT_EQ(arraysize(expected_1), new_offsets.size());
  EXPECT_EQ(old_offsets.size(), new_offsets.size());
  for (size_t i = 0; i < arraysize(expected_1); ++i)
    EXPECT_EQ(expected_1[i], new_offsets[i]);
}
2385
2386 } // namespace net
2387