// Host-name canonicalization test script.
//
// Each row of `cases` is a pair [input host, expected canonical host]. The
// driver loop at the bottom wraps both in "http://<host>/" and hands the
// resulting expression strings to shouldBe().
//
// NOTE(review): description(), shouldBe() and canonicalize() are not defined
// in this file; presumably they come from the surrounding test harness --
// confirm against the page that loads this script.
description("Canonicalization of host names.");

// Declared with `var` (rather than assigned as implicit globals) so the script
// also evaluates under strict mode; in a classic script these remain globals.
var cases = [
  // Basic canonicalization, uppercase should be converted to lowercase.
  ["GoOgLe.CoM", "google.com"],
  // Spaces and some other characters should be escaped.
  ["Goo%20 goo%7C|.com", "goo%20%20goo%7C%7C.com"],
  // Exciting different types of spaces!
  ["GOO\u00a0\u3000goo.com", "goo%20%20goo.com"],
  // Other types of space (no-break, zero-width, zero-width-no-break) are
  // name-prepped away to nothing.
  ["GOO\u200b\u2060\ufeffgoo.com", "googoo.com"],
  // Ideographic full stop (full-width period for Chinese, etc.) should be
  // treated as a dot.
  ["www.foo\u3002" + "bar.com", "www.foo.bar.com"],
  // Invalid unicode characters should fail...
  // ...In wide input, ICU will barf and we'll end up with the input as
  //    escaped UTF-8 (the invalid character should be replaced with the
  //    replacement character).
  ["\ufdd0zyx.com", "%EF%BF%BDzyx.com"],
  // ...This is the same as the previous case, but with the input escaped.
  ["%ef%b7%90zyx.com", "%EF%BF%BDzyx.com"],
  // Test name prepping, fullwidth input should be converted to ASCII and NOT
  // IDN-ized. This is "Go" in fullwidth UTF-8/UTF-16.
  ["\uff27\uff4f.com", "go.com"],
  // Test that fullwidth escaped values are properly name-prepped,
  // then converted or rejected.
  // ...%41 in fullwidth = 'A' (also as escaped UTF-8 input)
  ["\uff05\uff14\uff11.com", "a.com"],
  ["%ef%bc%85%ef%bc%94%ef%bc%91.com", "a.com"],
  // ...%00 in fullwidth should fail (also as escaped UTF-8 input)
  ["\uff05\uff10\uff10.com", "%00.com"],
  ["%ef%bc%85%ef%bc%90%ef%bc%90.com", "%00.com"],
  // Basic IDN support, UTF-8 and UTF-16 input should be converted to IDN.
  ["\u4f60\u597d\u4f60\u597d", "xn--6qqa088eba"],
  // Mixed UTF-8 and escaped UTF-8 (narrow case) and UTF-16 and escaped
  // UTF-8 (wide case). The output should be equivalent to the true wide
  // character input above.
  ["%E4%BD%A0%E5%A5%BD\u4f60\u597d", "xn--6qqa088eba"],
  // Invalid escaped characters should fail and the percents should be
  // escaped.
  ["%zz%66%a", "%25zzf%25a"],
  // If we get an invalid character that has been escaped.
  ["%25", "%25"],
  ["hello%00", "hello%00"],
  // Escaped numbers should be treated like IP addresses if they are.
  ["%30%78%63%30%2e%30%32%35%30.01", "192.168.0.1"],
  ["%30%78%63%30%2e%30%32%35%30.01%2e", "192.168.0.1"],
  // Invalid escaping should trigger the regular host error handling.
  ["%3g%78%63%30%2e%30%32%35%30%2E.01", "%253gxc0.0250..01"],
  // Something that isn't exactly an IP should get treated as a host and
  // spaces escaped.
  ["192.168.0.1 hello", "192.168.0.1%20hello"],
  // Fullwidth and escaped UTF-8 fullwidth should still be treated as IP.
  // These are "0Xc0.0250.01" in fullwidth.
  ["\uff10\uff38\uff43\uff10\uff0e\uff10\uff12\uff15\uff10\uff0e\uff10\uff11", "192.168.0.1"],
  // Broken IP addresses get marked as such.
  ["192.168.0.257", "192.168.0.257"],
  ["[google.com]", "[google.com]"],
  // Cyrillic letter followed by ( should return Punycode with the ( escaped
  // before the Punycode string was created. I.e. if ( is escaped after the
  // Punycode is created we would get xn--%28-8tb (incorrect).
  ["\u0442(", "xn--%28-7ed"]
];

// Kept as top-level variables (not loop-local) so they stay visible as globals
// after the script has run, exactly as before.
var test_vector;
var expected_result;

for (var i = 0; i < cases.length; ++i) {
  test_vector = cases[i][0];
  expected_result = cases[i][1];
  // Both arguments are strings of source text, with the expected value wrapped
  // in single quotes. The hosts are spliced in raw, so a vector containing a
  // single quote or backslash would need escaping before being added above.
  shouldBe("canonicalize('http://" + test_vector + "/')",
           "'http://" + expected_result + "/'");
}

// NOTE(review): presumably read by the harness to confirm the script ran to
// completion -- confirm.
var successfullyParsed = true;