from test import test_support
import unittest
import urlparse

# Base URLs used by the urljoin() tests below.
RFC1808_BASE = "http://a/b/c/d;p?q#f"
RFC2396_BASE = "http://a/b/c/d;p?q"
RFC3986_BASE = 'http://a/b/c/d;p?q'
SIMPLE_BASE = 'http://a/b/c/d'

# Test cases for parse_qsl().  Each case is a two-tuple: the query string
# and the expected list of (name, value) pairs when blank values are kept.
# NOTE: under Python 2 a b'...' literal is a plain str, so the bytes rows
# exercise the same code path as the str rows.
parse_qsl_test_cases = [
    ("", []),
    ("&", []),
    ("&&", []),
    ("=", [('', '')]),
    ("=a", [('', 'a')]),
    ("a", [('a', '')]),
    ("a=", [('a', '')]),
    ("&a=b", [('a', 'b')]),
    ("a=a+b&b=b+c", [('a', 'a b'), ('b', 'b c')]),
    ("a=1&a=2", [('a', '1'), ('a', '2')]),
    (";", []),
    (";;", []),
    (";a=b", [('a', 'b')]),
    ("a=a+b;b=b+c", [('a', 'a b'), ('b', 'b c')]),
    ("a=1;a=2", [('a', '1'), ('a', '2')]),
    (b";", []),
    (b";;", []),
    (b";a=b", [(b'a', b'b')]),
    (b"a=a+b;b=b+c", [(b'a', b'a b'), (b'b', b'b c')]),
    (b"a=1;a=2", [(b'a', b'1'), (b'a', b'2')]),
]

# Test cases for parse_qs().  Each case is a two-tuple: the query string
# and the expected dictionary mapping each name to its list of values when
# blank values are kept.
parse_qs_test_cases = [
    ("", {}),
    ("&", {}),
    ("&&", {}),
    ("=", {'': ['']}),
    ("=a", {'': ['a']}),
    ("a", {'a': ['']}),
    ("a=", {'a': ['']}),
    ("&a=b", {'a': ['b']}),
    ("a=a+b&b=b+c", {'a': ['a b'], 'b': ['b c']}),
    ("a=1&a=2", {'a': ['1', '2']}),
    (b"", {}),
    (b"&", {}),
    (b"&&", {}),
    (b"=", {b'': [b'']}),
    (b"=a", {b'': [b'a']}),
    (b"a", {b'a': [b'']}),
    (b"a=", {b'a': [b'']}),
    (b"&a=b", {b'a': [b'b']}),
    (b"a=a+b&b=b+c", {b'a': [b'a b'], b'b': [b'b c']}),
    (b"a=1&a=2", {b'a': [b'1', b'2']}),
    (";", {}),
    (";;", {}),
    (";a=b", {'a': ['b']}),
    ("a=a+b;b=b+c", {'a': ['a b'], 'b': ['b c']}),
    ("a=1;a=2", {'a': ['1', '2']}),
    (b";", {}),
    (b";;", {}),
    (b";a=b", {b'a': [b'b']}),
    (b"a=a+b;b=b+c", {b'a': [b'a b'], b'b': [b'b c']}),
    (b"a=1;a=2", {b'a': [b'1', b'2']}),
]


class UrlParseTestCase(unittest.TestCase):
    """Tests for the urlparse module.

    Covers urlparse/urlsplit and their inverses, urljoin, urldefrag,
    parse_qs/parse_qsl and the attributes of the parse results.
    """

    def checkRoundtrips(self, url, parsed, split):
        """Check that *url* parses as expected and survives a round trip.

        url    -- the URL string to parse.
        parsed -- the 6-tuple expected from urlparse.urlparse(url).
        split  -- the 5-tuple expected from urlparse.urlsplit(url).

        For both urlparse() and urlsplit() this asserts that the result
        equals the expected tuple, that un-parsing it gives back *url*,
        and that re-parsing geturl() yields an identical result.
        """
        result = urlparse.urlparse(url)
        self.assertEqual(result, parsed)
        t = (result.scheme, result.netloc, result.path,
             result.params, result.query, result.fragment)
        self.assertEqual(t, parsed)
        # put it back together and it should be the same
        result2 = urlparse.urlunparse(result)
        self.assertEqual(result2, url)
        self.assertEqual(result2, result.geturl())

        # the result of geturl() is a fixpoint; we can always parse it
        # again to get the same result:
        result3 = urlparse.urlparse(result.geturl())
        self.assertEqual(result3.geturl(), result.geturl())
        self.assertEqual(result3, result)
        self.assertEqual(result3.scheme, result.scheme)
        self.assertEqual(result3.netloc, result.netloc)
        self.assertEqual(result3.path, result.path)
        self.assertEqual(result3.params, result.params)
        self.assertEqual(result3.query, result.query)
        self.assertEqual(result3.fragment, result.fragment)
        self.assertEqual(result3.username, result.username)
        self.assertEqual(result3.password, result.password)
        self.assertEqual(result3.hostname, result.hostname)
        self.assertEqual(result3.port, result.port)

        # check the roundtrip using urlsplit() as well
        result = urlparse.urlsplit(url)
        self.assertEqual(result, split)
        t = (result.scheme, result.netloc, result.path,
             result.query, result.fragment)
        self.assertEqual(t, split)
        result2 = urlparse.urlunsplit(result)
        self.assertEqual(result2, url)
        self.assertEqual(result2, result.geturl())

        # check the fixpoint property of re-parsing the result of geturl()
        result3 = urlparse.urlsplit(result.geturl())
        self.assertEqual(result3.geturl(), result.geturl())
        self.assertEqual(result3, result)
        self.assertEqual(result3.scheme, result.scheme)
        self.assertEqual(result3.netloc, result.netloc)
        self.assertEqual(result3.path, result.path)
        self.assertEqual(result3.query, result.query)
        self.assertEqual(result3.fragment, result.fragment)
        self.assertEqual(result3.username, result.username)
        self.assertEqual(result3.password, result.password)
        self.assertEqual(result3.hostname, result.hostname)
        self.assertEqual(result3.port, result.port)

    def test_qsl(self):
        # Every case is checked twice: once keeping blank values and once
        # dropping them (the expectation is derived by filtering out the
        # pairs whose value is empty).
        for orig, expect in parse_qsl_test_cases:
            result = urlparse.parse_qsl(orig, keep_blank_values=True)
            self.assertEqual(result, expect, "Error parsing %r" % orig)
            expect_without_blanks = [v for v in expect if len(v[1])]
            result = urlparse.parse_qsl(orig, keep_blank_values=False)
            self.assertEqual(result, expect_without_blanks,
                             "Error parsing %r" % orig)

    def test_qs(self):
        # Same scheme as test_qsl, but for the dictionary-returning
        # parse_qs(); a name is dropped when its first value is empty.
        for orig, expect in parse_qs_test_cases:
            result = urlparse.parse_qs(orig, keep_blank_values=True)
            self.assertEqual(result, expect, "Error parsing %r" % orig)
            expect_without_blanks = dict(
                [(v, expect[v]) for v in expect if len(expect[v][0])])
            result = urlparse.parse_qs(orig, keep_blank_values=False)
            self.assertEqual(result, expect_without_blanks,
                             "Error parsing %r" % orig)

    def test_roundtrips(self):
        # Each entry: (url, expected urlparse 6-tuple, expected urlsplit
        # 5-tuple).
        testcases = [
            ('file:///tmp/junk.txt',
             ('file', '', '/tmp/junk.txt', '', '', ''),
             ('file', '', '/tmp/junk.txt', '', '')),
            ('imap://mail.python.org/mbox1',
             ('imap', 'mail.python.org', '/mbox1', '', '', ''),
             ('imap', 'mail.python.org', '/mbox1', '', '')),
            ('mms://wms.sys.hinet.net/cts/Drama/09006251100.asf',
             ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf',
              '', '', ''),
             ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf',
              '', '')),
            ('nfs://server/path/to/file.txt',
             ('nfs', 'server', '/path/to/file.txt', '', '', ''),
             ('nfs', 'server', '/path/to/file.txt', '', '')),
            ('svn+ssh://svn.zope.org/repos/main/ZConfig/trunk/',
             ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/',
              '', '', ''),
             ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/',
              '', '')),
            ('git+ssh://git@github.com/user/project.git',
             ('git+ssh', 'git@github.com', '/user/project.git',
              '', '', ''),
             ('git+ssh', 'git@github.com', '/user/project.git',
              '', '')),
        ]
        for url, parsed, split in testcases:
            self.checkRoundtrips(url, parsed, split)

    def test_http_roundtrips(self):
        # urlparse.urlsplit treats 'http:' as an optimized special case,
        # so we test both 'http:' and 'https:' in all the following.
        # Three cheers for white box knowledge!
        #
        # The scheme is left off every entry here and prepended to the
        # URL and to both expected tuples inside the loop.
        testcases = [
            ('://www.python.org',
             ('www.python.org', '', '', '', ''),
             ('www.python.org', '', '', '')),
            ('://www.python.org#abc',
             ('www.python.org', '', '', '', 'abc'),
             ('www.python.org', '', '', 'abc')),
            ('://www.python.org?q=abc',
             ('www.python.org', '', '', 'q=abc', ''),
             ('www.python.org', '', 'q=abc', '')),
            ('://www.python.org/#abc',
             ('www.python.org', '/', '', '', 'abc'),
             ('www.python.org', '/', '', 'abc')),
            ('://a/b/c/d;p?q#f',
             ('a', '/b/c/d', 'p', 'q', 'f'),
             ('a', '/b/c/d;p', 'q', 'f')),
        ]
        for scheme in ('http', 'https'):
            for url, parsed, split in testcases:
                url = scheme + url
                parsed = (scheme,) + parsed
                split = (scheme,) + split
                self.checkRoundtrips(url, parsed, split)

    def checkJoin(self, base, relurl, expected):
        """Assert that urljoin(base, relurl) equals *expected*.

        The (base, relurl, expected) triple is passed as the failure
        message so a failing join can be identified from the report.
        """
        self.assertEqual(urlparse.urljoin(base, relurl), expected,
                         (base, relurl, expected))

    def test_unparse_parse(self):
        # Splitting/parsing and then re-assembling must be lossless.
        for u in ['Python', './Python', 'x-newscheme://foo.com/stuff',
                  'x://y', 'x:/y', 'x:/', '/']:
            self.assertEqual(urlparse.urlunsplit(urlparse.urlsplit(u)), u)
            self.assertEqual(urlparse.urlunparse(urlparse.urlparse(u)), u)

    def test_RFC1808(self):
        # "normal" cases from RFC 1808:
        self.checkJoin(RFC1808_BASE, 'g:h', 'g:h')
        self.checkJoin(RFC1808_BASE, 'g', 'http://a/b/c/g')
        self.checkJoin(RFC1808_BASE, './g', 'http://a/b/c/g')
        self.checkJoin(RFC1808_BASE, 'g/', 'http://a/b/c/g/')
        self.checkJoin(RFC1808_BASE, '/g', 'http://a/g')
        self.checkJoin(RFC1808_BASE, '//g', 'http://g')
        self.checkJoin(RFC1808_BASE, 'g?y', 'http://a/b/c/g?y')
        self.checkJoin(RFC1808_BASE, 'g?y/./x', 'http://a/b/c/g?y/./x')
        self.checkJoin(RFC1808_BASE, '#s', 'http://a/b/c/d;p?q#s')
        self.checkJoin(RFC1808_BASE, 'g#s', 'http://a/b/c/g#s')
        self.checkJoin(RFC1808_BASE, 'g#s/./x', 'http://a/b/c/g#s/./x')
        self.checkJoin(RFC1808_BASE, 'g?y#s', 'http://a/b/c/g?y#s')
        self.checkJoin(RFC1808_BASE, 'g;x', 'http://a/b/c/g;x')
        self.checkJoin(RFC1808_BASE, 'g;x?y#s', 'http://a/b/c/g;x?y#s')
        self.checkJoin(RFC1808_BASE, '.', 'http://a/b/c/')
        self.checkJoin(RFC1808_BASE, './', 'http://a/b/c/')
        self.checkJoin(RFC1808_BASE, '..', 'http://a/b/')
        self.checkJoin(RFC1808_BASE, '../', 'http://a/b/')
        self.checkJoin(RFC1808_BASE, '../g', 'http://a/b/g')
        self.checkJoin(RFC1808_BASE, '../..', 'http://a/')
        self.checkJoin(RFC1808_BASE, '../../', 'http://a/')
        self.checkJoin(RFC1808_BASE, '../../g', 'http://a/g')

        # "abnormal" cases from RFC 1808:
        self.checkJoin(RFC1808_BASE, '', 'http://a/b/c/d;p?q#f')
        self.checkJoin(RFC1808_BASE, '../../../g', 'http://a/../g')
        self.checkJoin(RFC1808_BASE, '../../../../g', 'http://a/../../g')
        self.checkJoin(RFC1808_BASE, '/./g', 'http://a/./g')
        self.checkJoin(RFC1808_BASE, '/../g', 'http://a/../g')
        self.checkJoin(RFC1808_BASE, 'g.', 'http://a/b/c/g.')
        self.checkJoin(RFC1808_BASE, '.g', 'http://a/b/c/.g')
        self.checkJoin(RFC1808_BASE, 'g..', 'http://a/b/c/g..')
        self.checkJoin(RFC1808_BASE, '..g', 'http://a/b/c/..g')
        self.checkJoin(RFC1808_BASE, './../g', 'http://a/b/g')
        self.checkJoin(RFC1808_BASE, './g/.', 'http://a/b/c/g/')
        self.checkJoin(RFC1808_BASE, 'g/./h', 'http://a/b/c/g/h')
        self.checkJoin(RFC1808_BASE, 'g/../h', 'http://a/b/c/h')

        # RFC 1808 and RFC 1630 disagree on these (according to RFC 1808),
        # so we'll not actually run these tests (which expect 1808 behavior).
        #self.checkJoin(RFC1808_BASE, 'http:g', 'http:g')
        #self.checkJoin(RFC1808_BASE, 'http:', 'http:')

    def test_RFC2368(self):
        # Issue 11467: path that starts with a number is not parsed correctly
        self.assertEqual(urlparse.urlparse('mailto:1337@example.org'),
                         ('mailto', '', '1337@example.org', '', '', ''))

    def test_RFC2396(self):
        # cases from RFC 2396
        self.checkJoin(RFC2396_BASE, 'g:h', 'g:h')
        self.checkJoin(RFC2396_BASE, 'g', 'http://a/b/c/g')
        self.checkJoin(RFC2396_BASE, './g', 'http://a/b/c/g')
        self.checkJoin(RFC2396_BASE, 'g/', 'http://a/b/c/g/')
        self.checkJoin(RFC2396_BASE, '/g', 'http://a/g')
        self.checkJoin(RFC2396_BASE, '//g', 'http://g')
        self.checkJoin(RFC2396_BASE, 'g?y', 'http://a/b/c/g?y')
        self.checkJoin(RFC2396_BASE, '#s', 'http://a/b/c/d;p?q#s')
        self.checkJoin(RFC2396_BASE, 'g#s', 'http://a/b/c/g#s')
        self.checkJoin(RFC2396_BASE, 'g?y#s', 'http://a/b/c/g?y#s')
        self.checkJoin(RFC2396_BASE, 'g;x', 'http://a/b/c/g;x')
        self.checkJoin(RFC2396_BASE, 'g;x?y#s', 'http://a/b/c/g;x?y#s')
        self.checkJoin(RFC2396_BASE, '.', 'http://a/b/c/')
        self.checkJoin(RFC2396_BASE, './', 'http://a/b/c/')
        self.checkJoin(RFC2396_BASE, '..', 'http://a/b/')
        self.checkJoin(RFC2396_BASE, '../', 'http://a/b/')
        self.checkJoin(RFC2396_BASE, '../g', 'http://a/b/g')
        self.checkJoin(RFC2396_BASE, '../..', 'http://a/')
        self.checkJoin(RFC2396_BASE, '../../', 'http://a/')
        self.checkJoin(RFC2396_BASE, '../../g', 'http://a/g')
        self.checkJoin(RFC2396_BASE, '', RFC2396_BASE)
        self.checkJoin(RFC2396_BASE, '../../../g', 'http://a/../g')
        self.checkJoin(RFC2396_BASE, '../../../../g', 'http://a/../../g')
        self.checkJoin(RFC2396_BASE, '/./g', 'http://a/./g')
        self.checkJoin(RFC2396_BASE, '/../g', 'http://a/../g')
        self.checkJoin(RFC2396_BASE, 'g.', 'http://a/b/c/g.')
        self.checkJoin(RFC2396_BASE, '.g', 'http://a/b/c/.g')
        self.checkJoin(RFC2396_BASE, 'g..', 'http://a/b/c/g..')
        self.checkJoin(RFC2396_BASE, '..g', 'http://a/b/c/..g')
        self.checkJoin(RFC2396_BASE, './../g', 'http://a/b/g')
        self.checkJoin(RFC2396_BASE, './g/.', 'http://a/b/c/g/')
        self.checkJoin(RFC2396_BASE, 'g/./h', 'http://a/b/c/g/h')
        self.checkJoin(RFC2396_BASE, 'g/../h', 'http://a/b/c/h')
        self.checkJoin(RFC2396_BASE, 'g;x=1/./y', 'http://a/b/c/g;x=1/y')
        self.checkJoin(RFC2396_BASE, 'g;x=1/../y', 'http://a/b/c/y')
        self.checkJoin(RFC2396_BASE, 'g?y/./x', 'http://a/b/c/g?y/./x')
        self.checkJoin(RFC2396_BASE, 'g?y/../x', 'http://a/b/c/g?y/../x')
        self.checkJoin(RFC2396_BASE, 'g#s/./x', 'http://a/b/c/g#s/./x')
        self.checkJoin(RFC2396_BASE, 'g#s/../x', 'http://a/b/c/g#s/../x')

    def test_RFC3986(self):
        # Test cases from RFC3986
        self.checkJoin(RFC3986_BASE, 'g:h', 'g:h')
        self.checkJoin(RFC3986_BASE, 'g', 'http://a/b/c/g')
        self.checkJoin(RFC3986_BASE, './g', 'http://a/b/c/g')
        self.checkJoin(RFC3986_BASE, 'g/', 'http://a/b/c/g/')
        self.checkJoin(RFC3986_BASE, '/g', 'http://a/g')
        self.checkJoin(RFC3986_BASE, '//g', 'http://g')
        self.checkJoin(RFC3986_BASE, '?y', 'http://a/b/c/d;p?y')
        self.checkJoin(RFC3986_BASE, 'g?y', 'http://a/b/c/g?y')
        self.checkJoin(RFC3986_BASE, '#s', 'http://a/b/c/d;p?q#s')
        self.checkJoin(RFC3986_BASE, 'g#s', 'http://a/b/c/g#s')
        self.checkJoin(RFC3986_BASE, 'g?y#s', 'http://a/b/c/g?y#s')
        self.checkJoin(RFC3986_BASE, ';x', 'http://a/b/c/;x')
        self.checkJoin(RFC3986_BASE, 'g;x', 'http://a/b/c/g;x')
        self.checkJoin(RFC3986_BASE, 'g;x?y#s', 'http://a/b/c/g;x?y#s')
        self.checkJoin(RFC3986_BASE, '', 'http://a/b/c/d;p?q')
        self.checkJoin(RFC3986_BASE, '.', 'http://a/b/c/')
        self.checkJoin(RFC3986_BASE, './', 'http://a/b/c/')
        self.checkJoin(RFC3986_BASE, '..', 'http://a/b/')
        self.checkJoin(RFC3986_BASE, '../', 'http://a/b/')
        self.checkJoin(RFC3986_BASE, '../g', 'http://a/b/g')
        self.checkJoin(RFC3986_BASE, '../..', 'http://a/')
        self.checkJoin(RFC3986_BASE, '../../', 'http://a/')
        self.checkJoin(RFC3986_BASE, '../../g', 'http://a/g')

        # Abnormal examples

        # urljoin() does not produce the RFC 3986 results for the first
        # four "abnormal" examples (the RFC 1808/2396 tests above assert
        # e.g. 'http://a/../g' for '../../../g'), so these checks are
        # disabled and kept here for reference only.

        #self.checkJoin(RFC3986_BASE, '../../../g','http://a/g')
        #self.checkJoin(RFC3986_BASE, '../../../../g','http://a/g')
        #self.checkJoin(RFC3986_BASE, '/./g','http://a/g')
        #self.checkJoin(RFC3986_BASE, '/../g','http://a/g')

        self.checkJoin(RFC3986_BASE, 'g.', 'http://a/b/c/g.')
        self.checkJoin(RFC3986_BASE, '.g', 'http://a/b/c/.g')
        self.checkJoin(RFC3986_BASE, 'g..', 'http://a/b/c/g..')
        self.checkJoin(RFC3986_BASE, '..g', 'http://a/b/c/..g')
        self.checkJoin(RFC3986_BASE, './../g', 'http://a/b/g')
        self.checkJoin(RFC3986_BASE, './g/.', 'http://a/b/c/g/')
        self.checkJoin(RFC3986_BASE, 'g/./h', 'http://a/b/c/g/h')
        self.checkJoin(RFC3986_BASE, 'g/../h', 'http://a/b/c/h')
        self.checkJoin(RFC3986_BASE, 'g;x=1/./y', 'http://a/b/c/g;x=1/y')
        self.checkJoin(RFC3986_BASE, 'g;x=1/../y', 'http://a/b/c/y')
        self.checkJoin(RFC3986_BASE, 'g?y/./x', 'http://a/b/c/g?y/./x')
        self.checkJoin(RFC3986_BASE, 'g?y/../x', 'http://a/b/c/g?y/../x')
        self.checkJoin(RFC3986_BASE, 'g#s/./x', 'http://a/b/c/g#s/./x')
        self.checkJoin(RFC3986_BASE, 'g#s/../x', 'http://a/b/c/g#s/../x')
        #self.checkJoin(RFC3986_BASE, 'http:g','http:g') # strict parser
        self.checkJoin(RFC3986_BASE, 'http:g', 'http://a/b/c/g')  # relaxed parser

        # Test for issue9721
        self.checkJoin('http://a/b/c/de', ';x', 'http://a/b/c/;x')

    def test_urljoins(self):
        self.checkJoin(SIMPLE_BASE, 'g:h', 'g:h')
        self.checkJoin(SIMPLE_BASE, 'g', 'http://a/b/c/g')
        self.checkJoin(SIMPLE_BASE, './g', 'http://a/b/c/g')
        self.checkJoin(SIMPLE_BASE, 'g/', 'http://a/b/c/g/')
        self.checkJoin(SIMPLE_BASE, '/g', 'http://a/g')
        self.checkJoin(SIMPLE_BASE, '//g', 'http://g')
        self.checkJoin(SIMPLE_BASE, '?y', 'http://a/b/c/d?y')
        self.checkJoin(SIMPLE_BASE, 'g?y', 'http://a/b/c/g?y')
        self.checkJoin(SIMPLE_BASE, 'g?y/./x', 'http://a/b/c/g?y/./x')
        self.checkJoin(SIMPLE_BASE, '.', 'http://a/b/c/')
        self.checkJoin(SIMPLE_BASE, './', 'http://a/b/c/')
        self.checkJoin(SIMPLE_BASE, '..', 'http://a/b/')
        self.checkJoin(SIMPLE_BASE, '../', 'http://a/b/')
        self.checkJoin(SIMPLE_BASE, '../g', 'http://a/b/g')
        self.checkJoin(SIMPLE_BASE, '../..', 'http://a/')
        self.checkJoin(SIMPLE_BASE, '../../g', 'http://a/g')
        self.checkJoin(SIMPLE_BASE, '../../../g', 'http://a/../g')
        self.checkJoin(SIMPLE_BASE, './../g', 'http://a/b/g')
        self.checkJoin(SIMPLE_BASE, './g/.', 'http://a/b/c/g/')
        self.checkJoin(SIMPLE_BASE, '/./g', 'http://a/./g')
        self.checkJoin(SIMPLE_BASE, 'g/./h', 'http://a/b/c/g/h')
        self.checkJoin(SIMPLE_BASE, 'g/../h', 'http://a/b/c/h')
        # Relative references that repeat the base's scheme.
        self.checkJoin(SIMPLE_BASE, 'http:g', 'http://a/b/c/g')
        self.checkJoin(SIMPLE_BASE, 'http:', 'http://a/b/c/d')
        self.checkJoin(SIMPLE_BASE, 'http:?y', 'http://a/b/c/d?y')
        self.checkJoin(SIMPLE_BASE, 'http:g?y', 'http://a/b/c/g?y')
        self.checkJoin(SIMPLE_BASE, 'http:g?y/./x', 'http://a/b/c/g?y/./x')
        self.checkJoin('http:///', '..', 'http:///')
        self.checkJoin('', 'http://a/b/c/g?y/./x', 'http://a/b/c/g?y/./x')
        self.checkJoin('', 'http://a/./g', 'http://a/./g')
        self.checkJoin('svn://pathtorepo/dir1', 'dir2',
                       'svn://pathtorepo/dir2')
        self.checkJoin('svn+ssh://pathtorepo/dir1', 'dir2',
                       'svn+ssh://pathtorepo/dir2')

    def test_RFC2732(self):
        # Each entry: (url, expected lower-cased hostname, expected port
        # or None).  Covers names, IPv4 and bracketed IPv6 literals with
        # a port, without a port, and with an empty port (trailing ':').
        for url, hostname, port in [
            ('http://Test.python.org:5432/foo/', 'test.python.org', 5432),
            ('http://12.34.56.78:5432/foo/', '12.34.56.78', 5432),
            ('http://[::1]:5432/foo/', '::1', 5432),
            ('http://[dead:beef::1]:5432/foo/', 'dead:beef::1', 5432),
            ('http://[dead:beef::]:5432/foo/', 'dead:beef::', 5432),
            ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]:5432/foo/',
             'dead:beef:cafe:5417:affe:8fa3:deaf:feed', 5432),
            ('http://[::12.34.56.78]:5432/foo/', '::12.34.56.78', 5432),
            ('http://[::ffff:12.34.56.78]:5432/foo/',
             '::ffff:12.34.56.78', 5432),
            ('http://Test.python.org/foo/', 'test.python.org', None),
            ('http://12.34.56.78/foo/', '12.34.56.78', None),
            ('http://[::1]/foo/', '::1', None),
            ('http://[dead:beef::1]/foo/', 'dead:beef::1', None),
            ('http://[dead:beef::]/foo/', 'dead:beef::', None),
            ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]/foo/',
             'dead:beef:cafe:5417:affe:8fa3:deaf:feed', None),
            ('http://[::12.34.56.78]/foo/', '::12.34.56.78', None),
            ('http://[::ffff:12.34.56.78]/foo/',
             '::ffff:12.34.56.78', None),
            ('http://Test.python.org:/foo/', 'test.python.org', None),
            ('http://12.34.56.78:/foo/', '12.34.56.78', None),
            ('http://[::1]:/foo/', '::1', None),
            ('http://[dead:beef::1]:/foo/', 'dead:beef::1', None),
            ('http://[dead:beef::]:/foo/', 'dead:beef::', None),
            ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]:/foo/',
             'dead:beef:cafe:5417:affe:8fa3:deaf:feed', None),
            ('http://[::12.34.56.78]:/foo/', '::12.34.56.78', None),
            ('http://[::ffff:12.34.56.78]:/foo/',
             '::ffff:12.34.56.78', None),
            ]:
            urlparsed = urlparse.urlparse(url)
            self.assertEqual((urlparsed.hostname, urlparsed.port),
                             (hostname, port))

        # Unbalanced brackets in the netloc must be rejected.
        for invalid_url in [
                'http://::12.34.56.78]/',
                'http://[::1/foo/',
                'ftp://[::1/foo/bad]/bad',
                'http://[::1/foo/bad]/bad',
                'http://[::ffff:12.34.56.78']:
            self.assertRaises(ValueError, urlparse.urlparse, invalid_url)

    def test_urldefrag(self):
        # Each entry: (url, expected url without fragment, expected
        # fragment).
        for url, defrag, frag in [
            ('http://python.org#frag', 'http://python.org', 'frag'),
            ('http://python.org', 'http://python.org', ''),
            ('http://python.org/#frag', 'http://python.org/', 'frag'),
            ('http://python.org/', 'http://python.org/', ''),
            ('http://python.org/?q#frag', 'http://python.org/?q', 'frag'),
            ('http://python.org/?q', 'http://python.org/?q', ''),
            ('http://python.org/p#frag', 'http://python.org/p', 'frag'),
            ('http://python.org/p?q', 'http://python.org/p?q', ''),
            (RFC1808_BASE, 'http://a/b/c/d;p?q', 'f'),
            (RFC2396_BASE, 'http://a/b/c/d;p?q', ''),
            ]:
            self.assertEqual(urlparse.urldefrag(url), (defrag, frag))

    def test_urlsplit_attributes(self):
        url = "HTTP://WWW.PYTHON.ORG/doc/#frag"
        p = urlparse.urlsplit(url)
        self.assertEqual(p.scheme, "http")
        self.assertEqual(p.netloc, "WWW.PYTHON.ORG")
        self.assertEqual(p.path, "/doc/")
        self.assertEqual(p.query, "")
        self.assertEqual(p.fragment, "frag")
        self.assertEqual(p.username, None)
        self.assertEqual(p.password, None)
        self.assertEqual(p.hostname, "www.python.org")
        self.assertEqual(p.port, None)
        # geturl() won't return exactly the original URL in this case
        # since the scheme is always case-normalized
        #self.assertEqual(p.geturl(), url)

        url = "http://User:Pass@www.python.org:080/doc/?query=yes#frag"
        p = urlparse.urlsplit(url)
        self.assertEqual(p.scheme, "http")
        self.assertEqual(p.netloc, "User:Pass@www.python.org:080")
        self.assertEqual(p.path, "/doc/")
        self.assertEqual(p.query, "query=yes")
        self.assertEqual(p.fragment, "frag")
        self.assertEqual(p.username, "User")
        self.assertEqual(p.password, "Pass")
        self.assertEqual(p.hostname, "www.python.org")
        self.assertEqual(p.port, 80)
        self.assertEqual(p.geturl(), url)

        # Addressing issue1698, which suggests Username can contain
        # "@" characters.  Though not RFC compliant, many ftp sites allow
        # and request email addresses as usernames.

        url = "http://User@example.com:Pass@www.python.org:080/doc/?query=yes#frag"
        p = urlparse.urlsplit(url)
        self.assertEqual(p.scheme, "http")
        self.assertEqual(p.netloc, "User@example.com:Pass@www.python.org:080")
        self.assertEqual(p.path, "/doc/")
        self.assertEqual(p.query, "query=yes")
        self.assertEqual(p.fragment, "frag")
        self.assertEqual(p.username, "User@example.com")
        self.assertEqual(p.password, "Pass")
        self.assertEqual(p.hostname, "www.python.org")
        self.assertEqual(p.port, 80)
        self.assertEqual(p.geturl(), url)

        # Verify an illegal port of value greater than 65535 is set as None
        url = "http://www.python.org:65536"
        p = urlparse.urlsplit(url)
        self.assertEqual(p.port, None)

    def test_issue14072(self):
        p1 = urlparse.urlsplit('tel:+31-641044153')
        self.assertEqual(p1.scheme, 'tel')
        self.assertEqual(p1.path, '+31-641044153')

        p2 = urlparse.urlsplit('tel:+31641044153')
        self.assertEqual(p2.scheme, 'tel')
        self.assertEqual(p2.path, '+31641044153')

        # Assert for urlparse
        p1 = urlparse.urlparse('tel:+31-641044153')
        self.assertEqual(p1.scheme, 'tel')
        self.assertEqual(p1.path, '+31-641044153')

        p2 = urlparse.urlparse('tel:+31641044153')
        self.assertEqual(p2.scheme, 'tel')
        self.assertEqual(p2.path, '+31641044153')

    def test_telurl_params(self):
        # For tel: URLs the text after ';' must end up in .params and
        # not in .path.
        p1 = urlparse.urlparse('tel:123-4;phone-context=+1-650-516')
        self.assertEqual(p1.scheme, 'tel')
        self.assertEqual(p1.path, '123-4')
        self.assertEqual(p1.params, 'phone-context=+1-650-516')

        p1 = urlparse.urlparse('tel:+1-201-555-0123')
        self.assertEqual(p1.scheme, 'tel')
        self.assertEqual(p1.path, '+1-201-555-0123')
        self.assertEqual(p1.params, '')

        p1 = urlparse.urlparse('tel:7042;phone-context=example.com')
        self.assertEqual(p1.scheme, 'tel')
        self.assertEqual(p1.path, '7042')
        self.assertEqual(p1.params, 'phone-context=example.com')

        p1 = urlparse.urlparse('tel:863-1234;phone-context=+1-914-555')
        self.assertEqual(p1.scheme, 'tel')
        self.assertEqual(p1.path, '863-1234')
        self.assertEqual(p1.params, 'phone-context=+1-914-555')

    def test_attributes_bad_port(self):
        """Check handling of non-integer ports."""
        p = urlparse.urlsplit("http://www.example.net:foo")
        self.assertEqual(p.netloc, "www.example.net:foo")
        # .port is computed on access, hence the lambda.
        self.assertRaises(ValueError, lambda: p.port)

        p = urlparse.urlparse("http://www.example.net:foo")
        self.assertEqual(p.netloc, "www.example.net:foo")
        self.assertRaises(ValueError, lambda: p.port)

    def test_attributes_without_netloc(self):
        # This example is straight from RFC 3261.  It looks like it
        # should allow the username, hostname, and port to be filled
        # in, but doesn't.  Since it's a URI and doesn't use the
        # scheme://netloc syntax, the netloc and related attributes
        # should be left empty.
        uri = "sip:alice@atlanta.com;maddr=239.255.255.1;ttl=15"
        p = urlparse.urlsplit(uri)
        self.assertEqual(p.netloc, "")
        self.assertEqual(p.username, None)
        self.assertEqual(p.password, None)
        self.assertEqual(p.hostname, None)
        self.assertEqual(p.port, None)
        self.assertEqual(p.geturl(), uri)

        p = urlparse.urlparse(uri)
        self.assertEqual(p.netloc, "")
        self.assertEqual(p.username, None)
        self.assertEqual(p.password, None)
        self.assertEqual(p.hostname, None)
        self.assertEqual(p.port, None)
        self.assertEqual(p.geturl(), uri)

    def test_caching(self):
        # Test case for bug #1313119
        # Parse the unicode form first, then the str form: the str
        # result must still have components of the str type.
        uri = "http://example.com/doc/"
        unicode_uri = unicode(uri)

        urlparse.urlparse(unicode_uri)
        p = urlparse.urlparse(uri)
        self.assertEqual(type(p.scheme), type(uri))
        self.assertEqual(type(p.hostname), type(uri))
        self.assertEqual(type(p.path), type(uri))

    def test_noslash(self):
        # Issue 1637: http://foo.com?query is legal
        self.assertEqual(urlparse.urlparse("http://example.com?blahblah=/foo"),
                         ('http', 'example.com', '', '', 'blahblah=/foo', ''))

    def test_anyscheme(self):
        # Issue 7904: s3://foo.com/stuff has netloc "foo.com".
        self.assertEqual(urlparse.urlparse("s3://foo.com/stuff"),
                         ('s3', 'foo.com', '/stuff', '', '', ''))
        self.assertEqual(urlparse.urlparse("x-newscheme://foo.com/stuff"),
                         ('x-newscheme', 'foo.com', '/stuff', '', '', ''))
        self.assertEqual(urlparse.urlparse("x-newscheme://foo.com/stuff?query#fragment"),
                         ('x-newscheme', 'foo.com', '/stuff', '', 'query', 'fragment'))
        self.assertEqual(urlparse.urlparse("x-newscheme://foo.com/stuff?query"),
                         ('x-newscheme', 'foo.com', '/stuff', '', 'query', ''))

    def test_withoutscheme(self):
        # Test urlparse without scheme
        # Issue 754016: urlparse goes wrong with IP:port without scheme
        # RFC 1808 specifies that netloc should start with //, urlparse expects
        # the same, otherwise it classifies the portion of url as path.
        self.assertEqual(urlparse.urlparse("path"),
                         ('', '', 'path', '', '', ''))
        self.assertEqual(urlparse.urlparse("//www.python.org:80"),
                         ('', 'www.python.org:80', '', '', '', ''))
        self.assertEqual(urlparse.urlparse("http://www.python.org:80"),
                         ('http', 'www.python.org:80', '', '', '', ''))

    def test_portseparator(self):
        # Issue 754016 makes changes for port separator ':' from scheme separator
        self.assertEqual(urlparse.urlparse("path:80"),
                         ('', '', 'path:80', '', '', ''))
        self.assertEqual(urlparse.urlparse("http:"),
                         ('http', '', '', '', '', ''))
        self.assertEqual(urlparse.urlparse("https:"),
                         ('https', '', '', '', '', ''))
        self.assertEqual(urlparse.urlparse("http://www.python.org:80"),
                         ('http', 'www.python.org:80', '', '', '', ''))


def test_main():
    # Entry point used by the regression-test runner.
    test_support.run_unittest(UrlParseTestCase)


if __name__ == "__main__":
    test_main()