import sys
import unicodedata
import unittest
import urllib.parse

RFC1808_BASE = "http://a/b/c/d;p?q#f"
RFC2396_BASE = "http://a/b/c/d;p?q"
RFC3986_BASE = 'http://a/b/c/d;p?q'
SIMPLE_BASE = 'http://a/b/c/d'

# Each parse_qsl testcase is a two-tuple that contains
# a string with the query and a list with the expected result.

parse_qsl_test_cases = [
    ("", []),
    ("&", []),
    ("&&", []),
    ("=", [('', '')]),
    ("=a", [('', 'a')]),
    ("a", [('a', '')]),
    ("a=", [('a', '')]),
    ("&a=b", [('a', 'b')]),
    ("a=a+b&b=b+c", [('a', 'a b'), ('b', 'b c')]),
    ("a=1&a=2", [('a', '1'), ('a', '2')]),
    (b"", []),
    (b"&", []),
    (b"&&", []),
    (b"=", [(b'', b'')]),
    (b"=a", [(b'', b'a')]),
    (b"a", [(b'a', b'')]),
    (b"a=", [(b'a', b'')]),
    (b"&a=b", [(b'a', b'b')]),
    (b"a=a+b&b=b+c", [(b'a', b'a b'), (b'b', b'b c')]),
    (b"a=1&a=2", [(b'a', b'1'), (b'a', b'2')]),
    (";", []),
    (";;", []),
    (";a=b", [('a', 'b')]),
    ("a=a+b;b=b+c", [('a', 'a b'), ('b', 'b c')]),
    ("a=1;a=2", [('a', '1'), ('a', '2')]),
    (b";", []),
    (b";;", []),
    (b";a=b", [(b'a', b'b')]),
    (b"a=a+b;b=b+c", [(b'a', b'a b'), (b'b', b'b c')]),
    (b"a=1;a=2", [(b'a', b'1'), (b'a', b'2')]),
]

# Each parse_qs testcase is a two-tuple that contains
# a string with the query and a dictionary with the expected result.

parse_qs_test_cases = [
    ("", {}),
    ("&", {}),
    ("&&", {}),
    ("=", {'': ['']}),
    ("=a", {'': ['a']}),
    ("a", {'a': ['']}),
    ("a=", {'a': ['']}),
    ("&a=b", {'a': ['b']}),
    ("a=a+b&b=b+c", {'a': ['a b'], 'b': ['b c']}),
    ("a=1&a=2", {'a': ['1', '2']}),
    (b"", {}),
    (b"&", {}),
    (b"&&", {}),
    (b"=", {b'': [b'']}),
    (b"=a", {b'': [b'a']}),
    (b"a", {b'a': [b'']}),
    (b"a=", {b'a': [b'']}),
    (b"&a=b", {b'a': [b'b']}),
    (b"a=a+b&b=b+c", {b'a': [b'a b'], b'b': [b'b c']}),
    (b"a=1&a=2", {b'a': [b'1', b'2']}),
    (";", {}),
    (";;", {}),
    (";a=b", {'a': ['b']}),
    ("a=a+b;b=b+c", {'a': ['a b'], 'b': ['b c']}),
    ("a=1;a=2", {'a': ['1', '2']}),
    (b";", {}),
    (b";;", {}),
    (b";a=b", {b'a': [b'b']}),
    (b"a=a+b;b=b+c", {b'a': [b'a b'], b'b': [b'b c']}),
    (b"a=1;a=2", {b'a': [b'1', b'2']}),
]

class UrlParseTestCase(unittest.TestCase):

    def checkRoundtrips(self, url, parsed, split):
        result = urllib.parse.urlparse(url)
        self.assertEqual(result, parsed)
        t = (result.scheme, result.netloc, result.path,
             result.params, result.query, result.fragment)
        self.assertEqual(t, parsed)
        # put it back together and it should be the same
        result2 = urllib.parse.urlunparse(result)
        self.assertEqual(result2, url)
        self.assertEqual(result2, result.geturl())

        # the result of geturl() is a fixpoint; we can always parse it
        # again to get the same result:
        result3 = urllib.parse.urlparse(result.geturl())
        self.assertEqual(result3.geturl(), result.geturl())
        self.assertEqual(result3, result)
        self.assertEqual(result3.scheme, result.scheme)
        self.assertEqual(result3.netloc, result.netloc)
        self.assertEqual(result3.path, result.path)
        self.assertEqual(result3.params, result.params)
        self.assertEqual(result3.query, result.query)
        self.assertEqual(result3.fragment, result.fragment)
        self.assertEqual(result3.username, result.username)
        self.assertEqual(result3.password, result.password)
        self.assertEqual(result3.hostname, result.hostname)
        self.assertEqual(result3.port, result.port)

        # check the roundtrip using urlsplit() as well
        result = urllib.parse.urlsplit(url)
        self.assertEqual(result, split)
        t = (result.scheme, result.netloc, result.path,
             result.query, result.fragment)
        self.assertEqual(t, split)
        result2 = urllib.parse.urlunsplit(result)
        self.assertEqual(result2, url)
        self.assertEqual(result2, result.geturl())

        # check the fixpoint property of re-parsing the result of geturl()
        result3 = urllib.parse.urlsplit(result.geturl())
        self.assertEqual(result3.geturl(), result.geturl())
        self.assertEqual(result3, result)
        self.assertEqual(result3.scheme, result.scheme)
        self.assertEqual(result3.netloc, result.netloc)
        self.assertEqual(result3.path, result.path)
        self.assertEqual(result3.query, result.query)
        self.assertEqual(result3.fragment, result.fragment)
        self.assertEqual(result3.username, result.username)
        self.assertEqual(result3.password, result.password)
        self.assertEqual(result3.hostname, result.hostname)
        self.assertEqual(result3.port, result.port)
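
    # A quick doctest-style illustration (a reading aid, not an assertion of
    # its own) of why checkRoundtrips() compares a 6-tuple for urlparse() but
    # a 5-tuple for urlsplit(): urlparse() splits the ';p' params out of the
    # last path segment, urlsplit() leaves them attached.
    #
    #     >>> urllib.parse.urlparse(RFC1808_BASE)[2:4]
    #     ('/b/c/d', 'p')
    #     >>> urllib.parse.urlsplit(RFC1808_BASE)[2]
    #     '/b/c/d;p'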

    def test_qsl(self):
        for orig, expect in parse_qsl_test_cases:
            result = urllib.parse.parse_qsl(orig, keep_blank_values=True)
            self.assertEqual(result, expect, "Error parsing %r" % orig)
            expect_without_blanks = [v for v in expect if len(v[1])]
            result = urllib.parse.parse_qsl(orig, keep_blank_values=False)
            self.assertEqual(result, expect_without_blanks,
                             "Error parsing %r" % orig)

    def test_qs(self):
        for orig, expect in parse_qs_test_cases:
            result = urllib.parse.parse_qs(orig, keep_blank_values=True)
            self.assertEqual(result, expect, "Error parsing %r" % orig)
            expect_without_blanks = {v: expect[v]
                                     for v in expect if len(expect[v][0])}
            result = urllib.parse.parse_qs(orig, keep_blank_values=False)
            self.assertEqual(result, expect_without_blanks,
                             "Error parsing %r" % orig)

    def test_roundtrips(self):
        str_cases = [
            ('file:///tmp/junk.txt',
             ('file', '', '/tmp/junk.txt', '', '', ''),
             ('file', '', '/tmp/junk.txt', '', '')),
            ('imap://mail.python.org/mbox1',
             ('imap', 'mail.python.org', '/mbox1', '', '', ''),
             ('imap', 'mail.python.org', '/mbox1', '', '')),
            ('mms://wms.sys.hinet.net/cts/Drama/09006251100.asf',
             ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf',
              '', '', ''),
             ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf',
              '', '')),
            ('nfs://server/path/to/file.txt',
             ('nfs', 'server', '/path/to/file.txt', '', '', ''),
             ('nfs', 'server', '/path/to/file.txt', '', '')),
            ('svn+ssh://svn.zope.org/repos/main/ZConfig/trunk/',
             ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/',
              '', '', ''),
             ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/',
              '', '')),
            ('git+ssh://git@github.com/user/project.git',
             ('git+ssh', 'git@github.com', '/user/project.git',
              '', '', ''),
             ('git+ssh', 'git@github.com', '/user/project.git',
              '', '')),
        ]
        def _encode(t):
            return (t[0].encode('ascii'),
                    tuple(x.encode('ascii') for x in t[1]),
                    tuple(x.encode('ascii') for x in t[2]))
        bytes_cases = [_encode(x) for x in str_cases]
        for url, parsed, split in str_cases + bytes_cases:
            self.checkRoundtrips(url, parsed, split)
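
    # The roundtrip cases above are deliberately run twice, once with str and
    # once with ASCII-encoded bytes: urllib.parse accepts both and returns
    # components of the same type it was given, e.g. (illustrative only):
    #
    #     >>> urllib.parse.urlsplit(b'nfs://server/path/to/file.txt').netloc
    #     b'server'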

    def test_http_roundtrips(self):
        # urllib.parse.urlsplit treats 'http:' as an optimized special case,
        # so we test both 'http:' and 'https:' in all the following.
        # Three cheers for white box knowledge!
        str_cases = [
            ('://www.python.org',
             ('www.python.org', '', '', '', ''),
             ('www.python.org', '', '', '')),
            ('://www.python.org#abc',
             ('www.python.org', '', '', '', 'abc'),
             ('www.python.org', '', '', 'abc')),
            ('://www.python.org?q=abc',
             ('www.python.org', '', '', 'q=abc', ''),
             ('www.python.org', '', 'q=abc', '')),
            ('://www.python.org/#abc',
             ('www.python.org', '/', '', '', 'abc'),
             ('www.python.org', '/', '', 'abc')),
            ('://a/b/c/d;p?q#f',
             ('a', '/b/c/d', 'p', 'q', 'f'),
             ('a', '/b/c/d;p', 'q', 'f')),
        ]
        def _encode(t):
            return (t[0].encode('ascii'),
                    tuple(x.encode('ascii') for x in t[1]),
                    tuple(x.encode('ascii') for x in t[2]))
        bytes_cases = [_encode(x) for x in str_cases]
        str_schemes = ('http', 'https')
        bytes_schemes = (b'http', b'https')
        str_tests = str_schemes, str_cases
        bytes_tests = bytes_schemes, bytes_cases
        for schemes, test_cases in (str_tests, bytes_tests):
            for scheme in schemes:
                for url, parsed, split in test_cases:
                    url = scheme + url
                    parsed = (scheme,) + parsed
                    split = (scheme,) + split
                    self.checkRoundtrips(url, parsed, split)

    def checkJoin(self, base, relurl, expected):
        str_components = (base, relurl, expected)
        self.assertEqual(urllib.parse.urljoin(base, relurl), expected)
        bytes_components = baseb, relurlb, expectedb = [
            x.encode('ascii') for x in str_components]
        self.assertEqual(urllib.parse.urljoin(baseb, relurlb), expectedb)

    def test_unparse_parse(self):
        str_cases = ['Python', './Python', 'x-newscheme://foo.com/stuff',
                     'x://y', 'x:/y', 'x:/', '/']
        bytes_cases = [x.encode('ascii') for x in str_cases]
        for u in str_cases + bytes_cases:
            self.assertEqual(urllib.parse.urlunsplit(urllib.parse.urlsplit(u)), u)
            self.assertEqual(urllib.parse.urlunparse(urllib.parse.urlparse(u)), u)

    def test_RFC1808(self):
        # "normal" cases from RFC 1808:
        self.checkJoin(RFC1808_BASE, 'g:h', 'g:h')
        self.checkJoin(RFC1808_BASE, 'g', 'http://a/b/c/g')
        self.checkJoin(RFC1808_BASE, './g', 'http://a/b/c/g')
        self.checkJoin(RFC1808_BASE, 'g/', 'http://a/b/c/g/')
        self.checkJoin(RFC1808_BASE, '/g', 'http://a/g')
        self.checkJoin(RFC1808_BASE, '//g', 'http://g')
        self.checkJoin(RFC1808_BASE, 'g?y', 'http://a/b/c/g?y')
        self.checkJoin(RFC1808_BASE, 'g?y/./x', 'http://a/b/c/g?y/./x')
        self.checkJoin(RFC1808_BASE, '#s', 'http://a/b/c/d;p?q#s')
        self.checkJoin(RFC1808_BASE, 'g#s', 'http://a/b/c/g#s')
        self.checkJoin(RFC1808_BASE, 'g#s/./x', 'http://a/b/c/g#s/./x')
        self.checkJoin(RFC1808_BASE, 'g?y#s', 'http://a/b/c/g?y#s')
        self.checkJoin(RFC1808_BASE, 'g;x', 'http://a/b/c/g;x')
        self.checkJoin(RFC1808_BASE, 'g;x?y#s', 'http://a/b/c/g;x?y#s')
        self.checkJoin(RFC1808_BASE, '.', 'http://a/b/c/')
        self.checkJoin(RFC1808_BASE, './', 'http://a/b/c/')
        self.checkJoin(RFC1808_BASE, '..', 'http://a/b/')
        self.checkJoin(RFC1808_BASE, '../', 'http://a/b/')
        self.checkJoin(RFC1808_BASE, '../g', 'http://a/b/g')
        self.checkJoin(RFC1808_BASE, '../..', 'http://a/')
        self.checkJoin(RFC1808_BASE, '../../', 'http://a/')
        self.checkJoin(RFC1808_BASE, '../../g', 'http://a/g')

        # "abnormal" cases from RFC 1808:
        self.checkJoin(RFC1808_BASE, '', 'http://a/b/c/d;p?q#f')
        self.checkJoin(RFC1808_BASE, 'g.', 'http://a/b/c/g.')
        self.checkJoin(RFC1808_BASE, '.g', 'http://a/b/c/.g')
        self.checkJoin(RFC1808_BASE, 'g..', 'http://a/b/c/g..')
        self.checkJoin(RFC1808_BASE, '..g', 'http://a/b/c/..g')
        self.checkJoin(RFC1808_BASE, './../g', 'http://a/b/g')
        self.checkJoin(RFC1808_BASE, './g/.', 'http://a/b/c/g/')
        self.checkJoin(RFC1808_BASE, 'g/./h', 'http://a/b/c/g/h')
        self.checkJoin(RFC1808_BASE, 'g/../h', 'http://a/b/c/h')

        # RFC 1808 and RFC 1630 disagree on these (according to RFC 1808),
        # so we'll not actually run these tests (which expect 1808 behavior).
        #self.checkJoin(RFC1808_BASE, 'http:g', 'http:g')
        #self.checkJoin(RFC1808_BASE, 'http:', 'http:')

        # XXX: The following tests are no longer compatible with RFC3986
        # self.checkJoin(RFC1808_BASE, '../../../g', 'http://a/../g')
        # self.checkJoin(RFC1808_BASE, '../../../../g', 'http://a/../../g')
        # self.checkJoin(RFC1808_BASE, '/./g', 'http://a/./g')
        # self.checkJoin(RFC1808_BASE, '/../g', 'http://a/../g')


    def test_RFC2368(self):
        # Issue 11467: path that starts with a number is not parsed correctly
        self.assertEqual(urllib.parse.urlparse('mailto:1337@example.org'),
                         ('mailto', '', '1337@example.org', '', '', ''))

    def test_RFC2396(self):
        # cases from RFC 2396

        self.checkJoin(RFC2396_BASE, 'g:h', 'g:h')
        self.checkJoin(RFC2396_BASE, 'g', 'http://a/b/c/g')
        self.checkJoin(RFC2396_BASE, './g', 'http://a/b/c/g')
        self.checkJoin(RFC2396_BASE, 'g/', 'http://a/b/c/g/')
        self.checkJoin(RFC2396_BASE, '/g', 'http://a/g')
        self.checkJoin(RFC2396_BASE, '//g', 'http://g')
        self.checkJoin(RFC2396_BASE, 'g?y', 'http://a/b/c/g?y')
        self.checkJoin(RFC2396_BASE, '#s', 'http://a/b/c/d;p?q#s')
        self.checkJoin(RFC2396_BASE, 'g#s', 'http://a/b/c/g#s')
        self.checkJoin(RFC2396_BASE, 'g?y#s', 'http://a/b/c/g?y#s')
        self.checkJoin(RFC2396_BASE, 'g;x', 'http://a/b/c/g;x')
        self.checkJoin(RFC2396_BASE, 'g;x?y#s', 'http://a/b/c/g;x?y#s')
        self.checkJoin(RFC2396_BASE, '.', 'http://a/b/c/')
        self.checkJoin(RFC2396_BASE, './', 'http://a/b/c/')
        self.checkJoin(RFC2396_BASE, '..', 'http://a/b/')
        self.checkJoin(RFC2396_BASE, '../', 'http://a/b/')
        self.checkJoin(RFC2396_BASE, '../g', 'http://a/b/g')
        self.checkJoin(RFC2396_BASE, '../..', 'http://a/')
        self.checkJoin(RFC2396_BASE, '../../', 'http://a/')
        self.checkJoin(RFC2396_BASE, '../../g', 'http://a/g')
        self.checkJoin(RFC2396_BASE, '', RFC2396_BASE)
        self.checkJoin(RFC2396_BASE, 'g.', 'http://a/b/c/g.')
        self.checkJoin(RFC2396_BASE, '.g', 'http://a/b/c/.g')
        self.checkJoin(RFC2396_BASE, 'g..', 'http://a/b/c/g..')
        self.checkJoin(RFC2396_BASE, '..g', 'http://a/b/c/..g')
        self.checkJoin(RFC2396_BASE, './../g', 'http://a/b/g')
        self.checkJoin(RFC2396_BASE, './g/.', 'http://a/b/c/g/')
        self.checkJoin(RFC2396_BASE, 'g/./h', 'http://a/b/c/g/h')
        self.checkJoin(RFC2396_BASE, 'g/../h', 'http://a/b/c/h')
        self.checkJoin(RFC2396_BASE, 'g;x=1/./y', 'http://a/b/c/g;x=1/y')
        self.checkJoin(RFC2396_BASE, 'g;x=1/../y', 'http://a/b/c/y')
        self.checkJoin(RFC2396_BASE, 'g?y/./x', 'http://a/b/c/g?y/./x')
        self.checkJoin(RFC2396_BASE, 'g?y/../x', 'http://a/b/c/g?y/../x')
        self.checkJoin(RFC2396_BASE, 'g#s/./x', 'http://a/b/c/g#s/./x')
        self.checkJoin(RFC2396_BASE, 'g#s/../x', 'http://a/b/c/g#s/../x')

        # XXX: The following tests are no longer compatible with RFC3986
        # self.checkJoin(RFC2396_BASE, '../../../g', 'http://a/../g')
        # self.checkJoin(RFC2396_BASE, '../../../../g', 'http://a/../../g')
        # self.checkJoin(RFC2396_BASE, '/./g', 'http://a/./g')
        # self.checkJoin(RFC2396_BASE, '/../g', 'http://a/../g')

    def test_RFC3986(self):
        self.checkJoin(RFC3986_BASE, '?y', 'http://a/b/c/d;p?y')
        self.checkJoin(RFC3986_BASE, ';x', 'http://a/b/c/;x')
        self.checkJoin(RFC3986_BASE, 'g:h', 'g:h')
        self.checkJoin(RFC3986_BASE, 'g', 'http://a/b/c/g')
        self.checkJoin(RFC3986_BASE, './g', 'http://a/b/c/g')
        self.checkJoin(RFC3986_BASE, 'g/', 'http://a/b/c/g/')
        self.checkJoin(RFC3986_BASE, '/g', 'http://a/g')
        self.checkJoin(RFC3986_BASE, '//g', 'http://g')
        self.checkJoin(RFC3986_BASE, '?y', 'http://a/b/c/d;p?y')
        self.checkJoin(RFC3986_BASE, 'g?y', 'http://a/b/c/g?y')
        self.checkJoin(RFC3986_BASE, '#s', 'http://a/b/c/d;p?q#s')
        self.checkJoin(RFC3986_BASE, 'g#s', 'http://a/b/c/g#s')
        self.checkJoin(RFC3986_BASE, 'g?y#s', 'http://a/b/c/g?y#s')
        self.checkJoin(RFC3986_BASE, ';x', 'http://a/b/c/;x')
        self.checkJoin(RFC3986_BASE, 'g;x', 'http://a/b/c/g;x')
        self.checkJoin(RFC3986_BASE, 'g;x?y#s', 'http://a/b/c/g;x?y#s')
        self.checkJoin(RFC3986_BASE, '', 'http://a/b/c/d;p?q')
        self.checkJoin(RFC3986_BASE, '.', 'http://a/b/c/')
        self.checkJoin(RFC3986_BASE, './', 'http://a/b/c/')
        self.checkJoin(RFC3986_BASE, '..', 'http://a/b/')
        self.checkJoin(RFC3986_BASE, '../', 'http://a/b/')
        self.checkJoin(RFC3986_BASE, '../g', 'http://a/b/g')
        self.checkJoin(RFC3986_BASE, '../..', 'http://a/')
        self.checkJoin(RFC3986_BASE, '../../', 'http://a/')
        self.checkJoin(RFC3986_BASE, '../../g', 'http://a/g')
        self.checkJoin(RFC3986_BASE, '../../../g', 'http://a/g')

        # Abnormal Examples

        # The 'abnormal scenarios' are incompatible with RFC 3986 parsing.
        # Tests are here for reference.

        self.checkJoin(RFC3986_BASE, '../../../g', 'http://a/g')
        self.checkJoin(RFC3986_BASE, '../../../../g', 'http://a/g')
        self.checkJoin(RFC3986_BASE, '/./g', 'http://a/g')
        self.checkJoin(RFC3986_BASE, '/../g', 'http://a/g')
        self.checkJoin(RFC3986_BASE, 'g.', 'http://a/b/c/g.')
        self.checkJoin(RFC3986_BASE, '.g', 'http://a/b/c/.g')
        self.checkJoin(RFC3986_BASE, 'g..', 'http://a/b/c/g..')
        self.checkJoin(RFC3986_BASE, '..g', 'http://a/b/c/..g')
        self.checkJoin(RFC3986_BASE, './../g', 'http://a/b/g')
        self.checkJoin(RFC3986_BASE, './g/.', 'http://a/b/c/g/')
        self.checkJoin(RFC3986_BASE, 'g/./h', 'http://a/b/c/g/h')
        self.checkJoin(RFC3986_BASE, 'g/../h', 'http://a/b/c/h')
        self.checkJoin(RFC3986_BASE, 'g;x=1/./y', 'http://a/b/c/g;x=1/y')
        self.checkJoin(RFC3986_BASE, 'g;x=1/../y', 'http://a/b/c/y')
        self.checkJoin(RFC3986_BASE, 'g?y/./x', 'http://a/b/c/g?y/./x')
        self.checkJoin(RFC3986_BASE, 'g?y/../x', 'http://a/b/c/g?y/../x')
        self.checkJoin(RFC3986_BASE, 'g#s/./x', 'http://a/b/c/g#s/./x')
        self.checkJoin(RFC3986_BASE, 'g#s/../x', 'http://a/b/c/g#s/../x')
        #self.checkJoin(RFC3986_BASE, 'http:g', 'http:g')  # strict parser
        self.checkJoin(RFC3986_BASE, 'http:g', 'http://a/b/c/g')  # relaxed parser

        # Test for issue9721
        self.checkJoin('http://a/b/c/de', ';x', 'http://a/b/c/;x')
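
    # As a reading aid for the cases above: urljoin() resolves references
    # per RFC 3986 here, so excess '..' segments are dropped at the root
    # instead of leaking into the result, matching the assertion made above
    # (illustrative only):
    #
    #     >>> urllib.parse.urljoin('http://a/b/c/d;p?q', '../../../g')
    #     'http://a/g'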

    def test_urljoins(self):
        self.checkJoin(SIMPLE_BASE, 'g:h', 'g:h')
        self.checkJoin(SIMPLE_BASE, 'http:g', 'http://a/b/c/g')
        self.checkJoin(SIMPLE_BASE, 'http:', 'http://a/b/c/d')
        self.checkJoin(SIMPLE_BASE, 'g', 'http://a/b/c/g')
        self.checkJoin(SIMPLE_BASE, './g', 'http://a/b/c/g')
        self.checkJoin(SIMPLE_BASE, 'g/', 'http://a/b/c/g/')
        self.checkJoin(SIMPLE_BASE, '/g', 'http://a/g')
        self.checkJoin(SIMPLE_BASE, '//g', 'http://g')
        self.checkJoin(SIMPLE_BASE, '?y', 'http://a/b/c/d?y')
        self.checkJoin(SIMPLE_BASE, 'g?y', 'http://a/b/c/g?y')
        self.checkJoin(SIMPLE_BASE, 'g?y/./x', 'http://a/b/c/g?y/./x')
        self.checkJoin(SIMPLE_BASE, '.', 'http://a/b/c/')
        self.checkJoin(SIMPLE_BASE, './', 'http://a/b/c/')
        self.checkJoin(SIMPLE_BASE, '..', 'http://a/b/')
        self.checkJoin(SIMPLE_BASE, '../', 'http://a/b/')
        self.checkJoin(SIMPLE_BASE, '../g', 'http://a/b/g')
        self.checkJoin(SIMPLE_BASE, '../..', 'http://a/')
        self.checkJoin(SIMPLE_BASE, '../../g', 'http://a/g')
        self.checkJoin(SIMPLE_BASE, './../g', 'http://a/b/g')
        self.checkJoin(SIMPLE_BASE, './g/.', 'http://a/b/c/g/')
        self.checkJoin(SIMPLE_BASE, 'g/./h', 'http://a/b/c/g/h')
        self.checkJoin(SIMPLE_BASE, 'g/../h', 'http://a/b/c/h')
        self.checkJoin(SIMPLE_BASE, 'http:g', 'http://a/b/c/g')
        self.checkJoin(SIMPLE_BASE, 'http:', 'http://a/b/c/d')
        self.checkJoin(SIMPLE_BASE, 'http:?y', 'http://a/b/c/d?y')
        self.checkJoin(SIMPLE_BASE, 'http:g?y', 'http://a/b/c/g?y')
        self.checkJoin(SIMPLE_BASE, 'http:g?y/./x', 'http://a/b/c/g?y/./x')
        self.checkJoin('http:///', '..', 'http:///')
        self.checkJoin('', 'http://a/b/c/g?y/./x', 'http://a/b/c/g?y/./x')
        self.checkJoin('', 'http://a/./g', 'http://a/./g')
        self.checkJoin('svn://pathtorepo/dir1', 'dir2', 'svn://pathtorepo/dir2')
        self.checkJoin('svn+ssh://pathtorepo/dir1', 'dir2', 'svn+ssh://pathtorepo/dir2')
        self.checkJoin('ws://a/b', 'g', 'ws://a/g')
        self.checkJoin('wss://a/b', 'g', 'wss://a/g')

        # XXX: The following tests are no longer compatible with RFC3986
        # self.checkJoin(SIMPLE_BASE, '../../../g', 'http://a/../g')
        # self.checkJoin(SIMPLE_BASE, '/./g', 'http://a/./g')

        # test for issue22118 duplicate slashes
        self.checkJoin(SIMPLE_BASE + '/', 'foo', SIMPLE_BASE + '/foo')

        # Non-RFC-defined tests, covering variations of base and trailing
        # slashes
        self.checkJoin('http://a/b/c/d/e/', '../../f/g/', 'http://a/b/c/f/g/')
        self.checkJoin('http://a/b/c/d/e', '../../f/g/', 'http://a/b/f/g/')
        self.checkJoin('http://a/b/c/d/e/', '/../../f/g/', 'http://a/f/g/')
        self.checkJoin('http://a/b/c/d/e', '/../../f/g/', 'http://a/f/g/')
        self.checkJoin('http://a/b/c/d/e/', '../../f/g', 'http://a/b/c/f/g')
        self.checkJoin('http://a/b/', '../../f/g/', 'http://a/f/g/')

        # issue 23703: don't duplicate filename
        self.checkJoin('a', 'b', 'b')

    def test_RFC2732(self):
        str_cases = [
            ('http://Test.python.org:5432/foo/', 'test.python.org', 5432),
            ('http://12.34.56.78:5432/foo/', '12.34.56.78', 5432),
            ('http://[::1]:5432/foo/', '::1', 5432),
            ('http://[dead:beef::1]:5432/foo/', 'dead:beef::1', 5432),
            ('http://[dead:beef::]:5432/foo/', 'dead:beef::', 5432),
            ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]:5432/foo/',
             'dead:beef:cafe:5417:affe:8fa3:deaf:feed', 5432),
            ('http://[::12.34.56.78]:5432/foo/', '::12.34.56.78', 5432),
            ('http://[::ffff:12.34.56.78]:5432/foo/',
             '::ffff:12.34.56.78', 5432),
            ('http://Test.python.org/foo/', 'test.python.org', None),
            ('http://12.34.56.78/foo/', '12.34.56.78', None),
            ('http://[::1]/foo/', '::1', None),
            ('http://[dead:beef::1]/foo/', 'dead:beef::1', None),
            ('http://[dead:beef::]/foo/', 'dead:beef::', None),
            ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]/foo/',
             'dead:beef:cafe:5417:affe:8fa3:deaf:feed', None),
            ('http://[::12.34.56.78]/foo/', '::12.34.56.78', None),
            ('http://[::ffff:12.34.56.78]/foo/',
             '::ffff:12.34.56.78', None),
            ('http://Test.python.org:/foo/', 'test.python.org', None),
            ('http://12.34.56.78:/foo/', '12.34.56.78', None),
            ('http://[::1]:/foo/', '::1', None),
            ('http://[dead:beef::1]:/foo/', 'dead:beef::1', None),
            ('http://[dead:beef::]:/foo/', 'dead:beef::', None),
            ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]:/foo/',
             'dead:beef:cafe:5417:affe:8fa3:deaf:feed', None),
            ('http://[::12.34.56.78]:/foo/', '::12.34.56.78', None),
            ('http://[::ffff:12.34.56.78]:/foo/',
             '::ffff:12.34.56.78', None),
        ]
        def _encode(t):
            return t[0].encode('ascii'), t[1].encode('ascii'), t[2]
        bytes_cases = [_encode(x) for x in str_cases]
        for url, hostname, port in str_cases + bytes_cases:
            urlparsed = urllib.parse.urlparse(url)
            self.assertEqual((urlparsed.hostname, urlparsed.port), (hostname, port))

        str_cases = [
            'http://::12.34.56.78]/',
            'http://[::1/foo/',
            'ftp://[::1/foo/bad]/bad',
            'http://[::1/foo/bad]/bad',
            'http://[::ffff:12.34.56.78']
        bytes_cases = [x.encode('ascii') for x in str_cases]
        for invalid_url in str_cases + bytes_cases:
            self.assertRaises(ValueError, urllib.parse.urlparse, invalid_url)

    def test_urldefrag(self):
        str_cases = [
            ('http://python.org#frag', 'http://python.org', 'frag'),
            ('http://python.org', 'http://python.org', ''),
            ('http://python.org/#frag', 'http://python.org/', 'frag'),
            ('http://python.org/', 'http://python.org/', ''),
            ('http://python.org/?q#frag', 'http://python.org/?q', 'frag'),
            ('http://python.org/?q', 'http://python.org/?q', ''),
            ('http://python.org/p#frag', 'http://python.org/p', 'frag'),
            ('http://python.org/p?q', 'http://python.org/p?q', ''),
            (RFC1808_BASE, 'http://a/b/c/d;p?q', 'f'),
            (RFC2396_BASE, 'http://a/b/c/d;p?q', ''),
        ]
        def _encode(t):
            return type(t)(x.encode('ascii') for x in t)
        bytes_cases = [_encode(x) for x in str_cases]
        for url, defrag, frag in str_cases + bytes_cases:
            result = urllib.parse.urldefrag(url)
            self.assertEqual(result.geturl(), url)
            self.assertEqual(result, (defrag, frag))
            self.assertEqual(result.url, defrag)
            self.assertEqual(result.fragment, frag)

    def test_urlsplit_scoped_IPv6(self):
        p = urllib.parse.urlsplit('http://[FE80::822a:a8ff:fe49:470c%tESt]:1234')
        self.assertEqual(p.hostname, "fe80::822a:a8ff:fe49:470c%tESt")
        self.assertEqual(p.netloc, '[FE80::822a:a8ff:fe49:470c%tESt]:1234')

        p = urllib.parse.urlsplit(b'http://[FE80::822a:a8ff:fe49:470c%tESt]:1234')
        self.assertEqual(p.hostname, b"fe80::822a:a8ff:fe49:470c%tESt")
        self.assertEqual(p.netloc, b'[FE80::822a:a8ff:fe49:470c%tESt]:1234')

    def test_urlsplit_attributes(self):
        url = "HTTP://WWW.PYTHON.ORG/doc/#frag"
        p = urllib.parse.urlsplit(url)
        self.assertEqual(p.scheme, "http")
        self.assertEqual(p.netloc, "WWW.PYTHON.ORG")
        self.assertEqual(p.path, "/doc/")
        self.assertEqual(p.query, "")
        self.assertEqual(p.fragment, "frag")
        self.assertEqual(p.username, None)
        self.assertEqual(p.password, None)
        self.assertEqual(p.hostname, "www.python.org")
        self.assertEqual(p.port, None)
        # geturl() won't return exactly the original URL in this case
        # since the scheme is always case-normalized
        # We handle this by ignoring the first 4 characters of the URL
        self.assertEqual(p.geturl()[4:], url[4:])

        url = "http://User:Pass@www.python.org:080/doc/?query=yes#frag"
        p = urllib.parse.urlsplit(url)
        self.assertEqual(p.scheme, "http")
        self.assertEqual(p.netloc, "User:Pass@www.python.org:080")
        self.assertEqual(p.path, "/doc/")
        self.assertEqual(p.query, "query=yes")
        self.assertEqual(p.fragment, "frag")
        self.assertEqual(p.username, "User")
        self.assertEqual(p.password, "Pass")
        self.assertEqual(p.hostname, "www.python.org")
        self.assertEqual(p.port, 80)
        self.assertEqual(p.geturl(), url)

        # Addressing issue1698, which suggests Username can contain
        # "@" characters.  Though not RFC compliant, many ftp sites allow
        # and request email addresses as usernames.

        url = "http://User@example.com:Pass@www.python.org:080/doc/?query=yes#frag"
        p = urllib.parse.urlsplit(url)
        self.assertEqual(p.scheme, "http")
        self.assertEqual(p.netloc, "User@example.com:Pass@www.python.org:080")
        self.assertEqual(p.path, "/doc/")
        self.assertEqual(p.query, "query=yes")
        self.assertEqual(p.fragment, "frag")
        self.assertEqual(p.username, "User@example.com")
        self.assertEqual(p.password, "Pass")
        self.assertEqual(p.hostname, "www.python.org")
        self.assertEqual(p.port, 80)
        self.assertEqual(p.geturl(), url)

        # And check them all again, only with bytes this time
        url = b"HTTP://WWW.PYTHON.ORG/doc/#frag"
        p = urllib.parse.urlsplit(url)
        self.assertEqual(p.scheme, b"http")
        self.assertEqual(p.netloc, b"WWW.PYTHON.ORG")
        self.assertEqual(p.path, b"/doc/")
        self.assertEqual(p.query, b"")
        self.assertEqual(p.fragment, b"frag")
        self.assertEqual(p.username, None)
        self.assertEqual(p.password, None)
        self.assertEqual(p.hostname, b"www.python.org")
        self.assertEqual(p.port, None)
        self.assertEqual(p.geturl()[4:], url[4:])

        url = b"http://User:Pass@www.python.org:080/doc/?query=yes#frag"
        p = urllib.parse.urlsplit(url)
        self.assertEqual(p.scheme, b"http")
        self.assertEqual(p.netloc, b"User:Pass@www.python.org:080")
        self.assertEqual(p.path, b"/doc/")
        self.assertEqual(p.query, b"query=yes")
        self.assertEqual(p.fragment, b"frag")
        self.assertEqual(p.username, b"User")
        self.assertEqual(p.password, b"Pass")
        self.assertEqual(p.hostname, b"www.python.org")
        self.assertEqual(p.port, 80)
        self.assertEqual(p.geturl(), url)

        url = b"http://User@example.com:Pass@www.python.org:080/doc/?query=yes#frag"
        p = urllib.parse.urlsplit(url)
        self.assertEqual(p.scheme, b"http")
        self.assertEqual(p.netloc, b"User@example.com:Pass@www.python.org:080")
        self.assertEqual(p.path, b"/doc/")
        self.assertEqual(p.query, b"query=yes")
        self.assertEqual(p.fragment, b"frag")
        self.assertEqual(p.username, b"User@example.com")
        self.assertEqual(p.password, b"Pass")
        self.assertEqual(p.hostname, b"www.python.org")
        self.assertEqual(p.port, 80)
        self.assertEqual(p.geturl(), url)

        # Verify an illegal port raises ValueError
        url = b"HTTP://WWW.PYTHON.ORG:65536/doc/#frag"
        p = urllib.parse.urlsplit(url)
        with self.assertRaisesRegex(ValueError, "out of range"):
            p.port

    def test_attributes_bad_port(self):
        """Check handling of invalid ports."""
        for bytes in (False, True):
            for parse in (urllib.parse.urlsplit, urllib.parse.urlparse):
                for port in ("foo", "1.5", "-1", "0x10"):
                    with self.subTest(bytes=bytes, parse=parse, port=port):
                        netloc = "www.example.net:" + port
                        url = "http://" + netloc
                        if bytes:
                            netloc = netloc.encode("ascii")
                            url = url.encode("ascii")
                        p = parse(url)
                        self.assertEqual(p.netloc, netloc)
                        with self.assertRaises(ValueError):
                            p.port

    def test_attributes_without_netloc(self):
        # This example is straight from RFC 3261.  It looks like it
        # should allow the username, hostname, and port to be filled
        # in, but doesn't.  Since it's a URI and doesn't use the
        # scheme://netloc syntax, the netloc and related attributes
        # should be left empty.
        uri = "sip:alice@atlanta.com;maddr=239.255.255.1;ttl=15"
        p = urllib.parse.urlsplit(uri)
        self.assertEqual(p.netloc, "")
        self.assertEqual(p.username, None)
        self.assertEqual(p.password, None)
        self.assertEqual(p.hostname, None)
        self.assertEqual(p.port, None)
        self.assertEqual(p.geturl(), uri)

        p = urllib.parse.urlparse(uri)
        self.assertEqual(p.netloc, "")
        self.assertEqual(p.username, None)
        self.assertEqual(p.password, None)
        self.assertEqual(p.hostname, None)
        self.assertEqual(p.port, None)
        self.assertEqual(p.geturl(), uri)

        # You guessed it, repeating the test with bytes input
        uri = b"sip:alice@atlanta.com;maddr=239.255.255.1;ttl=15"
        p = urllib.parse.urlsplit(uri)
        self.assertEqual(p.netloc, b"")
        self.assertEqual(p.username, None)
        self.assertEqual(p.password, None)
        self.assertEqual(p.hostname, None)
        self.assertEqual(p.port, None)
        self.assertEqual(p.geturl(), uri)

        p = urllib.parse.urlparse(uri)
        self.assertEqual(p.netloc, b"")
        self.assertEqual(p.username, None)
        self.assertEqual(p.password, None)
        self.assertEqual(p.hostname, None)
        self.assertEqual(p.port, None)
        self.assertEqual(p.geturl(), uri)

    def test_noslash(self):
        # Issue 1637: http://foo.com?query is legal
        self.assertEqual(urllib.parse.urlparse("http://example.com?blahblah=/foo"),
                         ('http', 'example.com', '', '', 'blahblah=/foo', ''))
        self.assertEqual(urllib.parse.urlparse(b"http://example.com?blahblah=/foo"),
                         (b'http', b'example.com', b'', b'', b'blahblah=/foo', b''))

    def test_withoutscheme(self):
        # Test urlparse without a scheme.
        # Issue 754016: urlparse goes wrong with IP:port without scheme.
        # RFC 1808 specifies that the netloc should start with '//'; urlparse
        # expects the same, otherwise it classifies that portion of the URL
        # as the path.
        self.assertEqual(urllib.parse.urlparse("path"),
                         ('', '', 'path', '', '', ''))
        self.assertEqual(urllib.parse.urlparse("//www.python.org:80"),
                         ('', 'www.python.org:80', '', '', '', ''))
        self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"),
                         ('http', 'www.python.org:80', '', '', '', ''))
        # Repeat for bytes input
        self.assertEqual(urllib.parse.urlparse(b"path"),
                         (b'', b'', b'path', b'', b'', b''))
        self.assertEqual(urllib.parse.urlparse(b"//www.python.org:80"),
                         (b'', b'www.python.org:80', b'', b'', b'', b''))
        self.assertEqual(urllib.parse.urlparse(b"http://www.python.org:80"),
                         (b'http', b'www.python.org:80', b'', b'', b'', b''))
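
    # Minimal illustration of the rule above: text only becomes the netloc
    # when the '//' prefix is present, otherwise it stays in the path
    # (plain urllib.parse behaviour, shown here as a reading aid with a
    # made-up host):
    #
    #     >>> urllib.parse.urlsplit('example.com/pth').path
    #     'example.com/pth'
    #     >>> urllib.parse.urlsplit('//example.com/pth').netloc
    #     'example.com'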

    def test_portseparator(self):
        # Issue 754016: the port separator ':' must be distinguished from the
        # scheme separator
        self.assertEqual(urllib.parse.urlparse("path:80"),
                         ('', '', 'path:80', '', '', ''))
        self.assertEqual(urllib.parse.urlparse("http:"), ('http', '', '', '', '', ''))
        self.assertEqual(urllib.parse.urlparse("https:"), ('https', '', '', '', '', ''))
        self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"),
                         ('http', 'www.python.org:80', '', '', '', ''))
        # As usual, need to check bytes input as well
        self.assertEqual(urllib.parse.urlparse(b"path:80"),
                         (b'', b'', b'path:80', b'', b'', b''))
        self.assertEqual(urllib.parse.urlparse(b"http:"), (b'http', b'', b'', b'', b'', b''))
        self.assertEqual(urllib.parse.urlparse(b"https:"), (b'https', b'', b'', b'', b'', b''))
        self.assertEqual(urllib.parse.urlparse(b"http://www.python.org:80"),
                         (b'http', b'www.python.org:80', b'', b'', b'', b''))

    def test_usingsys(self):
        # Issue 3314: sys module is used in the error
        self.assertRaises(TypeError, urllib.parse.urlencode, "foo")

    def test_anyscheme(self):
        # Issue 7904: s3://foo.com/stuff has netloc "foo.com".
        self.assertEqual(urllib.parse.urlparse("s3://foo.com/stuff"),
                         ('s3', 'foo.com', '/stuff', '', '', ''))
        self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff"),
                         ('x-newscheme', 'foo.com', '/stuff', '', '', ''))
        self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff?query#fragment"),
                         ('x-newscheme', 'foo.com', '/stuff', '', 'query', 'fragment'))
        self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff?query"),
                         ('x-newscheme', 'foo.com', '/stuff', '', 'query', ''))

        # And for bytes...
        self.assertEqual(urllib.parse.urlparse(b"s3://foo.com/stuff"),
                         (b's3', b'foo.com', b'/stuff', b'', b'', b''))
        self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff"),
                         (b'x-newscheme', b'foo.com', b'/stuff', b'', b'', b''))
        self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff?query#fragment"),
                         (b'x-newscheme', b'foo.com', b'/stuff', b'', b'query', b'fragment'))
        self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff?query"),
                         (b'x-newscheme', b'foo.com', b'/stuff', b'', b'query', b''))

    def test_default_scheme(self):
        # Exercise the scheme parameter of urlparse() and urlsplit()
        for func in (urllib.parse.urlparse, urllib.parse.urlsplit):
            with self.subTest(function=func):
                result = func("http://example.net/", "ftp")
                self.assertEqual(result.scheme, "http")
                result = func(b"http://example.net/", b"ftp")
                self.assertEqual(result.scheme, b"http")
                self.assertEqual(func("path", "ftp").scheme, "ftp")
                self.assertEqual(func("path", scheme="ftp").scheme, "ftp")
                self.assertEqual(func(b"path", scheme=b"ftp").scheme, b"ftp")
                self.assertEqual(func("path").scheme, "")
                self.assertEqual(func(b"path").scheme, b"")
                self.assertEqual(func(b"path", "").scheme, b"")

    def test_parse_fragments(self):
        # Exercise the allow_fragments parameter of urlparse() and urlsplit()
        tests = (
            ("http:#frag", "path", "frag"),
            ("//example.net#frag", "path", "frag"),
            ("index.html#frag", "path", "frag"),
            (";a=b#frag", "params", "frag"),
            ("?a=b#frag", "query", "frag"),
            ("#frag", "path", "frag"),
            ("abc#@frag", "path", "@frag"),
            ("//abc#@frag", "path", "@frag"),
            ("//abc:80#@frag", "path", "@frag"),
            ("//abc#@frag:80", "path", "@frag:80"),
        )
        for url, attr, expected_frag in tests:
            for func in (urllib.parse.urlparse, urllib.parse.urlsplit):
                if attr == "params" and func is urllib.parse.urlsplit:
                    attr = "path"
                with self.subTest(url=url, function=func):
                    result = func(url, allow_fragments=False)
                    self.assertEqual(result.fragment, "")
                    self.assertTrue(
                        getattr(result, attr).endswith("#" + expected_frag))
                    self.assertEqual(func(url, "", False).fragment, "")

                    result = func(url, allow_fragments=True)
                    self.assertEqual(result.fragment, expected_frag)
                    self.assertFalse(
                        getattr(result, attr).endswith(expected_frag))
                    self.assertEqual(func(url, "", True).fragment,
                                     expected_frag)
                    self.assertEqual(func(url).fragment, expected_frag)
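
    # As exercised above, allow_fragments=False does not drop the fragment;
    # it simply stops '#' from being treated as a delimiter, so the text is
    # kept in the preceding component (illustrative only):
    #
    #     >>> urllib.parse.urlsplit('http://example.net/a#frag',
    #     ...                       allow_fragments=False).path
    #     '/a#frag'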

    def test_mixed_types_rejected(self):
        # Several functions that process either strings or ASCII encoded bytes
        # accept multiple arguments.  Check they reject mixed type input.
        with self.assertRaisesRegex(TypeError, "Cannot mix str"):
            urllib.parse.urlparse("www.python.org", b"http")
        with self.assertRaisesRegex(TypeError, "Cannot mix str"):
            urllib.parse.urlparse(b"www.python.org", "http")
        with self.assertRaisesRegex(TypeError, "Cannot mix str"):
            urllib.parse.urlsplit("www.python.org", b"http")
        with self.assertRaisesRegex(TypeError, "Cannot mix str"):
            urllib.parse.urlsplit(b"www.python.org", "http")
        with self.assertRaisesRegex(TypeError, "Cannot mix str"):
            urllib.parse.urlunparse((b"http", "www.python.org", "", "", "", ""))
        with self.assertRaisesRegex(TypeError, "Cannot mix str"):
            urllib.parse.urlunparse(("http", b"www.python.org", "", "", "", ""))
        with self.assertRaisesRegex(TypeError, "Cannot mix str"):
            urllib.parse.urlunsplit((b"http", "www.python.org", "", "", ""))
        with self.assertRaisesRegex(TypeError, "Cannot mix str"):
            urllib.parse.urlunsplit(("http", b"www.python.org", "", "", ""))
        with self.assertRaisesRegex(TypeError, "Cannot mix str"):
            urllib.parse.urljoin("http://python.org", b"http://python.org")
        with self.assertRaisesRegex(TypeError, "Cannot mix str"):
            urllib.parse.urljoin(b"http://python.org", "http://python.org")

    def _check_result_type(self, str_type):
        num_args = len(str_type._fields)
        bytes_type = str_type._encoded_counterpart
        self.assertIs(bytes_type._decoded_counterpart, str_type)
        str_args = ('',) * num_args
        bytes_args = (b'',) * num_args
        str_result = str_type(*str_args)
        bytes_result = bytes_type(*bytes_args)
        encoding = 'ascii'
        errors = 'strict'
        self.assertEqual(str_result, str_args)
        self.assertEqual(bytes_result.decode(), str_args)
        self.assertEqual(bytes_result.decode(), str_result)
        self.assertEqual(bytes_result.decode(encoding), str_args)
        self.assertEqual(bytes_result.decode(encoding), str_result)
        self.assertEqual(bytes_result.decode(encoding, errors), str_args)
        self.assertEqual(bytes_result.decode(encoding, errors), str_result)
        self.assertEqual(bytes_result, bytes_args)
        self.assertEqual(str_result.encode(), bytes_args)
        self.assertEqual(str_result.encode(), bytes_result)
        self.assertEqual(str_result.encode(encoding), bytes_args)
        self.assertEqual(str_result.encode(encoding), bytes_result)
        self.assertEqual(str_result.encode(encoding, errors), bytes_args)
        self.assertEqual(str_result.encode(encoding, errors), bytes_result)

    def test_result_pairs(self):
        # Check encoding and decoding between result pairs
        result_types = [
            urllib.parse.DefragResult,
            urllib.parse.SplitResult,
            urllib.parse.ParseResult,
        ]
        for result_type in result_types:
            self._check_result_type(result_type)
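
    # The str/bytes result pairs checked above convert into each other via
    # .encode()/.decode(), ASCII by default, e.g. (illustrative only):
    #
    #     >>> urllib.parse.urlsplit('http://a/b').encode()
    #     SplitResultBytes(scheme=b'http', netloc=b'a', path=b'/b', query=b'', fragment=b'')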

    def test_parse_qs_encoding(self):
        result = urllib.parse.parse_qs("key=\u0141%E9", encoding="latin-1")
        self.assertEqual(result, {'key': ['\u0141\xE9']})
        result = urllib.parse.parse_qs("key=\u0141%C3%A9", encoding="utf-8")
        self.assertEqual(result, {'key': ['\u0141\xE9']})
        result = urllib.parse.parse_qs("key=\u0141%C3%A9", encoding="ascii")
        self.assertEqual(result, {'key': ['\u0141\ufffd\ufffd']})
        result = urllib.parse.parse_qs("key=\u0141%E9-", encoding="ascii")
        self.assertEqual(result, {'key': ['\u0141\ufffd-']})
        result = urllib.parse.parse_qs("key=\u0141%E9-", encoding="ascii",
                                       errors="ignore")
        self.assertEqual(result, {'key': ['\u0141-']})

    def test_parse_qsl_encoding(self):
        result = urllib.parse.parse_qsl("key=\u0141%E9", encoding="latin-1")
        self.assertEqual(result, [('key', '\u0141\xE9')])
        result = urllib.parse.parse_qsl("key=\u0141%C3%A9", encoding="utf-8")
        self.assertEqual(result, [('key', '\u0141\xE9')])
        result = urllib.parse.parse_qsl("key=\u0141%C3%A9", encoding="ascii")
        self.assertEqual(result, [('key', '\u0141\ufffd\ufffd')])
        result = urllib.parse.parse_qsl("key=\u0141%E9-", encoding="ascii")
        self.assertEqual(result, [('key', '\u0141\ufffd-')])
        result = urllib.parse.parse_qsl("key=\u0141%E9-", encoding="ascii",
                                        errors="ignore")
        self.assertEqual(result, [('key', '\u0141-')])

    def test_parse_qsl_max_num_fields(self):
        with self.assertRaises(ValueError):
            urllib.parse.parse_qs('&'.join(['a=a'] * 11), max_num_fields=10)
        with self.assertRaises(ValueError):
            urllib.parse.parse_qs(';'.join(['a=a'] * 11), max_num_fields=10)
        urllib.parse.parse_qs('&'.join(['a=a'] * 10), max_num_fields=10)

    def test_urlencode_sequences(self):
        # Other tests incidentally urlencode things; test non-covered cases:
        # Sequence and object values.
        result = urllib.parse.urlencode({'a': [1, 2], 'b': (3, 4, 5)}, True)
        # we cannot rely on ordering here
        assert set(result.split('&')) == {'a=1', 'a=2', 'b=3', 'b=4', 'b=5'}

        class Trivial:
            def __str__(self):
                return 'trivial'

        result = urllib.parse.urlencode({'a': Trivial()}, True)
        self.assertEqual(result, 'a=trivial')

    def test_urlencode_quote_via(self):
        result = urllib.parse.urlencode({'a': 'some value'})
        self.assertEqual(result, "a=some+value")
        result = urllib.parse.urlencode({'a': 'some value/another'},
                                        quote_via=urllib.parse.quote)
        self.assertEqual(result, "a=some%20value%2Fanother")
        result = urllib.parse.urlencode({'a': 'some value/another'},
                                        safe='/', quote_via=urllib.parse.quote)
        self.assertEqual(result, "a=some%20value/another")

    def test_quote_from_bytes(self):
        self.assertRaises(TypeError, urllib.parse.quote_from_bytes, 'foo')
        result = urllib.parse.quote_from_bytes(b'archaeological arcana')
        self.assertEqual(result, 'archaeological%20arcana')
        result = urllib.parse.quote_from_bytes(b'')
        self.assertEqual(result, '')

    def test_unquote_to_bytes(self):
        result = urllib.parse.unquote_to_bytes('abc%20def')
        self.assertEqual(result, b'abc def')
        result = urllib.parse.unquote_to_bytes('')
        self.assertEqual(result, b'')

    def test_quote_errors(self):
        self.assertRaises(TypeError, urllib.parse.quote, b'foo',
                          encoding='utf-8')
        self.assertRaises(TypeError, urllib.parse.quote, b'foo', errors='strict')

    def test_issue14072(self):
        p1 = urllib.parse.urlsplit('tel:+31-641044153')
        self.assertEqual(p1.scheme, 'tel')
        self.assertEqual(p1.path, '+31-641044153')
        p2 = urllib.parse.urlsplit('tel:+31641044153')
        self.assertEqual(p2.scheme, 'tel')
        self.assertEqual(p2.path, '+31641044153')
        # assert the behavior for urlparse
        p1 = urllib.parse.urlparse('tel:+31-641044153')
        self.assertEqual(p1.scheme, 'tel')
        self.assertEqual(p1.path, '+31-641044153')
        p2 = urllib.parse.urlparse('tel:+31641044153')
        self.assertEqual(p2.scheme, 'tel')
        self.assertEqual(p2.path, '+31641044153')

    def test_port_casting_failure_message(self):
        message = "Port could not be cast to integer value as 'oracle'"
        p1 = urllib.parse.urlparse('http://Server=sde; Service=sde:oracle')
        with self.assertRaisesRegex(ValueError, message):
            p1.port

        p2 = urllib.parse.urlsplit('http://Server=sde; Service=sde:oracle')
        with self.assertRaisesRegex(ValueError, message):
            p2.port

    def test_telurl_params(self):
        p1 = urllib.parse.urlparse('tel:123-4;phone-context=+1-650-516')
        self.assertEqual(p1.scheme, 'tel')
        self.assertEqual(p1.path, '123-4')
        self.assertEqual(p1.params, 'phone-context=+1-650-516')

        p1 = urllib.parse.urlparse('tel:+1-201-555-0123')
        self.assertEqual(p1.scheme, 'tel')
        self.assertEqual(p1.path, '+1-201-555-0123')
        self.assertEqual(p1.params, '')

        p1 = urllib.parse.urlparse('tel:7042;phone-context=example.com')
        self.assertEqual(p1.scheme, 'tel')
        self.assertEqual(p1.path, '7042')
        self.assertEqual(p1.params, 'phone-context=example.com')

        p1 = urllib.parse.urlparse('tel:863-1234;phone-context=+1-914-555')
        self.assertEqual(p1.scheme, 'tel')
        self.assertEqual(p1.path, '863-1234')
        self.assertEqual(p1.params, 'phone-context=+1-914-555')

    def test_Quoter_repr(self):
        quoter = urllib.parse.Quoter(urllib.parse._ALWAYS_SAFE)
        self.assertIn('Quoter', repr(quoter))

    def test_all(self):
        expected = []
        undocumented = {
            'splitattr', 'splithost', 'splitnport', 'splitpasswd',
            'splitport', 'splitquery', 'splittag', 'splittype', 'splituser',
            'splitvalue',
            'Quoter', 'ResultBase', 'clear_cache', 'to_bytes', 'unwrap',
        }
        for name in dir(urllib.parse):
            if name.startswith('_') or name in undocumented:
                continue
            object = getattr(urllib.parse, name)
            if getattr(object, '__module__', None) == 'urllib.parse':
                expected.append(name)
        self.assertCountEqual(urllib.parse.__all__, expected)

    def test_urlsplit_normalization(self):
        # Certain characters should never occur in the netloc,
        # including under normalization.
        # Ensure that ALL of them are detected and cause an error
        illegal_chars = '/:#?@'
        hex_chars = {'{:04X}'.format(ord(c)) for c in illegal_chars}
        denorm_chars = [
            c for c in map(chr, range(128, sys.maxunicode))
            if (hex_chars & set(unicodedata.decomposition(c).split()))
            and c not in illegal_chars
        ]
        # Sanity check that we found at least one such character
        self.assertIn('\u2100', denorm_chars)
        self.assertIn('\uFF03', denorm_chars)

        # bpo-36742: Verify port separators are ignored when they
        # existed prior to decomposition
        urllib.parse.urlsplit('http://\u30d5\u309a:80')
        with self.assertRaises(ValueError):
            urllib.parse.urlsplit('http://\u30d5\u309a\ufe1380')

        for scheme in ["http", "https", "ftp"]:
            for netloc in ["netloc{}false.netloc", "n{}user@netloc"]:
                for c in denorm_chars:
                    url = "{}://{}/path".format(scheme, netloc.format(c))
                    with self.subTest(url=url, char='{:04X}'.format(ord(c))):
                        with self.assertRaises(ValueError):
                            urllib.parse.urlsplit(url)

class Utility_Tests(unittest.TestCase):
    """Testcase to test the various utility functions in urllib.parse."""
    # In Python 2 this test class was in test_urllib.

    def test_splittype(self):
        splittype = urllib.parse._splittype
        self.assertEqual(splittype('type:opaquestring'), ('type', 'opaquestring'))
        self.assertEqual(splittype('opaquestring'), (None, 'opaquestring'))
        self.assertEqual(splittype(':opaquestring'), (None, ':opaquestring'))
        self.assertEqual(splittype('type:'), ('type', ''))
        self.assertEqual(splittype('type:opaque:string'), ('type', 'opaque:string'))

    def test_splithost(self):
        splithost = urllib.parse._splithost
        self.assertEqual(splithost('//www.example.org:80/foo/bar/baz.html'),
                         ('www.example.org:80', '/foo/bar/baz.html'))
        self.assertEqual(splithost('//www.example.org:80'),
                         ('www.example.org:80', ''))
        self.assertEqual(splithost('/foo/bar/baz.html'),
                         (None, '/foo/bar/baz.html'))

        # bpo-30500: # starts a fragment.
        self.assertEqual(splithost('//127.0.0.1#@host.com'),
                         ('127.0.0.1', '/#@host.com'))
        self.assertEqual(splithost('//127.0.0.1#@host.com:80'),
                         ('127.0.0.1', '/#@host.com:80'))
        self.assertEqual(splithost('//127.0.0.1:80#@host.com'),
                         ('127.0.0.1:80', '/#@host.com'))

        # Empty host is returned as empty string.
        self.assertEqual(splithost("///file"),
                         ('', '/file'))

        # Trailing semicolon, question mark and hash symbol are kept.
        self.assertEqual(splithost("//example.net/file;"),
                         ('example.net', '/file;'))
        self.assertEqual(splithost("//example.net/file?"),
                         ('example.net', '/file?'))
        self.assertEqual(splithost("//example.net/file#"),
                         ('example.net', '/file#'))

    def test_splituser(self):
        splituser = urllib.parse._splituser
        self.assertEqual(splituser('User:Pass@www.python.org:080'),
                         ('User:Pass', 'www.python.org:080'))
        self.assertEqual(splituser('@www.python.org:080'),
                         ('', 'www.python.org:080'))
        self.assertEqual(splituser('www.python.org:080'),
                         (None, 'www.python.org:080'))
        self.assertEqual(splituser('User:Pass@'),
                         ('User:Pass', ''))
        self.assertEqual(splituser('User@example.com:Pass@www.python.org:080'),
                         ('User@example.com:Pass', 'www.python.org:080'))

    def test_splitpasswd(self):
        # Some of the password examples are not sensible, but they are
        # included to conform to RFC 2617 and to address issue 4675.
        splitpasswd = urllib.parse._splitpasswd
        self.assertEqual(splitpasswd('user:ab'), ('user', 'ab'))
        self.assertEqual(splitpasswd('user:a\nb'), ('user', 'a\nb'))
        self.assertEqual(splitpasswd('user:a\tb'), ('user', 'a\tb'))
        self.assertEqual(splitpasswd('user:a\rb'), ('user', 'a\rb'))
        self.assertEqual(splitpasswd('user:a\fb'), ('user', 'a\fb'))
        self.assertEqual(splitpasswd('user:a\vb'), ('user', 'a\vb'))
        self.assertEqual(splitpasswd('user:a:b'), ('user', 'a:b'))
        self.assertEqual(splitpasswd('user:a b'), ('user', 'a b'))
        self.assertEqual(splitpasswd('user 2:ab'), ('user 2', 'ab'))
        self.assertEqual(splitpasswd('user+1:a+b'), ('user+1', 'a+b'))
        self.assertEqual(splitpasswd('user:'), ('user', ''))
        self.assertEqual(splitpasswd('user'), ('user', None))
        self.assertEqual(splitpasswd(':ab'), ('', 'ab'))

    def test_splitport(self):
        splitport = urllib.parse._splitport
        self.assertEqual(splitport('parrot:88'), ('parrot', '88'))
        self.assertEqual(splitport('parrot'), ('parrot', None))
        self.assertEqual(splitport('parrot:'), ('parrot', None))
        self.assertEqual(splitport('127.0.0.1'), ('127.0.0.1', None))
        self.assertEqual(splitport('parrot:cheese'), ('parrot:cheese', None))
        self.assertEqual(splitport('[::1]:88'), ('[::1]', '88'))
        self.assertEqual(splitport('[::1]'), ('[::1]', None))
        self.assertEqual(splitport(':88'), ('', '88'))

    def test_splitnport(self):
        splitnport = urllib.parse._splitnport
        self.assertEqual(splitnport('parrot:88'), ('parrot', 88))
        self.assertEqual(splitnport('parrot'), ('parrot', -1))
        self.assertEqual(splitnport('parrot', 55), ('parrot', 55))
        self.assertEqual(splitnport('parrot:'), ('parrot', -1))
        self.assertEqual(splitnport('parrot:', 55), ('parrot', 55))
        self.assertEqual(splitnport('127.0.0.1'), ('127.0.0.1', -1))
        self.assertEqual(splitnport('127.0.0.1', 55), ('127.0.0.1', 55))
        self.assertEqual(splitnport('parrot:cheese'), ('parrot', None))
        self.assertEqual(splitnport('parrot:cheese', 55), ('parrot', None))

    def test_splitquery(self):
        # Normal cases are exercised by other tests; ensure that we also
        # catch cases with no query specified (testcase ensuring coverage)
        splitquery = urllib.parse._splitquery
        self.assertEqual(splitquery('http://python.org/fake?foo=bar'),
                         ('http://python.org/fake', 'foo=bar'))
        self.assertEqual(splitquery('http://python.org/fake?foo=bar?'),
                         ('http://python.org/fake?foo=bar', ''))
        self.assertEqual(splitquery('http://python.org/fake'),
                         ('http://python.org/fake', None))
        self.assertEqual(splitquery('?foo=bar'), ('', 'foo=bar'))

    def test_splittag(self):
        splittag = urllib.parse._splittag
        self.assertEqual(splittag('http://example.com?foo=bar#baz'),
                         ('http://example.com?foo=bar', 'baz'))
        self.assertEqual(splittag('http://example.com?foo=bar#'),
                         ('http://example.com?foo=bar', ''))
        self.assertEqual(splittag('#baz'), ('', 'baz'))
        self.assertEqual(splittag('http://example.com?foo=bar'),
                         ('http://example.com?foo=bar', None))
        self.assertEqual(splittag('http://example.com?foo=bar#baz#boo'),
                         ('http://example.com?foo=bar#baz', 'boo'))

    def test_splitattr(self):
        splitattr = urllib.parse._splitattr
        self.assertEqual(splitattr('/path;attr1=value1;attr2=value2'),
                         ('/path', ['attr1=value1', 'attr2=value2']))
        self.assertEqual(splitattr('/path;'), ('/path', ['']))
        self.assertEqual(splitattr(';attr1=value1;attr2=value2'),
                         ('', ['attr1=value1', 'attr2=value2']))
        self.assertEqual(splitattr('/path'), ('/path', []))

    def test_splitvalue(self):
        # Normal cases are exercised by other tests; test pathological cases
        # with no key/value pairs. (testcase ensuring coverage)
        splitvalue = urllib.parse._splitvalue
        self.assertEqual(splitvalue('foo=bar'), ('foo', 'bar'))
        self.assertEqual(splitvalue('foo='), ('foo', ''))
        self.assertEqual(splitvalue('=bar'), ('', 'bar'))
        self.assertEqual(splitvalue('foobar'), ('foobar', None))
        self.assertEqual(splitvalue('foo=bar=baz'), ('foo', 'bar=baz'))

    def test_to_bytes(self):
        result = urllib.parse._to_bytes('http://www.python.org')
        self.assertEqual(result, 'http://www.python.org')
        self.assertRaises(UnicodeError, urllib.parse._to_bytes,
                          'http://www.python.org/medi\u00e6val')

    def test_unwrap(self):
        for wrapped_url in ('<URL:scheme://host/path>', '<scheme://host/path>',
                            'URL:scheme://host/path', 'scheme://host/path'):
            url = urllib.parse.unwrap(wrapped_url)
            self.assertEqual(url, 'scheme://host/path')


class DeprecationTest(unittest.TestCase):

    def test_splittype_deprecation(self):
        with self.assertWarns(DeprecationWarning) as cm:
            urllib.parse.splittype('')
        self.assertEqual(str(cm.warning),
                         'urllib.parse.splittype() is deprecated as of 3.8, '
                         'use urllib.parse.urlparse() instead')

    def test_splithost_deprecation(self):
        with self.assertWarns(DeprecationWarning) as cm:
            urllib.parse.splithost('')
        self.assertEqual(str(cm.warning),
                         'urllib.parse.splithost() is deprecated as of 3.8, '
                         'use urllib.parse.urlparse() instead')

    def test_splituser_deprecation(self):
        with self.assertWarns(DeprecationWarning) as cm:
            urllib.parse.splituser('')
        self.assertEqual(str(cm.warning),
                         'urllib.parse.splituser() is deprecated as of 3.8, '
                         'use urllib.parse.urlparse() instead')

    def test_splitpasswd_deprecation(self):
        with self.assertWarns(DeprecationWarning) as cm:
            urllib.parse.splitpasswd('')
        self.assertEqual(str(cm.warning),
                         'urllib.parse.splitpasswd() is deprecated as of 3.8, '
                         'use urllib.parse.urlparse() instead')

    def test_splitport_deprecation(self):
        with self.assertWarns(DeprecationWarning) as cm:
            urllib.parse.splitport('')
        self.assertEqual(str(cm.warning),
                         'urllib.parse.splitport() is deprecated as of 3.8, '
                         'use urllib.parse.urlparse() instead')

    def test_splitnport_deprecation(self):
        with self.assertWarns(DeprecationWarning) as cm:
            urllib.parse.splitnport('')
        self.assertEqual(str(cm.warning),
                         'urllib.parse.splitnport() is deprecated as of 3.8, '
                         'use urllib.parse.urlparse() instead')

    def test_splitquery_deprecation(self):
        with self.assertWarns(DeprecationWarning) as cm:
            urllib.parse.splitquery('')
        self.assertEqual(str(cm.warning),
                         'urllib.parse.splitquery() is deprecated as of 3.8, '
                         'use urllib.parse.urlparse() instead')

    def test_splittag_deprecation(self):
        with self.assertWarns(DeprecationWarning) as cm:
            urllib.parse.splittag('')
        self.assertEqual(str(cm.warning),
                         'urllib.parse.splittag() is deprecated as of 3.8, '
                         'use urllib.parse.urlparse() instead')

    def test_splitattr_deprecation(self):
        with self.assertWarns(DeprecationWarning) as cm:
            urllib.parse.splitattr('')
        self.assertEqual(str(cm.warning),
                         'urllib.parse.splitattr() is deprecated as of 3.8, '
                         'use urllib.parse.urlparse() instead')

    def test_splitvalue_deprecation(self):
        with self.assertWarns(DeprecationWarning) as cm:
            urllib.parse.splitvalue('')
        self.assertEqual(str(cm.warning),
                         'urllib.parse.splitvalue() is deprecated as of 3.8, '
                         'use urllib.parse.parse_qsl() instead')

    def test_to_bytes_deprecation(self):
        with self.assertWarns(DeprecationWarning) as cm:
            urllib.parse.to_bytes('')
        self.assertEqual(str(cm.warning),
                         'urllib.parse.to_bytes() is deprecated as of 3.8')


if __name__ == "__main__":
    unittest.main()