"""Regression tests for what was in Python 2's "urllib" module"""

import urllib.parse
import urllib.request
import urllib.error
import http.client
import email.message
import io
import unittest
from unittest.mock import patch
from test import support
import os
try:
    import ssl
except ImportError:
    ssl = None
import sys
import tempfile
from nturl2path import url2pathname, pathname2url

from base64 import b64encode
import collections


def hexescape(char):
    """Escape char as RFC 2396 specifies"""
    # Percent sign followed by exactly two uppercase hex digits
    # (zero-padded for code points below 0x10).
    return "%" + format(ord(char), "02X")

# Shortcut for testing FancyURLopener
_urlopener = None


def urlopen(url, data=None, proxies=None):
    """urlopen(url [, data]) -> open file-like object"""
    global _urlopener
    if proxies is not None:
        # An explicit proxy mapping always gets a dedicated opener.
        opener = urllib.request.FancyURLopener(proxies=proxies)
    elif not _urlopener:
        # Lazily create the shared opener on first use and cache it.
        opener = FancyURLopener()
        _urlopener = opener
    else:
        opener = _urlopener
    return opener.open(url) if data is None else opener.open(url, data)


def FancyURLopener():
    # Instantiating FancyURLopener emits a DeprecationWarning; swallow it
    # so the helpers in this file stay quiet under -W error.
    with support.check_warnings(
            ('FancyURLopener style of invoking requests is deprecated.',
             DeprecationWarning)):
        return urllib.request.FancyURLopener()


def fakehttp(fakedata, mock_close=False):
    """Build an HTTPConnection subclass whose "socket" replays *fakedata*."""

    class FakeSocket(io.BytesIO):
        # Reference count for the file objects handed out by makefile();
        # the underlying buffer is only truly closed when it reaches zero.
        io_refs = 1

        def sendall(self, data):
            # Record the outgoing request bytes so tests can inspect them.
            FakeHTTPConnection.buf = data

        def makefile(self, *args, **kwds):
            self.io_refs += 1
            return self

        def read(self, amt=None):
            if self.closed:
                return b""
            return io.BytesIO.read(self, amt)

        def readline(self, length=None):
            if self.closed:
                return b""
            return io.BytesIO.readline(self, length)

        def close(self):
            self.io_refs -= 1
            if self.io_refs == 0:
                io.BytesIO.close(self)

    class FakeHTTPConnection(http.client.HTTPConnection):

        # buffer to store data for verification in urlopen tests.
        buf = None

        def connect(self):
            self.sock = FakeSocket(self.fakedata)
            type(self).fakesock = self.sock

        if mock_close:
            # bpo-36918: HTTPConnection destructor calls close() which calls
            # flush(). Problem: flush() calls self.fp.flush() which raises
            # "ValueError: I/O operation on closed file" which is logged as an
            # "Exception ignored in". Override close() to silence this error.
            def close(self):
                pass
    FakeHTTPConnection.fakedata = fakedata

    return FakeHTTPConnection


class FakeHTTPMixin(object):
    # Swap http.client.HTTPConnection for a canned-response fake (and back).
    def fakehttp(self, fakedata, mock_close=False):
        fake_http_class = fakehttp(fakedata, mock_close=mock_close)
        self._connection_class = http.client.HTTPConnection
        http.client.HTTPConnection = fake_http_class

    def unfakehttp(self):
        http.client.HTTPConnection = self._connection_class


class FakeFTPMixin(object):
    # Swap urllib.request.ftpwrapper for a do-nothing stub (and back).
    def fakeftp(self):
        class FakeFtpWrapper(object):
            def __init__(self, user, passwd, host, port, dirs, timeout=None,
                         persistent=True):
                pass

            def retrfile(self, file, type):
                return io.BytesIO(), 0

            def close(self):
                pass

        self._ftpwrapper_class = urllib.request.ftpwrapper
        urllib.request.ftpwrapper = FakeFtpWrapper

    def unfakeftp(self):
        urllib.request.ftpwrapper = self._ftpwrapper_class
class urlopen_FileTests(unittest.TestCase):
    """Test urlopen() opening a temporary file.

    Try to test as much functionality as possible so as to cut down on reliance
    on connecting to the Net for testing.

    """

    def setUp(self):
        # Create a temp file to use for testing
        self.text = bytes("test_urllib: %s\n" % self.__class__.__name__,
                          "ascii")
        f = open(support.TESTFN, 'wb')
        try:
            f.write(self.text)
        finally:
            f.close()
        self.pathname = support.TESTFN
        self.quoted_pathname = urllib.parse.quote(self.pathname)
        # NOTE: this is the module-level urlopen() helper defined above
        # (FancyURLopener-based), not urllib.request.urlopen().
        self.returned_obj = urlopen("file:%s" % self.quoted_pathname)

    def tearDown(self):
        """Shut down the open object"""
        self.returned_obj.close()
        os.remove(support.TESTFN)

    def test_interface(self):
        # Make sure object returned by urlopen() has the specified methods
        for attr in ("read", "readline", "readlines", "fileno",
                     "close", "info", "geturl", "getcode", "__iter__"):
            self.assertTrue(hasattr(self.returned_obj, attr),
                            "object returned by urlopen() lacks %s attribute" %
                            attr)

    def test_read(self):
        self.assertEqual(self.text, self.returned_obj.read())

    def test_readline(self):
        self.assertEqual(self.text, self.returned_obj.readline())
        self.assertEqual(b'', self.returned_obj.readline(),
                         "calling readline() after exhausting the file did not"
                         " return an empty string")

    def test_readlines(self):
        lines_list = self.returned_obj.readlines()
        self.assertEqual(len(lines_list), 1,
                         "readlines() returned the wrong number of lines")
        self.assertEqual(lines_list[0], self.text,
                         "readlines() returned improper text")

    def test_fileno(self):
        file_num = self.returned_obj.fileno()
        self.assertIsInstance(file_num, int, "fileno() did not return an int")
        self.assertEqual(os.read(file_num, len(self.text)), self.text,
                         "Reading on the file descriptor returned by fileno() "
                         "did not return the expected text")

    def test_close(self):
        # Test close() by calling it here and then having it be called again
        # by the tearDown() method for the test
        self.returned_obj.close()

    def test_headers(self):
        self.assertIsInstance(self.returned_obj.headers, email.message.Message)

    def test_url(self):
        self.assertEqual(self.returned_obj.url, self.quoted_pathname)

    def test_status(self):
        self.assertIsNone(self.returned_obj.status)

    def test_info(self):
        self.assertIsInstance(self.returned_obj.info(), email.message.Message)

    def test_geturl(self):
        self.assertEqual(self.returned_obj.geturl(), self.quoted_pathname)

    def test_getcode(self):
        self.assertIsNone(self.returned_obj.getcode())

    def test_iter(self):
        # Test iterator
        # Don't need to count number of iterations since test would fail the
        # instant it returned anything beyond the first line from the
        # comparison.
        # Use the iterator in the usual implicit way to test for ticket #4608.
        for line in self.returned_obj:
            self.assertEqual(line, self.text)

    def test_relativelocalfile(self):
        # A scheme-less relative path must be rejected with ValueError.
        self.assertRaises(ValueError,urllib.request.urlopen,'./' + self.pathname)
class ProxyTests(unittest.TestCase):

    def setUp(self):
        # Records changes to env vars
        self.env = support.EnvironmentVarGuard()
        # Delete all proxy related env vars
        for k in list(os.environ):
            if 'proxy' in k.lower():
                self.env.unset(k)

    def tearDown(self):
        # Restore all proxy related env vars
        self.env.__exit__()
        del self.env

    def test_getproxies_environment_keep_no_proxies(self):
        self.env.set('NO_PROXY', 'localhost')
        proxies = urllib.request.getproxies_environment()
        # getproxies_environment use lowered case truncated (no '_proxy') keys
        self.assertEqual('localhost', proxies['no'])
        # List of no_proxies with space.
        self.env.set('NO_PROXY', 'localhost, anotherdomain.com, newdomain.com:1234')
        self.assertTrue(urllib.request.proxy_bypass_environment('anotherdomain.com'))
        self.assertTrue(urllib.request.proxy_bypass_environment('anotherdomain.com:8888'))
        self.assertTrue(urllib.request.proxy_bypass_environment('newdomain.com:1234'))

    def test_proxy_cgi_ignore(self):
        # HTTP_PROXY must be ignored when REQUEST_METHOD is set (CGI context).
        try:
            self.env.set('HTTP_PROXY', 'http://somewhere:3128')
            proxies = urllib.request.getproxies_environment()
            self.assertEqual('http://somewhere:3128', proxies['http'])
            self.env.set('REQUEST_METHOD', 'GET')
            proxies = urllib.request.getproxies_environment()
            self.assertNotIn('http', proxies)
        finally:
            self.env.unset('REQUEST_METHOD')
            self.env.unset('HTTP_PROXY')

    def test_proxy_bypass_environment_host_match(self):
        bypass = urllib.request.proxy_bypass_environment
        self.env.set('NO_PROXY',
                     'localhost, anotherdomain.com, newdomain.com:1234, .d.o.t')
        self.assertTrue(bypass('localhost'))
        self.assertTrue(bypass('LocalHost'))                 # MixedCase
        self.assertTrue(bypass('LOCALHOST'))                 # UPPERCASE
        self.assertTrue(bypass('.localhost'))
        self.assertTrue(bypass('newdomain.com:1234'))
        self.assertTrue(bypass('.newdomain.com:1234'))
        self.assertTrue(bypass('foo.d.o.t'))                 # issue 29142
        self.assertTrue(bypass('d.o.t'))
        self.assertTrue(bypass('anotherdomain.com:8888'))
        self.assertTrue(bypass('.anotherdomain.com:8888'))
        self.assertTrue(bypass('www.newdomain.com:1234'))
        self.assertFalse(bypass('prelocalhost'))
        self.assertFalse(bypass('newdomain.com'))            # no port
        self.assertFalse(bypass('newdomain.com:1235'))       # wrong port

    def test_proxy_bypass_environment_always_match(self):
        # A lone '*' bypasses everything; '*' mixed with other entries does not.
        bypass = urllib.request.proxy_bypass_environment
        self.env.set('NO_PROXY', '*')
        self.assertTrue(bypass('newdomain.com'))
        self.assertTrue(bypass('newdomain.com:1234'))
        self.env.set('NO_PROXY', '*, anotherdomain.com')
        self.assertTrue(bypass('anotherdomain.com'))
        self.assertFalse(bypass('newdomain.com'))
        self.assertFalse(bypass('newdomain.com:1234'))

    def test_proxy_bypass_environment_newline(self):
        # Hosts with trailing newlines must never match a bypass entry.
        bypass = urllib.request.proxy_bypass_environment
        self.env.set('NO_PROXY',
                     'localhost, anotherdomain.com, newdomain.com:1234')
        self.assertFalse(bypass('localhost\n'))
        self.assertFalse(bypass('anotherdomain.com:8888\n'))
        self.assertFalse(bypass('newdomain.com:1234\n'))


class ProxyTests_withOrderedEnv(unittest.TestCase):

    def setUp(self):
        # We need to test conditions, where variable order _is_ significant
        self._saved_env = os.environ
        # Monkey patch os.environ, start with empty fake environment
        os.environ = collections.OrderedDict()

    def tearDown(self):
        os.environ = self._saved_env

    def test_getproxies_environment_prefer_lowercase(self):
        # Test lowercase preference with removal
        os.environ['no_proxy'] = ''
        os.environ['No_Proxy'] = 'localhost'
        self.assertFalse(urllib.request.proxy_bypass_environment('localhost'))
        self.assertFalse(urllib.request.proxy_bypass_environment('arbitrary'))
        os.environ['http_proxy'] = ''
        os.environ['HTTP_PROXY'] = 'http://somewhere:3128'
        proxies = urllib.request.getproxies_environment()
        self.assertEqual({}, proxies)
        # Test lowercase preference of proxy bypass and correct matching including ports
        os.environ['no_proxy'] = 'localhost, noproxy.com, my.proxy:1234'
        os.environ['No_Proxy'] = 'xyz.com'
        self.assertTrue(urllib.request.proxy_bypass_environment('localhost'))
        self.assertTrue(urllib.request.proxy_bypass_environment('noproxy.com:5678'))
        self.assertTrue(urllib.request.proxy_bypass_environment('my.proxy:1234'))
        self.assertFalse(urllib.request.proxy_bypass_environment('my.proxy'))
        self.assertFalse(urllib.request.proxy_bypass_environment('arbitrary'))
        # Test lowercase preference with replacement
        os.environ['http_proxy'] = 'http://somewhere:3128'
        os.environ['Http_Proxy'] = 'http://somewhereelse:3128'
        proxies = urllib.request.getproxies_environment()
        self.assertEqual('http://somewhere:3128', proxies['http'])
class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin, FakeFTPMixin):
    """Test urlopen() opening a fake http connection."""

    def check_read(self, ver):
        # Common driver: fake a response for the given HTTP version and
        # verify body, URL and status code round-trip through urlopen().
        self.fakehttp(b"HTTP/" + ver + b" 200 OK\r\n\r\nHello!")
        try:
            fp = urlopen("http://python.org/")
            self.assertEqual(fp.readline(), b"Hello!")
            self.assertEqual(fp.readline(), b"")
            self.assertEqual(fp.geturl(), 'http://python.org/')
            self.assertEqual(fp.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_url_fragment(self):
        # Issue #11703: geturl() omits fragments in the original URL.
        url = 'http://docs.python.org/library/urllib.html#OK'
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
        try:
            fp = urllib.request.urlopen(url)
            self.assertEqual(fp.geturl(), url)
        finally:
            self.unfakehttp()

    def test_willclose(self):
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
        try:
            resp = urlopen("http://www.python.org")
            self.assertTrue(resp.fp.will_close)
        finally:
            self.unfakehttp()

    @unittest.skipUnless(ssl, "ssl module required")
    def test_url_path_with_control_char_rejected(self):
        for char_no in list(range(0, 0x21)) + [0x7f]:
            char = chr(char_no)
            schemeless_url = f"//localhost:7777/test{char}/"
            self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.")
            try:
                # We explicitly test urllib.request.urlopen() instead of the top
                # level 'def urlopen()' function defined in this... (quite ugly)
                # test suite. They use different url opening codepaths. Plain
                # urlopen uses FancyURLOpener which goes via a codepath that
                # calls urllib.parse.quote() on the URL which makes all of the
                # above attempts at injection within the url _path_ safe.
                escaped_char_repr = repr(char).replace('\\', r'\\')
                InvalidURL = http.client.InvalidURL
                with self.assertRaisesRegex(
                        InvalidURL, f"contain control.*{escaped_char_repr}"):
                    urllib.request.urlopen(f"http:{schemeless_url}")
                with self.assertRaisesRegex(
                        InvalidURL, f"contain control.*{escaped_char_repr}"):
                    urllib.request.urlopen(f"https:{schemeless_url}")
                # This code path quotes the URL so there is no injection.
                resp = urlopen(f"http:{schemeless_url}")
                self.assertNotIn(char, resp.geturl())
            finally:
                self.unfakehttp()

    @unittest.skipUnless(ssl, "ssl module required")
    def test_url_path_with_newline_header_injection_rejected(self):
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.")
        host = "localhost:7777?a=1 HTTP/1.1\r\nX-injected: header\r\nTEST: 123"
        schemeless_url = "//" + host + ":8080/test/?test=a"
        try:
            # We explicitly test urllib.request.urlopen() instead of the top
            # level 'def urlopen()' function defined in this... (quite ugly)
            # test suite. They use different url opening codepaths. Plain
            # urlopen uses FancyURLOpener which goes via a codepath that
            # calls urllib.parse.quote() on the URL which makes all of the
            # above attempts at injection within the url _path_ safe.
            InvalidURL = http.client.InvalidURL
            with self.assertRaisesRegex(
                    InvalidURL, r"contain control.*\\r.*(found at least . .)"):
                urllib.request.urlopen(f"http:{schemeless_url}")
            with self.assertRaisesRegex(InvalidURL, r"contain control.*\\n"):
                urllib.request.urlopen(f"https:{schemeless_url}")
            # This code path quotes the URL so there is no injection.
            resp = urlopen(f"http:{schemeless_url}")
            self.assertNotIn(' ', resp.geturl())
            self.assertNotIn('\r', resp.geturl())
            self.assertNotIn('\n', resp.geturl())
        finally:
            self.unfakehttp()

    @unittest.skipUnless(ssl, "ssl module required")
    def test_url_host_with_control_char_rejected(self):
        for char_no in list(range(0, 0x21)) + [0x7f]:
            char = chr(char_no)
            schemeless_url = f"//localhost{char}/test/"
            self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.")
            try:
                escaped_char_repr = repr(char).replace('\\', r'\\')
                InvalidURL = http.client.InvalidURL
                with self.assertRaisesRegex(
                        InvalidURL, f"contain control.*{escaped_char_repr}"):
                    urlopen(f"http:{schemeless_url}")
                with self.assertRaisesRegex(InvalidURL, f"contain control.*{escaped_char_repr}"):
                    urlopen(f"https:{schemeless_url}")
            finally:
                self.unfakehttp()

    @unittest.skipUnless(ssl, "ssl module required")
    def test_url_host_with_newline_header_injection_rejected(self):
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.")
        host = "localhost\r\nX-injected: header\r\n"
        schemeless_url = "//" + host + ":8080/test/?test=a"
        try:
            InvalidURL = http.client.InvalidURL
            with self.assertRaisesRegex(
                    InvalidURL, r"contain control.*\\r"):
                urlopen(f"http:{schemeless_url}")
            with self.assertRaisesRegex(InvalidURL, r"contain control.*\\n"):
                urlopen(f"https:{schemeless_url}")
        finally:
            self.unfakehttp()

    def test_read_0_9(self):
        # "0.9" response accepted (but not "simple responses" without
        # a status line)
        self.check_read(b"0.9")

    def test_read_1_0(self):
        self.check_read(b"1.0")

    def test_read_1_1(self):
        self.check_read(b"1.1")

    def test_read_bogus(self):
        # urlopen() should raise OSError for many error codes.
        self.fakehttp(b'''HTTP/1.1 401 Authentication Required
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Type: text/html; charset=iso-8859-1
''', mock_close=True)
        try:
            self.assertRaises(OSError, urlopen, "http://python.org/")
        finally:
            self.unfakehttp()

    def test_invalid_redirect(self):
        # urlopen() should raise OSError for many error codes.
        self.fakehttp(b'''HTTP/1.1 302 Found
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Location: file://guidocomputer.athome.com:/python/license
Connection: close
Content-Type: text/html; charset=iso-8859-1
''', mock_close=True)
        try:
            msg = "Redirection to url 'file:"
            with self.assertRaisesRegex(urllib.error.HTTPError, msg):
                urlopen("http://python.org/")
        finally:
            self.unfakehttp()

    def test_redirect_limit_independent(self):
        # Ticket #12923: make sure independent requests each use their
        # own retry limit.
        for i in range(FancyURLopener().maxtries):
            self.fakehttp(b'''HTTP/1.1 302 Found
Location: file://guidocomputer.athome.com:/python/license
Connection: close
''', mock_close=True)
            try:
                self.assertRaises(urllib.error.HTTPError, urlopen,
                                  "http://something")
            finally:
                self.unfakehttp()

    def test_empty_socket(self):
        # urlopen() raises OSError if the underlying socket does not send any
        # data. (#1680230)
        self.fakehttp(b'')
        try:
            self.assertRaises(OSError, urlopen, "http://something")
        finally:
            self.unfakehttp()

    def test_missing_localfile(self):
        # Test for #10836
        with self.assertRaises(urllib.error.URLError) as e:
            urlopen('file://localhost/a/file/which/doesnot/exists.py')
        self.assertTrue(e.exception.filename)
        self.assertTrue(e.exception.reason)

    def test_file_notexists(self):
        fd, tmp_file = tempfile.mkstemp()
        tmp_fileurl = 'file://localhost/' + tmp_file.replace(os.path.sep, '/')
        try:
            self.assertTrue(os.path.exists(tmp_file))
            with urlopen(tmp_fileurl) as fobj:
                self.assertTrue(fobj)
        finally:
            os.close(fd)
            os.unlink(tmp_file)
        self.assertFalse(os.path.exists(tmp_file))
        with self.assertRaises(urllib.error.URLError):
            urlopen(tmp_fileurl)

    def test_ftp_nohost(self):
        test_ftp_url = 'ftp:///path'
        with self.assertRaises(urllib.error.URLError) as e:
            urlopen(test_ftp_url)
        self.assertFalse(e.exception.filename)
        self.assertTrue(e.exception.reason)

    def test_ftp_nonexisting(self):
        with self.assertRaises(urllib.error.URLError) as e:
            urlopen('ftp://localhost/a/file/which/doesnot/exists.py')
        self.assertFalse(e.exception.filename)
        self.assertTrue(e.exception.reason)

    @patch.object(urllib.request, 'MAXFTPCACHE', 0)
    def test_ftp_cache_pruning(self):
        self.fakeftp()
        try:
            urllib.request.ftpcache['test'] = urllib.request.ftpwrapper('user', 'pass', 'localhost', 21, [])
            urlopen('ftp://localhost')
        finally:
            self.unfakeftp()

    def test_userpass_inurl(self):
        self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!")
        try:
            fp = urlopen("http://user:pass@python.org/")
            self.assertEqual(fp.readline(), b"Hello!")
            self.assertEqual(fp.readline(), b"")
            self.assertEqual(fp.geturl(), 'http://user:pass@python.org/')
            self.assertEqual(fp.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_userpass_inurl_w_spaces(self):
        self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!")
        try:
            userpass = "a b:c d"
            url = "http://{}@python.org/".format(userpass)
            fakehttp_wrapper = http.client.HTTPConnection
            authorization = ("Authorization: Basic %s\r\n" %
                             b64encode(userpass.encode("ASCII")).decode("ASCII"))
            fp = urlopen(url)
            # The authorization header must be in place
            self.assertIn(authorization, fakehttp_wrapper.buf.decode("UTF-8"))
            self.assertEqual(fp.readline(), b"Hello!")
            self.assertEqual(fp.readline(), b"")
            # the spaces are quoted in URL so no match
            self.assertNotEqual(fp.geturl(), url)
            self.assertEqual(fp.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_URLopener_deprecation(self):
        with support.check_warnings(('',DeprecationWarning)):
            urllib.request.URLopener()

    @unittest.skipUnless(ssl, "ssl module required")
    def test_cafile_and_context(self):
        # Passing both cafile and context must be rejected with ValueError.
        context = ssl.create_default_context()
        with support.check_warnings(('', DeprecationWarning)):
            with self.assertRaises(ValueError):
                urllib.request.urlopen(
                    "https://localhost", cafile="/nonexistent/path", context=context
                )
class urlopen_DataTests(unittest.TestCase):
    """Test urlopen() opening a data URL."""

    def setUp(self):
        # clear _opener global variable
        self.addCleanup(urllib.request.urlcleanup)

        # text containing URL special- and unicode-characters
        self.text = "test data URLs :;,%=& \u00f6 \u00c4 "
        # 2x1 pixel RGB PNG image with one black and one white pixel
        self.image = (
            b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x02\x00\x00\x00'
            b'\x01\x08\x02\x00\x00\x00{@\xe8\xdd\x00\x00\x00\x01sRGB\x00\xae'
            b'\xce\x1c\xe9\x00\x00\x00\x0fIDAT\x08\xd7c```\xf8\xff\xff?\x00'
            b'\x06\x01\x02\xfe\no/\x1e\x00\x00\x00\x00IEND\xaeB`\x82')

        self.text_url = (
            "data:text/plain;charset=UTF-8,test%20data%20URLs%20%3A%3B%2C%25%3"
            "D%26%20%C3%B6%20%C3%84%20")
        self.text_url_base64 = (
            "data:text/plain;charset=ISO-8859-1;base64,dGVzdCBkYXRhIFVSTHMgOjs"
            "sJT0mIPYgxCA%3D")
        # base64 encoded data URL that contains ignorable spaces,
        # such as "\n", " ", "%0A", and "%20".
        self.image_url = (
            "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAIAAAABCAIAAAB7\n"
            "QOjdAAAAAXNSR0IArs4c6QAAAA9JREFUCNdj%0AYGBg%2BP//PwAGAQL%2BCm8 "
            "vHgAAAABJRU5ErkJggg%3D%3D%0A%20")

        self.text_url_resp = urllib.request.urlopen(self.text_url)
        self.text_url_base64_resp = urllib.request.urlopen(
            self.text_url_base64)
        self.image_url_resp = urllib.request.urlopen(self.image_url)

    def test_interface(self):
        # Make sure object returned by urlopen() has the specified methods
        for attr in ("read", "readline", "readlines",
                     "close", "info", "geturl", "getcode", "__iter__"):
            self.assertTrue(hasattr(self.text_url_resp, attr),
                            "object returned by urlopen() lacks %s attribute" %
                            attr)

    def test_info(self):
        self.assertIsInstance(self.text_url_resp.info(), email.message.Message)
        self.assertEqual(self.text_url_base64_resp.info().get_params(),
                         [('text/plain', ''), ('charset', 'ISO-8859-1')])
        self.assertEqual(self.image_url_resp.info()['content-length'],
                         str(len(self.image)))
        self.assertEqual(urllib.request.urlopen("data:,").info().get_params(),
                         [('text/plain', ''), ('charset', 'US-ASCII')])

    def test_geturl(self):
        self.assertEqual(self.text_url_resp.geturl(), self.text_url)
        self.assertEqual(self.text_url_base64_resp.geturl(),
                         self.text_url_base64)
        self.assertEqual(self.image_url_resp.geturl(), self.image_url)

    def test_read_text(self):
        self.assertEqual(self.text_url_resp.read().decode(
            dict(self.text_url_resp.info().get_params())['charset']), self.text)

    def test_read_text_base64(self):
        self.assertEqual(self.text_url_base64_resp.read().decode(
            dict(self.text_url_base64_resp.info().get_params())['charset']),
            self.text)

    def test_read_image(self):
        self.assertEqual(self.image_url_resp.read(), self.image)

    def test_missing_comma(self):
        # A data URL without the mandatory comma separator is invalid.
        self.assertRaises(ValueError,urllib.request.urlopen,'data:text/plain')

    def test_invalid_base64_data(self):
        # missing padding character
        self.assertRaises(ValueError,urllib.request.urlopen,'data:;base64,Cg=')
class urlretrieve_FileTests(unittest.TestCase):
    """Test urllib.urlretrieve() on local files"""

    def setUp(self):
        # clear _opener global variable
        self.addCleanup(urllib.request.urlcleanup)

        # Create a list of temporary files. Each item in the list is a file
        # name (absolute path or relative to the current working directory).
        # All files in this list will be deleted in the tearDown method. Note,
        # this only helps to makes sure temporary files get deleted, but it
        # does nothing about trying to close files that may still be open. It
        # is the responsibility of the developer to properly close files even
        # when exceptional conditions occur.
        self.tempFiles = []

        # Create a temporary file.
        self.registerFileForCleanUp(support.TESTFN)
        self.text = b'testing urllib.urlretrieve'
        try:
            FILE = open(support.TESTFN, 'wb')
            FILE.write(self.text)
            FILE.close()
        finally:
            try: FILE.close()
            except: pass

    def tearDown(self):
        # Delete the temporary files.
        for each in self.tempFiles:
            try: os.remove(each)
            except: pass

    def constructLocalFileUrl(self, filePath):
        # Build a file:// URL for *filePath*; skip the test when the path
        # cannot be encoded to UTF-8.
        filePath = os.path.abspath(filePath)
        try:
            filePath.encode("utf-8")
        except UnicodeEncodeError:
            raise unittest.SkipTest("filePath is not encodable to utf8")
        return "file://%s" % urllib.request.pathname2url(filePath)

    def createNewTempFile(self, data=b""):
        """Creates a new temporary file containing the specified data,
        registers the file for deletion during the test fixture tear down, and
        returns the absolute path of the file."""

        newFd, newFilePath = tempfile.mkstemp()
        try:
            self.registerFileForCleanUp(newFilePath)
            newFile = os.fdopen(newFd, "wb")
            newFile.write(data)
            newFile.close()
        finally:
            try: newFile.close()
            except: pass
        return newFilePath

    def registerFileForCleanUp(self, fileName):
        self.tempFiles.append(fileName)

    def test_basic(self):
        # Make sure that a local file just gets its own location returned and
        # a headers value is returned.
        result = urllib.request.urlretrieve("file:%s" % support.TESTFN)
        self.assertEqual(result[0], support.TESTFN)
        self.assertIsInstance(result[1], email.message.Message,
                              "did not get an email.message.Message instance "
                              "as second returned value")

    def test_copy(self):
        # Test that setting the filename argument works.
        second_temp = "%s.2" % support.TESTFN
        self.registerFileForCleanUp(second_temp)
        result = urllib.request.urlretrieve(self.constructLocalFileUrl(
            support.TESTFN), second_temp)
        self.assertEqual(second_temp, result[0])
        self.assertTrue(os.path.exists(second_temp), "copy of the file was not "
                                                     "made")
        FILE = open(second_temp, 'rb')
        try:
            text = FILE.read()
            FILE.close()
        finally:
            try: FILE.close()
            except: pass
        self.assertEqual(self.text, text)

    def test_reporthook(self):
        # Make sure that the reporthook works.
        def hooktester(block_count, block_read_size, file_size, count_holder=[0]):
            self.assertIsInstance(block_count, int)
            self.assertIsInstance(block_read_size, int)
            self.assertIsInstance(file_size, int)
            self.assertEqual(block_count, count_holder[0])
            count_holder[0] = count_holder[0] + 1
        second_temp = "%s.2" % support.TESTFN
        self.registerFileForCleanUp(second_temp)
        urllib.request.urlretrieve(
            self.constructLocalFileUrl(support.TESTFN),
            second_temp, hooktester)

    def test_reporthook_0_bytes(self):
        # Test on zero length file. Should call reporthook only 1 time.
        report = []
        def hooktester(block_count, block_read_size, file_size, _report=report):
            _report.append((block_count, block_read_size, file_size))
        srcFileName = self.createNewTempFile()
        urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
                                   support.TESTFN, hooktester)
        self.assertEqual(len(report), 1)
        self.assertEqual(report[0][2], 0)

    def test_reporthook_5_bytes(self):
        # Test on 5 byte file. Should call reporthook only 2 times (once when
        # the "network connection" is established and once when the block is
        # read).
        report = []
        def hooktester(block_count, block_read_size, file_size, _report=report):
            _report.append((block_count, block_read_size, file_size))
        srcFileName = self.createNewTempFile(b"x" * 5)
        urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
                                   support.TESTFN, hooktester)
        self.assertEqual(len(report), 2)
        self.assertEqual(report[0][2], 5)
        self.assertEqual(report[1][2], 5)

    def test_reporthook_8193_bytes(self):
        # Test on 8193 byte file. Should call reporthook only 3 times (once
        # when the "network connection" is established, once for the next 8192
        # bytes, and once for the last byte).
        report = []
        def hooktester(block_count, block_read_size, file_size, _report=report):
            _report.append((block_count, block_read_size, file_size))
        srcFileName = self.createNewTempFile(b"x" * 8193)
        urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
                                   support.TESTFN, hooktester)
        self.assertEqual(len(report), 3)
        self.assertEqual(report[0][2], 8193)
        self.assertEqual(report[0][1], 8192)
        self.assertEqual(report[1][1], 8192)
        self.assertEqual(report[2][1], 8192)
class urlretrieve_HttpTests(unittest.TestCase, FakeHTTPMixin):
    """Test urllib.urlretrieve() using fake http connections"""

    def test_short_content_raises_ContentTooShortError(self):
        self.addCleanup(urllib.request.urlcleanup)

        # Content-Length claims 100 bytes but the body is far shorter.
        self.fakehttp(b'''HTTP/1.1 200 OK
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Length: 100
Content-Type: text/html; charset=iso-8859-1

FF
''')

        def _reporthook(par1, par2, par3):
            pass

        with self.assertRaises(urllib.error.ContentTooShortError):
            try:
                urllib.request.urlretrieve(support.TEST_HTTP_URL,
                                           reporthook=_reporthook)
            finally:
                self.unfakehttp()

    def test_short_content_raises_ContentTooShortError_without_reporthook(self):
        self.addCleanup(urllib.request.urlcleanup)

        self.fakehttp(b'''HTTP/1.1 200 OK
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Length: 100
Content-Type: text/html; charset=iso-8859-1

FF
''')
        with self.assertRaises(urllib.error.ContentTooShortError):
            try:
                urllib.request.urlretrieve(support.TEST_HTTP_URL)
            finally:
                self.unfakehttp()


class QuotingTests(unittest.TestCase):
    r"""Tests for urllib.quote() and urllib.quote_plus()

    According to RFC 3986 (Uniform Resource Identifiers), to escape a
    character you write it as '%' + <2 character US-ASCII hex value>.
    The Python code of ``'%' + hex(ord(<character>))[2:]`` escapes a
    character properly. Case does not matter on the hex letters.

    The various character sets specified are:

    Reserved characters : ";/?:@&=+$,"
        Have special meaning in URIs and must be escaped if not being used for
        their special meaning
    Data characters : letters, digits, and "-_.!~*'()"
        Unreserved and do not need to be escaped; can be, though, if desired
    Control characters : 0x00 - 0x1F, 0x7F
        Have no use in URIs so must be escaped
    space : 0x20
        Must be escaped
    Delimiters : '<>#%"'
        Must be escaped
    Unwise : "{}|\^[]`"
        Must be escaped

    """

    def test_never_quote(self):
        # Make sure quote() does not quote letters, digits, and "_,.-"
        do_not_quote = '' .join(["ABCDEFGHIJKLMNOPQRSTUVWXYZ",
                                 "abcdefghijklmnopqrstuvwxyz",
                                 "0123456789",
                                 "_.-~"])
        result = urllib.parse.quote(do_not_quote)
        self.assertEqual(do_not_quote, result,
                         "using quote(): %r != %r" % (do_not_quote, result))
        result = urllib.parse.quote_plus(do_not_quote)
        self.assertEqual(do_not_quote, result,
                         "using quote_plus(): %r != %r" % (do_not_quote,
                                                           result))

    def test_default_safe(self):
        # Test '/' is default value for 'safe' parameter
        self.assertEqual(urllib.parse.quote.__defaults__[0], '/')

    def test_safe(self):
        # Test setting 'safe' parameter does what it should do
        quote_by_default = "<>"
        result = urllib.parse.quote(quote_by_default, safe=quote_by_default)
        self.assertEqual(quote_by_default, result,
                         "using quote(): %r != %r" % (quote_by_default, result))
        result = urllib.parse.quote_plus(quote_by_default,
                                         safe=quote_by_default)
        self.assertEqual(quote_by_default, result,
                         "using quote_plus(): %r != %r" %
                         (quote_by_default, result))
        # Safe expressed as bytes rather than str
        result = urllib.parse.quote(quote_by_default, safe=b"<>")
        self.assertEqual(quote_by_default, result,
                         "using quote(): %r != %r" % (quote_by_default, result))
        # "Safe" non-ASCII characters should have no effect
        # (Since URIs are not allowed to have non-ASCII characters)
        result = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="\xfc")
        expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" %
                         (expect, result))
        # Same as above, but using a bytes rather than str
        result = urllib.parse.quote("a\xfcb", encoding="latin-1", safe=b"\xfc")
        expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" %
                         (expect, result))

    def test_default_quoting(self):
        # Make sure all characters that should be quoted are by default sans
        # space (separate test for that).
        should_quote = [chr(num) for num in range(32)]  # For 0x00 - 0x1F
        should_quote.append(r'<>#%"{}|\^[]`')
        should_quote.append(chr(127))  # For 0x7F
        should_quote = ''.join(should_quote)
        for char in should_quote:
            result = urllib.parse.quote(char)
            self.assertEqual(hexescape(char), result,
                             "using quote(): "
                             "%s should be escaped to %s, not %s" %
                             (char, hexescape(char), result))
            result = urllib.parse.quote_plus(char)
            self.assertEqual(hexescape(char), result,
                             "using quote_plus(): "
                             "%s should be escapes to %s, not %s" %
                             (char, hexescape(char), result))
        del should_quote
        partial_quote = "ab[]cd"
        expected = "ab%5B%5Dcd"
        result = urllib.parse.quote(partial_quote)
        self.assertEqual(expected, result,
                         "using quote(): %r != %r" % (expected, result))
        result = urllib.parse.quote_plus(partial_quote)
        self.assertEqual(expected, result,
                         "using quote_plus(): %r != %r" % (expected, result))

    def test_quoting_space(self):
        # Make sure quote() and quote_plus() handle spaces as specified in
        # their unique way
        result = urllib.parse.quote(' ')
        self.assertEqual(result, hexescape(' '),
                         "using quote(): %r != %r" % (result, hexescape(' ')))
        result = urllib.parse.quote_plus(' ')
        self.assertEqual(result, '+',
                         "using quote_plus(): %r != +" % result)
        given = "a b cd e f"
        expect = given.replace(' ', hexescape(' '))
        result = urllib.parse.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        expect = given.replace(' ', '+')
        result = urllib.parse.quote_plus(given)
        self.assertEqual(expect, result,
                         "using quote_plus(): %r != %r" % (expect, result))

    def test_quoting_plus(self):
        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma'),
                         'alpha%2Bbeta+gamma')
        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', '+'),
                         'alpha+beta+gamma')
        # Test with bytes
self.assertEqual(urllib.parse.quote_plus(b'alpha+beta gamma'), 999 'alpha%2Bbeta+gamma') 1000 # Test with safe bytes 1001 self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', b'+'), 1002 'alpha+beta+gamma') 1003 1004 def test_quote_bytes(self): 1005 # Bytes should quote directly to percent-encoded values 1006 given = b"\xa2\xd8ab\xff" 1007 expect = "%A2%D8ab%FF" 1008 result = urllib.parse.quote(given) 1009 self.assertEqual(expect, result, 1010 "using quote(): %r != %r" % (expect, result)) 1011 # Encoding argument should raise type error on bytes input 1012 self.assertRaises(TypeError, urllib.parse.quote, given, 1013 encoding="latin-1") 1014 # quote_from_bytes should work the same 1015 result = urllib.parse.quote_from_bytes(given) 1016 self.assertEqual(expect, result, 1017 "using quote_from_bytes(): %r != %r" 1018 % (expect, result)) 1019 1020 def test_quote_with_unicode(self): 1021 # Characters in Latin-1 range, encoded by default in UTF-8 1022 given = "\xa2\xd8ab\xff" 1023 expect = "%C2%A2%C3%98ab%C3%BF" 1024 result = urllib.parse.quote(given) 1025 self.assertEqual(expect, result, 1026 "using quote(): %r != %r" % (expect, result)) 1027 # Characters in Latin-1 range, encoded by with None (default) 1028 result = urllib.parse.quote(given, encoding=None, errors=None) 1029 self.assertEqual(expect, result, 1030 "using quote(): %r != %r" % (expect, result)) 1031 # Characters in Latin-1 range, encoded with Latin-1 1032 given = "\xa2\xd8ab\xff" 1033 expect = "%A2%D8ab%FF" 1034 result = urllib.parse.quote(given, encoding="latin-1") 1035 self.assertEqual(expect, result, 1036 "using quote(): %r != %r" % (expect, result)) 1037 # Characters in BMP, encoded by default in UTF-8 1038 given = "\u6f22\u5b57" # "Kanji" 1039 expect = "%E6%BC%A2%E5%AD%97" 1040 result = urllib.parse.quote(given) 1041 self.assertEqual(expect, result, 1042 "using quote(): %r != %r" % (expect, result)) 1043 # Characters in BMP, encoded with Latin-1 1044 given = "\u6f22\u5b57" 1045 
self.assertRaises(UnicodeEncodeError, urllib.parse.quote, given, 1046 encoding="latin-1") 1047 # Characters in BMP, encoded with Latin-1, with replace error handling 1048 given = "\u6f22\u5b57" 1049 expect = "%3F%3F" # "??" 1050 result = urllib.parse.quote(given, encoding="latin-1", 1051 errors="replace") 1052 self.assertEqual(expect, result, 1053 "using quote(): %r != %r" % (expect, result)) 1054 # Characters in BMP, Latin-1, with xmlcharref error handling 1055 given = "\u6f22\u5b57" 1056 expect = "%26%2328450%3B%26%2323383%3B" # "漢字" 1057 result = urllib.parse.quote(given, encoding="latin-1", 1058 errors="xmlcharrefreplace") 1059 self.assertEqual(expect, result, 1060 "using quote(): %r != %r" % (expect, result)) 1061 1062 def test_quote_plus_with_unicode(self): 1063 # Encoding (latin-1) test for quote_plus 1064 given = "\xa2\xd8 \xff" 1065 expect = "%A2%D8+%FF" 1066 result = urllib.parse.quote_plus(given, encoding="latin-1") 1067 self.assertEqual(expect, result, 1068 "using quote_plus(): %r != %r" % (expect, result)) 1069 # Errors test for quote_plus 1070 given = "ab\u6f22\u5b57 cd" 1071 expect = "ab%3F%3F+cd" 1072 result = urllib.parse.quote_plus(given, encoding="latin-1", 1073 errors="replace") 1074 self.assertEqual(expect, result, 1075 "using quote_plus(): %r != %r" % (expect, result)) 1076 1077 1078class UnquotingTests(unittest.TestCase): 1079 """Tests for unquote() and unquote_plus() 1080 1081 See the doc string for quoting_Tests for details on quoting and such. 
1082 1083 """ 1084 1085 def test_unquoting(self): 1086 # Make sure unquoting of all ASCII values works 1087 escape_list = [] 1088 for num in range(128): 1089 given = hexescape(chr(num)) 1090 expect = chr(num) 1091 result = urllib.parse.unquote(given) 1092 self.assertEqual(expect, result, 1093 "using unquote(): %r != %r" % (expect, result)) 1094 result = urllib.parse.unquote_plus(given) 1095 self.assertEqual(expect, result, 1096 "using unquote_plus(): %r != %r" % 1097 (expect, result)) 1098 escape_list.append(given) 1099 escape_string = ''.join(escape_list) 1100 del escape_list 1101 result = urllib.parse.unquote(escape_string) 1102 self.assertEqual(result.count('%'), 1, 1103 "using unquote(): not all characters escaped: " 1104 "%s" % result) 1105 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, None) 1106 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, ()) 1107 1108 def test_unquoting_badpercent(self): 1109 # Test unquoting on bad percent-escapes 1110 given = '%xab' 1111 expect = given 1112 result = urllib.parse.unquote(given) 1113 self.assertEqual(expect, result, "using unquote(): %r != %r" 1114 % (expect, result)) 1115 given = '%x' 1116 expect = given 1117 result = urllib.parse.unquote(given) 1118 self.assertEqual(expect, result, "using unquote(): %r != %r" 1119 % (expect, result)) 1120 given = '%' 1121 expect = given 1122 result = urllib.parse.unquote(given) 1123 self.assertEqual(expect, result, "using unquote(): %r != %r" 1124 % (expect, result)) 1125 # unquote_to_bytes 1126 given = '%xab' 1127 expect = bytes(given, 'ascii') 1128 result = urllib.parse.unquote_to_bytes(given) 1129 self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r" 1130 % (expect, result)) 1131 given = '%x' 1132 expect = bytes(given, 'ascii') 1133 result = urllib.parse.unquote_to_bytes(given) 1134 self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r" 1135 % (expect, result)) 1136 given = '%' 1137 expect = bytes(given, 
'ascii') 1138 result = urllib.parse.unquote_to_bytes(given) 1139 self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r" 1140 % (expect, result)) 1141 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote_to_bytes, None) 1142 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote_to_bytes, ()) 1143 1144 def test_unquoting_mixed_case(self): 1145 # Test unquoting on mixed-case hex digits in the percent-escapes 1146 given = '%Ab%eA' 1147 expect = b'\xab\xea' 1148 result = urllib.parse.unquote_to_bytes(given) 1149 self.assertEqual(expect, result, 1150 "using unquote_to_bytes(): %r != %r" 1151 % (expect, result)) 1152 1153 def test_unquoting_parts(self): 1154 # Make sure unquoting works when have non-quoted characters 1155 # interspersed 1156 given = 'ab%sd' % hexescape('c') 1157 expect = "abcd" 1158 result = urllib.parse.unquote(given) 1159 self.assertEqual(expect, result, 1160 "using quote(): %r != %r" % (expect, result)) 1161 result = urllib.parse.unquote_plus(given) 1162 self.assertEqual(expect, result, 1163 "using unquote_plus(): %r != %r" % (expect, result)) 1164 1165 def test_unquoting_plus(self): 1166 # Test difference between unquote() and unquote_plus() 1167 given = "are+there+spaces..." 
1168 expect = given 1169 result = urllib.parse.unquote(given) 1170 self.assertEqual(expect, result, 1171 "using unquote(): %r != %r" % (expect, result)) 1172 expect = given.replace('+', ' ') 1173 result = urllib.parse.unquote_plus(given) 1174 self.assertEqual(expect, result, 1175 "using unquote_plus(): %r != %r" % (expect, result)) 1176 1177 def test_unquote_to_bytes(self): 1178 given = 'br%C3%BCckner_sapporo_20050930.doc' 1179 expect = b'br\xc3\xbcckner_sapporo_20050930.doc' 1180 result = urllib.parse.unquote_to_bytes(given) 1181 self.assertEqual(expect, result, 1182 "using unquote_to_bytes(): %r != %r" 1183 % (expect, result)) 1184 # Test on a string with unescaped non-ASCII characters 1185 # (Technically an invalid URI; expect those characters to be UTF-8 1186 # encoded). 1187 result = urllib.parse.unquote_to_bytes("\u6f22%C3%BC") 1188 expect = b'\xe6\xbc\xa2\xc3\xbc' # UTF-8 for "\u6f22\u00fc" 1189 self.assertEqual(expect, result, 1190 "using unquote_to_bytes(): %r != %r" 1191 % (expect, result)) 1192 # Test with a bytes as input 1193 given = b'%A2%D8ab%FF' 1194 expect = b'\xa2\xd8ab\xff' 1195 result = urllib.parse.unquote_to_bytes(given) 1196 self.assertEqual(expect, result, 1197 "using unquote_to_bytes(): %r != %r" 1198 % (expect, result)) 1199 # Test with a bytes as input, with unescaped non-ASCII bytes 1200 # (Technically an invalid URI; expect those bytes to be preserved) 1201 given = b'%A2\xd8ab%FF' 1202 expect = b'\xa2\xd8ab\xff' 1203 result = urllib.parse.unquote_to_bytes(given) 1204 self.assertEqual(expect, result, 1205 "using unquote_to_bytes(): %r != %r" 1206 % (expect, result)) 1207 1208 def test_unquote_with_unicode(self): 1209 # Characters in the Latin-1 range, encoded with UTF-8 1210 given = 'br%C3%BCckner_sapporo_20050930.doc' 1211 expect = 'br\u00fcckner_sapporo_20050930.doc' 1212 result = urllib.parse.unquote(given) 1213 self.assertEqual(expect, result, 1214 "using unquote(): %r != %r" % (expect, result)) 1215 # Characters in the Latin-1 
range, encoded with None (default) 1216 result = urllib.parse.unquote(given, encoding=None, errors=None) 1217 self.assertEqual(expect, result, 1218 "using unquote(): %r != %r" % (expect, result)) 1219 1220 # Characters in the Latin-1 range, encoded with Latin-1 1221 result = urllib.parse.unquote('br%FCckner_sapporo_20050930.doc', 1222 encoding="latin-1") 1223 expect = 'br\u00fcckner_sapporo_20050930.doc' 1224 self.assertEqual(expect, result, 1225 "using unquote(): %r != %r" % (expect, result)) 1226 1227 # Characters in BMP, encoded with UTF-8 1228 given = "%E6%BC%A2%E5%AD%97" 1229 expect = "\u6f22\u5b57" # "Kanji" 1230 result = urllib.parse.unquote(given) 1231 self.assertEqual(expect, result, 1232 "using unquote(): %r != %r" % (expect, result)) 1233 1234 # Decode with UTF-8, invalid sequence 1235 given = "%F3%B1" 1236 expect = "\ufffd" # Replacement character 1237 result = urllib.parse.unquote(given) 1238 self.assertEqual(expect, result, 1239 "using unquote(): %r != %r" % (expect, result)) 1240 1241 # Decode with UTF-8, invalid sequence, replace errors 1242 result = urllib.parse.unquote(given, errors="replace") 1243 self.assertEqual(expect, result, 1244 "using unquote(): %r != %r" % (expect, result)) 1245 1246 # Decode with UTF-8, invalid sequence, ignoring errors 1247 given = "%F3%B1" 1248 expect = "" 1249 result = urllib.parse.unquote(given, errors="ignore") 1250 self.assertEqual(expect, result, 1251 "using unquote(): %r != %r" % (expect, result)) 1252 1253 # A mix of non-ASCII and percent-encoded characters, UTF-8 1254 result = urllib.parse.unquote("\u6f22%C3%BC") 1255 expect = '\u6f22\u00fc' 1256 self.assertEqual(expect, result, 1257 "using unquote(): %r != %r" % (expect, result)) 1258 1259 # A mix of non-ASCII and percent-encoded characters, Latin-1 1260 # (Note, the string contains non-Latin-1-representable characters) 1261 result = urllib.parse.unquote("\u6f22%FC", encoding="latin-1") 1262 expect = '\u6f22\u00fc' 1263 self.assertEqual(expect, result, 1264 
"using unquote(): %r != %r" % (expect, result)) 1265 1266 def test_unquoting_with_bytes_input(self): 1267 # ASCII characters decoded to a string 1268 given = b'blueberryjam' 1269 expect = 'blueberryjam' 1270 result = urllib.parse.unquote(given) 1271 self.assertEqual(expect, result, 1272 "using unquote(): %r != %r" % (expect, result)) 1273 1274 # A mix of non-ASCII hex-encoded characters and ASCII characters 1275 given = b'bl\xc3\xa5b\xc3\xa6rsyltet\xc3\xb8y' 1276 expect = 'bl\u00e5b\u00e6rsyltet\u00f8y' 1277 result = urllib.parse.unquote(given) 1278 self.assertEqual(expect, result, 1279 "using unquote(): %r != %r" % (expect, result)) 1280 1281 # A mix of non-ASCII percent-encoded characters and ASCII characters 1282 given = b'bl%c3%a5b%c3%a6rsyltet%c3%b8j' 1283 expect = 'bl\u00e5b\u00e6rsyltet\u00f8j' 1284 result = urllib.parse.unquote(given) 1285 self.assertEqual(expect, result, 1286 "using unquote(): %r != %r" % (expect, result)) 1287 1288 1289class urlencode_Tests(unittest.TestCase): 1290 """Tests for urlencode()""" 1291 1292 def help_inputtype(self, given, test_type): 1293 """Helper method for testing different input types. 1294 1295 'given' must lead to only the pairs: 1296 * 1st, 1 1297 * 2nd, 2 1298 * 3rd, 3 1299 1300 Test cannot assume anything about order. Docs make no guarantee and 1301 have possible dictionary input. 
1302 1303 """ 1304 expect_somewhere = ["1st=1", "2nd=2", "3rd=3"] 1305 result = urllib.parse.urlencode(given) 1306 for expected in expect_somewhere: 1307 self.assertIn(expected, result, 1308 "testing %s: %s not found in %s" % 1309 (test_type, expected, result)) 1310 self.assertEqual(result.count('&'), 2, 1311 "testing %s: expected 2 '&'s; got %s" % 1312 (test_type, result.count('&'))) 1313 amp_location = result.index('&') 1314 on_amp_left = result[amp_location - 1] 1315 on_amp_right = result[amp_location + 1] 1316 self.assertTrue(on_amp_left.isdigit() and on_amp_right.isdigit(), 1317 "testing %s: '&' not located in proper place in %s" % 1318 (test_type, result)) 1319 self.assertEqual(len(result), (5 * 3) + 2, #5 chars per thing and amps 1320 "testing %s: " 1321 "unexpected number of characters: %s != %s" % 1322 (test_type, len(result), (5 * 3) + 2)) 1323 1324 def test_using_mapping(self): 1325 # Test passing in a mapping object as an argument. 1326 self.help_inputtype({"1st":'1', "2nd":'2', "3rd":'3'}, 1327 "using dict as input type") 1328 1329 def test_using_sequence(self): 1330 # Test passing in a sequence of two-item sequences as an argument. 
1331 self.help_inputtype([('1st', '1'), ('2nd', '2'), ('3rd', '3')], 1332 "using sequence of two-item tuples as input") 1333 1334 def test_quoting(self): 1335 # Make sure keys and values are quoted using quote_plus() 1336 given = {"&":"="} 1337 expect = "%s=%s" % (hexescape('&'), hexescape('=')) 1338 result = urllib.parse.urlencode(given) 1339 self.assertEqual(expect, result) 1340 given = {"key name":"A bunch of pluses"} 1341 expect = "key+name=A+bunch+of+pluses" 1342 result = urllib.parse.urlencode(given) 1343 self.assertEqual(expect, result) 1344 1345 def test_doseq(self): 1346 # Test that passing True for 'doseq' parameter works correctly 1347 given = {'sequence':['1', '2', '3']} 1348 expect = "sequence=%s" % urllib.parse.quote_plus(str(['1', '2', '3'])) 1349 result = urllib.parse.urlencode(given) 1350 self.assertEqual(expect, result) 1351 result = urllib.parse.urlencode(given, True) 1352 for value in given["sequence"]: 1353 expect = "sequence=%s" % value 1354 self.assertIn(expect, result) 1355 self.assertEqual(result.count('&'), 2, 1356 "Expected 2 '&'s, got %s" % result.count('&')) 1357 1358 def test_empty_sequence(self): 1359 self.assertEqual("", urllib.parse.urlencode({})) 1360 self.assertEqual("", urllib.parse.urlencode([])) 1361 1362 def test_nonstring_values(self): 1363 self.assertEqual("a=1", urllib.parse.urlencode({"a": 1})) 1364 self.assertEqual("a=None", urllib.parse.urlencode({"a": None})) 1365 1366 def test_nonstring_seq_values(self): 1367 self.assertEqual("a=1&a=2", urllib.parse.urlencode({"a": [1, 2]}, True)) 1368 self.assertEqual("a=None&a=a", 1369 urllib.parse.urlencode({"a": [None, "a"]}, True)) 1370 data = collections.OrderedDict([("a", 1), ("b", 1)]) 1371 self.assertEqual("a=a&a=b", 1372 urllib.parse.urlencode({"a": data}, True)) 1373 1374 def test_urlencode_encoding(self): 1375 # ASCII encoding. 
Expect %3F with errors="replace' 1376 given = (('\u00a0', '\u00c1'),) 1377 expect = '%3F=%3F' 1378 result = urllib.parse.urlencode(given, encoding="ASCII", errors="replace") 1379 self.assertEqual(expect, result) 1380 1381 # Default is UTF-8 encoding. 1382 given = (('\u00a0', '\u00c1'),) 1383 expect = '%C2%A0=%C3%81' 1384 result = urllib.parse.urlencode(given) 1385 self.assertEqual(expect, result) 1386 1387 # Latin-1 encoding. 1388 given = (('\u00a0', '\u00c1'),) 1389 expect = '%A0=%C1' 1390 result = urllib.parse.urlencode(given, encoding="latin-1") 1391 self.assertEqual(expect, result) 1392 1393 def test_urlencode_encoding_doseq(self): 1394 # ASCII Encoding. Expect %3F with errors="replace' 1395 given = (('\u00a0', '\u00c1'),) 1396 expect = '%3F=%3F' 1397 result = urllib.parse.urlencode(given, doseq=True, 1398 encoding="ASCII", errors="replace") 1399 self.assertEqual(expect, result) 1400 1401 # ASCII Encoding. On a sequence of values. 1402 given = (("\u00a0", (1, "\u00c1")),) 1403 expect = '%3F=1&%3F=%3F' 1404 result = urllib.parse.urlencode(given, True, 1405 encoding="ASCII", errors="replace") 1406 self.assertEqual(expect, result) 1407 1408 # Utf-8 1409 given = (("\u00a0", "\u00c1"),) 1410 expect = '%C2%A0=%C3%81' 1411 result = urllib.parse.urlencode(given, True) 1412 self.assertEqual(expect, result) 1413 1414 given = (("\u00a0", (42, "\u00c1")),) 1415 expect = '%C2%A0=42&%C2%A0=%C3%81' 1416 result = urllib.parse.urlencode(given, True) 1417 self.assertEqual(expect, result) 1418 1419 # latin-1 1420 given = (("\u00a0", "\u00c1"),) 1421 expect = '%A0=%C1' 1422 result = urllib.parse.urlencode(given, True, encoding="latin-1") 1423 self.assertEqual(expect, result) 1424 1425 given = (("\u00a0", (42, "\u00c1")),) 1426 expect = '%A0=42&%A0=%C1' 1427 result = urllib.parse.urlencode(given, True, encoding="latin-1") 1428 self.assertEqual(expect, result) 1429 1430 def test_urlencode_bytes(self): 1431 given = ((b'\xa0\x24', b'\xc1\x24'),) 1432 expect = '%A0%24=%C1%24' 1433 
result = urllib.parse.urlencode(given) 1434 self.assertEqual(expect, result) 1435 result = urllib.parse.urlencode(given, True) 1436 self.assertEqual(expect, result) 1437 1438 # Sequence of values 1439 given = ((b'\xa0\x24', (42, b'\xc1\x24')),) 1440 expect = '%A0%24=42&%A0%24=%C1%24' 1441 result = urllib.parse.urlencode(given, True) 1442 self.assertEqual(expect, result) 1443 1444 def test_urlencode_encoding_safe_parameter(self): 1445 1446 # Send '$' (\x24) as safe character 1447 # Default utf-8 encoding 1448 1449 given = ((b'\xa0\x24', b'\xc1\x24'),) 1450 result = urllib.parse.urlencode(given, safe=":$") 1451 expect = '%A0$=%C1$' 1452 self.assertEqual(expect, result) 1453 1454 given = ((b'\xa0\x24', b'\xc1\x24'),) 1455 result = urllib.parse.urlencode(given, doseq=True, safe=":$") 1456 expect = '%A0$=%C1$' 1457 self.assertEqual(expect, result) 1458 1459 # Safe parameter in sequence 1460 given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),) 1461 expect = '%A0$=%C1$&%A0$=13&%A0$=42' 1462 result = urllib.parse.urlencode(given, True, safe=":$") 1463 self.assertEqual(expect, result) 1464 1465 # Test all above in latin-1 encoding 1466 1467 given = ((b'\xa0\x24', b'\xc1\x24'),) 1468 result = urllib.parse.urlencode(given, safe=":$", 1469 encoding="latin-1") 1470 expect = '%A0$=%C1$' 1471 self.assertEqual(expect, result) 1472 1473 given = ((b'\xa0\x24', b'\xc1\x24'),) 1474 expect = '%A0$=%C1$' 1475 result = urllib.parse.urlencode(given, doseq=True, safe=":$", 1476 encoding="latin-1") 1477 1478 given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),) 1479 expect = '%A0$=%C1$&%A0$=13&%A0$=42' 1480 result = urllib.parse.urlencode(given, True, safe=":$", 1481 encoding="latin-1") 1482 self.assertEqual(expect, result) 1483 1484class Pathname_Tests(unittest.TestCase): 1485 """Test pathname2url() and url2pathname()""" 1486 1487 def test_basic(self): 1488 # Make sure simple tests pass 1489 expected_path = os.path.join("parts", "of", "a", "path") 1490 expected_url = "parts/of/a/path" 1491 result = 
class Pathname_Tests(unittest.TestCase):
    """Test pathname2url() and url2pathname()"""

    def test_basic(self):
        # Make sure simple tests pass
        expected_path = os.path.join("parts", "of", "a", "path")
        expected_url = "parts/of/a/path"
        result = urllib.request.pathname2url(expected_path)
        self.assertEqual(expected_url, result,
                         "pathname2url() failed; %s != %s" %
                         (result, expected_url))
        result = urllib.request.url2pathname(expected_url)
        # Fixed message typo: "url2pathame" -> "url2pathname".
        self.assertEqual(expected_path, result,
                         "url2pathname() failed; %s != %s" %
                         (result, expected_path))

    def test_quoting(self):
        # Test automatic quoting and unquoting works for pathname2url() and
        # url2pathname() respectively
        given = os.path.join("needs", "quot=ing", "here")
        expect = "needs/%s/here" % urllib.parse.quote("quot=ing")
        result = urllib.request.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        expect = given
        result = urllib.request.url2pathname(result)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))
        given = os.path.join("make sure", "using_quote")
        expect = "%s/using_quote" % urllib.parse.quote("make sure")
        result = urllib.request.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        given = "make+sure/using_unquote"
        expect = os.path.join("make+sure", "using_unquote")
        result = urllib.request.url2pathname(given)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))

    @unittest.skipUnless(sys.platform == 'win32',
                         'test specific to the urllib.url2path function.')
    def test_ntpath(self):
        given = ('/C:/', '///C:/', '/C|//')
        expect = 'C:\\'
        for url in given:
            result = urllib.request.url2pathname(url)
            # Fixed message typo: "urllib.request..url2pathname".
            self.assertEqual(expect, result,
                             'urllib.request.url2pathname() failed; %s != %s' %
                             (expect, result))
        given = '///C|/path'
        expect = 'C:\\path'
        result = urllib.request.url2pathname(given)
        self.assertEqual(expect, result,
                         'urllib.request.url2pathname() failed; %s != %s' %
                         (expect, result))


class Utility_Tests(unittest.TestCase):
    """Testcase to test the various utility functions in the urllib."""

    def test_thishost(self):
        """Test the urllib.request.thishost utility function returns a tuple"""
        self.assertIsInstance(urllib.request.thishost(), tuple)


class URLopener_Tests(FakeHTTPMixin, unittest.TestCase):
    """Testcase to test the open method of URLopener class."""

    def test_quoted_open(self):
        class DummyURLopener(urllib.request.URLopener):
            def open_spam(self, url):
                return url
        with support.check_warnings(
                ('DummyURLopener style of invoking requests is deprecated.',
                 DeprecationWarning)):
            self.assertEqual(DummyURLopener().open(
                'spam://example/ /'),'//example/%20/')

            # test the safe characters are not quoted by urlopen
            self.assertEqual(DummyURLopener().open(
                "spam://c:|windows%/:=&?~#+!$,;'@()*[]|/path/"),
                "//c:|windows%/:=&?~#+!$,;'@()*[]|/path/")

    @support.ignore_warnings(category=DeprecationWarning)
    def test_urlopener_retrieve_file(self):
        with support.temp_dir() as tmpdir:
            fd, tmpfile = tempfile.mkstemp(dir=tmpdir)
            os.close(fd)
            fileurl = "file:" + urllib.request.pathname2url(tmpfile)
            filename, _ = urllib.request.URLopener().retrieve(fileurl)
            # Some buildbots have TEMP folder that uses a lowercase drive letter.
            self.assertEqual(os.path.normcase(filename), os.path.normcase(tmpfile))
1578 self.assertEqual(os.path.normcase(filename), os.path.normcase(tmpfile)) 1579 1580 @support.ignore_warnings(category=DeprecationWarning) 1581 def test_urlopener_retrieve_remote(self): 1582 url = "http://www.python.org/file.txt" 1583 self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!") 1584 self.addCleanup(self.unfakehttp) 1585 filename, _ = urllib.request.URLopener().retrieve(url) 1586 self.assertEqual(os.path.splitext(filename)[1], ".txt") 1587 1588 @support.ignore_warnings(category=DeprecationWarning) 1589 def test_local_file_open(self): 1590 # bpo-35907, CVE-2019-9948: urllib must reject local_file:// scheme 1591 class DummyURLopener(urllib.request.URLopener): 1592 def open_local_file(self, url): 1593 return url 1594 for url in ('local_file://example', 'local-file://example'): 1595 self.assertRaises(OSError, urllib.request.urlopen, url) 1596 self.assertRaises(OSError, urllib.request.URLopener().open, url) 1597 self.assertRaises(OSError, urllib.request.URLopener().retrieve, url) 1598 self.assertRaises(OSError, DummyURLopener().open, url) 1599 self.assertRaises(OSError, DummyURLopener().retrieve, url) 1600 1601 1602class RequestTests(unittest.TestCase): 1603 """Unit tests for urllib.request.Request.""" 1604 1605 def test_default_values(self): 1606 Request = urllib.request.Request 1607 request = Request("http://www.python.org") 1608 self.assertEqual(request.get_method(), 'GET') 1609 request = Request("http://www.python.org", {}) 1610 self.assertEqual(request.get_method(), 'POST') 1611 1612 def test_with_method_arg(self): 1613 Request = urllib.request.Request 1614 request = Request("http://www.python.org", method='HEAD') 1615 self.assertEqual(request.method, 'HEAD') 1616 self.assertEqual(request.get_method(), 'HEAD') 1617 request = Request("http://www.python.org", {}, method='HEAD') 1618 self.assertEqual(request.method, 'HEAD') 1619 self.assertEqual(request.get_method(), 'HEAD') 1620 request = Request("http://www.python.org", method='GET') 1621 
self.assertEqual(request.get_method(), 'GET') 1622 request.method = 'HEAD' 1623 self.assertEqual(request.get_method(), 'HEAD') 1624 1625 1626class URL2PathNameTests(unittest.TestCase): 1627 1628 def test_converting_drive_letter(self): 1629 self.assertEqual(url2pathname("///C|"), 'C:') 1630 self.assertEqual(url2pathname("///C:"), 'C:') 1631 self.assertEqual(url2pathname("///C|/"), 'C:\\') 1632 1633 def test_converting_when_no_drive_letter(self): 1634 # cannot end a raw string in \ 1635 self.assertEqual(url2pathname("///C/test/"), r'\\\C\test' '\\') 1636 self.assertEqual(url2pathname("////C/test/"), r'\\C\test' '\\') 1637 1638 def test_simple_compare(self): 1639 self.assertEqual(url2pathname("///C|/foo/bar/spam.foo"), 1640 r'C:\foo\bar\spam.foo') 1641 1642 def test_non_ascii_drive_letter(self): 1643 self.assertRaises(IOError, url2pathname, "///\u00e8|/") 1644 1645 def test_roundtrip_url2pathname(self): 1646 list_of_paths = ['C:', 1647 r'\\\C\test\\', 1648 r'C:\foo\bar\spam.foo' 1649 ] 1650 for path in list_of_paths: 1651 self.assertEqual(url2pathname(pathname2url(path)), path) 1652 1653class PathName2URLTests(unittest.TestCase): 1654 1655 def test_converting_drive_letter(self): 1656 self.assertEqual(pathname2url("C:"), '///C:') 1657 self.assertEqual(pathname2url("C:\\"), '///C:') 1658 1659 def test_converting_when_no_drive_letter(self): 1660 self.assertEqual(pathname2url(r"\\\folder\test" "\\"), 1661 '/////folder/test/') 1662 self.assertEqual(pathname2url(r"\\folder\test" "\\"), 1663 '////folder/test/') 1664 self.assertEqual(pathname2url(r"\folder\test" "\\"), 1665 '/folder/test/') 1666 1667 def test_simple_compare(self): 1668 self.assertEqual(pathname2url(r'C:\foo\bar\spam.foo'), 1669 "///C:/foo/bar/spam.foo" ) 1670 1671 def test_long_drive_letter(self): 1672 self.assertRaises(IOError, pathname2url, "XX:\\") 1673 1674 def test_roundtrip_pathname2url(self): 1675 list_of_paths = ['///C:', 1676 '/////folder/test/', 1677 '///C:/foo/bar/spam.foo'] 1678 for path in 
list_of_paths: 1679 self.assertEqual(pathname2url(url2pathname(path)), path) 1680 1681if __name__ == '__main__': 1682 unittest.main() 1683