"""Regression tests for what was in Python 2's "urllib" module"""

import urllib.parse
import urllib.request
import urllib.error
import http.client
import email.message
import io
import unittest
from unittest.mock import patch
from test import support
from test.support import os_helper
from test.support import warnings_helper
import os
try:
    import ssl
except ImportError:
    ssl = None
import sys
import tempfile
from nturl2path import url2pathname, pathname2url

from base64 import b64encode
import collections


def hexescape(char):
    """Escape char as RFC 2396 specifies"""
    hex_repr = hex(ord(char))[2:].upper()
    if len(hex_repr) == 1:
        hex_repr = "0%s" % hex_repr
    return "%" + hex_repr

# Shortcut for testing FancyURLopener
_urlopener = None


def urlopen(url, data=None, proxies=None):
    """urlopen(url [, data]) -> open file-like object"""
    global _urlopener
    if proxies is not None:
        opener = urllib.request.FancyURLopener(proxies=proxies)
    elif not _urlopener:
        opener = FancyURLopener()
        _urlopener = opener
    else:
        opener = _urlopener
    if data is None:
        return opener.open(url)
    else:
        return opener.open(url, data)


def FancyURLopener():
    # Instantiating FancyURLopener emits a DeprecationWarning; swallow it so
    # tests that only need an opener stay warning-clean.
    with warnings_helper.check_warnings(
            ('FancyURLopener style of invoking requests is deprecated.',
             DeprecationWarning)):
        return urllib.request.FancyURLopener()


def fakehttp(fakedata, mock_close=False):
    """Return an HTTPConnection subclass that replays *fakedata* instead of
    touching the network.  The last request sent is kept in ``buf`` for
    verification by the urlopen tests."""

    class FakeSocket(io.BytesIO):
        io_refs = 1

        def sendall(self, data):
            FakeHTTPConnection.buf = data

        def makefile(self, *args, **kwds):
            self.io_refs += 1
            return self

        def read(self, amt=None):
            if self.closed:
                return b""
            return io.BytesIO.read(self, amt)

        def readline(self, length=None):
            if self.closed:
                return b""
            return io.BytesIO.readline(self, length)

        def close(self):
            # Only really close once every makefile() reference is gone.
            self.io_refs -= 1
            if self.io_refs == 0:
                io.BytesIO.close(self)

    class FakeHTTPConnection(http.client.HTTPConnection):

        # buffer to store data for verification in urlopen tests.
        buf = None

        def connect(self):
            self.sock = FakeSocket(self.fakedata)
            type(self).fakesock = self.sock

        if mock_close:
            # bpo-36918: HTTPConnection destructor calls close() which calls
            # flush(). Problem: flush() calls self.fp.flush() which raises
            # "ValueError: I/O operation on closed file" which is logged as an
            # "Exception ignored in". Override close() to silence this error.
            def close(self):
                pass
    FakeHTTPConnection.fakedata = fakedata

    return FakeHTTPConnection


class FakeHTTPMixin(object):
    """Mixin that swaps http.client.HTTPConnection for the fake one."""

    def fakehttp(self, fakedata, mock_close=False):
        fake_http_class = fakehttp(fakedata, mock_close=mock_close)
        self._connection_class = http.client.HTTPConnection
        http.client.HTTPConnection = fake_http_class

    def unfakehttp(self):
        http.client.HTTPConnection = self._connection_class


class FakeFTPMixin(object):
    """Mixin that swaps urllib.request.ftpwrapper for a no-op fake."""

    def fakeftp(self):
        class FakeFtpWrapper(object):
            def __init__(self, user, passwd, host, port, dirs, timeout=None,
                         persistent=True):
                pass

            def retrfile(self, file, type):
                return io.BytesIO(), 0

            def close(self):
                pass

        self._ftpwrapper_class = urllib.request.ftpwrapper
        urllib.request.ftpwrapper = FakeFtpWrapper

    def unfakeftp(self):
        urllib.request.ftpwrapper = self._ftpwrapper_class


class urlopen_FileTests(unittest.TestCase):
    """Test urlopen() opening a temporary file.

    Try to test as much functionality as possible so as to cut down on reliance
    on connecting to the Net for testing.

    """

    def setUp(self):
        # Create a temp file to use for testing
        self.text = bytes("test_urllib: %s\n" % self.__class__.__name__,
                          "ascii")
        f = open(os_helper.TESTFN, 'wb')
        try:
            f.write(self.text)
        finally:
            f.close()
        self.pathname = os_helper.TESTFN
        self.quoted_pathname = urllib.parse.quote(self.pathname)
        self.returned_obj = urlopen("file:%s" % self.quoted_pathname)

    def tearDown(self):
        """Shut down the open object"""
        self.returned_obj.close()
        os.remove(os_helper.TESTFN)

    def test_interface(self):
        # Make sure object returned by urlopen() has the specified methods
        for attr in ("read", "readline", "readlines", "fileno",
                     "close", "info", "geturl", "getcode", "__iter__"):
            self.assertTrue(hasattr(self.returned_obj, attr),
                            "object returned by urlopen() lacks %s attribute" %
                            attr)

    def test_read(self):
        self.assertEqual(self.text, self.returned_obj.read())

    def test_readline(self):
        self.assertEqual(self.text, self.returned_obj.readline())
        self.assertEqual(b'', self.returned_obj.readline(),
                         "calling readline() after exhausting the file did not"
                         " return an empty string")

    def test_readlines(self):
        lines_list = self.returned_obj.readlines()
        self.assertEqual(len(lines_list), 1,
                         "readlines() returned the wrong number of lines")
        self.assertEqual(lines_list[0], self.text,
                         "readlines() returned improper text")

    def test_fileno(self):
        file_num = self.returned_obj.fileno()
        self.assertIsInstance(file_num, int, "fileno() did not return an int")
        self.assertEqual(os.read(file_num, len(self.text)), self.text,
                         "Reading on the file descriptor returned by fileno() "
                         "did not return the expected text")

    def test_close(self):
        # Test close() by calling it here and then having it be called again
        # by the tearDown() method for the test
        self.returned_obj.close()

    def test_headers(self):
        self.assertIsInstance(self.returned_obj.headers, email.message.Message)

    def test_url(self):
        self.assertEqual(self.returned_obj.url, self.quoted_pathname)

    def test_status(self):
        self.assertIsNone(self.returned_obj.status)

    def test_info(self):
        self.assertIsInstance(self.returned_obj.info(), email.message.Message)

    def test_geturl(self):
        self.assertEqual(self.returned_obj.geturl(), self.quoted_pathname)

    def test_getcode(self):
        self.assertIsNone(self.returned_obj.getcode())

    def test_iter(self):
        # Test iterator
        # Don't need to count number of iterations since test would fail the
        # instant it returned anything beyond the first line from the
        # comparison.
        # Use the iterator in the usual implicit way to test for ticket #4608.
        for line in self.returned_obj:
            self.assertEqual(line, self.text)

    def test_relativelocalfile(self):
        self.assertRaises(ValueError, urllib.request.urlopen, './' + self.pathname)


class ProxyTests(unittest.TestCase):

    def setUp(self):
        # Records changes to env vars
        self.env = os_helper.EnvironmentVarGuard()
        # Delete all proxy related env vars
        for k in list(os.environ):
            if 'proxy' in k.lower():
                self.env.unset(k)

    def tearDown(self):
        # Restore all proxy related env vars
        self.env.__exit__()
        del self.env

    def test_getproxies_environment_keep_no_proxies(self):
        self.env.set('NO_PROXY', 'localhost')
        proxies = urllib.request.getproxies_environment()
        # getproxies_environment use lowered case truncated (no '_proxy') keys
        self.assertEqual('localhost', proxies['no'])
        # List of no_proxies with space.
        self.env.set('NO_PROXY', 'localhost, anotherdomain.com, newdomain.com:1234')
        self.assertTrue(urllib.request.proxy_bypass_environment('anotherdomain.com'))
        self.assertTrue(urllib.request.proxy_bypass_environment('anotherdomain.com:8888'))
        self.assertTrue(urllib.request.proxy_bypass_environment('newdomain.com:1234'))

    def test_proxy_cgi_ignore(self):
        try:
            self.env.set('HTTP_PROXY', 'http://somewhere:3128')
            proxies = urllib.request.getproxies_environment()
            self.assertEqual('http://somewhere:3128', proxies['http'])
            self.env.set('REQUEST_METHOD', 'GET')
            proxies = urllib.request.getproxies_environment()
            self.assertNotIn('http', proxies)
        finally:
            self.env.unset('REQUEST_METHOD')
            self.env.unset('HTTP_PROXY')

    def test_proxy_bypass_environment_host_match(self):
        bypass = urllib.request.proxy_bypass_environment
        self.env.set('NO_PROXY',
                     'localhost, anotherdomain.com, newdomain.com:1234, .d.o.t')
        self.assertTrue(bypass('localhost'))
        self.assertTrue(bypass('LocalHost'))                 # MixedCase
        self.assertTrue(bypass('LOCALHOST'))                 # UPPERCASE
        self.assertTrue(bypass('.localhost'))
        self.assertTrue(bypass('newdomain.com:1234'))
        self.assertTrue(bypass('.newdomain.com:1234'))
        self.assertTrue(bypass('foo.d.o.t'))                 # issue 29142
        self.assertTrue(bypass('d.o.t'))
        self.assertTrue(bypass('anotherdomain.com:8888'))
        self.assertTrue(bypass('.anotherdomain.com:8888'))
        self.assertTrue(bypass('www.newdomain.com:1234'))
        self.assertFalse(bypass('prelocalhost'))
        self.assertFalse(bypass('newdomain.com'))            # no port
        self.assertFalse(bypass('newdomain.com:1235'))       # wrong port

    def test_proxy_bypass_environment_always_match(self):
        bypass = urllib.request.proxy_bypass_environment
        self.env.set('NO_PROXY', '*')
        self.assertTrue(bypass('newdomain.com'))
        self.assertTrue(bypass('newdomain.com:1234'))
        self.env.set('NO_PROXY', '*, anotherdomain.com')
        self.assertTrue(bypass('anotherdomain.com'))
        self.assertFalse(bypass('newdomain.com'))
        self.assertFalse(bypass('newdomain.com:1234'))

    def test_proxy_bypass_environment_newline(self):
        bypass = urllib.request.proxy_bypass_environment
        self.env.set('NO_PROXY',
                     'localhost, anotherdomain.com, newdomain.com:1234')
        self.assertFalse(bypass('localhost\n'))
        self.assertFalse(bypass('anotherdomain.com:8888\n'))
        self.assertFalse(bypass('newdomain.com:1234\n'))


class ProxyTests_withOrderedEnv(unittest.TestCase):

    def setUp(self):
        # We need to test conditions, where variable order _is_ significant
        self._saved_env = os.environ
        # Monkey patch os.environ, start with empty fake environment
        os.environ = collections.OrderedDict()

    def tearDown(self):
        os.environ = self._saved_env

    def test_getproxies_environment_prefer_lowercase(self):
        # Test lowercase preference with removal
        os.environ['no_proxy'] = ''
        os.environ['No_Proxy'] = 'localhost'
        self.assertFalse(urllib.request.proxy_bypass_environment('localhost'))
        self.assertFalse(urllib.request.proxy_bypass_environment('arbitrary'))
        os.environ['http_proxy'] = ''
        os.environ['HTTP_PROXY'] = 'http://somewhere:3128'
        proxies = urllib.request.getproxies_environment()
        self.assertEqual({}, proxies)
        # Test lowercase preference of proxy bypass and correct matching including ports
        os.environ['no_proxy'] = 'localhost, noproxy.com, my.proxy:1234'
        os.environ['No_Proxy'] = 'xyz.com'
        self.assertTrue(urllib.request.proxy_bypass_environment('localhost'))
        self.assertTrue(urllib.request.proxy_bypass_environment('noproxy.com:5678'))
        self.assertTrue(urllib.request.proxy_bypass_environment('my.proxy:1234'))
        self.assertFalse(urllib.request.proxy_bypass_environment('my.proxy'))
        self.assertFalse(urllib.request.proxy_bypass_environment('arbitrary'))
        # Test lowercase preference with replacement
        os.environ['http_proxy'] = 'http://somewhere:3128'
        os.environ['Http_Proxy'] = 'http://somewhereelse:3128'
        proxies = urllib.request.getproxies_environment()
        self.assertEqual('http://somewhere:3128', proxies['http'])


class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin, FakeFTPMixin):
    """Test urlopen() opening a fake http connection."""

    def check_read(self, ver):
        self.fakehttp(b"HTTP/" + ver + b" 200 OK\r\n\r\nHello!")
        try:
            fp = urlopen("http://python.org/")
            self.assertEqual(fp.readline(), b"Hello!")
            self.assertEqual(fp.readline(), b"")
            self.assertEqual(fp.geturl(), 'http://python.org/')
            self.assertEqual(fp.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_url_fragment(self):
        # Issue #11703: geturl() omits fragments in the original URL.
        url = 'http://docs.python.org/library/urllib.html#OK'
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
        try:
            fp = urllib.request.urlopen(url)
            self.assertEqual(fp.geturl(), url)
        finally:
            self.unfakehttp()

    def test_willclose(self):
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
        try:
            resp = urlopen("http://www.python.org")
            self.assertTrue(resp.fp.will_close)
        finally:
            self.unfakehttp()

    @unittest.skipUnless(ssl, "ssl module required")
    def test_url_path_with_control_char_rejected(self):
        for char_no in list(range(0, 0x21)) + [0x7f]:
            char = chr(char_no)
            schemeless_url = f"//localhost:7777/test{char}/"
            self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.")
            try:
                # We explicitly test urllib.request.urlopen() instead of the top
                # level 'def urlopen()' function defined in this... (quite ugly)
                # test suite.  They use different url opening codepaths.  Plain
                # urlopen uses FancyURLOpener which goes via a codepath that
                # calls urllib.parse.quote() on the URL which makes all of the
                # above attempts at injection within the url _path_ safe.
                escaped_char_repr = repr(char).replace('\\', r'\\')
                InvalidURL = http.client.InvalidURL
                with self.assertRaisesRegex(
                    InvalidURL, f"contain control.*{escaped_char_repr}"):
                    urllib.request.urlopen(f"http:{schemeless_url}")
                with self.assertRaisesRegex(
                    InvalidURL, f"contain control.*{escaped_char_repr}"):
                    urllib.request.urlopen(f"https:{schemeless_url}")
                # This code path quotes the URL so there is no injection.
                resp = urlopen(f"http:{schemeless_url}")
                self.assertNotIn(char, resp.geturl())
            finally:
                self.unfakehttp()

    @unittest.skipUnless(ssl, "ssl module required")
    def test_url_path_with_newline_header_injection_rejected(self):
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.")
        host = "localhost:7777?a=1 HTTP/1.1\r\nX-injected: header\r\nTEST: 123"
        schemeless_url = "//" + host + ":8080/test/?test=a"
        try:
            # We explicitly test urllib.request.urlopen() instead of the top
            # level 'def urlopen()' function defined in this... (quite ugly)
            # test suite.  They use different url opening codepaths.  Plain
            # urlopen uses FancyURLOpener which goes via a codepath that
            # calls urllib.parse.quote() on the URL which makes all of the
            # above attempts at injection within the url _path_ safe.
            InvalidURL = http.client.InvalidURL
            with self.assertRaisesRegex(
                InvalidURL, r"contain control.*\\r.*(found at least . .)"):
                urllib.request.urlopen(f"http:{schemeless_url}")
            with self.assertRaisesRegex(InvalidURL, r"contain control.*\\n"):
                urllib.request.urlopen(f"https:{schemeless_url}")
            # This code path quotes the URL so there is no injection.
            resp = urlopen(f"http:{schemeless_url}")
            self.assertNotIn(' ', resp.geturl())
            self.assertNotIn('\r', resp.geturl())
            self.assertNotIn('\n', resp.geturl())
        finally:
            self.unfakehttp()

    @unittest.skipUnless(ssl, "ssl module required")
    def test_url_host_with_control_char_rejected(self):
        for char_no in list(range(0, 0x21)) + [0x7f]:
            char = chr(char_no)
            schemeless_url = f"//localhost{char}/test/"
            self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.")
            try:
                escaped_char_repr = repr(char).replace('\\', r'\\')
                InvalidURL = http.client.InvalidURL
                with self.assertRaisesRegex(
                    InvalidURL, f"contain control.*{escaped_char_repr}"):
                    urlopen(f"http:{schemeless_url}")
                with self.assertRaisesRegex(InvalidURL, f"contain control.*{escaped_char_repr}"):
                    urlopen(f"https:{schemeless_url}")
            finally:
                self.unfakehttp()

    @unittest.skipUnless(ssl, "ssl module required")
    def test_url_host_with_newline_header_injection_rejected(self):
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.")
        host = "localhost\r\nX-injected: header\r\n"
        schemeless_url = "//" + host + ":8080/test/?test=a"
        try:
            InvalidURL = http.client.InvalidURL
            with self.assertRaisesRegex(
                InvalidURL, r"contain control.*\\r"):
                urlopen(f"http:{schemeless_url}")
            with self.assertRaisesRegex(InvalidURL, r"contain control.*\\n"):
                urlopen(f"https:{schemeless_url}")
        finally:
            self.unfakehttp()

    def test_read_0_9(self):
        # "0.9" response accepted (but not "simple responses" without
        # a status line)
        self.check_read(b"0.9")

    def test_read_1_0(self):
        self.check_read(b"1.0")

    def test_read_1_1(self):
        self.check_read(b"1.1")

    def test_read_bogus(self):
        # urlopen() should raise OSError for many error codes.
        self.fakehttp(b'''HTTP/1.1 401 Authentication Required
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Type: text/html; charset=iso-8859-1
''', mock_close=True)
        try:
            self.assertRaises(OSError, urlopen, "http://python.org/")
        finally:
            self.unfakehttp()

    def test_invalid_redirect(self):
        # urlopen() should raise OSError for many error codes.
        self.fakehttp(b'''HTTP/1.1 302 Found
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Location: file://guidocomputer.athome.com:/python/license
Connection: close
Content-Type: text/html; charset=iso-8859-1
''', mock_close=True)
        try:
            msg = "Redirection to url 'file:"
            with self.assertRaisesRegex(urllib.error.HTTPError, msg):
                urlopen("http://python.org/")
        finally:
            self.unfakehttp()

    def test_redirect_limit_independent(self):
        # Ticket #12923: make sure independent requests each use their
        # own retry limit.
        for i in range(FancyURLopener().maxtries):
            self.fakehttp(b'''HTTP/1.1 302 Found
Location: file://guidocomputer.athome.com:/python/license
Connection: close
''', mock_close=True)
            try:
                self.assertRaises(urllib.error.HTTPError, urlopen,
                                  "http://something")
            finally:
                self.unfakehttp()

    def test_empty_socket(self):
        # urlopen() raises OSError if the underlying socket does not send any
        # data. (#1680230)
        self.fakehttp(b'')
        try:
            self.assertRaises(OSError, urlopen, "http://something")
        finally:
            self.unfakehttp()

    def test_missing_localfile(self):
        # Test for #10836
        with self.assertRaises(urllib.error.URLError) as e:
            urlopen('file://localhost/a/file/which/doesnot/exists.py')
        self.assertTrue(e.exception.filename)
        self.assertTrue(e.exception.reason)

    def test_file_notexists(self):
        fd, tmp_file = tempfile.mkstemp()
        tmp_fileurl = 'file://localhost/' + tmp_file.replace(os.path.sep, '/')
        try:
            self.assertTrue(os.path.exists(tmp_file))
            with urlopen(tmp_fileurl) as fobj:
                self.assertTrue(fobj)
        finally:
            os.close(fd)
            os.unlink(tmp_file)
        self.assertFalse(os.path.exists(tmp_file))
        with self.assertRaises(urllib.error.URLError):
            urlopen(tmp_fileurl)

    def test_ftp_nohost(self):
        test_ftp_url = 'ftp:///path'
        with self.assertRaises(urllib.error.URLError) as e:
            urlopen(test_ftp_url)
        self.assertFalse(e.exception.filename)
        self.assertTrue(e.exception.reason)

    def test_ftp_nonexisting(self):
        with self.assertRaises(urllib.error.URLError) as e:
            urlopen('ftp://localhost/a/file/which/doesnot/exists.py')
        self.assertFalse(e.exception.filename)
        self.assertTrue(e.exception.reason)

    @patch.object(urllib.request, 'MAXFTPCACHE', 0)
    def test_ftp_cache_pruning(self):
        self.fakeftp()
        try:
            urllib.request.ftpcache['test'] = urllib.request.ftpwrapper('user', 'pass', 'localhost', 21, [])
            urlopen('ftp://localhost')
        finally:
            self.unfakeftp()

    def test_userpass_inurl(self):
        self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!")
        try:
            fp = urlopen("http://user:pass@python.org/")
            self.assertEqual(fp.readline(), b"Hello!")
            self.assertEqual(fp.readline(), b"")
            self.assertEqual(fp.geturl(), 'http://user:pass@python.org/')
            self.assertEqual(fp.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_userpass_inurl_w_spaces(self):
        self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!")
        try:
            userpass = "a b:c d"
            url = "http://{}@python.org/".format(userpass)
            fakehttp_wrapper = http.client.HTTPConnection
            authorization = ("Authorization: Basic %s\r\n" %
                             b64encode(userpass.encode("ASCII")).decode("ASCII"))
            fp = urlopen(url)
            # The authorization header must be in place
            self.assertIn(authorization, fakehttp_wrapper.buf.decode("UTF-8"))
            self.assertEqual(fp.readline(), b"Hello!")
            self.assertEqual(fp.readline(), b"")
            # the spaces are quoted in URL so no match
            self.assertNotEqual(fp.geturl(), url)
            self.assertEqual(fp.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_URLopener_deprecation(self):
        with warnings_helper.check_warnings(('', DeprecationWarning)):
            urllib.request.URLopener()

    @unittest.skipUnless(ssl, "ssl module required")
    def test_cafile_and_context(self):
        context = ssl.create_default_context()
        with warnings_helper.check_warnings(('', DeprecationWarning)):
            with self.assertRaises(ValueError):
                urllib.request.urlopen(
                    "https://localhost", cafile="/nonexistent/path", context=context
                )


class urlopen_DataTests(unittest.TestCase):
    """Test urlopen() opening a data URL."""

    def setUp(self):
        # clear _opener global variable
        self.addCleanup(urllib.request.urlcleanup)

        # text containing URL special- and unicode-characters
        self.text = "test data URLs :;,%=& \u00f6 \u00c4 "
        # 2x1 pixel RGB PNG image with one black and one white pixel
        self.image = (
            b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x02\x00\x00\x00'
            b'\x01\x08\x02\x00\x00\x00{@\xe8\xdd\x00\x00\x00\x01sRGB\x00\xae'
            b'\xce\x1c\xe9\x00\x00\x00\x0fIDAT\x08\xd7c```\xf8\xff\xff?\x00'
            b'\x06\x01\x02\xfe\no/\x1e\x00\x00\x00\x00IEND\xaeB`\x82')

        self.text_url = (
            "data:text/plain;charset=UTF-8,test%20data%20URLs%20%3A%3B%2C%25%3"
            "D%26%20%C3%B6%20%C3%84%20")
        self.text_url_base64 = (
            "data:text/plain;charset=ISO-8859-1;base64,dGVzdCBkYXRhIFVSTHMgOjs"
            "sJT0mIPYgxCA%3D")
        # base64 encoded data URL that contains ignorable spaces,
        # such as "\n", " ", "%0A", and "%20".
        self.image_url = (
            "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAIAAAABCAIAAAB7\n"
            "QOjdAAAAAXNSR0IArs4c6QAAAA9JREFUCNdj%0AYGBg%2BP//PwAGAQL%2BCm8 "
            "vHgAAAABJRU5ErkJggg%3D%3D%0A%20")

        self.text_url_resp = urllib.request.urlopen(self.text_url)
        self.text_url_base64_resp = urllib.request.urlopen(
            self.text_url_base64)
        self.image_url_resp = urllib.request.urlopen(self.image_url)

    def test_interface(self):
        # Make sure object returned by urlopen() has the specified methods
        for attr in ("read", "readline", "readlines",
                     "close", "info", "geturl", "getcode", "__iter__"):
            self.assertTrue(hasattr(self.text_url_resp, attr),
                            "object returned by urlopen() lacks %s attribute" %
                            attr)

    def test_info(self):
        self.assertIsInstance(self.text_url_resp.info(), email.message.Message)
        self.assertEqual(self.text_url_base64_resp.info().get_params(),
                         [('text/plain', ''), ('charset', 'ISO-8859-1')])
        self.assertEqual(self.image_url_resp.info()['content-length'],
                         str(len(self.image)))
        self.assertEqual(urllib.request.urlopen("data:,").info().get_params(),
                         [('text/plain', ''), ('charset', 'US-ASCII')])

    def test_geturl(self):
        self.assertEqual(self.text_url_resp.geturl(), self.text_url)
        self.assertEqual(self.text_url_base64_resp.geturl(),
                         self.text_url_base64)
        self.assertEqual(self.image_url_resp.geturl(), self.image_url)

    def test_read_text(self):
        self.assertEqual(self.text_url_resp.read().decode(
            dict(self.text_url_resp.info().get_params())['charset']), self.text)

    def test_read_text_base64(self):
        self.assertEqual(self.text_url_base64_resp.read().decode(
            dict(self.text_url_base64_resp.info().get_params())['charset']),
            self.text)

    def test_read_image(self):
        self.assertEqual(self.image_url_resp.read(), self.image)

    def test_missing_comma(self):
        self.assertRaises(ValueError, urllib.request.urlopen, 'data:text/plain')

    def test_invalid_base64_data(self):
        # missing padding character
        self.assertRaises(ValueError, urllib.request.urlopen, 'data:;base64,Cg=')


class urlretrieve_FileTests(unittest.TestCase):
    """Test urllib.urlretrieve() on local files"""

    def setUp(self):
        # clear _opener global variable
        self.addCleanup(urllib.request.urlcleanup)

        # Create a list of temporary files. Each item in the list is a file
        # name (absolute path or relative to the current working directory).
        # All files in this list will be deleted in the tearDown method. Note,
        # this only helps to makes sure temporary files get deleted, but it
        # does nothing about trying to close files that may still be open. It
        # is the responsibility of the developer to properly close files even
        # when exceptional conditions occur.
        self.tempFiles = []

        # Create a temporary file.
        self.registerFileForCleanUp(os_helper.TESTFN)
        self.text = b'testing urllib.urlretrieve'
        try:
            FILE = open(os_helper.TESTFN, 'wb')
            FILE.write(self.text)
            FILE.close()
        finally:
            try: FILE.close()
            except: pass

    def tearDown(self):
        # Delete the temporary files.
        for each in self.tempFiles:
            try: os.remove(each)
            except: pass

    def constructLocalFileUrl(self, filePath):
        filePath = os.path.abspath(filePath)
        try:
            filePath.encode("utf-8")
        except UnicodeEncodeError:
            raise unittest.SkipTest("filePath is not encodable to utf8")
        return "file://%s" % urllib.request.pathname2url(filePath)

    def createNewTempFile(self, data=b""):
        """Creates a new temporary file containing the specified data,
        registers the file for deletion during the test fixture tear down, and
        returns the absolute path of the file."""

        newFd, newFilePath = tempfile.mkstemp()
        try:
            self.registerFileForCleanUp(newFilePath)
            newFile = os.fdopen(newFd, "wb")
            newFile.write(data)
            newFile.close()
        finally:
            try: newFile.close()
            except: pass
        return newFilePath

    def registerFileForCleanUp(self, fileName):
        self.tempFiles.append(fileName)

    def test_basic(self):
        # Make sure that a local file just gets its own location returned and
        # a headers value is returned.
        result = urllib.request.urlretrieve("file:%s" % os_helper.TESTFN)
        self.assertEqual(result[0], os_helper.TESTFN)
        self.assertIsInstance(result[1], email.message.Message,
                              "did not get an email.message.Message instance "
                              "as second returned value")

    def test_copy(self):
        # Test that setting the filename argument works.
        second_temp = "%s.2" % os_helper.TESTFN
        self.registerFileForCleanUp(second_temp)
        result = urllib.request.urlretrieve(self.constructLocalFileUrl(
            os_helper.TESTFN), second_temp)
        self.assertEqual(second_temp, result[0])
        self.assertTrue(os.path.exists(second_temp), "copy of the file was not "
                                                     "made")
        FILE = open(second_temp, 'rb')
        try:
            text = FILE.read()
            FILE.close()
        finally:
            try: FILE.close()
            except: pass
        self.assertEqual(self.text, text)

    def test_reporthook(self):
        # Make sure that the reporthook works.
        def hooktester(block_count, block_read_size, file_size, count_holder=[0]):
            self.assertIsInstance(block_count, int)
            self.assertIsInstance(block_read_size, int)
            self.assertIsInstance(file_size, int)
            self.assertEqual(block_count, count_holder[0])
            count_holder[0] = count_holder[0] + 1
        second_temp = "%s.2" % os_helper.TESTFN
        self.registerFileForCleanUp(second_temp)
        urllib.request.urlretrieve(
            self.constructLocalFileUrl(os_helper.TESTFN),
            second_temp, hooktester)

    def test_reporthook_0_bytes(self):
        # Test on zero length file. Should call reporthook only 1 time.
        report = []
        def hooktester(block_count, block_read_size, file_size, _report=report):
            _report.append((block_count, block_read_size, file_size))
        srcFileName = self.createNewTempFile()
        urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
                                   os_helper.TESTFN, hooktester)
        self.assertEqual(len(report), 1)
        self.assertEqual(report[0][2], 0)

    def test_reporthook_5_bytes(self):
        # Test on 5 byte file. Should call reporthook only 2 times (once when
        # the "network connection" is established and once when the block is
        # read).
        report = []
        def hooktester(block_count, block_read_size, file_size, _report=report):
            _report.append((block_count, block_read_size, file_size))
        srcFileName = self.createNewTempFile(b"x" * 5)
        urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
                                   os_helper.TESTFN, hooktester)
        self.assertEqual(len(report), 2)
        self.assertEqual(report[0][2], 5)
        self.assertEqual(report[1][2], 5)

    def test_reporthook_8193_bytes(self):
        # Test on 8193 byte file. Should call reporthook only 3 times (once
        # when the "network connection" is established, once for the next 8192
        # bytes, and once for the last byte).
        report = []
        def hooktester(block_count, block_read_size, file_size, _report=report):
            _report.append((block_count, block_read_size, file_size))
        srcFileName = self.createNewTempFile(b"x" * 8193)
        urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
                                   os_helper.TESTFN, hooktester)
        self.assertEqual(len(report), 3)
        self.assertEqual(report[0][2], 8193)
        self.assertEqual(report[0][1], 8192)
        self.assertEqual(report[1][1], 8192)
        self.assertEqual(report[2][1], 8192)


class urlretrieve_HttpTests(unittest.TestCase, FakeHTTPMixin):
    """Test urllib.urlretrieve() using fake http connections"""

    def test_short_content_raises_ContentTooShortError(self):
        self.addCleanup(urllib.request.urlcleanup)

        self.fakehttp(b'''HTTP/1.1 200 OK
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Length: 100
Content-Type: text/html; charset=iso-8859-1

FF
''')

        def _reporthook(par1, par2, par3):
            pass

        with self.assertRaises(urllib.error.ContentTooShortError):
            try:
                urllib.request.urlretrieve(support.TEST_HTTP_URL,
                                           reporthook=_reporthook)
            finally:
                self.unfakehttp()

    def test_short_content_raises_ContentTooShortError_without_reporthook(self):
        self.addCleanup(urllib.request.urlcleanup)

        self.fakehttp(b'''HTTP/1.1 200 OK
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Length: 100
Content-Type: text/html; charset=iso-8859-1

FF
''')
        with self.assertRaises(urllib.error.ContentTooShortError):
            try:
                urllib.request.urlretrieve(support.TEST_HTTP_URL)
            finally:
                self.unfakehttp()


class QuotingTests(unittest.TestCase):
    r"""Tests for urllib.quote() and urllib.quote_plus()

    According to RFC 3986 (Uniform Resource Identifiers), to escape a
    character you write it as '%' + <2 character US-ASCII hex value>.
    The Python code of ``'%' + hex(ord(<character>))[2:]`` escapes a
    character properly. Case does not matter on the hex letters.

    The various character sets specified are:

    Reserved characters : ";/?:@&=+$,"
        Have special meaning in URIs and must be escaped if not being used for
        their special meaning
    Data characters : letters, digits, and "-_.!~*'()"
        Unreserved and do not need to be escaped; can be, though, if desired
    Control characters : 0x00 - 0x1F, 0x7F
        Have no use in URIs so must be escaped
    space : 0x20
        Must be escaped
    Delimiters : '<>#%"'
        Must be escaped
    Unwise : "{}|\^[]`"
        Must be escaped

    """

    def test_never_quote(self):
        # Make sure quote() does not quote letters, digits, and "_,.-"
        do_not_quote = '' .join(["ABCDEFGHIJKLMNOPQRSTUVWXYZ",
                                 "abcdefghijklmnopqrstuvwxyz",
                                 "0123456789",
                                 "_.-~"])
        result = urllib.parse.quote(do_not_quote)
        self.assertEqual(do_not_quote, result,
                         "using quote(): %r != %r" % (do_not_quote, result))
        result = urllib.parse.quote_plus(do_not_quote)
        self.assertEqual(do_not_quote, result,
                         "using quote_plus(): %r != %r" % (do_not_quote,
                                                           result))

    def test_default_safe(self):
        # Test '/' is default value for 'safe' parameter
        self.assertEqual(urllib.parse.quote.__defaults__[0], '/')

    def test_safe(self):
        # Test setting 'safe' parameter does what it should do
        quote_by_default = "<>"
        result = urllib.parse.quote(quote_by_default, safe=quote_by_default)
        self.assertEqual(quote_by_default, result,
                         "using quote(): %r != %r" % (quote_by_default, result))
        result = urllib.parse.quote_plus(quote_by_default,
                                         safe=quote_by_default)
        self.assertEqual(quote_by_default, result,
                         "using quote_plus(): %r != %r" %
                         (quote_by_default, result))
        # Safe expressed as bytes rather than str
        result = urllib.parse.quote(quote_by_default, safe=b"<>")
        self.assertEqual(quote_by_default, result,
                         "using quote(): %r != %r" % (quote_by_default, result))
        # "Safe" non-ASCII characters should have no effect
        # (Since URIs are not allowed to have non-ASCII characters)
        result = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="\xfc")
        expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" %
                         (expect, result))
        # Same as above, but using a bytes rather than str
        result = urllib.parse.quote("a\xfcb", encoding="latin-1", safe=b"\xfc")
        expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" %
                         (expect, result))

    def test_default_quoting(self):
        # Make sure all characters that should be quoted are by default sans
        # space (separate test for that).
        should_quote = [chr(num) for num in range(32)]  # For 0x00 - 0x1F
        should_quote.append(r'<>#%"{}|\^[]`')
        should_quote.append(chr(127))  # For 0x7F
        should_quote = ''.join(should_quote)
        for char in should_quote:
            result = urllib.parse.quote(char)
            self.assertEqual(hexescape(char), result,
                             "using quote(): "
                             "%s should be escaped to %s, not %s" %
                             (char, hexescape(char), result))
            result = urllib.parse.quote_plus(char)
            self.assertEqual(hexescape(char), result,
                             "using quote_plus(): "
                             "%s should be escapes to %s, not %s" %
                             (char, hexescape(char), result))
        del should_quote
        partial_quote = "ab[]cd"
        expected = "ab%5B%5Dcd"
        result = urllib.parse.quote(partial_quote)
        self.assertEqual(expected, result,
                         "using quote(): %r != %r" % (expected, result))
        result = urllib.parse.quote_plus(partial_quote)
        self.assertEqual(expected, result,
                         "using quote_plus(): %r != %r" % (expected, result))

    def test_quoting_space(self):
        # Make sure quote() and quote_plus() handle spaces as specified in
        # their unique way
        result = urllib.parse.quote(' ')
        self.assertEqual(result, hexescape(' '),
                         "using quote(): %r != %r" % (result, hexescape(' ')))
        result = urllib.parse.quote_plus(' ')
        self.assertEqual(result, '+',
                         "using quote_plus(): %r != +" % result)
        given = "a b cd e f"
        expect = given.replace(' ', hexescape(' '))
        result = urllib.parse.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        expect = given.replace(' ', '+')
        result = urllib.parse.quote_plus(given)
        self.assertEqual(expect, result,
                         "using quote_plus(): %r != %r" % (expect, result))

    def test_quoting_plus(self):
        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma'),
                         'alpha%2Bbeta+gamma')
        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', '+'),
                         'alpha+beta+gamma')
        # Test with bytes
self.assertEqual(urllib.parse.quote_plus(b'alpha+beta gamma'), 1001 'alpha%2Bbeta+gamma') 1002 # Test with safe bytes 1003 self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', b'+'), 1004 'alpha+beta+gamma') 1005 1006 def test_quote_bytes(self): 1007 # Bytes should quote directly to percent-encoded values 1008 given = b"\xa2\xd8ab\xff" 1009 expect = "%A2%D8ab%FF" 1010 result = urllib.parse.quote(given) 1011 self.assertEqual(expect, result, 1012 "using quote(): %r != %r" % (expect, result)) 1013 # Encoding argument should raise type error on bytes input 1014 self.assertRaises(TypeError, urllib.parse.quote, given, 1015 encoding="latin-1") 1016 # quote_from_bytes should work the same 1017 result = urllib.parse.quote_from_bytes(given) 1018 self.assertEqual(expect, result, 1019 "using quote_from_bytes(): %r != %r" 1020 % (expect, result)) 1021 1022 def test_quote_with_unicode(self): 1023 # Characters in Latin-1 range, encoded by default in UTF-8 1024 given = "\xa2\xd8ab\xff" 1025 expect = "%C2%A2%C3%98ab%C3%BF" 1026 result = urllib.parse.quote(given) 1027 self.assertEqual(expect, result, 1028 "using quote(): %r != %r" % (expect, result)) 1029 # Characters in Latin-1 range, encoded by with None (default) 1030 result = urllib.parse.quote(given, encoding=None, errors=None) 1031 self.assertEqual(expect, result, 1032 "using quote(): %r != %r" % (expect, result)) 1033 # Characters in Latin-1 range, encoded with Latin-1 1034 given = "\xa2\xd8ab\xff" 1035 expect = "%A2%D8ab%FF" 1036 result = urllib.parse.quote(given, encoding="latin-1") 1037 self.assertEqual(expect, result, 1038 "using quote(): %r != %r" % (expect, result)) 1039 # Characters in BMP, encoded by default in UTF-8 1040 given = "\u6f22\u5b57" # "Kanji" 1041 expect = "%E6%BC%A2%E5%AD%97" 1042 result = urllib.parse.quote(given) 1043 self.assertEqual(expect, result, 1044 "using quote(): %r != %r" % (expect, result)) 1045 # Characters in BMP, encoded with Latin-1 1046 given = "\u6f22\u5b57" 1047 
self.assertRaises(UnicodeEncodeError, urllib.parse.quote, given, 1048 encoding="latin-1") 1049 # Characters in BMP, encoded with Latin-1, with replace error handling 1050 given = "\u6f22\u5b57" 1051 expect = "%3F%3F" # "??" 1052 result = urllib.parse.quote(given, encoding="latin-1", 1053 errors="replace") 1054 self.assertEqual(expect, result, 1055 "using quote(): %r != %r" % (expect, result)) 1056 # Characters in BMP, Latin-1, with xmlcharref error handling 1057 given = "\u6f22\u5b57" 1058 expect = "%26%2328450%3B%26%2323383%3B" # "漢字" 1059 result = urllib.parse.quote(given, encoding="latin-1", 1060 errors="xmlcharrefreplace") 1061 self.assertEqual(expect, result, 1062 "using quote(): %r != %r" % (expect, result)) 1063 1064 def test_quote_plus_with_unicode(self): 1065 # Encoding (latin-1) test for quote_plus 1066 given = "\xa2\xd8 \xff" 1067 expect = "%A2%D8+%FF" 1068 result = urllib.parse.quote_plus(given, encoding="latin-1") 1069 self.assertEqual(expect, result, 1070 "using quote_plus(): %r != %r" % (expect, result)) 1071 # Errors test for quote_plus 1072 given = "ab\u6f22\u5b57 cd" 1073 expect = "ab%3F%3F+cd" 1074 result = urllib.parse.quote_plus(given, encoding="latin-1", 1075 errors="replace") 1076 self.assertEqual(expect, result, 1077 "using quote_plus(): %r != %r" % (expect, result)) 1078 1079 1080class UnquotingTests(unittest.TestCase): 1081 """Tests for unquote() and unquote_plus() 1082 1083 See the doc string for quoting_Tests for details on quoting and such. 
1084 1085 """ 1086 1087 def test_unquoting(self): 1088 # Make sure unquoting of all ASCII values works 1089 escape_list = [] 1090 for num in range(128): 1091 given = hexescape(chr(num)) 1092 expect = chr(num) 1093 result = urllib.parse.unquote(given) 1094 self.assertEqual(expect, result, 1095 "using unquote(): %r != %r" % (expect, result)) 1096 result = urllib.parse.unquote_plus(given) 1097 self.assertEqual(expect, result, 1098 "using unquote_plus(): %r != %r" % 1099 (expect, result)) 1100 escape_list.append(given) 1101 escape_string = ''.join(escape_list) 1102 del escape_list 1103 result = urllib.parse.unquote(escape_string) 1104 self.assertEqual(result.count('%'), 1, 1105 "using unquote(): not all characters escaped: " 1106 "%s" % result) 1107 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, None) 1108 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, ()) 1109 1110 def test_unquoting_badpercent(self): 1111 # Test unquoting on bad percent-escapes 1112 given = '%xab' 1113 expect = given 1114 result = urllib.parse.unquote(given) 1115 self.assertEqual(expect, result, "using unquote(): %r != %r" 1116 % (expect, result)) 1117 given = '%x' 1118 expect = given 1119 result = urllib.parse.unquote(given) 1120 self.assertEqual(expect, result, "using unquote(): %r != %r" 1121 % (expect, result)) 1122 given = '%' 1123 expect = given 1124 result = urllib.parse.unquote(given) 1125 self.assertEqual(expect, result, "using unquote(): %r != %r" 1126 % (expect, result)) 1127 # unquote_to_bytes 1128 given = '%xab' 1129 expect = bytes(given, 'ascii') 1130 result = urllib.parse.unquote_to_bytes(given) 1131 self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r" 1132 % (expect, result)) 1133 given = '%x' 1134 expect = bytes(given, 'ascii') 1135 result = urllib.parse.unquote_to_bytes(given) 1136 self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r" 1137 % (expect, result)) 1138 given = '%' 1139 expect = bytes(given, 
'ascii') 1140 result = urllib.parse.unquote_to_bytes(given) 1141 self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r" 1142 % (expect, result)) 1143 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote_to_bytes, None) 1144 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote_to_bytes, ()) 1145 1146 def test_unquoting_mixed_case(self): 1147 # Test unquoting on mixed-case hex digits in the percent-escapes 1148 given = '%Ab%eA' 1149 expect = b'\xab\xea' 1150 result = urllib.parse.unquote_to_bytes(given) 1151 self.assertEqual(expect, result, 1152 "using unquote_to_bytes(): %r != %r" 1153 % (expect, result)) 1154 1155 def test_unquoting_parts(self): 1156 # Make sure unquoting works when have non-quoted characters 1157 # interspersed 1158 given = 'ab%sd' % hexescape('c') 1159 expect = "abcd" 1160 result = urllib.parse.unquote(given) 1161 self.assertEqual(expect, result, 1162 "using quote(): %r != %r" % (expect, result)) 1163 result = urllib.parse.unquote_plus(given) 1164 self.assertEqual(expect, result, 1165 "using unquote_plus(): %r != %r" % (expect, result)) 1166 1167 def test_unquoting_plus(self): 1168 # Test difference between unquote() and unquote_plus() 1169 given = "are+there+spaces..." 
1170 expect = given 1171 result = urllib.parse.unquote(given) 1172 self.assertEqual(expect, result, 1173 "using unquote(): %r != %r" % (expect, result)) 1174 expect = given.replace('+', ' ') 1175 result = urllib.parse.unquote_plus(given) 1176 self.assertEqual(expect, result, 1177 "using unquote_plus(): %r != %r" % (expect, result)) 1178 1179 def test_unquote_to_bytes(self): 1180 given = 'br%C3%BCckner_sapporo_20050930.doc' 1181 expect = b'br\xc3\xbcckner_sapporo_20050930.doc' 1182 result = urllib.parse.unquote_to_bytes(given) 1183 self.assertEqual(expect, result, 1184 "using unquote_to_bytes(): %r != %r" 1185 % (expect, result)) 1186 # Test on a string with unescaped non-ASCII characters 1187 # (Technically an invalid URI; expect those characters to be UTF-8 1188 # encoded). 1189 result = urllib.parse.unquote_to_bytes("\u6f22%C3%BC") 1190 expect = b'\xe6\xbc\xa2\xc3\xbc' # UTF-8 for "\u6f22\u00fc" 1191 self.assertEqual(expect, result, 1192 "using unquote_to_bytes(): %r != %r" 1193 % (expect, result)) 1194 # Test with a bytes as input 1195 given = b'%A2%D8ab%FF' 1196 expect = b'\xa2\xd8ab\xff' 1197 result = urllib.parse.unquote_to_bytes(given) 1198 self.assertEqual(expect, result, 1199 "using unquote_to_bytes(): %r != %r" 1200 % (expect, result)) 1201 # Test with a bytes as input, with unescaped non-ASCII bytes 1202 # (Technically an invalid URI; expect those bytes to be preserved) 1203 given = b'%A2\xd8ab%FF' 1204 expect = b'\xa2\xd8ab\xff' 1205 result = urllib.parse.unquote_to_bytes(given) 1206 self.assertEqual(expect, result, 1207 "using unquote_to_bytes(): %r != %r" 1208 % (expect, result)) 1209 1210 def test_unquote_with_unicode(self): 1211 # Characters in the Latin-1 range, encoded with UTF-8 1212 given = 'br%C3%BCckner_sapporo_20050930.doc' 1213 expect = 'br\u00fcckner_sapporo_20050930.doc' 1214 result = urllib.parse.unquote(given) 1215 self.assertEqual(expect, result, 1216 "using unquote(): %r != %r" % (expect, result)) 1217 # Characters in the Latin-1 
range, encoded with None (default) 1218 result = urllib.parse.unquote(given, encoding=None, errors=None) 1219 self.assertEqual(expect, result, 1220 "using unquote(): %r != %r" % (expect, result)) 1221 1222 # Characters in the Latin-1 range, encoded with Latin-1 1223 result = urllib.parse.unquote('br%FCckner_sapporo_20050930.doc', 1224 encoding="latin-1") 1225 expect = 'br\u00fcckner_sapporo_20050930.doc' 1226 self.assertEqual(expect, result, 1227 "using unquote(): %r != %r" % (expect, result)) 1228 1229 # Characters in BMP, encoded with UTF-8 1230 given = "%E6%BC%A2%E5%AD%97" 1231 expect = "\u6f22\u5b57" # "Kanji" 1232 result = urllib.parse.unquote(given) 1233 self.assertEqual(expect, result, 1234 "using unquote(): %r != %r" % (expect, result)) 1235 1236 # Decode with UTF-8, invalid sequence 1237 given = "%F3%B1" 1238 expect = "\ufffd" # Replacement character 1239 result = urllib.parse.unquote(given) 1240 self.assertEqual(expect, result, 1241 "using unquote(): %r != %r" % (expect, result)) 1242 1243 # Decode with UTF-8, invalid sequence, replace errors 1244 result = urllib.parse.unquote(given, errors="replace") 1245 self.assertEqual(expect, result, 1246 "using unquote(): %r != %r" % (expect, result)) 1247 1248 # Decode with UTF-8, invalid sequence, ignoring errors 1249 given = "%F3%B1" 1250 expect = "" 1251 result = urllib.parse.unquote(given, errors="ignore") 1252 self.assertEqual(expect, result, 1253 "using unquote(): %r != %r" % (expect, result)) 1254 1255 # A mix of non-ASCII and percent-encoded characters, UTF-8 1256 result = urllib.parse.unquote("\u6f22%C3%BC") 1257 expect = '\u6f22\u00fc' 1258 self.assertEqual(expect, result, 1259 "using unquote(): %r != %r" % (expect, result)) 1260 1261 # A mix of non-ASCII and percent-encoded characters, Latin-1 1262 # (Note, the string contains non-Latin-1-representable characters) 1263 result = urllib.parse.unquote("\u6f22%FC", encoding="latin-1") 1264 expect = '\u6f22\u00fc' 1265 self.assertEqual(expect, result, 1266 
"using unquote(): %r != %r" % (expect, result)) 1267 1268 def test_unquoting_with_bytes_input(self): 1269 # ASCII characters decoded to a string 1270 given = b'blueberryjam' 1271 expect = 'blueberryjam' 1272 result = urllib.parse.unquote(given) 1273 self.assertEqual(expect, result, 1274 "using unquote(): %r != %r" % (expect, result)) 1275 1276 # A mix of non-ASCII hex-encoded characters and ASCII characters 1277 given = b'bl\xc3\xa5b\xc3\xa6rsyltet\xc3\xb8y' 1278 expect = 'bl\u00e5b\u00e6rsyltet\u00f8y' 1279 result = urllib.parse.unquote(given) 1280 self.assertEqual(expect, result, 1281 "using unquote(): %r != %r" % (expect, result)) 1282 1283 # A mix of non-ASCII percent-encoded characters and ASCII characters 1284 given = b'bl%c3%a5b%c3%a6rsyltet%c3%b8j' 1285 expect = 'bl\u00e5b\u00e6rsyltet\u00f8j' 1286 result = urllib.parse.unquote(given) 1287 self.assertEqual(expect, result, 1288 "using unquote(): %r != %r" % (expect, result)) 1289 1290 1291class urlencode_Tests(unittest.TestCase): 1292 """Tests for urlencode()""" 1293 1294 def help_inputtype(self, given, test_type): 1295 """Helper method for testing different input types. 1296 1297 'given' must lead to only the pairs: 1298 * 1st, 1 1299 * 2nd, 2 1300 * 3rd, 3 1301 1302 Test cannot assume anything about order. Docs make no guarantee and 1303 have possible dictionary input. 
1304 1305 """ 1306 expect_somewhere = ["1st=1", "2nd=2", "3rd=3"] 1307 result = urllib.parse.urlencode(given) 1308 for expected in expect_somewhere: 1309 self.assertIn(expected, result, 1310 "testing %s: %s not found in %s" % 1311 (test_type, expected, result)) 1312 self.assertEqual(result.count('&'), 2, 1313 "testing %s: expected 2 '&'s; got %s" % 1314 (test_type, result.count('&'))) 1315 amp_location = result.index('&') 1316 on_amp_left = result[amp_location - 1] 1317 on_amp_right = result[amp_location + 1] 1318 self.assertTrue(on_amp_left.isdigit() and on_amp_right.isdigit(), 1319 "testing %s: '&' not located in proper place in %s" % 1320 (test_type, result)) 1321 self.assertEqual(len(result), (5 * 3) + 2, #5 chars per thing and amps 1322 "testing %s: " 1323 "unexpected number of characters: %s != %s" % 1324 (test_type, len(result), (5 * 3) + 2)) 1325 1326 def test_using_mapping(self): 1327 # Test passing in a mapping object as an argument. 1328 self.help_inputtype({"1st":'1', "2nd":'2', "3rd":'3'}, 1329 "using dict as input type") 1330 1331 def test_using_sequence(self): 1332 # Test passing in a sequence of two-item sequences as an argument. 
1333 self.help_inputtype([('1st', '1'), ('2nd', '2'), ('3rd', '3')], 1334 "using sequence of two-item tuples as input") 1335 1336 def test_quoting(self): 1337 # Make sure keys and values are quoted using quote_plus() 1338 given = {"&":"="} 1339 expect = "%s=%s" % (hexescape('&'), hexescape('=')) 1340 result = urllib.parse.urlencode(given) 1341 self.assertEqual(expect, result) 1342 given = {"key name":"A bunch of pluses"} 1343 expect = "key+name=A+bunch+of+pluses" 1344 result = urllib.parse.urlencode(given) 1345 self.assertEqual(expect, result) 1346 1347 def test_doseq(self): 1348 # Test that passing True for 'doseq' parameter works correctly 1349 given = {'sequence':['1', '2', '3']} 1350 expect = "sequence=%s" % urllib.parse.quote_plus(str(['1', '2', '3'])) 1351 result = urllib.parse.urlencode(given) 1352 self.assertEqual(expect, result) 1353 result = urllib.parse.urlencode(given, True) 1354 for value in given["sequence"]: 1355 expect = "sequence=%s" % value 1356 self.assertIn(expect, result) 1357 self.assertEqual(result.count('&'), 2, 1358 "Expected 2 '&'s, got %s" % result.count('&')) 1359 1360 def test_empty_sequence(self): 1361 self.assertEqual("", urllib.parse.urlencode({})) 1362 self.assertEqual("", urllib.parse.urlencode([])) 1363 1364 def test_nonstring_values(self): 1365 self.assertEqual("a=1", urllib.parse.urlencode({"a": 1})) 1366 self.assertEqual("a=None", urllib.parse.urlencode({"a": None})) 1367 1368 def test_nonstring_seq_values(self): 1369 self.assertEqual("a=1&a=2", urllib.parse.urlencode({"a": [1, 2]}, True)) 1370 self.assertEqual("a=None&a=a", 1371 urllib.parse.urlencode({"a": [None, "a"]}, True)) 1372 data = collections.OrderedDict([("a", 1), ("b", 1)]) 1373 self.assertEqual("a=a&a=b", 1374 urllib.parse.urlencode({"a": data}, True)) 1375 1376 def test_urlencode_encoding(self): 1377 # ASCII encoding. 
Expect %3F with errors="replace' 1378 given = (('\u00a0', '\u00c1'),) 1379 expect = '%3F=%3F' 1380 result = urllib.parse.urlencode(given, encoding="ASCII", errors="replace") 1381 self.assertEqual(expect, result) 1382 1383 # Default is UTF-8 encoding. 1384 given = (('\u00a0', '\u00c1'),) 1385 expect = '%C2%A0=%C3%81' 1386 result = urllib.parse.urlencode(given) 1387 self.assertEqual(expect, result) 1388 1389 # Latin-1 encoding. 1390 given = (('\u00a0', '\u00c1'),) 1391 expect = '%A0=%C1' 1392 result = urllib.parse.urlencode(given, encoding="latin-1") 1393 self.assertEqual(expect, result) 1394 1395 def test_urlencode_encoding_doseq(self): 1396 # ASCII Encoding. Expect %3F with errors="replace' 1397 given = (('\u00a0', '\u00c1'),) 1398 expect = '%3F=%3F' 1399 result = urllib.parse.urlencode(given, doseq=True, 1400 encoding="ASCII", errors="replace") 1401 self.assertEqual(expect, result) 1402 1403 # ASCII Encoding. On a sequence of values. 1404 given = (("\u00a0", (1, "\u00c1")),) 1405 expect = '%3F=1&%3F=%3F' 1406 result = urllib.parse.urlencode(given, True, 1407 encoding="ASCII", errors="replace") 1408 self.assertEqual(expect, result) 1409 1410 # Utf-8 1411 given = (("\u00a0", "\u00c1"),) 1412 expect = '%C2%A0=%C3%81' 1413 result = urllib.parse.urlencode(given, True) 1414 self.assertEqual(expect, result) 1415 1416 given = (("\u00a0", (42, "\u00c1")),) 1417 expect = '%C2%A0=42&%C2%A0=%C3%81' 1418 result = urllib.parse.urlencode(given, True) 1419 self.assertEqual(expect, result) 1420 1421 # latin-1 1422 given = (("\u00a0", "\u00c1"),) 1423 expect = '%A0=%C1' 1424 result = urllib.parse.urlencode(given, True, encoding="latin-1") 1425 self.assertEqual(expect, result) 1426 1427 given = (("\u00a0", (42, "\u00c1")),) 1428 expect = '%A0=42&%A0=%C1' 1429 result = urllib.parse.urlencode(given, True, encoding="latin-1") 1430 self.assertEqual(expect, result) 1431 1432 def test_urlencode_bytes(self): 1433 given = ((b'\xa0\x24', b'\xc1\x24'),) 1434 expect = '%A0%24=%C1%24' 1435 
result = urllib.parse.urlencode(given) 1436 self.assertEqual(expect, result) 1437 result = urllib.parse.urlencode(given, True) 1438 self.assertEqual(expect, result) 1439 1440 # Sequence of values 1441 given = ((b'\xa0\x24', (42, b'\xc1\x24')),) 1442 expect = '%A0%24=42&%A0%24=%C1%24' 1443 result = urllib.parse.urlencode(given, True) 1444 self.assertEqual(expect, result) 1445 1446 def test_urlencode_encoding_safe_parameter(self): 1447 1448 # Send '$' (\x24) as safe character 1449 # Default utf-8 encoding 1450 1451 given = ((b'\xa0\x24', b'\xc1\x24'),) 1452 result = urllib.parse.urlencode(given, safe=":$") 1453 expect = '%A0$=%C1$' 1454 self.assertEqual(expect, result) 1455 1456 given = ((b'\xa0\x24', b'\xc1\x24'),) 1457 result = urllib.parse.urlencode(given, doseq=True, safe=":$") 1458 expect = '%A0$=%C1$' 1459 self.assertEqual(expect, result) 1460 1461 # Safe parameter in sequence 1462 given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),) 1463 expect = '%A0$=%C1$&%A0$=13&%A0$=42' 1464 result = urllib.parse.urlencode(given, True, safe=":$") 1465 self.assertEqual(expect, result) 1466 1467 # Test all above in latin-1 encoding 1468 1469 given = ((b'\xa0\x24', b'\xc1\x24'),) 1470 result = urllib.parse.urlencode(given, safe=":$", 1471 encoding="latin-1") 1472 expect = '%A0$=%C1$' 1473 self.assertEqual(expect, result) 1474 1475 given = ((b'\xa0\x24', b'\xc1\x24'),) 1476 expect = '%A0$=%C1$' 1477 result = urllib.parse.urlencode(given, doseq=True, safe=":$", 1478 encoding="latin-1") 1479 1480 given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),) 1481 expect = '%A0$=%C1$&%A0$=13&%A0$=42' 1482 result = urllib.parse.urlencode(given, True, safe=":$", 1483 encoding="latin-1") 1484 self.assertEqual(expect, result) 1485 1486class Pathname_Tests(unittest.TestCase): 1487 """Test pathname2url() and url2pathname()""" 1488 1489 def test_basic(self): 1490 # Make sure simple tests pass 1491 expected_path = os.path.join("parts", "of", "a", "path") 1492 expected_url = "parts/of/a/path" 1493 result = 
urllib.request.pathname2url(expected_path) 1494 self.assertEqual(expected_url, result, 1495 "pathname2url() failed; %s != %s" % 1496 (result, expected_url)) 1497 result = urllib.request.url2pathname(expected_url) 1498 self.assertEqual(expected_path, result, 1499 "url2pathame() failed; %s != %s" % 1500 (result, expected_path)) 1501 1502 def test_quoting(self): 1503 # Test automatic quoting and unquoting works for pathnam2url() and 1504 # url2pathname() respectively 1505 given = os.path.join("needs", "quot=ing", "here") 1506 expect = "needs/%s/here" % urllib.parse.quote("quot=ing") 1507 result = urllib.request.pathname2url(given) 1508 self.assertEqual(expect, result, 1509 "pathname2url() failed; %s != %s" % 1510 (expect, result)) 1511 expect = given 1512 result = urllib.request.url2pathname(result) 1513 self.assertEqual(expect, result, 1514 "url2pathname() failed; %s != %s" % 1515 (expect, result)) 1516 given = os.path.join("make sure", "using_quote") 1517 expect = "%s/using_quote" % urllib.parse.quote("make sure") 1518 result = urllib.request.pathname2url(given) 1519 self.assertEqual(expect, result, 1520 "pathname2url() failed; %s != %s" % 1521 (expect, result)) 1522 given = "make+sure/using_unquote" 1523 expect = os.path.join("make+sure", "using_unquote") 1524 result = urllib.request.url2pathname(given) 1525 self.assertEqual(expect, result, 1526 "url2pathname() failed; %s != %s" % 1527 (expect, result)) 1528 1529 @unittest.skipUnless(sys.platform == 'win32', 1530 'test specific to the nturl2path functions.') 1531 def test_prefixes(self): 1532 # Test special prefixes are correctly handled in pathname2url() 1533 given = '\\\\?\\C:\\dir' 1534 expect = '///C:/dir' 1535 result = urllib.request.pathname2url(given) 1536 self.assertEqual(expect, result, 1537 "pathname2url() failed; %s != %s" % 1538 (expect, result)) 1539 given = '\\\\?\\unc\\server\\share\\dir' 1540 expect = '/server/share/dir' 1541 result = urllib.request.pathname2url(given) 1542 self.assertEqual(expect, 
result, 1543 "pathname2url() failed; %s != %s" % 1544 (expect, result)) 1545 1546 1547 @unittest.skipUnless(sys.platform == 'win32', 1548 'test specific to the urllib.url2path function.') 1549 def test_ntpath(self): 1550 given = ('/C:/', '///C:/', '/C|//') 1551 expect = 'C:\\' 1552 for url in given: 1553 result = urllib.request.url2pathname(url) 1554 self.assertEqual(expect, result, 1555 'urllib.request..url2pathname() failed; %s != %s' % 1556 (expect, result)) 1557 given = '///C|/path' 1558 expect = 'C:\\path' 1559 result = urllib.request.url2pathname(given) 1560 self.assertEqual(expect, result, 1561 'urllib.request.url2pathname() failed; %s != %s' % 1562 (expect, result)) 1563 1564class Utility_Tests(unittest.TestCase): 1565 """Testcase to test the various utility functions in the urllib.""" 1566 1567 def test_thishost(self): 1568 """Test the urllib.request.thishost utility function returns a tuple""" 1569 self.assertIsInstance(urllib.request.thishost(), tuple) 1570 1571 1572class URLopener_Tests(FakeHTTPMixin, unittest.TestCase): 1573 """Testcase to test the open method of URLopener class.""" 1574 1575 def test_quoted_open(self): 1576 class DummyURLopener(urllib.request.URLopener): 1577 def open_spam(self, url): 1578 return url 1579 with warnings_helper.check_warnings( 1580 ('DummyURLopener style of invoking requests is deprecated.', 1581 DeprecationWarning)): 1582 self.assertEqual(DummyURLopener().open( 1583 'spam://example/ /'),'//example/%20/') 1584 1585 # test the safe characters are not quoted by urlopen 1586 self.assertEqual(DummyURLopener().open( 1587 "spam://c:|windows%/:=&?~#+!$,;'@()*[]|/path/"), 1588 "//c:|windows%/:=&?~#+!$,;'@()*[]|/path/") 1589 1590 @warnings_helper.ignore_warnings(category=DeprecationWarning) 1591 def test_urlopener_retrieve_file(self): 1592 with os_helper.temp_dir() as tmpdir: 1593 fd, tmpfile = tempfile.mkstemp(dir=tmpdir) 1594 os.close(fd) 1595 fileurl = "file:" + urllib.request.pathname2url(tmpfile) 1596 filename, _ = 
urllib.request.URLopener().retrieve(fileurl) 1597 # Some buildbots have TEMP folder that uses a lowercase drive letter. 1598 self.assertEqual(os.path.normcase(filename), os.path.normcase(tmpfile)) 1599 1600 @warnings_helper.ignore_warnings(category=DeprecationWarning) 1601 def test_urlopener_retrieve_remote(self): 1602 url = "http://www.python.org/file.txt" 1603 self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!") 1604 self.addCleanup(self.unfakehttp) 1605 filename, _ = urllib.request.URLopener().retrieve(url) 1606 self.assertEqual(os.path.splitext(filename)[1], ".txt") 1607 1608 @warnings_helper.ignore_warnings(category=DeprecationWarning) 1609 def test_local_file_open(self): 1610 # bpo-35907, CVE-2019-9948: urllib must reject local_file:// scheme 1611 class DummyURLopener(urllib.request.URLopener): 1612 def open_local_file(self, url): 1613 return url 1614 for url in ('local_file://example', 'local-file://example'): 1615 self.assertRaises(OSError, urllib.request.urlopen, url) 1616 self.assertRaises(OSError, urllib.request.URLopener().open, url) 1617 self.assertRaises(OSError, urllib.request.URLopener().retrieve, url) 1618 self.assertRaises(OSError, DummyURLopener().open, url) 1619 self.assertRaises(OSError, DummyURLopener().retrieve, url) 1620 1621 1622class RequestTests(unittest.TestCase): 1623 """Unit tests for urllib.request.Request.""" 1624 1625 def test_default_values(self): 1626 Request = urllib.request.Request 1627 request = Request("http://www.python.org") 1628 self.assertEqual(request.get_method(), 'GET') 1629 request = Request("http://www.python.org", {}) 1630 self.assertEqual(request.get_method(), 'POST') 1631 1632 def test_with_method_arg(self): 1633 Request = urllib.request.Request 1634 request = Request("http://www.python.org", method='HEAD') 1635 self.assertEqual(request.method, 'HEAD') 1636 self.assertEqual(request.get_method(), 'HEAD') 1637 request = Request("http://www.python.org", {}, method='HEAD') 1638 self.assertEqual(request.method, 'HEAD') 1639 
self.assertEqual(request.get_method(), 'HEAD') 1640 request = Request("http://www.python.org", method='GET') 1641 self.assertEqual(request.get_method(), 'GET') 1642 request.method = 'HEAD' 1643 self.assertEqual(request.get_method(), 'HEAD') 1644 1645 1646class URL2PathNameTests(unittest.TestCase): 1647 1648 def test_converting_drive_letter(self): 1649 self.assertEqual(url2pathname("///C|"), 'C:') 1650 self.assertEqual(url2pathname("///C:"), 'C:') 1651 self.assertEqual(url2pathname("///C|/"), 'C:\\') 1652 1653 def test_converting_when_no_drive_letter(self): 1654 # cannot end a raw string in \ 1655 self.assertEqual(url2pathname("///C/test/"), r'\\\C\test' '\\') 1656 self.assertEqual(url2pathname("////C/test/"), r'\\C\test' '\\') 1657 1658 def test_simple_compare(self): 1659 self.assertEqual(url2pathname("///C|/foo/bar/spam.foo"), 1660 r'C:\foo\bar\spam.foo') 1661 1662 def test_non_ascii_drive_letter(self): 1663 self.assertRaises(IOError, url2pathname, "///\u00e8|/") 1664 1665 def test_roundtrip_url2pathname(self): 1666 list_of_paths = ['C:', 1667 r'\\\C\test\\', 1668 r'C:\foo\bar\spam.foo' 1669 ] 1670 for path in list_of_paths: 1671 self.assertEqual(url2pathname(pathname2url(path)), path) 1672 1673class PathName2URLTests(unittest.TestCase): 1674 1675 def test_converting_drive_letter(self): 1676 self.assertEqual(pathname2url("C:"), '///C:') 1677 self.assertEqual(pathname2url("C:\\"), '///C:') 1678 1679 def test_converting_when_no_drive_letter(self): 1680 self.assertEqual(pathname2url(r"\\\folder\test" "\\"), 1681 '/////folder/test/') 1682 self.assertEqual(pathname2url(r"\\folder\test" "\\"), 1683 '////folder/test/') 1684 self.assertEqual(pathname2url(r"\folder\test" "\\"), 1685 '/folder/test/') 1686 1687 def test_simple_compare(self): 1688 self.assertEqual(pathname2url(r'C:\foo\bar\spam.foo'), 1689 "///C:/foo/bar/spam.foo" ) 1690 1691 def test_long_drive_letter(self): 1692 self.assertRaises(IOError, pathname2url, "XX:\\") 1693 1694 def 
test_roundtrip_pathname2url(self): 1695 list_of_paths = ['///C:', 1696 '/////folder/test/', 1697 '///C:/foo/bar/spam.foo'] 1698 for path in list_of_paths: 1699 self.assertEqual(pathname2url(url2pathname(path)), path) 1700 1701if __name__ == '__main__': 1702 unittest.main() 1703