1"""Regression tests for what was in Python 2's "urllib" module""" 2 3import urllib.parse 4import urllib.request 5import urllib.error 6import http.client 7import email.message 8import io 9import unittest 10from unittest.mock import patch 11from test import support 12import os 13try: 14 import ssl 15except ImportError: 16 ssl = None 17import sys 18import tempfile 19from nturl2path import url2pathname, pathname2url 20 21from base64 import b64encode 22import collections 23 24 25def hexescape(char): 26 """Escape char as RFC 2396 specifies""" 27 hex_repr = hex(ord(char))[2:].upper() 28 if len(hex_repr) == 1: 29 hex_repr = "0%s" % hex_repr 30 return "%" + hex_repr 31 32# Shortcut for testing FancyURLopener 33_urlopener = None 34 35 36def urlopen(url, data=None, proxies=None): 37 """urlopen(url [, data]) -> open file-like object""" 38 global _urlopener 39 if proxies is not None: 40 opener = urllib.request.FancyURLopener(proxies=proxies) 41 elif not _urlopener: 42 opener = FancyURLopener() 43 _urlopener = opener 44 else: 45 opener = _urlopener 46 if data is None: 47 return opener.open(url) 48 else: 49 return opener.open(url, data) 50 51 52def FancyURLopener(): 53 with support.check_warnings( 54 ('FancyURLopener style of invoking requests is deprecated.', 55 DeprecationWarning)): 56 return urllib.request.FancyURLopener() 57 58 59def fakehttp(fakedata, mock_close=False): 60 class FakeSocket(io.BytesIO): 61 io_refs = 1 62 63 def sendall(self, data): 64 FakeHTTPConnection.buf = data 65 66 def makefile(self, *args, **kwds): 67 self.io_refs += 1 68 return self 69 70 def read(self, amt=None): 71 if self.closed: 72 return b"" 73 return io.BytesIO.read(self, amt) 74 75 def readline(self, length=None): 76 if self.closed: 77 return b"" 78 return io.BytesIO.readline(self, length) 79 80 def close(self): 81 self.io_refs -= 1 82 if self.io_refs == 0: 83 io.BytesIO.close(self) 84 85 class FakeHTTPConnection(http.client.HTTPConnection): 86 87 # buffer to store data for verification in 
urlopen tests. 88 buf = None 89 90 def connect(self): 91 self.sock = FakeSocket(self.fakedata) 92 type(self).fakesock = self.sock 93 94 if mock_close: 95 # bpo-36918: HTTPConnection destructor calls close() which calls 96 # flush(). Problem: flush() calls self.fp.flush() which raises 97 # "ValueError: I/O operation on closed file" which is logged as an 98 # "Exception ignored in". Override close() to silence this error. 99 def close(self): 100 pass 101 FakeHTTPConnection.fakedata = fakedata 102 103 return FakeHTTPConnection 104 105 106class FakeHTTPMixin(object): 107 def fakehttp(self, fakedata, mock_close=False): 108 fake_http_class = fakehttp(fakedata, mock_close=mock_close) 109 self._connection_class = http.client.HTTPConnection 110 http.client.HTTPConnection = fake_http_class 111 112 def unfakehttp(self): 113 http.client.HTTPConnection = self._connection_class 114 115 116class FakeFTPMixin(object): 117 def fakeftp(self): 118 class FakeFtpWrapper(object): 119 def __init__(self, user, passwd, host, port, dirs, timeout=None, 120 persistent=True): 121 pass 122 123 def retrfile(self, file, type): 124 return io.BytesIO(), 0 125 126 def close(self): 127 pass 128 129 self._ftpwrapper_class = urllib.request.ftpwrapper 130 urllib.request.ftpwrapper = FakeFtpWrapper 131 132 def unfakeftp(self): 133 urllib.request.ftpwrapper = self._ftpwrapper_class 134 135 136class urlopen_FileTests(unittest.TestCase): 137 """Test urlopen() opening a temporary file. 138 139 Try to test as much functionality as possible so as to cut down on reliance 140 on connecting to the Net for testing. 
141 142 """ 143 144 def setUp(self): 145 # Create a temp file to use for testing 146 self.text = bytes("test_urllib: %s\n" % self.__class__.__name__, 147 "ascii") 148 f = open(support.TESTFN, 'wb') 149 try: 150 f.write(self.text) 151 finally: 152 f.close() 153 self.pathname = support.TESTFN 154 self.returned_obj = urlopen("file:%s" % self.pathname) 155 156 def tearDown(self): 157 """Shut down the open object""" 158 self.returned_obj.close() 159 os.remove(support.TESTFN) 160 161 def test_interface(self): 162 # Make sure object returned by urlopen() has the specified methods 163 for attr in ("read", "readline", "readlines", "fileno", 164 "close", "info", "geturl", "getcode", "__iter__"): 165 self.assertTrue(hasattr(self.returned_obj, attr), 166 "object returned by urlopen() lacks %s attribute" % 167 attr) 168 169 def test_read(self): 170 self.assertEqual(self.text, self.returned_obj.read()) 171 172 def test_readline(self): 173 self.assertEqual(self.text, self.returned_obj.readline()) 174 self.assertEqual(b'', self.returned_obj.readline(), 175 "calling readline() after exhausting the file did not" 176 " return an empty string") 177 178 def test_readlines(self): 179 lines_list = self.returned_obj.readlines() 180 self.assertEqual(len(lines_list), 1, 181 "readlines() returned the wrong number of lines") 182 self.assertEqual(lines_list[0], self.text, 183 "readlines() returned improper text") 184 185 def test_fileno(self): 186 file_num = self.returned_obj.fileno() 187 self.assertIsInstance(file_num, int, "fileno() did not return an int") 188 self.assertEqual(os.read(file_num, len(self.text)), self.text, 189 "Reading on the file descriptor returned by fileno() " 190 "did not return the expected text") 191 192 def test_close(self): 193 # Test close() by calling it here and then having it be called again 194 # by the tearDown() method for the test 195 self.returned_obj.close() 196 197 def test_info(self): 198 self.assertIsInstance(self.returned_obj.info(), 
email.message.Message) 199 200 def test_geturl(self): 201 self.assertEqual(self.returned_obj.geturl(), self.pathname) 202 203 def test_getcode(self): 204 self.assertIsNone(self.returned_obj.getcode()) 205 206 def test_iter(self): 207 # Test iterator 208 # Don't need to count number of iterations since test would fail the 209 # instant it returned anything beyond the first line from the 210 # comparison. 211 # Use the iterator in the usual implicit way to test for ticket #4608. 212 for line in self.returned_obj: 213 self.assertEqual(line, self.text) 214 215 def test_relativelocalfile(self): 216 self.assertRaises(ValueError,urllib.request.urlopen,'./' + self.pathname) 217 218 219class ProxyTests(unittest.TestCase): 220 221 def setUp(self): 222 # Records changes to env vars 223 self.env = support.EnvironmentVarGuard() 224 # Delete all proxy related env vars 225 for k in list(os.environ): 226 if 'proxy' in k.lower(): 227 self.env.unset(k) 228 229 def tearDown(self): 230 # Restore all proxy related env vars 231 self.env.__exit__() 232 del self.env 233 234 def test_getproxies_environment_keep_no_proxies(self): 235 self.env.set('NO_PROXY', 'localhost') 236 proxies = urllib.request.getproxies_environment() 237 # getproxies_environment use lowered case truncated (no '_proxy') keys 238 self.assertEqual('localhost', proxies['no']) 239 # List of no_proxies with space. 
240 self.env.set('NO_PROXY', 'localhost, anotherdomain.com, newdomain.com:1234') 241 self.assertTrue(urllib.request.proxy_bypass_environment('anotherdomain.com')) 242 self.assertTrue(urllib.request.proxy_bypass_environment('anotherdomain.com:8888')) 243 self.assertTrue(urllib.request.proxy_bypass_environment('newdomain.com:1234')) 244 245 def test_proxy_cgi_ignore(self): 246 try: 247 self.env.set('HTTP_PROXY', 'http://somewhere:3128') 248 proxies = urllib.request.getproxies_environment() 249 self.assertEqual('http://somewhere:3128', proxies['http']) 250 self.env.set('REQUEST_METHOD', 'GET') 251 proxies = urllib.request.getproxies_environment() 252 self.assertNotIn('http', proxies) 253 finally: 254 self.env.unset('REQUEST_METHOD') 255 self.env.unset('HTTP_PROXY') 256 257 def test_proxy_bypass_environment_host_match(self): 258 bypass = urllib.request.proxy_bypass_environment 259 self.env.set('NO_PROXY', 260 'localhost, anotherdomain.com, newdomain.com:1234, .d.o.t') 261 self.assertTrue(bypass('localhost')) 262 self.assertTrue(bypass('LocalHost')) # MixedCase 263 self.assertTrue(bypass('LOCALHOST')) # UPPERCASE 264 self.assertTrue(bypass('.localhost')) 265 self.assertTrue(bypass('newdomain.com:1234')) 266 self.assertTrue(bypass('.newdomain.com:1234')) 267 self.assertTrue(bypass('foo.d.o.t')) # issue 29142 268 self.assertTrue(bypass('d.o.t')) 269 self.assertTrue(bypass('anotherdomain.com:8888')) 270 self.assertTrue(bypass('.anotherdomain.com:8888')) 271 self.assertTrue(bypass('www.newdomain.com:1234')) 272 self.assertFalse(bypass('prelocalhost')) 273 self.assertFalse(bypass('newdomain.com')) # no port 274 self.assertFalse(bypass('newdomain.com:1235')) # wrong port 275 276 def test_proxy_bypass_environment_always_match(self): 277 bypass = urllib.request.proxy_bypass_environment 278 self.env.set('NO_PROXY', '*') 279 self.assertTrue(bypass('newdomain.com')) 280 self.assertTrue(bypass('newdomain.com:1234')) 281 self.env.set('NO_PROXY', '*, anotherdomain.com') 282 
self.assertTrue(bypass('anotherdomain.com')) 283 self.assertFalse(bypass('newdomain.com')) 284 self.assertFalse(bypass('newdomain.com:1234')) 285 286 def test_proxy_bypass_environment_newline(self): 287 bypass = urllib.request.proxy_bypass_environment 288 self.env.set('NO_PROXY', 289 'localhost, anotherdomain.com, newdomain.com:1234') 290 self.assertFalse(bypass('localhost\n')) 291 self.assertFalse(bypass('anotherdomain.com:8888\n')) 292 self.assertFalse(bypass('newdomain.com:1234\n')) 293 294 295class ProxyTests_withOrderedEnv(unittest.TestCase): 296 297 def setUp(self): 298 # We need to test conditions, where variable order _is_ significant 299 self._saved_env = os.environ 300 # Monkey patch os.environ, start with empty fake environment 301 os.environ = collections.OrderedDict() 302 303 def tearDown(self): 304 os.environ = self._saved_env 305 306 def test_getproxies_environment_prefer_lowercase(self): 307 # Test lowercase preference with removal 308 os.environ['no_proxy'] = '' 309 os.environ['No_Proxy'] = 'localhost' 310 self.assertFalse(urllib.request.proxy_bypass_environment('localhost')) 311 self.assertFalse(urllib.request.proxy_bypass_environment('arbitrary')) 312 os.environ['http_proxy'] = '' 313 os.environ['HTTP_PROXY'] = 'http://somewhere:3128' 314 proxies = urllib.request.getproxies_environment() 315 self.assertEqual({}, proxies) 316 # Test lowercase preference of proxy bypass and correct matching including ports 317 os.environ['no_proxy'] = 'localhost, noproxy.com, my.proxy:1234' 318 os.environ['No_Proxy'] = 'xyz.com' 319 self.assertTrue(urllib.request.proxy_bypass_environment('localhost')) 320 self.assertTrue(urllib.request.proxy_bypass_environment('noproxy.com:5678')) 321 self.assertTrue(urllib.request.proxy_bypass_environment('my.proxy:1234')) 322 self.assertFalse(urllib.request.proxy_bypass_environment('my.proxy')) 323 self.assertFalse(urllib.request.proxy_bypass_environment('arbitrary')) 324 # Test lowercase preference with replacement 325 
os.environ['http_proxy'] = 'http://somewhere:3128' 326 os.environ['Http_Proxy'] = 'http://somewhereelse:3128' 327 proxies = urllib.request.getproxies_environment() 328 self.assertEqual('http://somewhere:3128', proxies['http']) 329 330 331class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin, FakeFTPMixin): 332 """Test urlopen() opening a fake http connection.""" 333 334 def check_read(self, ver): 335 self.fakehttp(b"HTTP/" + ver + b" 200 OK\r\n\r\nHello!") 336 try: 337 fp = urlopen("http://python.org/") 338 self.assertEqual(fp.readline(), b"Hello!") 339 self.assertEqual(fp.readline(), b"") 340 self.assertEqual(fp.geturl(), 'http://python.org/') 341 self.assertEqual(fp.getcode(), 200) 342 finally: 343 self.unfakehttp() 344 345 def test_url_fragment(self): 346 # Issue #11703: geturl() omits fragments in the original URL. 347 url = 'http://docs.python.org/library/urllib.html#OK' 348 self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!") 349 try: 350 fp = urllib.request.urlopen(url) 351 self.assertEqual(fp.geturl(), url) 352 finally: 353 self.unfakehttp() 354 355 def test_willclose(self): 356 self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!") 357 try: 358 resp = urlopen("http://www.python.org") 359 self.assertTrue(resp.fp.will_close) 360 finally: 361 self.unfakehttp() 362 363 @unittest.skipUnless(ssl, "ssl module required") 364 def test_url_path_with_control_char_rejected(self): 365 for char_no in list(range(0, 0x21)) + [0x7f]: 366 char = chr(char_no) 367 schemeless_url = f"//localhost:7777/test{char}/" 368 self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.") 369 try: 370 # We explicitly test urllib.request.urlopen() instead of the top 371 # level 'def urlopen()' function defined in this... (quite ugly) 372 # test suite. They use different url opening codepaths. Plain 373 # urlopen uses FancyURLOpener which goes via a codepath that 374 # calls urllib.parse.quote() on the URL which makes all of the 375 # above attempts at injection within the url _path_ safe. 
376 escaped_char_repr = repr(char).replace('\\', r'\\') 377 InvalidURL = http.client.InvalidURL 378 with self.assertRaisesRegex( 379 InvalidURL, f"contain control.*{escaped_char_repr}"): 380 urllib.request.urlopen(f"http:{schemeless_url}") 381 with self.assertRaisesRegex( 382 InvalidURL, f"contain control.*{escaped_char_repr}"): 383 urllib.request.urlopen(f"https:{schemeless_url}") 384 # This code path quotes the URL so there is no injection. 385 resp = urlopen(f"http:{schemeless_url}") 386 self.assertNotIn(char, resp.geturl()) 387 finally: 388 self.unfakehttp() 389 390 @unittest.skipUnless(ssl, "ssl module required") 391 def test_url_path_with_newline_header_injection_rejected(self): 392 self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.") 393 host = "localhost:7777?a=1 HTTP/1.1\r\nX-injected: header\r\nTEST: 123" 394 schemeless_url = "//" + host + ":8080/test/?test=a" 395 try: 396 # We explicitly test urllib.request.urlopen() instead of the top 397 # level 'def urlopen()' function defined in this... (quite ugly) 398 # test suite. They use different url opening codepaths. Plain 399 # urlopen uses FancyURLOpener which goes via a codepath that 400 # calls urllib.parse.quote() on the URL which makes all of the 401 # above attempts at injection within the url _path_ safe. 402 InvalidURL = http.client.InvalidURL 403 with self.assertRaisesRegex( 404 InvalidURL, r"contain control.*\\r.*(found at least . .)"): 405 urllib.request.urlopen(f"http:{schemeless_url}") 406 with self.assertRaisesRegex(InvalidURL, r"contain control.*\\n"): 407 urllib.request.urlopen(f"https:{schemeless_url}") 408 # This code path quotes the URL so there is no injection. 
409 resp = urlopen(f"http:{schemeless_url}") 410 self.assertNotIn(' ', resp.geturl()) 411 self.assertNotIn('\r', resp.geturl()) 412 self.assertNotIn('\n', resp.geturl()) 413 finally: 414 self.unfakehttp() 415 416 @unittest.skipUnless(ssl, "ssl module required") 417 def test_url_host_with_control_char_rejected(self): 418 for char_no in list(range(0, 0x21)) + [0x7f]: 419 char = chr(char_no) 420 schemeless_url = f"//localhost{char}/test/" 421 self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.") 422 try: 423 escaped_char_repr = repr(char).replace('\\', r'\\') 424 InvalidURL = http.client.InvalidURL 425 with self.assertRaisesRegex( 426 InvalidURL, f"contain control.*{escaped_char_repr}"): 427 urlopen(f"http:{schemeless_url}") 428 with self.assertRaisesRegex(InvalidURL, f"contain control.*{escaped_char_repr}"): 429 urlopen(f"https:{schemeless_url}") 430 finally: 431 self.unfakehttp() 432 433 @unittest.skipUnless(ssl, "ssl module required") 434 def test_url_host_with_newline_header_injection_rejected(self): 435 self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.") 436 host = "localhost\r\nX-injected: header\r\n" 437 schemeless_url = "//" + host + ":8080/test/?test=a" 438 try: 439 InvalidURL = http.client.InvalidURL 440 with self.assertRaisesRegex( 441 InvalidURL, r"contain control.*\\r"): 442 urlopen(f"http:{schemeless_url}") 443 with self.assertRaisesRegex(InvalidURL, r"contain control.*\\n"): 444 urlopen(f"https:{schemeless_url}") 445 finally: 446 self.unfakehttp() 447 448 def test_read_0_9(self): 449 # "0.9" response accepted (but not "simple responses" without 450 # a status line) 451 self.check_read(b"0.9") 452 453 def test_read_1_0(self): 454 self.check_read(b"1.0") 455 456 def test_read_1_1(self): 457 self.check_read(b"1.1") 458 459 def test_read_bogus(self): 460 # urlopen() should raise OSError for many error codes. 
461 self.fakehttp(b'''HTTP/1.1 401 Authentication Required 462Date: Wed, 02 Jan 2008 03:03:54 GMT 463Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e 464Connection: close 465Content-Type: text/html; charset=iso-8859-1 466''', mock_close=True) 467 try: 468 self.assertRaises(OSError, urlopen, "http://python.org/") 469 finally: 470 self.unfakehttp() 471 472 def test_invalid_redirect(self): 473 # urlopen() should raise OSError for many error codes. 474 self.fakehttp(b'''HTTP/1.1 302 Found 475Date: Wed, 02 Jan 2008 03:03:54 GMT 476Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e 477Location: file://guidocomputer.athome.com:/python/license 478Connection: close 479Content-Type: text/html; charset=iso-8859-1 480''', mock_close=True) 481 try: 482 msg = "Redirection to url 'file:" 483 with self.assertRaisesRegex(urllib.error.HTTPError, msg): 484 urlopen("http://python.org/") 485 finally: 486 self.unfakehttp() 487 488 def test_redirect_limit_independent(self): 489 # Ticket #12923: make sure independent requests each use their 490 # own retry limit. 491 for i in range(FancyURLopener().maxtries): 492 self.fakehttp(b'''HTTP/1.1 302 Found 493Location: file://guidocomputer.athome.com:/python/license 494Connection: close 495''', mock_close=True) 496 try: 497 self.assertRaises(urllib.error.HTTPError, urlopen, 498 "http://something") 499 finally: 500 self.unfakehttp() 501 502 def test_empty_socket(self): 503 # urlopen() raises OSError if the underlying socket does not send any 504 # data. 
(#1680230) 505 self.fakehttp(b'') 506 try: 507 self.assertRaises(OSError, urlopen, "http://something") 508 finally: 509 self.unfakehttp() 510 511 def test_missing_localfile(self): 512 # Test for #10836 513 with self.assertRaises(urllib.error.URLError) as e: 514 urlopen('file://localhost/a/file/which/doesnot/exists.py') 515 self.assertTrue(e.exception.filename) 516 self.assertTrue(e.exception.reason) 517 518 def test_file_notexists(self): 519 fd, tmp_file = tempfile.mkstemp() 520 tmp_fileurl = 'file://localhost/' + tmp_file.replace(os.path.sep, '/') 521 try: 522 self.assertTrue(os.path.exists(tmp_file)) 523 with urlopen(tmp_fileurl) as fobj: 524 self.assertTrue(fobj) 525 finally: 526 os.close(fd) 527 os.unlink(tmp_file) 528 self.assertFalse(os.path.exists(tmp_file)) 529 with self.assertRaises(urllib.error.URLError): 530 urlopen(tmp_fileurl) 531 532 def test_ftp_nohost(self): 533 test_ftp_url = 'ftp:///path' 534 with self.assertRaises(urllib.error.URLError) as e: 535 urlopen(test_ftp_url) 536 self.assertFalse(e.exception.filename) 537 self.assertTrue(e.exception.reason) 538 539 def test_ftp_nonexisting(self): 540 with self.assertRaises(urllib.error.URLError) as e: 541 urlopen('ftp://localhost/a/file/which/doesnot/exists.py') 542 self.assertFalse(e.exception.filename) 543 self.assertTrue(e.exception.reason) 544 545 @patch.object(urllib.request, 'MAXFTPCACHE', 0) 546 def test_ftp_cache_pruning(self): 547 self.fakeftp() 548 try: 549 urllib.request.ftpcache['test'] = urllib.request.ftpwrapper('user', 'pass', 'localhost', 21, []) 550 urlopen('ftp://localhost') 551 finally: 552 self.unfakeftp() 553 554 def test_userpass_inurl(self): 555 self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!") 556 try: 557 fp = urlopen("http://user:pass@python.org/") 558 self.assertEqual(fp.readline(), b"Hello!") 559 self.assertEqual(fp.readline(), b"") 560 self.assertEqual(fp.geturl(), 'http://user:pass@python.org/') 561 self.assertEqual(fp.getcode(), 200) 562 finally: 563 self.unfakehttp() 564 565 
def test_userpass_inurl_w_spaces(self): 566 self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!") 567 try: 568 userpass = "a b:c d" 569 url = "http://{}@python.org/".format(userpass) 570 fakehttp_wrapper = http.client.HTTPConnection 571 authorization = ("Authorization: Basic %s\r\n" % 572 b64encode(userpass.encode("ASCII")).decode("ASCII")) 573 fp = urlopen(url) 574 # The authorization header must be in place 575 self.assertIn(authorization, fakehttp_wrapper.buf.decode("UTF-8")) 576 self.assertEqual(fp.readline(), b"Hello!") 577 self.assertEqual(fp.readline(), b"") 578 # the spaces are quoted in URL so no match 579 self.assertNotEqual(fp.geturl(), url) 580 self.assertEqual(fp.getcode(), 200) 581 finally: 582 self.unfakehttp() 583 584 def test_URLopener_deprecation(self): 585 with support.check_warnings(('',DeprecationWarning)): 586 urllib.request.URLopener() 587 588 @unittest.skipUnless(ssl, "ssl module required") 589 def test_cafile_and_context(self): 590 context = ssl.create_default_context() 591 with support.check_warnings(('', DeprecationWarning)): 592 with self.assertRaises(ValueError): 593 urllib.request.urlopen( 594 "https://localhost", cafile="/nonexistent/path", context=context 595 ) 596 597 598class urlopen_DataTests(unittest.TestCase): 599 """Test urlopen() opening a data URL.""" 600 601 def setUp(self): 602 # text containing URL special- and unicode-characters 603 self.text = "test data URLs :;,%=& \u00f6 \u00c4 " 604 # 2x1 pixel RGB PNG image with one black and one white pixel 605 self.image = ( 606 b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x02\x00\x00\x00' 607 b'\x01\x08\x02\x00\x00\x00{@\xe8\xdd\x00\x00\x00\x01sRGB\x00\xae' 608 b'\xce\x1c\xe9\x00\x00\x00\x0fIDAT\x08\xd7c```\xf8\xff\xff?\x00' 609 b'\x06\x01\x02\xfe\no/\x1e\x00\x00\x00\x00IEND\xaeB`\x82') 610 611 self.text_url = ( 612 "data:text/plain;charset=UTF-8,test%20data%20URLs%20%3A%3B%2C%25%3" 613 "D%26%20%C3%B6%20%C3%84%20") 614 self.text_url_base64 = ( 615 
"data:text/plain;charset=ISO-8859-1;base64,dGVzdCBkYXRhIFVSTHMgOjs" 616 "sJT0mIPYgxCA%3D") 617 # base64 encoded data URL that contains ignorable spaces, 618 # such as "\n", " ", "%0A", and "%20". 619 self.image_url = ( 620 "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAIAAAABCAIAAAB7\n" 621 "QOjdAAAAAXNSR0IArs4c6QAAAA9JREFUCNdj%0AYGBg%2BP//PwAGAQL%2BCm8 " 622 "vHgAAAABJRU5ErkJggg%3D%3D%0A%20") 623 624 self.text_url_resp = urllib.request.urlopen(self.text_url) 625 self.text_url_base64_resp = urllib.request.urlopen( 626 self.text_url_base64) 627 self.image_url_resp = urllib.request.urlopen(self.image_url) 628 629 def test_interface(self): 630 # Make sure object returned by urlopen() has the specified methods 631 for attr in ("read", "readline", "readlines", 632 "close", "info", "geturl", "getcode", "__iter__"): 633 self.assertTrue(hasattr(self.text_url_resp, attr), 634 "object returned by urlopen() lacks %s attribute" % 635 attr) 636 637 def test_info(self): 638 self.assertIsInstance(self.text_url_resp.info(), email.message.Message) 639 self.assertEqual(self.text_url_base64_resp.info().get_params(), 640 [('text/plain', ''), ('charset', 'ISO-8859-1')]) 641 self.assertEqual(self.image_url_resp.info()['content-length'], 642 str(len(self.image))) 643 self.assertEqual(urllib.request.urlopen("data:,").info().get_params(), 644 [('text/plain', ''), ('charset', 'US-ASCII')]) 645 646 def test_geturl(self): 647 self.assertEqual(self.text_url_resp.geturl(), self.text_url) 648 self.assertEqual(self.text_url_base64_resp.geturl(), 649 self.text_url_base64) 650 self.assertEqual(self.image_url_resp.geturl(), self.image_url) 651 652 def test_read_text(self): 653 self.assertEqual(self.text_url_resp.read().decode( 654 dict(self.text_url_resp.info().get_params())['charset']), self.text) 655 656 def test_read_text_base64(self): 657 self.assertEqual(self.text_url_base64_resp.read().decode( 658 dict(self.text_url_base64_resp.info().get_params())['charset']), 659 self.text) 660 661 def 
test_read_image(self): 662 self.assertEqual(self.image_url_resp.read(), self.image) 663 664 def test_missing_comma(self): 665 self.assertRaises(ValueError,urllib.request.urlopen,'data:text/plain') 666 667 def test_invalid_base64_data(self): 668 # missing padding character 669 self.assertRaises(ValueError,urllib.request.urlopen,'data:;base64,Cg=') 670 671 672class urlretrieve_FileTests(unittest.TestCase): 673 """Test urllib.urlretrieve() on local files""" 674 675 def setUp(self): 676 # Create a list of temporary files. Each item in the list is a file 677 # name (absolute path or relative to the current working directory). 678 # All files in this list will be deleted in the tearDown method. Note, 679 # this only helps to makes sure temporary files get deleted, but it 680 # does nothing about trying to close files that may still be open. It 681 # is the responsibility of the developer to properly close files even 682 # when exceptional conditions occur. 683 self.tempFiles = [] 684 685 # Create a temporary file. 686 self.registerFileForCleanUp(support.TESTFN) 687 self.text = b'testing urllib.urlretrieve' 688 try: 689 FILE = open(support.TESTFN, 'wb') 690 FILE.write(self.text) 691 FILE.close() 692 finally: 693 try: FILE.close() 694 except: pass 695 696 def tearDown(self): 697 # Delete the temporary files. 
698 for each in self.tempFiles: 699 try: os.remove(each) 700 except: pass 701 702 def constructLocalFileUrl(self, filePath): 703 filePath = os.path.abspath(filePath) 704 try: 705 filePath.encode("utf-8") 706 except UnicodeEncodeError: 707 raise unittest.SkipTest("filePath is not encodable to utf8") 708 return "file://%s" % urllib.request.pathname2url(filePath) 709 710 def createNewTempFile(self, data=b""): 711 """Creates a new temporary file containing the specified data, 712 registers the file for deletion during the test fixture tear down, and 713 returns the absolute path of the file.""" 714 715 newFd, newFilePath = tempfile.mkstemp() 716 try: 717 self.registerFileForCleanUp(newFilePath) 718 newFile = os.fdopen(newFd, "wb") 719 newFile.write(data) 720 newFile.close() 721 finally: 722 try: newFile.close() 723 except: pass 724 return newFilePath 725 726 def registerFileForCleanUp(self, fileName): 727 self.tempFiles.append(fileName) 728 729 def test_basic(self): 730 # Make sure that a local file just gets its own location returned and 731 # a headers value is returned. 732 result = urllib.request.urlretrieve("file:%s" % support.TESTFN) 733 self.assertEqual(result[0], support.TESTFN) 734 self.assertIsInstance(result[1], email.message.Message, 735 "did not get an email.message.Message instance " 736 "as second returned value") 737 738 def test_copy(self): 739 # Test that setting the filename argument works. 
740 second_temp = "%s.2" % support.TESTFN 741 self.registerFileForCleanUp(second_temp) 742 result = urllib.request.urlretrieve(self.constructLocalFileUrl( 743 support.TESTFN), second_temp) 744 self.assertEqual(second_temp, result[0]) 745 self.assertTrue(os.path.exists(second_temp), "copy of the file was not " 746 "made") 747 FILE = open(second_temp, 'rb') 748 try: 749 text = FILE.read() 750 FILE.close() 751 finally: 752 try: FILE.close() 753 except: pass 754 self.assertEqual(self.text, text) 755 756 def test_reporthook(self): 757 # Make sure that the reporthook works. 758 def hooktester(block_count, block_read_size, file_size, count_holder=[0]): 759 self.assertIsInstance(block_count, int) 760 self.assertIsInstance(block_read_size, int) 761 self.assertIsInstance(file_size, int) 762 self.assertEqual(block_count, count_holder[0]) 763 count_holder[0] = count_holder[0] + 1 764 second_temp = "%s.2" % support.TESTFN 765 self.registerFileForCleanUp(second_temp) 766 urllib.request.urlretrieve( 767 self.constructLocalFileUrl(support.TESTFN), 768 second_temp, hooktester) 769 770 def test_reporthook_0_bytes(self): 771 # Test on zero length file. Should call reporthook only 1 time. 772 report = [] 773 def hooktester(block_count, block_read_size, file_size, _report=report): 774 _report.append((block_count, block_read_size, file_size)) 775 srcFileName = self.createNewTempFile() 776 urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName), 777 support.TESTFN, hooktester) 778 self.assertEqual(len(report), 1) 779 self.assertEqual(report[0][2], 0) 780 781 def test_reporthook_5_bytes(self): 782 # Test on 5 byte file. Should call reporthook only 2 times (once when 783 # the "network connection" is established and once when the block is 784 # read). 
785 report = [] 786 def hooktester(block_count, block_read_size, file_size, _report=report): 787 _report.append((block_count, block_read_size, file_size)) 788 srcFileName = self.createNewTempFile(b"x" * 5) 789 urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName), 790 support.TESTFN, hooktester) 791 self.assertEqual(len(report), 2) 792 self.assertEqual(report[0][2], 5) 793 self.assertEqual(report[1][2], 5) 794 795 def test_reporthook_8193_bytes(self): 796 # Test on 8193 byte file. Should call reporthook only 3 times (once 797 # when the "network connection" is established, once for the next 8192 798 # bytes, and once for the last byte). 799 report = [] 800 def hooktester(block_count, block_read_size, file_size, _report=report): 801 _report.append((block_count, block_read_size, file_size)) 802 srcFileName = self.createNewTempFile(b"x" * 8193) 803 urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName), 804 support.TESTFN, hooktester) 805 self.assertEqual(len(report), 3) 806 self.assertEqual(report[0][2], 8193) 807 self.assertEqual(report[0][1], 8192) 808 self.assertEqual(report[1][1], 8192) 809 self.assertEqual(report[2][1], 8192) 810 811 812class urlretrieve_HttpTests(unittest.TestCase, FakeHTTPMixin): 813 """Test urllib.urlretrieve() using fake http connections""" 814 815 def test_short_content_raises_ContentTooShortError(self): 816 self.fakehttp(b'''HTTP/1.1 200 OK 817Date: Wed, 02 Jan 2008 03:03:54 GMT 818Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e 819Connection: close 820Content-Length: 100 821Content-Type: text/html; charset=iso-8859-1 822 823FF 824''') 825 826 def _reporthook(par1, par2, par3): 827 pass 828 829 with self.assertRaises(urllib.error.ContentTooShortError): 830 try: 831 urllib.request.urlretrieve(support.TEST_HTTP_URL, 832 reporthook=_reporthook) 833 finally: 834 self.unfakehttp() 835 836 def test_short_content_raises_ContentTooShortError_without_reporthook(self): 837 self.fakehttp(b'''HTTP/1.1 
200 OK 838Date: Wed, 02 Jan 2008 03:03:54 GMT 839Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e 840Connection: close 841Content-Length: 100 842Content-Type: text/html; charset=iso-8859-1 843 844FF 845''') 846 with self.assertRaises(urllib.error.ContentTooShortError): 847 try: 848 urllib.request.urlretrieve(support.TEST_HTTP_URL) 849 finally: 850 self.unfakehttp() 851 852 853class QuotingTests(unittest.TestCase): 854 r"""Tests for urllib.quote() and urllib.quote_plus() 855 856 According to RFC 3986 (Uniform Resource Identifiers), to escape a 857 character you write it as '%' + <2 character US-ASCII hex value>. 858 The Python code of ``'%' + hex(ord(<character>))[2:]`` escapes a 859 character properly. Case does not matter on the hex letters. 860 861 The various character sets specified are: 862 863 Reserved characters : ";/?:@&=+$," 864 Have special meaning in URIs and must be escaped if not being used for 865 their special meaning 866 Data characters : letters, digits, and "-_.!~*'()" 867 Unreserved and do not need to be escaped; can be, though, if desired 868 Control characters : 0x00 - 0x1F, 0x7F 869 Have no use in URIs so must be escaped 870 space : 0x20 871 Must be escaped 872 Delimiters : '<>#%"' 873 Must be escaped 874 Unwise : "{}|\^[]`" 875 Must be escaped 876 877 """ 878 879 def test_never_quote(self): 880 # Make sure quote() does not quote letters, digits, and "_,.-" 881 do_not_quote = '' .join(["ABCDEFGHIJKLMNOPQRSTUVWXYZ", 882 "abcdefghijklmnopqrstuvwxyz", 883 "0123456789", 884 "_.-~"]) 885 result = urllib.parse.quote(do_not_quote) 886 self.assertEqual(do_not_quote, result, 887 "using quote(): %r != %r" % (do_not_quote, result)) 888 result = urllib.parse.quote_plus(do_not_quote) 889 self.assertEqual(do_not_quote, result, 890 "using quote_plus(): %r != %r" % (do_not_quote, result)) 891 892 def test_default_safe(self): 893 # Test '/' is default value for 'safe' parameter 894 self.assertEqual(urllib.parse.quote.__defaults__[0], '/') 

    def test_safe(self):
        # Characters listed in 'safe' must be left unescaped, for both
        # quote() and quote_plus().
        quote_by_default = "<>"
        result = urllib.parse.quote(quote_by_default, safe=quote_by_default)
        self.assertEqual(quote_by_default, result,
                         "using quote(): %r != %r" % (quote_by_default, result))
        result = urllib.parse.quote_plus(quote_by_default,
                                         safe=quote_by_default)
        self.assertEqual(quote_by_default, result,
                         "using quote_plus(): %r != %r" %
                         (quote_by_default, result))
        # Safe expressed as bytes rather than str
        result = urllib.parse.quote(quote_by_default, safe=b"<>")
        self.assertEqual(quote_by_default, result,
                         "using quote(): %r != %r" % (quote_by_default, result))
        # "Safe" non-ASCII characters should have no effect
        # (Since URIs are not allowed to have non-ASCII characters)
        result = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="\xfc")
        expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" %
                         (expect, result))
        # Same as above, but using a bytes rather than str
        result = urllib.parse.quote("a\xfcb", encoding="latin-1", safe=b"\xfc")
        expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" %
                         (expect, result))

    def test_default_quoting(self):
        # Make sure all characters that should be quoted are by default sans
        # space (separate test for that).
928 should_quote = [chr(num) for num in range(32)] # For 0x00 - 0x1F 929 should_quote.append(r'<>#%"{}|\^[]`') 930 should_quote.append(chr(127)) # For 0x7F 931 should_quote = ''.join(should_quote) 932 for char in should_quote: 933 result = urllib.parse.quote(char) 934 self.assertEqual(hexescape(char), result, 935 "using quote(): " 936 "%s should be escaped to %s, not %s" % 937 (char, hexescape(char), result)) 938 result = urllib.parse.quote_plus(char) 939 self.assertEqual(hexescape(char), result, 940 "using quote_plus(): " 941 "%s should be escapes to %s, not %s" % 942 (char, hexescape(char), result)) 943 del should_quote 944 partial_quote = "ab[]cd" 945 expected = "ab%5B%5Dcd" 946 result = urllib.parse.quote(partial_quote) 947 self.assertEqual(expected, result, 948 "using quote(): %r != %r" % (expected, result)) 949 result = urllib.parse.quote_plus(partial_quote) 950 self.assertEqual(expected, result, 951 "using quote_plus(): %r != %r" % (expected, result)) 952 953 def test_quoting_space(self): 954 # Make sure quote() and quote_plus() handle spaces as specified in 955 # their unique way 956 result = urllib.parse.quote(' ') 957 self.assertEqual(result, hexescape(' '), 958 "using quote(): %r != %r" % (result, hexescape(' '))) 959 result = urllib.parse.quote_plus(' ') 960 self.assertEqual(result, '+', 961 "using quote_plus(): %r != +" % result) 962 given = "a b cd e f" 963 expect = given.replace(' ', hexescape(' ')) 964 result = urllib.parse.quote(given) 965 self.assertEqual(expect, result, 966 "using quote(): %r != %r" % (expect, result)) 967 expect = given.replace(' ', '+') 968 result = urllib.parse.quote_plus(given) 969 self.assertEqual(expect, result, 970 "using quote_plus(): %r != %r" % (expect, result)) 971 972 def test_quoting_plus(self): 973 self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma'), 974 'alpha%2Bbeta+gamma') 975 self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', '+'), 976 'alpha+beta+gamma') 977 # Test with bytes 978 
        self.assertEqual(urllib.parse.quote_plus(b'alpha+beta gamma'),
                         'alpha%2Bbeta+gamma')
        # Test with safe bytes
        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', b'+'),
                         'alpha+beta+gamma')

    def test_quote_bytes(self):
        # Bytes should quote directly to percent-encoded values
        given = b"\xa2\xd8ab\xff"
        expect = "%A2%D8ab%FF"
        result = urllib.parse.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Encoding argument should raise type error on bytes input
        self.assertRaises(TypeError, urllib.parse.quote, given,
                          encoding="latin-1")
        # quote_from_bytes should work the same
        result = urllib.parse.quote_from_bytes(given)
        self.assertEqual(expect, result,
                         "using quote_from_bytes(): %r != %r"
                         % (expect, result))

    def test_quote_with_unicode(self):
        # Characters in Latin-1 range, encoded by default in UTF-8
        given = "\xa2\xd8ab\xff"
        expect = "%C2%A2%C3%98ab%C3%BF"
        result = urllib.parse.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in Latin-1 range, encoded by with None (default)
        result = urllib.parse.quote(given, encoding=None, errors=None)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in Latin-1 range, encoded with Latin-1
        given = "\xa2\xd8ab\xff"
        expect = "%A2%D8ab%FF"
        result = urllib.parse.quote(given, encoding="latin-1")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in BMP, encoded by default in UTF-8
        given = "\u6f22\u5b57"              # "Kanji"
        expect = "%E6%BC%A2%E5%AD%97"
        result = urllib.parse.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in BMP, encoded with Latin-1
        given = "\u6f22\u5b57"
        self.assertRaises(UnicodeEncodeError, urllib.parse.quote, given,
                          encoding="latin-1")
        # Characters in BMP, encoded with Latin-1, with replace error handling
        given = "\u6f22\u5b57"
        expect = "%3F%3F"                   # "??"
        result = urllib.parse.quote(given, encoding="latin-1",
                                    errors="replace")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in BMP, Latin-1, with xmlcharref error handling
        given = "\u6f22\u5b57"
        expect = "%26%2328450%3B%26%2323383%3B"     # "&#28450;&#23383;"
        result = urllib.parse.quote(given, encoding="latin-1",
                                    errors="xmlcharrefreplace")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))

    def test_quote_plus_with_unicode(self):
        # Encoding (latin-1) test for quote_plus
        given = "\xa2\xd8 \xff"
        expect = "%A2%D8+%FF"
        result = urllib.parse.quote_plus(given, encoding="latin-1")
        self.assertEqual(expect, result,
                         "using quote_plus(): %r != %r" % (expect, result))
        # Errors test for quote_plus
        given = "ab\u6f22\u5b57 cd"
        expect = "ab%3F%3F+cd"
        result = urllib.parse.quote_plus(given, encoding="latin-1",
                                         errors="replace")
        self.assertEqual(expect, result,
                         "using quote_plus(): %r != %r" % (expect, result))


class UnquotingTests(unittest.TestCase):
    """Tests for unquote() and unquote_plus()

    See the doc string for QuotingTests for details on quoting and such.

    """

    def test_unquoting(self):
        # Make sure unquoting of all ASCII values works
        escape_list = []
        for num in range(128):
            given = hexescape(chr(num))
            expect = chr(num)
            result = urllib.parse.unquote(given)
            self.assertEqual(expect, result,
                             "using unquote(): %r != %r" % (expect, result))
            result = urllib.parse.unquote_plus(given)
            self.assertEqual(expect, result,
                             "using unquote_plus(): %r != %r" %
                             (expect, result))
            escape_list.append(given)
        escape_string = ''.join(escape_list)
        del escape_list
        result = urllib.parse.unquote(escape_string)
        # Only the literal '%' escape should survive as a percent sign.
        self.assertEqual(result.count('%'), 1,
                         "using unquote(): not all characters escaped: "
                         "%s" % result)
        # Non-string input must raise, not silently succeed.
        self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, None)
        self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, ())
        with support.check_warnings(('', BytesWarning), quiet=True):
            self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, b'')

    def test_unquoting_badpercent(self):
        # Malformed percent-escapes must be passed through unchanged.
        given = '%xab'
        expect = given
        result = urllib.parse.unquote(given)
        self.assertEqual(expect, result, "using unquote(): %r != %r"
                         % (expect, result))
        given = '%x'
        expect = given
        result = urllib.parse.unquote(given)
        self.assertEqual(expect, result, "using unquote(): %r != %r"
                         % (expect, result))
        given = '%'
        expect = given
        result = urllib.parse.unquote(given)
        self.assertEqual(expect, result, "using unquote(): %r != %r"
                         % (expect, result))
        # unquote_to_bytes
        given = '%xab'
        expect = bytes(given, 'ascii')
        result = urllib.parse.unquote_to_bytes(given)
        self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
                         % (expect, result))
        given = '%x'
        expect = bytes(given, 'ascii')
        result = urllib.parse.unquote_to_bytes(given)
1116 self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r" 1117 % (expect, result)) 1118 given = '%' 1119 expect = bytes(given, 'ascii') 1120 result = urllib.parse.unquote_to_bytes(given) 1121 self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r" 1122 % (expect, result)) 1123 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote_to_bytes, None) 1124 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote_to_bytes, ()) 1125 1126 def test_unquoting_mixed_case(self): 1127 # Test unquoting on mixed-case hex digits in the percent-escapes 1128 given = '%Ab%eA' 1129 expect = b'\xab\xea' 1130 result = urllib.parse.unquote_to_bytes(given) 1131 self.assertEqual(expect, result, 1132 "using unquote_to_bytes(): %r != %r" 1133 % (expect, result)) 1134 1135 def test_unquoting_parts(self): 1136 # Make sure unquoting works when have non-quoted characters 1137 # interspersed 1138 given = 'ab%sd' % hexescape('c') 1139 expect = "abcd" 1140 result = urllib.parse.unquote(given) 1141 self.assertEqual(expect, result, 1142 "using quote(): %r != %r" % (expect, result)) 1143 result = urllib.parse.unquote_plus(given) 1144 self.assertEqual(expect, result, 1145 "using unquote_plus(): %r != %r" % (expect, result)) 1146 1147 def test_unquoting_plus(self): 1148 # Test difference between unquote() and unquote_plus() 1149 given = "are+there+spaces..." 
        expect = given
        result = urllib.parse.unquote(given)
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))
        expect = given.replace('+', ' ')
        result = urllib.parse.unquote_plus(given)
        self.assertEqual(expect, result,
                         "using unquote_plus(): %r != %r" % (expect, result))

    def test_unquote_to_bytes(self):
        # unquote_to_bytes() decodes percent-escapes to raw bytes, never str.
        given = 'br%C3%BCckner_sapporo_20050930.doc'
        expect = b'br\xc3\xbcckner_sapporo_20050930.doc'
        result = urllib.parse.unquote_to_bytes(given)
        self.assertEqual(expect, result,
                         "using unquote_to_bytes(): %r != %r"
                         % (expect, result))
        # Test on a string with unescaped non-ASCII characters
        # (Technically an invalid URI; expect those characters to be UTF-8
        # encoded).
        result = urllib.parse.unquote_to_bytes("\u6f22%C3%BC")
        expect = b'\xe6\xbc\xa2\xc3\xbc'    # UTF-8 for "\u6f22\u00fc"
        self.assertEqual(expect, result,
                         "using unquote_to_bytes(): %r != %r"
                         % (expect, result))
        # Test with a bytes as input
        given = b'%A2%D8ab%FF'
        expect = b'\xa2\xd8ab\xff'
        result = urllib.parse.unquote_to_bytes(given)
        self.assertEqual(expect, result,
                         "using unquote_to_bytes(): %r != %r"
                         % (expect, result))
        # Test with a bytes as input, with unescaped non-ASCII bytes
        # (Technically an invalid URI; expect those bytes to be preserved)
        given = b'%A2\xd8ab%FF'
        expect = b'\xa2\xd8ab\xff'
        result = urllib.parse.unquote_to_bytes(given)
        self.assertEqual(expect, result,
                         "using unquote_to_bytes(): %r != %r"
                         % (expect, result))

    def test_unquote_with_unicode(self):
        # Characters in the Latin-1 range, encoded with UTF-8
        given = 'br%C3%BCckner_sapporo_20050930.doc'
        expect = 'br\u00fcckner_sapporo_20050930.doc'
        result = urllib.parse.unquote(given)
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))
        # Characters in the Latin-1 range, encoded with None (default)
        result = urllib.parse.unquote(given, encoding=None, errors=None)
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # Characters in the Latin-1 range, encoded with Latin-1
        result = urllib.parse.unquote('br%FCckner_sapporo_20050930.doc',
                                      encoding="latin-1")
        expect = 'br\u00fcckner_sapporo_20050930.doc'
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # Characters in BMP, encoded with UTF-8
        given = "%E6%BC%A2%E5%AD%97"
        expect = "\u6f22\u5b57"             # "Kanji"
        result = urllib.parse.unquote(given)
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # Decode with UTF-8, invalid sequence
        given = "%F3%B1"
        expect = "\ufffd"                   # Replacement character
        result = urllib.parse.unquote(given)
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # Decode with UTF-8, invalid sequence, replace errors
        result = urllib.parse.unquote(given, errors="replace")
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # Decode with UTF-8, invalid sequence, ignoring errors
        given = "%F3%B1"
        expect = ""
        result = urllib.parse.unquote(given, errors="ignore")
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # A mix of non-ASCII and percent-encoded characters, UTF-8
        result = urllib.parse.unquote("\u6f22%C3%BC")
        expect = '\u6f22\u00fc'
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # A mix of non-ASCII and percent-encoded characters, Latin-1
        # (Note, the string contains non-Latin-1-representable characters)
        result = urllib.parse.unquote("\u6f22%FC", encoding="latin-1")
        expect = '\u6f22\u00fc'
        self.assertEqual(expect, result,
"using unquote(): %r != %r" % (expect, result)) 1247 1248class urlencode_Tests(unittest.TestCase): 1249 """Tests for urlencode()""" 1250 1251 def help_inputtype(self, given, test_type): 1252 """Helper method for testing different input types. 1253 1254 'given' must lead to only the pairs: 1255 * 1st, 1 1256 * 2nd, 2 1257 * 3rd, 3 1258 1259 Test cannot assume anything about order. Docs make no guarantee and 1260 have possible dictionary input. 1261 1262 """ 1263 expect_somewhere = ["1st=1", "2nd=2", "3rd=3"] 1264 result = urllib.parse.urlencode(given) 1265 for expected in expect_somewhere: 1266 self.assertIn(expected, result, 1267 "testing %s: %s not found in %s" % 1268 (test_type, expected, result)) 1269 self.assertEqual(result.count('&'), 2, 1270 "testing %s: expected 2 '&'s; got %s" % 1271 (test_type, result.count('&'))) 1272 amp_location = result.index('&') 1273 on_amp_left = result[amp_location - 1] 1274 on_amp_right = result[amp_location + 1] 1275 self.assertTrue(on_amp_left.isdigit() and on_amp_right.isdigit(), 1276 "testing %s: '&' not located in proper place in %s" % 1277 (test_type, result)) 1278 self.assertEqual(len(result), (5 * 3) + 2, #5 chars per thing and amps 1279 "testing %s: " 1280 "unexpected number of characters: %s != %s" % 1281 (test_type, len(result), (5 * 3) + 2)) 1282 1283 def test_using_mapping(self): 1284 # Test passing in a mapping object as an argument. 1285 self.help_inputtype({"1st":'1', "2nd":'2', "3rd":'3'}, 1286 "using dict as input type") 1287 1288 def test_using_sequence(self): 1289 # Test passing in a sequence of two-item sequences as an argument. 
        self.help_inputtype([('1st', '1'), ('2nd', '2'), ('3rd', '3')],
                            "using sequence of two-item tuples as input")

    def test_quoting(self):
        # Make sure keys and values are quoted using quote_plus()
        given = {"&":"="}
        expect = "%s=%s" % (hexescape('&'), hexescape('='))
        result = urllib.parse.urlencode(given)
        self.assertEqual(expect, result)
        given = {"key name":"A bunch of pluses"}
        expect = "key+name=A+bunch+of+pluses"
        result = urllib.parse.urlencode(given)
        self.assertEqual(expect, result)

    def test_doseq(self):
        # Test that passing True for 'doseq' parameter works correctly
        given = {'sequence':['1', '2', '3']}
        # Without doseq the list is stringified and quoted whole.
        expect = "sequence=%s" % urllib.parse.quote_plus(str(['1', '2', '3']))
        result = urllib.parse.urlencode(given)
        self.assertEqual(expect, result)
        # With doseq each element becomes its own key=value pair.
        result = urllib.parse.urlencode(given, True)
        for value in given["sequence"]:
            expect = "sequence=%s" % value
            self.assertIn(expect, result)
        self.assertEqual(result.count('&'), 2,
                         "Expected 2 '&'s, got %s" % result.count('&'))

    def test_empty_sequence(self):
        self.assertEqual("", urllib.parse.urlencode({}))
        self.assertEqual("", urllib.parse.urlencode([]))

    def test_nonstring_values(self):
        self.assertEqual("a=1", urllib.parse.urlencode({"a": 1}))
        self.assertEqual("a=None", urllib.parse.urlencode({"a": None}))

    def test_nonstring_seq_values(self):
        self.assertEqual("a=1&a=2", urllib.parse.urlencode({"a": [1, 2]}, True))
        self.assertEqual("a=None&a=a",
                         urllib.parse.urlencode({"a": [None, "a"]}, True))
        # Iterating a mapping yields its keys, so the OrderedDict keys
        # "a" and "b" become the encoded values.
        data = collections.OrderedDict([("a", 1), ("b", 1)])
        self.assertEqual("a=a&a=b",
                         urllib.parse.urlencode({"a": data}, True))

    def test_urlencode_encoding(self):
        # ASCII encoding. Expect %3F with errors="replace"
        given = (('\u00a0', '\u00c1'),)
        expect = '%3F=%3F'
        result = urllib.parse.urlencode(given, encoding="ASCII", errors="replace")
        self.assertEqual(expect, result)

        # Default is UTF-8 encoding.
        given = (('\u00a0', '\u00c1'),)
        expect = '%C2%A0=%C3%81'
        result = urllib.parse.urlencode(given)
        self.assertEqual(expect, result)

        # Latin-1 encoding.
        given = (('\u00a0', '\u00c1'),)
        expect = '%A0=%C1'
        result = urllib.parse.urlencode(given, encoding="latin-1")
        self.assertEqual(expect, result)

    def test_urlencode_encoding_doseq(self):
        # ASCII Encoding. Expect %3F with errors="replace"
        given = (('\u00a0', '\u00c1'),)
        expect = '%3F=%3F'
        result = urllib.parse.urlencode(given, doseq=True,
                                        encoding="ASCII", errors="replace")
        self.assertEqual(expect, result)

        # ASCII Encoding. On a sequence of values.
        given = (("\u00a0", (1, "\u00c1")),)
        expect = '%3F=1&%3F=%3F'
        result = urllib.parse.urlencode(given, True,
                                        encoding="ASCII", errors="replace")
        self.assertEqual(expect, result)

        # Utf-8
        given = (("\u00a0", "\u00c1"),)
        expect = '%C2%A0=%C3%81'
        result = urllib.parse.urlencode(given, True)
        self.assertEqual(expect, result)

        given = (("\u00a0", (42, "\u00c1")),)
        expect = '%C2%A0=42&%C2%A0=%C3%81'
        result = urllib.parse.urlencode(given, True)
        self.assertEqual(expect, result)

        # latin-1
        given = (("\u00a0", "\u00c1"),)
        expect = '%A0=%C1'
        result = urllib.parse.urlencode(given, True, encoding="latin-1")
        self.assertEqual(expect, result)

        given = (("\u00a0", (42, "\u00c1")),)
        expect = '%A0=42&%A0=%C1'
        result = urllib.parse.urlencode(given, True, encoding="latin-1")
        self.assertEqual(expect, result)

    def test_urlencode_bytes(self):
        # Bytes are percent-encoded byte-for-byte, regardless of encoding.
        given = ((b'\xa0\x24', b'\xc1\x24'),)
        expect = '%A0%24=%C1%24'
result = urllib.parse.urlencode(given) 1393 self.assertEqual(expect, result) 1394 result = urllib.parse.urlencode(given, True) 1395 self.assertEqual(expect, result) 1396 1397 # Sequence of values 1398 given = ((b'\xa0\x24', (42, b'\xc1\x24')),) 1399 expect = '%A0%24=42&%A0%24=%C1%24' 1400 result = urllib.parse.urlencode(given, True) 1401 self.assertEqual(expect, result) 1402 1403 def test_urlencode_encoding_safe_parameter(self): 1404 1405 # Send '$' (\x24) as safe character 1406 # Default utf-8 encoding 1407 1408 given = ((b'\xa0\x24', b'\xc1\x24'),) 1409 result = urllib.parse.urlencode(given, safe=":$") 1410 expect = '%A0$=%C1$' 1411 self.assertEqual(expect, result) 1412 1413 given = ((b'\xa0\x24', b'\xc1\x24'),) 1414 result = urllib.parse.urlencode(given, doseq=True, safe=":$") 1415 expect = '%A0$=%C1$' 1416 self.assertEqual(expect, result) 1417 1418 # Safe parameter in sequence 1419 given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),) 1420 expect = '%A0$=%C1$&%A0$=13&%A0$=42' 1421 result = urllib.parse.urlencode(given, True, safe=":$") 1422 self.assertEqual(expect, result) 1423 1424 # Test all above in latin-1 encoding 1425 1426 given = ((b'\xa0\x24', b'\xc1\x24'),) 1427 result = urllib.parse.urlencode(given, safe=":$", 1428 encoding="latin-1") 1429 expect = '%A0$=%C1$' 1430 self.assertEqual(expect, result) 1431 1432 given = ((b'\xa0\x24', b'\xc1\x24'),) 1433 expect = '%A0$=%C1$' 1434 result = urllib.parse.urlencode(given, doseq=True, safe=":$", 1435 encoding="latin-1") 1436 1437 given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),) 1438 expect = '%A0$=%C1$&%A0$=13&%A0$=42' 1439 result = urllib.parse.urlencode(given, True, safe=":$", 1440 encoding="latin-1") 1441 self.assertEqual(expect, result) 1442 1443class Pathname_Tests(unittest.TestCase): 1444 """Test pathname2url() and url2pathname()""" 1445 1446 def test_basic(self): 1447 # Make sure simple tests pass 1448 expected_path = os.path.join("parts", "of", "a", "path") 1449 expected_url = "parts/of/a/path" 1450 result = 
urllib.request.pathname2url(expected_path) 1451 self.assertEqual(expected_url, result, 1452 "pathname2url() failed; %s != %s" % 1453 (result, expected_url)) 1454 result = urllib.request.url2pathname(expected_url) 1455 self.assertEqual(expected_path, result, 1456 "url2pathame() failed; %s != %s" % 1457 (result, expected_path)) 1458 1459 def test_quoting(self): 1460 # Test automatic quoting and unquoting works for pathnam2url() and 1461 # url2pathname() respectively 1462 given = os.path.join("needs", "quot=ing", "here") 1463 expect = "needs/%s/here" % urllib.parse.quote("quot=ing") 1464 result = urllib.request.pathname2url(given) 1465 self.assertEqual(expect, result, 1466 "pathname2url() failed; %s != %s" % 1467 (expect, result)) 1468 expect = given 1469 result = urllib.request.url2pathname(result) 1470 self.assertEqual(expect, result, 1471 "url2pathname() failed; %s != %s" % 1472 (expect, result)) 1473 given = os.path.join("make sure", "using_quote") 1474 expect = "%s/using_quote" % urllib.parse.quote("make sure") 1475 result = urllib.request.pathname2url(given) 1476 self.assertEqual(expect, result, 1477 "pathname2url() failed; %s != %s" % 1478 (expect, result)) 1479 given = "make+sure/using_unquote" 1480 expect = os.path.join("make+sure", "using_unquote") 1481 result = urllib.request.url2pathname(given) 1482 self.assertEqual(expect, result, 1483 "url2pathname() failed; %s != %s" % 1484 (expect, result)) 1485 1486 @unittest.skipUnless(sys.platform == 'win32', 1487 'test specific to the urllib.url2path function.') 1488 def test_ntpath(self): 1489 given = ('/C:/', '///C:/', '/C|//') 1490 expect = 'C:\\' 1491 for url in given: 1492 result = urllib.request.url2pathname(url) 1493 self.assertEqual(expect, result, 1494 'urllib.request..url2pathname() failed; %s != %s' % 1495 (expect, result)) 1496 given = '///C|/path' 1497 expect = 'C:\\path' 1498 result = urllib.request.url2pathname(given) 1499 self.assertEqual(expect, result, 1500 'urllib.request.url2pathname() failed; 
%s != %s' % 1501 (expect, result)) 1502 1503class Utility_Tests(unittest.TestCase): 1504 """Testcase to test the various utility functions in the urllib.""" 1505 1506 def test_thishost(self): 1507 """Test the urllib.request.thishost utility function returns a tuple""" 1508 self.assertIsInstance(urllib.request.thishost(), tuple) 1509 1510 1511class URLopener_Tests(FakeHTTPMixin, unittest.TestCase): 1512 """Testcase to test the open method of URLopener class.""" 1513 1514 def test_quoted_open(self): 1515 class DummyURLopener(urllib.request.URLopener): 1516 def open_spam(self, url): 1517 return url 1518 with support.check_warnings( 1519 ('DummyURLopener style of invoking requests is deprecated.', 1520 DeprecationWarning)): 1521 self.assertEqual(DummyURLopener().open( 1522 'spam://example/ /'),'//example/%20/') 1523 1524 # test the safe characters are not quoted by urlopen 1525 self.assertEqual(DummyURLopener().open( 1526 "spam://c:|windows%/:=&?~#+!$,;'@()*[]|/path/"), 1527 "//c:|windows%/:=&?~#+!$,;'@()*[]|/path/") 1528 1529 @support.ignore_warnings(category=DeprecationWarning) 1530 def test_urlopener_retrieve_file(self): 1531 with support.temp_dir() as tmpdir: 1532 fd, tmpfile = tempfile.mkstemp(dir=tmpdir) 1533 os.close(fd) 1534 fileurl = "file:" + urllib.request.pathname2url(tmpfile) 1535 filename, _ = urllib.request.URLopener().retrieve(fileurl) 1536 # Some buildbots have TEMP folder that uses a lowercase drive letter. 
            self.assertEqual(os.path.normcase(filename), os.path.normcase(tmpfile))

    @support.ignore_warnings(category=DeprecationWarning)
    def test_urlopener_retrieve_remote(self):
        url = "http://www.python.org/file.txt"
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
        self.addCleanup(self.unfakehttp)
        filename, _ = urllib.request.URLopener().retrieve(url)
        # The temp file name must keep the remote URL's extension.
        self.assertEqual(os.path.splitext(filename)[1], ".txt")

    @support.ignore_warnings(category=DeprecationWarning)
    def test_local_file_open(self):
        # bpo-35907, CVE-2019-9948: urllib must reject local_file:// scheme
        class DummyURLopener(urllib.request.URLopener):
            def open_local_file(self, url):
                return url
        for url in ('local_file://example', 'local-file://example'):
            self.assertRaises(OSError, urllib.request.urlopen, url)
            self.assertRaises(OSError, urllib.request.URLopener().open, url)
            self.assertRaises(OSError, urllib.request.URLopener().retrieve, url)
            self.assertRaises(OSError, DummyURLopener().open, url)
            self.assertRaises(OSError, DummyURLopener().retrieve, url)


# Just commented them out.
# Can't really tell why keep failing in windows and sparc.
# Everywhere else they work ok, but on those machines, sometimes
# fail in one of the tests, sometimes in other. I have a linux, and
# the tests go ok.
# If anybody has one of the problematic environments, please help!
# .   Facundo
#
# def server(evt):
#     import socket, time
#     serv = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
#     serv.settimeout(3)
#     serv.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
#     serv.bind(("", 9093))
#     serv.listen()
#     try:
#         conn, addr = serv.accept()
#         conn.send("1 Hola mundo\n")
#         cantdata = 0
#         while cantdata < 13:
#             data = conn.recv(13-cantdata)
#             cantdata += len(data)
#             time.sleep(.3)
#         conn.send("2 No more lines\n")
#         conn.close()
#     except socket.timeout:
#         pass
#     finally:
#         serv.close()
#         evt.set()
#
# class FTPWrapperTests(unittest.TestCase):
#
#     def setUp(self):
#         import ftplib, time, threading
#         ftplib.FTP.port = 9093
#         self.evt = threading.Event()
#         threading.Thread(target=server, args=(self.evt,)).start()
#         time.sleep(.1)
#
#     def tearDown(self):
#         self.evt.wait()
#
#     def testBasic(self):
#         # connects
#         ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
#         ftp.close()
#
#     def testTimeoutNone(self):
#         # global default timeout is ignored
#         import socket
#         self.assertIsNone(socket.getdefaulttimeout())
#         socket.setdefaulttimeout(30)
#         try:
#             ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
#         finally:
#             socket.setdefaulttimeout(None)
#         self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
#         ftp.close()
#
#     def testTimeoutDefault(self):
#         # global default timeout is used
#         import socket
#         self.assertIsNone(socket.getdefaulttimeout())
#         socket.setdefaulttimeout(30)
#         try:
#             ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
#         finally:
#             socket.setdefaulttimeout(None)
#         self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
#         ftp.close()
#
#     def testTimeoutValue(self):
#         ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [],
#
#                                 timeout=30)
#         self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
#         ftp.close()


class RequestTests(unittest.TestCase):
    """Unit tests for urllib.request.Request."""

    def test_default_values(self):
        Request = urllib.request.Request
        request = Request("http://www.python.org")
        # No data -> GET; with data (even empty) -> POST.
        self.assertEqual(request.get_method(), 'GET')
        request = Request("http://www.python.org", {})
        self.assertEqual(request.get_method(), 'POST')

    def test_with_method_arg(self):
        Request = urllib.request.Request
        # An explicit 'method' argument overrides the GET/POST default.
        request = Request("http://www.python.org", method='HEAD')
        self.assertEqual(request.method, 'HEAD')
        self.assertEqual(request.get_method(), 'HEAD')
        request = Request("http://www.python.org", {}, method='HEAD')
        self.assertEqual(request.method, 'HEAD')
        self.assertEqual(request.get_method(), 'HEAD')
        request = Request("http://www.python.org", method='GET')
        self.assertEqual(request.get_method(), 'GET')
        # Assigning .method after construction must also take effect.
        request.method = 'HEAD'
        self.assertEqual(request.get_method(), 'HEAD')


class URL2PathNameTests(unittest.TestCase):
    """Tests for nturl2path.url2pathname (Windows URL -> path mapping)."""

    def test_converting_drive_letter(self):
        self.assertEqual(url2pathname("///C|"), 'C:')
        self.assertEqual(url2pathname("///C:"), 'C:')
        self.assertEqual(url2pathname("///C|/"), 'C:\\')

    def test_converting_when_no_drive_letter(self):
        # cannot end a raw string in \
        self.assertEqual(url2pathname("///C/test/"), r'\\\C\test' '\\')
        self.assertEqual(url2pathname("////C/test/"), r'\\C\test' '\\')

    def test_simple_compare(self):
        self.assertEqual(url2pathname("///C|/foo/bar/spam.foo"),
                         r'C:\foo\bar\spam.foo')

    def test_non_ascii_drive_letter(self):
        self.assertRaises(IOError, url2pathname, "///\u00e8|/")

    def test_roundtrip_url2pathname(self):
        list_of_paths = ['C:',
                         r'\\\C\test\\',
                         r'C:\foo\bar\spam.foo'
                         ]
        for path in list_of_paths:
            self.assertEqual(url2pathname(pathname2url(path)), path)

class PathName2URLTests(unittest.TestCase):
    """Tests for nturl2path.pathname2url (Windows path -> URL mapping)."""

    def test_converting_drive_letter(self):
        self.assertEqual(pathname2url("C:"), '///C:')
        self.assertEqual(pathname2url("C:\\"), '///C:')

    def test_converting_when_no_drive_letter(self):
        self.assertEqual(pathname2url(r"\\\folder\test" "\\"),
                         '/////folder/test/')
        self.assertEqual(pathname2url(r"\\folder\test" "\\"),
                         '////folder/test/')
        self.assertEqual(pathname2url(r"\folder\test" "\\"),
                         '/folder/test/')

    def test_simple_compare(self):
        self.assertEqual(pathname2url(r'C:\foo\bar\spam.foo'),
                         "///C:/foo/bar/spam.foo" )

    def test_long_drive_letter(self):
        # A multi-character "drive" is not a valid DOS drive.
        self.assertRaises(IOError, pathname2url, "XX:\\")

    def test_roundtrip_pathname2url(self):
        list_of_paths = ['///C:',
                         '/////folder/test/',
                         '///C:/foo/bar/spam.foo']
        for path in list_of_paths:
            self.assertEqual(pathname2url(url2pathname(path)), path)

if __name__ == '__main__':
    unittest.main()