• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1"""Regression tests for what was in Python 2's "urllib" module"""
2
3import urllib.parse
4import urllib.request
5import urllib.error
6import http.client
7import email.message
8import io
9import unittest
10from unittest.mock import patch
11from test import support
12import os
13try:
14    import ssl
15except ImportError:
16    ssl = None
17import sys
18import tempfile
19from nturl2path import url2pathname, pathname2url
20
21from base64 import b64encode
22import collections
23
24
def hexescape(char):
    """Escape char as RFC 2396 specifies"""
    # Percent sign followed by the uppercase hex code point, zero-padded
    # to at least two digits (wider code points keep all their digits).
    return "%" + format(ord(char), "02X")
31
# Shortcut for testing FancyURLopener
# Cached opener shared by the urlopen() helper below; created lazily on
# first use (unless an explicit proxies mapping is passed).
_urlopener = None
34
35
def urlopen(url, data=None, proxies=None):
    """urlopen(url [, data]) -> open file-like object"""
    global _urlopener
    if proxies is not None:
        # An explicit proxy mapping always gets its own fresh opener.
        opener = urllib.request.FancyURLopener(proxies=proxies)
    else:
        # Otherwise reuse (and lazily create) the module-wide opener.
        if not _urlopener:
            _urlopener = FancyURLopener()
        opener = _urlopener
    if data is None:
        return opener.open(url)
    return opener.open(url, data)
50
51
def FancyURLopener():
    """Instantiate urllib.request.FancyURLopener while asserting (and
    silencing) the DeprecationWarning its constructor emits."""
    expected = ('FancyURLopener style of invoking requests is deprecated.',
                DeprecationWarning)
    with support.check_warnings(expected):
        opener = urllib.request.FancyURLopener()
    return opener
57
58
def fakehttp(fakedata, mock_close=False):
    """Build a fake http.client.HTTPConnection subclass that replays
    *fakedata* instead of touching the network.

    connect() installs a BytesIO-backed fake socket preloaded with
    *fakedata*; anything the connection sends is captured on the class
    attribute ``buf`` for inspection by the tests.
    """

    class FakeSocket(io.BytesIO):
        # Count of file-like handles vended via makefile(); the buffer is
        # only really closed once every handle has been closed.
        io_refs = 1

        def sendall(self, data):
            # Capture outgoing bytes on the connection class.
            FakeHTTPConnection.buf = data

        def makefile(self, *args, **kwds):
            self.io_refs += 1
            return self

        def read(self, amt=None):
            # A closed "socket" yields EOF instead of raising.
            return b"" if self.closed else super().read(amt)

        def readline(self, length=None):
            return b"" if self.closed else super().readline(length)

        def close(self):
            self.io_refs -= 1
            if not self.io_refs:
                super().close()

    class FakeHTTPConnection(http.client.HTTPConnection):

        # buffer to store data for verification in urlopen tests.
        buf = None

        def connect(self):
            self.sock = FakeSocket(self.fakedata)
            type(self).fakesock = self.sock

        if mock_close:
            # bpo-36918: HTTPConnection destructor calls close() which calls
            # flush(). Problem: flush() calls self.fp.flush() which raises
            # "ValueError: I/O operation on closed file" which is logged as an
            # "Exception ignored in". Override close() to silence this error.
            def close(self):
                pass

    FakeHTTPConnection.fakedata = fakedata
    return FakeHTTPConnection
104
105
class FakeHTTPMixin(object):
    """TestCase mix-in that swaps http.client.HTTPConnection for a fake
    connection class replaying canned response data."""

    def fakehttp(self, fakedata, mock_close=False):
        # Stash the real connection class so unfakehttp() can restore it,
        # then install the canned-response fake.
        self._connection_class = http.client.HTTPConnection
        http.client.HTTPConnection = fakehttp(fakedata, mock_close=mock_close)

    def unfakehttp(self):
        # Undo the monkey-patch performed by fakehttp().
        http.client.HTTPConnection = self._connection_class
114
115
class FakeFTPMixin(object):
    """TestCase mix-in that replaces urllib.request.ftpwrapper with an
    inert stub so no real FTP traffic happens."""

    def fakeftp(self):
        # Remember the genuine wrapper class for unfakeftp().
        self._ftpwrapper_class = urllib.request.ftpwrapper

        class _StubFtpWrapper(object):
            def __init__(self,  user, passwd, host, port, dirs, timeout=None,
                     persistent=True):
                pass

            def retrfile(self, file, type):
                # Every retrieval "succeeds" with an empty body of size 0.
                return io.BytesIO(), 0

            def close(self):
                pass

        urllib.request.ftpwrapper = _StubFtpWrapper

    def unfakeftp(self):
        urllib.request.ftpwrapper = self._ftpwrapper_class
134
135
class urlopen_FileTests(unittest.TestCase):
    """Test urlopen() opening a temporary file.

    Try to test as much functionality as possible so as to cut down on reliance
    on connecting to the Net for testing.

    """

    def setUp(self):
        # Create a temp file to use for testing
        self.text = bytes("test_urllib: %s\n" % self.__class__.__name__,
                          "ascii")
        f = open(support.TESTFN, 'wb')
        try:
            f.write(self.text)
        finally:
            f.close()
        self.pathname = support.TESTFN
        self.quoted_pathname = urllib.parse.quote(self.pathname)
        # Each test works on a freshly opened response for the file: URL.
        self.returned_obj = urlopen("file:%s" % self.quoted_pathname)

    def tearDown(self):
        """Shut down the open object"""
        self.returned_obj.close()
        os.remove(support.TESTFN)

    def test_interface(self):
        # Make sure object returned by urlopen() has the specified methods
        for attr in ("read", "readline", "readlines", "fileno",
                     "close", "info", "geturl", "getcode", "__iter__"):
            self.assertTrue(hasattr(self.returned_obj, attr),
                         "object returned by urlopen() lacks %s attribute" %
                         attr)

    def test_read(self):
        self.assertEqual(self.text, self.returned_obj.read())

    def test_readline(self):
        self.assertEqual(self.text, self.returned_obj.readline())
        # The file holds a single line, so the second call must hit EOF.
        self.assertEqual(b'', self.returned_obj.readline(),
                         "calling readline() after exhausting the file did not"
                         " return an empty string")

    def test_readlines(self):
        lines_list = self.returned_obj.readlines()
        self.assertEqual(len(lines_list), 1,
                         "readlines() returned the wrong number of lines")
        self.assertEqual(lines_list[0], self.text,
                         "readlines() returned improper text")

    def test_fileno(self):
        file_num = self.returned_obj.fileno()
        self.assertIsInstance(file_num, int, "fileno() did not return an int")
        # Reading straight from the descriptor must yield the file body.
        self.assertEqual(os.read(file_num, len(self.text)), self.text,
                         "Reading on the file descriptor returned by fileno() "
                         "did not return the expected text")

    def test_close(self):
        # Test close() by calling it here and then having it be called again
        # by the tearDown() method for the test
        self.returned_obj.close()

    def test_headers(self):
        self.assertIsInstance(self.returned_obj.headers, email.message.Message)

    def test_url(self):
        self.assertEqual(self.returned_obj.url, self.quoted_pathname)

    def test_status(self):
        # file: responses carry no HTTP status.
        self.assertIsNone(self.returned_obj.status)

    def test_info(self):
        self.assertIsInstance(self.returned_obj.info(), email.message.Message)

    def test_geturl(self):
        self.assertEqual(self.returned_obj.geturl(), self.quoted_pathname)

    def test_getcode(self):
        self.assertIsNone(self.returned_obj.getcode())

    def test_iter(self):
        # Test iterator
        # Don't need to count number of iterations since test would fail the
        # instant it returned anything beyond the first line from the
        # comparison.
        # Use the iterator in the usual implicit way to test for ticket #4608.
        for line in self.returned_obj:
            self.assertEqual(line, self.text)

    def test_relativelocalfile(self):
        # A relative path (no scheme) must be rejected.
        self.assertRaises(ValueError,urllib.request.urlopen,'./' + self.pathname)
227
228
class ProxyTests(unittest.TestCase):
    """Tests for proxy configuration read from environment variables."""

    def setUp(self):
        # Records changes to env vars
        self.env = support.EnvironmentVarGuard()
        # Delete all proxy related env vars
        for k in list(os.environ):
            if 'proxy' in k.lower():
                self.env.unset(k)

    def tearDown(self):
        # Restore all proxy related env vars
        self.env.__exit__()
        del self.env

    def test_getproxies_environment_keep_no_proxies(self):
        self.env.set('NO_PROXY', 'localhost')
        proxies = urllib.request.getproxies_environment()
        # getproxies_environment use lowered case truncated (no '_proxy') keys
        self.assertEqual('localhost', proxies['no'])
        # List of no_proxies with space.
        self.env.set('NO_PROXY', 'localhost, anotherdomain.com, newdomain.com:1234')
        self.assertTrue(urllib.request.proxy_bypass_environment('anotherdomain.com'))
        self.assertTrue(urllib.request.proxy_bypass_environment('anotherdomain.com:8888'))
        self.assertTrue(urllib.request.proxy_bypass_environment('newdomain.com:1234'))

    def test_proxy_cgi_ignore(self):
        # HTTP_PROXY must be ignored when REQUEST_METHOD is set (CGI context).
        try:
            self.env.set('HTTP_PROXY', 'http://somewhere:3128')
            proxies = urllib.request.getproxies_environment()
            self.assertEqual('http://somewhere:3128', proxies['http'])
            self.env.set('REQUEST_METHOD', 'GET')
            proxies = urllib.request.getproxies_environment()
            self.assertNotIn('http', proxies)
        finally:
            self.env.unset('REQUEST_METHOD')
            self.env.unset('HTTP_PROXY')

    def test_proxy_bypass_environment_host_match(self):
        bypass = urllib.request.proxy_bypass_environment
        self.env.set('NO_PROXY',
                     'localhost, anotherdomain.com, newdomain.com:1234, .d.o.t')
        self.assertTrue(bypass('localhost'))
        self.assertTrue(bypass('LocalHost'))                 # MixedCase
        self.assertTrue(bypass('LOCALHOST'))                 # UPPERCASE
        self.assertTrue(bypass('.localhost'))
        self.assertTrue(bypass('newdomain.com:1234'))
        self.assertTrue(bypass('.newdomain.com:1234'))
        self.assertTrue(bypass('foo.d.o.t'))                 # issue 29142
        self.assertTrue(bypass('d.o.t'))
        self.assertTrue(bypass('anotherdomain.com:8888'))
        self.assertTrue(bypass('.anotherdomain.com:8888'))
        self.assertTrue(bypass('www.newdomain.com:1234'))
        self.assertFalse(bypass('prelocalhost'))
        self.assertFalse(bypass('newdomain.com'))            # no port
        self.assertFalse(bypass('newdomain.com:1235'))       # wrong port

    def test_proxy_bypass_environment_always_match(self):
        # A lone '*' wildcard bypasses everything; mixed with other
        # entries it is not treated as a wildcard.
        bypass = urllib.request.proxy_bypass_environment
        self.env.set('NO_PROXY', '*')
        self.assertTrue(bypass('newdomain.com'))
        self.assertTrue(bypass('newdomain.com:1234'))
        self.env.set('NO_PROXY', '*, anotherdomain.com')
        self.assertTrue(bypass('anotherdomain.com'))
        self.assertFalse(bypass('newdomain.com'))
        self.assertFalse(bypass('newdomain.com:1234'))

    def test_proxy_bypass_environment_newline(self):
        # Hosts with trailing newlines must never match a bypass entry.
        bypass = urllib.request.proxy_bypass_environment
        self.env.set('NO_PROXY',
                     'localhost, anotherdomain.com, newdomain.com:1234')
        self.assertFalse(bypass('localhost\n'))
        self.assertFalse(bypass('anotherdomain.com:8888\n'))
        self.assertFalse(bypass('newdomain.com:1234\n'))
303
304
class ProxyTests_withOrderedEnv(unittest.TestCase):
    """Proxy-environment tests that depend on env-var insertion order."""

    def setUp(self):
        # We need to test conditions, where variable order _is_ significant
        self._saved_env = os.environ
        # Monkey patch os.environ, start with empty fake environment
        os.environ = collections.OrderedDict()

    def tearDown(self):
        os.environ = self._saved_env

    def test_getproxies_environment_prefer_lowercase(self):
        # Test lowercase preference with removal
        os.environ['no_proxy'] = ''
        os.environ['No_Proxy'] = 'localhost'
        self.assertFalse(urllib.request.proxy_bypass_environment('localhost'))
        self.assertFalse(urllib.request.proxy_bypass_environment('arbitrary'))
        os.environ['http_proxy'] = ''
        os.environ['HTTP_PROXY'] = 'http://somewhere:3128'
        proxies = urllib.request.getproxies_environment()
        self.assertEqual({}, proxies)
        # Test lowercase preference of proxy bypass and correct matching including ports
        os.environ['no_proxy'] = 'localhost, noproxy.com, my.proxy:1234'
        os.environ['No_Proxy'] = 'xyz.com'
        self.assertTrue(urllib.request.proxy_bypass_environment('localhost'))
        self.assertTrue(urllib.request.proxy_bypass_environment('noproxy.com:5678'))
        self.assertTrue(urllib.request.proxy_bypass_environment('my.proxy:1234'))
        self.assertFalse(urllib.request.proxy_bypass_environment('my.proxy'))
        self.assertFalse(urllib.request.proxy_bypass_environment('arbitrary'))
        # Test lowercase preference with replacement
        os.environ['http_proxy'] = 'http://somewhere:3128'
        os.environ['Http_Proxy'] = 'http://somewhereelse:3128'
        proxies = urllib.request.getproxies_environment()
        self.assertEqual('http://somewhere:3128', proxies['http'])
339
340
class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin, FakeFTPMixin):
    """Test urlopen() opening a fake http connection."""

    def check_read(self, ver):
        # Serve a canned response for the given HTTP version bytes and
        # verify body, URL and status round-trip through urlopen().
        self.fakehttp(b"HTTP/" + ver + b" 200 OK\r\n\r\nHello!")
        try:
            fp = urlopen("http://python.org/")
            self.assertEqual(fp.readline(), b"Hello!")
            self.assertEqual(fp.readline(), b"")
            self.assertEqual(fp.geturl(), 'http://python.org/')
            self.assertEqual(fp.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_url_fragment(self):
        # Issue #11703: geturl() omits fragments in the original URL.
        url = 'http://docs.python.org/library/urllib.html#OK'
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
        try:
            fp = urllib.request.urlopen(url)
            self.assertEqual(fp.geturl(), url)
        finally:
            self.unfakehttp()

    def test_willclose(self):
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
        try:
            resp = urlopen("http://www.python.org")
            self.assertTrue(resp.fp.will_close)
        finally:
            self.unfakehttp()

    @unittest.skipUnless(ssl, "ssl module required")
    def test_url_path_with_control_char_rejected(self):
        # Every C0 control char plus DEL in the path must be rejected.
        for char_no in list(range(0, 0x21)) + [0x7f]:
            char = chr(char_no)
            schemeless_url = f"//localhost:7777/test{char}/"
            self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.")
            try:
                # We explicitly test urllib.request.urlopen() instead of the top
                # level 'def urlopen()' function defined in this... (quite ugly)
                # test suite.  They use different url opening codepaths.  Plain
                # urlopen uses FancyURLOpener which goes via a codepath that
                # calls urllib.parse.quote() on the URL which makes all of the
                # above attempts at injection within the url _path_ safe.
                escaped_char_repr = repr(char).replace('\\', r'\\')
                InvalidURL = http.client.InvalidURL
                with self.assertRaisesRegex(
                    InvalidURL, f"contain control.*{escaped_char_repr}"):
                    urllib.request.urlopen(f"http:{schemeless_url}")
                with self.assertRaisesRegex(
                    InvalidURL, f"contain control.*{escaped_char_repr}"):
                    urllib.request.urlopen(f"https:{schemeless_url}")
                # This code path quotes the URL so there is no injection.
                resp = urlopen(f"http:{schemeless_url}")
                self.assertNotIn(char, resp.geturl())
            finally:
                self.unfakehttp()

    @unittest.skipUnless(ssl, "ssl module required")
    def test_url_path_with_newline_header_injection_rejected(self):
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.")
        host = "localhost:7777?a=1 HTTP/1.1\r\nX-injected: header\r\nTEST: 123"
        schemeless_url = "//" + host + ":8080/test/?test=a"
        try:
            # We explicitly test urllib.request.urlopen() instead of the top
            # level 'def urlopen()' function defined in this... (quite ugly)
            # test suite.  They use different url opening codepaths.  Plain
            # urlopen uses FancyURLOpener which goes via a codepath that
            # calls urllib.parse.quote() on the URL which makes all of the
            # above attempts at injection within the url _path_ safe.
            InvalidURL = http.client.InvalidURL
            with self.assertRaisesRegex(
                InvalidURL, r"contain control.*\\r.*(found at least . .)"):
                urllib.request.urlopen(f"http:{schemeless_url}")
            with self.assertRaisesRegex(InvalidURL, r"contain control.*\\n"):
                urllib.request.urlopen(f"https:{schemeless_url}")
            # This code path quotes the URL so there is no injection.
            resp = urlopen(f"http:{schemeless_url}")
            self.assertNotIn(' ', resp.geturl())
            self.assertNotIn('\r', resp.geturl())
            self.assertNotIn('\n', resp.geturl())
        finally:
            self.unfakehttp()

    @unittest.skipUnless(ssl, "ssl module required")
    def test_url_host_with_control_char_rejected(self):
        # Control characters in the host portion are rejected on both
        # codepaths (no quoting rescue for the host).
        for char_no in list(range(0, 0x21)) + [0x7f]:
            char = chr(char_no)
            schemeless_url = f"//localhost{char}/test/"
            self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.")
            try:
                escaped_char_repr = repr(char).replace('\\', r'\\')
                InvalidURL = http.client.InvalidURL
                with self.assertRaisesRegex(
                    InvalidURL, f"contain control.*{escaped_char_repr}"):
                    urlopen(f"http:{schemeless_url}")
                with self.assertRaisesRegex(InvalidURL, f"contain control.*{escaped_char_repr}"):
                    urlopen(f"https:{schemeless_url}")
            finally:
                self.unfakehttp()

    @unittest.skipUnless(ssl, "ssl module required")
    def test_url_host_with_newline_header_injection_rejected(self):
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.")
        host = "localhost\r\nX-injected: header\r\n"
        schemeless_url = "//" + host + ":8080/test/?test=a"
        try:
            InvalidURL = http.client.InvalidURL
            with self.assertRaisesRegex(
                InvalidURL, r"contain control.*\\r"):
                urlopen(f"http:{schemeless_url}")
            with self.assertRaisesRegex(InvalidURL, r"contain control.*\\n"):
                urlopen(f"https:{schemeless_url}")
        finally:
            self.unfakehttp()

    def test_read_0_9(self):
        # "0.9" response accepted (but not "simple responses" without
        # a status line)
        self.check_read(b"0.9")

    def test_read_1_0(self):
        self.check_read(b"1.0")

    def test_read_1_1(self):
        self.check_read(b"1.1")

    def test_read_bogus(self):
        # urlopen() should raise OSError for many error codes.
        self.fakehttp(b'''HTTP/1.1 401 Authentication Required
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Type: text/html; charset=iso-8859-1
''', mock_close=True)
        try:
            self.assertRaises(OSError, urlopen, "http://python.org/")
        finally:
            self.unfakehttp()

    def test_invalid_redirect(self):
        # urlopen() should raise OSError for many error codes.
        self.fakehttp(b'''HTTP/1.1 302 Found
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Location: file://guidocomputer.athome.com:/python/license
Connection: close
Content-Type: text/html; charset=iso-8859-1
''', mock_close=True)
        try:
            msg = "Redirection to url 'file:"
            with self.assertRaisesRegex(urllib.error.HTTPError, msg):
                urlopen("http://python.org/")
        finally:
            self.unfakehttp()

    def test_redirect_limit_independent(self):
        # Ticket #12923: make sure independent requests each use their
        # own retry limit.
        for i in range(FancyURLopener().maxtries):
            self.fakehttp(b'''HTTP/1.1 302 Found
Location: file://guidocomputer.athome.com:/python/license
Connection: close
''', mock_close=True)
            try:
                self.assertRaises(urllib.error.HTTPError, urlopen,
                    "http://something")
            finally:
                self.unfakehttp()

    def test_empty_socket(self):
        # urlopen() raises OSError if the underlying socket does not send any
        # data. (#1680230)
        self.fakehttp(b'')
        try:
            self.assertRaises(OSError, urlopen, "http://something")
        finally:
            self.unfakehttp()

    def test_missing_localfile(self):
        # Test for #10836
        with self.assertRaises(urllib.error.URLError) as e:
            urlopen('file://localhost/a/file/which/doesnot/exists.py')
        self.assertTrue(e.exception.filename)
        self.assertTrue(e.exception.reason)

    def test_file_notexists(self):
        # A file: URL works while the file exists and raises URLError
        # once the file is removed.
        fd, tmp_file = tempfile.mkstemp()
        tmp_fileurl = 'file://localhost/' + tmp_file.replace(os.path.sep, '/')
        try:
            self.assertTrue(os.path.exists(tmp_file))
            with urlopen(tmp_fileurl) as fobj:
                self.assertTrue(fobj)
        finally:
            os.close(fd)
            os.unlink(tmp_file)
        self.assertFalse(os.path.exists(tmp_file))
        with self.assertRaises(urllib.error.URLError):
            urlopen(tmp_fileurl)

    def test_ftp_nohost(self):
        test_ftp_url = 'ftp:///path'
        with self.assertRaises(urllib.error.URLError) as e:
            urlopen(test_ftp_url)
        self.assertFalse(e.exception.filename)
        self.assertTrue(e.exception.reason)

    def test_ftp_nonexisting(self):
        with self.assertRaises(urllib.error.URLError) as e:
            urlopen('ftp://localhost/a/file/which/doesnot/exists.py')
        self.assertFalse(e.exception.filename)
        self.assertTrue(e.exception.reason)

    @patch.object(urllib.request, 'MAXFTPCACHE', 0)
    def test_ftp_cache_pruning(self):
        # With MAXFTPCACHE patched to 0 the cached entry must be pruned.
        self.fakeftp()
        try:
            urllib.request.ftpcache['test'] = urllib.request.ftpwrapper('user', 'pass', 'localhost', 21, [])
            urlopen('ftp://localhost')
        finally:
            self.unfakeftp()

    def test_userpass_inurl(self):
        self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!")
        try:
            fp = urlopen("http://user:pass@python.org/")
            self.assertEqual(fp.readline(), b"Hello!")
            self.assertEqual(fp.readline(), b"")
            self.assertEqual(fp.geturl(), 'http://user:pass@python.org/')
            self.assertEqual(fp.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_userpass_inurl_w_spaces(self):
        self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!")
        try:
            userpass = "a b:c d"
            url = "http://{}@python.org/".format(userpass)
            fakehttp_wrapper = http.client.HTTPConnection
            authorization = ("Authorization: Basic %s\r\n" %
                             b64encode(userpass.encode("ASCII")).decode("ASCII"))
            fp = urlopen(url)
            # The authorization header must be in place
            self.assertIn(authorization, fakehttp_wrapper.buf.decode("UTF-8"))
            self.assertEqual(fp.readline(), b"Hello!")
            self.assertEqual(fp.readline(), b"")
            # the spaces are quoted in URL so no match
            self.assertNotEqual(fp.geturl(), url)
            self.assertEqual(fp.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_URLopener_deprecation(self):
        # Instantiating URLopener directly must emit a DeprecationWarning.
        with support.check_warnings(('',DeprecationWarning)):
            urllib.request.URLopener()

    @unittest.skipUnless(ssl, "ssl module required")
    def test_cafile_and_context(self):
        # Passing both cafile and context is a ValueError.
        context = ssl.create_default_context()
        with support.check_warnings(('', DeprecationWarning)):
            with self.assertRaises(ValueError):
                urllib.request.urlopen(
                    "https://localhost", cafile="/nonexistent/path", context=context
                )
606
607
class urlopen_DataTests(unittest.TestCase):
    """Test urlopen() opening a data URL."""

    def setUp(self):
        # clear _opener global variable
        self.addCleanup(urllib.request.urlcleanup)

        # text containing URL special- and unicode-characters
        self.text = "test data URLs :;,%=& \u00f6 \u00c4 "
        # 2x1 pixel RGB PNG image with one black and one white pixel
        self.image = (
            b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x02\x00\x00\x00'
            b'\x01\x08\x02\x00\x00\x00{@\xe8\xdd\x00\x00\x00\x01sRGB\x00\xae'
            b'\xce\x1c\xe9\x00\x00\x00\x0fIDAT\x08\xd7c```\xf8\xff\xff?\x00'
            b'\x06\x01\x02\xfe\no/\x1e\x00\x00\x00\x00IEND\xaeB`\x82')

        # Percent-encoded form of self.text above.
        self.text_url = (
            "data:text/plain;charset=UTF-8,test%20data%20URLs%20%3A%3B%2C%25%3"
            "D%26%20%C3%B6%20%C3%84%20")
        self.text_url_base64 = (
            "data:text/plain;charset=ISO-8859-1;base64,dGVzdCBkYXRhIFVSTHMgOjs"
            "sJT0mIPYgxCA%3D")
        # base64 encoded data URL that contains ignorable spaces,
        # such as "\n", " ", "%0A", and "%20".
        self.image_url = (
            "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAIAAAABCAIAAAB7\n"
            "QOjdAAAAAXNSR0IArs4c6QAAAA9JREFUCNdj%0AYGBg%2BP//PwAGAQL%2BCm8 "
            "vHgAAAABJRU5ErkJggg%3D%3D%0A%20")

        self.text_url_resp = urllib.request.urlopen(self.text_url)
        self.text_url_base64_resp = urllib.request.urlopen(
            self.text_url_base64)
        self.image_url_resp = urllib.request.urlopen(self.image_url)

    def test_interface(self):
        # Make sure object returned by urlopen() has the specified methods
        for attr in ("read", "readline", "readlines",
                     "close", "info", "geturl", "getcode", "__iter__"):
            self.assertTrue(hasattr(self.text_url_resp, attr),
                         "object returned by urlopen() lacks %s attribute" %
                         attr)

    def test_info(self):
        self.assertIsInstance(self.text_url_resp.info(), email.message.Message)
        self.assertEqual(self.text_url_base64_resp.info().get_params(),
            [('text/plain', ''), ('charset', 'ISO-8859-1')])
        self.assertEqual(self.image_url_resp.info()['content-length'],
            str(len(self.image)))
        # A bare "data:," URL defaults to text/plain;charset=US-ASCII.
        self.assertEqual(urllib.request.urlopen("data:,").info().get_params(),
            [('text/plain', ''), ('charset', 'US-ASCII')])

    def test_geturl(self):
        self.assertEqual(self.text_url_resp.geturl(), self.text_url)
        self.assertEqual(self.text_url_base64_resp.geturl(),
            self.text_url_base64)
        self.assertEqual(self.image_url_resp.geturl(), self.image_url)

    def test_read_text(self):
        # Decode the body with the charset declared in the URL itself.
        self.assertEqual(self.text_url_resp.read().decode(
            dict(self.text_url_resp.info().get_params())['charset']), self.text)

    def test_read_text_base64(self):
        self.assertEqual(self.text_url_base64_resp.read().decode(
            dict(self.text_url_base64_resp.info().get_params())['charset']),
            self.text)

    def test_read_image(self):
        self.assertEqual(self.image_url_resp.read(), self.image)

    def test_missing_comma(self):
        self.assertRaises(ValueError,urllib.request.urlopen,'data:text/plain')

    def test_invalid_base64_data(self):
        # missing padding character
        self.assertRaises(ValueError,urllib.request.urlopen,'data:;base64,Cg=')
683
684
685class urlretrieve_FileTests(unittest.TestCase):
686    """Test urllib.urlretrieve() on local files"""
687
688    def setUp(self):
689        # clear _opener global variable
690        self.addCleanup(urllib.request.urlcleanup)
691
692        # Create a list of temporary files. Each item in the list is a file
693        # name (absolute path or relative to the current working directory).
694        # All files in this list will be deleted in the tearDown method. Note,
695        # this only helps to makes sure temporary files get deleted, but it
696        # does nothing about trying to close files that may still be open. It
697        # is the responsibility of the developer to properly close files even
698        # when exceptional conditions occur.
699        self.tempFiles = []
700
701        # Create a temporary file.
702        self.registerFileForCleanUp(support.TESTFN)
703        self.text = b'testing urllib.urlretrieve'
704        try:
705            FILE = open(support.TESTFN, 'wb')
706            FILE.write(self.text)
707            FILE.close()
708        finally:
709            try: FILE.close()
710            except: pass
711
712    def tearDown(self):
713        # Delete the temporary files.
714        for each in self.tempFiles:
715            try: os.remove(each)
716            except: pass
717
718    def constructLocalFileUrl(self, filePath):
719        filePath = os.path.abspath(filePath)
720        try:
721            filePath.encode("utf-8")
722        except UnicodeEncodeError:
723            raise unittest.SkipTest("filePath is not encodable to utf8")
724        return "file://%s" % urllib.request.pathname2url(filePath)
725
726    def createNewTempFile(self, data=b""):
727        """Creates a new temporary file containing the specified data,
728        registers the file for deletion during the test fixture tear down, and
729        returns the absolute path of the file."""
730
731        newFd, newFilePath = tempfile.mkstemp()
732        try:
733            self.registerFileForCleanUp(newFilePath)
734            newFile = os.fdopen(newFd, "wb")
735            newFile.write(data)
736            newFile.close()
737        finally:
738            try: newFile.close()
739            except: pass
740        return newFilePath
741
    def registerFileForCleanUp(self, fileName):
        # Queue fileName for removal by tearDown().
        self.tempFiles.append(fileName)
744
    def test_basic(self):
        # Make sure that a local file just gets its own location returned and
        # a headers value is returned.
        result = urllib.request.urlretrieve("file:%s" % support.TESTFN)
        self.assertEqual(result[0], support.TESTFN)
        self.assertIsInstance(result[1], email.message.Message,
                              "did not get an email.message.Message instance "
                              "as second returned value")
753
754    def test_copy(self):
755        # Test that setting the filename argument works.
756        second_temp = "%s.2" % support.TESTFN
757        self.registerFileForCleanUp(second_temp)
758        result = urllib.request.urlretrieve(self.constructLocalFileUrl(
759            support.TESTFN), second_temp)
760        self.assertEqual(second_temp, result[0])
761        self.assertTrue(os.path.exists(second_temp), "copy of the file was not "
762                                                  "made")
763        FILE = open(second_temp, 'rb')
764        try:
765            text = FILE.read()
766            FILE.close()
767        finally:
768            try: FILE.close()
769            except: pass
770        self.assertEqual(self.text, text)
771
772    def test_reporthook(self):
773        # Make sure that the reporthook works.
774        def hooktester(block_count, block_read_size, file_size, count_holder=[0]):
775            self.assertIsInstance(block_count, int)
776            self.assertIsInstance(block_read_size, int)
777            self.assertIsInstance(file_size, int)
778            self.assertEqual(block_count, count_holder[0])
779            count_holder[0] = count_holder[0] + 1
780        second_temp = "%s.2" % support.TESTFN
781        self.registerFileForCleanUp(second_temp)
782        urllib.request.urlretrieve(
783            self.constructLocalFileUrl(support.TESTFN),
784            second_temp, hooktester)
785
786    def test_reporthook_0_bytes(self):
787        # Test on zero length file. Should call reporthook only 1 time.
788        report = []
789        def hooktester(block_count, block_read_size, file_size, _report=report):
790            _report.append((block_count, block_read_size, file_size))
791        srcFileName = self.createNewTempFile()
792        urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
793            support.TESTFN, hooktester)
794        self.assertEqual(len(report), 1)
795        self.assertEqual(report[0][2], 0)
796
797    def test_reporthook_5_bytes(self):
798        # Test on 5 byte file. Should call reporthook only 2 times (once when
799        # the "network connection" is established and once when the block is
800        # read).
801        report = []
802        def hooktester(block_count, block_read_size, file_size, _report=report):
803            _report.append((block_count, block_read_size, file_size))
804        srcFileName = self.createNewTempFile(b"x" * 5)
805        urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
806            support.TESTFN, hooktester)
807        self.assertEqual(len(report), 2)
808        self.assertEqual(report[0][2], 5)
809        self.assertEqual(report[1][2], 5)
810
811    def test_reporthook_8193_bytes(self):
812        # Test on 8193 byte file. Should call reporthook only 3 times (once
813        # when the "network connection" is established, once for the next 8192
814        # bytes, and once for the last byte).
815        report = []
816        def hooktester(block_count, block_read_size, file_size, _report=report):
817            _report.append((block_count, block_read_size, file_size))
818        srcFileName = self.createNewTempFile(b"x" * 8193)
819        urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
820            support.TESTFN, hooktester)
821        self.assertEqual(len(report), 3)
822        self.assertEqual(report[0][2], 8193)
823        self.assertEqual(report[0][1], 8192)
824        self.assertEqual(report[1][1], 8192)
825        self.assertEqual(report[2][1], 8192)
826
827
class urlretrieve_HttpTests(unittest.TestCase, FakeHTTPMixin):
    """Test urllib.urlretrieve() using fake http connections"""

    # A response whose Content-Length (100) exceeds the actual payload,
    # shared by both tests below.  The byte content is exactly what each
    # test previously inlined.
    _SHORT_RESPONSE = b'''HTTP/1.1 200 OK
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Length: 100
Content-Type: text/html; charset=iso-8859-1

FF
'''

    def test_short_content_raises_ContentTooShortError(self):
        self.addCleanup(urllib.request.urlcleanup)
        self.fakehttp(self._SHORT_RESPONSE)

        def _reporthook(par1, par2, par3):
            pass

        with self.assertRaises(urllib.error.ContentTooShortError):
            try:
                urllib.request.urlretrieve(support.TEST_HTTP_URL,
                                           reporthook=_reporthook)
            finally:
                self.unfakehttp()

    def test_short_content_raises_ContentTooShortError_without_reporthook(self):
        self.addCleanup(urllib.request.urlcleanup)
        self.fakehttp(self._SHORT_RESPONSE)
        with self.assertRaises(urllib.error.ContentTooShortError):
            try:
                urllib.request.urlretrieve(support.TEST_HTTP_URL)
            finally:
                self.unfakehttp()
871
872
873class QuotingTests(unittest.TestCase):
874    r"""Tests for urllib.quote() and urllib.quote_plus()
875
876    According to RFC 3986 (Uniform Resource Identifiers), to escape a
877    character you write it as '%' + <2 character US-ASCII hex value>.
878    The Python code of ``'%' + hex(ord(<character>))[2:]`` escapes a
879    character properly. Case does not matter on the hex letters.
880
881    The various character sets specified are:
882
883    Reserved characters : ";/?:@&=+$,"
884        Have special meaning in URIs and must be escaped if not being used for
885        their special meaning
886    Data characters : letters, digits, and "-_.!~*'()"
887        Unreserved and do not need to be escaped; can be, though, if desired
888    Control characters : 0x00 - 0x1F, 0x7F
889        Have no use in URIs so must be escaped
890    space : 0x20
891        Must be escaped
892    Delimiters : '<>#%"'
893        Must be escaped
894    Unwise : "{}|\^[]`"
895        Must be escaped
896
897    """
898
899    def test_never_quote(self):
900        # Make sure quote() does not quote letters, digits, and "_,.-"
901        do_not_quote = '' .join(["ABCDEFGHIJKLMNOPQRSTUVWXYZ",
902                                 "abcdefghijklmnopqrstuvwxyz",
903                                 "0123456789",
904                                 "_.-~"])
905        result = urllib.parse.quote(do_not_quote)
906        self.assertEqual(do_not_quote, result,
907                         "using quote(): %r != %r" % (do_not_quote, result))
908        result = urllib.parse.quote_plus(do_not_quote)
909        self.assertEqual(do_not_quote, result,
910                        "using quote_plus(): %r != %r" % (do_not_quote, result))
911
912    def test_default_safe(self):
913        # Test '/' is default value for 'safe' parameter
914        self.assertEqual(urllib.parse.quote.__defaults__[0], '/')
915
916    def test_safe(self):
917        # Test setting 'safe' parameter does what it should do
918        quote_by_default = "<>"
919        result = urllib.parse.quote(quote_by_default, safe=quote_by_default)
920        self.assertEqual(quote_by_default, result,
921                         "using quote(): %r != %r" % (quote_by_default, result))
922        result = urllib.parse.quote_plus(quote_by_default,
923                                         safe=quote_by_default)
924        self.assertEqual(quote_by_default, result,
925                         "using quote_plus(): %r != %r" %
926                         (quote_by_default, result))
927        # Safe expressed as bytes rather than str
928        result = urllib.parse.quote(quote_by_default, safe=b"<>")
929        self.assertEqual(quote_by_default, result,
930                         "using quote(): %r != %r" % (quote_by_default, result))
931        # "Safe" non-ASCII characters should have no effect
932        # (Since URIs are not allowed to have non-ASCII characters)
933        result = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="\xfc")
934        expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="")
935        self.assertEqual(expect, result,
936                         "using quote(): %r != %r" %
937                         (expect, result))
938        # Same as above, but using a bytes rather than str
939        result = urllib.parse.quote("a\xfcb", encoding="latin-1", safe=b"\xfc")
940        expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="")
941        self.assertEqual(expect, result,
942                         "using quote(): %r != %r" %
943                         (expect, result))
944
945    def test_default_quoting(self):
946        # Make sure all characters that should be quoted are by default sans
947        # space (separate test for that).
948        should_quote = [chr(num) for num in range(32)] # For 0x00 - 0x1F
949        should_quote.append(r'<>#%"{}|\^[]`')
950        should_quote.append(chr(127)) # For 0x7F
951        should_quote = ''.join(should_quote)
952        for char in should_quote:
953            result = urllib.parse.quote(char)
954            self.assertEqual(hexescape(char), result,
955                             "using quote(): "
956                             "%s should be escaped to %s, not %s" %
957                             (char, hexescape(char), result))
958            result = urllib.parse.quote_plus(char)
959            self.assertEqual(hexescape(char), result,
960                             "using quote_plus(): "
961                             "%s should be escapes to %s, not %s" %
962                             (char, hexescape(char), result))
963        del should_quote
964        partial_quote = "ab[]cd"
965        expected = "ab%5B%5Dcd"
966        result = urllib.parse.quote(partial_quote)
967        self.assertEqual(expected, result,
968                         "using quote(): %r != %r" % (expected, result))
969        result = urllib.parse.quote_plus(partial_quote)
970        self.assertEqual(expected, result,
971                         "using quote_plus(): %r != %r" % (expected, result))
972
973    def test_quoting_space(self):
974        # Make sure quote() and quote_plus() handle spaces as specified in
975        # their unique way
976        result = urllib.parse.quote(' ')
977        self.assertEqual(result, hexescape(' '),
978                         "using quote(): %r != %r" % (result, hexescape(' ')))
979        result = urllib.parse.quote_plus(' ')
980        self.assertEqual(result, '+',
981                         "using quote_plus(): %r != +" % result)
982        given = "a b cd e f"
983        expect = given.replace(' ', hexescape(' '))
984        result = urllib.parse.quote(given)
985        self.assertEqual(expect, result,
986                         "using quote(): %r != %r" % (expect, result))
987        expect = given.replace(' ', '+')
988        result = urllib.parse.quote_plus(given)
989        self.assertEqual(expect, result,
990                         "using quote_plus(): %r != %r" % (expect, result))
991
992    def test_quoting_plus(self):
993        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma'),
994                         'alpha%2Bbeta+gamma')
995        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', '+'),
996                         'alpha+beta+gamma')
997        # Test with bytes
998        self.assertEqual(urllib.parse.quote_plus(b'alpha+beta gamma'),
999                         'alpha%2Bbeta+gamma')
1000        # Test with safe bytes
1001        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', b'+'),
1002                         'alpha+beta+gamma')
1003
1004    def test_quote_bytes(self):
1005        # Bytes should quote directly to percent-encoded values
1006        given = b"\xa2\xd8ab\xff"
1007        expect = "%A2%D8ab%FF"
1008        result = urllib.parse.quote(given)
1009        self.assertEqual(expect, result,
1010                         "using quote(): %r != %r" % (expect, result))
1011        # Encoding argument should raise type error on bytes input
1012        self.assertRaises(TypeError, urllib.parse.quote, given,
1013                            encoding="latin-1")
1014        # quote_from_bytes should work the same
1015        result = urllib.parse.quote_from_bytes(given)
1016        self.assertEqual(expect, result,
1017                         "using quote_from_bytes(): %r != %r"
1018                         % (expect, result))
1019
1020    def test_quote_with_unicode(self):
1021        # Characters in Latin-1 range, encoded by default in UTF-8
1022        given = "\xa2\xd8ab\xff"
1023        expect = "%C2%A2%C3%98ab%C3%BF"
1024        result = urllib.parse.quote(given)
1025        self.assertEqual(expect, result,
1026                         "using quote(): %r != %r" % (expect, result))
1027        # Characters in Latin-1 range, encoded by with None (default)
1028        result = urllib.parse.quote(given, encoding=None, errors=None)
1029        self.assertEqual(expect, result,
1030                         "using quote(): %r != %r" % (expect, result))
1031        # Characters in Latin-1 range, encoded with Latin-1
1032        given = "\xa2\xd8ab\xff"
1033        expect = "%A2%D8ab%FF"
1034        result = urllib.parse.quote(given, encoding="latin-1")
1035        self.assertEqual(expect, result,
1036                         "using quote(): %r != %r" % (expect, result))
1037        # Characters in BMP, encoded by default in UTF-8
1038        given = "\u6f22\u5b57"              # "Kanji"
1039        expect = "%E6%BC%A2%E5%AD%97"
1040        result = urllib.parse.quote(given)
1041        self.assertEqual(expect, result,
1042                         "using quote(): %r != %r" % (expect, result))
1043        # Characters in BMP, encoded with Latin-1
1044        given = "\u6f22\u5b57"
1045        self.assertRaises(UnicodeEncodeError, urllib.parse.quote, given,
1046                                    encoding="latin-1")
1047        # Characters in BMP, encoded with Latin-1, with replace error handling
1048        given = "\u6f22\u5b57"
1049        expect = "%3F%3F"                   # "??"
1050        result = urllib.parse.quote(given, encoding="latin-1",
1051                                    errors="replace")
1052        self.assertEqual(expect, result,
1053                         "using quote(): %r != %r" % (expect, result))
1054        # Characters in BMP, Latin-1, with xmlcharref error handling
1055        given = "\u6f22\u5b57"
1056        expect = "%26%2328450%3B%26%2323383%3B"     # "&#28450;&#23383;"
1057        result = urllib.parse.quote(given, encoding="latin-1",
1058                                    errors="xmlcharrefreplace")
1059        self.assertEqual(expect, result,
1060                         "using quote(): %r != %r" % (expect, result))
1061
1062    def test_quote_plus_with_unicode(self):
1063        # Encoding (latin-1) test for quote_plus
1064        given = "\xa2\xd8 \xff"
1065        expect = "%A2%D8+%FF"
1066        result = urllib.parse.quote_plus(given, encoding="latin-1")
1067        self.assertEqual(expect, result,
1068                         "using quote_plus(): %r != %r" % (expect, result))
1069        # Errors test for quote_plus
1070        given = "ab\u6f22\u5b57 cd"
1071        expect = "ab%3F%3F+cd"
1072        result = urllib.parse.quote_plus(given, encoding="latin-1",
1073                                         errors="replace")
1074        self.assertEqual(expect, result,
1075                         "using quote_plus(): %r != %r" % (expect, result))
1076
1077
1078class UnquotingTests(unittest.TestCase):
1079    """Tests for unquote() and unquote_plus()
1080
1081    See the doc string for quoting_Tests for details on quoting and such.
1082
1083    """
1084
1085    def test_unquoting(self):
1086        # Make sure unquoting of all ASCII values works
1087        escape_list = []
1088        for num in range(128):
1089            given = hexescape(chr(num))
1090            expect = chr(num)
1091            result = urllib.parse.unquote(given)
1092            self.assertEqual(expect, result,
1093                             "using unquote(): %r != %r" % (expect, result))
1094            result = urllib.parse.unquote_plus(given)
1095            self.assertEqual(expect, result,
1096                             "using unquote_plus(): %r != %r" %
1097                             (expect, result))
1098            escape_list.append(given)
1099        escape_string = ''.join(escape_list)
1100        del escape_list
1101        result = urllib.parse.unquote(escape_string)
1102        self.assertEqual(result.count('%'), 1,
1103                         "using unquote(): not all characters escaped: "
1104                         "%s" % result)
1105        self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, None)
1106        self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, ())
1107
1108    def test_unquoting_badpercent(self):
1109        # Test unquoting on bad percent-escapes
1110        given = '%xab'
1111        expect = given
1112        result = urllib.parse.unquote(given)
1113        self.assertEqual(expect, result, "using unquote(): %r != %r"
1114                         % (expect, result))
1115        given = '%x'
1116        expect = given
1117        result = urllib.parse.unquote(given)
1118        self.assertEqual(expect, result, "using unquote(): %r != %r"
1119                         % (expect, result))
1120        given = '%'
1121        expect = given
1122        result = urllib.parse.unquote(given)
1123        self.assertEqual(expect, result, "using unquote(): %r != %r"
1124                         % (expect, result))
1125        # unquote_to_bytes
1126        given = '%xab'
1127        expect = bytes(given, 'ascii')
1128        result = urllib.parse.unquote_to_bytes(given)
1129        self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
1130                         % (expect, result))
1131        given = '%x'
1132        expect = bytes(given, 'ascii')
1133        result = urllib.parse.unquote_to_bytes(given)
1134        self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
1135                         % (expect, result))
1136        given = '%'
1137        expect = bytes(given, 'ascii')
1138        result = urllib.parse.unquote_to_bytes(given)
1139        self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
1140                         % (expect, result))
1141        self.assertRaises((TypeError, AttributeError), urllib.parse.unquote_to_bytes, None)
1142        self.assertRaises((TypeError, AttributeError), urllib.parse.unquote_to_bytes, ())
1143
1144    def test_unquoting_mixed_case(self):
1145        # Test unquoting on mixed-case hex digits in the percent-escapes
1146        given = '%Ab%eA'
1147        expect = b'\xab\xea'
1148        result = urllib.parse.unquote_to_bytes(given)
1149        self.assertEqual(expect, result,
1150                         "using unquote_to_bytes(): %r != %r"
1151                         % (expect, result))
1152
1153    def test_unquoting_parts(self):
1154        # Make sure unquoting works when have non-quoted characters
1155        # interspersed
1156        given = 'ab%sd' % hexescape('c')
1157        expect = "abcd"
1158        result = urllib.parse.unquote(given)
1159        self.assertEqual(expect, result,
1160                         "using quote(): %r != %r" % (expect, result))
1161        result = urllib.parse.unquote_plus(given)
1162        self.assertEqual(expect, result,
1163                         "using unquote_plus(): %r != %r" % (expect, result))
1164
1165    def test_unquoting_plus(self):
1166        # Test difference between unquote() and unquote_plus()
1167        given = "are+there+spaces..."
1168        expect = given
1169        result = urllib.parse.unquote(given)
1170        self.assertEqual(expect, result,
1171                         "using unquote(): %r != %r" % (expect, result))
1172        expect = given.replace('+', ' ')
1173        result = urllib.parse.unquote_plus(given)
1174        self.assertEqual(expect, result,
1175                         "using unquote_plus(): %r != %r" % (expect, result))
1176
1177    def test_unquote_to_bytes(self):
1178        given = 'br%C3%BCckner_sapporo_20050930.doc'
1179        expect = b'br\xc3\xbcckner_sapporo_20050930.doc'
1180        result = urllib.parse.unquote_to_bytes(given)
1181        self.assertEqual(expect, result,
1182                         "using unquote_to_bytes(): %r != %r"
1183                         % (expect, result))
1184        # Test on a string with unescaped non-ASCII characters
1185        # (Technically an invalid URI; expect those characters to be UTF-8
1186        # encoded).
1187        result = urllib.parse.unquote_to_bytes("\u6f22%C3%BC")
1188        expect = b'\xe6\xbc\xa2\xc3\xbc'    # UTF-8 for "\u6f22\u00fc"
1189        self.assertEqual(expect, result,
1190                         "using unquote_to_bytes(): %r != %r"
1191                         % (expect, result))
1192        # Test with a bytes as input
1193        given = b'%A2%D8ab%FF'
1194        expect = b'\xa2\xd8ab\xff'
1195        result = urllib.parse.unquote_to_bytes(given)
1196        self.assertEqual(expect, result,
1197                         "using unquote_to_bytes(): %r != %r"
1198                         % (expect, result))
1199        # Test with a bytes as input, with unescaped non-ASCII bytes
1200        # (Technically an invalid URI; expect those bytes to be preserved)
1201        given = b'%A2\xd8ab%FF'
1202        expect = b'\xa2\xd8ab\xff'
1203        result = urllib.parse.unquote_to_bytes(given)
1204        self.assertEqual(expect, result,
1205                         "using unquote_to_bytes(): %r != %r"
1206                         % (expect, result))
1207
1208    def test_unquote_with_unicode(self):
1209        # Characters in the Latin-1 range, encoded with UTF-8
1210        given = 'br%C3%BCckner_sapporo_20050930.doc'
1211        expect = 'br\u00fcckner_sapporo_20050930.doc'
1212        result = urllib.parse.unquote(given)
1213        self.assertEqual(expect, result,
1214                         "using unquote(): %r != %r" % (expect, result))
1215        # Characters in the Latin-1 range, encoded with None (default)
1216        result = urllib.parse.unquote(given, encoding=None, errors=None)
1217        self.assertEqual(expect, result,
1218                         "using unquote(): %r != %r" % (expect, result))
1219
1220        # Characters in the Latin-1 range, encoded with Latin-1
1221        result = urllib.parse.unquote('br%FCckner_sapporo_20050930.doc',
1222                                      encoding="latin-1")
1223        expect = 'br\u00fcckner_sapporo_20050930.doc'
1224        self.assertEqual(expect, result,
1225                         "using unquote(): %r != %r" % (expect, result))
1226
1227        # Characters in BMP, encoded with UTF-8
1228        given = "%E6%BC%A2%E5%AD%97"
1229        expect = "\u6f22\u5b57"             # "Kanji"
1230        result = urllib.parse.unquote(given)
1231        self.assertEqual(expect, result,
1232                         "using unquote(): %r != %r" % (expect, result))
1233
1234        # Decode with UTF-8, invalid sequence
1235        given = "%F3%B1"
1236        expect = "\ufffd"                   # Replacement character
1237        result = urllib.parse.unquote(given)
1238        self.assertEqual(expect, result,
1239                         "using unquote(): %r != %r" % (expect, result))
1240
1241        # Decode with UTF-8, invalid sequence, replace errors
1242        result = urllib.parse.unquote(given, errors="replace")
1243        self.assertEqual(expect, result,
1244                         "using unquote(): %r != %r" % (expect, result))
1245
1246        # Decode with UTF-8, invalid sequence, ignoring errors
1247        given = "%F3%B1"
1248        expect = ""
1249        result = urllib.parse.unquote(given, errors="ignore")
1250        self.assertEqual(expect, result,
1251                         "using unquote(): %r != %r" % (expect, result))
1252
1253        # A mix of non-ASCII and percent-encoded characters, UTF-8
1254        result = urllib.parse.unquote("\u6f22%C3%BC")
1255        expect = '\u6f22\u00fc'
1256        self.assertEqual(expect, result,
1257                         "using unquote(): %r != %r" % (expect, result))
1258
1259        # A mix of non-ASCII and percent-encoded characters, Latin-1
1260        # (Note, the string contains non-Latin-1-representable characters)
1261        result = urllib.parse.unquote("\u6f22%FC", encoding="latin-1")
1262        expect = '\u6f22\u00fc'
1263        self.assertEqual(expect, result,
1264                         "using unquote(): %r != %r" % (expect, result))
1265
1266    def test_unquoting_with_bytes_input(self):
1267        # ASCII characters decoded to a string
1268        given = b'blueberryjam'
1269        expect = 'blueberryjam'
1270        result = urllib.parse.unquote(given)
1271        self.assertEqual(expect, result,
1272                         "using unquote(): %r != %r" % (expect, result))
1273
1274        # A mix of non-ASCII hex-encoded characters and ASCII characters
1275        given = b'bl\xc3\xa5b\xc3\xa6rsyltet\xc3\xb8y'
1276        expect = 'bl\u00e5b\u00e6rsyltet\u00f8y'
1277        result = urllib.parse.unquote(given)
1278        self.assertEqual(expect, result,
1279                         "using unquote(): %r != %r" % (expect, result))
1280
1281        # A mix of non-ASCII percent-encoded characters and ASCII characters
1282        given = b'bl%c3%a5b%c3%a6rsyltet%c3%b8j'
1283        expect = 'bl\u00e5b\u00e6rsyltet\u00f8j'
1284        result = urllib.parse.unquote(given)
1285        self.assertEqual(expect, result,
1286                         "using unquote(): %r != %r" % (expect, result))
1287
1288
1289class urlencode_Tests(unittest.TestCase):
1290    """Tests for urlencode()"""
1291
1292    def help_inputtype(self, given, test_type):
1293        """Helper method for testing different input types.
1294
1295        'given' must lead to only the pairs:
1296            * 1st, 1
1297            * 2nd, 2
1298            * 3rd, 3
1299
1300        Test cannot assume anything about order.  Docs make no guarantee and
1301        have possible dictionary input.
1302
1303        """
1304        expect_somewhere = ["1st=1", "2nd=2", "3rd=3"]
1305        result = urllib.parse.urlencode(given)
1306        for expected in expect_somewhere:
1307            self.assertIn(expected, result,
1308                         "testing %s: %s not found in %s" %
1309                         (test_type, expected, result))
1310        self.assertEqual(result.count('&'), 2,
1311                         "testing %s: expected 2 '&'s; got %s" %
1312                         (test_type, result.count('&')))
1313        amp_location = result.index('&')
1314        on_amp_left = result[amp_location - 1]
1315        on_amp_right = result[amp_location + 1]
1316        self.assertTrue(on_amp_left.isdigit() and on_amp_right.isdigit(),
1317                     "testing %s: '&' not located in proper place in %s" %
1318                     (test_type, result))
1319        self.assertEqual(len(result), (5 * 3) + 2, #5 chars per thing and amps
1320                         "testing %s: "
1321                         "unexpected number of characters: %s != %s" %
1322                         (test_type, len(result), (5 * 3) + 2))
1323
1324    def test_using_mapping(self):
1325        # Test passing in a mapping object as an argument.
1326        self.help_inputtype({"1st":'1', "2nd":'2', "3rd":'3'},
1327                            "using dict as input type")
1328
1329    def test_using_sequence(self):
1330        # Test passing in a sequence of two-item sequences as an argument.
1331        self.help_inputtype([('1st', '1'), ('2nd', '2'), ('3rd', '3')],
1332                            "using sequence of two-item tuples as input")
1333
1334    def test_quoting(self):
1335        # Make sure keys and values are quoted using quote_plus()
1336        given = {"&":"="}
1337        expect = "%s=%s" % (hexescape('&'), hexescape('='))
1338        result = urllib.parse.urlencode(given)
1339        self.assertEqual(expect, result)
1340        given = {"key name":"A bunch of pluses"}
1341        expect = "key+name=A+bunch+of+pluses"
1342        result = urllib.parse.urlencode(given)
1343        self.assertEqual(expect, result)
1344
1345    def test_doseq(self):
1346        # Test that passing True for 'doseq' parameter works correctly
1347        given = {'sequence':['1', '2', '3']}
1348        expect = "sequence=%s" % urllib.parse.quote_plus(str(['1', '2', '3']))
1349        result = urllib.parse.urlencode(given)
1350        self.assertEqual(expect, result)
1351        result = urllib.parse.urlencode(given, True)
1352        for value in given["sequence"]:
1353            expect = "sequence=%s" % value
1354            self.assertIn(expect, result)
1355        self.assertEqual(result.count('&'), 2,
1356                         "Expected 2 '&'s, got %s" % result.count('&'))
1357
1358    def test_empty_sequence(self):
1359        self.assertEqual("", urllib.parse.urlencode({}))
1360        self.assertEqual("", urllib.parse.urlencode([]))
1361
1362    def test_nonstring_values(self):
1363        self.assertEqual("a=1", urllib.parse.urlencode({"a": 1}))
1364        self.assertEqual("a=None", urllib.parse.urlencode({"a": None}))
1365
1366    def test_nonstring_seq_values(self):
1367        self.assertEqual("a=1&a=2", urllib.parse.urlencode({"a": [1, 2]}, True))
1368        self.assertEqual("a=None&a=a",
1369                         urllib.parse.urlencode({"a": [None, "a"]}, True))
1370        data = collections.OrderedDict([("a", 1), ("b", 1)])
1371        self.assertEqual("a=a&a=b",
1372                         urllib.parse.urlencode({"a": data}, True))
1373
1374    def test_urlencode_encoding(self):
1375        # ASCII encoding. Expect %3F with errors="replace'
1376        given = (('\u00a0', '\u00c1'),)
1377        expect = '%3F=%3F'
1378        result = urllib.parse.urlencode(given, encoding="ASCII", errors="replace")
1379        self.assertEqual(expect, result)
1380
1381        # Default is UTF-8 encoding.
1382        given = (('\u00a0', '\u00c1'),)
1383        expect = '%C2%A0=%C3%81'
1384        result = urllib.parse.urlencode(given)
1385        self.assertEqual(expect, result)
1386
1387        # Latin-1 encoding.
1388        given = (('\u00a0', '\u00c1'),)
1389        expect = '%A0=%C1'
1390        result = urllib.parse.urlencode(given, encoding="latin-1")
1391        self.assertEqual(expect, result)
1392
1393    def test_urlencode_encoding_doseq(self):
1394        # ASCII Encoding. Expect %3F with errors="replace'
1395        given = (('\u00a0', '\u00c1'),)
1396        expect = '%3F=%3F'
1397        result = urllib.parse.urlencode(given, doseq=True,
1398                                        encoding="ASCII", errors="replace")
1399        self.assertEqual(expect, result)
1400
1401        # ASCII Encoding. On a sequence of values.
1402        given = (("\u00a0", (1, "\u00c1")),)
1403        expect = '%3F=1&%3F=%3F'
1404        result = urllib.parse.urlencode(given, True,
1405                                        encoding="ASCII", errors="replace")
1406        self.assertEqual(expect, result)
1407
1408        # Utf-8
1409        given = (("\u00a0", "\u00c1"),)
1410        expect = '%C2%A0=%C3%81'
1411        result = urllib.parse.urlencode(given, True)
1412        self.assertEqual(expect, result)
1413
1414        given = (("\u00a0", (42, "\u00c1")),)
1415        expect = '%C2%A0=42&%C2%A0=%C3%81'
1416        result = urllib.parse.urlencode(given, True)
1417        self.assertEqual(expect, result)
1418
1419        # latin-1
1420        given = (("\u00a0", "\u00c1"),)
1421        expect = '%A0=%C1'
1422        result = urllib.parse.urlencode(given, True, encoding="latin-1")
1423        self.assertEqual(expect, result)
1424
1425        given = (("\u00a0", (42, "\u00c1")),)
1426        expect = '%A0=42&%A0=%C1'
1427        result = urllib.parse.urlencode(given, True, encoding="latin-1")
1428        self.assertEqual(expect, result)
1429
1430    def test_urlencode_bytes(self):
1431        given = ((b'\xa0\x24', b'\xc1\x24'),)
1432        expect = '%A0%24=%C1%24'
1433        result = urllib.parse.urlencode(given)
1434        self.assertEqual(expect, result)
1435        result = urllib.parse.urlencode(given, True)
1436        self.assertEqual(expect, result)
1437
1438        # Sequence of values
1439        given = ((b'\xa0\x24', (42, b'\xc1\x24')),)
1440        expect = '%A0%24=42&%A0%24=%C1%24'
1441        result = urllib.parse.urlencode(given, True)
1442        self.assertEqual(expect, result)
1443
1444    def test_urlencode_encoding_safe_parameter(self):
1445
1446        # Send '$' (\x24) as safe character
1447        # Default utf-8 encoding
1448
1449        given = ((b'\xa0\x24', b'\xc1\x24'),)
1450        result = urllib.parse.urlencode(given, safe=":$")
1451        expect = '%A0$=%C1$'
1452        self.assertEqual(expect, result)
1453
1454        given = ((b'\xa0\x24', b'\xc1\x24'),)
1455        result = urllib.parse.urlencode(given, doseq=True, safe=":$")
1456        expect = '%A0$=%C1$'
1457        self.assertEqual(expect, result)
1458
1459        # Safe parameter in sequence
1460        given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
1461        expect = '%A0$=%C1$&%A0$=13&%A0$=42'
1462        result = urllib.parse.urlencode(given, True, safe=":$")
1463        self.assertEqual(expect, result)
1464
1465        # Test all above in latin-1 encoding
1466
1467        given = ((b'\xa0\x24', b'\xc1\x24'),)
1468        result = urllib.parse.urlencode(given, safe=":$",
1469                                        encoding="latin-1")
1470        expect = '%A0$=%C1$'
1471        self.assertEqual(expect, result)
1472
1473        given = ((b'\xa0\x24', b'\xc1\x24'),)
1474        expect = '%A0$=%C1$'
1475        result = urllib.parse.urlencode(given, doseq=True, safe=":$",
1476                                        encoding="latin-1")
1477
1478        given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
1479        expect = '%A0$=%C1$&%A0$=13&%A0$=42'
1480        result = urllib.parse.urlencode(given, True, safe=":$",
1481                                        encoding="latin-1")
1482        self.assertEqual(expect, result)
1483
class Pathname_Tests(unittest.TestCase):
    """Test pathname2url() and url2pathname()"""

    def test_basic(self):
        # A simple relative path must round-trip through both functions.
        expected_path = os.path.join("parts", "of", "a", "path")
        expected_url = "parts/of/a/path"
        result = urllib.request.pathname2url(expected_path)
        self.assertEqual(expected_url, result,
                         "pathname2url() failed; %s != %s" %
                         (result, expected_url))
        result = urllib.request.url2pathname(expected_url)
        # Bug fix: the failure message misspelled the function name
        # ("url2pathame").
        self.assertEqual(expected_path, result,
                         "url2pathname() failed; %s != %s" %
                         (result, expected_path))

    def test_quoting(self):
        # Test automatic quoting and unquoting works for pathname2url() and
        # url2pathname() respectively
        given = os.path.join("needs", "quot=ing", "here")
        expect = "needs/%s/here" % urllib.parse.quote("quot=ing")
        result = urllib.request.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        expect = given
        result = urllib.request.url2pathname(result)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))
        given = os.path.join("make sure", "using_quote")
        expect = "%s/using_quote" % urllib.parse.quote("make sure")
        result = urllib.request.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        # '+' is not special in path quoting and must pass through unchanged.
        given = "make+sure/using_unquote"
        expect = os.path.join("make+sure", "using_unquote")
        result = urllib.request.url2pathname(given)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))

    @unittest.skipUnless(sys.platform == 'win32',
                         'test specific to the urllib.url2path function.')
    def test_ntpath(self):
        # Windows drive-letter URLs ('C:' or the old 'C|' form) all map
        # to the same local path.
        given = ('/C:/', '///C:/', '/C|//')
        expect = 'C:\\'
        for url in given:
            result = urllib.request.url2pathname(url)
            # Bug fix: message had a doubled dot ("urllib.request..url2pathname").
            self.assertEqual(expect, result,
                             'urllib.request.url2pathname() failed; %s != %s' %
                             (expect, result))
        given = '///C|/path'
        expect = 'C:\\path'
        result = urllib.request.url2pathname(given)
        self.assertEqual(expect, result,
                         'urllib.request.url2pathname() failed; %s != %s' %
                         (expect, result))
1543
class Utility_Tests(unittest.TestCase):
    """Tests for the utility functions in urllib."""

    def test_thishost(self):
        """urllib.request.thishost() must return a tuple."""
        host_info = urllib.request.thishost()
        self.assertIsInstance(host_info, tuple)
1550
1551
class URLopener_Tests(FakeHTTPMixin, unittest.TestCase):
    """Testcase to test the open method of URLopener class."""

    def test_quoted_open(self):
        # A subclass whose open_spam() simply echoes its argument lets us
        # observe exactly what string URLopener.open() hands to the
        # per-scheme handler.
        class DummyURLopener(urllib.request.URLopener):
            def open_spam(self, url):
                return url
        # URLopener is deprecated; assert the expected warning is emitted.
        with support.check_warnings(
                ('DummyURLopener style of invoking requests is deprecated.',
                DeprecationWarning)):
            # The space in the path must be percent-quoted to %20.
            self.assertEqual(DummyURLopener().open(
                'spam://example/ /'),'//example/%20/')

            # test the safe characters are not quoted by urlopen
            self.assertEqual(DummyURLopener().open(
                "spam://c:|windows%/:=&?~#+!$,;'@()*[]|/path/"),
                "//c:|windows%/:=&?~#+!$,;'@()*[]|/path/")

    @support.ignore_warnings(category=DeprecationWarning)
    def test_urlopener_retrieve_file(self):
        # retrieve() on a file: URL should resolve back to the local file
        # the URL was built from.
        with support.temp_dir() as tmpdir:
            fd, tmpfile = tempfile.mkstemp(dir=tmpdir)
            os.close(fd)
            fileurl = "file:" + urllib.request.pathname2url(tmpfile)
            filename, _ = urllib.request.URLopener().retrieve(fileurl)
            # Some buildbots have TEMP folder that uses a lowercase drive letter.
            self.assertEqual(os.path.normcase(filename), os.path.normcase(tmpfile))

    @support.ignore_warnings(category=DeprecationWarning)
    def test_urlopener_retrieve_remote(self):
        # For a remote URL, retrieve() downloads into a temporary file that
        # keeps the URL path's extension.  fakehttp()/unfakehttp() (from the
        # mixin) replace the HTTP transport with a canned response, so no
        # network access happens; cleanup is registered before retrieving.
        url = "http://www.python.org/file.txt"
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
        self.addCleanup(self.unfakehttp)
        filename, _ = urllib.request.URLopener().retrieve(url)
        self.assertEqual(os.path.splitext(filename)[1], ".txt")

    @support.ignore_warnings(category=DeprecationWarning)
    def test_local_file_open(self):
        # bpo-35907, CVE-2019-9948: urllib must reject local_file:// scheme
        # Even a subclass that defines open_local_file() must not be
        # reachable through the dashed/underscored scheme aliases.
        class DummyURLopener(urllib.request.URLopener):
            def open_local_file(self, url):
                return url
        for url in ('local_file://example', 'local-file://example'):
            self.assertRaises(OSError, urllib.request.urlopen, url)
            self.assertRaises(OSError, urllib.request.URLopener().open, url)
            self.assertRaises(OSError, urllib.request.URLopener().retrieve, url)
            self.assertRaises(OSError, DummyURLopener().open, url)
            self.assertRaises(OSError, DummyURLopener().retrieve, url)
1600
1601
class RequestTests(unittest.TestCase):
    """Unit tests for urllib.request.Request."""

    def test_default_values(self):
        # With no data the request defaults to GET; supplying data (even
        # an empty mapping) switches the default method to POST.
        request = urllib.request.Request("http://www.python.org")
        self.assertEqual('GET', request.get_method())
        request = urllib.request.Request("http://www.python.org", {})
        self.assertEqual('POST', request.get_method())

    def test_with_method_arg(self):
        # An explicit method= argument wins over the data-based default,
        # and assigning .method afterwards is honoured by get_method().
        for data in (None, {}):
            request = urllib.request.Request("http://www.python.org",
                                             data, method='HEAD')
            self.assertEqual('HEAD', request.method)
            self.assertEqual('HEAD', request.get_method())
        request = urllib.request.Request("http://www.python.org",
                                         method='GET')
        self.assertEqual('GET', request.get_method())
        request.method = 'HEAD'
        self.assertEqual('HEAD', request.get_method())
1624
1625
class URL2PathNameTests(unittest.TestCase):
    """URL -> Windows path conversion via nturl2path.url2pathname()."""

    def test_converting_drive_letter(self):
        # Both the modern 'C:' and the legacy 'C|' drive forms convert.
        for url, expected in (("///C|", 'C:'),
                              ("///C:", 'C:'),
                              ("///C|/", 'C:\\')):
            self.assertEqual(expected, url2pathname(url))

    def test_converting_when_no_drive_letter(self):
        # cannot end a raw string in \
        self.assertEqual(r'\\\C\test' '\\', url2pathname("///C/test/"))
        self.assertEqual(r'\\C\test' '\\', url2pathname("////C/test/"))

    def test_simple_compare(self):
        expected = r'C:\foo\bar\spam.foo'
        self.assertEqual(expected, url2pathname("///C|/foo/bar/spam.foo"))

    def test_non_ascii_drive_letter(self):
        # A non-ASCII "drive letter" must be rejected.
        with self.assertRaises(IOError):
            url2pathname("///\u00e8|/")

    def test_roundtrip_url2pathname(self):
        # pathname2url() followed by url2pathname() must be the identity.
        for path in ('C:',
                     r'\\\C\test' '\\',
                     r'C:\foo\bar\spam.foo'):
            self.assertEqual(path, url2pathname(pathname2url(path)))
1652
class PathName2URLTests(unittest.TestCase):
    """Windows path -> URL conversion via nturl2path.pathname2url()."""

    def test_converting_drive_letter(self):
        # A trailing backslash after the drive is dropped.
        self.assertEqual('///C:', pathname2url("C:"))
        self.assertEqual('///C:', pathname2url("C:\\"))

    def test_converting_when_no_drive_letter(self):
        # Leading backslashes map one-for-one onto leading slashes.
        for path, expected in ((r"\\\folder\test" "\\", '/////folder/test/'),
                               (r"\\folder\test" "\\", '////folder/test/'),
                               (r"\folder\test" "\\", '/folder/test/')):
            self.assertEqual(expected, pathname2url(path))

    def test_simple_compare(self):
        self.assertEqual("///C:/foo/bar/spam.foo",
                         pathname2url(r'C:\foo\bar\spam.foo'))

    def test_long_drive_letter(self):
        # Multi-letter drive specifiers are invalid.
        with self.assertRaises(IOError):
            pathname2url("XX:\\")

    def test_roundtrip_pathname2url(self):
        # url2pathname() followed by pathname2url() must be the identity.
        for url in ('///C:',
                    '/////folder/test/',
                    '///C:/foo/bar/spam.foo'):
            self.assertEqual(url, pathname2url(url2pathname(url)))
1680
# Allow running this test file directly: discover and run every TestCase
# defined above.
if __name__ == '__main__':
    unittest.main()
1683