• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1"""An extensible library for opening URLs using a variety of protocols
2
3The simplest way to use this module is to call the urlopen function,
4which accepts a string containing a URL or a Request object (described
5below).  It opens the URL and returns the results as file-like
6object; the returned object has some extra methods described below.
7
8The OpenerDirector manages a collection of Handler objects that do
9all the actual work.  Each Handler implements a particular protocol or
10option.  The OpenerDirector is a composite object that invokes the
11Handlers needed to open the requested URL.  For example, the
12HTTPHandler performs HTTP GET and POST requests and deals with
13non-error returns.  The HTTPRedirectHandler automatically deals with
14HTTP 301, 302, 303, 307, and 308 redirect errors, and the
15HTTPDigestAuthHandler deals with digest authentication.
16
17urlopen(url, data=None) -- Basic usage is the same as original
18urllib.  pass the url and optionally data to post to an HTTP URL, and
19get a file-like object back.  One difference is that you can also pass
20a Request instance instead of URL.  Raises a URLError (subclass of
21OSError); for HTTP errors, raises an HTTPError, which can also be
22treated as a valid response.
23
24build_opener -- Function that creates a new OpenerDirector instance.
25Will install the default handlers.  Accepts one or more Handlers as
26arguments, either instances or Handler classes that it will
27instantiate.  If one of the argument is a subclass of the default
28handler, the argument will be installed instead of the default.
29
30install_opener -- Installs a new opener as the default opener.
31
32objects of interest:
33
34OpenerDirector -- Sets up the User Agent as the Python-urllib client and manages
35the Handler classes, while dealing with requests and responses.
36
37Request -- An object that encapsulates the state of a request.  The
38state can be as simple as the URL.  It can also include extra HTTP
39headers, e.g. a User-Agent.
40
41BaseHandler --
42
43internals:
44BaseHandler and parent
45_call_chain conventions
46
47Example usage:
48
49import urllib.request
50
51# set up authentication info
52authinfo = urllib.request.HTTPBasicAuthHandler()
53authinfo.add_password(realm='PDQ Application',
54                      uri='https://mahler:8092/site-updates.py',
55                      user='klem',
56                      passwd='geheim$parole')
57
58proxy_support = urllib.request.ProxyHandler({"http" : "http://ahad-haam:3128"})
59
60# build a new opener that adds authentication and caching FTP handlers
61opener = urllib.request.build_opener(proxy_support, authinfo,
62                                     urllib.request.CacheFTPHandler)
63
64# install it
65urllib.request.install_opener(opener)
66
67f = urllib.request.urlopen('https://www.python.org/')
68"""
69
# XXX issues:
# If an authentication error handler that tries to perform
# authentication for some reason but fails, how should the error be
# signalled?  The client needs to know the HTTP error code.  But if
# the handler knows what the problem was, e.g., that it didn't know
# the hash algorithm requested in the challenge, it would be good to
# pass that information along to the client, too.
# ftp errors aren't handled cleanly
# check digest against correct (i.e. non-apache) implementation

# Possible extensions:
# complex proxies  XXX not sure what exactly was meant by this
# abstract factory for opener
83
84import base64
85import bisect
86import email
87import hashlib
88import http.client
89import io
90import os
91import re
92import socket
93import string
94import sys
95import time
96import tempfile
97import contextlib
98import warnings
99
100
101from urllib.error import URLError, HTTPError, ContentTooShortError
102from urllib.parse import (
103    urlparse, urlsplit, urljoin, unwrap, quote, unquote,
104    _splittype, _splithost, _splitport, _splituser, _splitpasswd,
105    _splitattr, _splitquery, _splitvalue, _splittag, _to_bytes,
106    unquote_to_bytes, urlunparse)
107from urllib.response import addinfourl, addclosehook
108
109# check for SSL
# Detect SSL support: the ssl module is optional (the interpreter may be
# built without it), so record availability instead of failing at import.
try:
    import ssl
except ImportError:
    _have_ssl = False
else:
    _have_ssl = True
116
# Public names exported by "from urllib.request import *".
__all__ = [
    # Classes
    'Request', 'OpenerDirector', 'BaseHandler', 'HTTPDefaultErrorHandler',
    'HTTPRedirectHandler', 'HTTPCookieProcessor', 'ProxyHandler',
    'HTTPPasswordMgr', 'HTTPPasswordMgrWithDefaultRealm',
    'HTTPPasswordMgrWithPriorAuth', 'AbstractBasicAuthHandler',
    'HTTPBasicAuthHandler', 'ProxyBasicAuthHandler', 'AbstractDigestAuthHandler',
    'HTTPDigestAuthHandler', 'ProxyDigestAuthHandler', 'HTTPHandler',
    'FileHandler', 'FTPHandler', 'CacheFTPHandler', 'DataHandler',
    'UnknownHandler', 'HTTPErrorProcessor',
    # Functions
    'urlopen', 'install_opener', 'build_opener',
    'pathname2url', 'url2pathname', 'getproxies',
    # Legacy interface
    'urlretrieve', 'urlcleanup', 'URLopener', 'FancyURLopener',
]
133
# used in User-Agent header sent
__version__ = '%d.%d' % sys.version_info[:2]

# Module-wide default OpenerDirector, created lazily by urlopen() and
# replaceable via install_opener().
_opener = None
def urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
            *, context=None):
    '''Open the URL url, which can be either a string or a Request object.

    *data* must be an object specifying additional data to be sent to
    the server, or None if no such data is needed.  See Request for
    details.

    urllib.request module uses HTTP/1.1 and includes a "Connection:close"
    header in its HTTP requests.

    The optional *timeout* parameter specifies a timeout in seconds for
    blocking operations like the connection attempt (if not specified, the
    global default timeout setting will be used).  This only works for
    HTTP, HTTPS and FTP connections.

    If *context* is specified, it must be an ssl.SSLContext instance
    describing the various SSL options.  See HTTPSConnection for more
    details.

    This function always returns an object which can work as a context
    manager and has the properties url, headers, and status.  See
    urllib.response.addinfourl for more detail on these properties.

    For HTTP and HTTPS URLs, this function returns a slightly modified
    http.client.HTTPResponse object.  In addition to the three new
    properties above, the msg attribute contains the same information as
    the reason attribute --- the reason phrase returned by the server ---
    instead of the response headers as it is specified in the
    documentation for HTTPResponse.

    For FTP, file, and data URLs and requests explicitly handled by
    legacy URLopener and FancyURLopener classes, this function returns a
    urllib.response.addinfourl object.

    Note that None may be returned if no handler handles the request
    (though the default installed global OpenerDirector uses
    UnknownHandler to ensure this never happens).

    In addition, if proxy settings are detected (for example, when a
    *_proxy environment variable like http_proxy is set), ProxyHandler is
    default installed and makes sure the requests are handled through the
    proxy.
    '''
    global _opener
    if context:
        # A caller-supplied SSL context gets a dedicated opener rather
        # than the shared module-level default.
        opener = build_opener(HTTPSHandler(context=context))
    elif _opener is None:
        # First call without install_opener(): create and cache the default.
        _opener = opener = build_opener()
    else:
        opener = _opener
    return opener.open(url, data, timeout)
190
def install_opener(opener):
    """Install *opener* as the module-wide default used by urlopen()."""
    global _opener
    _opener = opener
194
# Paths of temporary files created by urlretrieve(), removed by urlcleanup().
_url_tempfiles = []
def urlretrieve(url, filename=None, reporthook=None, data=None):
    """
    Retrieve a URL into a temporary location on disk.

    Requires a URL argument.  If a filename is passed, it is used as
    the temporary file location.  The reporthook argument should be
    a callable that accepts a block number, a read size, and the
    total file size of the URL target.  The data argument should be
    valid URL encoded data.

    If a filename is passed and the URL points to a local resource,
    the result is a copy from local file to new file.

    Returns a tuple containing the path to the newly created
    data file as well as the resulting HTTPMessage object.
    """
    url_type, path = _splittype(url)

    with contextlib.closing(urlopen(url, data)) as fp:
        headers = fp.info()

        # file:// URLs with no explicit filename need no copy at all:
        # just hand back the local path and the "headers".
        if url_type == "file" and not filename:
            return os.path.normpath(path), headers

        # Pick the destination: the caller's filename, or a fresh
        # temporary file remembered for urlcleanup().
        if filename:
            tfp = open(filename, 'wb')
        else:
            tfp = tempfile.NamedTemporaryFile(delete=False)
            filename = tfp.name
            _url_tempfiles.append(filename)

        with tfp:
            result = filename, headers
            chunk_size = 1024 * 8
            expected_size = -1      # -1 means the server sent no length
            bytes_read = 0
            block_index = 0
            if "content-length" in headers:
                expected_size = int(headers["Content-Length"])

            # Initial hook call (block 0) before any data arrives.
            if reporthook:
                reporthook(block_index, chunk_size, expected_size)

            while chunk := fp.read(chunk_size):
                bytes_read += len(chunk)
                tfp.write(chunk)
                block_index += 1
                if reporthook:
                    reporthook(block_index, chunk_size, expected_size)

    # A short read against a declared Content-Length is an error.
    if expected_size >= 0 and bytes_read < expected_size:
        raise ContentTooShortError(
            "retrieval incomplete: got only %i out of %i bytes"
            % (bytes_read, expected_size), result)

    return result
255
def urlcleanup():
    """Clean up temporary files from urlretrieve calls."""
    global _opener
    # Best-effort removal: files may already be gone or unremovable.
    for temp_file in _url_tempfiles:
        try:
            os.unlink(temp_file)
        except OSError:
            pass
    _url_tempfiles.clear()
    # Also drop any opener installed via install_opener().
    if _opener:
        _opener = None
268
269# copied from cookielib.py
# copied from cookielib.py
_cut_port_re = re.compile(r":\d+$", re.ASCII)

def request_host(request):
    """Return request-host, as defined by RFC 2965.

    Variation from RFC: the returned value is lowercased, for convenient
    comparison.
    """
    host = urlparse(request.full_url)[1]
    if not host:
        # Relative URL: fall back to the explicit Host header, if any.
        host = request.get_header("Host", "")
    # Strip a trailing :port component before lowercasing.
    return _cut_port_re.sub("", host, 1).lower()
286
class Request:
    """Encapsulate the state of a single URL request.

    Holds the URL, the optional request body (*data*), the headers,
    and cookie-related metadata such as the origin request host
    (RFC 2965) and the unverifiable flag.
    """

    def __init__(self, url, data=None, headers={},
                 origin_req_host=None, unverifiable=False,
                 method=None):
        # NOTE: the mutable default for *headers* is safe here because
        # the dict is only iterated, never mutated.
        self.full_url = url           # property setter: unwraps, splits, parses
        self.headers = {}
        self.unredirected_hdrs = {}   # headers that are dropped on redirect
        self._data = None
        self.data = data              # property setter: may clear Content-length
        self._tunnel_host = None      # set when https is tunnelled via a proxy
        for key, value in headers.items():
            self.add_header(key, value)
        if origin_req_host is None:
            origin_req_host = request_host(self)
        self.origin_req_host = origin_req_host
        self.unverifiable = unverifiable
        if method:
            self.method = method

    @property
    def full_url(self):
        """The complete URL, with the fragment re-attached if one was given."""
        if self.fragment:
            return '{}#{}'.format(self._full_url, self.fragment)
        return self._full_url

    @full_url.setter
    def full_url(self, url):
        # unwrap('<URL:type://host/path>') --> 'type://host/path'
        self._full_url = unwrap(url)
        # The fragment is stored separately and re-added by the getter.
        self._full_url, self.fragment = _splittag(self._full_url)
        self._parse()

    @full_url.deleter
    def full_url(self):
        self._full_url = None
        self.fragment = None
        self.selector = ''

    @property
    def data(self):
        """The request body, or None when there is no body."""
        return self._data

    @data.setter
    def data(self, data):
        if data != self._data:
            self._data = data
            # issue 16464
            # if we change data we need to remove content-length header
            # (cause it's most probably calculated for previous value)
            if self.has_header("Content-length"):
                self.remove_header("Content-length")

    @data.deleter
    def data(self):
        self.data = None

    def _parse(self):
        # Populate self.type (the scheme), self.host and self.selector
        # from the stored URL; reject URLs with no scheme.
        self.type, rest = _splittype(self._full_url)
        if self.type is None:
            raise ValueError("unknown url type: %r" % self.full_url)
        self.host, self.selector = _splithost(rest)
        if self.host:
            self.host = unquote(self.host)

    def get_method(self):
        """Return a string indicating the HTTP request method."""
        default_method = "POST" if self.data is not None else "GET"
        return getattr(self, 'method', default_method)

    def get_full_url(self):
        """Return the complete URL (legacy accessor for the property)."""
        return self.full_url

    def set_proxy(self, host, type):
        """Route this request through the proxy at *host* with scheme *type*."""
        if self.type == 'https' and not self._tunnel_host:
            # https goes through a CONNECT tunnel: remember the real host
            # so the connection can be tunnelled to it, keep the selector.
            self._tunnel_host = self.host
        else:
            self.type= type
            self.selector = self.full_url
        self.host = host

    def has_proxy(self):
        """Return True if set_proxy() rewrote the selector for a proxy."""
        return self.selector == self.full_url

    def add_header(self, key, val):
        # useful for something like authentication
        self.headers[key.capitalize()] = val

    def add_unredirected_header(self, key, val):
        # will not be added to a redirected request
        self.unredirected_hdrs[key.capitalize()] = val

    def has_header(self, header_name):
        """Return True if the header is present in either header dict."""
        return (header_name in self.headers or
                header_name in self.unredirected_hdrs)

    def get_header(self, header_name, default=None):
        """Return the header value, checking regular headers first,
        then unredirected ones."""
        return self.headers.get(
            header_name,
            self.unredirected_hdrs.get(header_name, default))

    def remove_header(self, header_name):
        """Remove the header from both header dicts, if present."""
        self.headers.pop(header_name, None)
        self.unredirected_hdrs.pop(header_name, None)

    def header_items(self):
        """Return all headers as a list of (name, value) pairs;
        regular headers take precedence over unredirected ones."""
        hdrs = {**self.unredirected_hdrs, **self.headers}
        return list(hdrs.items())
395
class OpenerDirector:
    """Manage a chain of handlers and use them to open URLs.

    Handlers expose methods named "<protocol>_open",
    "<protocol>_request", "<protocol>_response" or
    "<protocol>_error_<code>"; add_handler() indexes those methods so
    that open() and error() can dispatch to the right handler chains.
    """

    def __init__(self):
        client_version = "Python-urllib/%s" % __version__
        self.addheaders = [('User-agent', client_version)]
        # self.handlers is retained only for backward compatibility
        self.handlers = []
        # manage the individual handlers
        self.handle_open = {}        # protocol -> [handlers with <proto>_open]
        self.handle_error = {}       # protocol -> {code -> [handlers]}
        self.process_response = {}   # protocol -> [handlers with <proto>_response]
        self.process_request = {}    # protocol -> [handlers with <proto>_request]

    def add_handler(self, handler):
        """Register *handler*'s protocol methods with this director."""
        if not hasattr(handler, "add_parent"):
            raise TypeError("expected BaseHandler instance, got %r" %
                            type(handler))

        added = False
        for meth in dir(handler):
            if meth in ["redirect_request", "do_open", "proxy_open"]:
                # oops, coincidental match
                continue

            # Method names look like "<protocol>_<condition>", e.g.
            # "http_open", "http_request", or "http_error_404".
            i = meth.find("_")
            protocol = meth[:i]
            condition = meth[i+1:]

            if condition.startswith("error"):
                j = condition.find("_") + i + 1
                kind = meth[j+1:]
                try:
                    # Numeric suffixes are HTTP status codes.
                    kind = int(kind)
                except ValueError:
                    pass
                lookup = self.handle_error.get(protocol, {})
                self.handle_error[protocol] = lookup
            elif condition == "open":
                kind = protocol
                lookup = self.handle_open
            elif condition == "response":
                kind = protocol
                lookup = self.process_response
            elif condition == "request":
                kind = protocol
                lookup = self.process_request
            else:
                continue

            # Keep each chain sorted by handler_order (BaseHandler.__lt__).
            handlers = lookup.setdefault(kind, [])
            if handlers:
                bisect.insort(handlers, handler)
            else:
                handlers.append(handler)
            added = True

        if added:
            bisect.insort(self.handlers, handler)
            handler.add_parent(self)

    def close(self):
        # Only exists for backwards compatibility.
        pass

    def _call_chain(self, chain, kind, meth_name, *args):
        # Handlers raise an exception if no one else should try to handle
        # the request, or return None if they can't but another handler
        # could.  Otherwise, they return the response.
        handlers = chain.get(kind, ())
        for handler in handlers:
            func = getattr(handler, meth_name)
            result = func(*args)
            if result is not None:
                return result

    def open(self, fullurl, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
        """Open *fullurl* (a URL string or Request) and return the response."""
        # accept a URL or a Request object
        if isinstance(fullurl, str):
            req = Request(fullurl, data)
        else:
            req = fullurl
            if data is not None:
                req.data = data

        req.timeout = timeout
        protocol = req.type

        # pre-process request
        meth_name = protocol+"_request"
        for processor in self.process_request.get(protocol, []):
            meth = getattr(processor, meth_name)
            req = meth(req)

        sys.audit('urllib.Request', req.full_url, req.data, req.headers, req.get_method())
        response = self._open(req, data)

        # post-process response
        meth_name = protocol+"_response"
        for processor in self.process_response.get(protocol, []):
            meth = getattr(processor, meth_name)
            response = meth(req, response)

        return response

    def _open(self, req, data=None):
        # Dispatch order: default_open handlers first, then the
        # protocol-specific handlers, finally unknown_open as a fallback.
        result = self._call_chain(self.handle_open, 'default',
                                  'default_open', req)
        if result:
            return result

        protocol = req.type
        result = self._call_chain(self.handle_open, protocol, protocol +
                                  '_open', req)
        if result:
            return result

        return self._call_chain(self.handle_open, 'unknown',
                                'unknown_open', req)

    def error(self, proto, *args):
        """Dispatch an error to the registered <proto>_error_* handler chain."""
        if proto in ('http', 'https'):
            # XXX http[s] protocols are special-cased
            dict = self.handle_error['http'] # https is not different than http
            proto = args[2]  # YUCK!
            meth_name = 'http_error_%s' % proto
            http_err = 1
            orig_args = args
        else:
            dict = self.handle_error
            meth_name = proto + '_error'
            http_err = 0
        args = (dict, proto, meth_name) + args
        result = self._call_chain(*args)
        if result:
            return result

        if http_err:
            # No code-specific handler consumed the error; fall back to
            # the catch-all http_error_default chain.
            args = (dict, 'default', 'http_error_default') + orig_args
            return self._call_chain(*args)
534
# XXX probably also want an abstract factory that knows when it makes
# sense to skip a superclass in favor of a subclass and when it might
# make sense to include both
538
def build_opener(*handlers):
    """Create an opener object from a list of handlers.

    The opener will use several default handlers, including support
    for HTTP, FTP and when applicable HTTPS.

    If any of the handlers passed as arguments are subclasses of the
    default handlers, the default handlers will not be used.
    """
    opener = OpenerDirector()
    default_classes = [ProxyHandler, UnknownHandler, HTTPHandler,
                       HTTPDefaultErrorHandler, HTTPRedirectHandler,
                       FTPHandler, FileHandler, HTTPErrorProcessor,
                       DataHandler]
    if hasattr(http.client, "HTTPSConnection"):
        default_classes.append(HTTPSHandler)

    def _overrides(candidate, default):
        # A caller-supplied handler displaces a default either as a
        # subclass passed by class, or as an instance of the default.
        if isinstance(candidate, type):
            return issubclass(candidate, default)
        return isinstance(candidate, default)

    # Install only the defaults not displaced by a caller-supplied handler,
    # preserving the canonical order above.
    for klass in default_classes:
        if not any(_overrides(check, klass) for check in handlers):
            opener.add_handler(klass())

    # Then add the caller's handlers, instantiating classes as needed.
    for handler in handlers:
        if isinstance(handler, type):
            handler = handler()
        opener.add_handler(handler)
    return opener
574
class BaseHandler:
    """Shared base for opener handlers: parent wiring and chain ordering."""

    # Position in the handler chain; lower values run earlier.
    handler_order = 500

    def add_parent(self, parent):
        """Remember the OpenerDirector this handler was added to."""
        self.parent = parent

    def close(self):
        """Do nothing; retained only for backwards compatibility."""

    def __lt__(self, other):
        # Objects without a handler_order (old custom handler classes
        # unaware of the attribute) sort after the defaults, preserving
        # the historical insertion behavior.
        if not hasattr(other, "handler_order"):
            return True
        return self.handler_order < other.handler_order
592
593
class HTTPErrorProcessor(BaseHandler):
    """Process HTTP error responses."""
    handler_order = 1000  # after all other processing

    def http_response(self, request, response):
        """Pass 2xx responses through; route everything else to error()."""
        code, msg, hdrs = response.code, response.msg, response.info()

        # According to RFC 2616, a "2xx" code indicates that the client's
        # request was successfully received, understood, and accepted.
        if 200 <= code < 300:
            return response
        return self.parent.error(
            'http', request, response, code, msg, hdrs)

    https_response = http_response
610
class HTTPDefaultErrorHandler(BaseHandler):
    """Last-resort handler: turn any unhandled HTTP error into an HTTPError."""

    def http_error_default(self, req, fp, code, msg, hdrs):
        raise HTTPError(req.full_url, code, msg, hdrs, fp)
614
class HTTPRedirectHandler(BaseHandler):
    """Follow HTTP 301/302/303/307/308 redirects, with loop protection."""

    # maximum number of redirections to any single URL
    # this is needed because of the state that cookies introduce
    max_repeats = 4
    # maximum total number of redirections (regardless of URL) before
    # assuming we're in a loop
    max_redirections = 10

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """Return a Request or None in response to a redirect.

        This is called by the http_error_30x methods when a
        redirection response is received.  If a redirection should
        take place, return a new Request to allow http_error_30x to
        perform the redirect.  Otherwise, raise HTTPError if no-one
        else should try to handle this url.  Return None if you can't
        but another Handler might.
        """
        m = req.get_method()
        # Only redirect GET/HEAD for all 30x codes, and POST for
        # 301/302/303; anything else is an error for the caller.
        if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD")
            or code in (301, 302, 303) and m == "POST")):
            raise HTTPError(req.full_url, code, msg, headers, fp)

        # Strictly (according to RFC 2616), 301 or 302 in response to
        # a POST MUST NOT cause a redirection without confirmation
        # from the user (of urllib.request, in this case).  In practice,
        # essentially all clients do redirect in this case, so we do
        # the same.

        # Be conciliant with URIs containing a space.  This is mainly
        # redundant with the more complete encoding done in http_error_302(),
        # but it is kept for compatibility with other callers.
        newurl = newurl.replace(' ', '%20')

        # Body-describing headers must not follow: the redirected
        # request is a GET/HEAD and carries no body.
        CONTENT_HEADERS = ("content-length", "content-type")
        newheaders = {k: v for k, v in req.headers.items()
                      if k.lower() not in CONTENT_HEADERS}
        return Request(newurl,
                       method="HEAD" if m == "HEAD" else "GET",
                       headers=newheaders,
                       origin_req_host=req.origin_req_host,
                       unverifiable=True)

    # Implementation note: To avoid the server sending us into an
    # infinite loop, the request object needs to track what URLs we
    # have already seen.  Do this by adding a handler-specific
    # attribute to the Request object.
    def http_error_302(self, req, fp, code, msg, headers):
        """Follow a redirect response by re-opening the new URL."""
        # Some servers (incorrectly) return multiple Location headers
        # (so probably same goes for URI).  Use first header.
        if "location" in headers:
            newurl = headers["location"]
        elif "uri" in headers:
            newurl = headers["uri"]
        else:
            return

        # fix a possible malformed URL
        urlparts = urlparse(newurl)

        # For security reasons we don't allow redirection to anything other
        # than http, https or ftp.

        if urlparts.scheme not in ('http', 'https', 'ftp', ''):
            raise HTTPError(
                newurl, code,
                "%s - Redirection to url '%s' is not allowed" % (msg, newurl),
                headers, fp)

        if not urlparts.path and urlparts.netloc:
            urlparts = list(urlparts)
            urlparts[2] = "/"
        newurl = urlunparse(urlparts)

        # http.client.parse_headers() decodes as ISO-8859-1.  Recover the
        # original bytes and percent-encode non-ASCII bytes, and any special
        # characters such as the space.
        newurl = quote(
            newurl, encoding="iso-8859-1", safe=string.punctuation)
        newurl = urljoin(req.full_url, newurl)

        # XXX Probably want to forget about the state of the current
        # request, although that might interact poorly with other
        # handlers that also use handler-specific request attributes
        new = self.redirect_request(req, fp, code, msg, headers, newurl)
        if new is None:
            return

        # loop detection
        # .redirect_dict has a key url if url was previously visited.
        if hasattr(req, 'redirect_dict'):
            visited = new.redirect_dict = req.redirect_dict
            if (visited.get(newurl, 0) >= self.max_repeats or
                len(visited) >= self.max_redirections):
                raise HTTPError(req.full_url, code,
                                self.inf_msg + msg, headers, fp)
        else:
            visited = new.redirect_dict = req.redirect_dict = {}
        visited[newurl] = visited.get(newurl, 0) + 1

        # Don't close the fp until we are sure that we won't use it
        # with HTTPError.
        fp.read()
        fp.close()

        return self.parent.open(new, timeout=req.timeout)

    # All redirect codes share the same implementation.
    http_error_301 = http_error_303 = http_error_307 = http_error_308 = http_error_302

    inf_msg = "The HTTP server returned a redirect error that would " \
              "lead to an infinite loop.\n" \
              "The last 30x error message was:\n"
727
728
729def _parse_proxy(proxy):
730    """Return (scheme, user, password, host/port) given a URL or an authority.
731
732    If a URL is supplied, it must have an authority (host:port) component.
733    According to RFC 3986, having an authority component means the URL must
734    have two slashes after the scheme.
735    """
736    scheme, r_scheme = _splittype(proxy)
737    if not r_scheme.startswith("/"):
738        # authority
739        scheme = None
740        authority = proxy
741    else:
742        # URL
743        if not r_scheme.startswith("//"):
744            raise ValueError("proxy URL with no authority: %r" % proxy)
745        # We have an authority, so for RFC 3986-compliant URLs (by ss 3.
746        # and 3.3.), path is empty or starts with '/'
747        if '@' in r_scheme:
748            host_separator = r_scheme.find('@')
749            end = r_scheme.find("/", host_separator)
750        else:
751            end = r_scheme.find("/", 2)
752        if end == -1:
753            end = None
754        authority = r_scheme[2:end]
755    userinfo, hostport = _splituser(authority)
756    if userinfo is not None:
757        user, password = _splitpasswd(userinfo)
758    else:
759        user = password = None
760    return scheme, user, password, hostport
761
class ProxyHandler(BaseHandler):
    """Route requests through proxies given as a {scheme: proxy_url} mapping."""
    # Proxies must be in front
    handler_order = 100

    def __init__(self, proxies=None):
        if proxies is None:
            proxies = getproxies()  # platform/environment proxy settings
        assert hasattr(proxies, 'keys'), "proxies must be a mapping"
        self.proxies = proxies
        # Create a "<scheme>_open" method on the fly for every scheme in
        # the mapping.  The current values are bound as lambda defaults
        # to avoid the late-binding closure pitfall.
        for type, url in proxies.items():
            type = type.lower()
            setattr(self, '%s_open' % type,
                    lambda r, proxy=url, type=type, meth=self.proxy_open:
                        meth(r, proxy, type))

    def proxy_open(self, req, proxy, type):
        """Rewrite *req* to go through *proxy*, or return None to skip it."""
        orig_type = req.type
        proxy_type, user, password, hostport = _parse_proxy(proxy)
        if proxy_type is None:
            proxy_type = orig_type

        # Honor bypass rules (e.g. no_proxy) for this host.
        if req.host and proxy_bypass(req.host):
            return None

        # Embed proxy credentials, if supplied, as a Basic auth header.
        if user and password:
            user_pass = '%s:%s' % (unquote(user),
                                   unquote(password))
            creds = base64.b64encode(user_pass.encode()).decode("ascii")
            req.add_header('Proxy-authorization', 'Basic ' + creds)
        hostport = unquote(hostport)
        req.set_proxy(hostport, proxy_type)
        if orig_type == proxy_type or orig_type == 'https':
            # let other handlers take care of it
            return None
        else:
            # need to start over, because the other handlers don't
            # grok the proxy's URL type
            # e.g. if we have a constructor arg proxies like so:
            # {'http': 'ftp://proxy.example.com'}, we may end up turning
            # a request for http://acme.example.com/a into one for
            # ftp://proxy.example.com/a
            return self.parent.open(req, timeout=req.timeout)
804
class HTTPPasswordMgr:
    """Store and look up credentials keyed by (realm, URI prefix)."""

    def __init__(self):
        # Maps realm -> {tuple-of-reduced-uris: (user, password)}.
        self.passwd = {}

    def add_password(self, realm, uri, user, passwd):
        """Register credentials for *realm* at one URI or a sequence of URIs."""
        if isinstance(uri, str):
            uri = [uri]
        realm_map = self.passwd.setdefault(realm, {})
        # Store under both default-port-normalized and raw forms so a
        # lookup matches whether or not the port is spelled out.
        for default_port in (True, False):
            key = tuple(self.reduce_uri(u, default_port) for u in uri)
            realm_map[key] = (user, passwd)

    def find_user_password(self, realm, authuri):
        """Return (user, password) for realm/authuri, or (None, None)."""
        domains = self.passwd.get(realm, {})
        for default_port in (True, False):
            reduced = self.reduce_uri(authuri, default_port)
            for uris, authinfo in domains.items():
                if any(self.is_suburi(uri, reduced) for uri in uris):
                    return authinfo
        return None, None

    def reduce_uri(self, uri, default_port=True):
        """Accept authority or URI and extract only the authority and path."""
        # note HTTP URLs do not have a userinfo component
        parts = urlsplit(uri)
        if parts[1]:
            # A full URI: keep the scheme for default-port normalization.
            scheme, authority = parts[0], parts[1]
            path = parts[2] or '/'
        else:
            # A bare host or host:port.
            scheme, authority, path = None, uri, '/'
        host, port = _splitport(authority)
        if default_port and port is None and scheme is not None:
            dport = {"http": 80, "https": 443}.get(scheme)
            if dport is not None:
                authority = "%s:%d" % (host, dport)
        return authority, path

    def is_suburi(self, base, test):
        """Check if test is below base in a URI tree.

        Both args must be URIs in reduced form.
        """
        if base == test:
            return True
        if base[0] != test[0]:
            return False
        prefix = base[1]
        if not prefix.endswith('/'):
            prefix += '/'
        return test[1].startswith(prefix)
867
868
class HTTPPasswordMgrWithDefaultRealm(HTTPPasswordMgr):
    """Password manager that falls back to credentials stored under
    the catch-all None realm when no realm-specific entry matches."""

    def find_user_password(self, realm, authuri):
        """Look up *realm* credentials, falling back to the default realm."""
        creds = HTTPPasswordMgr.find_user_password(self, realm, authuri)
        if creds[0] is not None:
            return creds
        # Nothing recorded for this realm -- try the None ("default") realm.
        return HTTPPasswordMgr.find_user_password(self, None, authuri)
877
878
class HTTPPasswordMgrWithPriorAuth(HTTPPasswordMgrWithDefaultRealm):
    """Password manager that also tracks which URIs may receive
    credentials pre-emptively (without waiting for a 401 challenge)."""

    def __init__(self, *args, **kwargs):
        # Maps reduced URIs to a bool: send credentials without a challenge?
        self.authenticated = {}
        super().__init__(*args, **kwargs)

    def add_password(self, realm, uri, user, passwd, is_authenticated=False):
        """Store credentials and record the URI's prior-auth flag."""
        self.update_authenticated(uri, is_authenticated)
        # Also register under the default (None) realm so prior-auth
        # requests can find the credentials without a realm.
        if realm is not None:
            super().add_password(None, uri, user, passwd)
        super().add_password(realm, uri, user, passwd)

    def update_authenticated(self, uri, is_authenticated=False):
        """Set the prior-auth flag for one URI or a sequence of URIs."""
        uris = [uri] if isinstance(uri, str) else uri

        for use_default_port in (True, False):
            for one_uri in uris:
                key = self.reduce_uri(one_uri, use_default_port)
                self.authenticated[key] = is_authenticated

    def is_authenticated(self, authuri):
        """Return the recorded flag for the closest matching URI, else None."""
        for use_default_port in (True, False):
            target = self.reduce_uri(authuri, use_default_port)
            for candidate in self.authenticated:
                if self.is_suburi(candidate, target):
                    return self.authenticated[candidate]
908
909
class AbstractBasicAuthHandler:
    """Shared machinery for HTTP Basic authentication.

    Parses WWW-Authenticate/Proxy-Authenticate challenges and retries
    the request with credentials drawn from a password manager.
    """

    # XXX this allows for multiple auth-schemes, but will stupidly pick
    # the last one with a realm specified.

    # allow for double- and single-quoted realm values
    # (single quotes are a violation of the RFC, but appear in the wild)
    rx = re.compile('(?:^|,)'   # start of the string or ','
                    '[ \t]*'    # optional whitespaces
                    '([^ \t,]+)' # scheme like "Basic"
                    '[ \t]+'    # mandatory whitespaces
                    # realm=xxx
                    # realm='xxx'
                    # realm="xxx"
                    'realm=(["\']?)([^"\']*)\\2',
                    re.I)

    # XXX could pre-emptively send auth info already accepted (RFC 2617,
    # end of section 2, and section 1.2 immediately after "credentials"
    # production).

    def __init__(self, password_mgr=None):
        """*password_mgr* defaults to a fresh HTTPPasswordMgr."""
        if password_mgr is None:
            password_mgr = HTTPPasswordMgr()
        self.passwd = password_mgr
        self.add_password = self.passwd.add_password

    def _parse_realm(self, header):
        """Yield (scheme, realm) pairs for each challenge in *header*.

        Accepts multiple challenges per header.  If no scheme/realm pair
        matches, yields the bare leading token with realm None so the
        caller can report the unsupported scheme.
        """
        found_challenge = False
        for mo in AbstractBasicAuthHandler.rx.finditer(header):
            scheme, quote, realm = mo.groups()
            if quote not in ['"', "'"]:
                warnings.warn("Basic Auth Realm was unquoted",
                              UserWarning, 3)

            yield (scheme, realm)

            found_challenge = True

        if not found_challenge:
            if header:
                scheme = header.split()[0]
            else:
                scheme = ''
            yield (scheme, None)

    def http_error_auth_reqed(self, authreq, host, req, headers):
        """Handle a 401/407 by retrying with Basic credentials.

        *host* may be an authority (without userinfo) or a URL with an
        authority.  Raises ValueError if only non-Basic schemes were
        offered.
        """
        headers = headers.get_all(authreq)
        if not headers:
            # no header found
            return

        unsupported = None
        for header in headers:
            for scheme, realm in self._parse_realm(header):
                if scheme.lower() != 'basic':
                    unsupported = scheme
                    continue

                if realm is not None:
                    # Use the first matching Basic challenge.
                    # Ignore following challenges even if they use the Basic
                    # scheme.
                    return self.retry_http_basic_auth(host, req, realm)

        if unsupported is not None:
            # Report the scheme we could not handle, not whatever scheme
            # happened to be parsed last (they can differ when a trailing
            # Basic challenge lacks a realm).
            raise ValueError("AbstractBasicAuthHandler does not "
                             "support the following scheme: %r"
                             % (unsupported,))

    def retry_http_basic_auth(self, host, req, realm):
        """Re-send *req* with an Authorization header for *realm*.

        Returns None if no credentials are known or the same header was
        already sent (avoids an infinite retry loop).
        """
        user, pw = self.passwd.find_user_password(realm, host)
        if pw is not None:
            raw = "%s:%s" % (user, pw)
            auth = "Basic " + base64.b64encode(raw.encode()).decode("ascii")
            if req.get_header(self.auth_header, None) == auth:
                return None
            req.add_unredirected_header(self.auth_header, auth)
            return self.parent.open(req, timeout=req.timeout)
        else:
            return None

    def http_request(self, req):
        """Pre-emptively attach credentials when prior auth is recorded."""
        if (not hasattr(self.passwd, 'is_authenticated') or
           not self.passwd.is_authenticated(req.full_url)):
            return req

        if not req.has_header('Authorization'):
            user, passwd = self.passwd.find_user_password(None, req.full_url)
            credentials = '{0}:{1}'.format(user, passwd).encode()
            auth_str = base64.standard_b64encode(credentials).decode()
            req.add_unredirected_header('Authorization',
                                        'Basic {}'.format(auth_str.strip()))
        return req

    def http_response(self, req, response):
        """Record whether this URL accepted our credentials (2xx response)."""
        if hasattr(self.passwd, 'is_authenticated'):
            if 200 <= response.code < 300:
                self.passwd.update_authenticated(req.full_url, True)
            else:
                self.passwd.update_authenticated(req.full_url, False)
        return response

    https_request = http_request
    https_response = http_response
1018
1019
1020
class HTTPBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Basic authentication against the origin server (401 responses)."""

    auth_header = 'Authorization'

    def http_error_401(self, req, fp, code, msg, headers):
        """Retry the request with credentials for the challenged realm."""
        return self.http_error_auth_reqed('www-authenticate',
                                          req.full_url, req, headers)
1030
1031
class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Basic authentication against an HTTP proxy (407 responses)."""

    auth_header = 'Proxy-authorization'

    def http_error_407(self, req, fp, code, msg, headers):
        """Retry the request with Proxy-Authorization credentials.

        http_error_auth_reqed requires that there is no userinfo component
        in authority.  Assume there isn't one, since urllib.request does
        not (and should not, RFC 3986 s. 3.2.1) support requests for URLs
        containing userinfo.
        """
        return self.http_error_auth_reqed('proxy-authenticate',
                                          req.host, req, headers)
1045
1046
# Return n random bytes (used below for digest-auth cnonce generation).
_randombytes = os.urandom
1049
1050
class AbstractDigestAuthHandler:
    """Shared machinery for HTTP Digest authentication handlers."""
    # Digest authentication is specified in RFC 2617.

    # XXX The client does not inspect the Authentication-Info header
    # in a successful response.

    # XXX It should be possible to test this implementation against
    # a mock server that just generates a static set of challenges.

    # XXX qop="auth-int" support is shaky

    def __init__(self, passwd=None):
        """*passwd* is a password manager; defaults to HTTPPasswordMgr."""
        if passwd is None:
            passwd = HTTPPasswordMgr()
        self.passwd = passwd
        self.add_password = self.passwd.add_password
        # Number of auth retries for the current request cycle.
        self.retried = 0
        # nonce_count/last_nonce implement the RFC 2617 "nc" value: the
        # count restarts whenever the server issues a new nonce.
        self.nonce_count = 0
        self.last_nonce = None

    def reset_retry_count(self):
        """Forget previous failures so the next request starts fresh."""
        self.retried = 0

    def http_error_auth_reqed(self, auth_header, host, req, headers):
        """Dispatch a Digest challenge found in *headers*, if any.

        Raises HTTPError after too many retries, and ValueError for
        challenge schemes other than Digest or Basic.
        """
        authreq = headers.get(auth_header, None)
        if self.retried > 5:
            # Don't fail endlessly - if we failed once, we'll probably
            # fail a second time. Hm. Unless the Password Manager is
            # prompting for the information. Crap. This isn't great
            # but it's better than the current 'repeat until recursion
            # depth exceeded' approach <wink>
            raise HTTPError(req.full_url, 401, "digest auth failed",
                            headers, None)
        else:
            self.retried += 1
        if authreq:
            scheme = authreq.split()[0]
            if scheme.lower() == 'digest':
                return self.retry_http_digest_auth(req, authreq)
            elif scheme.lower() != 'basic':
                # 'basic' is silently ignored here so a Basic handler can
                # pick it up instead.
                raise ValueError("AbstractDigestAuthHandler does not support"
                                 " the following scheme: '%s'" % scheme)

    def retry_http_digest_auth(self, req, auth):
        """Re-send *req* with a Digest Authorization header built from *auth*.

        Returns None when no credentials are available or the identical
        header was already sent (prevents an infinite retry loop).
        """
        token, challenge = auth.split(' ', 1)
        chal = parse_keqv_list(filter(None, parse_http_list(challenge)))
        auth = self.get_authorization(req, chal)
        if auth:
            auth_val = 'Digest %s' % auth
            if req.headers.get(self.auth_header, None) == auth_val:
                return None
            req.add_unredirected_header(self.auth_header, auth_val)
            resp = self.parent.open(req, timeout=req.timeout)
            return resp

    def get_cnonce(self, nonce):
        """Return a 16-hex-digit client nonce derived from *nonce*."""
        # The cnonce-value is an opaque
        # quoted string value provided by the client and used by both client
        # and server to avoid chosen plaintext attacks, to provide mutual
        # authentication, and to provide some message integrity protection.
        # This isn't a fabulous effort, but it's probably Good Enough.
        s = "%s:%s:%s:" % (self.nonce_count, nonce, time.ctime())
        b = s.encode("ascii") + _randombytes(8)
        dig = hashlib.sha1(b).hexdigest()
        return dig[:16]

    def get_authorization(self, req, chal):
        """Build the Digest Authorization header value for *req*.

        *chal* is the parsed challenge dict.  Returns None when the
        challenge is incomplete, the algorithm is unknown, or no
        credentials are stored for the realm.
        """
        try:
            realm = chal['realm']
            nonce = chal['nonce']
            qop = chal.get('qop')
            algorithm = chal.get('algorithm', 'MD5')
            # mod_digest doesn't send an opaque, even though it isn't
            # supposed to be optional
            opaque = chal.get('opaque', None)
        except KeyError:
            return None

        H, KD = self.get_algorithm_impls(algorithm)
        if H is None:
            return None

        user, pw = self.passwd.find_user_password(realm, req.full_url)
        if user is None:
            return None

        # XXX not implemented yet
        if req.data is not None:
            entdig = self.get_entity_digest(req.data, chal)
        else:
            entdig = None

        # A1/A2 as defined by RFC 2617 section 3.2.2.2 / 3.2.2.3.
        A1 = "%s:%s:%s" % (user, realm, pw)
        A2 = "%s:%s" % (req.get_method(),
                        # XXX selector: what about proxies and full urls
                        req.selector)
        # NOTE: As per RFC 2617, when the server sends "auth,auth-int" the
        # client may respond with either; we always respond with `auth`.
        if qop is None:
            respdig = KD(H(A1), "%s:%s" % (nonce, H(A2)))
        elif 'auth' in qop.split(','):
            if nonce == self.last_nonce:
                self.nonce_count += 1
            else:
                self.nonce_count = 1
                self.last_nonce = nonce
            ncvalue = '%08x' % self.nonce_count
            cnonce = self.get_cnonce(nonce)
            noncebit = "%s:%s:%s:%s:%s" % (nonce, ncvalue, cnonce, 'auth', H(A2))
            respdig = KD(H(A1), noncebit)
        else:
            # XXX handle auth-int.
            raise URLError("qop '%s' is not supported." % qop)

        # XXX should the partial digests be encoded too?

        base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \
               'response="%s"' % (user, realm, nonce, req.selector,
                                  respdig)
        if opaque:
            base += ', opaque="%s"' % opaque
        if entdig:
            base += ', digest="%s"' % entdig
        base += ', algorithm="%s"' % algorithm
        if qop:
            base += ', qop=auth, nc=%s, cnonce="%s"' % (ncvalue, cnonce)
        return base

    def get_algorithm_impls(self, algorithm):
        """Return (H, KD) hash callables for *algorithm* per RFC 2617."""
        # lambdas assume digest modules are imported at the top level
        if algorithm == 'MD5':
            H = lambda x: hashlib.md5(x.encode("ascii")).hexdigest()
        elif algorithm == 'SHA':
            H = lambda x: hashlib.sha1(x.encode("ascii")).hexdigest()
        # XXX MD5-sess
        else:
            raise ValueError("Unsupported digest authentication "
                             "algorithm %r" % algorithm)
        KD = lambda s, d: H("%s:%s" % (s, d))
        return H, KD

    def get_entity_digest(self, data, chal):
        # XXX not implemented yet
        return None
1195
1196
class HTTPDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """An authentication protocol defined by RFC 2069

    Digest authentication improves on basic authentication because it
    does not transmit passwords in the clear.
    """

    auth_header = 'Authorization'
    handler_order = 490  # before Basic auth

    def http_error_401(self, req, fp, code, msg, headers):
        """Retry with Digest credentials, then reset the retry counter."""
        # The authority component of the URL identifies the host.
        authority = urlparse(req.full_url)[1]
        retry = self.http_error_auth_reqed('www-authenticate',
                                           authority, req, headers)
        self.reset_retry_count()
        return retry
1213
1214
class ProxyDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """Digest authentication against an HTTP proxy (407 responses)."""

    auth_header = 'Proxy-Authorization'
    handler_order = 490  # before Basic auth

    def http_error_407(self, req, fp, code, msg, headers):
        """Retry with Proxy-Authorization Digest credentials, then reset."""
        retry = self.http_error_auth_reqed('proxy-authenticate',
                                           req.host, req, headers)
        self.reset_retry_count()
        return retry
1226
class AbstractHTTPHandler(BaseHandler):
    """Shared request-preparation and connection logic for HTTP(S) handlers."""

    def __init__(self, debuglevel=None):
        # Fall back to the class-wide http.client debug level when none given.
        self._debuglevel = debuglevel if debuglevel is not None else http.client.HTTPConnection.debuglevel

    def set_http_debuglevel(self, level):
        """Set the debug level passed to the underlying connection."""
        self._debuglevel = level

    def _get_content_length(self, request):
        # Delegate to http.client, which knows how to size bytes,
        # file-like objects and iterables (returns None when unknown).
        return http.client.HTTPConnection._get_content_length(
            request.data,
            request.get_method())

    def do_request_(self, request):
        """Fill in default headers on *request* and return it.

        Adds Content-type/Content-length (or Transfer-encoding: chunked),
        Host, and the opener's addheaders when not already present.
        Raises URLError when the request has no host, and TypeError for
        str POST data.
        """
        host = request.host
        if not host:
            raise URLError('no host given')

        if request.data is not None:  # POST
            data = request.data
            if isinstance(data, str):
                msg = "POST data should be bytes, an iterable of bytes, " \
                      "or a file object. It cannot be of type str."
                raise TypeError(msg)
            if not request.has_header('Content-type'):
                request.add_unredirected_header(
                    'Content-type',
                    'application/x-www-form-urlencoded')
            if (not request.has_header('Content-length')
                    and not request.has_header('Transfer-encoding')):
                content_length = self._get_content_length(request)
                if content_length is not None:
                    request.add_unredirected_header(
                            'Content-length', str(content_length))
                else:
                    # Unknown body size: stream it chunked instead.
                    request.add_unredirected_header(
                            'Transfer-encoding', 'chunked')

        # For proxied requests the Host header must name the origin
        # server, which is embedded in the selector, not req.host.
        sel_host = host
        if request.has_proxy():
            scheme, sel = _splittype(request.selector)
            sel_host, sel_path = _splithost(sel)
        if not request.has_header('Host'):
            request.add_unredirected_header('Host', sel_host)
        for name, value in self.parent.addheaders:
            name = name.capitalize()
            if not request.has_header(name):
                request.add_unredirected_header(name, value)

        return request

    def do_open(self, http_class, req, **http_conn_args):
        """Return an HTTPResponse object for the request, using http_class.

        http_class must implement the HTTPConnection API from http.client.
        """
        host = req.host
        if not host:
            raise URLError('no host given')

        # will parse host:port
        h = http_class(host, timeout=req.timeout, **http_conn_args)
        h.set_debuglevel(self._debuglevel)

        # Unredirected headers take precedence over normal ones.
        headers = dict(req.unredirected_hdrs)
        headers.update({k: v for k, v in req.headers.items()
                        if k not in headers})

        # TODO(jhylton): Should this be redesigned to handle
        # persistent connections?

        # We want to make an HTTP/1.1 request, but the addinfourl
        # class isn't prepared to deal with a persistent connection.
        # It will try to read all remaining data from the socket,
        # which will block while the server waits for the next request.
        # So make sure the connection gets closed after the (only)
        # request.
        headers["Connection"] = "close"
        # Normalize header-name capitalization (e.g. "Content-Type").
        headers = {name.title(): val for name, val in headers.items()}

        if req._tunnel_host:
            # CONNECT tunnel (https through a proxy): only the proxy may
            # see Proxy-Authorization; strip it from the origin request.
            tunnel_headers = {}
            proxy_auth_hdr = "Proxy-Authorization"
            if proxy_auth_hdr in headers:
                tunnel_headers[proxy_auth_hdr] = headers[proxy_auth_hdr]
                # Proxy-Authorization should not be sent to origin
                # server.
                del headers[proxy_auth_hdr]
            h.set_tunnel(req._tunnel_host, headers=tunnel_headers)

        try:
            try:
                h.request(req.get_method(), req.selector, req.data, headers,
                          encode_chunked=req.has_header('Transfer-encoding'))
            except OSError as err: # timeout error
                raise URLError(err)
            r = h.getresponse()
        except:
            # Any failure must not leak the connection.
            h.close()
            raise

        # If the server does not send us a 'Connection: close' header,
        # HTTPConnection assumes the socket should be left open. Manually
        # mark the socket to be closed when this response object goes away.
        if h.sock:
            h.sock.close()
            h.sock = None

        r.url = req.get_full_url()
        # This line replaces the .msg attribute of the HTTPResponse
        # with .headers, because urllib clients expect the response to
        # have the reason in .msg.  It would be good to mark this
        # attribute is deprecated and get then to use info() or
        # .headers.
        r.msg = r.reason
        return r
1343
1344
class HTTPHandler(AbstractHTTPHandler):
    """Open http:// URLs through http.client.HTTPConnection."""

    def http_open(self, req):
        """Send *req* over a plain HTTP connection and return the response."""
        return self.do_open(http.client.HTTPConnection, req)

    http_request = AbstractHTTPHandler.do_request_
1351
# HTTPSHandler exists only when http.client was built with SSL support.
if hasattr(http.client, 'HTTPSConnection'):

    class HTTPSHandler(AbstractHTTPHandler):
        """Open https:// URLs through http.client.HTTPSConnection."""

        def __init__(self, debuglevel=None, context=None, check_hostname=None):
            # Fall back to the class-wide debug level when none is given.
            debuglevel = debuglevel if debuglevel is not None else http.client.HTTPSConnection.debuglevel
            AbstractHTTPHandler.__init__(self, debuglevel)
            if context is None:
                # Build a default SSL context appropriate for this
                # HTTP protocol version (uses an http.client internal).
                http_version = http.client.HTTPSConnection._http_vsn
                context = http.client._create_https_context(http_version)
            if check_hostname is not None:
                context.check_hostname = check_hostname
            self._context = context

        def https_open(self, req):
            """Send *req* over TLS using the stored SSL context."""
            return self.do_open(http.client.HTTPSConnection, req,
                                context=self._context)

        https_request = AbstractHTTPHandler.do_request_

    __all__.append('HTTPSHandler')
1373
class HTTPCookieProcessor(BaseHandler):
    """Attach stored cookies to requests and harvest Set-Cookie responses."""

    def __init__(self, cookiejar=None):
        import http.cookiejar
        if cookiejar is None:
            cookiejar = http.cookiejar.CookieJar()
        self.cookiejar = cookiejar

    def http_request(self, request):
        """Add cookies matching *request* to its headers."""
        self.cookiejar.add_cookie_header(request)
        return request

    def http_response(self, request, response):
        """Record any cookies set by *response* in the jar."""
        self.cookiejar.extract_cookies(response, request)
        return response

    https_request = http_request
    https_response = http_response
1391
class UnknownHandler(BaseHandler):
    """Last-resort handler: reject URL schemes nobody else claimed."""

    def unknown_open(self, req):
        """Always fail: the request's scheme has no registered handler."""
        raise URLError('unknown url type: %s' % req.type)
1396
def parse_keqv_list(l):
    """Parse list of key=value strings where keys are not duplicated.

    Surrounding double quotes are stripped from values.  Later duplicate
    keys overwrite earlier ones.  Returns a dict.
    """
    parsed = {}
    for elt in l:
        k, v = elt.split('=', 1)
        # Slice rather than index so an empty value ("k=") doesn't raise
        # IndexError on v[0].
        if v[:1] == '"' and v[-1:] == '"':
            v = v[1:-1]
        parsed[k] = v
    return parsed
1406
def parse_http_list(s):
    """Parse lists as described by RFC 2068 Section 2.

    In particular, parse comma-separated lists where the elements of
    the list may include quoted-strings.  A quoted-string could
    contain a comma.  A non-quoted string could have quotes in the
    middle.  Neither commas nor quotes count if they are escaped.
    Only double-quotes count, not single-quotes.
    """
    items = []
    current = []
    in_quotes = False
    pending_escape = False

    for ch in s:
        if pending_escape:
            # Previous char was a backslash inside quotes: take literally.
            current.append(ch)
            pending_escape = False
        elif in_quotes:
            if ch == '\\':
                # Backslash escapes the next character (only inside quotes).
                pending_escape = True
            else:
                if ch == '"':
                    in_quotes = False
                current.append(ch)
        elif ch == ',':
            # Unquoted comma terminates the current element.
            items.append(''.join(current))
            current = []
        else:
            if ch == '"':
                in_quotes = True
            current.append(ch)

    # Append the trailing element, if any.
    if current:
        items.append(''.join(current))

    return [item.strip() for item in items]
1449
class FileHandler(BaseHandler):
    """Handler for local file:// URLs."""
    # Use local file or FTP depending on form of URL
    def file_open(self, req):
        # Reject file:// URLs naming a foreign host; open local ones.
        # NOTE(review): when the host IS one of this machine's names the
        # method falls through and returns None (handler declines) rather
        # than opening the file -- confirm that is intended.
        url = req.selector
        if url[:2] == '//' and url[2:3] != '/' and (req.host and
                req.host != 'localhost'):
            if not req.host in self.get_names():
                raise URLError("file:// scheme is supported only on localhost")
        else:
            return self.open_local_file(req)

    # names for the localhost
    names = None
    def get_names(self):
        """Return (and cache on the class) IPs that count as localhost."""
        if FileHandler.names is None:
            try:
                FileHandler.names = tuple(
                    socket.gethostbyname_ex('localhost')[2] +
                    socket.gethostbyname_ex(socket.gethostname())[2])
            except socket.gaierror:
                # Resolution failed; fall back to just localhost's address.
                FileHandler.names = (socket.gethostbyname('localhost'),)
        return FileHandler.names

    # not entirely sure what the rules are here
    def open_local_file(self, req):
        """Open the local file named by the request's selector.

        Returns an addinfourl with Content-type/length/Last-modified
        headers; raises URLError on stat/open failure or when the host
        does not resolve to this machine.
        """
        import email.utils
        import mimetypes
        host = req.host
        filename = req.selector
        localfile = url2pathname(filename)
        try:
            stats = os.stat(localfile)
            size = stats.st_size
            modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
            mtype = mimetypes.guess_type(filename)[0]
            headers = email.message_from_string(
                'Content-type: %s\nContent-length: %d\nLast-modified: %s\n' %
                (mtype or 'text/plain', size, modified))
            if host:
                host, port = _splitport(host)
            # Serve only when there is no host, or the (port-less) host
            # resolves to one of this machine's addresses.
            if not host or \
                (not port and _safe_gethostbyname(host) in self.get_names()):
                if host:
                    origurl = 'file://' + host + filename
                else:
                    origurl = 'file://' + filename
                return addinfourl(open(localfile, 'rb'), headers, origurl)
        except OSError as exp:
            raise URLError(exp)
        raise URLError('file not on local host')
1500
1501def _safe_gethostbyname(host):
1502    try:
1503        return socket.gethostbyname(host)
1504    except socket.gaierror:
1505        return None
1506
class FTPHandler(BaseHandler):
    """Handler for ftp:// URLs using ftplib."""
    def ftp_open(self, req):
        """Retrieve the file or directory listing named by *req*.

        Returns an addinfourl wrapping the data connection; raises
        URLError for missing host, resolution failure, or any ftplib
        error.
        """
        import ftplib
        import mimetypes
        host = req.host
        if not host:
            raise URLError('ftp error: no host given')
        host, port = _splitport(host)
        if port is None:
            port = ftplib.FTP_PORT
        else:
            port = int(port)

        # username/password handling
        user, host = _splituser(host)
        if user:
            user, passwd = _splitpasswd(user)
        else:
            passwd = None
        host = unquote(host)
        user = user or ''
        passwd = passwd or ''

        try:
            host = socket.gethostbyname(host)
        except OSError as msg:
            raise URLError(msg)
        # Split off ";type=..." attributes and break the path into
        # unquoted directory components plus the final filename.
        path, attrs = _splitattr(req.selector)
        dirs = path.split('/')
        dirs = list(map(unquote, dirs))
        dirs, file = dirs[:-1], dirs[-1]
        if dirs and not dirs[0]:
            dirs = dirs[1:]
        try:
            fw = self.connect_ftp(user, passwd, host, port, dirs, req.timeout)
            # Default transfer type: binary ('I') for files, directory
            # listing ('D') when no filename was given.
            type = file and 'I' or 'D'
            for attr in attrs:
                attr, value = _splitvalue(attr)
                if attr.lower() == 'type' and \
                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
                    type = value.upper()
            fp, retrlen = fw.retrfile(file, type)
            headers = ""
            mtype = mimetypes.guess_type(req.full_url)[0]
            if mtype:
                headers += "Content-type: %s\n" % mtype
            if retrlen is not None and retrlen >= 0:
                headers += "Content-length: %d\n" % retrlen
            headers = email.message_from_string(headers)
            return addinfourl(fp, headers, req.full_url)
        except ftplib.all_errors as exp:
            raise URLError(exp) from exp

    def connect_ftp(self, user, passwd, host, port, dirs, timeout):
        """Create a one-shot (non-persistent) FTP connection wrapper."""
        return ftpwrapper(user, passwd, host, port, dirs, timeout,
                          persistent=False)
1563
class CacheFTPHandler(FTPHandler):
    """FTP handler that caches and reuses connections per endpoint."""
    # XXX would be nice to have pluggable cache strategies
    # XXX this stuff is definitely not thread safe
    def __init__(self):
        self.cache = {}      # key -> ftpwrapper
        self.timeout = {}    # key -> absolute expiry time
        self.soonest = 0     # earliest expiry among cached entries
        self.delay = 60      # seconds an idle connection stays cached
        self.max_conns = 16  # cache size bound

    def setTimeout(self, t):
        """Set how long (in seconds) an idle connection is kept."""
        self.delay = t

    def setMaxConns(self, m):
        """Set the maximum number of simultaneously cached connections."""
        self.max_conns = m

    def connect_ftp(self, user, passwd, host, port, dirs, timeout):
        """Return a cached connection for this endpoint, creating one
        (and refreshing its expiry) as needed."""
        key = user, host, port, '/'.join(dirs), timeout
        if key in self.cache:
            self.timeout[key] = time.time() + self.delay
        else:
            self.cache[key] = ftpwrapper(user, passwd, host, port,
                                         dirs, timeout)
            self.timeout[key] = time.time() + self.delay
        self.check_cache()
        return self.cache[key]

    def check_cache(self):
        """Evict expired connections and enforce the size bound."""
        # first check for old ones
        t = time.time()
        if self.soonest <= t:
            for k, v in list(self.timeout.items()):
                if v < t:
                    self.cache[k].close()
                    del self.cache[k]
                    del self.timeout[k]
        # min() would raise ValueError on an empty cache (e.g. every entry
        # just expired); default to 0 so the next call simply rescans.
        self.soonest = min(self.timeout.values(), default=0)

        # then check the size
        if len(self.cache) == self.max_conns:
            for k, v in list(self.timeout.items()):
                if v == self.soonest:
                    del self.cache[k]
                    del self.timeout[k]
                    break
            self.soonest = min(self.timeout.values(), default=0)

    def clear_cache(self):
        """Close and drop every cached connection."""
        for conn in self.cache.values():
            conn.close()
        self.cache.clear()
        self.timeout.clear()
1616
class DataHandler(BaseHandler):
    """Handler for data: URLs as specified in RFC 2397."""

    def data_open(self, req):
        """Decode a data: URL and return it as a response-like object.

        POSTed data is ignored.

        Syntax:
          dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
          mediatype := [ type "/" subtype ] *( ";" parameter )
          data      := *urlchar
          parameter := attribute "=" value
        """
        url = req.full_url

        scheme, rest = url.split(":",1)
        mediatype, payload = rest.split(",",1)

        # even base64 encoded data URLs might be quoted so unquote in any case:
        payload = unquote_to_bytes(payload)
        if mediatype.endswith(";base64"):
            payload = base64.decodebytes(payload)
            mediatype = mediatype[:-7]

        if not mediatype:
            mediatype = "text/plain;charset=US-ASCII"

        headers = email.message_from_string(
            "Content-type: %s\nContent-length: %d\n" % (mediatype, len(payload)))

        return addinfourl(io.BytesIO(payload), headers, url)
1646
1647
# Code moved from the old urllib module

MAXFTPCACHE = 10        # Trim the ftp cache beyond this size
1651
# Helper for non-unix systems
if os.name == 'nt':
    from nturl2path import url2pathname, pathname2url
else:
    def url2pathname(pathname):
        """OS-specific conversion from a relative URL of the 'file' scheme
        to a file system path; not recommended for general use."""
        if pathname.startswith('///'):
            # Empty authority section: the path begins on the third
            # character.
            pathname = pathname[2:]
        elif pathname.startswith('//localhost/'):
            # Skip past the 'localhost' authority.
            pathname = pathname[11:]
        return unquote(pathname,
                       encoding=sys.getfilesystemencoding(),
                       errors=sys.getfilesystemencodeerrors())

    def pathname2url(pathname):
        """OS-specific conversion from a file system path to a relative URL
        of the 'file' scheme; not recommended for general use."""
        if pathname.startswith('//'):
            # Prepend an explicitly empty authority so the leading '//'
            # of the path is not parsed as an authority section.
            pathname = '//' + pathname
        return quote(pathname,
                     encoding=sys.getfilesystemencoding(),
                     errors=sys.getfilesystemencodeerrors())
1680
1681
1682ftpcache = {}
1683
1684
class URLopener:
    """Class to open URLs.
    This is a class rather than just a subroutine because we may need
    more than one set of global protocol-specific options.
    Note -- this is a base class for those who don't want the
    automatic handling of errors type 302 (relocated) and 401
    (authorization needed)."""

    # Class-level default so cleanup() (reached via __del__) is safe even
    # if __init__ never completed.
    __tempfiles = None

    version = "Python-urllib/%s" % __version__

    # Constructor
    def __init__(self, proxies=None, **x509):
        msg = "%(class)s style of invoking requests is deprecated. " \
              "Use newer urlopen functions/methods" % {'class': self.__class__.__name__}
        warnings.warn(msg, DeprecationWarning, stacklevel=3)
        if proxies is None:
            proxies = getproxies()
        assert hasattr(proxies, 'keys'), "proxies must be a mapping"
        self.proxies = proxies
        self.key_file = x509.get('key_file')
        self.cert_file = x509.get('cert_file')
        self.addheaders = [('User-Agent', self.version), ('Accept', '*/*')]
        self.__tempfiles = []
        self.__unlink = os.unlink # See cleanup()
        self.tempcache = None
        # Undocumented feature: if you assign {} to tempcache,
        # it is used to cache files retrieved with
        # self.retrieve().  This is not enabled by default
        # since it does not work for changing documents (and I
        # haven't got the logic to check expiration headers
        # yet).
        self.ftpcache = ftpcache
        # Undocumented feature: you can use a different
        # ftp cache by assigning to the .ftpcache member;
        # in case you want logically independent URL openers
        # XXX This is not threadsafe.  Bah.

    def __del__(self):
        self.close()

    def close(self):
        self.cleanup()

    def cleanup(self):
        # This code sometimes runs when the rest of this module
        # has already been deleted, so it can't use any globals
        # or import anything.
        if self.__tempfiles:
            for file in self.__tempfiles:
                try:
                    self.__unlink(file)
                except OSError:
                    pass
            del self.__tempfiles[:]
        if self.tempcache:
            self.tempcache.clear()

    def addheader(self, *args):
        """Add a header to be used by the HTTP interface only
        e.g. u.addheader('Accept', 'sound/basic')"""
        self.addheaders.append(args)

    # External interface
    def open(self, fullurl, data=None):
        """Use URLopener().open(file) instead of open(file, 'r')."""
        fullurl = unwrap(_to_bytes(fullurl))
        fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]|")
        if self.tempcache and fullurl in self.tempcache:
            filename, headers = self.tempcache[fullurl]
            fp = open(filename, 'rb')
            return addinfourl(fp, headers, fullurl)
        urltype, url = _splittype(fullurl)
        if not urltype:
            urltype = 'file'
        if urltype in self.proxies:
            proxy = self.proxies[urltype]
            urltype, proxyhost = _splittype(proxy)
            host, selector = _splithost(proxyhost)
            url = (host, fullurl) # Signal special case to open_*()
        else:
            proxy = None
        name = 'open_' + urltype
        self.type = urltype
        name = name.replace('-', '_')
        # Never dispatch straight to open_local_file(); local files are
        # reached only through open_file(), which vets the host part.
        if not hasattr(self, name) or name == 'open_local_file':
            if proxy:
                return self.open_unknown_proxy(proxy, fullurl, data)
            else:
                return self.open_unknown(fullurl, data)
        try:
            if data is None:
                return getattr(self, name)(url)
            else:
                return getattr(self, name)(url, data)
        except (HTTPError, URLError):
            raise
        except OSError as msg:
            raise OSError('socket error', msg) from msg

    def open_unknown(self, fullurl, data=None):
        """Overridable interface to open unknown URL type."""
        type, url = _splittype(fullurl)
        raise OSError('url error', 'unknown url type', type)

    def open_unknown_proxy(self, proxy, fullurl, data=None):
        """Overridable interface to open unknown URL type."""
        type, url = _splittype(fullurl)
        raise OSError('url error', 'invalid proxy for %s' % type, proxy)

    # External interface
    def retrieve(self, url, filename=None, reporthook=None, data=None):
        """retrieve(url) returns (filename, headers) for a local object
        or (tempfilename, headers) for a remote object."""
        url = unwrap(_to_bytes(url))
        if self.tempcache and url in self.tempcache:
            return self.tempcache[url]
        type, url1 = _splittype(url)
        if filename is None and (not type or type == 'file'):
            try:
                fp = self.open_local_file(url1)
                hdrs = fp.info()
                fp.close()
                return url2pathname(_splithost(url1)[1]), hdrs
            except OSError:
                pass
        fp = self.open(url, data)
        try:
            headers = fp.info()
            if filename:
                tfp = open(filename, 'wb')
            else:
                # No target filename given: download into a temp file whose
                # suffix mirrors the URL's path extension; the file is
                # tracked for removal by cleanup().
                garbage, path = _splittype(url)
                garbage, path = _splithost(path or "")
                path, garbage = _splitquery(path or "")
                path, garbage = _splitattr(path or "")
                suffix = os.path.splitext(path)[1]
                (fd, filename) = tempfile.mkstemp(suffix)
                self.__tempfiles.append(filename)
                tfp = os.fdopen(fd, 'wb')
            try:
                result = filename, headers
                if self.tempcache is not None:
                    self.tempcache[url] = result
                bs = 1024*8
                size = -1
                read = 0
                blocknum = 0
                if "content-length" in headers:
                    size = int(headers["Content-Length"])
                if reporthook:
                    reporthook(blocknum, bs, size)
                while block := fp.read(bs):
                    read += len(block)
                    tfp.write(block)
                    blocknum += 1
                    if reporthook:
                        reporthook(blocknum, bs, size)
            finally:
                tfp.close()
        finally:
            fp.close()

        # raise exception if actual size does not match content-length header
        if size >= 0 and read < size:
            raise ContentTooShortError(
                "retrieval incomplete: got only %i out of %i bytes"
                % (read, size), result)

        return result

    # Each method named open_<type> knows how to open that type of URL

    def _open_generic_http(self, connection_factory, url, data):
        """Make an HTTP connection using connection_class.

        This is an internal method that should be called from
        open_http() or open_https().

        Arguments:
        - connection_factory should take a host name and return an
          HTTPConnection instance.
        - url is the url to retrieval or a host, relative-path pair.
        - data is payload for a POST request or None.
        """

        user_passwd = None
        proxy_passwd= None
        if isinstance(url, str):
            host, selector = _splithost(url)
            if host:
                user_passwd, host = _splituser(host)
                host = unquote(host)
            realhost = host
        else:
            # Tuple form (host, fullurl) signals a proxied request; see
            # open() where this special case is constructed.
            host, selector = url
            # check whether the proxy contains authorization information
            proxy_passwd, host = _splituser(host)
            # now we proceed with the url we want to obtain
            urltype, rest = _splittype(selector)
            url = rest
            user_passwd = None
            if urltype.lower() != 'http':
                realhost = None
            else:
                realhost, rest = _splithost(rest)
                if realhost:
                    user_passwd, realhost = _splituser(realhost)
                if user_passwd:
                    selector = "%s://%s%s" % (urltype, realhost, rest)
                # Talk to the target directly when the proxy is to be
                # bypassed for this host.
                if proxy_bypass(realhost):
                    host = realhost

        if not host: raise OSError('http error', 'no host given')

        if proxy_passwd:
            proxy_passwd = unquote(proxy_passwd)
            proxy_auth = base64.b64encode(proxy_passwd.encode()).decode('ascii')
        else:
            proxy_auth = None

        if user_passwd:
            user_passwd = unquote(user_passwd)
            auth = base64.b64encode(user_passwd.encode()).decode('ascii')
        else:
            auth = None
        http_conn = connection_factory(host)
        headers = {}
        if proxy_auth:
            headers["Proxy-Authorization"] = "Basic %s" % proxy_auth
        if auth:
            headers["Authorization"] =  "Basic %s" % auth
        if realhost:
            headers["Host"] = realhost

        # Add Connection:close as we don't support persistent connections yet.
        # This helps in closing the socket and avoiding ResourceWarning

        headers["Connection"] = "close"

        for header, value in self.addheaders:
            headers[header] = value

        if data is not None:
            headers["Content-Type"] = "application/x-www-form-urlencoded"
            http_conn.request("POST", selector, data, headers)
        else:
            http_conn.request("GET", selector, headers=headers)

        try:
            response = http_conn.getresponse()
        except http.client.BadStatusLine:
            # something went wrong with the HTTP status line
            raise URLError("http protocol error: bad status line")

        # According to RFC 2616, "2xx" code indicates that the client's
        # request was successfully received, understood, and accepted.
        if 200 <= response.status < 300:
            return addinfourl(response, response.msg, "http:" + url,
                              response.status)
        else:
            return self.http_error(
                url, response.fp,
                response.status, response.reason, response.msg, data)

    def open_http(self, url, data=None):
        """Use HTTP protocol."""
        return self._open_generic_http(http.client.HTTPConnection, url, data)

    def http_error(self, url, fp, errcode, errmsg, headers, data=None):
        """Handle http errors.

        Derived class can override this, or provide specific handlers
        named http_error_DDD where DDD is the 3-digit error code."""
        # First check if there's a specific handler for this error
        name = 'http_error_%d' % errcode
        if hasattr(self, name):
            method = getattr(self, name)
            if data is None:
                result = method(url, fp, errcode, errmsg, headers)
            else:
                result = method(url, fp, errcode, errmsg, headers, data)
            if result: return result
        return self.http_error_default(url, fp, errcode, errmsg, headers)

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handler: close the connection and raise OSError."""
        fp.close()
        raise HTTPError(url, errcode, errmsg, headers, None)

    if _have_ssl:
        def _https_connection(self, host):
            if self.key_file or self.cert_file:
                http_version = http.client.HTTPSConnection._http_vsn
                context = http.client._create_https_context(http_version)
                context.load_cert_chain(self.cert_file, self.key_file)
                # cert and key file means the user wants to authenticate.
                # enable TLS 1.3 PHA implicitly even for custom contexts.
                if context.post_handshake_auth is not None:
                    context.post_handshake_auth = True
            else:
                context = None
            return http.client.HTTPSConnection(host, context=context)

        def open_https(self, url, data=None):
            """Use HTTPS protocol."""
            return self._open_generic_http(self._https_connection, url, data)

    def open_file(self, url):
        """Use local file or FTP depending on form of URL."""
        if not isinstance(url, str):
            raise URLError('file error: proxy support for file protocol currently not implemented')
        # A non-empty authority other than 'localhost' is rejected: only
        # local files are supported here.
        if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
            raise ValueError("file:// scheme is supported only on localhost")
        else:
            return self.open_local_file(url)

    def open_local_file(self, url):
        """Use local file."""
        import email.utils
        import mimetypes
        host, file = _splithost(url)
        localname = url2pathname(file)
        try:
            stats = os.stat(localname)
        except OSError as e:
            raise URLError(e.strerror, e.filename)
        size = stats.st_size
        modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
        mtype = mimetypes.guess_type(url)[0]
        headers = email.message_from_string(
            'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
            (mtype or 'text/plain', size, modified))
        if not host:
            urlfile = file
            if file[:1] == '/':
                urlfile = 'file://' + file
            return addinfourl(open(localname, 'rb'), headers, urlfile)
        host, port = _splitport(host)
        # A host is acceptable only when it resolves to this machine.
        if (not port
           and socket.gethostbyname(host) in ((localhost(),) + thishost())):
            urlfile = file
            if file[:1] == '/':
                urlfile = 'file://' + file
            elif file[:2] == './':
                raise ValueError("local file url may start with / or file:. Unknown url of type: %s" % url)
            return addinfourl(open(localname, 'rb'), headers, urlfile)
        raise URLError('local file error: not on local host')

    def open_ftp(self, url):
        """Use FTP protocol."""
        if not isinstance(url, str):
            raise URLError('ftp error: proxy support for ftp protocol currently not implemented')
        import mimetypes
        host, path = _splithost(url)
        if not host: raise URLError('ftp error: no host given')
        host, port = _splitport(host)
        user, host = _splituser(host)
        if user: user, passwd = _splitpasswd(user)
        else: passwd = None
        host = unquote(host)
        user = unquote(user or '')
        passwd = unquote(passwd or '')
        host = socket.gethostbyname(host)
        if not port:
            import ftplib
            port = ftplib.FTP_PORT
        else:
            port = int(port)
        path, attrs = _splitattr(path)
        path = unquote(path)
        dirs = path.split('/')
        dirs, file = dirs[:-1], dirs[-1]
        # An absolute path yields a leading empty component; drop it, and
        # if one remains (path began with '//'), make it an explicit '/'.
        if dirs and not dirs[0]: dirs = dirs[1:]
        if dirs and not dirs[0]: dirs[0] = '/'
        key = user, host, port, '/'.join(dirs)
        # XXX thread unsafe!
        if len(self.ftpcache) > MAXFTPCACHE:
            # Prune the cache, rather arbitrarily
            for k in list(self.ftpcache):
                if k != key:
                    v = self.ftpcache[k]
                    del self.ftpcache[k]
                    v.close()
        try:
            if key not in self.ftpcache:
                self.ftpcache[key] = \
                    ftpwrapper(user, passwd, host, port, dirs)
            if not file: type = 'D'
            else: type = 'I'
            for attr in attrs:
                attr, value = _splitvalue(attr)
                if attr.lower() == 'type' and \
                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
                    type = value.upper()
            (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
            mtype = mimetypes.guess_type("ftp:" + url)[0]
            headers = ""
            if mtype:
                headers += "Content-Type: %s\n" % mtype
            if retrlen is not None and retrlen >= 0:
                headers += "Content-Length: %d\n" % retrlen
            headers = email.message_from_string(headers)
            return addinfourl(fp, headers, "ftp:" + url)
        except ftperrors() as exp:
            raise URLError(f'ftp error: {exp}') from exp

    def open_data(self, url, data=None):
        """Use "data" URL."""
        if not isinstance(url, str):
            raise URLError('data error: proxy support for data protocol currently not implemented')
        # ignore POSTed data
        #
        # syntax of data URLs:
        # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
        # mediatype := [ type "/" subtype ] *( ";" parameter )
        # data      := *urlchar
        # parameter := attribute "=" value
        try:
            [type, data] = url.split(',', 1)
        except ValueError:
            raise OSError('data error', 'bad data URL')
        if not type:
            type = 'text/plain;charset=US-ASCII'
        semi = type.rfind(';')
        if semi >= 0 and '=' not in type[semi:]:
            encoding = type[semi+1:]
            type = type[:semi]
        else:
            encoding = ''
        msg = []
        msg.append('Date: %s'%time.strftime('%a, %d %b %Y %H:%M:%S GMT',
                                            time.gmtime(time.time())))
        msg.append('Content-type: %s' % type)
        if encoding == 'base64':
            # XXX is this encoding/decoding ok?
            data = base64.decodebytes(data.encode('ascii')).decode('latin-1')
        else:
            data = unquote(data)
        msg.append('Content-Length: %d' % len(data))
        msg.append('')
        msg.append(data)
        msg = '\n'.join(msg)
        headers = email.message_from_string(msg)
        f = io.StringIO(msg)
        #f.fileno = None     # needed for addinfourl
        return addinfourl(f, headers, url)
2133
2134
class FancyURLopener(URLopener):
    """Derived class with handlers for errors we can handle (perhaps)."""

    def __init__(self, *args, **kwargs):
        URLopener.__init__(self, *args, **kwargs)
        # auth_cache maps 'realm@host' -> (user, passwd); see
        # get_user_passwd().
        self.auth_cache = {}
        self.tries = 0
        self.maxtries = 10

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handling -- don't raise an exception."""
        return addinfourl(fp, headers, "http:" + url, errcode)

    def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 302 -- relocated (temporarily)."""
        self.tries += 1
        try:
            # Cap redirect recursion; report the overflow as a synthetic
            # 500 Internal Server Error.
            if self.maxtries and self.tries >= self.maxtries:
                if hasattr(self, "http_error_500"):
                    meth = self.http_error_500
                else:
                    meth = self.http_error_default
                return meth(url, fp, 500,
                            "Internal Server Error: Redirect Recursion",
                            headers)
            result = self.redirect_internal(url, fp, errcode, errmsg,
                                            headers, data)
            return result
        finally:
            # Reset the counter whether the redirect succeeded or not.
            self.tries = 0

    def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
        if 'location' in headers:
            newurl = headers['location']
        elif 'uri' in headers:
            newurl = headers['uri']
        else:
            # No redirect target supplied; give up silently.
            return
        fp.close()

        # In case the server sent a relative URL, join with original:
        newurl = urljoin(self.type + ":" + url, newurl)

        urlparts = urlparse(newurl)

        # For security reasons, we don't allow redirection to anything other
        # than http, https and ftp.

        # We are using newer HTTPError with older redirect_internal method
        # This older method will get deprecated in 3.3

        if urlparts.scheme not in ('http', 'https', 'ftp', ''):
            raise HTTPError(newurl, errcode,
                            errmsg +
                            " Redirection to url '%s' is not allowed." % newurl,
                            headers, fp)

        return self.open(newurl)

    def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 301 -- also relocated (permanently)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 303 -- also relocated (essentially identical to 302)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 307 -- relocated, but turn POST into error."""
        if data is None:
            return self.http_error_302(url, fp, errcode, errmsg, headers, data)
        else:
            return self.http_error_default(url, fp, errcode, errmsg, headers)

    def http_error_308(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 308 -- relocated, but turn POST into error."""
        if data is None:
            return self.http_error_301(url, fp, errcode, errmsg, headers, data)
        else:
            return self.http_error_default(url, fp, errcode, errmsg, headers)

    def http_error_401(self, url, fp, errcode, errmsg, headers, data=None,
            retry=False):
        """Error 401 -- authentication required.
        This function supports Basic authentication only."""
        # Note: URLopener.http_error_default() raises HTTPError, so each
        # of the calls below terminates this handler when its check fails.
        if 'www-authenticate' not in headers:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        stuff = headers['www-authenticate']
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
        if not match:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        if not retry:
            URLopener.http_error_default(self, url, fp, errcode, errmsg,
                    headers)
        name = 'retry_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self,name)(url, realm)
        else:
            return getattr(self,name)(url, realm, data)

    def http_error_407(self, url, fp, errcode, errmsg, headers, data=None,
            retry=False):
        """Error 407 -- proxy authentication required.
        This function supports Basic authentication only."""
        # Note: URLopener.http_error_default() raises HTTPError, so each
        # of the calls below terminates this handler when its check fails.
        if 'proxy-authenticate' not in headers:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        stuff = headers['proxy-authenticate']
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
        if not match:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        if not retry:
            URLopener.http_error_default(self, url, fp, errcode, errmsg,
                    headers)
        name = 'retry_proxy_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self,name)(url, realm)
        else:
            return getattr(self,name)(url, realm, data)

    def retry_proxy_http_basic_auth(self, url, realm, data=None):
        host, selector = _splithost(url)
        newurl = 'http://' + host + selector
        proxy = self.proxies['http']
        urltype, proxyhost = _splittype(proxy)
        proxyhost, proxyselector = _splithost(proxyhost)
        # Strip any stale 'user:pass@' prefix from the proxy host; i > 0
        # also makes get_user_passwd() drop its cached entry.
        i = proxyhost.find('@') + 1
        proxyhost = proxyhost[i:]
        user, passwd = self.get_user_passwd(proxyhost, realm, i)
        if not (user or passwd): return None
        proxyhost = "%s:%s@%s" % (quote(user, safe=''),
                                  quote(passwd, safe=''), proxyhost)
        self.proxies['http'] = 'http://' + proxyhost + proxyselector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_proxy_https_basic_auth(self, url, realm, data=None):
        host, selector = _splithost(url)
        newurl = 'https://' + host + selector
        proxy = self.proxies['https']
        urltype, proxyhost = _splittype(proxy)
        proxyhost, proxyselector = _splithost(proxyhost)
        # Strip any stale 'user:pass@' prefix from the proxy host; i > 0
        # also makes get_user_passwd() drop its cached entry.
        i = proxyhost.find('@') + 1
        proxyhost = proxyhost[i:]
        user, passwd = self.get_user_passwd(proxyhost, realm, i)
        if not (user or passwd): return None
        proxyhost = "%s:%s@%s" % (quote(user, safe=''),
                                  quote(passwd, safe=''), proxyhost)
        self.proxies['https'] = 'https://' + proxyhost + proxyselector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_http_basic_auth(self, url, realm, data=None):
        host, selector = _splithost(url)
        i = host.find('@') + 1
        host = host[i:]
        user, passwd = self.get_user_passwd(host, realm, i)
        if not (user or passwd): return None
        host = "%s:%s@%s" % (quote(user, safe=''),
                             quote(passwd, safe=''), host)
        newurl = 'http://' + host + selector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_https_basic_auth(self, url, realm, data=None):
        host, selector = _splithost(url)
        i = host.find('@') + 1
        host = host[i:]
        user, passwd = self.get_user_passwd(host, realm, i)
        if not (user or passwd): return None
        host = "%s:%s@%s" % (quote(user, safe=''),
                             quote(passwd, safe=''), host)
        newurl = 'https://' + host + selector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def get_user_passwd(self, host, realm, clear_cache=0):
        # A truthy clear_cache discards the cached credentials for this
        # realm/host and re-prompts (used after a failed login attempt).
        key = realm + '@' + host.lower()
        if key in self.auth_cache:
            if clear_cache:
                del self.auth_cache[key]
            else:
                return self.auth_cache[key]
        user, passwd = self.prompt_user_passwd(host, realm)
        if user or passwd: self.auth_cache[key] = (user, passwd)
        return user, passwd

    def prompt_user_passwd(self, host, realm):
        """Override this in a GUI environment!"""
        import getpass
        try:
            user = input("Enter username for %s at %s: " % (realm, host))
            passwd = getpass.getpass("Enter password for %s in %s at %s: " %
                (user, realm, host))
            return user, passwd
        except KeyboardInterrupt:
            print()
            return None, None
2352
2353
2354# Utility functions
2355
_localhost = None
def localhost():
    """Return the IP address of the magic hostname 'localhost' (cached)."""
    global _localhost
    if _localhost is not None:
        return _localhost
    _localhost = socket.gethostbyname('localhost')
    return _localhost
2363
_thishost = None
def thishost():
    """Return the IP addresses of the current host.

    Computed once and memoized in ``_thishost``.
    """
    global _thishost
    if _thishost is not None:
        return _thishost
    try:
        addresses = socket.gethostbyname_ex(socket.gethostname())[2]
    except socket.gaierror:
        # The machine's own hostname may not resolve; fall back to
        # the loopback name instead of failing.
        addresses = socket.gethostbyname_ex('localhost')[2]
    _thishost = tuple(addresses)
    return _thishost
2374
_ftperrors = None
def ftperrors():
    """Return the set of errors raised by the FTP class.

    The ftplib import is deferred and the result memoized so that
    merely loading this module does not pull in ftplib.
    """
    global _ftperrors
    if _ftperrors is not None:
        return _ftperrors
    import ftplib
    _ftperrors = ftplib.all_errors
    return _ftperrors
2383
_noheaders = None
def noheaders():
    """Return an empty email Message object.

    A single shared instance is created lazily and reused.
    """
    global _noheaders
    if _noheaders is not None:
        return _noheaders
    _noheaders = email.message_from_string("")
    return _noheaders
2391
2392
2393# Utility classes
2394
class ftpwrapper:
    """Class used by open_ftp() for cache of open FTP connections.

    The wrapper reference-counts the file objects it hands out
    (retrfile) so the underlying connection can either be kept alive
    for reuse (persistent=True) or torn down once the last file is
    closed.
    """

    def __init__(self, user, passwd, host, port, dirs, timeout=None,
                 persistent=True):
        self.user = user
        self.passwd = passwd
        self.host = host
        self.port = port
        self.dirs = dirs  # sequence of path components to cwd into
        self.timeout = timeout
        self.refcount = 0  # number of outstanding file objects from retrfile
        self.keepalive = persistent  # keep connection open after last close
        try:
            self.init()
        except:
            # Connection setup failed part-way; release the socket
            # before re-raising whatever went wrong (incl. KeyboardInterrupt).
            self.close()
            raise

    def init(self):
        """Connect, log in and change into the target directory."""
        import ftplib
        self.busy = 0  # 1 while a transfer response is still pending
        self.ftp = ftplib.FTP()
        self.ftp.connect(self.host, self.port, self.timeout)
        self.ftp.login(self.user, self.passwd)
        _target = '/'.join(self.dirs)
        self.ftp.cwd(_target)

    def retrfile(self, file, type):
        """Retrieve *file* (or a directory listing) from the server.

        Returns a (file-like object, length) pair; length may be None
        when the server does not report it.  Raises URLError on FTP
        protocol errors.
        """
        import ftplib
        self.endtransfer()
        # Type 'd'/'D' forces a directory listing in ASCII mode;
        # anything else is sent verbatim as a TYPE argument ('I', 'A', ...).
        if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
        else: cmd = 'TYPE ' + type; isdir = 0
        try:
            self.ftp.voidcmd(cmd)
        except ftplib.all_errors:
            # Connection probably timed out or dropped; reconnect once.
            self.init()
            self.ftp.voidcmd(cmd)
        conn = None
        if file and not isdir:
            # Try to retrieve as a file
            try:
                cmd = 'RETR ' + file
                conn, retrlen = self.ftp.ntransfercmd(cmd)
            except ftplib.error_perm as reason:
                # 550 means "not a plain file" -- fall through and try
                # a directory listing instead; other errors are fatal.
                if str(reason)[:3] != '550':
                    raise URLError(f'ftp error: {reason}') from reason
        if not conn:
            # Set transfer mode to ASCII!
            self.ftp.voidcmd('TYPE A')
            # Try a directory listing. Verify that directory exists.
            if file:
                pwd = self.ftp.pwd()
                try:
                    try:
                        self.ftp.cwd(file)
                    except ftplib.error_perm as reason:
                        raise URLError('ftp error: %r' % reason) from reason
                finally:
                    # Always restore the original working directory.
                    self.ftp.cwd(pwd)
                cmd = 'LIST ' + file
            else:
                cmd = 'LIST'
            conn, retrlen = self.ftp.ntransfercmd(cmd)
        self.busy = 1

        # The hook makes closing the returned file object decrement our
        # refcount (and possibly close the connection) via file_close().
        ftpobj = addclosehook(conn.makefile('rb'), self.file_close)
        self.refcount += 1
        conn.close()
        # Pass back both a suitably decorated object and a retrieval length
        return (ftpobj, retrlen)

    def endtransfer(self):
        """Consume the pending end-of-transfer response, if any."""
        if not self.busy:
            return
        self.busy = 0
        try:
            self.ftp.voidresp()
        except ftperrors():
            # Best effort -- the connection may already be gone.
            pass

    def close(self):
        """Disable keepalive and close once no files remain outstanding."""
        self.keepalive = False
        if self.refcount <= 0:
            self.real_close()

    def file_close(self):
        # Called when a file object returned by retrfile() is closed.
        self.endtransfer()
        self.refcount -= 1
        if self.refcount <= 0 and not self.keepalive:
            self.real_close()

    def real_close(self):
        """Unconditionally tear down the FTP connection."""
        self.endtransfer()
        try:
            self.ftp.close()
        except ftperrors():
            # Already closed / broken connection: nothing more to do.
            pass
2493
2494# Proxy handling
def getproxies_environment():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Scan the environment for variables named <scheme>_proxy;
    this seems to be the standard convention.  If you need a
    different way, you can pass a proxies dictionary to the
    [Fancy]URLopener constructor.
    """
    # Two passes over the environment so that lowercase variables win:
    # the first pass accepts any capitalization, the second pass keeps
    # only variables that are exactly lowercase.
    proxies = {}
    candidates = []
    for name, value in os.environ.items():
        # Cheap underscore-position screen before the more expensive
        # case-folding comparison.
        if len(name) > 5 and name[-6] == "_" and name[-5:].lower() == "proxy":
            scheme = name[:-6].lower()
            candidates.append((name, value, scheme))
            if value:
                proxies[scheme] = value
    # CVE-2016-1000110 - If we are running as CGI script, forget HTTP_PROXY
    # (non-all-lowercase) as it may be set from the web server by a "Proxy:"
    # header from the client
    # If "proxy" is lowercase, it will still be used thanks to the next block
    if 'REQUEST_METHOD' in os.environ:
        proxies.pop('http', None)
    for name, value, scheme in candidates:
        # Exact (not case-folded) match: lowercase variables override,
        # and an empty lowercase value removes the entry entirely.
        if name[-6:] == '_proxy':
            if value:
                proxies[scheme] = value
            else:
                proxies.pop(scheme, None)
    return proxies
2531
def proxy_bypass_environment(host, proxies=None):
    """Test if proxies should not be used for a particular host.

    Checks the proxy dict for the value of no_proxy, which should
    be a list of comma separated DNS suffixes, or '*' for all hosts.

    """
    if proxies is None:
        proxies = getproxies_environment()
    if 'no' not in proxies:
        # no_proxy not specified: never bypass.
        return False
    no_proxy = proxies['no']
    if no_proxy == '*':
        # '*' is the special "always bypass" marker.
        return True
    host = host.lower()
    # Compare both with and without any :port suffix.
    hostonly, port = _splitport(host)
    for entry in no_proxy.split(','):
        entry = entry.strip()
        if not entry:
            continue
        entry = entry.lstrip('.').lower()  # leading dots are ignored
        # Exact match against either form of the host...
        if entry in (hostonly, host):
            return True
        # ...or a DNS-suffix match.
        suffix = '.' + entry
        if hostonly.endswith(suffix) or host.endswith(suffix):
            return True
    # No entry matched: don't bypass.
    return False
2565
2566
2567# This code tests an OSX specific data structure but is testable on all
2568# platforms
2569def _proxy_bypass_macosx_sysconf(host, proxy_settings):
2570    """
2571    Return True iff this host shouldn't be accessed using a proxy
2572
2573    This function uses the MacOSX framework SystemConfiguration
2574    to fetch the proxy information.
2575
2576    proxy_settings come from _scproxy._get_proxy_settings or get mocked ie:
2577    { 'exclude_simple': bool,
2578      'exceptions': ['foo.bar', '*.bar.com', '127.0.0.1', '10.1', '10.0/16']
2579    }
2580    """
2581    from fnmatch import fnmatch
2582    from ipaddress import AddressValueError, IPv4Address
2583
2584    hostonly, port = _splitport(host)
2585
2586    def ip2num(ipAddr):
2587        parts = ipAddr.split('.')
2588        parts = list(map(int, parts))
2589        if len(parts) != 4:
2590            parts = (parts + [0, 0, 0, 0])[:4]
2591        return (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3]
2592
2593    # Check for simple host names:
2594    if '.' not in host:
2595        if proxy_settings['exclude_simple']:
2596            return True
2597
2598    hostIP = None
2599    try:
2600        hostIP = int(IPv4Address(hostonly))
2601    except AddressValueError:
2602        pass
2603
2604    for value in proxy_settings.get('exceptions', ()):
2605        # Items in the list are strings like these: *.local, 169.254/16
2606        if not value: continue
2607
2608        m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value)
2609        if m is not None and hostIP is not None:
2610            base = ip2num(m.group(1))
2611            mask = m.group(2)
2612            if mask is None:
2613                mask = 8 * (m.group(1).count('.') + 1)
2614            else:
2615                mask = int(mask[1:])
2616
2617            if mask < 0 or mask > 32:
2618                # System libraries ignore invalid prefix lengths
2619                continue
2620
2621            mask = 32 - mask
2622
2623            if (hostIP >> mask) == (base >> mask):
2624                return True
2625
2626        elif fnmatch(host, value):
2627            return True
2628
2629    return False
2630
2631
2632# Same as _proxy_bypass_macosx_sysconf, testable on all platforms
2633def _proxy_bypass_winreg_override(host, override):
2634    """Return True if the host should bypass the proxy server.
2635
2636    The proxy override list is obtained from the Windows
2637    Internet settings proxy override registry value.
2638
2639    An example of a proxy override value is:
2640    "www.example.com;*.example.net; 192.168.0.1"
2641    """
2642    from fnmatch import fnmatch
2643
2644    host, _ = _splitport(host)
2645    proxy_override = override.split(';')
2646    for test in proxy_override:
2647        test = test.strip()
2648        # "<local>" should bypass the proxy server for all intranet addresses
2649        if test == '<local>':
2650            if '.' not in host:
2651                return True
2652        elif fnmatch(host, test):
2653            return True
2654    return False
2655
2656
if sys.platform == 'darwin':
    # macOS: proxy configuration lives in the SystemConfiguration
    # framework, exposed via the private _scproxy extension module.
    from _scproxy import _get_proxy_settings, _get_proxies

    def proxy_bypass_macosx_sysconf(host):
        # Fetch the live system settings and defer to the portable,
        # testable helper defined above.
        proxy_settings = _get_proxy_settings()
        return _proxy_bypass_macosx_sysconf(host, proxy_settings)

    def getproxies_macosx_sysconf():
        """Return a dictionary of scheme -> proxy server URL mappings.

        This function uses the MacOSX framework SystemConfiguration
        to fetch the proxy information.
        """
        return _get_proxies()



    def proxy_bypass(host):
        """Return True, if host should be bypassed.

        Checks proxy settings gathered from the environment, if specified,
        or from the MacOSX framework SystemConfiguration.

        """
        # Environment variables take precedence over system configuration.
        proxies = getproxies_environment()
        if proxies:
            return proxy_bypass_environment(host, proxies)
        else:
            return proxy_bypass_macosx_sysconf(host)

    def getproxies():
        # Environment variables win; system configuration is the fallback.
        return getproxies_environment() or getproxies_macosx_sysconf()


elif os.name == 'nt':
    def getproxies_registry():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Win32 uses the registry to store proxies.

        """
        proxies = {}
        try:
            import winreg
        except ImportError:
            # Std module, so should be around - but you never know!
            return proxies
        try:
            internetSettings = winreg.OpenKey(winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            proxyEnable = winreg.QueryValueEx(internetSettings,
                                               'ProxyEnable')[0]
            if proxyEnable:
                # Returned as Unicode but problems if not converted to ASCII
                proxyServer = str(winreg.QueryValueEx(internetSettings,
                                                       'ProxyServer')[0])
                if '=' not in proxyServer and ';' not in proxyServer:
                    # Use one setting for all protocols.
                    proxyServer = 'http={0};https={0};ftp={0}'.format(proxyServer)
                for p in proxyServer.split(';'):
                    protocol, address = p.split('=', 1)
                    # See if address has a type:// prefix
                    if not re.match('(?:[^/:]+)://', address):
                        # Add type:// prefix to address without specifying type
                        if protocol in ('http', 'https', 'ftp'):
                            # The default proxy type of Windows is HTTP
                            address = 'http://' + address
                        elif protocol == 'socks':
                            address = 'socks://' + address
                    proxies[protocol] = address
                # Use SOCKS proxy for HTTP(S) protocols
                if proxies.get('socks'):
                    # The default SOCKS proxy type of Windows is SOCKS4
                    address = re.sub(r'^socks://', 'socks4://', proxies['socks'])
                    proxies['http'] = proxies.get('http') or address
                    proxies['https'] = proxies.get('https') or address
            internetSettings.Close()
        except (OSError, ValueError, TypeError):
            # Either registry key not found etc, or the value in an
            # unexpected format.
            # proxies already set up to be empty so nothing to do
            pass
        return proxies

    def getproxies():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Returns settings gathered from the environment, if specified,
        or the registry.

        """
        return getproxies_environment() or getproxies_registry()

    def proxy_bypass_registry(host):
        # Read ProxyEnable/ProxyOverride from the Internet Settings
        # registry key, then defer to the portable override matcher.
        try:
            import winreg
        except ImportError:
            # Std modules, so should be around - but you never know!
            return False
        try:
            internetSettings = winreg.OpenKey(winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            proxyEnable = winreg.QueryValueEx(internetSettings,
                                               'ProxyEnable')[0]
            proxyOverride = str(winreg.QueryValueEx(internetSettings,
                                                     'ProxyOverride')[0])
            # ^^^^ Returned as Unicode but problems if not converted to ASCII
        except OSError:
            # Missing key or value: no override configured.
            return False
        if not proxyEnable or not proxyOverride:
            return False
        return _proxy_bypass_winreg_override(host, proxyOverride)

    def proxy_bypass(host):
        """Return True, if host should be bypassed.

        Checks proxy settings gathered from the environment, if specified,
        or the registry.

        """
        # Environment variables take precedence over the registry.
        proxies = getproxies_environment()
        if proxies:
            return proxy_bypass_environment(host, proxies)
        else:
            return proxy_bypass_registry(host)

else:
    # By default use environment variables
    getproxies = getproxies_environment
    proxy_bypass = proxy_bypass_environment
2787