• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 """An extensible library for opening URLs using a variety of protocols
2 
3 The simplest way to use this module is to call the urlopen function,
4 which accepts a string containing a URL or a Request object (described
5 below).  It opens the URL and returns the results as file-like
6 object; the returned object has some extra methods described below.
7 
8 The OpenerDirector manages a collection of Handler objects that do
9 all the actual work.  Each Handler implements a particular protocol or
10 option.  The OpenerDirector is a composite object that invokes the
11 Handlers needed to open the requested URL.  For example, the
12 HTTPHandler performs HTTP GET and POST requests and deals with
13 non-error returns.  The HTTPRedirectHandler automatically deals with
14 HTTP 301, 302, 303 and 307 redirect errors, and the HTTPDigestAuthHandler
15 deals with digest authentication.
16 
17 urlopen(url, data=None) -- Basic usage is the same as original
18 urllib.  pass the url and optionally data to post to an HTTP URL, and
19 get a file-like object back.  One difference is that you can also pass
20 a Request instance instead of URL.  Raises a URLError (subclass of
21 OSError); for HTTP errors, raises an HTTPError, which can also be
22 treated as a valid response.
23 
24 build_opener -- Function that creates a new OpenerDirector instance.
25 Will install the default handlers.  Accepts one or more Handlers as
26 arguments, either instances or Handler classes that it will
instantiate.  If one of the arguments is a subclass of the default
handler, the argument will be installed instead of the default.
29 
30 install_opener -- Installs a new opener as the default opener.
31 
32 objects of interest:
33 
34 OpenerDirector -- Sets up the User Agent as the Python-urllib client and manages
35 the Handler classes, while dealing with requests and responses.
36 
37 Request -- An object that encapsulates the state of a request.  The
38 state can be as simple as the URL.  It can also include extra HTTP
39 headers, e.g. a User-Agent.
40 
41 BaseHandler --
42 
43 internals:
44 BaseHandler and parent
45 _call_chain conventions
46 
47 Example usage:
48 
49 import urllib.request
50 
51 # set up authentication info
52 authinfo = urllib.request.HTTPBasicAuthHandler()
53 authinfo.add_password(realm='PDQ Application',
54                       uri='https://mahler:8092/site-updates.py',
55                       user='klem',
56                       passwd='geheim$parole')
57 
58 proxy_support = urllib.request.ProxyHandler({"http" : "http://ahad-haam:3128"})
59 
60 # build a new opener that adds authentication and caching FTP handlers
61 opener = urllib.request.build_opener(proxy_support, authinfo,
62                                      urllib.request.CacheFTPHandler)
63 
64 # install it
65 urllib.request.install_opener(opener)
66 
67 f = urllib.request.urlopen('https://www.python.org/')
68 """
69 
70 # XXX issues:
71 # If an authentication error handler that tries to perform
72 # authentication for some reason but fails, how should the error be
# signalled?  The client needs to know the HTTP error code.  But if
# the handler knows what the problem was, e.g., that it didn't know
# the hash algorithm requested in the challenge, it would be good to
# pass that information along to the client, too.
77 # ftp errors aren't handled cleanly
78 # check digest against correct (i.e. non-apache) implementation
79 
80 # Possible extensions:
81 # complex proxies  XXX not sure what exactly was meant by this
82 # abstract factory for opener
83 
84 import base64
85 import bisect
86 import email
87 import hashlib
88 import http.client
89 import io
90 import os
91 import posixpath
92 import re
93 import socket
94 import string
95 import sys
96 import time
97 import tempfile
98 import contextlib
99 import warnings
100 
101 
102 from urllib.error import URLError, HTTPError, ContentTooShortError
103 from urllib.parse import (
104     urlparse, urlsplit, urljoin, unwrap, quote, unquote,
105     _splittype, _splithost, _splitport, _splituser, _splitpasswd,
106     _splitattr, _splitquery, _splitvalue, _splittag, _to_bytes,
107     unquote_to_bytes, urlunparse)
108 from urllib.response import addinfourl, addclosehook
109 
# Probe for SSL support: HTTPS handling is only wired up when the ssl
# module can be imported.
_have_ssl = True
try:
    import ssl
except ImportError:
    _have_ssl = False
117 
# Public API: "from urllib.request import *" exports exactly these names.
__all__ = [
    # Classes
    'Request', 'OpenerDirector', 'BaseHandler', 'HTTPDefaultErrorHandler',
    'HTTPRedirectHandler', 'HTTPCookieProcessor', 'ProxyHandler',
    'HTTPPasswordMgr', 'HTTPPasswordMgrWithDefaultRealm',
    'HTTPPasswordMgrWithPriorAuth', 'AbstractBasicAuthHandler',
    'HTTPBasicAuthHandler', 'ProxyBasicAuthHandler', 'AbstractDigestAuthHandler',
    'HTTPDigestAuthHandler', 'ProxyDigestAuthHandler', 'HTTPHandler',
    'FileHandler', 'FTPHandler', 'CacheFTPHandler', 'DataHandler',
    'UnknownHandler', 'HTTPErrorProcessor',
    # Functions
    'urlopen', 'install_opener', 'build_opener',
    'pathname2url', 'url2pathname', 'getproxies',
    # Legacy interface
    'urlretrieve', 'urlcleanup', 'URLopener', 'FancyURLopener',
]

# used in User-Agent header sent (e.g. "Python-urllib/3.11")
__version__ = '%d.%d' % sys.version_info[:2]

# Process-wide default OpenerDirector: created lazily by urlopen() and
# replaceable via install_opener().
_opener = None
def urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
            *, cafile=None, capath=None, cadefault=False, context=None):
    '''Open the URL url, which can be either a string or a Request object.

    *data* must be an object specifying additional data to be sent to
    the server, or None if no such data is needed.  See Request for
    details.

    urllib.request module uses HTTP/1.1 and includes a "Connection:close"
    header in its HTTP requests.

    The optional *timeout* parameter specifies a timeout in seconds for
    blocking operations like the connection attempt (if not specified, the
    global default timeout setting will be used). This only works for HTTP,
    HTTPS and FTP connections.

    If *context* is specified, it must be a ssl.SSLContext instance describing
    the various SSL options. See HTTPSConnection for more details.

    The optional *cafile* and *capath* parameters specify a set of trusted CA
    certificates for HTTPS requests. cafile should point to a single file
    containing a bundle of CA certificates, whereas capath should point to a
    directory of hashed certificate files. More information can be found in
    ssl.SSLContext.load_verify_locations().

    The *cadefault* parameter is ignored.


    This function always returns an object which can work as a
    context manager and has the properties url, headers, and status.
    See urllib.response.addinfourl for more detail on these properties.

    For HTTP and HTTPS URLs, this function returns a http.client.HTTPResponse
    object slightly modified. In addition to the three new methods above, the
    msg attribute contains the same information as the reason attribute ---
    the reason phrase returned by the server --- instead of the response
    headers as it is specified in the documentation for HTTPResponse.

    For FTP, file, and data URLs and requests explicitly handled by legacy
    URLopener and FancyURLopener classes, this function returns a
    urllib.response.addinfourl object.

    Note that None may be returned if no handler handles the request (though
    the default installed global OpenerDirector uses UnknownHandler to ensure
    this never happens).

    In addition, if proxy settings are detected (for example, when a *_proxy
    environment variable like http_proxy is set), ProxyHandler is default
    installed and makes sure the requests are handled through the proxy.

    Raises ValueError if *context* is combined with any of the deprecated
    *cafile*/*capath*/*cadefault* parameters, or if those are used without
    SSL support available.
    '''
    global _opener
    if cafile or capath or cadefault:
        # Deprecated TLS knobs: warn, then build a one-off context from
        # them.  (warnings is imported at module level; no local import
        # is needed here.)
        warnings.warn("cafile, capath and cadefault are deprecated, use a "
                      "custom context instead.", DeprecationWarning, 2)
        if context is not None:
            raise ValueError(
                "You can't pass both context and any of cafile, capath, and "
                "cadefault"
            )
        if not _have_ssl:
            raise ValueError('SSL support not available')
        context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH,
                                             cafile=cafile,
                                             capath=capath)
        # send ALPN extension to indicate HTTP/1.1 protocol
        context.set_alpn_protocols(['http/1.1'])
        https_handler = HTTPSHandler(context=context)
        opener = build_opener(https_handler)
    elif context:
        # Caller-supplied SSLContext: build a dedicated opener around it.
        https_handler = HTTPSHandler(context=context)
        opener = build_opener(https_handler)
    elif _opener is None:
        # First plain call: create and cache the module-default opener.
        _opener = opener = build_opener()
    else:
        opener = _opener
    return opener.open(url, data, timeout)
217 
def install_opener(opener):
    """Install *opener* as the process-wide default OpenerDirector.

    Subsequent urlopen() calls will route requests through *opener*
    (until it is replaced by another call, or cleared by urlcleanup()).
    """
    global _opener
    _opener = opener
221 
# Names of NamedTemporaryFile objects handed out by urlretrieve();
# urlcleanup() deletes them.
_url_tempfiles = []

def urlretrieve(url, filename=None, reporthook=None, data=None):
    """Retrieve a URL into a temporary location on disk.

    Requires a URL argument. If a filename is passed, it is used as
    the temporary file location. The reporthook argument should be
    a callable that accepts a block number, a read size, and the
    total file size of the URL target. The data argument should be
    valid URL encoded data.

    If a filename is passed and the URL points to a local resource,
    the result is a copy from local file to new file.

    Returns a tuple containing the path to the newly created
    data file as well as the resulting HTTPMessage object.
    """
    url_type, path = _splittype(url)

    with contextlib.closing(urlopen(url, data)) as source:
        headers = source.info()

        # A file:// URL with no explicit destination needs no copy at
        # all: hand back the local path directly.
        if url_type == "file" and not filename:
            return os.path.normpath(path), headers

        if filename:
            target = open(filename, 'wb')
        else:
            # No destination given: stage into a named temp file and
            # remember it so urlcleanup() can remove it later.
            target = tempfile.NamedTemporaryFile(delete=False)
            filename = target.name
            _url_tempfiles.append(filename)

        with target:
            result = filename, headers
            blocksize = 1024 * 8
            expected = -1          # -1 means "length unknown"
            bytes_read = 0
            block_count = 0
            if "content-length" in headers:
                expected = int(headers["Content-Length"])

            # Initial callback before any data arrives.
            if reporthook:
                reporthook(block_count, blocksize, expected)

            while True:
                chunk = source.read(blocksize)
                if not chunk:
                    break
                bytes_read += len(chunk)
                target.write(chunk)
                block_count += 1
                if reporthook:
                    reporthook(block_count, blocksize, expected)

    # A short read against a declared Content-Length is an error.
    if expected >= 0 and bytes_read < expected:
        raise ContentTooShortError(
            "retrieval incomplete: got only %i out of %i bytes"
            % (bytes_read, expected), result)

    return result
285 
def urlcleanup():
    """Clean up temporary files from urlretrieve calls."""
    global _opener
    for temp_file in _url_tempfiles:
        # Best effort: the file may already be gone or be undeletable.
        with contextlib.suppress(OSError):
            os.unlink(temp_file)
    del _url_tempfiles[:]
    # Also drop the cached default opener, if one was created.
    if _opener:
        _opener = None
298 
# copied from cookielib.py
_cut_port_re = re.compile(r":\d+$", re.ASCII)

def request_host(request):
    """Return request-host, as defined by RFC 2965.

    Variation from RFC: returned value is lowercased, for convenient
    comparison.
    """
    host = urlparse(request.full_url).netloc
    if not host:
        # Relative URL (no netloc): fall back to the Host header, if any.
        host = request.get_header("Host", "")
    # Strip a trailing :port, at most once.
    host = _cut_port_re.sub("", host, 1)
    return host.lower()
316 
class Request:
    """Encapsulate one URL request: target URL, optional body, headers,
    and redirect/proxy bookkeeping."""

    def __init__(self, url, data=None, headers={},
                 origin_req_host=None, unverifiable=False,
                 method=None):
        # The full_url setter splits off the fragment and parses the URL.
        self.full_url = url
        self.headers = {}
        self.unredirected_hdrs = {}
        self._data = None
        self.data = data
        self._tunnel_host = None
        for name, value in headers.items():
            self.add_header(name, value)
        if origin_req_host is None:
            origin_req_host = request_host(self)
        self.origin_req_host = origin_req_host
        self.unverifiable = unverifiable
        if method:
            self.method = method

    @property
    def full_url(self):
        # Re-attach the fragment that the setter split off.
        if self.fragment:
            return f'{self._full_url}#{self.fragment}'
        return self._full_url

    @full_url.setter
    def full_url(self, url):
        # unwrap('<URL:type://host/path>') --> 'type://host/path'
        self._full_url = unwrap(url)
        self._full_url, self.fragment = _splittag(self._full_url)
        self._parse()

    @full_url.deleter
    def full_url(self):
        self._full_url = None
        self.fragment = None
        self.selector = ''

    @property
    def data(self):
        return self._data

    @data.setter
    def data(self, data):
        if data != self._data:
            self._data = data
            # A Content-length computed for the previous body is now
            # stale (issue 16464), so drop it from both header dicts.
            self.remove_header("Content-length")

    @data.deleter
    def data(self):
        self.data = None

    def _parse(self):
        # Split scheme://host/selector; a URL with no scheme is rejected.
        self.type, remainder = _splittype(self._full_url)
        if self.type is None:
            raise ValueError("unknown url type: %r" % self.full_url)
        self.host, self.selector = _splithost(remainder)
        if self.host:
            self.host = unquote(self.host)

    def get_method(self):
        """Return a string indicating the HTTP request method."""
        if self.data is not None:
            fallback = "POST"
        else:
            fallback = "GET"
        # An explicitly set .method always wins.
        return getattr(self, 'method', fallback)

    def get_full_url(self):
        return self.full_url

    def set_proxy(self, host, type):
        if self.type == 'https' and not self._tunnel_host:
            # https is tunnelled: remember the real host for CONNECT.
            self._tunnel_host = self.host
        else:
            self.type = type
            self.selector = self.full_url
        self.host = host

    def has_proxy(self):
        # set_proxy() (non-tunnel path) makes selector the full URL.
        return self.selector == self.full_url

    def add_header(self, key, val):
        # useful for something like authentication
        self.headers[key.capitalize()] = val

    def add_unredirected_header(self, key, val):
        # will not be added to a redirected request
        self.unredirected_hdrs[key.capitalize()] = val

    def has_header(self, header_name):
        return (header_name in self.headers
                or header_name in self.unredirected_hdrs)

    def get_header(self, header_name, default=None):
        if header_name in self.headers:
            return self.headers[header_name]
        return self.unredirected_hdrs.get(header_name, default)

    def remove_header(self, header_name):
        self.headers.pop(header_name, None)
        self.unredirected_hdrs.pop(header_name, None)

    def header_items(self):
        # Normal headers win over unredirected ones on key collision.
        merged = dict(self.unredirected_hdrs)
        merged.update(self.headers)
        return list(merged.items())
425 
class OpenerDirector:
    """Manage a chain of Handler objects and use them to open URLs.

    Handlers are registered with add_handler(); open() then runs a
    request through per-protocol request pre-processors, the protocol's
    open methods, and response post-processors.  error() dispatches
    protocol errors to the registered error handlers.
    """

    def __init__(self):
        client_version = "Python-urllib/%s" % __version__
        self.addheaders = [('User-agent', client_version)]
        # self.handlers is retained only for backward compatibility
        self.handlers = []
        # manage the individual handlers: each maps protocol/kind -> handlers
        self.handle_open = {}
        self.handle_error = {}
        self.process_response = {}
        self.process_request = {}

    def add_handler(self, handler):
        """Register *handler* for every <protocol>_<condition> method it has.

        Raises TypeError if *handler* does not look like a BaseHandler.
        """
        if not hasattr(handler, "add_parent"):
            raise TypeError("expected BaseHandler instance, got %r" %
                            type(handler))

        added = False
        for meth in dir(handler):
            if meth in ["redirect_request", "do_open", "proxy_open"]:
                # oops, coincidental match
                continue

            # Method names encode their role, e.g. "http_open",
            # "https_request", "http_error_404".
            i = meth.find("_")
            protocol = meth[:i]
            condition = meth[i+1:]

            if condition.startswith("error"):
                j = condition.find("_") + i + 1
                kind = meth[j+1:]
                try:
                    kind = int(kind)  # numeric status code, e.g. 404
                except ValueError:
                    pass
                lookup = self.handle_error.get(protocol, {})
                self.handle_error[protocol] = lookup
            elif condition == "open":
                kind = protocol
                lookup = self.handle_open
            elif condition == "response":
                kind = protocol
                lookup = self.process_response
            elif condition == "request":
                kind = protocol
                lookup = self.process_request
            else:
                continue

            handlers = lookup.setdefault(kind, [])
            if handlers:
                # Keep each chain sorted by handler_order (via __lt__).
                bisect.insort(handlers, handler)
            else:
                handlers.append(handler)
            added = True

        if added:
            bisect.insort(self.handlers, handler)
            handler.add_parent(self)

    def close(self):
        # Only exists for backwards compatibility.
        pass

    def _call_chain(self, chain, kind, meth_name, *args):
        # Handlers raise an exception if no one else should try to handle
        # the request, or return None if they can't but another handler
        # could.  Otherwise, they return the response.
        handlers = chain.get(kind, ())
        for handler in handlers:
            func = getattr(handler, meth_name)
            result = func(*args)
            if result is not None:
                return result

    def open(self, fullurl, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
        """Open *fullurl* (a URL string or a Request) and return a response."""
        # accept a URL or a Request object
        if isinstance(fullurl, str):
            req = Request(fullurl, data)
        else:
            req = fullurl
            if data is not None:
                req.data = data

        req.timeout = timeout
        protocol = req.type

        # pre-process request
        meth_name = protocol+"_request"
        for processor in self.process_request.get(protocol, []):
            meth = getattr(processor, meth_name)
            req = meth(req)

        sys.audit('urllib.Request', req.full_url, req.data, req.headers, req.get_method())
        response = self._open(req, data)

        # post-process response
        meth_name = protocol+"_response"
        for processor in self.process_response.get(protocol, []):
            meth = getattr(processor, meth_name)
            response = meth(req, response)

        return response

    def _open(self, req, data=None):
        # default_open handlers get first crack at every request...
        result = self._call_chain(self.handle_open, 'default',
                                  'default_open', req)
        if result:
            return result

        # ...then the protocol-specific openers...
        protocol = req.type
        result = self._call_chain(self.handle_open, protocol, protocol +
                                  '_open', req)
        if result:
            return result

        # ...and finally the unknown_open fallbacks.
        return self._call_chain(self.handle_open, 'unknown',
                                'unknown_open', req)

    def error(self, proto, *args):
        """Dispatch an error on *proto* to the registered error handlers."""
        if proto in ('http', 'https'):
            # XXX http[s] protocols are special-cased
            # (renamed from "dict" so the builtin is not shadowed)
            error_map = self.handle_error['http'] # https is not different than http
            proto = args[2]  # YUCK!
            meth_name = 'http_error_%s' % proto
            http_err = 1
            orig_args = args
        else:
            error_map = self.handle_error
            meth_name = proto + '_error'
            http_err = 0
        args = (error_map, proto, meth_name) + args
        result = self._call_chain(*args)
        if result:
            return result

        if http_err:
            # Fall back to the catch-all http_error_default handlers.
            args = (error_map, 'default', 'http_error_default') + orig_args
            return self._call_chain(*args)
564 
565 # XXX probably also want an abstract factory that knows when it makes
566 # sense to skip a superclass in favor of a subclass and when it might
567 # make sense to include both
568 
def build_opener(*handlers):
    """Create an opener object from a list of handlers.

    The opener will use several default handlers, including support
    for HTTP, FTP and when applicable HTTPS.

    If any of the handlers passed as arguments are subclasses of the
    default handlers, the default handlers will not be used.
    """
    opener = OpenerDirector()
    default_classes = [ProxyHandler, UnknownHandler, HTTPHandler,
                       HTTPDefaultErrorHandler, HTTPRedirectHandler,
                       FTPHandler, FileHandler, HTTPErrorProcessor,
                       DataHandler]
    if hasattr(http.client, "HTTPSConnection"):
        default_classes.append(HTTPSHandler)

    def _supersedes(check, klass):
        # A handler argument replaces a default when it is the class
        # itself, a subclass of it, or an instance of (a subclass of) it.
        if isinstance(check, type):
            return issubclass(check, klass)
        return isinstance(check, klass)

    skip = {klass for klass in default_classes
            for check in handlers if _supersedes(check, klass)}
    for klass in skip:
        default_classes.remove(klass)

    # Defaults first, then the caller's handlers (classes are instantiated).
    for klass in default_classes:
        opener.add_handler(klass())

    for handler in handlers:
        if isinstance(handler, type):
            handler = handler()
        opener.add_handler(handler)
    return opener
604 
class BaseHandler:
    """Base class for the protocol handlers managed by OpenerDirector."""

    # Position in the handler chain; lower values sort (and run) first.
    handler_order = 500

    def add_parent(self, parent):
        # Back-reference to the OpenerDirector that owns this handler.
        self.parent = parent

    def close(self):
        # Only exists for backwards compatibility.
        pass

    def __lt__(self, other):
        try:
            other_order = other.handler_order
        except AttributeError:
            # Preserve the old behavior of sorting custom classes that
            # are unaware of handler_order after the default handlers.
            return True
        return self.handler_order < other_order
622 
623 
class HTTPErrorProcessor(BaseHandler):
    """Process HTTP error responses: route non-2xx replies to the
    parent's error chain."""
    handler_order = 1000  # after all other processing

    def http_response(self, request, response):
        code, msg, hdrs = response.code, response.msg, response.info()

        # According to RFC 2616, "2xx" code indicates that the client's
        # request was successfully received, understood, and accepted;
        # anything else is handed to the error handlers.
        if code < 200 or code >= 300:
            response = self.parent.error(
                'http', request, response, code, msg, hdrs)

        return response

    https_response = http_response
640 
class HTTPDefaultErrorHandler(BaseHandler):
    """Last-resort error handler: raise HTTPError for any HTTP error."""
    def http_error_default(self, req, fp, code, msg, hdrs):
        # Reached only when no more specific http_error_<code> handler
        # produced a response; surface the error to the caller.
        raise HTTPError(req.full_url, code, msg, hdrs, fp)
644 
class HTTPRedirectHandler(BaseHandler):
    """Follow HTTP 301/302/303/307 redirects, with loop detection."""

    # maximum number of redirections to any single URL
    # this is needed because of the state that cookies introduce
    max_repeats = 4
    # maximum total number of redirections (regardless of URL) before
    # assuming we're in a loop
    max_redirections = 10

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """Return a Request or None in response to a redirect.

        This is called by the http_error_30x methods when a
        redirection response is received.  If a redirection should
        take place, return a new Request to allow http_error_30x to
        perform the redirect.  Otherwise, raise HTTPError if no-one
        else should try to handle this url.  Return None if you can't
        but another Handler might.
        """
        m = req.get_method()
        # Only GET/HEAD may follow any of 301/302/303/307; POST may only
        # follow 301/302/303 (see the RFC note below).
        if (not (code in (301, 302, 303, 307) and m in ("GET", "HEAD")
            or code in (301, 302, 303) and m == "POST")):
            raise HTTPError(req.full_url, code, msg, headers, fp)

        # Strictly (according to RFC 2616), 301 or 302 in response to
        # a POST MUST NOT cause a redirection without confirmation
        # from the user (of urllib.request, in this case).  In practice,
        # essentially all clients do redirect in this case, so we do
        # the same.

        # Be conciliant with URIs containing a space.  This is mainly
        # redundant with the more complete encoding done in http_error_302(),
        # but it is kept for compatibility with other callers.
        newurl = newurl.replace(' ', '%20')

        # Body-describing headers are not copied: the redirected request
        # carries no body.
        CONTENT_HEADERS = ("content-length", "content-type")
        newheaders = {k: v for k, v in req.headers.items()
                      if k.lower() not in CONTENT_HEADERS}
        return Request(newurl,
                       headers=newheaders,
                       origin_req_host=req.origin_req_host,
                       unverifiable=True)

    # Implementation note: To avoid the server sending us into an
    # infinite loop, the request object needs to track what URLs we
    # have already seen.  Do this by adding a handler-specific
    # attribute to the Request object.
    def http_error_302(self, req, fp, code, msg, headers):
        """Build a redirected Request from the Location/URI header and
        re-open it through the parent director."""
        # Some servers (incorrectly) return multiple Location headers
        # (so probably same goes for URI).  Use first header.
        if "location" in headers:
            newurl = headers["location"]
        elif "uri" in headers:
            newurl = headers["uri"]
        else:
            return

        # fix a possible malformed URL
        urlparts = urlparse(newurl)

        # For security reasons we don't allow redirection to anything other
        # than http, https or ftp.

        if urlparts.scheme not in ('http', 'https', 'ftp', ''):
            raise HTTPError(
                newurl, code,
                "%s - Redirection to url '%s' is not allowed" % (msg, newurl),
                headers, fp)

        if not urlparts.path and urlparts.netloc:
            urlparts = list(urlparts)
            urlparts[2] = "/"
        newurl = urlunparse(urlparts)

        # http.client.parse_headers() decodes as ISO-8859-1.  Recover the
        # original bytes and percent-encode non-ASCII bytes, and any special
        # characters such as the space.
        newurl = quote(
            newurl, encoding="iso-8859-1", safe=string.punctuation)
        newurl = urljoin(req.full_url, newurl)

        # XXX Probably want to forget about the state of the current
        # request, although that might interact poorly with other
        # handlers that also use handler-specific request attributes
        new = self.redirect_request(req, fp, code, msg, headers, newurl)
        if new is None:
            return

        # loop detection
        # .redirect_dict has a key url if url was previously visited.
        if hasattr(req, 'redirect_dict'):
            visited = new.redirect_dict = req.redirect_dict
            if (visited.get(newurl, 0) >= self.max_repeats or
                len(visited) >= self.max_redirections):
                raise HTTPError(req.full_url, code,
                                self.inf_msg + msg, headers, fp)
        else:
            visited = new.redirect_dict = req.redirect_dict = {}
        visited[newurl] = visited.get(newurl, 0) + 1

        # Don't close the fp until we are sure that we won't use it
        # with HTTPError.
        fp.read()
        fp.close()

        return self.parent.open(new, timeout=req.timeout)

    # 301, 303 and 307 are handled identically to 302.
    http_error_301 = http_error_303 = http_error_307 = http_error_302

    # Prefix for the HTTPError message raised when a redirect loop is
    # detected (see the loop-detection block above).
    inf_msg = "The HTTP server returned a redirect error that would " \
              "lead to an infinite loop.\n" \
              "The last 30x error message was:\n"
756 
757 
def _parse_proxy(proxy):
    """Return (scheme, user, password, host/port) given a URL or an authority.

    If a URL is supplied, it must have an authority (host:port) component.
    According to RFC 3986, having an authority component means the URL must
    have two slashes after the scheme.
    """
    scheme, r_scheme = _splittype(proxy)
    if not r_scheme.startswith("/"):
        # Bare authority such as "user:pass@host:port" -- no scheme.
        scheme = None
        authority = proxy
    else:
        # URL form: RFC 3986 requires "//" before the authority.
        if not r_scheme.startswith("//"):
            raise ValueError("proxy URL with no authority: %r" % proxy)
        # We have an authority, so for RFC 3986-compliant URLs (by ss 3.
        # and 3.3.), path is empty or starts with '/'.  Start the path
        # search after any '@' so userinfo characters are skipped.
        if '@' in r_scheme:
            end = r_scheme.find("/", r_scheme.find('@'))
        else:
            end = r_scheme.find("/", 2)
        if end == -1:
            end = None
        authority = r_scheme[2:end]
    userinfo, hostport = _splituser(authority)
    if userinfo is None:
        user = password = None
    else:
        user, password = _splitpasswd(userinfo)
    return scheme, user, password, hostport
790 
class ProxyHandler(BaseHandler):
    """Route requests through the proxies in a {scheme: proxy-url} mapping."""
    # Proxies must be in front
    handler_order = 100

    def __init__(self, proxies=None):
        # Default to the platform's proxy settings (environment, etc.).
        if proxies is None:
            proxies = getproxies()
        assert hasattr(proxies, 'keys'), "proxies must be a mapping"
        self.proxies = proxies
        # Create one "<scheme>_open" method per configured scheme; the
        # lambda's default arguments bind the current loop values so each
        # method captures its own proxy URL and scheme.
        for type, url in proxies.items():
            type = type.lower()
            setattr(self, '%s_open' % type,
                    lambda r, proxy=url, type=type, meth=self.proxy_open:
                        meth(r, proxy, type))

    def proxy_open(self, req, proxy, type):
        """Rewrite *req* to go through *proxy*, or return None to let
        other handlers connect directly."""
        orig_type = req.type
        proxy_type, user, password, hostport = _parse_proxy(proxy)
        if proxy_type is None:
            proxy_type = orig_type

        # Hosts matched by proxy_bypass() connect directly.
        if req.host and proxy_bypass(req.host):
            return None

        # Proxy credentials embedded in the proxy URL become a
        # Proxy-authorization: Basic header.
        if user and password:
            user_pass = '%s:%s' % (unquote(user),
                                   unquote(password))
            creds = base64.b64encode(user_pass.encode()).decode("ascii")
            req.add_header('Proxy-authorization', 'Basic ' + creds)
        hostport = unquote(hostport)
        req.set_proxy(hostport, proxy_type)
        if orig_type == proxy_type or orig_type == 'https':
            # let other handlers take care of it
            return None
        else:
            # need to start over, because the other handlers don't
            # grok the proxy's URL type
            # e.g. if we have a constructor arg proxies like so:
            # {'http': 'ftp://proxy.example.com'}, we may end up turning
            # a request for http://acme.example.com/a into one for
            # ftp://proxy.example.com/a
            return self.parent.open(req, timeout=req.timeout)
833 
class HTTPPasswordMgr:
    """Maps (realm, URI prefix) pairs to (user, password) credentials."""

    def __init__(self):
        # realm -> {tuple of reduced URIs -> (user, password)}
        self.passwd = {}

    def add_password(self, realm, uri, user, passwd):
        """Register credentials for realm at the given URI (or URIs)."""
        if isinstance(uri, str):
            uri = [uri]
        realm_map = self.passwd.setdefault(realm, {})
        # Store under both the with-default-port and as-given reductions so
        # lookups succeed whether or not the port is spelled out.
        for default_port in (True, False):
            key = tuple(self.reduce_uri(u, default_port) for u in uri)
            realm_map[key] = (user, passwd)

    def find_user_password(self, realm, authuri):
        """Return (user, password) for realm/authuri, or (None, None)."""
        candidates = self.passwd.get(realm, {})
        for default_port in (True, False):
            target = self.reduce_uri(authuri, default_port)
            for uris, authinfo in candidates.items():
                if any(self.is_suburi(u, target) for u in uris):
                    return authinfo
        return None, None

    def reduce_uri(self, uri, default_port=True):
        """Accept authority or URI and extract only the authority and path."""
        # note HTTP URLs do not have a userinfo component
        parts = urlsplit(uri)
        if parts[1]:
            # A full URI with an authority component.
            scheme, authority = parts[0], parts[1]
            path = parts[2] or '/'
        else:
            # A bare host or host:port.
            scheme, authority, path = None, uri, '/'
        host, port = _splitport(authority)
        if default_port and port is None and scheme is not None:
            # Normalize by making the scheme's default port explicit.
            dport = {"http": 80,
                     "https": 443,
                     }.get(scheme)
            if dport is not None:
                authority = "%s:%d" % (host, dport)
        return authority, path

    def is_suburi(self, base, test):
        """Check if test is below base in a URI tree

        Both args must be URIs in reduced form.
        """
        if base == test:
            return True
        if base[0] != test[0]:
            return False
        prefix = posixpath.commonprefix((base[1], test[1]))
        return len(prefix) == len(base[1])
896 
897 
class HTTPPasswordMgrWithDefaultRealm(HTTPPasswordMgr):
    """Password manager that falls back to the ``None`` (default) realm."""

    def find_user_password(self, realm, authuri):
        """Look up realm credentials, falling back to the default realm."""
        user, password = HTTPPasswordMgr.find_user_password(
            self, realm, authuri)
        if user is None:
            # Nothing registered for this realm; try the catch-all entry.
            return HTTPPasswordMgr.find_user_password(self, None, authuri)
        return user, password
906 
907 
class HTTPPasswordMgrWithPriorAuth(HTTPPasswordMgrWithDefaultRealm):
    """Password manager that also records which URIs may receive
    credentials pre-emptively (without waiting for a 401)."""

    def __init__(self, *args, **kwargs):
        # reduced URI -> bool: send auth up front for this URI?
        self.authenticated = {}
        super().__init__(*args, **kwargs)

    def add_password(self, realm, uri, user, passwd, is_authenticated=False):
        """Register credentials, optionally marking uri as pre-authenticated."""
        self.update_authenticated(uri, is_authenticated)
        # Add a default for prior auth requests
        if realm is not None:
            super().add_password(None, uri, user, passwd)
        super().add_password(realm, uri, user, passwd)

    def update_authenticated(self, uri, is_authenticated=False):
        """Record the pre-auth flag for a URI or a sequence of URIs."""
        uris = [uri] if isinstance(uri, str) else uri
        for default_port in (True, False):
            for u in uris:
                reduced = self.reduce_uri(u, default_port)
                self.authenticated[reduced] = is_authenticated

    def is_authenticated(self, authuri):
        """Return the flag recorded for the closest matching URI, else None."""
        for default_port in (True, False):
            reduced = self.reduce_uri(authuri, default_port)
            for uri, flag in self.authenticated.items():
                if self.is_suburi(uri, reduced):
                    return flag
938 
class AbstractBasicAuthHandler:
    """Shared machinery for HTTP Basic authentication.

    Subclasses provide ``auth_header`` and route 401/407 errors into
    http_error_auth_reqed().
    """

    # XXX this allows for multiple auth-schemes, but will stupidly pick
    # the last one with a realm specified.

    # allow for double- and single-quoted realm values
    # (single quotes are a violation of the RFC, but appear in the wild)
    rx = re.compile('(?:^|,)'   # start of the string or ','
                    '[ \t]*'    # optional whitespaces
                    '([^ \t,]+)' # scheme like "Basic"
                    '[ \t]+'    # mandatory whitespaces
                    # realm=xxx
                    # realm='xxx'
                    # realm="xxx"
                    'realm=(["\']?)([^"\']*)\\2',
                    re.I)

    # XXX could pre-emptively send auth info already accepted (RFC 2617,
    # end of section 2, and section 1.2 immediately after "credentials"
    # production).

    def __init__(self, password_mgr=None):
        if password_mgr is None:
            password_mgr = HTTPPasswordMgr()
        self.passwd = password_mgr
        self.add_password = self.passwd.add_password

    def _parse_realm(self, header):
        """Yield (scheme, realm) for each challenge found in header.

        realm is None when a challenge carries no realm parameter.
        """
        # parse WWW-Authenticate header: accept multiple challenges per header
        found_challenge = False
        for mo in AbstractBasicAuthHandler.rx.finditer(header):
            scheme, quote, realm = mo.groups()
            if quote not in ['"', "'"]:
                warnings.warn("Basic Auth Realm was unquoted",
                              UserWarning, 3)

            yield (scheme, realm)

            found_challenge = True

        if not found_challenge:
            # No realm= anywhere: still report the bare scheme (if any) so
            # the caller can detect unsupported schemes.
            if header:
                scheme = header.split()[0]
            else:
                scheme = ''
            yield (scheme, None)

    def http_error_auth_reqed(self, authreq, host, req, headers):
        """Retry req using the first usable Basic challenge.

        Raises ValueError when only non-Basic challenges were offered.
        """
        # host may be an authority (without userinfo) or a URL with an
        # authority
        headers = headers.get_all(authreq)
        if not headers:
            # no header found
            return

        unsupported = None
        for header in headers:
            for scheme, realm in self._parse_realm(header):
                if scheme.lower() != 'basic':
                    unsupported = scheme
                    continue

                if realm is not None:
                    # Use the first matching Basic challenge.
                    # Ignore following challenges even if they use the Basic
                    # scheme.
                    return self.retry_http_basic_auth(host, req, realm)

        if unsupported is not None:
            # BUGFIX: report the scheme we actually rejected ('unsupported')
            # rather than whichever scheme happened to be parsed last.
            raise ValueError("AbstractBasicAuthHandler does not "
                             "support the following scheme: %r"
                             % (unsupported,))

    def retry_http_basic_auth(self, host, req, realm):
        """Re-issue req with an Authorization header, or return None."""
        user, pw = self.passwd.find_user_password(realm, host)
        if pw is not None:
            raw = "%s:%s" % (user, pw)
            auth = "Basic " + base64.b64encode(raw.encode()).decode("ascii")
            if req.get_header(self.auth_header, None) == auth:
                # These exact credentials were already tried; give up so we
                # don't loop on a server that keeps rejecting them.
                return None
            req.add_unredirected_header(self.auth_header, auth)
            return self.parent.open(req, timeout=req.timeout)
        else:
            return None

    def http_request(self, req):
        # Pre-emptively attach credentials when the password manager has
        # recorded a prior successful authentication for this URI.
        if (not hasattr(self.passwd, 'is_authenticated') or
           not self.passwd.is_authenticated(req.full_url)):
            return req

        if not req.has_header('Authorization'):
            user, passwd = self.passwd.find_user_password(None, req.full_url)
            credentials = '{0}:{1}'.format(user, passwd).encode()
            auth_str = base64.standard_b64encode(credentials).decode()
            req.add_unredirected_header('Authorization',
                                        'Basic {}'.format(auth_str.strip()))
        return req

    def http_response(self, req, response):
        # Record whether this URI authenticated successfully so later
        # requests can send credentials up front.
        if hasattr(self.passwd, 'is_authenticated'):
            if 200 <= response.code < 300:
                self.passwd.update_authenticated(req.full_url, True)
            else:
                self.passwd.update_authenticated(req.full_url, False)
        return response

    https_request = http_request
    https_response = http_response
1047 
1048 
1049 
class HTTPBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Answers 401 responses with Basic credentials for the request URL."""

    auth_header = 'Authorization'

    def http_error_401(self, req, fp, code, msg, headers):
        # Retry using credentials matching the WWW-Authenticate realm.
        return self.http_error_auth_reqed('www-authenticate',
                                          req.full_url, req, headers)
1059 
1060 
class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Answers 407 responses from proxies with Basic credentials."""

    auth_header = 'Proxy-authorization'

    def http_error_407(self, req, fp, code, msg, headers):
        # http_error_auth_reqed requires that there is no userinfo component in
        # authority.  Assume there isn't one, since urllib.request does not (and
        # should not, RFC 3986 s. 3.2.1) support requests for URLs containing
        # userinfo.
        return self.http_error_auth_reqed('proxy-authenticate',
                                          req.host, req, headers)
1074 
1075 
# Return n cryptographically strong random bytes (used below for digest
# auth client nonces).
_randombytes = os.urandom
1078 
1079 
class AbstractDigestAuthHandler:
    """Shared machinery for HTTP Digest authentication.

    Subclasses provide ``auth_header`` and route 401/407 errors into
    http_error_auth_reqed().
    """
    # Digest authentication is specified in RFC 2617 (updated by RFC 7616).

    # XXX The client does not inspect the Authentication-Info header
    # in a successful response.

    # XXX It should be possible to test this implementation against
    # a mock server that just generates a static set of challenges.

    # XXX qop="auth-int" supports is shaky

    def __init__(self, passwd=None):
        if passwd is None:
            passwd = HTTPPasswordMgr()
        self.passwd = passwd
        self.add_password = self.passwd.add_password
        self.retried = 0         # failed attempts for the current exchange
        self.nonce_count = 0     # per-nonce request counter (the "nc" value)
        self.last_nonce = None   # server nonce used on the previous attempt

    def reset_retry_count(self):
        self.retried = 0

    def http_error_auth_reqed(self, auth_header, host, req, headers):
        """Retry req with Digest credentials, or raise HTTPError/ValueError."""
        authreq = headers.get(auth_header, None)
        if self.retried > 5:
            # Don't fail endlessly - if we failed once, we'll probably
            # fail a second time. Hm. Unless the Password Manager is
            # prompting for the information. Crap. This isn't great
            # but it's better than the current 'repeat until recursion
            # depth exceeded' approach <wink>
            raise HTTPError(req.full_url, 401, "digest auth failed",
                            headers, None)
        else:
            self.retried += 1
        if authreq:
            scheme = authreq.split()[0]
            if scheme.lower() == 'digest':
                return self.retry_http_digest_auth(req, authreq)
            elif scheme.lower() != 'basic':
                # Basic challenges are silently declined here so a Basic
                # handler elsewhere in the chain can handle them.
                raise ValueError("AbstractDigestAuthHandler does not support"
                                 " the following scheme: '%s'" % scheme)

    def retry_http_digest_auth(self, req, auth):
        """Re-issue req with an Authorization header built from challenge."""
        token, challenge = auth.split(' ', 1)
        chal = parse_keqv_list(filter(None, parse_http_list(challenge)))
        auth = self.get_authorization(req, chal)
        if auth:
            auth_val = 'Digest %s' % auth
            if req.headers.get(self.auth_header, None) == auth_val:
                # Identical credentials were already sent and rejected;
                # stop here to avoid looping.
                return None
            req.add_unredirected_header(self.auth_header, auth_val)
            resp = self.parent.open(req, timeout=req.timeout)
            return resp

    def get_cnonce(self, nonce):
        # The cnonce-value is an opaque
        # quoted string value provided by the client and used by both client
        # and server to avoid chosen plaintext attacks, to provide mutual
        # authentication, and to provide some message integrity protection.
        # This isn't a fabulous effort, but it's probably Good Enough.
        s = "%s:%s:%s:" % (self.nonce_count, nonce, time.ctime())
        b = s.encode("ascii") + _randombytes(8)
        dig = hashlib.sha1(b).hexdigest()
        return dig[:16]

    def get_authorization(self, req, chal):
        """Build the Digest Authorization header value for req.

        Returns None when the challenge is unusable or no credentials are
        known; raises URLError for an unsupported qop.
        """
        try:
            realm = chal['realm']
            nonce = chal['nonce']
            qop = chal.get('qop')
            algorithm = chal.get('algorithm', 'MD5')
            # mod_digest doesn't send an opaque, even though it isn't
            # supposed to be optional
            opaque = chal.get('opaque', None)
        except KeyError:
            return None

        H, KD = self.get_algorithm_impls(algorithm)
        if H is None:
            return None

        user, pw = self.passwd.find_user_password(realm, req.full_url)
        if user is None:
            return None

        # XXX not implemented yet
        if req.data is not None:
            entdig = self.get_entity_digest(req.data, chal)
        else:
            entdig = None

        A1 = "%s:%s:%s" % (user, realm, pw)
        A2 = "%s:%s" % (req.get_method(),
                        # XXX selector: what about proxies and full urls
                        req.selector)
        # NOTE: As per  RFC 2617, when server sends "auth,auth-int", the client could use either `auth`
        #     or `auth-int` to the response back. we use `auth` to send the response back.
        if qop is None:
            respdig = KD(H(A1), "%s:%s" % (nonce, H(A2)))
        elif 'auth' in (q.strip() for q in qop.split(',')):
            # BUGFIX: strip optional whitespace so a challenge such as
            # qop="auth-int, auth" is also recognized as offering 'auth'.
            if nonce == self.last_nonce:
                self.nonce_count += 1
            else:
                self.nonce_count = 1
                self.last_nonce = nonce
            ncvalue = '%08x' % self.nonce_count
            cnonce = self.get_cnonce(nonce)
            noncebit = "%s:%s:%s:%s:%s" % (nonce, ncvalue, cnonce, 'auth', H(A2))
            respdig = KD(H(A1), noncebit)
        else:
            # XXX handle auth-int.
            raise URLError("qop '%s' is not supported." % qop)

        # XXX should the partial digests be encoded too?

        base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \
               'response="%s"' % (user, realm, nonce, req.selector,
                                  respdig)
        if opaque:
            base += ', opaque="%s"' % opaque
        if entdig:
            base += ', digest="%s"' % entdig
        base += ', algorithm="%s"' % algorithm
        if qop:
            base += ', qop=auth, nc=%s, cnonce="%s"' % (ncvalue, cnonce)
        return base

    def get_algorithm_impls(self, algorithm):
        """Return (H, KD) digest helpers for the named algorithm.

        Raises ValueError for an unsupported algorithm name.
        """
        # lambdas assume digest modules are imported at the top level
        if algorithm == 'MD5':
            H = lambda x: hashlib.md5(x.encode("ascii")).hexdigest()
        elif algorithm == 'SHA':
            H = lambda x: hashlib.sha1(x.encode("ascii")).hexdigest()
        elif algorithm == 'SHA-256':
            # RFC 7616 algorithm, required by an increasing number of servers.
            H = lambda x: hashlib.sha256(x.encode("ascii")).hexdigest()
        # XXX MD5-sess
        else:
            raise ValueError("Unsupported digest authentication "
                             "algorithm %r" % algorithm)
        KD = lambda s, d: H("%s:%s" % (s, d))
        return H, KD

    def get_entity_digest(self, data, chal):
        # XXX not implemented yet
        return None
1224 
1225 
class HTTPDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """An authentication protocol defined by RFC 2069.

    Digest authentication improves on basic authentication because it
    does not transmit the password in the clear.
    """

    auth_header = 'Authorization'
    handler_order = 490  # before Basic auth

    def http_error_401(self, req, fp, code, msg, headers):
        # Credentials are looked up by the authority component of the URL.
        host = urlsplit(req.full_url)[1]
        response = self.http_error_auth_reqed('www-authenticate',
                                              host, req, headers)
        self.reset_retry_count()
        return response
1242 
1243 
class ProxyDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """Digest authentication against proxies (HTTP 407 responses)."""

    auth_header = 'Proxy-Authorization'
    handler_order = 490  # before Basic auth

    def http_error_407(self, req, fp, code, msg, headers):
        # For proxies the request host, not the full URL, names the authority.
        response = self.http_error_auth_reqed('proxy-authenticate',
                                              req.host, req, headers)
        self.reset_retry_count()
        return response
1255 
class AbstractHTTPHandler(BaseHandler):
    """Shared plumbing for HTTPHandler and HTTPSHandler.

    Subclasses call do_open() with a concrete connection class from
    http.client; do_request_() normalizes a Request before it is sent.
    """

    def __init__(self, debuglevel=0):
        # Passed through to the connection's set_debuglevel().
        self._debuglevel = debuglevel

    def set_http_debuglevel(self, level):
        self._debuglevel = level

    def _get_content_length(self, request):
        # Delegate to http.client, which knows how to size bytes, files and
        # iterables; returns None when the length cannot be determined.
        return http.client.HTTPConnection._get_content_length(
            request.data,
            request.get_method())

    def do_request_(self, request):
        """Fill in default headers (Content-type, Content-length or
        Transfer-encoding, Host, opener addheaders) and return the request.

        Raises URLError when the request has no host and TypeError when
        POST data is a str.
        """
        host = request.host
        if not host:
            raise URLError('no host given')

        if request.data is not None:  # POST
            data = request.data
            if isinstance(data, str):
                msg = "POST data should be bytes, an iterable of bytes, " \
                      "or a file object. It cannot be of type str."
                raise TypeError(msg)
            if not request.has_header('Content-type'):
                request.add_unredirected_header(
                    'Content-type',
                    'application/x-www-form-urlencoded')
            if (not request.has_header('Content-length')
                    and not request.has_header('Transfer-encoding')):
                content_length = self._get_content_length(request)
                if content_length is not None:
                    request.add_unredirected_header(
                            'Content-length', str(content_length))
                else:
                    # Body size unknown: fall back to chunked encoding.
                    request.add_unredirected_header(
                            'Transfer-encoding', 'chunked')

        sel_host = host
        if request.has_proxy():
            # When proxied, the Host header must name the origin server,
            # which is embedded in the selector, not request.host.
            scheme, sel = _splittype(request.selector)
            sel_host, sel_path = _splithost(sel)
        if not request.has_header('Host'):
            request.add_unredirected_header('Host', sel_host)
        for name, value in self.parent.addheaders:
            name = name.capitalize()
            if not request.has_header(name):
                request.add_unredirected_header(name, value)

        return request

    def do_open(self, http_class, req, **http_conn_args):
        """Return an HTTPResponse object for the request, using http_class.

        http_class must implement the HTTPConnection API from http.client.
        """
        host = req.host
        if not host:
            raise URLError('no host given')

        # will parse host:port
        h = http_class(host, timeout=req.timeout, **http_conn_args)
        h.set_debuglevel(self._debuglevel)

        # Unredirected headers take priority over regular headers.
        headers = dict(req.unredirected_hdrs)
        headers.update({k: v for k, v in req.headers.items()
                        if k not in headers})

        # TODO(jhylton): Should this be redesigned to handle
        # persistent connections?

        # We want to make an HTTP/1.1 request, but the addinfourl
        # class isn't prepared to deal with a persistent connection.
        # It will try to read all remaining data from the socket,
        # which will block while the server waits for the next request.
        # So make sure the connection gets closed after the (only)
        # request.
        headers["Connection"] = "close"
        headers = {name.title(): val for name, val in headers.items()}

        if req._tunnel_host:
            # HTTPS through a proxy: establish a CONNECT tunnel first.
            tunnel_headers = {}
            proxy_auth_hdr = "Proxy-Authorization"
            if proxy_auth_hdr in headers:
                tunnel_headers[proxy_auth_hdr] = headers[proxy_auth_hdr]
                # Proxy-Authorization should not be sent to origin
                # server.
                del headers[proxy_auth_hdr]
            h.set_tunnel(req._tunnel_host, headers=tunnel_headers)

        try:
            try:
                h.request(req.get_method(), req.selector, req.data, headers,
                          encode_chunked=req.has_header('Transfer-encoding'))
            except OSError as err: # timeout error
                raise URLError(err)
            r = h.getresponse()
        except:
            h.close()
            raise

        # If the server does not send us a 'Connection: close' header,
        # HTTPConnection assumes the socket should be left open. Manually
        # mark the socket to be closed when this response object goes away.
        if h.sock:
            h.sock.close()
            h.sock = None

        r.url = req.get_full_url()
        # This line replaces the .msg attribute of the HTTPResponse
        # with .headers, because urllib clients expect the response to
        # have the reason in .msg.  It would be good to mark this
        # attribute is deprecated and get then to use info() or
        # .headers.
        r.msg = r.reason
        return r
1372 
1373 
class HTTPHandler(AbstractHTTPHandler):
    """Opens http:// URLs via http.client.HTTPConnection."""

    def http_open(self, req):
        # All the heavy lifting lives in AbstractHTTPHandler.do_open.
        return self.do_open(http.client.HTTPConnection, req)

    http_request = AbstractHTTPHandler.do_request_
1380 
if hasattr(http.client, 'HTTPSConnection'):
    # Only defined when http.client provides HTTPSConnection (i.e. the
    # interpreter was built with ssl support).

    class HTTPSHandler(AbstractHTTPHandler):
        """Opens https:// URLs via http.client.HTTPSConnection."""

        def __init__(self, debuglevel=0, context=None, check_hostname=None):
            AbstractHTTPHandler.__init__(self, debuglevel)
            # context/check_hostname are forwarded to HTTPSConnection;
            # both may be None to use its defaults.
            self._context = context
            self._check_hostname = check_hostname

        def https_open(self, req):
            return self.do_open(http.client.HTTPSConnection, req,
                context=self._context, check_hostname=self._check_hostname)

        https_request = AbstractHTTPHandler.do_request_

    __all__.append('HTTPSHandler')
1397 
class HTTPCookieProcessor(BaseHandler):
    """Attaches stored cookies to requests and harvests cookies from
    responses, using an http.cookiejar.CookieJar."""

    def __init__(self, cookiejar=None):
        import http.cookiejar
        self.cookiejar = (http.cookiejar.CookieJar()
                          if cookiejar is None else cookiejar)

    def http_request(self, request):
        # Attach any cookies that match this request before it is sent.
        self.cookiejar.add_cookie_header(request)
        return request

    def http_response(self, request, response):
        # Remember any cookies the server set on this response.
        self.cookiejar.extract_cookies(response, request)
        return response

    https_request = http_request
    https_response = http_response
1415 
class UnknownHandler(BaseHandler):
    """Fallback handler: any scheme nobody else claimed is an error."""

    def unknown_open(self, req):
        raise URLError('unknown url type: %s' % req.type)
1420 
def parse_keqv_list(l):
    """Parse list of key=value strings where keys are not duplicated.

    Surrounding double quotes are stripped from values.  An element with
    an empty value ("key=") yields the empty string.
    """
    parsed = {}
    for elt in l:
        k, v = elt.split('=', 1)
        # Use slicing so an empty value cannot raise IndexError.
        if v[:1] == '"' and v[-1:] == '"':
            v = v[1:-1]
        parsed[k] = v
    return parsed
1430 
def parse_http_list(s):
    """Parse lists as described by RFC 2068 Section 2.

    In particular, parse comma-separated lists where the elements of
    the list may include quoted-strings.  A quoted-string could
    contain a comma.  A non-quoted string could have quotes in the
    middle.  Neither commas nor quotes count if they are escaped.
    Only double-quotes count, not single-quotes.
    """
    items = []
    current = []
    in_quotes = False
    pending_escape = False

    for ch in s:
        if pending_escape:
            # Previous char was a backslash inside quotes: take literally.
            current.append(ch)
            pending_escape = False
        elif in_quotes:
            if ch == '\\':
                pending_escape = True
            else:
                if ch == '"':
                    in_quotes = False
                current.append(ch)
        elif ch == ',':
            # Unquoted comma terminates the current element.
            items.append(''.join(current))
            current = []
        else:
            if ch == '"':
                in_quotes = True
            current.append(ch)

    # Flush the trailing element, if any.
    if current:
        items.append(''.join(current))

    return [item.strip() for item in items]
1473 
class FileHandler(BaseHandler):
    """Opens file:// URLs referring to the local filesystem."""
    # Use local file or FTP depending on form of URL
    def file_open(self, req):
        url = req.selector
        if url[:2] == '//' and url[2:3] != '/' and (req.host and
                req.host != 'localhost'):
            if not req.host in self.get_names():
                raise URLError("file:// scheme is supported only on localhost")
            # NOTE(review): when req.host IS one of the local names we fall
            # through and implicitly return None (handler declines) rather
            # than opening the file -- confirm this is intended.
        else:
            return self.open_local_file(req)

    # names for the localhost
    names = None
    def get_names(self):
        # Lazily resolve and cache (on the class) all IP addresses that
        # count as "this machine".
        if FileHandler.names is None:
            try:
                FileHandler.names = tuple(
                    socket.gethostbyname_ex('localhost')[2] +
                    socket.gethostbyname_ex(socket.gethostname())[2])
            except socket.gaierror:
                FileHandler.names = (socket.gethostbyname('localhost'),)
        return FileHandler.names

    # not entirely sure what the rules are here
    def open_local_file(self, req):
        """Return an addinfourl for the local file named by req.

        Raises URLError on OS errors and when the file is not local.
        """
        import email.utils
        import mimetypes
        host = req.host
        filename = req.selector
        localfile = url2pathname(filename)
        try:
            stats = os.stat(localfile)
            size = stats.st_size
            modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
            mtype = mimetypes.guess_type(filename)[0]
            # Synthesize HTTP-style response headers for the file.
            headers = email.message_from_string(
                'Content-type: %s\nContent-length: %d\nLast-modified: %s\n' %
                (mtype or 'text/plain', size, modified))
            if host:
                host, port = _splitport(host)
            if not host or \
                (not port and _safe_gethostbyname(host) in self.get_names()):
                if host:
                    origurl = 'file://' + host + filename
                else:
                    origurl = 'file://' + filename
                return addinfourl(open(localfile, 'rb'), headers, origurl)
        except OSError as exp:
            raise URLError(exp)
        raise URLError('file not on local host')
1524 
1525 def _safe_gethostbyname(host):
1526     try:
1527         return socket.gethostbyname(host)
1528     except socket.gaierror:
1529         return None
1530 
class FTPHandler(BaseHandler):
    """Opens ftp:// URLs with ftplib, one connection per request."""
    def ftp_open(self, req):
        """Fetch a file or a directory listing over FTP.

        Raises URLError for a missing host and for any ftplib error.
        """
        import ftplib
        import mimetypes
        host = req.host
        if not host:
            raise URLError('ftp error: no host given')
        host, port = _splitport(host)
        if port is None:
            port = ftplib.FTP_PORT
        else:
            port = int(port)

        # username/password handling
        user, host = _splituser(host)
        if user:
            user, passwd = _splitpasswd(user)
        else:
            passwd = None
        host = unquote(host)
        user = user or ''
        passwd = passwd or ''

        try:
            host = socket.gethostbyname(host)
        except OSError as msg:
            raise URLError(msg)
        path, attrs = _splitattr(req.selector)
        dirs = path.split('/')
        dirs = list(map(unquote, dirs))
        dirs, file = dirs[:-1], dirs[-1]
        if dirs and not dirs[0]:
            dirs = dirs[1:]
        try:
            fw = self.connect_ftp(user, passwd, host, port, dirs, req.timeout)
            # 'I' (binary) for a file, 'D' (directory listing) otherwise,
            # unless a ";type=x" URL attribute overrides it below.
            type = file and 'I' or 'D'
            for attr in attrs:
                attr, value = _splitvalue(attr)
                if attr.lower() == 'type' and \
                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
                    type = value.upper()
            fp, retrlen = fw.retrfile(file, type)
            # Synthesize HTTP-like headers from what FTP tells us.
            headers = ""
            mtype = mimetypes.guess_type(req.full_url)[0]
            if mtype:
                headers += "Content-type: %s\n" % mtype
            if retrlen is not None and retrlen >= 0:
                headers += "Content-length: %d\n" % retrlen
            headers = email.message_from_string(headers)
            return addinfourl(fp, headers, req.full_url)
        except ftplib.all_errors as exp:
            exc = URLError('ftp error: %r' % exp)
            raise exc.with_traceback(sys.exc_info()[2])

    def connect_ftp(self, user, passwd, host, port, dirs, timeout):
        # One-shot (non-persistent) connection; CacheFTPHandler overrides
        # this to reuse connections.
        return ftpwrapper(user, passwd, host, port, dirs, timeout,
                          persistent=False)
1588 
class CacheFTPHandler(FTPHandler):
    """FTPHandler that keeps a cache of live ftpwrapper connections.

    Connections are keyed by (user, host, port, joined dirs, timeout) and
    expire self.delay seconds after their last use; at most self.max_conns
    entries are kept.
    """
    # XXX would be nice to have pluggable cache strategies
    # XXX this stuff is definitely not thread safe
    def __init__(self):
        self.cache = {}       # key -> live ftpwrapper connection
        self.timeout = {}     # key -> absolute expiry time (time.time() based)
        self.soonest = 0      # earliest expiry time among cached entries
        self.delay = 60       # seconds a cached connection stays usable
        self.max_conns = 16   # cap on number of cached connections

    def setTimeout(self, t):
        # Set how long (in seconds) cached connections remain usable.
        self.delay = t

    def setMaxConns(self, m):
        # Set the maximum number of cached connections.
        self.max_conns = m

    def connect_ftp(self, user, passwd, host, port, dirs, timeout):
        """Return a cached connection for this endpoint, creating one on miss."""
        key = user, host, port, '/'.join(dirs), timeout
        if key in self.cache:
            # Cache hit: just push the expiry time forward.
            self.timeout[key] = time.time() + self.delay
        else:
            self.cache[key] = ftpwrapper(user, passwd, host, port,
                                         dirs, timeout)
            self.timeout[key] = time.time() + self.delay
        self.check_cache()
        return self.cache[key]

    def check_cache(self):
        """Evict expired connections, then enforce the size limit."""
        # first check for old ones
        t = time.time()
        if self.soonest <= t:
            for k, v in list(self.timeout.items()):
                if v < t:
                    self.cache[k].close()
                    del self.cache[k]
                    del self.timeout[k]
        # NOTE(review): min() raises ValueError on an empty dict; connect_ftp
        # always inserts an entry before calling us, so this holds today.
        self.soonest = min(list(self.timeout.values()))

        # then check the size
        if len(self.cache) == self.max_conns:
            # Drop the entry expiring soonest; note it is dropped without
            # being close()d here, unlike the expiry path above.
            for k, v in list(self.timeout.items()):
                if v == self.soonest:
                    del self.cache[k]
                    del self.timeout[k]
                    break
            self.soonest = min(list(self.timeout.values()))

    def clear_cache(self):
        """Close and forget every cached connection."""
        for conn in self.cache.values():
            conn.close()
        self.cache.clear()
        self.timeout.clear()
1641 
class DataHandler(BaseHandler):
    def data_open(self, req):
        """Open a data: URL as specified in RFC 2397.

        Any POSTed data on the request is ignored.

        syntax:
        dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
        mediatype := [ type "/" subtype ] *( ";" parameter )
        data      := *urlchar
        parameter := attribute "=" value
        """
        full_url = req.full_url

        scheme, rest = full_url.split(":", 1)
        mediatype, payload = rest.split(",", 1)

        # Percent-decoding applies whether or not the payload is base64:
        # even base64 encoded data URLs might be quoted.
        payload = unquote_to_bytes(payload)
        if mediatype.endswith(";base64"):
            mediatype = mediatype[:-7]
            payload = base64.decodebytes(payload)

        # RFC 2397 default media type when none is given.
        mediatype = mediatype or "text/plain;charset=US-ASCII"

        header_text = "Content-type: %s\nContent-length: %d\n" % (
            mediatype, len(payload))
        headers = email.message_from_string(header_text)

        return addinfourl(io.BytesIO(payload), headers, full_url)
1671 
1672 
# Code moved from the old urllib module

MAXFTPCACHE = 10        # Prune URLopener.ftpcache once it holds more entries than this
1676 
# Helper for non-unix systems
if os.name == 'nt':
    from nturl2path import url2pathname, pathname2url
else:
    def url2pathname(pathname):
        """Convert a relative URL of the 'file' scheme to a file system path.

        OS-specific; not recommended for general use.
        """
        return unquote(pathname)

    def pathname2url(pathname):
        """Convert a file system path to a relative URL of the 'file' scheme.

        OS-specific; not recommended for general use.
        """
        return quote(pathname)
1690 
1691 
# Module-level default FTP connection cache shared by URLopener instances
# (assigned to self.ftpcache in URLopener.__init__ and pruned in open_ftp).
# XXX not thread safe.
ftpcache = {}
1693 
1694 
class URLopener:
    """Class to open URLs.
    This is a class rather than just a subroutine because we may need
    more than one set of global protocol-specific options.
    Note -- this is a base class for those who don't want the
    automatic handling of errors type 302 (relocated) and 401
    (authorization needed)."""

    # Class-level default so cleanup() is safe even if __init__ never ran.
    __tempfiles = None

    version = "Python-urllib/%s" % __version__

    # Constructor
    def __init__(self, proxies=None, **x509):
        # This whole class is deprecated; warn at instantiation time.
        msg = "%(class)s style of invoking requests is deprecated. " \
              "Use newer urlopen functions/methods" % {'class': self.__class__.__name__}
        warnings.warn(msg, DeprecationWarning, stacklevel=3)
        if proxies is None:
            proxies = getproxies()
        assert hasattr(proxies, 'keys'), "proxies must be a mapping"
        self.proxies = proxies
        # x509 keyword args carry client-certificate info for HTTPS.
        self.key_file = x509.get('key_file')
        self.cert_file = x509.get('cert_file')
        self.addheaders = [('User-Agent', self.version), ('Accept', '*/*')]
        self.__tempfiles = []
        self.__unlink = os.unlink # See cleanup()
        self.tempcache = None
        # Undocumented feature: if you assign {} to tempcache,
        # it is used to cache files retrieved with
        # self.retrieve().  This is not enabled by default
        # since it does not work for changing documents (and I
        # haven't got the logic to check expiration headers
        # yet).
        self.ftpcache = ftpcache
        # Undocumented feature: you can use a different
        # ftp cache by assigning to the .ftpcache member;
        # in case you want logically independent URL openers
        # XXX This is not threadsafe.  Bah.

    def __del__(self):
        # Best-effort cleanup of temp files at garbage collection time.
        self.close()

    def close(self):
        self.cleanup()

    def cleanup(self):
        # This code sometimes runs when the rest of this module
        # has already been deleted, so it can't use any globals
        # or import anything.
        if self.__tempfiles:
            for file in self.__tempfiles:
                try:
                    self.__unlink(file)
                except OSError:
                    pass
            del self.__tempfiles[:]
        if self.tempcache:
            self.tempcache.clear()

    def addheader(self, *args):
        """Add a header to be used by the HTTP interface only
        e.g. u.addheader('Accept', 'sound/basic')"""
        self.addheaders.append(args)

    # External interface
    def open(self, fullurl, data=None):
        """Use URLopener().open(file) instead of open(file, 'r')."""
        fullurl = unwrap(_to_bytes(fullurl))
        fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]|")
        if self.tempcache and fullurl in self.tempcache:
            filename, headers = self.tempcache[fullurl]
            fp = open(filename, 'rb')
            return addinfourl(fp, headers, fullurl)
        urltype, url = _splittype(fullurl)
        if not urltype:
            urltype = 'file'
        if urltype in self.proxies:
            proxy = self.proxies[urltype]
            urltype, proxyhost = _splittype(proxy)
            host, selector = _splithost(proxyhost)
            url = (host, fullurl) # Signal special case to open_*()
        else:
            proxy = None
        # Dispatch to open_<scheme>(); '-' in a scheme maps to '_'.
        name = 'open_' + urltype
        self.type = urltype
        name = name.replace('-', '_')
        # open_local_file is excluded from dispatch here; file URLs must go
        # through open_file() (see retrieve() for the direct local-file path).
        if not hasattr(self, name) or name == 'open_local_file':
            if proxy:
                return self.open_unknown_proxy(proxy, fullurl, data)
            else:
                return self.open_unknown(fullurl, data)
        try:
            if data is None:
                return getattr(self, name)(url)
            else:
                return getattr(self, name)(url, data)
        except (HTTPError, URLError):
            raise
        except OSError as msg:
            raise OSError('socket error', msg).with_traceback(sys.exc_info()[2])

    def open_unknown(self, fullurl, data=None):
        """Overridable interface to open unknown URL type."""
        type, url = _splittype(fullurl)
        raise OSError('url error', 'unknown url type', type)

    def open_unknown_proxy(self, proxy, fullurl, data=None):
        """Overridable interface to open unknown URL type."""
        type, url = _splittype(fullurl)
        raise OSError('url error', 'invalid proxy for %s' % type, proxy)

    # External interface
    def retrieve(self, url, filename=None, reporthook=None, data=None):
        """retrieve(url) returns (filename, headers) for a local object
        or (tempfilename, headers) for a remote object."""
        url = unwrap(_to_bytes(url))
        if self.tempcache and url in self.tempcache:
            return self.tempcache[url]
        type, url1 = _splittype(url)
        # For local files (no scheme or file:), try to serve the file in
        # place without copying; fall back to the generic path on failure.
        if filename is None and (not type or type == 'file'):
            try:
                fp = self.open_local_file(url1)
                hdrs = fp.info()
                fp.close()
                return url2pathname(_splithost(url1)[1]), hdrs
            except OSError:
                pass
        fp = self.open(url, data)
        try:
            headers = fp.info()
            if filename:
                tfp = open(filename, 'wb')
            else:
                # No target filename: derive a suffix from the URL path and
                # download into a tracked temp file (removed by cleanup()).
                garbage, path = _splittype(url)
                garbage, path = _splithost(path or "")
                path, garbage = _splitquery(path or "")
                path, garbage = _splitattr(path or "")
                suffix = os.path.splitext(path)[1]
                (fd, filename) = tempfile.mkstemp(suffix)
                self.__tempfiles.append(filename)
                tfp = os.fdopen(fd, 'wb')
            try:
                result = filename, headers
                if self.tempcache is not None:
                    self.tempcache[url] = result
                bs = 1024*8
                size = -1
                read = 0
                blocknum = 0
                if "content-length" in headers:
                    size = int(headers["Content-Length"])
                if reporthook:
                    # reporthook(block number, block size, total size)
                    reporthook(blocknum, bs, size)
                while 1:
                    block = fp.read(bs)
                    if not block:
                        break
                    read += len(block)
                    tfp.write(block)
                    blocknum += 1
                    if reporthook:
                        reporthook(blocknum, bs, size)
            finally:
                tfp.close()
        finally:
            fp.close()

        # raise exception if actual size does not match content-length header
        if size >= 0 and read < size:
            raise ContentTooShortError(
                "retrieval incomplete: got only %i out of %i bytes"
                % (read, size), result)

        return result

    # Each method named open_<type> knows how to open that type of URL

    def _open_generic_http(self, connection_factory, url, data):
        """Make an HTTP connection using connection_class.

        This is an internal method that should be called from
        open_http() or open_https().

        Arguments:
        - connection_factory should take a host name and return an
          HTTPConnection instance.
        - url is the url to retrieval or a host, relative-path pair.
        - data is payload for a POST request or None.
        """

        user_passwd = None
        proxy_passwd= None
        if isinstance(url, str):
            # Direct request: url is "//host/path".
            host, selector = _splithost(url)
            if host:
                user_passwd, host = _splituser(host)
                host = unquote(host)
            realhost = host
        else:
            # Proxied request: url is the (proxyhost, fullurl) pair built
            # by open().
            host, selector = url
            # check whether the proxy contains authorization information
            proxy_passwd, host = _splituser(host)
            # now we proceed with the url we want to obtain
            urltype, rest = _splittype(selector)
            url = rest
            user_passwd = None
            if urltype.lower() != 'http':
                realhost = None
            else:
                realhost, rest = _splithost(rest)
                if realhost:
                    user_passwd, realhost = _splituser(realhost)
                if user_passwd:
                    selector = "%s://%s%s" % (urltype, realhost, rest)
                if proxy_bypass(realhost):
                    # Target host is exempt from proxying; talk to it directly.
                    host = realhost

        if not host: raise OSError('http error', 'no host given')

        # Basic auth credentials are sent base64-encoded per RFC 7617.
        if proxy_passwd:
            proxy_passwd = unquote(proxy_passwd)
            proxy_auth = base64.b64encode(proxy_passwd.encode()).decode('ascii')
        else:
            proxy_auth = None

        if user_passwd:
            user_passwd = unquote(user_passwd)
            auth = base64.b64encode(user_passwd.encode()).decode('ascii')
        else:
            auth = None
        http_conn = connection_factory(host)
        headers = {}
        if proxy_auth:
            headers["Proxy-Authorization"] = "Basic %s" % proxy_auth
        if auth:
            headers["Authorization"] =  "Basic %s" % auth
        if realhost:
            headers["Host"] = realhost

        # Add Connection:close as we don't support persistent connections yet.
        # This helps in closing the socket and avoiding ResourceWarning

        headers["Connection"] = "close"

        for header, value in self.addheaders:
            headers[header] = value

        if data is not None:
            headers["Content-Type"] = "application/x-www-form-urlencoded"
            http_conn.request("POST", selector, data, headers)
        else:
            http_conn.request("GET", selector, headers=headers)

        try:
            response = http_conn.getresponse()
        except http.client.BadStatusLine:
            # something went wrong with the HTTP status line
            raise URLError("http protocol error: bad status line")

        # According to RFC 2616, "2xx" code indicates that the client's
        # request was successfully received, understood, and accepted.
        if 200 <= response.status < 300:
            return addinfourl(response, response.msg, "http:" + url,
                              response.status)
        else:
            return self.http_error(
                url, response.fp,
                response.status, response.reason, response.msg, data)

    def open_http(self, url, data=None):
        """Use HTTP protocol."""
        return self._open_generic_http(http.client.HTTPConnection, url, data)

    def http_error(self, url, fp, errcode, errmsg, headers, data=None):
        """Handle http errors.

        Derived class can override this, or provide specific handlers
        named http_error_DDD where DDD is the 3-digit error code."""
        # First check if there's a specific handler for this error
        name = 'http_error_%d' % errcode
        if hasattr(self, name):
            method = getattr(self, name)
            if data is None:
                result = method(url, fp, errcode, errmsg, headers)
            else:
                result = method(url, fp, errcode, errmsg, headers, data)
            if result: return result
        return self.http_error_default(url, fp, errcode, errmsg, headers)

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handler: close the connection and raise OSError."""
        fp.close()
        raise HTTPError(url, errcode, errmsg, headers, None)

    # open_https is only defined when the ssl module is available.
    if _have_ssl:
        def _https_connection(self, host):
            return http.client.HTTPSConnection(host,
                                           key_file=self.key_file,
                                           cert_file=self.cert_file)

        def open_https(self, url, data=None):
            """Use HTTPS protocol."""
            return self._open_generic_http(self._https_connection, url, data)

    def open_file(self, url):
        """Use local file or FTP depending on form of URL."""
        if not isinstance(url, str):
            raise URLError('file error: proxy support for file protocol currently not implemented')
        # Reject file://host/... forms for any host other than localhost.
        if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
            raise ValueError("file:// scheme is supported only on localhost")
        else:
            return self.open_local_file(url)

    def open_local_file(self, url):
        """Use local file."""
        import email.utils
        import mimetypes
        host, file = _splithost(url)
        localname = url2pathname(file)
        try:
            stats = os.stat(localname)
        except OSError as e:
            raise URLError(e.strerror, e.filename)
        size = stats.st_size
        modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
        mtype = mimetypes.guess_type(url)[0]
        headers = email.message_from_string(
            'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
            (mtype or 'text/plain', size, modified))
        if not host:
            urlfile = file
            if file[:1] == '/':
                urlfile = 'file://' + file
            return addinfourl(open(localname, 'rb'), headers, urlfile)
        host, port = _splitport(host)
        # A host is acceptable only if it resolves to this machine and no
        # explicit port was given.
        if (not port
           and socket.gethostbyname(host) in ((localhost(),) + thishost())):
            urlfile = file
            if file[:1] == '/':
                urlfile = 'file://' + file
            elif file[:2] == './':
                raise ValueError("local file url may start with / or file:. Unknown url of type: %s" % url)
            return addinfourl(open(localname, 'rb'), headers, urlfile)
        raise URLError('local file error: not on local host')

    def open_ftp(self, url):
        """Use FTP protocol."""
        if not isinstance(url, str):
            raise URLError('ftp error: proxy support for ftp protocol currently not implemented')
        import mimetypes
        host, path = _splithost(url)
        if not host: raise URLError('ftp error: no host given')
        host, port = _splitport(host)
        user, host = _splituser(host)
        if user: user, passwd = _splitpasswd(user)
        else: passwd = None
        host = unquote(host)
        user = unquote(user or '')
        passwd = unquote(passwd or '')
        host = socket.gethostbyname(host)
        if not port:
            import ftplib
            port = ftplib.FTP_PORT
        else:
            port = int(port)
        path, attrs = _splitattr(path)
        path = unquote(path)
        dirs = path.split('/')
        dirs, file = dirs[:-1], dirs[-1]
        if dirs and not dirs[0]: dirs = dirs[1:]
        if dirs and not dirs[0]: dirs[0] = '/'
        key = user, host, port, '/'.join(dirs)
        # XXX thread unsafe!
        if len(self.ftpcache) > MAXFTPCACHE:
            # Prune the cache, rather arbitrarily
            for k in list(self.ftpcache):
                if k != key:
                    v = self.ftpcache[k]
                    del self.ftpcache[k]
                    v.close()
        try:
            if key not in self.ftpcache:
                self.ftpcache[key] = \
                    ftpwrapper(user, passwd, host, port, dirs)
            # Transfer type: 'I' (image/binary) for files, 'D' (directory
            # listing) when the path ends in '/'; may be overridden by a
            # ";type=..." URL attribute below.
            if not file: type = 'D'
            else: type = 'I'
            for attr in attrs:
                attr, value = _splitvalue(attr)
                if attr.lower() == 'type' and \
                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
                    type = value.upper()
            (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
            mtype = mimetypes.guess_type("ftp:" + url)[0]
            headers = ""
            if mtype:
                headers += "Content-Type: %s\n" % mtype
            if retrlen is not None and retrlen >= 0:
                headers += "Content-Length: %d\n" % retrlen
            headers = email.message_from_string(headers)
            return addinfourl(fp, headers, "ftp:" + url)
        except ftperrors() as exp:
            raise URLError('ftp error %r' % exp).with_traceback(sys.exc_info()[2])

    def open_data(self, url, data=None):
        """Use "data" URL."""
        if not isinstance(url, str):
            raise URLError('data error: proxy support for data protocol currently not implemented')
        # ignore POSTed data
        #
        # syntax of data URLs:
        # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
        # mediatype := [ type "/" subtype ] *( ";" parameter )
        # data      := *urlchar
        # parameter := attribute "=" value
        try:
            [type, data] = url.split(',', 1)
        except ValueError:
            raise OSError('data error', 'bad data URL')
        if not type:
            type = 'text/plain;charset=US-ASCII'
        semi = type.rfind(';')
        if semi >= 0 and '=' not in type[semi:]:
            # Trailing ";base64" (or similar) marker rather than a parameter.
            encoding = type[semi+1:]
            type = type[:semi]
        else:
            encoding = ''
        # Build a fake RFC 822 message so the response carries headers.
        msg = []
        msg.append('Date: %s'%time.strftime('%a, %d %b %Y %H:%M:%S GMT',
                                            time.gmtime(time.time())))
        msg.append('Content-type: %s' % type)
        if encoding == 'base64':
            # XXX is this encoding/decoding ok?
            data = base64.decodebytes(data.encode('ascii')).decode('latin-1')
        else:
            data = unquote(data)
        msg.append('Content-Length: %d' % len(data))
        msg.append('')
        msg.append(data)
        msg = '\n'.join(msg)
        headers = email.message_from_string(msg)
        # Note: the returned file object is text (StringIO), not bytes.
        f = io.StringIO(msg)
        #f.fileno = None     # needed for addinfourl
        return addinfourl(f, headers, url)
2138 
2139 
class FancyURLopener(URLopener):
    """Derived class with handlers for errors we can handle (perhaps)."""

    def __init__(self, *args, **kwargs):
        URLopener.__init__(self, *args, **kwargs)
        self.auth_cache = {}   # (realm + '@' + host) -> (user, passwd)
        self.tries = 0         # redirect counter (reset after each top-level open)
        self.maxtries = 10     # redirect limit; 0/None disables the check

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handling -- don't raise an exception."""
        return addinfourl(fp, headers, "http:" + url, errcode)

    def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 302 -- relocated (temporarily)."""
        self.tries += 1
        try:
            if self.maxtries and self.tries >= self.maxtries:
                # Too many consecutive redirects: report as a synthetic 500.
                if hasattr(self, "http_error_500"):
                    meth = self.http_error_500
                else:
                    meth = self.http_error_default
                return meth(url, fp, 500,
                            "Internal Server Error: Redirect Recursion",
                            headers)
            result = self.redirect_internal(url, fp, errcode, errmsg,
                                            headers, data)
            return result
        finally:
            # Reset the counter once the (possibly recursive) open finishes.
            self.tries = 0

    def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
        """Follow a redirect; returns None if no target header is present."""
        if 'location' in headers:
            newurl = headers['location']
        elif 'uri' in headers:
            newurl = headers['uri']
        else:
            return
        fp.close()

        # In case the server sent a relative URL, join with original:
        newurl = urljoin(self.type + ":" + url, newurl)

        urlparts = urlparse(newurl)

        # For security reasons, we don't allow redirection to anything other
        # than http, https and ftp.

        # We are using newer HTTPError with older redirect_internal method
        # This older method will get deprecated in 3.3

        if urlparts.scheme not in ('http', 'https', 'ftp', ''):
            raise HTTPError(newurl, errcode,
                            errmsg +
                            " Redirection to url '%s' is not allowed." % newurl,
                            headers, fp)

        return self.open(newurl)

    def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 301 -- also relocated (permanently)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 303 -- also relocated (essentially identical to 302)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 307 -- relocated, but turn POST into error."""
        if data is None:
            return self.http_error_302(url, fp, errcode, errmsg, headers, data)
        else:
            return self.http_error_default(url, fp, errcode, errmsg, headers)

    def http_error_401(self, url, fp, errcode, errmsg, headers, data=None,
            retry=False):
        """Error 401 -- authentication required.
        This function supports Basic authentication only."""
        # The URLopener.http_error_default calls below raise HTTPError, so
        # each one terminates this method when auth cannot be retried.
        if 'www-authenticate' not in headers:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        stuff = headers['www-authenticate']
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
        if not match:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        if not retry:
            URLopener.http_error_default(self, url, fp, errcode, errmsg,
                    headers)
        # Dispatch to retry_<scheme>_basic_auth for the current URL type.
        name = 'retry_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self,name)(url, realm)
        else:
            return getattr(self,name)(url, realm, data)

    def http_error_407(self, url, fp, errcode, errmsg, headers, data=None,
            retry=False):
        """Error 407 -- proxy authentication required.
        This function supports Basic authentication only."""
        # As in http_error_401, URLopener.http_error_default raises, so each
        # call below ends the method when proxy auth cannot be retried.
        if 'proxy-authenticate' not in headers:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        stuff = headers['proxy-authenticate']
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
        if not match:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        if not retry:
            URLopener.http_error_default(self, url, fp, errcode, errmsg,
                    headers)
        name = 'retry_proxy_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self,name)(url, realm)
        else:
            return getattr(self,name)(url, realm, data)

    def retry_proxy_http_basic_auth(self, url, realm, data=None):
        """Re-open url with user:pass credentials embedded in the http proxy."""
        host, selector = _splithost(url)
        newurl = 'http://' + host + selector
        proxy = self.proxies['http']
        urltype, proxyhost = _splittype(proxy)
        proxyhost, proxyselector = _splithost(proxyhost)
        # Strip any existing "user:pass@" prefix from the proxy host.
        i = proxyhost.find('@') + 1
        proxyhost = proxyhost[i:]
        user, passwd = self.get_user_passwd(proxyhost, realm, i)
        if not (user or passwd): return None
        proxyhost = "%s:%s@%s" % (quote(user, safe=''),
                                  quote(passwd, safe=''), proxyhost)
        self.proxies['http'] = 'http://' + proxyhost + proxyselector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_proxy_https_basic_auth(self, url, realm, data=None):
        """Re-open url with user:pass credentials embedded in the https proxy."""
        host, selector = _splithost(url)
        newurl = 'https://' + host + selector
        proxy = self.proxies['https']
        urltype, proxyhost = _splittype(proxy)
        proxyhost, proxyselector = _splithost(proxyhost)
        i = proxyhost.find('@') + 1
        proxyhost = proxyhost[i:]
        user, passwd = self.get_user_passwd(proxyhost, realm, i)
        if not (user or passwd): return None
        proxyhost = "%s:%s@%s" % (quote(user, safe=''),
                                  quote(passwd, safe=''), proxyhost)
        self.proxies['https'] = 'https://' + proxyhost + proxyselector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_http_basic_auth(self, url, realm, data=None):
        """Re-open url over http with user:pass embedded in the host part."""
        host, selector = _splithost(url)
        i = host.find('@') + 1
        host = host[i:]
        user, passwd = self.get_user_passwd(host, realm, i)
        if not (user or passwd): return None
        host = "%s:%s@%s" % (quote(user, safe=''),
                             quote(passwd, safe=''), host)
        newurl = 'http://' + host + selector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_https_basic_auth(self, url, realm, data=None):
        """Re-open url over https with user:pass embedded in the host part."""
        host, selector = _splithost(url)
        i = host.find('@') + 1
        host = host[i:]
        user, passwd = self.get_user_passwd(host, realm, i)
        if not (user or passwd): return None
        host = "%s:%s@%s" % (quote(user, safe=''),
                             quote(passwd, safe=''), host)
        newurl = 'https://' + host + selector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def get_user_passwd(self, host, realm, clear_cache=0):
        """Return cached (user, passwd) for realm@host, prompting on a miss."""
        key = realm + '@' + host.lower()
        if key in self.auth_cache:
            if clear_cache:
                # Previous credentials failed; drop them and re-prompt.
                del self.auth_cache[key]
            else:
                return self.auth_cache[key]
        user, passwd = self.prompt_user_passwd(host, realm)
        if user or passwd: self.auth_cache[key] = (user, passwd)
        return user, passwd

    def prompt_user_passwd(self, host, realm):
        """Override this in a GUI environment!"""
        import getpass
        try:
            user = input("Enter username for %s at %s: " % (realm, host))
            passwd = getpass.getpass("Enter password for %s in %s at %s: " %
                (user, realm, host))
            return user, passwd
        except KeyboardInterrupt:
            print()
            return None, None
2350 
2351 
2352 # Utility functions
2353 
# Cache for localhost(); resolved at most once per process.
_localhost = None
def localhost():
    """Return the IP address of the magic hostname 'localhost'."""
    global _localhost
    if _localhost is not None:
        return _localhost
    _localhost = socket.gethostbyname('localhost')
    return _localhost
2361 
# Cache for thishost(); resolved at most once per process.
_thishost = None
def thishost():
    """Return the IP addresses of the current host as a tuple.

    Falls back to the addresses of 'localhost' when the machine's own
    hostname cannot be resolved.
    """
    global _thishost
    if _thishost is not None:
        return _thishost
    try:
        addresses = socket.gethostbyname_ex(socket.gethostname())[2]
    except socket.gaierror:
        addresses = socket.gethostbyname_ex('localhost')[2]
    _thishost = tuple(addresses)
    return _thishost
2372 
# Cache for ftperrors(); ftplib is imported lazily on first use.
_ftperrors = None
def ftperrors():
    """Return the set of errors raised by the FTP class."""
    global _ftperrors
    if _ftperrors is not None:
        return _ftperrors
    import ftplib
    _ftperrors = ftplib.all_errors
    return _ftperrors
2381 
# Cache for noheaders(); a single shared empty Message object.
_noheaders = None
def noheaders():
    """Return an empty email Message object."""
    global _noheaders
    if _noheaders is not None:
        return _noheaders
    _noheaders = email.message_from_string("")
    return _noheaders
2389 
2390 
2391 # Utility classes
2392 
class ftpwrapper:
    """Class used by open_ftp() for cache of open FTP connections."""

    def __init__(self, user, passwd, host, port, dirs, timeout=None,
                 persistent=True):
        # Connection parameters are kept so init() can reconnect after the
        # server drops the control channel (see retrfile()).
        self.user = user
        self.passwd = passwd
        self.host = host
        self.port = port
        self.dirs = dirs
        self.timeout = timeout
        # Number of file objects handed out by retrfile() and not yet closed.
        self.refcount = 0
        # When False, the connection is torn down once refcount reaches 0.
        self.keepalive = persistent
        try:
            self.init()
        except:
            # Bare except on purpose: release the control connection on any
            # failure (including KeyboardInterrupt), then re-raise.
            self.close()
            raise

    def init(self):
        """(Re)establish the control connection, log in and cd to dirs."""
        import ftplib
        self.busy = 0
        self.ftp = ftplib.FTP()
        self.ftp.connect(self.host, self.port, self.timeout)
        self.ftp.login(self.user, self.passwd)
        _target = '/'.join(self.dirs)
        self.ftp.cwd(_target)

    def retrfile(self, file, type):
        """Start retrieving *file*, or a directory listing when *file* is
        empty or *type* is 'd'/'D'.

        Returns a (file-like object, length) pair; the object calls
        file_close() when closed so the connection can be reclaimed.
        """
        import ftplib
        self.endtransfer()
        # 'd'/'D' requests a directory listing (ASCII); anything else is
        # passed through as an FTP TYPE code (e.g. 'I' for binary).
        if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
        else: cmd = 'TYPE ' + type; isdir = 0
        try:
            self.ftp.voidcmd(cmd)
        except ftplib.all_errors:
            # Control connection likely timed out -- reconnect and retry once.
            self.init()
            self.ftp.voidcmd(cmd)
        conn = None
        if file and not isdir:
            # Try to retrieve as a file
            try:
                cmd = 'RETR ' + file
                conn, retrlen = self.ftp.ntransfercmd(cmd)
            except ftplib.error_perm as reason:
                # 550 means "not a plain file": fall through to the listing
                # branch below.  Any other permanent error is fatal.
                if str(reason)[:3] != '550':
                    raise URLError('ftp error: %r' % reason).with_traceback(
                        sys.exc_info()[2])
        if not conn:
            # Set transfer mode to ASCII!
            self.ftp.voidcmd('TYPE A')
            # Try a directory listing. Verify that directory exists.
            if file:
                pwd = self.ftp.pwd()
                try:
                    try:
                        self.ftp.cwd(file)
                    except ftplib.error_perm as reason:
                        raise URLError('ftp error: %r' % reason) from reason
                finally:
                    # Always restore the original working directory.
                    self.ftp.cwd(pwd)
                cmd = 'LIST ' + file
            else:
                cmd = 'LIST'
            conn, retrlen = self.ftp.ntransfercmd(cmd)
        self.busy = 1

        ftpobj = addclosehook(conn.makefile('rb'), self.file_close)
        self.refcount += 1
        conn.close()
        # Pass back both a suitably decorated object and a retrieval length
        return (ftpobj, retrlen)

    def endtransfer(self):
        # Mark the data channel as idle.
        self.busy = 0

    def close(self):
        """Drop the keepalive; actually close once no files are open."""
        self.keepalive = False
        if self.refcount <= 0:
            self.real_close()

    def file_close(self):
        # Hook invoked when a file object returned by retrfile() is closed.
        self.endtransfer()
        self.refcount -= 1
        if self.refcount <= 0 and not self.keepalive:
            self.real_close()

    def real_close(self):
        """Unconditionally close the underlying FTP control connection."""
        self.endtransfer()
        try:
            self.ftp.close()
        except ftperrors():
            # Best effort: the connection may already be gone.
            pass
2486 
2487 # Proxy handling
def getproxies_environment():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Scan the environment for variables named <scheme>_proxy;
    this seems to be the standard convention.  If you need a
    different way, you can pass a proxies dictionary to the
    [Fancy]URLopener constructor.

    """
    proxies = {}
    # Process the environment in two passes so lowercase variables win:
    # the first pass accepts any casing, the second pass (below) only
    # matches names whose '_proxy' suffix is already lowercase and
    # overrides (or removes, when empty) what the first pass collected.
    for name, value in os.environ.items():
        lowered = name.lower()
        if value and lowered.endswith('_proxy'):
            proxies[lowered[:-6]] = value
    # CVE-2016-1000110 - If we are running as CGI script, forget HTTP_PROXY
    # (non-all-lowercase) as it may be set from the web server by a "Proxy:"
    # header from the client
    # If "proxy" is lowercase, it will still be used thanks to the next block
    if 'REQUEST_METHOD' in os.environ:
        proxies.pop('http', None)
    for name, value in os.environ.items():
        if name.endswith('_proxy'):
            scheme = name.lower()[:-6]
            if value:
                proxies[scheme] = value
            else:
                proxies.pop(scheme, None)
    return proxies
2518 
def proxy_bypass_environment(host, proxies=None):
    """Test if proxies should not be used for a particular host.

    Checks the proxy dict for the value of no_proxy, which should
    be a list of comma separated DNS suffixes, or '*' for all hosts.

    """
    if proxies is None:
        proxies = getproxies_environment()
    try:
        no_proxy = proxies['no']
    except KeyError:
        # no_proxy not specified: never bypass.
        return False
    if no_proxy == '*':
        # Special case: always bypass the proxy.
        return True
    host = host.lower()
    hostonly, port = _splitport(host)
    for entry in no_proxy.split(','):
        entry = entry.strip()
        if not entry:
            continue
        entry = entry.lstrip('.').lower()  # ignore leading dots
        # Exact match against the bare host or the host:port form.
        if entry in (hostonly, host):
            return True
        # Otherwise treat the entry as a DNS suffix.
        suffix = '.' + entry
        if hostonly.endswith(suffix) or host.endswith(suffix):
            return True
    return False
2552 
2553 
2554 # This code tests an OSX specific data structure but is testable on all
2555 # platforms
2556 def _proxy_bypass_macosx_sysconf(host, proxy_settings):
2557     """
2558     Return True iff this host shouldn't be accessed using a proxy
2559 
2560     This function uses the MacOSX framework SystemConfiguration
2561     to fetch the proxy information.
2562 
2563     proxy_settings come from _scproxy._get_proxy_settings or get mocked ie:
2564     { 'exclude_simple': bool,
2565       'exceptions': ['foo.bar', '*.bar.com', '127.0.0.1', '10.1', '10.0/16']
2566     }
2567     """
2568     from fnmatch import fnmatch
2569 
2570     hostonly, port = _splitport(host)
2571 
2572     def ip2num(ipAddr):
2573         parts = ipAddr.split('.')
2574         parts = list(map(int, parts))
2575         if len(parts) != 4:
2576             parts = (parts + [0, 0, 0, 0])[:4]
2577         return (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3]
2578 
2579     # Check for simple host names:
2580     if '.' not in host:
2581         if proxy_settings['exclude_simple']:
2582             return True
2583 
2584     hostIP = None
2585 
2586     for value in proxy_settings.get('exceptions', ()):
2587         # Items in the list are strings like these: *.local, 169.254/16
2588         if not value: continue
2589 
2590         m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value)
2591         if m is not None:
2592             if hostIP is None:
2593                 try:
2594                     hostIP = socket.gethostbyname(hostonly)
2595                     hostIP = ip2num(hostIP)
2596                 except OSError:
2597                     continue
2598 
2599             base = ip2num(m.group(1))
2600             mask = m.group(2)
2601             if mask is None:
2602                 mask = 8 * (m.group(1).count('.') + 1)
2603             else:
2604                 mask = int(mask[1:])
2605 
2606             if mask < 0 or mask > 32:
2607                 # System libraries ignore invalid prefix lengths
2608                 continue
2609 
2610             mask = 32 - mask
2611 
2612             if (hostIP >> mask) == (base >> mask):
2613                 return True
2614 
2615         elif fnmatch(host, value):
2616             return True
2617 
2618     return False
2619 
2620 
if sys.platform == 'darwin':
    from _scproxy import _get_proxy_settings, _get_proxies

    def proxy_bypass_macosx_sysconf(host):
        """Return True if SystemConfiguration says *host* bypasses the proxy."""
        return _proxy_bypass_macosx_sysconf(host, _get_proxy_settings())

    def getproxies_macosx_sysconf():
        """Return a dictionary of scheme -> proxy server URL mappings.

        This function uses the MacOSX framework SystemConfiguration
        to fetch the proxy information.
        """
        return _get_proxies()

    def proxy_bypass(host):
        """Return True, if host should be bypassed.

        Checks proxy settings gathered from the environment, if specified,
        or from the MacOSX framework SystemConfiguration.

        """
        environment_proxies = getproxies_environment()
        if not environment_proxies:
            # No environment override: defer to the system configuration.
            return proxy_bypass_macosx_sysconf(host)
        return proxy_bypass_environment(host, environment_proxies)

    def getproxies():
        """Return proxies from the environment, else from SystemConfiguration."""
        return getproxies_environment() or getproxies_macosx_sysconf()
2653 
2654 
elif os.name == 'nt':
    def getproxies_registry():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Win32 uses the registry to store proxies.

        """
        proxies = {}
        try:
            import winreg
        except ImportError:
            # Std module, so should be around - but you never know!
            return proxies
        try:
            internetSettings = winreg.OpenKey(winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            proxyEnable = winreg.QueryValueEx(internetSettings,
                                               'ProxyEnable')[0]
            if proxyEnable:
                # Returned as Unicode but problems if not converted to ASCII
                proxyServer = str(winreg.QueryValueEx(internetSettings,
                                                       'ProxyServer')[0])
                if '=' in proxyServer:
                    # Per-protocol settings
                    for p in proxyServer.split(';'):
                        protocol, address = p.split('=', 1)
                        # See if address has a type:// prefix
                        if not re.match('(?:[^/:]+)://', address):
                            # No scheme given: reuse the protocol name.
                            address = '%s://%s' % (protocol, address)
                        proxies[protocol] = address
                else:
                    # Use one setting for all protocols
                    if proxyServer[:5] == 'http:':
                        proxies['http'] = proxyServer
                    else:
                        proxies['http'] = 'http://%s' % proxyServer
                        proxies['https'] = 'https://%s' % proxyServer
                        proxies['ftp'] = 'ftp://%s' % proxyServer
            internetSettings.Close()
        except (OSError, ValueError, TypeError):
            # Either registry key not found etc, or the value in an
            # unexpected format.
            # proxies already set up to be empty so nothing to do
            pass
        return proxies

    def getproxies():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Returns settings gathered from the environment, if specified,
        or the registry.

        """
        return getproxies_environment() or getproxies_registry()

    def proxy_bypass_registry(host):
        """Return 1 if *host* matches the registry ProxyOverride list, else 0."""
        try:
            import winreg
        except ImportError:
            # Std modules, so should be around - but you never know!
            return 0
        try:
            internetSettings = winreg.OpenKey(winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            proxyEnable = winreg.QueryValueEx(internetSettings,
                                               'ProxyEnable')[0]
            proxyOverride = str(winreg.QueryValueEx(internetSettings,
                                                     'ProxyOverride')[0])
            # ^^^^ Returned as Unicode but problems if not converted to ASCII
        except OSError:
            return 0
        if not proxyEnable or not proxyOverride:
            return 0
        # try to make a host list from name and IP address.
        rawHost, port = _splitport(host)
        host = [rawHost]
        try:
            addr = socket.gethostbyname(rawHost)
            if addr != rawHost:
                host.append(addr)
        except OSError:
            # Resolution failure: match on the raw name only.
            pass
        try:
            fqdn = socket.getfqdn(rawHost)
            if fqdn != rawHost:
                host.append(fqdn)
        except OSError:
            pass
        # make a check value list from the registry entry: replace the
        # '<local>' string by the localhost entry and the corresponding
        # canonical entry.
        proxyOverride = proxyOverride.split(';')
        # now check if we match one of the registry values.
        for test in proxyOverride:
            if test == '<local>':
                # '<local>' bypasses the proxy for simple (dot-less) names.
                if '.' not in rawHost:
                    return 1
            test = test.replace(".", r"\.")     # mask dots
            test = test.replace("*", r".*")     # change glob sequence
            test = test.replace("?", r".")      # change glob char
            for val in host:
                if re.match(test, val, re.I):
                    return 1
        return 0

    def proxy_bypass(host):
        """Return True, if host should be bypassed.

        Checks proxy settings gathered from the environment, if specified,
        or the registry.

        """
        proxies = getproxies_environment()
        if proxies:
            return proxy_bypass_environment(host, proxies)
        else:
            return proxy_bypass_registry(host)
2772 
else:
    # By default use environment variables
    # (no platform-specific proxy store exists for this OS, so the
    # environment-based implementations serve as the public API directly).
    getproxies = getproxies_environment
    proxy_bypass = proxy_bypass_environment
2777