1"""Open an arbitrary URL.
2
3See the following document for more info on URLs:
4"Names and Addresses, URIs, URLs, URNs, URCs", at
5http://www.w3.org/pub/WWW/Addressing/Overview.html
6
7See also the HTTP spec (from which the error codes are derived):
8"HTTP - Hypertext Transfer Protocol", at
9http://www.w3.org/pub/WWW/Protocols/
10
11Related standards and specs:
12- RFC1808: the "relative URL" spec. (authoritative status)
13- RFC1738: the "URL standard". (authoritative status)
14- RFC1630: the "URI spec". (informational status)
15
16The object returned by URLopener().open(file) will differ per
17protocol.  All you know is that it has methods read(), readline(),
18readlines(), fileno(), close() and info().  The read*(), fileno()
19and close() methods work like those of open files.
20The info() method returns a mimetools.Message object which can be
21used to query various info about the object, if available.
22(mimetools.Message objects are queried with the getheader() method.)
23"""
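# A minimal usage sketch of the interface described above (illustrative only;
# the URL is a placeholder, and urlopen() is the module-level shortcut defined
# below):
#
#   >>> f = urlopen("http://www.example.com/")
#   >>> headers = f.info()      # mimetools.Message holding the response headers
#   >>> headers.getheader("Content-Type")
#   >>> data = f.read()         # read()/readline()/close() work like open files
#   >>> f.close()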
24
25import string
26import socket
27import os
28import time
29import sys
30import base64
31import re
32
33from urlparse import urljoin as basejoin
34
35__all__ = ["urlopen", "URLopener", "FancyURLopener", "urlretrieve",
36           "urlcleanup", "quote", "quote_plus", "unquote", "unquote_plus",
37           "urlencode", "url2pathname", "pathname2url", "splittag",
38           "localhost", "thishost", "ftperrors", "basejoin", "unwrap",
39           "splittype", "splithost", "splituser", "splitpasswd", "splitport",
40           "splitnport", "splitquery", "splitattr", "splitvalue",
41           "getproxies"]
42
43__version__ = '1.17'    # XXX This version is not always updated :-(
44
45MAXFTPCACHE = 10        # Trim the ftp cache beyond this size
46
47# Helper for non-unix systems
48if os.name == 'nt':
49    from nturl2path import url2pathname, pathname2url
50elif os.name == 'riscos':
51    from rourl2path import url2pathname, pathname2url
52else:
53    def url2pathname(pathname):
54        """OS-specific conversion from a relative URL of the 'file' scheme
55        to a file system path; not recommended for general use."""
56        return unquote(pathname)
57
58    def pathname2url(pathname):
59        """OS-specific conversion from a file system path to a relative URL
60        of the 'file' scheme; not recommended for general use."""
61        return quote(pathname)
62
63# This really consists of two pieces:
64# (1) a class which handles opening of all sorts of URLs
65#     (plus assorted utilities etc.)
66# (2) a set of functions for parsing URLs
67# XXX Should these be separated out into different modules?
68
69
70# Shortcut for basic usage
71_urlopener = None
72def urlopen(url, data=None, proxies=None, context=None):
73    """Create a file-like object for the specified URL to read from."""
74    from warnings import warnpy3k
75    warnpy3k("urllib.urlopen() has been removed in Python 3.0 in "
76             "favor of urllib2.urlopen()", stacklevel=2)
77
78    global _urlopener
79    if proxies is not None or context is not None:
80        opener = FancyURLopener(proxies=proxies, context=context)
81    elif not _urlopener:
82        opener = FancyURLopener()
83        _urlopener = opener
84    else:
85        opener = _urlopener
86    if data is None:
87        return opener.open(url)
88    else:
89        return opener.open(url, data)
90def urlretrieve(url, filename=None, reporthook=None, data=None, context=None):
91    global _urlopener
92    if context is not None:
93        opener = FancyURLopener(context=context)
94    elif not _urlopener:
95        _urlopener = opener = FancyURLopener()
96    else:
97        opener = _urlopener
98    return opener.retrieve(url, filename, reporthook, data)
99def urlcleanup():
100    if _urlopener:
101        _urlopener.cleanup()
102    _safe_quoters.clear()
103    ftpcache.clear()
104
105# check for SSL
106try:
107    import ssl
108except ImportError:
109    _have_ssl = False
110else:
111    _have_ssl = True
112
113# exception raised when downloaded size does not match content-length
114class ContentTooShortError(IOError):
115    def __init__(self, message, content):
116        IOError.__init__(self, message)
117        self.content = content
118
119ftpcache = {}
120class URLopener:
121    """Class to open URLs.
122    This is a class rather than just a subroutine because we may need
123    more than one set of global protocol-specific options.
124    Note -- this is a base class for those who don't want the
125    automatic handling of errors type 302 (relocated) and 401
126    (authorization needed)."""
127
128    __tempfiles = None
129
130    version = "Python-urllib/%s" % __version__
131
132    # Constructor
133    def __init__(self, proxies=None, context=None, **x509):
134        if proxies is None:
135            proxies = getproxies()
136        assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
137        self.proxies = proxies
138        self.key_file = x509.get('key_file')
139        self.cert_file = x509.get('cert_file')
140        self.context = context
141        self.addheaders = [('User-Agent', self.version), ('Accept', '*/*')]
142        self.__tempfiles = []
143        self.__unlink = os.unlink # See cleanup()
144        self.tempcache = None
145        # Undocumented feature: if you assign {} to tempcache,
146        # it is used to cache files retrieved with
147        # self.retrieve().  This is not enabled by default
148        # since it does not work for changing documents (and I
149        # haven't got the logic to check expiration headers
150        # yet).
151        self.ftpcache = ftpcache
152        # Undocumented feature: you can use a different
153        # ftp cache by assigning to the .ftpcache member;
154        # in case you want logically independent URL openers
155        # XXX This is not threadsafe.  Bah.
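        # Illustrative sketch of the two undocumented hooks described above
        # (the attribute names are real members of this class; the URL is a
        # placeholder):
        #
        #   opener = URLopener()
        #   opener.tempcache = {}     # enable caching in retrieve()
        #   opener.ftpcache = {}      # give this opener a private FTP cache
        #   opener.retrieve("http://www.example.com/")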
156
157    def __del__(self):
158        self.close()
159
160    def close(self):
161        self.cleanup()
162
163    def cleanup(self):
164        # This code sometimes runs when the rest of this module
165        # has already been deleted, so it can't use any globals
166        # or import anything.
167        if self.__tempfiles:
168            for file in self.__tempfiles:
169                try:
170                    self.__unlink(file)
171                except OSError:
172                    pass
173            del self.__tempfiles[:]
174        if self.tempcache:
175            self.tempcache.clear()
176
177    def addheader(self, *args):
178        """Add a header to be used by the HTTP interface only
179        e.g. u.addheader('Accept', 'sound/basic')"""
180        self.addheaders.append(args)
181
182    # External interface
183    def open(self, fullurl, data=None):
184        """Use URLopener().open(file) instead of open(file, 'r')."""
185        fullurl = unwrap(toBytes(fullurl))
186        # Percent-encode the URL to avoid server errors caused by characters
187        # such as spaces within URL paths.
188        fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]|")
189        if self.tempcache and fullurl in self.tempcache:
190            filename, headers = self.tempcache[fullurl]
191            fp = open(filename, 'rb')
192            return addinfourl(fp, headers, fullurl)
193        urltype, url = splittype(fullurl)
194        if not urltype:
195            urltype = 'file'
196        if urltype in self.proxies:
197            proxy = self.proxies[urltype]
198            urltype, proxyhost = splittype(proxy)
199            host, selector = splithost(proxyhost)
200            url = (host, fullurl) # Signal special case to open_*()
201        else:
202            proxy = None
203        name = 'open_' + urltype
204        self.type = urltype
205        name = name.replace('-', '_')
206        if not hasattr(self, name):
207            if proxy:
208                return self.open_unknown_proxy(proxy, fullurl, data)
209            else:
210                return self.open_unknown(fullurl, data)
211        try:
212            if data is None:
213                return getattr(self, name)(url)
214            else:
215                return getattr(self, name)(url, data)
216        except socket.error, msg:
217            raise IOError, ('socket error', msg), sys.exc_info()[2]
218
219    def open_unknown(self, fullurl, data=None):
220        """Overridable interface to open unknown URL type."""
221        type, url = splittype(fullurl)
222        raise IOError, ('url error', 'unknown url type', type)
223
224    def open_unknown_proxy(self, proxy, fullurl, data=None):
225        """Overridable interface to open unknown URL type."""
226        type, url = splittype(fullurl)
227        raise IOError, ('url error', 'invalid proxy for %s' % type, proxy)
228
229    # External interface
230    def retrieve(self, url, filename=None, reporthook=None, data=None):
231        """retrieve(url) returns (filename, headers) for a local object
232        or (tempfilename, headers) for a remote object."""
233        url = unwrap(toBytes(url))
234        if self.tempcache and url in self.tempcache:
235            return self.tempcache[url]
236        type, url1 = splittype(url)
237        if filename is None and (not type or type == 'file'):
238            try:
239                fp = self.open_local_file(url1)
240                hdrs = fp.info()
241                fp.close()
242                return url2pathname(splithost(url1)[1]), hdrs
243            except IOError:
244                pass
245        fp = self.open(url, data)
246        try:
247            headers = fp.info()
248            if filename:
249                tfp = open(filename, 'wb')
250            else:
251                import tempfile
252                garbage, path = splittype(url)
253                garbage, path = splithost(path or "")
254                path, garbage = splitquery(path or "")
255                path, garbage = splitattr(path or "")
256                suffix = os.path.splitext(path)[1]
257                (fd, filename) = tempfile.mkstemp(suffix)
258                self.__tempfiles.append(filename)
259                tfp = os.fdopen(fd, 'wb')
260            try:
261                result = filename, headers
262                if self.tempcache is not None:
263                    self.tempcache[url] = result
264                bs = 1024*8
265                size = -1
266                read = 0
267                blocknum = 0
268                if "content-length" in headers:
269                    size = int(headers["Content-Length"])
270                if reporthook:
271                    reporthook(blocknum, bs, size)
272                while 1:
273                    block = fp.read(bs)
274                    if block == "":
275                        break
276                    read += len(block)
277                    tfp.write(block)
278                    blocknum += 1
279                    if reporthook:
280                        reporthook(blocknum, bs, size)
281            finally:
282                tfp.close()
283        finally:
284            fp.close()
285
286        # raise exception if actual size does not match content-length header
287        if size >= 0 and read < size:
288            raise ContentTooShortError("retrieval incomplete: got only %i out "
289                                       "of %i bytes" % (read, size), result)
290
291        return result
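
    # Hedged example of retrieve() with a progress callback; the callback name
    # and URL are illustrative.  As the loop above shows, reporthook is called
    # with (block number, block size, total size), where total size is -1 when
    # no Content-Length header is available:
    #
    #   def show_progress(blocknum, bs, size):
    #       print blocknum * bs, "of", size, "bytes"
    #
    #   filename, headers = URLopener().retrieve("http://www.example.com/f.txt",
    #                                             reporthook=show_progress)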
292
293    # Each method named open_<type> knows how to open that type of URL
294
295    def open_http(self, url, data=None):
296        """Use HTTP protocol."""
297        import httplib
298        user_passwd = None
299        proxy_passwd= None
300        if isinstance(url, str):
301            host, selector = splithost(url)
302            if host:
303                user_passwd, host = splituser(host)
304                host = unquote(host)
305            realhost = host
306        else:
307            host, selector = url
308            # check whether the proxy contains authorization information
309            proxy_passwd, host = splituser(host)
310            # now we proceed with the url we want to obtain
311            urltype, rest = splittype(selector)
312            url = rest
313            user_passwd = None
314            if urltype.lower() != 'http':
315                realhost = None
316            else:
317                realhost, rest = splithost(rest)
318                if realhost:
319                    user_passwd, realhost = splituser(realhost)
320                if user_passwd:
321                    selector = "%s://%s%s" % (urltype, realhost, rest)
322                if proxy_bypass(realhost):
323                    host = realhost
324
325            #print "proxy via http:", host, selector
326        if not host: raise IOError, ('http error', 'no host given')
327
328        if proxy_passwd:
329            proxy_passwd = unquote(proxy_passwd)
330            proxy_auth = base64.b64encode(proxy_passwd).strip()
331        else:
332            proxy_auth = None
333
334        if user_passwd:
335            user_passwd = unquote(user_passwd)
336            auth = base64.b64encode(user_passwd).strip()
337        else:
338            auth = None
339        h = httplib.HTTP(host)
340        if data is not None:
341            h.putrequest('POST', selector)
342            h.putheader('Content-Type', 'application/x-www-form-urlencoded')
343            h.putheader('Content-Length', '%d' % len(data))
344        else:
345            h.putrequest('GET', selector)
346        if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
347        if auth: h.putheader('Authorization', 'Basic %s' % auth)
348        if realhost: h.putheader('Host', realhost)
349        for args in self.addheaders: h.putheader(*args)
350        h.endheaders(data)
351        errcode, errmsg, headers = h.getreply()
352        fp = h.getfile()
353        if errcode == -1:
354            if fp: fp.close()
355            # something went wrong with the HTTP status line
356            raise IOError, ('http protocol error', 0,
357                            'got a bad status line', None)
358        # According to RFC 2616, "2xx" code indicates that the client's
359        # request was successfully received, understood, and accepted.
360        if (200 <= errcode < 300):
361            return addinfourl(fp, headers, "http:" + url, errcode)
362        else:
363            if data is None:
364                return self.http_error(url, fp, errcode, errmsg, headers)
365            else:
366                return self.http_error(url, fp, errcode, errmsg, headers, data)
367
368    def http_error(self, url, fp, errcode, errmsg, headers, data=None):
369        """Handle http errors.
370        Derived class can override this, or provide specific handlers
371        named http_error_DDD where DDD is the 3-digit error code."""
372        # First check if there's a specific handler for this error
373        name = 'http_error_%d' % errcode
374        if hasattr(self, name):
375            method = getattr(self, name)
376            if data is None:
377                result = method(url, fp, errcode, errmsg, headers)
378            else:
379                result = method(url, fp, errcode, errmsg, headers, data)
380            if result: return result
381        return self.http_error_default(url, fp, errcode, errmsg, headers)
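
    # A sketch of the handler hook described above (the subclass name and the
    # 404 handler are hypothetical).  Returning a false value falls through to
    # http_error_default():
    #
    #   class MyOpener(FancyURLopener):
    #       def http_error_404(self, url, fp, errcode, errmsg, headers, data=None):
    #           fp.close()
    #           return None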
382
383    def http_error_default(self, url, fp, errcode, errmsg, headers):
384        """Default error handler: close the connection and raise IOError."""
385        fp.close()
386        raise IOError, ('http error', errcode, errmsg, headers)
387
388    if _have_ssl:
389        def open_https(self, url, data=None):
390            """Use HTTPS protocol."""
391
392            import httplib
393            user_passwd = None
394            proxy_passwd = None
395            if isinstance(url, str):
396                host, selector = splithost(url)
397                if host:
398                    user_passwd, host = splituser(host)
399                    host = unquote(host)
400                realhost = host
401            else:
402                host, selector = url
403                # check whether the proxy contains authorization information
404                proxy_passwd, host = splituser(host)
405                urltype, rest = splittype(selector)
406                url = rest
407                user_passwd = None
408                if urltype.lower() != 'https':
409                    realhost = None
410                else:
411                    realhost, rest = splithost(rest)
412                    if realhost:
413                        user_passwd, realhost = splituser(realhost)
414                    if user_passwd:
415                        selector = "%s://%s%s" % (urltype, realhost, rest)
416                #print "proxy via https:", host, selector
417            if not host: raise IOError, ('https error', 'no host given')
418            if proxy_passwd:
419                proxy_passwd = unquote(proxy_passwd)
420                proxy_auth = base64.b64encode(proxy_passwd).strip()
421            else:
422                proxy_auth = None
423            if user_passwd:
424                user_passwd = unquote(user_passwd)
425                auth = base64.b64encode(user_passwd).strip()
426            else:
427                auth = None
428            h = httplib.HTTPS(host, 0,
429                              key_file=self.key_file,
430                              cert_file=self.cert_file,
431                              context=self.context)
432            if data is not None:
433                h.putrequest('POST', selector)
434                h.putheader('Content-Type',
435                            'application/x-www-form-urlencoded')
436                h.putheader('Content-Length', '%d' % len(data))
437            else:
438                h.putrequest('GET', selector)
439            if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
440            if auth: h.putheader('Authorization', 'Basic %s' % auth)
441            if realhost: h.putheader('Host', realhost)
442            for args in self.addheaders: h.putheader(*args)
443            h.endheaders(data)
444            errcode, errmsg, headers = h.getreply()
445            fp = h.getfile()
446            if errcode == -1:
447                if fp: fp.close()
448                # something went wrong with the HTTP status line
449                raise IOError, ('http protocol error', 0,
450                                'got a bad status line', None)
451            # According to RFC 2616, "2xx" code indicates that the client's
452            # request was successfully received, understood, and accepted.
453            if (200 <= errcode < 300):
454                return addinfourl(fp, headers, "https:" + url, errcode)
455            else:
456                if data is None:
457                    return self.http_error(url, fp, errcode, errmsg, headers)
458                else:
459                    return self.http_error(url, fp, errcode, errmsg, headers,
460                                           data)
461
462    def open_file(self, url):
463        """Use local file or FTP depending on form of URL."""
464        if not isinstance(url, str):
465            raise IOError, ('file error', 'proxy support for file protocol currently not implemented')
466        if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
467            return self.open_ftp(url)
468        else:
469            return self.open_local_file(url)
470
471    def open_local_file(self, url):
472        """Use local file."""
473        import mimetypes, mimetools, email.utils
474        try:
475            from cStringIO import StringIO
476        except ImportError:
477            from StringIO import StringIO
478        host, file = splithost(url)
479        localname = url2pathname(file)
480        try:
481            stats = os.stat(localname)
482        except OSError, e:
483            raise IOError(e.errno, e.strerror, e.filename)
484        size = stats.st_size
485        modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
486        mtype = mimetypes.guess_type(url)[0]
487        headers = mimetools.Message(StringIO(
488            'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
489            (mtype or 'text/plain', size, modified)))
490        if not host:
491            urlfile = file
492            if file[:1] == '/':
493                urlfile = 'file://' + file
494            elif file[:2] == './':
495                raise ValueError("local file url may start with / or file:. Unknown url of type: %s" % url)
496            return addinfourl(open(localname, 'rb'),
497                              headers, urlfile)
498        host, port = splitport(host)
499        if not port \
500           and socket.gethostbyname(host) in (localhost(), thishost()):
501            urlfile = file
502            if file[:1] == '/':
503                urlfile = 'file://' + file
504            return addinfourl(open(localname, 'rb'),
505                              headers, urlfile)
506        raise IOError, ('local file error', 'not on local host')
507
508    def open_ftp(self, url):
509        """Use FTP protocol."""
510        if not isinstance(url, str):
511            raise IOError, ('ftp error', 'proxy support for ftp protocol currently not implemented')
512        import mimetypes, mimetools
513        try:
514            from cStringIO import StringIO
515        except ImportError:
516            from StringIO import StringIO
517        host, path = splithost(url)
518        if not host: raise IOError, ('ftp error', 'no host given')
519        host, port = splitport(host)
520        user, host = splituser(host)
521        if user: user, passwd = splitpasswd(user)
522        else: passwd = None
523        host = unquote(host)
524        user = user or ''
525        passwd = passwd or ''
526        host = socket.gethostbyname(host)
527        if not port:
528            import ftplib
529            port = ftplib.FTP_PORT
530        else:
531            port = int(port)
532        path, attrs = splitattr(path)
533        path = unquote(path)
534        dirs = path.split('/')
535        dirs, file = dirs[:-1], dirs[-1]
536        if dirs and not dirs[0]: dirs = dirs[1:]
537        if dirs and not dirs[0]: dirs[0] = '/'
538        key = user, host, port, '/'.join(dirs)
539        # XXX thread unsafe!
540        if len(self.ftpcache) > MAXFTPCACHE:
541            # Prune the cache, rather arbitrarily
542            for k in self.ftpcache.keys():
543                if k != key:
544                    v = self.ftpcache[k]
545                    del self.ftpcache[k]
546                    v.close()
547        try:
548            if key not in self.ftpcache:
549                self.ftpcache[key] = \
550                    ftpwrapper(user, passwd, host, port, dirs)
551            if not file: type = 'D'
552            else: type = 'I'
553            for attr in attrs:
554                attr, value = splitvalue(attr)
555                if attr.lower() == 'type' and \
556                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
557                    type = value.upper()
558            (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
559            mtype = mimetypes.guess_type("ftp:" + url)[0]
560            headers = ""
561            if mtype:
562                headers += "Content-Type: %s\n" % mtype
563            if retrlen is not None and retrlen >= 0:
564                headers += "Content-Length: %d\n" % retrlen
565            headers = mimetools.Message(StringIO(headers))
566            return addinfourl(fp, headers, "ftp:" + url)
567        except ftperrors(), msg:
568            raise IOError, ('ftp error', msg), sys.exc_info()[2]
569
570    def open_data(self, url, data=None):
571        """Use "data" URL."""
572        if not isinstance(url, str):
573            raise IOError, ('data error', 'proxy support for data protocol currently not implemented')
574        # ignore POSTed data
575        #
576        # syntax of data URLs:
577        # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
578        # mediatype := [ type "/" subtype ] *( ";" parameter )
579        # data      := *urlchar
580        # parameter := attribute "=" value
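        #
        # illustrative examples of well-formed data URLs:
        #   data:text/plain;charset=US-ASCII,Hello%20world
        #   data:text/plain;base64,SGVsbG8gd29ybGQ=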
581        import mimetools
582        try:
583            from cStringIO import StringIO
584        except ImportError:
585            from StringIO import StringIO
586        try:
587            [type, data] = url.split(',', 1)
588        except ValueError:
589            raise IOError, ('data error', 'bad data URL')
590        if not type:
591            type = 'text/plain;charset=US-ASCII'
592        semi = type.rfind(';')
593        if semi >= 0 and '=' not in type[semi:]:
594            encoding = type[semi+1:]
595            type = type[:semi]
596        else:
597            encoding = ''
598        msg = []
599        msg.append('Date: %s'%time.strftime('%a, %d %b %Y %H:%M:%S GMT',
600                                            time.gmtime(time.time())))
601        msg.append('Content-type: %s' % type)
602        if encoding == 'base64':
603            data = base64.decodestring(data)
604        else:
605            data = unquote(data)
606        msg.append('Content-Length: %d' % len(data))
607        msg.append('')
608        msg.append(data)
609        msg = '\n'.join(msg)
610        f = StringIO(msg)
611        headers = mimetools.Message(f, 0)
612        #f.fileno = None     # needed for addinfourl
613        return addinfourl(f, headers, url)
614
615
616class FancyURLopener(URLopener):
617    """Derived class with handlers for errors we can handle (perhaps)."""
618
619    def __init__(self, *args, **kwargs):
620        URLopener.__init__(self, *args, **kwargs)
621        self.auth_cache = {}
622        self.tries = 0
623        self.maxtries = 10
624
625    def http_error_default(self, url, fp, errcode, errmsg, headers):
626        """Default error handling -- don't raise an exception."""
627        return addinfourl(fp, headers, "http:" + url, errcode)
628
629    def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
630        """Error 302 -- relocated (temporarily)."""
631        self.tries += 1
632        try:
633            if self.maxtries and self.tries >= self.maxtries:
634                if hasattr(self, "http_error_500"):
635                    meth = self.http_error_500
636                else:
637                    meth = self.http_error_default
638                return meth(url, fp, 500,
639                            "Internal Server Error: Redirect Recursion",
640                            headers)
641            result = self.redirect_internal(url, fp, errcode, errmsg,
642                                            headers, data)
643            return result
644        finally:
645            self.tries = 0
646
647    def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
648        if 'location' in headers:
649            newurl = headers['location']
650        elif 'uri' in headers:
651            newurl = headers['uri']
652        else:
653            return
654        fp.close()
655        # In case the server sent a relative URL, join with original:
656        newurl = basejoin(self.type + ":" + url, newurl)
657
658        # For security reasons we do not allow redirects to protocols
659        # other than HTTP, HTTPS or FTP.
660        newurl_lower = newurl.lower()
661        if not (newurl_lower.startswith('http://') or
662                newurl_lower.startswith('https://') or
663                newurl_lower.startswith('ftp://')):
664            raise IOError('redirect error', errcode,
665                          errmsg + " - Redirection to url '%s' is not allowed" %
666                          newurl,
667                          headers)
668
669        return self.open(newurl)
670
671    def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
672        """Error 301 -- also relocated (permanently)."""
673        return self.http_error_302(url, fp, errcode, errmsg, headers, data)
674
675    def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
676        """Error 303 -- also relocated (essentially identical to 302)."""
677        return self.http_error_302(url, fp, errcode, errmsg, headers, data)
678
679    def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
680        """Error 307 -- relocated, but turn POST into error."""
681        if data is None:
682            return self.http_error_302(url, fp, errcode, errmsg, headers, data)
683        else:
684            return self.http_error_default(url, fp, errcode, errmsg, headers)
685
686    def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
687        """Error 401 -- authentication required.
688        This function supports Basic authentication only."""
689        if 'www-authenticate' not in headers:
690            URLopener.http_error_default(self, url, fp,
691                                         errcode, errmsg, headers)
692        stuff = headers['www-authenticate']
693        import re
694        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
695        if not match:
696            URLopener.http_error_default(self, url, fp,
697                                         errcode, errmsg, headers)
698        scheme, realm = match.groups()
699        if scheme.lower() != 'basic':
700            URLopener.http_error_default(self, url, fp,
701                                         errcode, errmsg, headers)
702        name = 'retry_' + self.type + '_basic_auth'
703        if data is None:
704            return getattr(self,name)(url, realm)
705        else:
706            return getattr(self,name)(url, realm, data)
707
708    def http_error_407(self, url, fp, errcode, errmsg, headers, data=None):
709        """Error 407 -- proxy authentication required.
710        This function supports Basic authentication only."""
711        if 'proxy-authenticate' not in headers:
712            URLopener.http_error_default(self, url, fp,
713                                         errcode, errmsg, headers)
714        stuff = headers['proxy-authenticate']
715        import re
716        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
717        if not match:
718            URLopener.http_error_default(self, url, fp,
719                                         errcode, errmsg, headers)
720        scheme, realm = match.groups()
721        if scheme.lower() != 'basic':
722            URLopener.http_error_default(self, url, fp,
723                                         errcode, errmsg, headers)
724        name = 'retry_proxy_' + self.type + '_basic_auth'
725        if data is None:
726            return getattr(self,name)(url, realm)
727        else:
728            return getattr(self,name)(url, realm, data)
729
730    def retry_proxy_http_basic_auth(self, url, realm, data=None):
731        host, selector = splithost(url)
732        newurl = 'http://' + host + selector
733        proxy = self.proxies['http']
734        urltype, proxyhost = splittype(proxy)
735        proxyhost, proxyselector = splithost(proxyhost)
736        i = proxyhost.find('@') + 1
737        proxyhost = proxyhost[i:]
738        user, passwd = self.get_user_passwd(proxyhost, realm, i)
739        if not (user or passwd): return None
740        proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
741        self.proxies['http'] = 'http://' + proxyhost + proxyselector
742        if data is None:
743            return self.open(newurl)
744        else:
745            return self.open(newurl, data)
746
747    def retry_proxy_https_basic_auth(self, url, realm, data=None):
748        host, selector = splithost(url)
749        newurl = 'https://' + host + selector
750        proxy = self.proxies['https']
751        urltype, proxyhost = splittype(proxy)
752        proxyhost, proxyselector = splithost(proxyhost)
753        i = proxyhost.find('@') + 1
754        proxyhost = proxyhost[i:]
755        user, passwd = self.get_user_passwd(proxyhost, realm, i)
756        if not (user or passwd): return None
757        proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
758        self.proxies['https'] = 'https://' + proxyhost + proxyselector
759        if data is None:
760            return self.open(newurl)
761        else:
762            return self.open(newurl, data)
763
764    def retry_http_basic_auth(self, url, realm, data=None):
765        host, selector = splithost(url)
766        i = host.find('@') + 1
767        host = host[i:]
768        user, passwd = self.get_user_passwd(host, realm, i)
769        if not (user or passwd): return None
770        host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
771        newurl = 'http://' + host + selector
772        if data is None:
773            return self.open(newurl)
774        else:
775            return self.open(newurl, data)
776
777    def retry_https_basic_auth(self, url, realm, data=None):
778        host, selector = splithost(url)
779        i = host.find('@') + 1
780        host = host[i:]
781        user, passwd = self.get_user_passwd(host, realm, i)
782        if not (user or passwd): return None
783        host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
784        newurl = 'https://' + host + selector
785        if data is None:
786            return self.open(newurl)
787        else:
788            return self.open(newurl, data)
789
790    def get_user_passwd(self, host, realm, clear_cache=0):
791        key = realm + '@' + host.lower()
792        if key in self.auth_cache:
793            if clear_cache:
794                del self.auth_cache[key]
795            else:
796                return self.auth_cache[key]
797        user, passwd = self.prompt_user_passwd(host, realm)
798        if user or passwd: self.auth_cache[key] = (user, passwd)
799        return user, passwd
800
801    def prompt_user_passwd(self, host, realm):
802        """Override this in a GUI environment!"""
803        import getpass
804        try:
805            user = raw_input("Enter username for %s at %s: " % (realm,
806                                                                host))
807            passwd = getpass.getpass("Enter password for %s in %s at %s: " %
808                (user, realm, host))
809            return user, passwd
810        except KeyboardInterrupt:
811            print
812            return None, None
813
814
815# Utility functions
816
817_localhost = None
818def localhost():
819    """Return the IP address of the magic hostname 'localhost'."""
820    global _localhost
821    if _localhost is None:
822        _localhost = socket.gethostbyname('localhost')
823    return _localhost
824
825_thishost = None
826def thishost():
827    """Return the IP address of the current host."""
828    global _thishost
829    if _thishost is None:
830        try:
831            _thishost = socket.gethostbyname(socket.gethostname())
832        except socket.gaierror:
833            _thishost = socket.gethostbyname('localhost')
834    return _thishost
835
836_ftperrors = None
837def ftperrors():
838    """Return the set of errors raised by the FTP class."""
839    global _ftperrors
840    if _ftperrors is None:
841        import ftplib
842        _ftperrors = ftplib.all_errors
843    return _ftperrors
844
845_noheaders = None
846def noheaders():
847    """Return an empty mimetools.Message object."""
848    global _noheaders
849    if _noheaders is None:
850        import mimetools
851        try:
852            from cStringIO import StringIO
853        except ImportError:
854            from StringIO import StringIO
855        _noheaders = mimetools.Message(StringIO(), 0)
856        _noheaders.fp.close()   # Recycle file descriptor
857    return _noheaders
858
859
860# Utility classes
861
862class ftpwrapper:
863    """Class used by open_ftp() for caching open FTP connections."""
864
865    def __init__(self, user, passwd, host, port, dirs,
866                 timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
867                 persistent=True):
868        self.user = user
869        self.passwd = passwd
870        self.host = host
871        self.port = port
872        self.dirs = dirs
873        self.timeout = timeout
874        self.refcount = 0
875        self.keepalive = persistent
876        try:
877            self.init()
878        except:
879            self.close()
880            raise
881
882    def init(self):
883        import ftplib
884        self.busy = 0
885        self.ftp = ftplib.FTP()
886        self.ftp.connect(self.host, self.port, self.timeout)
887        self.ftp.login(self.user, self.passwd)
888        _target = '/'.join(self.dirs)
889        self.ftp.cwd(_target)
890
891    def retrfile(self, file, type):
892        import ftplib
893        self.endtransfer()
894        if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
895        else: cmd = 'TYPE ' + type; isdir = 0
896        try:
897            self.ftp.voidcmd(cmd)
898        except ftplib.all_errors:
899            self.init()
900            self.ftp.voidcmd(cmd)
901        conn = None
902        if file and not isdir:
903            # Try to retrieve as a file
904            try:
905                cmd = 'RETR ' + file
906                conn, retrlen = self.ftp.ntransfercmd(cmd)
907            except ftplib.error_perm, reason:
908                if str(reason)[:3] != '550':
909                    raise IOError, ('ftp error', reason), sys.exc_info()[2]
910        if not conn:
911            # Set transfer mode to ASCII!
912            self.ftp.voidcmd('TYPE A')
913            # Try a directory listing. Verify that directory exists.
914            if file:
915                pwd = self.ftp.pwd()
916                try:
917                    try:
918                        self.ftp.cwd(file)
919                    except ftplib.error_perm, reason:
920                        raise IOError, ('ftp error', reason), sys.exc_info()[2]
921                finally:
922                    self.ftp.cwd(pwd)
923                cmd = 'LIST ' + file
924            else:
925                cmd = 'LIST'
926            conn, retrlen = self.ftp.ntransfercmd(cmd)
927        self.busy = 1
928        ftpobj = addclosehook(conn.makefile('rb'), self.file_close)
929        self.refcount += 1
930        conn.close()
931        # Pass back both a suitably decorated object and a retrieval length
932        return (ftpobj, retrlen)
933
934    def endtransfer(self):
935        self.busy = 0
936
937    def close(self):
938        self.keepalive = False
939        if self.refcount <= 0:
940            self.real_close()
941
942    def file_close(self):
943        self.endtransfer()
944        self.refcount -= 1
945        if self.refcount <= 0 and not self.keepalive:
946            self.real_close()
947
948    def real_close(self):
949        self.endtransfer()
950        try:
951            self.ftp.close()
952        except ftperrors():
953            pass
954
955class addbase:
956    """Base class for addinfo and addclosehook."""
957
958    def __init__(self, fp):
959        self.fp = fp
960        self.read = self.fp.read
961        self.readline = self.fp.readline
962        if hasattr(self.fp, "readlines"): self.readlines = self.fp.readlines
963        if hasattr(self.fp, "fileno"):
964            self.fileno = self.fp.fileno
965        else:
966            self.fileno = lambda: None
967        if hasattr(self.fp, "__iter__"):
968            self.__iter__ = self.fp.__iter__
969            if hasattr(self.fp, "next"):
970                self.next = self.fp.next
971
972    def __repr__(self):
973        return '<%s at %r whose fp = %r>' % (self.__class__.__name__,
974                                             id(self), self.fp)
975
976    def close(self):
977        self.read = None
978        self.readline = None
979        self.readlines = None
980        self.fileno = None
981        if self.fp: self.fp.close()
982        self.fp = None
983
984class addclosehook(addbase):
985    """Class to add a close hook to an open file."""
986
987    def __init__(self, fp, closehook, *hookargs):
988        addbase.__init__(self, fp)
989        self.closehook = closehook
990        self.hookargs = hookargs
991
992    def close(self):
993        try:
994            closehook = self.closehook
995            hookargs = self.hookargs
996            if closehook:
997                self.closehook = None
998                self.hookargs = None
999                closehook(*hookargs)
1000        finally:
1001            addbase.close(self)
1002
1003
1004class addinfo(addbase):
1005    """class to add an info() method to an open file."""
1006
1007    def __init__(self, fp, headers):
1008        addbase.__init__(self, fp)
1009        self.headers = headers
1010
1011    def info(self):
1012        return self.headers
1013
1014class addinfourl(addbase):
1015    """class to add info() and geturl() methods to an open file."""
1016
1017    def __init__(self, fp, headers, url, code=None):
1018        addbase.__init__(self, fp)
1019        self.headers = headers
1020        self.url = url
1021        self.code = code
1022
1023    def info(self):
1024        return self.headers
1025
1026    def getcode(self):
1027        return self.code
1028
1029    def geturl(self):
1030        return self.url
1031
1032
1033# Utilities to parse URLs (most of these return None for missing parts):
1034# unwrap('<URL:type://host/path>') --> 'type://host/path'
1035# splittype('type:opaquestring') --> 'type', 'opaquestring'
1036# splithost('//host[:port]/path') --> 'host[:port]', '/path'
1037# splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
1038# splitpasswd('user:passwd') -> 'user', 'passwd'
1039# splitport('host:port') --> 'host', 'port'
1040# splitquery('/path?query') --> '/path', 'query'
1041# splittag('/path#tag') --> '/path', 'tag'
1042# splitattr('/path;attr1=value1;attr2=value2;...') ->
1043#   '/path', ['attr1=value1', 'attr2=value2', ...]
1044# splitvalue('attr=value') --> 'attr', 'value'
1045# unquote('abc%20def') -> 'abc def'
1046# quote('abc def') -> 'abc%20def'
1047
1048try:
1049    unicode
1050except NameError:
1051    def _is_unicode(x):
1052        return 0
1053else:
1054    def _is_unicode(x):
1055        return isinstance(x, unicode)
1056
1057def toBytes(url):
1058    """toBytes(u"URL") --> 'URL'."""
1059    # Most URL schemes require ASCII. If that changes, the conversion
1060    # can be relaxed
1061    if _is_unicode(url):
1062        try:
1063            url = url.encode("ASCII")
1064        except UnicodeError:
1065            raise UnicodeError("URL " + repr(url) +
1066                               " contains non-ASCII characters")
1067    return url
1068
1069def unwrap(url):
1070    """unwrap('<URL:type://host/path>') --> 'type://host/path'."""
1071    url = url.strip()
1072    if url[:1] == '<' and url[-1:] == '>':
1073        url = url[1:-1].strip()
1074    if url[:4] == 'URL:': url = url[4:].strip()
1075    return url
1076
1077_typeprog = None
1078def splittype(url):
1079    """splittype('type:opaquestring') --> 'type', 'opaquestring'."""
1080    global _typeprog
1081    if _typeprog is None:
1082        import re
1083        _typeprog = re.compile('^([^/:]+):')
1084
1085    match = _typeprog.match(url)
1086    if match:
1087        scheme = match.group(1)
1088        return scheme.lower(), url[len(scheme) + 1:]
1089    return None, url
1090
1091_hostprog = None
1092def splithost(url):
1093    """splithost('//host[:port]/path') --> 'host[:port]', '/path'."""
1094    global _hostprog
1095    if _hostprog is None:
1096        _hostprog = re.compile('//([^/#?]*)(.*)', re.DOTALL)
1097
1098    match = _hostprog.match(url)
1099    if match:
1100        host_port = match.group(1)
1101        path = match.group(2)
1102        if path and not path.startswith('/'):
1103            path = '/' + path
1104        return host_port, path
1105    return None, url
1106
1107_userprog = None
1108def splituser(host):
1109    """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'."""
1110    global _userprog
1111    if _userprog is None:
1112        import re
1113        _userprog = re.compile('^(.*)@(.*)$')
1114
1115    match = _userprog.match(host)
1116    if match: return match.group(1, 2)
1117    return None, host
1118
1119_passwdprog = None
1120def splitpasswd(user):
1121    """splitpasswd('user:passwd') -> 'user', 'passwd'."""
1122    global _passwdprog
1123    if _passwdprog is None:
1124        import re
1125        _passwdprog = re.compile('^([^:]*):(.*)$',re.S)
1126
1127    match = _passwdprog.match(user)
1128    if match: return match.group(1, 2)
1129    return user, None
1130
1131# splittag('/path#tag') --> '/path', 'tag'
1132_portprog = None
1133def splitport(host):
1134    """splitport('host:port') --> 'host', 'port'."""
1135    global _portprog
1136    if _portprog is None:
1137        import re
1138        _portprog = re.compile('^(.*):([0-9]*)$')
1139
1140    match = _portprog.match(host)
1141    if match:
1142        host, port = match.groups()
1143        if port:
1144            return host, port
1145    return host, None
1146
1147_nportprog = None
1148def splitnport(host, defport=-1):
1149    """Split host and port, returning numeric port.
1150    Return the given default port if no ':' is found; defaults to -1.
1151    Return the numerical port if a valid number is found after ':'.
1152    Return None if ':' is present but not followed by a valid number."""
1153    global _nportprog
1154    if _nportprog is None:
1155        import re
1156        _nportprog = re.compile('^(.*):(.*)$')
1157
1158    match = _nportprog.match(host)
1159    if match:
1160        host, port = match.group(1, 2)
1161        if port:
1162            try:
1163                nport = int(port)
1164            except ValueError:
1165                nport = None
1166            return host, nport
1167    return host, defport
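
# Illustrative results for splitnport (per the docstring above):
#   splitnport('www.example.com:8080')  -> ('www.example.com', 8080)
#   splitnport('www.example.com')       -> ('www.example.com', -1)
#   splitnport('www.example.com:spam')  -> ('www.example.com', None)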
1168
1169_queryprog = None
1170def splitquery(url):
1171    """splitquery('/path?query') --> '/path', 'query'."""
1172    global _queryprog
1173    if _queryprog is None:
1174        import re
1175        _queryprog = re.compile('^(.*)\?([^?]*)$')
1176
1177    match = _queryprog.match(url)
1178    if match: return match.group(1, 2)
1179    return url, None
1180
1181_tagprog = None
1182def splittag(url):
1183    """splittag('/path#tag') --> '/path', 'tag'."""
1184    global _tagprog
1185    if _tagprog is None:
1186        import re
1187        _tagprog = re.compile('^(.*)#([^#]*)$')
1188
1189    match = _tagprog.match(url)
1190    if match: return match.group(1, 2)
1191    return url, None
1192
1193def splitattr(url):
1194    """splitattr('/path;attr1=value1;attr2=value2;...') ->
1195        '/path', ['attr1=value1', 'attr2=value2', ...]."""
1196    words = url.split(';')
1197    return words[0], words[1:]
1198
1199_valueprog = None
1200def splitvalue(attr):
1201    """splitvalue('attr=value') --> 'attr', 'value'."""
1202    global _valueprog
1203    if _valueprog is None:
1204        import re
1205        _valueprog = re.compile('^([^=]*)=(.*)$')
1206
1207    match = _valueprog.match(attr)
1208    if match: return match.group(1, 2)
1209    return attr, None
1210
1211# urlparse contains a duplicate of this method to avoid a circular import.  If
1212# you update this method, also update the copy in urlparse.  This code
1213# duplication does not exist in Python3.
1214
1215_hexdig = '0123456789ABCDEFabcdef'
1216_hextochr = dict((a + b, chr(int(a + b, 16)))
1217                 for a in _hexdig for b in _hexdig)
1218_asciire = re.compile('([\x00-\x7f]+)')
1219
1220def unquote(s):
1221    """unquote('abc%20def') -> 'abc def'."""
1222    if _is_unicode(s):
1223        if '%' not in s:
1224            return s
1225        bits = _asciire.split(s)
1226        res = [bits[0]]
1227        append = res.append
1228        for i in range(1, len(bits), 2):
1229            append(unquote(str(bits[i])).decode('latin1'))
1230            append(bits[i + 1])
1231        return ''.join(res)
1232
1233    bits = s.split('%')
1234    # fastpath
1235    if len(bits) == 1:
1236        return s
1237    res = [bits[0]]
1238    append = res.append
1239    for item in bits[1:]:
1240        try:
1241            append(_hextochr[item[:2]])
1242            append(item[2:])
1243        except KeyError:
1244            append('%')
1245            append(item)
1246    return ''.join(res)
1247
1248def unquote_plus(s):
1249    """unquote_plus('%7e/abc+def') -> '~/abc def'."""
1250    s = s.replace('+', ' ')
1251    return unquote(s)
1252
1253always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
1254               'abcdefghijklmnopqrstuvwxyz'
1255               '0123456789' '_.-')
1256_safe_map = {}
1257for i, c in zip(xrange(256), str(bytearray(xrange(256)))):
1258    _safe_map[c] = c if (i < 128 and c in always_safe) else '%{:02X}'.format(i)
1259_safe_quoters = {}
1260
1261def quote(s, safe='/'):
1262    """quote('abc def') -> 'abc%20def'
1263
1264    Each part of a URL, e.g. the path info, the query, etc., has a
1265    different set of reserved characters that must be quoted.
1266
1267    RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
1268    the following reserved characters.
1269
1270    reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
1271                  "$" | ","
1272
1273    Each of these characters is reserved in some component of a URL,
1274    but not necessarily in all of them.
1275
1276    By default, the quote function is intended for quoting the path
1277    section of a URL.  Thus, it will not encode '/'.  This character
1278    is reserved, but in typical usage the quote function is being
1279    called on a path where the existing slash characters are used as
1280    reserved characters.
1281    """
1282    # fastpath
1283    if not s:
1284        if s is None:
1285            raise TypeError('None object cannot be quoted')
1286        return s
1287    cachekey = (safe, always_safe)
1288    try:
1289        (quoter, safe) = _safe_quoters[cachekey]
1290    except KeyError:
1291        safe_map = _safe_map.copy()
1292        safe_map.update([(c, c) for c in safe])
1293        quoter = safe_map.__getitem__
1294        safe = always_safe + safe
1295        _safe_quoters[cachekey] = (quoter, safe)
1296    if not s.rstrip(safe):
1297        return s
1298    return ''.join(map(quoter, s))
1299
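# Illustrative behaviour of quote() with the default safe='/' (values follow
# the percent-encoding rules described in the docstring above):
#   quote('/~user/file name.txt')   -> '/%7Euser/file%20name.txt'
#   quote('/a&b/', safe='/&')       -> '/a&b/'
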
1300def quote_plus(s, safe=''):
1301    """Quote the query fragment of a URL, replacing ' ' with '+'."""
1302    if ' ' in s:
1303        s = quote(s, safe + ' ')
1304        return s.replace(' ', '+')
1305    return quote(s, safe)
1306
1307def urlencode(query, doseq=0):
1308    """Encode a sequence of two-element tuples or dictionary into a URL query string.
1309
1310    If any values in the query arg are sequences and doseq is true, each
1311    sequence element is converted to a separate parameter.
1312
1313    If the query arg is a sequence of two-element tuples, the order of the
1314    parameters in the output will match the order of parameters in the
1315    input.
1316    """
1317
1318    if hasattr(query,"items"):
1319        # mapping objects
1320        query = query.items()
1321    else:
1322        # it's a bother at times that strings and string-like objects are
1323        # sequences...
1324        try:
1325            # non-sequence items should not work with len()
1326            # non-empty strings will fail this
1327            if len(query) and not isinstance(query[0], tuple):
1328                raise TypeError
1329            # zero-length sequences of all types will get here and succeed,
1330            # but that's a minor nit - since the original implementation
1331            # allowed empty dicts that type of behavior probably should be
1332            # preserved for consistency
1333        except TypeError:
1334            ty,va,tb = sys.exc_info()
1335            raise TypeError, "not a valid non-string sequence or mapping object", tb
1336
1337    l = []
1338    if not doseq:
1339        # preserve old behavior
1340        for k, v in query:
1341            k = quote_plus(str(k))
1342            v = quote_plus(str(v))
1343            l.append(k + '=' + v)
1344    else:
1345        for k, v in query:
1346            k = quote_plus(str(k))
1347            if isinstance(v, str):
1348                v = quote_plus(v)
1349                l.append(k + '=' + v)
1350            elif _is_unicode(v):
1351                # is there a reasonable way to convert to ASCII?
1352                # encode generates a string, but "replace" or "ignore"
1353                # lose information and "strict" can raise UnicodeError
1354                v = quote_plus(v.encode("ASCII","replace"))
1355                l.append(k + '=' + v)
1356            else:
1357                try:
1358                    # is this a sufficient test for sequence-ness?
1359                    len(v)
1360                except TypeError:
1361                    # not a sequence
1362                    v = quote_plus(str(v))
1363                    l.append(k + '=' + v)
1364                else:
1365                    # loop over the sequence
1366                    for elt in v:
1367                        l.append(k + '=' + quote_plus(str(elt)))
1368    return '&'.join(l)
1369
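# Illustrative examples of urlencode() (keys and values are arbitrary):
#   urlencode({'q': 'spam & eggs'})       -> 'q=spam+%26+eggs'
#   urlencode([('k', 1), ('k', 2)])       -> 'k=1&k=2'
#   urlencode({'k': [1, 2]}, doseq=1)     -> 'k=1&k=2'
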
1370# Proxy handling
1371def getproxies_environment():
1372    """Return a dictionary of scheme -> proxy server URL mappings.
1373
1374    Scan the environment for variables named <scheme>_proxy;
1375    this seems to be the standard convention.  In order to prefer lowercase
1376    variables, we process the environment in two passes: the first pass
1377    matches variables of any case, the second only lowercase ones.
1378
1379    If you need a different way, you can pass a proxies dictionary to the
1380    [Fancy]URLopener constructor.
1381    """
1382    # Get all variables
1383    proxies = {}
1384    for name, value in os.environ.items():
1385        name = name.lower()
1386        if value and name[-6:] == '_proxy':
1387            proxies[name[:-6]] = value
1388
1389    # CVE-2016-1000110 - If we are running as CGI script, forget HTTP_PROXY
1390    # (non-all-lowercase) as it may be set from the web server by a "Proxy:"
1391    # header from the client
1392    # If "proxy" is lowercase, it will still be used thanks to the next block
1393    if 'REQUEST_METHOD' in os.environ:
1394        proxies.pop('http', None)
1395
1396    # Get lowercase variables
1397    for name, value in os.environ.items():
1398        if name[-6:] == '_proxy':
1399            name = name.lower()
1400            if value:
1401                proxies[name[:-6]] = value
1402            else:
1403                proxies.pop(name[:-6], None)
1404
1405    return proxies
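
    # e.g. with the environment variable http_proxy set to
    # "http://proxy.example.com:3128" (value illustrative), the returned
    # mapping would include {'http': 'http://proxy.example.com:3128'}.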
1406
1407def proxy_bypass_environment(host, proxies=None):
1408    """Test if proxies should not be used for a particular host.
1409
1410    Checks the proxies dict for the value of no_proxy, which should be a
1411    list of comma separated DNS suffixes, or '*' for all hosts.
1412    """
1413    if proxies is None:
1414        proxies = getproxies_environment()
1415    # don't bypass, if no_proxy isn't specified
1416    try:
1417        no_proxy = proxies['no']
1418    except KeyError:
1419        return 0
1420    # '*' is special case for always bypass
1421    if no_proxy == '*':
1422        return 1
1423    # strip port off host
1424    hostonly, port = splitport(host)
1425    # check if the host ends with any of the DNS suffixes
1426    no_proxy_list = [proxy.strip() for proxy in no_proxy.split(',')]
1427    for name in no_proxy_list:
1428        if name:
1429            name = name.lstrip('.')  # ignore leading dots
1430            name = re.escape(name)
1431            pattern = r'(.+\.)?%s$' % name
1432            if (re.match(pattern, hostonly, re.I)
1433                    or re.match(pattern, host, re.I)):
1434                return 1
1435    # otherwise, don't bypass
1436    return 0
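
    # e.g. with no_proxy=".example.com,localhost" in the environment (values
    # illustrative), both proxy_bypass_environment('www.example.com') and
    # proxy_bypass_environment('localhost:8080') return 1.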
1437

if sys.platform == 'darwin':
    from _scproxy import _get_proxy_settings, _get_proxies

    def proxy_bypass_macosx_sysconf(host):
        """
        Return True iff this host shouldn't be accessed using a proxy

        This function uses the MacOSX framework SystemConfiguration
        to fetch the proxy information.
        """
        import re
        import socket
        from fnmatch import fnmatch

        hostonly, port = splitport(host)

        def ip2num(ipAddr):
            parts = ipAddr.split('.')
            parts = map(int, parts)
            if len(parts) != 4:
                parts = (parts + [0, 0, 0, 0])[:4]
            return (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3]

        proxy_settings = _get_proxy_settings()

        # Check for simple host names:
        if '.' not in host:
            if proxy_settings['exclude_simple']:
                return True

        hostIP = None

        for value in proxy_settings.get('exceptions', ()):
            # Items in the list are strings like these: *.local, 169.254/16
            if not value: continue

            m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value)
            if m is not None:
                if hostIP is None:
                    try:
                        hostIP = socket.gethostbyname(hostonly)
                        hostIP = ip2num(hostIP)
                    except socket.error:
                        continue

                base = ip2num(m.group(1))
                mask = m.group(2)
                if mask is None:
                    mask = 8 * (m.group(1).count('.') + 1)
                else:
                    mask = int(mask[1:])
                mask = 32 - mask

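                # Worked example (illustrative): for the exception "169.254/16",
                # base == 0xA9FE0000 and mask == 32 - 16 == 16, so any host
                # address whose top 16 bits are 0xA9FE (i.e. 169.254.x.x)
                # compares equal below and the proxy is bypassed.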
                if (hostIP >> mask) == (base >> mask):
                    return True

            elif fnmatch(host, value):
                return True

        return False

    def getproxies_macosx_sysconf():
        """Return a dictionary of scheme -> proxy server URL mappings.

        This function uses the MacOSX framework SystemConfiguration
        to fetch the proxy information.
        """
        return _get_proxies()

    def proxy_bypass(host):
        """Return True if a host should be bypassed.

        Checks proxy settings gathered from the environment, if specified, or
        from the MacOSX framework SystemConfiguration.
        """
        proxies = getproxies_environment()
        if proxies:
            return proxy_bypass_environment(host, proxies)
        else:
            return proxy_bypass_macosx_sysconf(host)

    def getproxies():
        """Return proxies from the environment, or from SystemConfiguration."""
        return getproxies_environment() or getproxies_macosx_sysconf()

elif os.name == 'nt':
    def getproxies_registry():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Win32 uses the registry to store proxies.
        """
        proxies = {}
        try:
            import _winreg
        except ImportError:
            # Std module, so should be around - but you never know!
            return proxies
        try:
            internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            proxyEnable = _winreg.QueryValueEx(internetSettings,
                                               'ProxyEnable')[0]
            if proxyEnable:
                # Returned as Unicode, but causes problems if not converted
                # to ASCII
                proxyServer = str(_winreg.QueryValueEx(internetSettings,
                                                       'ProxyServer')[0])
                if '=' in proxyServer:
                    # Per-protocol settings
                    for p in proxyServer.split(';'):
                        protocol, address = p.split('=', 1)
                        # See if address has a type:// prefix
                        import re
                        if not re.match('^([^/:]+)://', address):
                            address = '%s://%s' % (protocol, address)
                        proxies[protocol] = address
                else:
                    # Use one setting for all protocols
                    if proxyServer[:5] == 'http:':
                        proxies['http'] = proxyServer
                    else:
                        proxies['http'] = 'http://%s' % proxyServer
                        proxies['https'] = 'https://%s' % proxyServer
                        proxies['ftp'] = 'ftp://%s' % proxyServer
            internetSettings.Close()
        except (WindowsError, ValueError, TypeError):
            # Either the registry key was not found, or the value was in an
            # unexpected format; proxies is already empty, so nothing to do.
            pass
        return proxies

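    # Illustrative example of the two registry formats handled above, using
    # hypothetical values: a per-protocol ProxyServer string such as
    #     'http=proxy.example.com:3128;ftp=ftp-proxy.example.com:2121'
    # yields {'http': 'http://proxy.example.com:3128',
    #         'ftp': 'ftp://ftp-proxy.example.com:2121'},
    # while a bare 'proxy.example.com:3128' is applied to the http, https and
    # ftp schemes with matching URL prefixes.
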
    def getproxies():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Returns settings gathered from the environment, if specified,
        or from the registry.
        """
        return getproxies_environment() or getproxies_registry()

    def proxy_bypass_registry(host):
        """Return 1 if 'host' matches the registry's ProxyOverride list."""
        try:
            import _winreg
            import re
        except ImportError:
            # Std modules, so should be around - but you never know!
            return 0
        try:
            internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            proxyEnable = _winreg.QueryValueEx(internetSettings,
                                               'ProxyEnable')[0]
            proxyOverride = str(_winreg.QueryValueEx(internetSettings,
                                                     'ProxyOverride')[0])
            # ^^^^ Returned as Unicode, but causes problems if not
            # converted to ASCII
        except WindowsError:
            return 0
        if not proxyEnable or not proxyOverride:
            return 0
        # try to build a list of check values for the host: its name, its IP
        # address, and its fully qualified domain name.
        rawHost, port = splitport(host)
        host = [rawHost]
        try:
            addr = socket.gethostbyname(rawHost)
            if addr != rawHost:
                host.append(addr)
        except socket.error:
            pass
        try:
            fqdn = socket.getfqdn(rawHost)
            if fqdn != rawHost:
                host.append(fqdn)
        except socket.error:
            pass
        # split the registry entry into individual override patterns; the
        # special '<local>' entry bypasses the proxy for plain host names
        # (names without a dot).
        proxyOverride = proxyOverride.split(';')
        # now check if we match one of the registry values.
        for test in proxyOverride:
            if test == '<local>':
                if '.' not in rawHost:
                    return 1
            test = test.replace(".", r"\.")     # mask dots
            test = test.replace("*", r".*")     # change glob sequence
            test = test.replace("?", r".")      # change glob char
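            # e.g. (illustrative) an override entry '*.example.com' becomes
            # the pattern r'.*\.example\.com' before the matching below.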
            for val in host:
                # print "%s <--> %s" %( test, val )
                if re.match(test, val, re.I):
                    return 1
        return 0

    def proxy_bypass(host):
        """Return True if the host should be bypassed.

        Checks proxy settings gathered from the environment, if specified,
        or from the registry.
        """
        proxies = getproxies_environment()
        if proxies:
            return proxy_bypass_environment(host, proxies)
        else:
            return proxy_bypass_registry(host)

else:
    # By default use environment variables
    getproxies = getproxies_environment
    proxy_bypass = proxy_bypass_environment

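# Illustrative sketch with a hypothetical host: getproxies() returns the
# scheme -> proxy URL mapping for the current platform and proxy_bypass()
# says whether a given host should skip the proxy entirely.
def _example_proxy_lookup():
    """Print the active proxy map and the bypass decision for one host."""
    host = 'www.example.com'            # hypothetical host
    print getproxies()
    print proxy_bypass(host)
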
# Test and time quote() and unquote()
def test1():
    s = ''
    for i in range(256): s = s + chr(i)
    s = s*4
    t0 = time.time()
    qs = quote(s)
    uqs = unquote(qs)
    t1 = time.time()
    if uqs != s:
        print 'Wrong!'
    print repr(s)
    print repr(qs)
    print repr(uqs)
    print round(t1 - t0, 3), 'sec'


def reporthook(blocknum, blocksize, totalsize):
    # Report during remote transfers
    print "Block number: %d, Block size: %d, Total size: %d" % (
        blocknum, blocksize, totalsize)

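# Illustrative sketch with a hypothetical URL and filename: reporthook has the
# callback signature expected by urlretrieve, which calls it once per block
# read from the remote side.
def _example_urlretrieve_progress():
    """Download a page while printing block-by-block progress."""
    url = 'http://www.example.com/'     # hypothetical URL
    urlretrieve(url, 'example.html', reporthook)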