1"""Open an arbitrary URL.
2
3See the following document for more info on URLs:
4"Names and Addresses, URIs, URLs, URNs, URCs", at
5http://www.w3.org/pub/WWW/Addressing/Overview.html
6
7See also the HTTP spec (from which the error codes are derived):
8"HTTP - Hypertext Transfer Protocol", at
9http://www.w3.org/pub/WWW/Protocols/
10
11Related standards and specs:
12- RFC1808: the "relative URL" spec. (authoritative status)
13- RFC1738: the "URL standard". (authoritative status)
14- RFC1630: the "URI spec". (informational status)
15
16The object returned by URLopener().open(file) will differ per
17protocol.  All you know is that it has methods read(), readline(),
18readlines(), fileno(), close() and info().  The read*(), fileno()
19and close() methods work like those of open files.
20The info() method returns a mimetools.Message object which can be
21used to query various info about the object, if available.
22(mimetools.Message objects are queried with the getheader() method.)
23"""
24
25import string
26import socket
27import os
28import time
29import sys
30import base64
31import re
32
33from urlparse import urljoin as basejoin
34
35__all__ = ["urlopen", "URLopener", "FancyURLopener", "urlretrieve",
36           "urlcleanup", "quote", "quote_plus", "unquote", "unquote_plus",
37           "urlencode", "url2pathname", "pathname2url", "splittag",
38           "localhost", "thishost", "ftperrors", "basejoin", "unwrap",
39           "splittype", "splithost", "splituser", "splitpasswd", "splitport",
40           "splitnport", "splitquery", "splitattr", "splitvalue",
41           "getproxies"]
42
43__version__ = '1.17'    # XXX This version is not always updated :-(
44
45MAXFTPCACHE = 10        # Trim the ftp cache beyond this size
46
47# Helper for non-unix systems
48if os.name == 'nt':
49    from nturl2path import url2pathname, pathname2url
50elif os.name == 'riscos':
51    from rourl2path import url2pathname, pathname2url
52else:
53    def url2pathname(pathname):
54        """OS-specific conversion from a relative URL of the 'file' scheme
55        to a file system path; not recommended for general use."""
56        return unquote(pathname)
57
58    def pathname2url(pathname):
59        """OS-specific conversion from a file system path to a relative URL
60        of the 'file' scheme; not recommended for general use."""
61        return quote(pathname)
62
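# Illustrative sketch (not part of the original module): on a POSIX build the
# two helpers above are just quote()/unquote(), so a path containing a space
# round-trips through percent-encoding.  The sample path is made up.
def _example_pathname_roundtrip(path='/tmp/some file.txt'):
    url = pathname2url(path)          # '/tmp/some%20file.txt' on POSIX
    return url, url2pathname(url)     # second element equals the input path
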
63# This really consists of two pieces:
64# (1) a class which handles opening of all sorts of URLs
65#     (plus assorted utilities etc.)
66# (2) a set of functions for parsing URLs
67# XXX Should these be separated out into different modules?
68
69
70# Shortcut for basic usage
71_urlopener = None
72def urlopen(url, data=None, proxies=None, context=None):
73    """Create a file-like object for the specified URL to read from."""
74    from warnings import warnpy3k
75    warnpy3k("urllib.urlopen() has been removed in Python 3.0 in "
76             "favor of urllib2.urlopen()", stacklevel=2)
77
78    global _urlopener
79    if proxies is not None or context is not None:
80        opener = FancyURLopener(proxies=proxies, context=context)
81    elif not _urlopener:
82        opener = FancyURLopener()
83        _urlopener = opener
84    else:
85        opener = _urlopener
86    if data is None:
87        return opener.open(url)
88    else:
89        return opener.open(url, data)
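# Illustrative sketch (not part of the original module): the object returned
# by urlopen() behaves like an open file plus info()/geturl(), as described in
# the module docstring.  The URL below is only an example and network access
# is needed when the function is called.
def _example_urlopen(url='http://www.python.org/'):
    f = urlopen(url)
    try:
        content_type = f.info().getheader('Content-Type')   # response headers
        first_chunk = f.read(1024)                           # read like a file
    finally:
        f.close()
    return content_type, len(first_chunk)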
90def urlretrieve(url, filename=None, reporthook=None, data=None, context=None):
91    global _urlopener
92    if context is not None:
93        opener = FancyURLopener(context=context)
94    elif not _urlopener:
95        _urlopener = opener = FancyURLopener()
96    else:
97        opener = _urlopener
98    return opener.retrieve(url, filename, reporthook, data)
99def urlcleanup():
100    if _urlopener:
101        _urlopener.cleanup()
102    _safe_quoters.clear()
103    ftpcache.clear()
104
105# check for SSL
106try:
107    import ssl
108except:
109    _have_ssl = False
110else:
111    _have_ssl = True
112
113# exception raised when downloaded size does not match content-length
114class ContentTooShortError(IOError):
115    def __init__(self, message, content):
116        IOError.__init__(self, message)
117        self.content = content
118
119ftpcache = {}
120class URLopener:
121    """Class to open URLs.
122    This is a class rather than just a subroutine because we may need
123    more than one set of global protocol-specific options.
124    Note -- this is a base class for those who don't want the
125    automatic handling of error types 302 (relocated) and 401
126    (authorization needed)."""
127
128    __tempfiles = None
129
130    version = "Python-urllib/%s" % __version__
131
132    # Constructor
133    def __init__(self, proxies=None, context=None, **x509):
134        if proxies is None:
135            proxies = getproxies()
136        assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
137        self.proxies = proxies
138        self.key_file = x509.get('key_file')
139        self.cert_file = x509.get('cert_file')
140        self.context = context
141        self.addheaders = [('User-Agent', self.version), ('Accept', '*/*')]
142        self.__tempfiles = []
143        self.__unlink = os.unlink # See cleanup()
144        self.tempcache = None
145        # Undocumented feature: if you assign {} to tempcache,
146        # it is used to cache files retrieved with
147        # self.retrieve().  This is not enabled by default
148        # since it does not work for changing documents (and I
149        # haven't got the logic to check expiration headers
150        # yet).
151        self.ftpcache = ftpcache
152        # Undocumented feature: you can use a different
153        # ftp cache by assigning to the .ftpcache member;
154        # in case you want logically independent URL openers
155        # XXX This is not threadsafe.  Bah.
156
157    def __del__(self):
158        self.close()
159
160    def close(self):
161        self.cleanup()
162
163    def cleanup(self):
164        # This code sometimes runs when the rest of this module
165        # has already been deleted, so it can't use any globals
166        # or import anything.
167        if self.__tempfiles:
168            for file in self.__tempfiles:
169                try:
170                    self.__unlink(file)
171                except OSError:
172                    pass
173            del self.__tempfiles[:]
174        if self.tempcache:
175            self.tempcache.clear()
176
177    def addheader(self, *args):
178        """Add a header to be used by the HTTP interface only
179        e.g. u.addheader('Accept', 'sound/basic')"""
180        self.addheaders.append(args)
181
182    # External interface
183    def open(self, fullurl, data=None):
184        """Use URLopener().open(file) instead of open(file, 'r')."""
185        fullurl = unwrap(toBytes(fullurl))
186        # percent-encode the URL so that characters such as spaces within
187        # URL paths do not trigger server errors.
188        fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]|")
189        if self.tempcache and fullurl in self.tempcache:
190            filename, headers = self.tempcache[fullurl]
191            fp = open(filename, 'rb')
192            return addinfourl(fp, headers, fullurl)
193        urltype, url = splittype(fullurl)
194        if not urltype:
195            urltype = 'file'
196        if urltype in self.proxies:
197            proxy = self.proxies[urltype]
198            urltype, proxyhost = splittype(proxy)
199            host, selector = splithost(proxyhost)
200            url = (host, fullurl) # Signal special case to open_*()
201        else:
202            proxy = None
203        name = 'open_' + urltype
204        self.type = urltype
205        name = name.replace('-', '_')
206        if not hasattr(self, name):
207            if proxy:
208                return self.open_unknown_proxy(proxy, fullurl, data)
209            else:
210                return self.open_unknown(fullurl, data)
211        try:
212            if data is None:
213                return getattr(self, name)(url)
214            else:
215                return getattr(self, name)(url, data)
216        except socket.error, msg:
217            raise IOError, ('socket error', msg), sys.exc_info()[2]
218
219    def open_unknown(self, fullurl, data=None):
220        """Overridable interface to open unknown URL type."""
221        type, url = splittype(fullurl)
222        raise IOError, ('url error', 'unknown url type', type)
223
224    def open_unknown_proxy(self, proxy, fullurl, data=None):
225        """Overridable interface to open unknown URL type."""
226        type, url = splittype(fullurl)
227        raise IOError, ('url error', 'invalid proxy for %s' % type, proxy)
228
229    # External interface
230    def retrieve(self, url, filename=None, reporthook=None, data=None):
231        """retrieve(url) returns (filename, headers) for a local object
232        or (tempfilename, headers) for a remote object."""
233        url = unwrap(toBytes(url))
234        if self.tempcache and url in self.tempcache:
235            return self.tempcache[url]
236        type, url1 = splittype(url)
237        if filename is None and (not type or type == 'file'):
238            try:
239                fp = self.open_local_file(url1)
240                hdrs = fp.info()
241                fp.close()
242                return url2pathname(splithost(url1)[1]), hdrs
243            except IOError:
244                pass
245        fp = self.open(url, data)
246        try:
247            headers = fp.info()
248            if filename:
249                tfp = open(filename, 'wb')
250            else:
251                import tempfile
252                garbage, path = splittype(url)
253                garbage, path = splithost(path or "")
254                path, garbage = splitquery(path or "")
255                path, garbage = splitattr(path or "")
256                suffix = os.path.splitext(path)[1]
257                (fd, filename) = tempfile.mkstemp(suffix)
258                self.__tempfiles.append(filename)
259                tfp = os.fdopen(fd, 'wb')
260            try:
261                result = filename, headers
262                if self.tempcache is not None:
263                    self.tempcache[url] = result
264                bs = 1024*8
265                size = -1
266                read = 0
267                blocknum = 0
268                if "content-length" in headers:
269                    size = int(headers["Content-Length"])
270                if reporthook:
271                    reporthook(blocknum, bs, size)
272                while 1:
273                    block = fp.read(bs)
274                    if block == "":
275                        break
276                    read += len(block)
277                    tfp.write(block)
278                    blocknum += 1
279                    if reporthook:
280                        reporthook(blocknum, bs, size)
281            finally:
282                tfp.close()
283        finally:
284            fp.close()
285
286        # raise exception if actual size does not match content-length header
287        if size >= 0 and read < size:
288            raise ContentTooShortError("retrieval incomplete: got only %i out "
289                                       "of %i bytes" % (read, size), result)
290
291        return result
292
293    # Each method named open_<type> knows how to open that type of URL
294
295    def open_http(self, url, data=None):
296        """Use HTTP protocol."""
297        import httplib
298        user_passwd = None
299        proxy_passwd= None
300        if isinstance(url, str):
301            host, selector = splithost(url)
302            if host:
303                user_passwd, host = splituser(host)
304                host = unquote(host)
305            realhost = host
306        else:
307            host, selector = url
308            # check whether the proxy contains authorization information
309            proxy_passwd, host = splituser(host)
310            # now we proceed with the url we want to obtain
311            urltype, rest = splittype(selector)
312            url = rest
313            user_passwd = None
314            if urltype.lower() != 'http':
315                realhost = None
316            else:
317                realhost, rest = splithost(rest)
318                if realhost:
319                    user_passwd, realhost = splituser(realhost)
320                if user_passwd:
321                    selector = "%s://%s%s" % (urltype, realhost, rest)
322                if proxy_bypass(realhost):
323                    host = realhost
324
325            #print "proxy via http:", host, selector
326        if not host: raise IOError, ('http error', 'no host given')
327
328        if proxy_passwd:
329            proxy_passwd = unquote(proxy_passwd)
330            proxy_auth = base64.b64encode(proxy_passwd).strip()
331        else:
332            proxy_auth = None
333
334        if user_passwd:
335            user_passwd = unquote(user_passwd)
336            auth = base64.b64encode(user_passwd).strip()
337        else:
338            auth = None
339        h = httplib.HTTP(host)
340        if data is not None:
341            h.putrequest('POST', selector)
342            h.putheader('Content-Type', 'application/x-www-form-urlencoded')
343            h.putheader('Content-Length', '%d' % len(data))
344        else:
345            h.putrequest('GET', selector)
346        if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
347        if auth: h.putheader('Authorization', 'Basic %s' % auth)
348        if realhost: h.putheader('Host', realhost)
349        for args in self.addheaders: h.putheader(*args)
350        h.endheaders(data)
351        errcode, errmsg, headers = h.getreply()
352        fp = h.getfile()
353        if errcode == -1:
354            if fp: fp.close()
355            # something went wrong with the HTTP status line
356            raise IOError, ('http protocol error', 0,
357                            'got a bad status line', None)
358        # According to RFC 2616, "2xx" code indicates that the client's
359        # request was successfully received, understood, and accepted.
360        if (200 <= errcode < 300):
361            return addinfourl(fp, headers, "http:" + url, errcode)
362        else:
363            if data is None:
364                return self.http_error(url, fp, errcode, errmsg, headers)
365            else:
366                return self.http_error(url, fp, errcode, errmsg, headers, data)
367
368    def http_error(self, url, fp, errcode, errmsg, headers, data=None):
369        """Handle http errors.
370        Derived class can override this, or provide specific handlers
371        named http_error_DDD where DDD is the 3-digit error code."""
372        # First check if there's a specific handler for this error
373        name = 'http_error_%d' % errcode
374        if hasattr(self, name):
375            method = getattr(self, name)
376            if data is None:
377                result = method(url, fp, errcode, errmsg, headers)
378            else:
379                result = method(url, fp, errcode, errmsg, headers, data)
380            if result: return result
381        return self.http_error_default(url, fp, errcode, errmsg, headers)
382
383    def http_error_default(self, url, fp, errcode, errmsg, headers):
384        """Default error handler: close the connection and raise IOError."""
385        fp.close()
386        raise IOError, ('http error', errcode, errmsg, headers)
387
388    if _have_ssl:
389        def open_https(self, url, data=None):
390            """Use HTTPS protocol."""
391
392            import httplib
393            user_passwd = None
394            proxy_passwd = None
395            if isinstance(url, str):
396                host, selector = splithost(url)
397                if host:
398                    user_passwd, host = splituser(host)
399                    host = unquote(host)
400                realhost = host
401            else:
402                host, selector = url
403                # check whether the proxy contains authorization information
404                proxy_passwd, host = splituser(host)
405                urltype, rest = splittype(selector)
406                url = rest
407                user_passwd = None
408                if urltype.lower() != 'https':
409                    realhost = None
410                else:
411                    realhost, rest = splithost(rest)
412                    if realhost:
413                        user_passwd, realhost = splituser(realhost)
414                    if user_passwd:
415                        selector = "%s://%s%s" % (urltype, realhost, rest)
416                #print "proxy via https:", host, selector
417            if not host: raise IOError, ('https error', 'no host given')
418            if proxy_passwd:
419                proxy_passwd = unquote(proxy_passwd)
420                proxy_auth = base64.b64encode(proxy_passwd).strip()
421            else:
422                proxy_auth = None
423            if user_passwd:
424                user_passwd = unquote(user_passwd)
425                auth = base64.b64encode(user_passwd).strip()
426            else:
427                auth = None
428            h = httplib.HTTPS(host, 0,
429                              key_file=self.key_file,
430                              cert_file=self.cert_file,
431                              context=self.context)
432            if data is not None:
433                h.putrequest('POST', selector)
434                h.putheader('Content-Type',
435                            'application/x-www-form-urlencoded')
436                h.putheader('Content-Length', '%d' % len(data))
437            else:
438                h.putrequest('GET', selector)
439            if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
440            if auth: h.putheader('Authorization', 'Basic %s' % auth)
441            if realhost: h.putheader('Host', realhost)
442            for args in self.addheaders: h.putheader(*args)
443            h.endheaders(data)
444            errcode, errmsg, headers = h.getreply()
445            fp = h.getfile()
446            if errcode == -1:
447                if fp: fp.close()
448                # something went wrong with the HTTP status line
449                raise IOError, ('http protocol error', 0,
450                                'got a bad status line', None)
451            # According to RFC 2616, "2xx" code indicates that the client's
452            # request was successfully received, understood, and accepted.
453            if (200 <= errcode < 300):
454                return addinfourl(fp, headers, "https:" + url, errcode)
455            else:
456                if data is None:
457                    return self.http_error(url, fp, errcode, errmsg, headers)
458                else:
459                    return self.http_error(url, fp, errcode, errmsg, headers,
460                                           data)
461
462    def open_file(self, url):
463        """Use local file or FTP depending on form of URL."""
464        if not isinstance(url, str):
465            raise IOError, ('file error', 'proxy support for file protocol currently not implemented')
466        if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
467            return self.open_ftp(url)
468        else:
469            return self.open_local_file(url)
470
471    def open_local_file(self, url):
472        """Use local file."""
473        import mimetypes, mimetools, email.utils
474        try:
475            from cStringIO import StringIO
476        except ImportError:
477            from StringIO import StringIO
478        host, file = splithost(url)
479        localname = url2pathname(file)
480        try:
481            stats = os.stat(localname)
482        except OSError, e:
483            raise IOError(e.errno, e.strerror, e.filename)
484        size = stats.st_size
485        modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
486        mtype = mimetypes.guess_type(url)[0]
487        headers = mimetools.Message(StringIO(
488            'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
489            (mtype or 'text/plain', size, modified)))
490        if not host:
491            urlfile = file
492            if file[:1] == '/':
493                urlfile = 'file://' + file
494            elif file[:2] == './':
495                raise ValueError("local file url may start with / or file:. Unknown url of type: %s" % url)
496            return addinfourl(open(localname, 'rb'),
497                              headers, urlfile)
498        host, port = splitport(host)
499        if not port \
500           and socket.gethostbyname(host) in (localhost(), thishost()):
501            urlfile = file
502            if file[:1] == '/':
503                urlfile = 'file://' + file
504            return addinfourl(open(localname, 'rb'),
505                              headers, urlfile)
506        raise IOError, ('local file error', 'not on local host')
507
508    def open_ftp(self, url):
509        """Use FTP protocol."""
510        if not isinstance(url, str):
511            raise IOError, ('ftp error', 'proxy support for ftp protocol currently not implemented')
512        import mimetypes, mimetools
513        try:
514            from cStringIO import StringIO
515        except ImportError:
516            from StringIO import StringIO
517        host, path = splithost(url)
518        if not host: raise IOError, ('ftp error', 'no host given')
519        host, port = splitport(host)
520        user, host = splituser(host)
521        if user: user, passwd = splitpasswd(user)
522        else: passwd = None
523        host = unquote(host)
524        user = user or ''
525        passwd = passwd or ''
526        host = socket.gethostbyname(host)
527        if not port:
528            import ftplib
529            port = ftplib.FTP_PORT
530        else:
531            port = int(port)
532        path, attrs = splitattr(path)
533        path = unquote(path)
534        dirs = path.split('/')
535        dirs, file = dirs[:-1], dirs[-1]
536        if dirs and not dirs[0]: dirs = dirs[1:]
537        if dirs and not dirs[0]: dirs[0] = '/'
538        key = user, host, port, '/'.join(dirs)
539        # XXX thread unsafe!
540        if len(self.ftpcache) > MAXFTPCACHE:
541            # Prune the cache, rather arbitrarily
542            for k in self.ftpcache.keys():
543                if k != key:
544                    v = self.ftpcache[k]
545                    del self.ftpcache[k]
546                    v.close()
547        try:
548            if not key in self.ftpcache:
549                self.ftpcache[key] = \
550                    ftpwrapper(user, passwd, host, port, dirs)
551            if not file: type = 'D'
552            else: type = 'I'
553            for attr in attrs:
554                attr, value = splitvalue(attr)
555                if attr.lower() == 'type' and \
556                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
557                    type = value.upper()
558            (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
559            mtype = mimetypes.guess_type("ftp:" + url)[0]
560            headers = ""
561            if mtype:
562                headers += "Content-Type: %s\n" % mtype
563            if retrlen is not None and retrlen >= 0:
564                headers += "Content-Length: %d\n" % retrlen
565            headers = mimetools.Message(StringIO(headers))
566            return addinfourl(fp, headers, "ftp:" + url)
567        except ftperrors(), msg:
568            raise IOError, ('ftp error', msg), sys.exc_info()[2]
569
570    def open_data(self, url, data=None):
571        """Use "data" URL."""
572        if not isinstance(url, str):
573            raise IOError, ('data error', 'proxy support for data protocol currently not implemented')
574        # ignore POSTed data
575        #
576        # syntax of data URLs:
577        # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
578        # mediatype := [ type "/" subtype ] *( ";" parameter )
579        # data      := *urlchar
580        # parameter := attribute "=" value
581        import mimetools
582        try:
583            from cStringIO import StringIO
584        except ImportError:
585            from StringIO import StringIO
586        try:
587            [type, data] = url.split(',', 1)
588        except ValueError:
589            raise IOError, ('data error', 'bad data URL')
590        if not type:
591            type = 'text/plain;charset=US-ASCII'
592        semi = type.rfind(';')
593        if semi >= 0 and '=' not in type[semi:]:
594            encoding = type[semi+1:]
595            type = type[:semi]
596        else:
597            encoding = ''
598        msg = []
599        msg.append('Date: %s'%time.strftime('%a, %d %b %Y %H:%M:%S GMT',
600                                            time.gmtime(time.time())))
601        msg.append('Content-type: %s' % type)
602        if encoding == 'base64':
603            data = base64.decodestring(data)
604        else:
605            data = unquote(data)
606        msg.append('Content-Length: %d' % len(data))
607        msg.append('')
608        msg.append(data)
609        msg = '\n'.join(msg)
610        f = StringIO(msg)
611        headers = mimetools.Message(f, 0)
612        #f.fileno = None     # needed for addinfourl
613        return addinfourl(f, headers, url)
614
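# Illustrative sketch (not part of the original module): opening a "data" URL
# through open_data() above needs no network at all; the base64 payload below
# decodes to 'Hello, world!'.
def _example_data_url():
    opener = URLopener()
    f = opener.open("data:text/plain;base64,SGVsbG8sIHdvcmxkIQ==")
    try:
        body = f.read()                                 # 'Hello, world!'
        ctype = f.info().getheader('Content-type')      # 'text/plain'
    finally:
        f.close()
    return ctype, body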
615
616class FancyURLopener(URLopener):
617    """Derived class with handlers for errors we can handle (perhaps)."""
618
619    def __init__(self, *args, **kwargs):
620        URLopener.__init__(self, *args, **kwargs)
621        self.auth_cache = {}
622        self.tries = 0
623        self.maxtries = 10
624
625    def http_error_default(self, url, fp, errcode, errmsg, headers):
626        """Default error handling -- don't raise an exception."""
627        return addinfourl(fp, headers, "http:" + url, errcode)
628
629    def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
630        """Error 302 -- relocated (temporarily)."""
631        self.tries += 1
632        try:
633            if self.maxtries and self.tries >= self.maxtries:
634                if hasattr(self, "http_error_500"):
635                    meth = self.http_error_500
636                else:
637                    meth = self.http_error_default
638                return meth(url, fp, 500,
639                            "Internal Server Error: Redirect Recursion",
640                            headers)
641            result = self.redirect_internal(url, fp, errcode, errmsg,
642                                            headers, data)
643            return result
644        finally:
645            self.tries = 0
646
647    def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
648        if 'location' in headers:
649            newurl = headers['location']
650        elif 'uri' in headers:
651            newurl = headers['uri']
652        else:
653            return
654        fp.close()
655        # In case the server sent a relative URL, join with original:
656        newurl = basejoin(self.type + ":" + url, newurl)
657
658        # For security reasons we do not allow redirects to protocols
659        # other than HTTP, HTTPS or FTP.
660        newurl_lower = newurl.lower()
661        if not (newurl_lower.startswith('http://') or
662                newurl_lower.startswith('https://') or
663                newurl_lower.startswith('ftp://')):
664            raise IOError('redirect error', errcode,
665                          errmsg + " - Redirection to url '%s' is not allowed" %
666                          newurl,
667                          headers)
668
669        return self.open(newurl)
670
671    def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
672        """Error 301 -- also relocated (permanently)."""
673        return self.http_error_302(url, fp, errcode, errmsg, headers, data)
674
675    def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
676        """Error 303 -- also relocated (essentially identical to 302)."""
677        return self.http_error_302(url, fp, errcode, errmsg, headers, data)
678
679    def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
680        """Error 307 -- relocated, but turn POST into error."""
681        if data is None:
682            return self.http_error_302(url, fp, errcode, errmsg, headers, data)
683        else:
684            return self.http_error_default(url, fp, errcode, errmsg, headers)
685
686    def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
687        """Error 401 -- authentication required.
688        This function supports Basic authentication only."""
689        if not 'www-authenticate' in headers:
690            URLopener.http_error_default(self, url, fp,
691                                         errcode, errmsg, headers)
692        stuff = headers['www-authenticate']
693        import re
694        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
695        if not match:
696            URLopener.http_error_default(self, url, fp,
697                                         errcode, errmsg, headers)
698        scheme, realm = match.groups()
699        if scheme.lower() != 'basic':
700            URLopener.http_error_default(self, url, fp,
701                                         errcode, errmsg, headers)
702        name = 'retry_' + self.type + '_basic_auth'
703        if data is None:
704            return getattr(self,name)(url, realm)
705        else:
706            return getattr(self,name)(url, realm, data)
707
708    def http_error_407(self, url, fp, errcode, errmsg, headers, data=None):
709        """Error 407 -- proxy authentication required.
710        This function supports Basic authentication only."""
711        if not 'proxy-authenticate' in headers:
712            URLopener.http_error_default(self, url, fp,
713                                         errcode, errmsg, headers)
714        stuff = headers['proxy-authenticate']
715        import re
716        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
717        if not match:
718            URLopener.http_error_default(self, url, fp,
719                                         errcode, errmsg, headers)
720        scheme, realm = match.groups()
721        if scheme.lower() != 'basic':
722            URLopener.http_error_default(self, url, fp,
723                                         errcode, errmsg, headers)
724        name = 'retry_proxy_' + self.type + '_basic_auth'
725        if data is None:
726            return getattr(self,name)(url, realm)
727        else:
728            return getattr(self,name)(url, realm, data)
729
730    def retry_proxy_http_basic_auth(self, url, realm, data=None):
731        host, selector = splithost(url)
732        newurl = 'http://' + host + selector
733        proxy = self.proxies['http']
734        urltype, proxyhost = splittype(proxy)
735        proxyhost, proxyselector = splithost(proxyhost)
736        i = proxyhost.find('@') + 1
737        proxyhost = proxyhost[i:]
738        user, passwd = self.get_user_passwd(proxyhost, realm, i)
739        if not (user or passwd): return None
740        proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
741        self.proxies['http'] = 'http://' + proxyhost + proxyselector
742        if data is None:
743            return self.open(newurl)
744        else:
745            return self.open(newurl, data)
746
747    def retry_proxy_https_basic_auth(self, url, realm, data=None):
748        host, selector = splithost(url)
749        newurl = 'https://' + host + selector
750        proxy = self.proxies['https']
751        urltype, proxyhost = splittype(proxy)
752        proxyhost, proxyselector = splithost(proxyhost)
753        i = proxyhost.find('@') + 1
754        proxyhost = proxyhost[i:]
755        user, passwd = self.get_user_passwd(proxyhost, realm, i)
756        if not (user or passwd): return None
757        proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
758        self.proxies['https'] = 'https://' + proxyhost + proxyselector
759        if data is None:
760            return self.open(newurl)
761        else:
762            return self.open(newurl, data)
763
764    def retry_http_basic_auth(self, url, realm, data=None):
765        host, selector = splithost(url)
766        i = host.find('@') + 1
767        host = host[i:]
768        user, passwd = self.get_user_passwd(host, realm, i)
769        if not (user or passwd): return None
770        host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
771        newurl = 'http://' + host + selector
772        if data is None:
773            return self.open(newurl)
774        else:
775            return self.open(newurl, data)
776
777    def retry_https_basic_auth(self, url, realm, data=None):
778        host, selector = splithost(url)
779        i = host.find('@') + 1
780        host = host[i:]
781        user, passwd = self.get_user_passwd(host, realm, i)
782        if not (user or passwd): return None
783        host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
784        newurl = 'https://' + host + selector
785        if data is None:
786            return self.open(newurl)
787        else:
788            return self.open(newurl, data)
789
790    def get_user_passwd(self, host, realm, clear_cache=0):
791        key = realm + '@' + host.lower()
792        if key in self.auth_cache:
793            if clear_cache:
794                del self.auth_cache[key]
795            else:
796                return self.auth_cache[key]
797        user, passwd = self.prompt_user_passwd(host, realm)
798        if user or passwd: self.auth_cache[key] = (user, passwd)
799        return user, passwd
800
801    def prompt_user_passwd(self, host, realm):
802        """Override this in a GUI environment!"""
803        import getpass
804        try:
805            user = raw_input("Enter username for %s at %s: " % (realm,
806                                                                host))
807            passwd = getpass.getpass("Enter password for %s in %s at %s: " %
808                (user, realm, host))
809            return user, passwd
810        except KeyboardInterrupt:
811            print
812            return None, None
813
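# Illustrative sketch (not part of the original module): prompt_user_passwd()
# is meant to be overridden when interactive prompting is unsuitable, e.g. in
# a GUI or an unattended script.  The subclass name and credential mapping
# below are hypothetical.
class _NonInteractiveOpener(FancyURLopener):
    """FancyURLopener that answers 401/407 challenges from a fixed dict."""

    def __init__(self, credentials=None, *args, **kwargs):
        FancyURLopener.__init__(self, *args, **kwargs)
        # mapping of (realm, host) -> (user, passwd)
        self.credentials = credentials or {}

    def prompt_user_passwd(self, host, realm):
        # Returning (None, None) makes the 401/407 handling give up and
        # simply hand back the error response.
        return self.credentials.get((realm, host), (None, None))
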
814
815# Utility functions
816
817_localhost = None
818def localhost():
819    """Return the IP address of the magic hostname 'localhost'."""
820    global _localhost
821    if _localhost is None:
822        _localhost = socket.gethostbyname('localhost')
823    return _localhost
824
825_thishost = None
826def thishost():
827    """Return the IP address of the current host."""
828    global _thishost
829    if _thishost is None:
830        try:
831            _thishost = socket.gethostbyname(socket.gethostname())
832        except socket.gaierror:
833            _thishost = socket.gethostbyname('localhost')
834    return _thishost
835
836_ftperrors = None
837def ftperrors():
838    """Return the set of errors raised by the FTP class."""
839    global _ftperrors
840    if _ftperrors is None:
841        import ftplib
842        _ftperrors = ftplib.all_errors
843    return _ftperrors
844
845_noheaders = None
846def noheaders():
847    """Return an empty mimetools.Message object."""
848    global _noheaders
849    if _noheaders is None:
850        import mimetools
851        try:
852            from cStringIO import StringIO
853        except ImportError:
854            from StringIO import StringIO
855        _noheaders = mimetools.Message(StringIO(), 0)
856        _noheaders.fp.close()   # Recycle file descriptor
857    return _noheaders
858
859
860# Utility classes
861
862class ftpwrapper:
863    """Class used by open_ftp() for cache of open FTP connections."""
864
865    def __init__(self, user, passwd, host, port, dirs,
866                 timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
867                 persistent=True):
868        self.user = user
869        self.passwd = passwd
870        self.host = host
871        self.port = port
872        self.dirs = dirs
873        self.timeout = timeout
874        self.refcount = 0
875        self.keepalive = persistent
876        try:
877            self.init()
878        except:
879            self.close()
880            raise
881
882    def init(self):
883        import ftplib
884        self.busy = 0
885        self.ftp = ftplib.FTP()
886        self.ftp.connect(self.host, self.port, self.timeout)
887        self.ftp.login(self.user, self.passwd)
888        _target = '/'.join(self.dirs)
889        self.ftp.cwd(_target)
890
891    def retrfile(self, file, type):
892        import ftplib
893        self.endtransfer()
894        if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
895        else: cmd = 'TYPE ' + type; isdir = 0
896        try:
897            self.ftp.voidcmd(cmd)
898        except ftplib.all_errors:
899            self.init()
900            self.ftp.voidcmd(cmd)
901        conn = None
902        if file and not isdir:
903            # Try to retrieve as a file
904            try:
905                cmd = 'RETR ' + file
906                conn, retrlen = self.ftp.ntransfercmd(cmd)
907            except ftplib.error_perm, reason:
908                if str(reason)[:3] != '550':
909                    raise IOError, ('ftp error', reason), sys.exc_info()[2]
910        if not conn:
911            # Set transfer mode to ASCII!
912            self.ftp.voidcmd('TYPE A')
913            # Try a directory listing. Verify that directory exists.
914            if file:
915                pwd = self.ftp.pwd()
916                try:
917                    try:
918                        self.ftp.cwd(file)
919                    except ftplib.error_perm, reason:
920                        raise IOError, ('ftp error', reason), sys.exc_info()[2]
921                finally:
922                    self.ftp.cwd(pwd)
923                cmd = 'LIST ' + file
924            else:
925                cmd = 'LIST'
926            conn, retrlen = self.ftp.ntransfercmd(cmd)
927        self.busy = 1
928        ftpobj = addclosehook(conn.makefile('rb'), self.file_close)
929        self.refcount += 1
930        conn.close()
931        # Pass back both a suitably decorated object and a retrieval length
932        return (ftpobj, retrlen)
933
934    def endtransfer(self):
935        self.busy = 0
936
937    def close(self):
938        self.keepalive = False
939        if self.refcount <= 0:
940            self.real_close()
941
942    def file_close(self):
943        self.endtransfer()
944        self.refcount -= 1
945        if self.refcount <= 0 and not self.keepalive:
946            self.real_close()
947
948    def real_close(self):
949        self.endtransfer()
950        try:
951            self.ftp.close()
952        except ftperrors():
953            pass
954
955class addbase:
956    """Base class for addinfo and addclosehook."""
957
958    def __init__(self, fp):
959        self.fp = fp
960        self.read = self.fp.read
961        self.readline = self.fp.readline
962        if hasattr(self.fp, "readlines"): self.readlines = self.fp.readlines
963        if hasattr(self.fp, "fileno"):
964            self.fileno = self.fp.fileno
965        else:
966            self.fileno = lambda: None
967        if hasattr(self.fp, "__iter__"):
968            self.__iter__ = self.fp.__iter__
969            if hasattr(self.fp, "next"):
970                self.next = self.fp.next
971
972    def __repr__(self):
973        return '<%s at %r whose fp = %r>' % (self.__class__.__name__,
974                                             id(self), self.fp)
975
976    def close(self):
977        self.read = None
978        self.readline = None
979        self.readlines = None
980        self.fileno = None
981        if self.fp: self.fp.close()
982        self.fp = None
983
984class addclosehook(addbase):
985    """Class to add a close hook to an open file."""
986
987    def __init__(self, fp, closehook, *hookargs):
988        addbase.__init__(self, fp)
989        self.closehook = closehook
990        self.hookargs = hookargs
991
992    def close(self):
993        try:
994            closehook = self.closehook
995            hookargs = self.hookargs
996            if closehook:
997                self.closehook = None
998                self.hookargs = None
999                closehook(*hookargs)
1000        finally:
1001            addbase.close(self)
1002
1003
1004class addinfo(addbase):
1005    """class to add an info() method to an open file."""
1006
1007    def __init__(self, fp, headers):
1008        addbase.__init__(self, fp)
1009        self.headers = headers
1010
1011    def info(self):
1012        return self.headers
1013
1014class addinfourl(addbase):
1015    """class to add info() and geturl() methods to an open file."""
1016
1017    def __init__(self, fp, headers, url, code=None):
1018        addbase.__init__(self, fp)
1019        self.headers = headers
1020        self.url = url
1021        self.code = code
1022
1023    def info(self):
1024        return self.headers
1025
1026    def getcode(self):
1027        return self.code
1028
1029    def geturl(self):
1030        return self.url
1031
1032
1033# Utilities to parse URLs (most of these return None for missing parts):
1034# unwrap('<URL:type://host/path>') --> 'type://host/path'
1035# splittype('type:opaquestring') --> 'type', 'opaquestring'
1036# splithost('//host[:port]/path') --> 'host[:port]', '/path'
1037# splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
1038# splitpasswd('user:passwd') -> 'user', 'passwd'
1039# splitport('host:port') --> 'host', 'port'
1040# splitquery('/path?query') --> '/path', 'query'
1041# splittag('/path#tag') --> '/path', 'tag'
1042# splitattr('/path;attr1=value1;attr2=value2;...') ->
1043#   '/path', ['attr1=value1', 'attr2=value2', ...]
1044# splitvalue('attr=value') --> 'attr', 'value'
1045# unquote('abc%20def') -> 'abc def'
1046# quote('abc def') -> 'abc%20def'
1047
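# Illustrative sketch (not part of the original module): how the split*
# helpers below compose to take a URL apart piece by piece.  The sample URL
# is made up.
def _example_split_url():
    url = "http://joe:secret@www.example.com:8080/docs/page;type=a?q=1#top"
    scheme, rest = splittype(url)            # 'http', '//joe:secret@...#top'
    netloc, path = splithost(rest)           # 'joe:secret@www.example.com:8080', '/docs/page;type=a?q=1#top'
    userinfo, netloc = splituser(netloc)     # 'joe:secret', 'www.example.com:8080'
    user, passwd = splitpasswd(userinfo)     # 'joe', 'secret'
    host, port = splitport(netloc)           # 'www.example.com', '8080'
    path, tag = splittag(path)               # '/docs/page;type=a?q=1', 'top'
    path, query = splitquery(path)           # '/docs/page;type=a', 'q=1'
    path, attrs = splitattr(path)            # '/docs/page', ['type=a']
    return scheme, user, passwd, host, port, path, attrs, query, tag
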
1048try:
1049    unicode
1050except NameError:
1051    def _is_unicode(x):
1052        return 0
1053else:
1054    def _is_unicode(x):
1055        return isinstance(x, unicode)
1056
1057def toBytes(url):
1058    """toBytes(u"URL") --> 'URL'."""
1059    # Most URL schemes require ASCII. If that changes, the conversion
1060    # can be relaxed
1061    if _is_unicode(url):
1062        try:
1063            url = url.encode("ASCII")
1064        except UnicodeError:
1065            raise UnicodeError("URL " + repr(url) +
1066                               " contains non-ASCII characters")
1067    return url
1068
1069def unwrap(url):
1070    """unwrap('<URL:type://host/path>') --> 'type://host/path'."""
1071    url = url.strip()
1072    if url[:1] == '<' and url[-1:] == '>':
1073        url = url[1:-1].strip()
1074    if url[:4] == 'URL:': url = url[4:].strip()
1075    return url
1076
1077_typeprog = None
1078def splittype(url):
1079    """splittype('type:opaquestring') --> 'type', 'opaquestring'."""
1080    global _typeprog
1081    if _typeprog is None:
1082        import re
1083        _typeprog = re.compile('^([^/:]+):')
1084
1085    match = _typeprog.match(url)
1086    if match:
1087        scheme = match.group(1)
1088        return scheme.lower(), url[len(scheme) + 1:]
1089    return None, url
1090
1091_hostprog = None
1092def splithost(url):
1093    """splithost('//host[:port]/path') --> 'host[:port]', '/path'."""
1094    global _hostprog
1095    if _hostprog is None:
1096        import re
1097        _hostprog = re.compile('^//([^/?]*)(.*)$')
1098
1099    match = _hostprog.match(url)
1100    if match:
1101        host_port = match.group(1)
1102        path = match.group(2)
1103        if path and not path.startswith('/'):
1104            path = '/' + path
1105        return host_port, path
1106    return None, url
1107
1108_userprog = None
1109def splituser(host):
1110    """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'."""
1111    global _userprog
1112    if _userprog is None:
1113        import re
1114        _userprog = re.compile('^(.*)@(.*)$')
1115
1116    match = _userprog.match(host)
1117    if match: return match.group(1, 2)
1118    return None, host
1119
1120_passwdprog = None
1121def splitpasswd(user):
1122    """splitpasswd('user:passwd') -> 'user', 'passwd'."""
1123    global _passwdprog
1124    if _passwdprog is None:
1125        import re
1126        _passwdprog = re.compile('^([^:]*):(.*)$',re.S)
1127
1128    match = _passwdprog.match(user)
1129    if match: return match.group(1, 2)
1130    return user, None
1131
1132# splittag('/path#tag') --> '/path', 'tag'
1133_portprog = None
1134def splitport(host):
1135    """splitport('host:port') --> 'host', 'port'."""
1136    global _portprog
1137    if _portprog is None:
1138        import re
1139        _portprog = re.compile('^(.*):([0-9]*)$')
1140
1141    match = _portprog.match(host)
1142    if match:
1143        host, port = match.groups()
1144        if port:
1145            return host, port
1146    return host, None
1147
1148_nportprog = None
1149def splitnport(host, defport=-1):
1150    """Split host and port, returning numeric port.
1151    Return given default port if no ':' found; defaults to -1.
1152    Return numerical port if a valid number is found after ':'.
1153    Return None if ':' is present but not followed by a valid number."""
1154    global _nportprog
1155    if _nportprog is None:
1156        import re
1157        _nportprog = re.compile('^(.*):(.*)$')
1158
1159    match = _nportprog.match(host)
1160    if match:
1161        host, port = match.group(1, 2)
1162        if port:
1163            try:
1164                nport = int(port)
1165            except ValueError:
1166                nport = None
1167            return host, nport
1168    return host, defport
1169
1170_queryprog = None
1171def splitquery(url):
1172    """splitquery('/path?query') --> '/path', 'query'."""
1173    global _queryprog
1174    if _queryprog is None:
1175        import re
1176        _queryprog = re.compile('^(.*)\?([^?]*)$')
1177
1178    match = _queryprog.match(url)
1179    if match: return match.group(1, 2)
1180    return url, None
1181
1182_tagprog = None
1183def splittag(url):
1184    """splittag('/path#tag') --> '/path', 'tag'."""
1185    global _tagprog
1186    if _tagprog is None:
1187        import re
1188        _tagprog = re.compile('^(.*)#([^#]*)$')
1189
1190    match = _tagprog.match(url)
1191    if match: return match.group(1, 2)
1192    return url, None
1193
1194def splitattr(url):
1195    """splitattr('/path;attr1=value1;attr2=value2;...') ->
1196        '/path', ['attr1=value1', 'attr2=value2', ...]."""
1197    words = url.split(';')
1198    return words[0], words[1:]
1199
1200_valueprog = None
1201def splitvalue(attr):
1202    """splitvalue('attr=value') --> 'attr', 'value'."""
1203    global _valueprog
1204    if _valueprog is None:
1205        import re
1206        _valueprog = re.compile('^([^=]*)=(.*)$')
1207
1208    match = _valueprog.match(attr)
1209    if match: return match.group(1, 2)
1210    return attr, None
1211
1212# urlparse contains a duplicate of this method to avoid a circular import.  If
1213# you update this method, also update the copy in urlparse.  This code
1214# duplication does not exist in Python3.
1215
1216_hexdig = '0123456789ABCDEFabcdef'
1217_hextochr = dict((a + b, chr(int(a + b, 16)))
1218                 for a in _hexdig for b in _hexdig)
1219_asciire = re.compile('([\x00-\x7f]+)')
1220
1221def unquote(s):
1222    """unquote('abc%20def') -> 'abc def'."""
1223    if _is_unicode(s):
1224        if '%' not in s:
1225            return s
1226        bits = _asciire.split(s)
1227        res = [bits[0]]
1228        append = res.append
1229        for i in range(1, len(bits), 2):
1230            append(unquote(str(bits[i])).decode('latin1'))
1231            append(bits[i + 1])
1232        return ''.join(res)
1233
1234    bits = s.split('%')
1235    # fastpath
1236    if len(bits) == 1:
1237        return s
1238    res = [bits[0]]
1239    append = res.append
1240    for item in bits[1:]:
1241        try:
1242            append(_hextochr[item[:2]])
1243            append(item[2:])
1244        except KeyError:
1245            append('%')
1246            append(item)
1247    return ''.join(res)
1248
1249def unquote_plus(s):
1250    """unquote('%7e/abc+def') -> '~/abc def'"""
1251    s = s.replace('+', ' ')
1252    return unquote(s)
1253
1254always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
1255               'abcdefghijklmnopqrstuvwxyz'
1256               '0123456789' '_.-')
1257_safe_map = {}
1258for i, c in zip(xrange(256), str(bytearray(xrange(256)))):
1259    _safe_map[c] = c if (i < 128 and c in always_safe) else '%{:02X}'.format(i)
1260_safe_quoters = {}
1261
1262def quote(s, safe='/'):
1263    """quote('abc def') -> 'abc%20def'
1264
1265    Each part of a URL, e.g. the path info, the query, etc., has a
1266    different set of reserved characters that must be quoted.
1267
1268    RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
1269    the following reserved characters.
1270
1271    reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
1272                  "$" | ","
1273
1274    Each of these characters is reserved in some component of a URL,
1275    but not necessarily in all of them.
1276
1277    By default, the quote function is intended for quoting the path
1278    section of a URL.  Thus, it will not encode '/'.  This character
1279    is reserved, but in typical usage the quote function is being
1280    called on a path where the existing slash characters are used as
1281    reserved characters.
1282    """
1283    # fastpath
1284    if not s:
1285        if s is None:
1286            raise TypeError('None object cannot be quoted')
1287        return s
1288    cachekey = (safe, always_safe)
1289    try:
1290        (quoter, safe) = _safe_quoters[cachekey]
1291    except KeyError:
1292        safe_map = _safe_map.copy()
1293        safe_map.update([(c, c) for c in safe])
1294        quoter = safe_map.__getitem__
1295        safe = always_safe + safe
1296        _safe_quoters[cachekey] = (quoter, safe)
1297    if not s.rstrip(safe):
1298        return s
1299    return ''.join(map(quoter, s))
1300
1301def quote_plus(s, safe=''):
1302    """Quote the query fragment of a URL; replacing ' ' with '+'"""
1303    if ' ' in s:
1304        s = quote(s, safe + ' ')
1305        return s.replace(' ', '+')
1306    return quote(s, safe)
1307
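# Illustrative sketch (not part of the original module): the difference
# between quote() (path-style, leaves '/' alone) and quote_plus()
# (query-style, turns spaces into '+'), plus the matching unquote* calls.
def _example_quoting():
    assert quote('/~user/some file.html') == '/%7Euser/some%20file.html'
    assert quote_plus('some file.html') == 'some+file.html'
    assert unquote('/%7Euser/some%20file.html') == '/~user/some file.html'
    assert unquote_plus('some+file.html') == 'some file.html'
    return True
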
1308def urlencode(query, doseq=0):
1309    """Encode a sequence of two-element tuples or dictionary into a URL query string.
1310
1311    If any values in the query arg are sequences and doseq is true, each
1312    sequence element is converted to a separate parameter.
1313
1314    If the query arg is a sequence of two-element tuples, the order of the
1315    parameters in the output will match the order of parameters in the
1316    input.
1317    """
1318
1319    if hasattr(query,"items"):
1320        # mapping objects
1321        query = query.items()
1322    else:
1323        # it's a bother at times that strings and string-like objects are
1324        # sequences...
1325        try:
1326            # non-sequence items should not work with len()
1327            # non-empty strings will fail this
1328            if len(query) and not isinstance(query[0], tuple):
1329                raise TypeError
1330            # zero-length sequences of all types will get here and succeed,
1331            # but that's a minor nit - since the original implementation
1332            # allowed empty dicts that type of behavior probably should be
1333            # preserved for consistency
1334        except TypeError:
1335            ty,va,tb = sys.exc_info()
1336            raise TypeError, "not a valid non-string sequence or mapping object", tb
1337
1338    l = []
1339    if not doseq:
1340        # preserve old behavior
1341        for k, v in query:
1342            k = quote_plus(str(k))
1343            v = quote_plus(str(v))
1344            l.append(k + '=' + v)
1345    else:
1346        for k, v in query:
1347            k = quote_plus(str(k))
1348            if isinstance(v, str):
1349                v = quote_plus(v)
1350                l.append(k + '=' + v)
1351            elif _is_unicode(v):
1352                # is there a reasonable way to convert to ASCII?
1353                # encode generates a string, but "replace" or "ignore"
1354                # lose information and "strict" can raise UnicodeError
1355                v = quote_plus(v.encode("ASCII","replace"))
1356                l.append(k + '=' + v)
1357            else:
1358                try:
1359                    # is this a sufficient test for sequence-ness?
1360                    len(v)
1361                except TypeError:
1362                    # not a sequence
1363                    v = quote_plus(str(v))
1364                    l.append(k + '=' + v)
1365                else:
1366                    # loop over the sequence
1367                    for elt in v:
1368                        l.append(k + '=' + quote_plus(str(elt)))
1369    return '&'.join(l)
1370
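# Illustrative sketch (not part of the original module): a list of 2-tuples
# keeps parameter order, and doseq=1 expands sequence values into repeated
# parameters instead of quoting their repr().
def _example_urlencode():
    assert urlencode([('q', 'a b'), ('lang', 'en')]) == 'q=a+b&lang=en'
    assert urlencode([('tag', ['x', 'y'])], doseq=1) == 'tag=x&tag=y'
    return True
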
1371# Proxy handling
1372def getproxies_environment():
1373    """Return a dictionary of scheme -> proxy server URL mappings.
1374
1375    Scan the environment for variables named <scheme>_proxy;
1376    this seems to be the standard convention.  In order to prefer lowercase
1377    variables, we process the environment in two passes: the first pass matches
1378    variables of any case, and the second matches only lowercase ones.
1379
1380    If you need a different way, you can pass a proxies dictionary to the
1381    [Fancy]URLopener constructor.
1382    """
1383    # Get all variables
1384    proxies = {}
1385    for name, value in os.environ.items():
1386        name = name.lower()
1387        if value and name[-6:] == '_proxy':
1388            proxies[name[:-6]] = value
1389
1390    # CVE-2016-1000110 - If we are running as CGI script, forget HTTP_PROXY
1391    # (non-all-lowercase) as it may be set from the web server by a "Proxy:"
1392    # header from the client
1393    # If "proxy" is lowercase, it will still be used thanks to the next block
1394    if 'REQUEST_METHOD' in os.environ:
1395        proxies.pop('http', None)
1396
1397    # Get lowercase variables
1398    for name, value in os.environ.items():
1399        if name[-6:] == '_proxy':
1400            name = name.lower()
1401            if value:
1402                proxies[name[:-6]] = value
1403            else:
1404                proxies.pop(name[:-6], None)
1405
1406    return proxies
1407
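# Illustrative sketch (not part of the original module): any "<scheme>_proxy"
# environment variable shows up under the bare scheme name in the returned
# mapping.  The proxy URL below is made up; the environment is restored
# afterwards.
def _example_proxy_environment():
    saved = os.environ.get('http_proxy')
    os.environ['http_proxy'] = 'http://proxy.example.com:3128/'
    try:
        return getproxies_environment().get('http')   # the URL set above
    finally:
        if saved is None:
            del os.environ['http_proxy']
        else:
            os.environ['http_proxy'] = saved
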
1408def proxy_bypass_environment(host, proxies=None):
1409    """Test if proxies should not be used for a particular host.
1410
1411    Checks the proxies dict for the value of no_proxy, which should be a
1412    list of comma separated DNS suffixes, or '*' for all hosts.
1413    """
1414    if proxies is None:
1415        proxies = getproxies_environment()
1416    # don't bypass, if no_proxy isn't specified
1417    try:
1418        no_proxy = proxies['no']
1419    except KeyError:
1420        return 0
1421    # '*' is special case for always bypass
1422    if no_proxy == '*':
1423        return 1
1424    # strip port off host
1425    hostonly, port = splitport(host)
1426    # check if the host ends with any of the DNS suffixes
1427    no_proxy_list = [proxy.strip() for proxy in no_proxy.split(',')]
1428    for name in no_proxy_list:
1429        if name:
1430            name = re.escape(name)
1431            pattern = r'(.+\.)?%s$' % name
1432            if (re.match(pattern, hostonly, re.I)
1433                    or re.match(pattern, host, re.I)):
1434                return 1
1435    # otherwise, don't bypass
1436    return 0
1437
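# Illustrative sketch (not part of the original module): a host matching one
# of the comma-separated no_proxy suffixes is bypassed; the hosts and
# suffixes below are made up.
def _example_no_proxy():
    env = {'no': 'example.com, localhost'}
    assert proxy_bypass_environment('www.example.com:8080', env)
    assert not proxy_bypass_environment('www.python.org', env)
    return True
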
1438
1439if sys.platform == 'darwin':
1440    from _scproxy import _get_proxy_settings, _get_proxies
1441
1442    def proxy_bypass_macosx_sysconf(host):
1443        """
1444        Return True iff this host shouldn't be accessed using a proxy
1445
1446        This function uses the MacOSX framework SystemConfiguration
1447        to fetch the proxy information.
1448        """
1449        import re
1450        import socket
1451        from fnmatch import fnmatch
1452
1453        hostonly, port = splitport(host)
1454
1455        def ip2num(ipAddr):
1456            parts = ipAddr.split('.')
1457            parts = map(int, parts)
1458            if len(parts) != 4:
1459                parts = (parts + [0, 0, 0, 0])[:4]
1460            return (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3]
1461
1462        proxy_settings = _get_proxy_settings()
1463
1464        # Check for simple host names:
1465        if '.' not in host:
1466            if proxy_settings['exclude_simple']:
1467                return True
1468
1469        hostIP = None
1470
1471        for value in proxy_settings.get('exceptions', ()):
1472            # Items in the list are strings like these: *.local, 169.254/16
1473            if not value: continue
1474
1475            m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value)
1476            if m is not None:
1477                if hostIP is None:
1478                    try:
1479                        hostIP = socket.gethostbyname(hostonly)
1480                        hostIP = ip2num(hostIP)
1481                    except socket.error:
1482                        continue
1483
1484                base = ip2num(m.group(1))
1485                mask = m.group(2)
1486                if mask is None:
1487                    mask = 8 * (m.group(1).count('.') + 1)
1488
1489                else:
1490                    mask = int(mask[1:])
1491                mask = 32 - mask
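                # For example, the exception '169.254/16' gives base 0xa9fe0000
                # and a shift of 16, so only the leading 16 bits of hostIP and
                # base are compared; a bare '169.254' with no '/nn' implies 8
                # bits per dotted component (also /16 here).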

                if (hostIP >> mask) == (base >> mask):
                    return True

            elif fnmatch(host, value):
                return True

        return False

    def getproxies_macosx_sysconf():
        """Return a dictionary of scheme -> proxy server URL mappings.

        This function uses the MacOSX framework SystemConfiguration
        to fetch the proxy information.
        """
        return _get_proxies()

    def proxy_bypass(host):
        """Return True if the host should be bypassed.

        Checks proxy settings gathered from the environment, if specified, or
        from the MacOSX framework SystemConfiguration.
        """
        proxies = getproxies_environment()
        if proxies:
            return proxy_bypass_environment(host, proxies)
        else:
            return proxy_bypass_macosx_sysconf(host)

    def getproxies():
        return getproxies_environment() or getproxies_macosx_sysconf()

elif os.name == 'nt':
    def getproxies_registry():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Win32 uses the registry to store proxies.

        """
        proxies = {}
        try:
            import _winreg
        except ImportError:
            # Std module, so should be around - but you never know!
            return proxies
        try:
            internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            proxyEnable = _winreg.QueryValueEx(internetSettings,
                                               'ProxyEnable')[0]
            if proxyEnable:
                # Returned as Unicode; convert to an ASCII str to avoid problems later
                proxyServer = str(_winreg.QueryValueEx(internetSettings,
                                                       'ProxyServer')[0])
                if '=' in proxyServer:
                    # Per-protocol settings
                    for p in proxyServer.split(';'):
                        protocol, address = p.split('=', 1)
                        # See if address has a type:// prefix
                        import re
                        if not re.match('^([^/:]+)://', address):
                            address = '%s://%s' % (protocol, address)
                        proxies[protocol] = address
                else:
                    # Use one setting for all protocols
                    if proxyServer[:5] == 'http:':
                        proxies['http'] = proxyServer
                    else:
                        proxies['http'] = 'http://%s' % proxyServer
                        proxies['https'] = 'https://%s' % proxyServer
                        proxies['ftp'] = 'ftp://%s' % proxyServer
            internetSettings.Close()
        except (WindowsError, ValueError, TypeError):
            # Either the registry key was not found, or a value was in an
            # unexpected format.
            # proxies is already an empty dict, so there is nothing to do.
            pass
        return proxies
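
    # A sketch of what the branches above produce (the values are illustrative
    # only): a per-protocol ProxyServer value such as
    #     'http=proxy1:80;ftp=proxy2:21'
    # becomes {'http': 'http://proxy1:80', 'ftp': 'ftp://proxy2:21'}, while a
    # single value such as 'proxy.example.com:3128' is applied to the http,
    # https and ftp schemes, each with its own scheme prefix.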

    def getproxies():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Returns settings gathered from the environment, if specified,
        or the registry.

        """
        return getproxies_environment() or getproxies_registry()

    def proxy_bypass_registry(host):
        try:
            import _winreg
            import re
        except ImportError:
            # Std modules, so should be around - but you never know!
            return 0
        try:
            internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            proxyEnable = _winreg.QueryValueEx(internetSettings,
                                               'ProxyEnable')[0]
            proxyOverride = str(_winreg.QueryValueEx(internetSettings,
                                                     'ProxyOverride')[0])
            # ^^^^ Returned as Unicode; convert to an ASCII str to avoid problems later
        except WindowsError:
            return 0
        if not proxyEnable or not proxyOverride:
            return 0
        # try to make a host list from the name and IP address.
        rawHost, port = splitport(host)
        host = [rawHost]
        try:
            addr = socket.gethostbyname(rawHost)
            if addr != rawHost:
                host.append(addr)
        except socket.error:
            pass
        try:
            fqdn = socket.getfqdn(rawHost)
            if fqdn != rawHost:
                host.append(fqdn)
        except socket.error:
            pass
        # split the registry entry into individual patterns; the special
        # '<local>' entry is handled separately in the loop below.
        proxyOverride = proxyOverride.split(';')
        # now check if we match one of the registry values.
        for test in proxyOverride:
            if test == '<local>':
                if '.' not in rawHost:
                    return 1
            test = test.replace(".", r"\.")     # mask dots
            test = test.replace("*", r".*")     # change glob sequence
            test = test.replace("?", r".")      # change glob char
            for val in host:
                # print "%s <--> %s" %( test, val )
                if re.match(test, val, re.I):
                    return 1
        return 0
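
    # For example, a ProxyOverride value of '<local>;*.example.com' bypasses
    # the proxy for host names without a dot and for anything matching the
    # translated pattern r'.*\.example\.com', matched case-insensitively
    # against the raw name, the IP address and the FQDN gathered above.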

    def proxy_bypass(host):
        """Return True if the host should be bypassed.

        Checks proxy settings gathered from the environment, if specified,
        or the registry.
        """
        proxies = getproxies_environment()
        if proxies:
            return proxy_bypass_environment(host, proxies)
        else:
            return proxy_bypass_registry(host)

else:
    # By default use environment variables
    getproxies = getproxies_environment
    proxy_bypass = proxy_bypass_environment

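
# A minimal decision sketch tying the pieces above together (the host name is
# illustrative only, and this helper is not part of the module's public API):
# consult the platform's proxy map unless the host is excluded from proxying.
def _proxy_decision_example(host='www.example.com'):
    if proxy_bypass(host):
        return None                      # connect directly, no proxy
    return getproxies().get('http')      # proxy URL for http, if configured
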
# Test and time quote() and unquote()
def test1():
    s = ''
    for i in range(256): s = s + chr(i)
    s = s*4
    t0 = time.time()
    qs = quote(s)
    uqs = unquote(qs)
    t1 = time.time()
    if uqs != s:
        print 'Wrong!'
    print repr(s)
    print repr(qs)
    print repr(uqs)
    print round(t1 - t0, 3), 'sec'


def reporthook(blocknum, blocksize, totalsize):
    # Report during remote transfers
    print "Block number: %d, Block size: %d, Total size: %d" % (
        blocknum, blocksize, totalsize)

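# A minimal usage sketch (the URL and filename are illustrative only):
#     urlretrieve('http://www.example.com/', 'example.html', reporthook)
# would invoke reporthook(blocknum, blocksize, totalsize) after each block
# of the download is read.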