1"""Open an arbitrary URL.
2
3See the following document for more info on URLs:
4"Names and Addresses, URIs, URLs, URNs, URCs", at
5http://www.w3.org/pub/WWW/Addressing/Overview.html
6
7See also the HTTP spec (from which the error codes are derived):
8"HTTP - Hypertext Transfer Protocol", at
9http://www.w3.org/pub/WWW/Protocols/
10
11Related standards and specs:
12- RFC1808: the "relative URL" spec. (authoritative status)
13- RFC1738: the "URL standard". (authoritative status)
14- RFC1630: the "URI spec". (informational status)
15
16The object returned by URLopener().open(file) will differ per
17protocol.  All you know is that it has methods read(), readline(),
18readlines(), fileno(), close() and info().  The read*(), fileno()
19and close() methods work like those of open files.
20The info() method returns a mimetools.Message object which can be
21used to query various info about the object, if available.
22(mimetools.Message objects are queried with the getheader() method.)
23"""
24
25import string
26import socket
27import os
28import time
29import sys
30from urlparse import urljoin as basejoin
31
32__all__ = ["urlopen", "URLopener", "FancyURLopener", "urlretrieve",
33           "urlcleanup", "quote", "quote_plus", "unquote", "unquote_plus",
34           "urlencode", "url2pathname", "pathname2url", "splittag",
35           "localhost", "thishost", "ftperrors", "basejoin", "unwrap",
36           "splittype", "splithost", "splituser", "splitpasswd", "splitport",
37           "splitnport", "splitquery", "splitattr", "splitvalue",
38           "getproxies"]
39
40__version__ = '1.17'    # XXX This version is not always updated :-(
41
42MAXFTPCACHE = 10        # Trim the ftp cache beyond this size
43
44# Helper for non-unix systems
45if os.name == 'nt':
46    from nturl2path import url2pathname, pathname2url
47elif os.name == 'riscos':
48    from rourl2path import url2pathname, pathname2url
49else:
50    def url2pathname(pathname):
51        """OS-specific conversion from a relative URL of the 'file' scheme
52        to a file system path; not recommended for general use."""
53        return unquote(pathname)
54
55    def pathname2url(pathname):
56        """OS-specific conversion from a file system path to a relative URL
57        of the 'file' scheme; not recommended for general use."""
58        return quote(pathname)
59
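# Illustrative round trip through the generic fallbacks above (made-up
# path; the Windows and RISC OS variants imported above behave differently):
#
#   >>> pathname2url('/tmp/a file.txt')
#   '/tmp/a%20file.txt'
#   >>> url2pathname('/tmp/a%20file.txt')
#   '/tmp/a file.txt'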
60# This really consists of two pieces:
61# (1) a class which handles opening of all sorts of URLs
62#     (plus assorted utilities etc.)
63# (2) a set of functions for parsing URLs
64# XXX Should these be separated out into different modules?
65
66
67# Shortcut for basic usage
68_urlopener = None
69def urlopen(url, data=None, proxies=None):
70    """Create a file-like object for the specified URL to read from."""
71    from warnings import warnpy3k
72    warnpy3k("urllib.urlopen() has been removed in Python 3.0 in "
73             "favor of urllib2.urlopen()", stacklevel=2)
74
75    global _urlopener
76    if proxies is not None:
77        opener = FancyURLopener(proxies=proxies)
78    elif not _urlopener:
79        opener = FancyURLopener()
80        _urlopener = opener
81    else:
82        opener = _urlopener
83    if data is None:
84        return opener.open(url)
85    else:
86        return opener.open(url, data)
87def urlretrieve(url, filename=None, reporthook=None, data=None):
88    global _urlopener
89    if not _urlopener:
90        _urlopener = FancyURLopener()
91    return _urlopener.retrieve(url, filename, reporthook, data)
92def urlcleanup():
93    if _urlopener:
94        _urlopener.cleanup()
95    _safe_quoters.clear()
96    ftpcache.clear()
97
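# Illustrative use of urlretrieve() and urlcleanup() (placeholder URL; the
# temporary file name and the headers depend on the platform and server):
#
#   >>> filename, headers = urllib.urlretrieve('http://www.example.com/')
#   >>> headers.gettype()
#   'text/html'
#   >>> urllib.urlcleanup()       # remove cached temporary files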
98# check for SSL
99try:
100    import ssl
101except ImportError:
102    _have_ssl = False
103else:
104    _have_ssl = True
105
106# exception raised when downloaded size does not match content-length
107class ContentTooShortError(IOError):
108    def __init__(self, message, content):
109        IOError.__init__(self, message)
110        self.content = content
111
112ftpcache = {}
113class URLopener:
114    """Class to open URLs.
115    This is a class rather than just a subroutine because we may need
116    more than one set of global protocol-specific options.
117    Note -- this is a base class for those who don't want the
118    automatic handling of error types 302 (relocated) and 401
119    (authorization needed)."""
120
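    # A minimal sketch of using URLopener directly (the proxy URL and target
    # host below are placeholders):
    #
    #   >>> opener = URLopener(proxies={'http': 'http://proxy.example.com:3128/'})
    #   >>> opener.addheader('Accept', 'text/html')
    #   >>> f = opener.open('http://www.example.com/')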
121    __tempfiles = None
122
123    version = "Python-urllib/%s" % __version__
124
125    # Constructor
126    def __init__(self, proxies=None, **x509):
127        if proxies is None:
128            proxies = getproxies()
129        assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
130        self.proxies = proxies
131        self.key_file = x509.get('key_file')
132        self.cert_file = x509.get('cert_file')
133        self.addheaders = [('User-Agent', self.version)]
134        self.__tempfiles = []
135        self.__unlink = os.unlink # See cleanup()
136        self.tempcache = None
137        # Undocumented feature: if you assign {} to tempcache,
138        # it is used to cache files retrieved with
139        # self.retrieve().  This is not enabled by default
140        # since it does not work for changing documents (and I
141        # haven't got the logic to check expiration headers
142        # yet).
143        self.ftpcache = ftpcache
144        # Undocumented feature: you can use a different
145        # ftp cache by assigning to the .ftpcache member;
146        # in case you want logically independent URL openers
147        # XXX This is not threadsafe.  Bah.
148
149    def __del__(self):
150        self.close()
151
152    def close(self):
153        self.cleanup()
154
155    def cleanup(self):
156        # This code sometimes runs when the rest of this module
157        # has already been deleted, so it can't use any globals
158        # or import anything.
159        if self.__tempfiles:
160            for file in self.__tempfiles:
161                try:
162                    self.__unlink(file)
163                except OSError:
164                    pass
165            del self.__tempfiles[:]
166        if self.tempcache:
167            self.tempcache.clear()
168
169    def addheader(self, *args):
170        """Add a header to be used by the HTTP interface only
171        e.g. u.addheader('Accept', 'sound/basic')"""
172        self.addheaders.append(args)
173
174    # External interface
175    def open(self, fullurl, data=None):
176        """Use URLopener().open(file) instead of open(file, 'r')."""
177        fullurl = unwrap(toBytes(fullurl))
178        # percent-encode the URL, working around sloppy servers that choke
179        # on characters such as spaces within URL paths.
180        fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]|")
181        if self.tempcache and fullurl in self.tempcache:
182            filename, headers = self.tempcache[fullurl]
183            fp = open(filename, 'rb')
184            return addinfourl(fp, headers, fullurl)
185        urltype, url = splittype(fullurl)
186        if not urltype:
187            urltype = 'file'
188        if urltype in self.proxies:
189            proxy = self.proxies[urltype]
190            urltype, proxyhost = splittype(proxy)
191            host, selector = splithost(proxyhost)
192            url = (host, fullurl) # Signal special case to open_*()
193        else:
194            proxy = None
195        name = 'open_' + urltype
196        self.type = urltype
197        name = name.replace('-', '_')
198        if not hasattr(self, name):
199            if proxy:
200                return self.open_unknown_proxy(proxy, fullurl, data)
201            else:
202                return self.open_unknown(fullurl, data)
203        try:
204            if data is None:
205                return getattr(self, name)(url)
206            else:
207                return getattr(self, name)(url, data)
208        except socket.error, msg:
209            raise IOError, ('socket error', msg), sys.exc_info()[2]
210
211    def open_unknown(self, fullurl, data=None):
212        """Overridable interface to open unknown URL type."""
213        type, url = splittype(fullurl)
214        raise IOError, ('url error', 'unknown url type', type)
215
216    def open_unknown_proxy(self, proxy, fullurl, data=None):
217        """Overridable interface to open unknown URL type."""
218        type, url = splittype(fullurl)
219        raise IOError, ('url error', 'invalid proxy for %s' % type, proxy)
220
221    # External interface
222    def retrieve(self, url, filename=None, reporthook=None, data=None):
223        """retrieve(url) returns (filename, headers) for a local object
224        or (tempfilename, headers) for a remote object."""
225        url = unwrap(toBytes(url))
226        if self.tempcache and url in self.tempcache:
227            return self.tempcache[url]
228        type, url1 = splittype(url)
229        if filename is None and (not type or type == 'file'):
230            try:
231                fp = self.open_local_file(url1)
232                hdrs = fp.info()
233                fp.close()
234                return url2pathname(splithost(url1)[1]), hdrs
235            except IOError:
236                pass
237        fp = self.open(url, data)
238        try:
239            headers = fp.info()
240            if filename:
241                tfp = open(filename, 'wb')
242            else:
243                import tempfile
244                garbage, path = splittype(url)
245                garbage, path = splithost(path or "")
246                path, garbage = splitquery(path or "")
247                path, garbage = splitattr(path or "")
248                suffix = os.path.splitext(path)[1]
249                (fd, filename) = tempfile.mkstemp(suffix)
250                self.__tempfiles.append(filename)
251                tfp = os.fdopen(fd, 'wb')
252            try:
253                result = filename, headers
254                if self.tempcache is not None:
255                    self.tempcache[url] = result
256                bs = 1024*8
257                size = -1
258                read = 0
259                blocknum = 0
260                if reporthook:
261                    if "content-length" in headers:
262                        size = int(headers["Content-Length"])
263                    reporthook(blocknum, bs, size)
264                while 1:
265                    block = fp.read(bs)
266                    if block == "":
267                        break
268                    read += len(block)
269                    tfp.write(block)
270                    blocknum += 1
271                    if reporthook:
272                        reporthook(blocknum, bs, size)
273            finally:
274                tfp.close()
275        finally:
276            fp.close()
277
278        # raise exception if actual size does not match content-length header
279        if size >= 0 and read < size:
280            raise ContentTooShortError("retrieval incomplete: got only %i out "
281                                       "of %i bytes" % (read, size), result)
282
283        return result
284
285    # Each method named open_<type> knows how to open that type of URL
286
287    def open_http(self, url, data=None):
288        """Use HTTP protocol."""
289        import httplib
290        user_passwd = None
291        proxy_passwd = None
292        if isinstance(url, str):
293            host, selector = splithost(url)
294            if host:
295                user_passwd, host = splituser(host)
296                host = unquote(host)
297            realhost = host
298        else:
299            host, selector = url
300            # check whether the proxy contains authorization information
301            proxy_passwd, host = splituser(host)
302            # now we proceed with the url we want to obtain
303            urltype, rest = splittype(selector)
304            url = rest
305            user_passwd = None
306            if urltype.lower() != 'http':
307                realhost = None
308            else:
309                realhost, rest = splithost(rest)
310                if realhost:
311                    user_passwd, realhost = splituser(realhost)
312                if user_passwd:
313                    selector = "%s://%s%s" % (urltype, realhost, rest)
314                if proxy_bypass(realhost):
315                    host = realhost
316
317            #print "proxy via http:", host, selector
318        if not host: raise IOError, ('http error', 'no host given')
319
320        if proxy_passwd:
321            import base64
322            proxy_auth = base64.b64encode(proxy_passwd).strip()
323        else:
324            proxy_auth = None
325
326        if user_passwd:
327            import base64
328            auth = base64.b64encode(user_passwd).strip()
329        else:
330            auth = None
331        h = httplib.HTTP(host)
332        if data is not None:
333            h.putrequest('POST', selector)
334            h.putheader('Content-Type', 'application/x-www-form-urlencoded')
335            h.putheader('Content-Length', '%d' % len(data))
336        else:
337            h.putrequest('GET', selector)
338        if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
339        if auth: h.putheader('Authorization', 'Basic %s' % auth)
340        if realhost: h.putheader('Host', realhost)
341        for args in self.addheaders: h.putheader(*args)
342        h.endheaders(data)
343        errcode, errmsg, headers = h.getreply()
344        fp = h.getfile()
345        if errcode == -1:
346            if fp: fp.close()
347            # something went wrong with the HTTP status line
348            raise IOError, ('http protocol error', 0,
349                            'got a bad status line', None)
350        # According to RFC 2616, "2xx" code indicates that the client's
351        # request was successfully received, understood, and accepted.
352        if (200 <= errcode < 300):
353            return addinfourl(fp, headers, "http:" + url, errcode)
354        else:
355            if data is None:
356                return self.http_error(url, fp, errcode, errmsg, headers)
357            else:
358                return self.http_error(url, fp, errcode, errmsg, headers, data)
359
360    def http_error(self, url, fp, errcode, errmsg, headers, data=None):
361        """Handle http errors.
362        Derived class can override this, or provide specific handlers
363        named http_error_DDD where DDD is the 3-digit error code."""
364        # First check if there's a specific handler for this error
365        name = 'http_error_%d' % errcode
366        if hasattr(self, name):
367            method = getattr(self, name)
368            if data is None:
369                result = method(url, fp, errcode, errmsg, headers)
370            else:
371                result = method(url, fp, errcode, errmsg, headers, data)
372            if result: return result
373        return self.http_error_default(url, fp, errcode, errmsg, headers)
374
375    def http_error_default(self, url, fp, errcode, errmsg, headers):
376        """Default error handler: close the connection and raise IOError."""
377        void = fp.read()
378        fp.close()
379        raise IOError, ('http error', errcode, errmsg, headers)
380
381    if _have_ssl:
382        def open_https(self, url, data=None):
383            """Use HTTPS protocol."""
384
385            import httplib
386            user_passwd = None
387            proxy_passwd = None
388            if isinstance(url, str):
389                host, selector = splithost(url)
390                if host:
391                    user_passwd, host = splituser(host)
392                    host = unquote(host)
393                realhost = host
394            else:
395                host, selector = url
396                # determine whether the proxy contains authorization information
397                proxy_passwd, host = splituser(host)
398                urltype, rest = splittype(selector)
399                url = rest
400                user_passwd = None
401                if urltype.lower() != 'https':
402                    realhost = None
403                else:
404                    realhost, rest = splithost(rest)
405                    if realhost:
406                        user_passwd, realhost = splituser(realhost)
407                    if user_passwd:
408                        selector = "%s://%s%s" % (urltype, realhost, rest)
409                #print "proxy via https:", host, selector
410            if not host: raise IOError, ('https error', 'no host given')
411            if proxy_passwd:
412                import base64
413                proxy_auth = base64.b64encode(proxy_passwd).strip()
414            else:
415                proxy_auth = None
416            if user_passwd:
417                import base64
418                auth = base64.b64encode(user_passwd).strip()
419            else:
420                auth = None
421            h = httplib.HTTPS(host, 0,
422                              key_file=self.key_file,
423                              cert_file=self.cert_file)
424            if data is not None:
425                h.putrequest('POST', selector)
426                h.putheader('Content-Type',
427                            'application/x-www-form-urlencoded')
428                h.putheader('Content-Length', '%d' % len(data))
429            else:
430                h.putrequest('GET', selector)
431            if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
432            if auth: h.putheader('Authorization', 'Basic %s' % auth)
433            if realhost: h.putheader('Host', realhost)
434            for args in self.addheaders: h.putheader(*args)
435            h.endheaders(data)
436            errcode, errmsg, headers = h.getreply()
437            fp = h.getfile()
438            if errcode == -1:
439                if fp: fp.close()
440                # something went wrong with the HTTP status line
441                raise IOError, ('http protocol error', 0,
442                                'got a bad status line', None)
443            # According to RFC 2616, "2xx" code indicates that the client's
444            # request was successfully received, understood, and accepted.
445            if (200 <= errcode < 300):
446                return addinfourl(fp, headers, "https:" + url, errcode)
447            else:
448                if data is None:
449                    return self.http_error(url, fp, errcode, errmsg, headers)
450                else:
451                    return self.http_error(url, fp, errcode, errmsg, headers,
452                                           data)
453
454    def open_file(self, url):
455        """Use local file or FTP depending on form of URL."""
456        if not isinstance(url, str):
457            raise IOError, ('file error', 'proxy support for file protocol currently not implemented')
458        if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
459            return self.open_ftp(url)
460        else:
461            return self.open_local_file(url)
462
463    def open_local_file(self, url):
464        """Use local file."""
465        import mimetypes, mimetools, email.utils
466        try:
467            from cStringIO import StringIO
468        except ImportError:
469            from StringIO import StringIO
470        host, file = splithost(url)
471        localname = url2pathname(file)
472        try:
473            stats = os.stat(localname)
474        except OSError, e:
475            raise IOError(e.errno, e.strerror, e.filename)
476        size = stats.st_size
477        modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
478        mtype = mimetypes.guess_type(url)[0]
479        headers = mimetools.Message(StringIO(
480            'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
481            (mtype or 'text/plain', size, modified)))
482        if not host:
483            urlfile = file
484            if file[:1] == '/':
485                urlfile = 'file://' + file
486            return addinfourl(open(localname, 'rb'),
487                              headers, urlfile)
488        host, port = splitport(host)
489        if not port \
490           and socket.gethostbyname(host) in (localhost(), thishost()):
491            urlfile = file
492            if file[:1] == '/':
493                urlfile = 'file://' + file
494            return addinfourl(open(localname, 'rb'),
495                              headers, urlfile)
496        raise IOError, ('local file error', 'not on local host')
497
498    def open_ftp(self, url):
499        """Use FTP protocol."""
500        if not isinstance(url, str):
501            raise IOError, ('ftp error', 'proxy support for ftp protocol currently not implemented')
502        import mimetypes, mimetools
503        try:
504            from cStringIO import StringIO
505        except ImportError:
506            from StringIO import StringIO
507        host, path = splithost(url)
508        if not host: raise IOError, ('ftp error', 'no host given')
509        host, port = splitport(host)
510        user, host = splituser(host)
511        if user: user, passwd = splitpasswd(user)
512        else: passwd = None
513        host = unquote(host)
514        user = user or ''
515        passwd = passwd or ''
516        host = socket.gethostbyname(host)
517        if not port:
518            import ftplib
519            port = ftplib.FTP_PORT
520        else:
521            port = int(port)
522        path, attrs = splitattr(path)
523        path = unquote(path)
524        dirs = path.split('/')
525        dirs, file = dirs[:-1], dirs[-1]
526        if dirs and not dirs[0]: dirs = dirs[1:]
527        if dirs and not dirs[0]: dirs[0] = '/'
528        key = user, host, port, '/'.join(dirs)
529        # XXX thread unsafe!
530        if len(self.ftpcache) > MAXFTPCACHE:
531            # Prune the cache, rather arbitrarily
532            for k in self.ftpcache.keys():
533                if k != key:
534                    v = self.ftpcache[k]
535                    del self.ftpcache[k]
536                    v.close()
537        try:
538            if not key in self.ftpcache:
539                self.ftpcache[key] = \
540                    ftpwrapper(user, passwd, host, port, dirs)
541            if not file: type = 'D'
542            else: type = 'I'
543            for attr in attrs:
544                attr, value = splitvalue(attr)
545                if attr.lower() == 'type' and \
546                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
547                    type = value.upper()
548            (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
549            mtype = mimetypes.guess_type("ftp:" + url)[0]
550            headers = ""
551            if mtype:
552                headers += "Content-Type: %s\n" % mtype
553            if retrlen is not None and retrlen >= 0:
554                headers += "Content-Length: %d\n" % retrlen
555            headers = mimetools.Message(StringIO(headers))
556            return addinfourl(fp, headers, "ftp:" + url)
557        except ftperrors(), msg:
558            raise IOError, ('ftp error', msg), sys.exc_info()[2]
559
560    def open_data(self, url, data=None):
561        """Use "data" URL."""
562        if not isinstance(url, str):
563            raise IOError, ('data error', 'proxy support for data protocol currently not implemented')
564        # ignore POSTed data
565        #
566        # syntax of data URLs:
567        # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
568        # mediatype := [ type "/" subtype ] *( ";" parameter )
569        # data      := *urlchar
570        # parameter := attribute "=" value
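        #
        # For example (illustrative; 'aGVsbG8=' is base64 for 'hello'):
        #
        #   >>> urlopen('data:text/plain;base64,aGVsbG8=').read()
        #   'hello'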
571        import mimetools
572        try:
573            from cStringIO import StringIO
574        except ImportError:
575            from StringIO import StringIO
576        try:
577            [type, data] = url.split(',', 1)
578        except ValueError:
579            raise IOError, ('data error', 'bad data URL')
580        if not type:
581            type = 'text/plain;charset=US-ASCII'
582        semi = type.rfind(';')
583        if semi >= 0 and '=' not in type[semi:]:
584            encoding = type[semi+1:]
585            type = type[:semi]
586        else:
587            encoding = ''
588        msg = []
589        msg.append('Date: %s'%time.strftime('%a, %d %b %Y %H:%M:%S GMT',
590                                            time.gmtime(time.time())))
591        msg.append('Content-type: %s' % type)
592        if encoding == 'base64':
593            import base64
594            data = base64.decodestring(data)
595        else:
596            data = unquote(data)
597        msg.append('Content-Length: %d' % len(data))
598        msg.append('')
599        msg.append(data)
600        msg = '\n'.join(msg)
601        f = StringIO(msg)
602        headers = mimetools.Message(f, 0)
603        #f.fileno = None     # needed for addinfourl
604        return addinfourl(f, headers, url)
605
606
607class FancyURLopener(URLopener):
608    """Derived class with handlers for errors we can handle (perhaps)."""
609
610    def __init__(self, *args, **kwargs):
611        URLopener.__init__(self, *args, **kwargs)
612        self.auth_cache = {}
613        self.tries = 0
614        self.maxtries = 10
615
616    def http_error_default(self, url, fp, errcode, errmsg, headers):
617        """Default error handling -- don't raise an exception."""
618        return addinfourl(fp, headers, "http:" + url, errcode)
619
620    def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
621        """Error 302 -- relocated (temporarily)."""
622        self.tries += 1
623        if self.maxtries and self.tries >= self.maxtries:
624            if hasattr(self, "http_error_500"):
625                meth = self.http_error_500
626            else:
627                meth = self.http_error_default
628            self.tries = 0
629            return meth(url, fp, 500,
630                        "Internal Server Error: Redirect Recursion", headers)
631        result = self.redirect_internal(url, fp, errcode, errmsg, headers,
632                                        data)
633        self.tries = 0
634        return result
635
636    def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
637        if 'location' in headers:
638            newurl = headers['location']
639        elif 'uri' in headers:
640            newurl = headers['uri']
641        else:
642            return
643        void = fp.read()
644        fp.close()
645        # In case the server sent a relative URL, join with original:
646        newurl = basejoin(self.type + ":" + url, newurl)
647
648        # For security reasons we do not allow redirects to protocols
649        # other than HTTP, HTTPS or FTP.
650        newurl_lower = newurl.lower()
651        if not (newurl_lower.startswith('http://') or
652                newurl_lower.startswith('https://') or
653                newurl_lower.startswith('ftp://')):
654            raise IOError('redirect error', errcode,
655                          errmsg + " - Redirection to url '%s' is not allowed" %
656                          newurl,
657                          headers)
658
659        return self.open(newurl)
660
661    def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
662        """Error 301 -- also relocated (permanently)."""
663        return self.http_error_302(url, fp, errcode, errmsg, headers, data)
664
665    def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
666        """Error 303 -- also relocated (essentially identical to 302)."""
667        return self.http_error_302(url, fp, errcode, errmsg, headers, data)
668
669    def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
670        """Error 307 -- relocated, but turn POST into error."""
671        if data is None:
672            return self.http_error_302(url, fp, errcode, errmsg, headers, data)
673        else:
674            return self.http_error_default(url, fp, errcode, errmsg, headers)
675
676    def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
677        """Error 401 -- authentication required.
678        This function supports Basic authentication only."""
679        if not 'www-authenticate' in headers:
680            URLopener.http_error_default(self, url, fp,
681                                         errcode, errmsg, headers)
682        stuff = headers['www-authenticate']
683        import re
684        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
685        if not match:
686            URLopener.http_error_default(self, url, fp,
687                                         errcode, errmsg, headers)
688        scheme, realm = match.groups()
689        if scheme.lower() != 'basic':
690            URLopener.http_error_default(self, url, fp,
691                                         errcode, errmsg, headers)
692        name = 'retry_' + self.type + '_basic_auth'
693        if data is None:
694            return getattr(self,name)(url, realm)
695        else:
696            return getattr(self,name)(url, realm, data)
697
698    def http_error_407(self, url, fp, errcode, errmsg, headers, data=None):
699        """Error 407 -- proxy authentication required.
700        This function supports Basic authentication only."""
701        if not 'proxy-authenticate' in headers:
702            URLopener.http_error_default(self, url, fp,
703                                         errcode, errmsg, headers)
704        stuff = headers['proxy-authenticate']
705        import re
706        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
707        if not match:
708            URLopener.http_error_default(self, url, fp,
709                                         errcode, errmsg, headers)
710        scheme, realm = match.groups()
711        if scheme.lower() != 'basic':
712            URLopener.http_error_default(self, url, fp,
713                                         errcode, errmsg, headers)
714        name = 'retry_proxy_' + self.type + '_basic_auth'
715        if data is None:
716            return getattr(self,name)(url, realm)
717        else:
718            return getattr(self,name)(url, realm, data)
719
720    def retry_proxy_http_basic_auth(self, url, realm, data=None):
721        host, selector = splithost(url)
722        newurl = 'http://' + host + selector
723        proxy = self.proxies['http']
724        urltype, proxyhost = splittype(proxy)
725        proxyhost, proxyselector = splithost(proxyhost)
726        i = proxyhost.find('@') + 1
727        proxyhost = proxyhost[i:]
728        user, passwd = self.get_user_passwd(proxyhost, realm, i)
729        if not (user or passwd): return None
730        proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
731        self.proxies['http'] = 'http://' + proxyhost + proxyselector
732        if data is None:
733            return self.open(newurl)
734        else:
735            return self.open(newurl, data)
736
737    def retry_proxy_https_basic_auth(self, url, realm, data=None):
738        host, selector = splithost(url)
739        newurl = 'https://' + host + selector
740        proxy = self.proxies['https']
741        urltype, proxyhost = splittype(proxy)
742        proxyhost, proxyselector = splithost(proxyhost)
743        i = proxyhost.find('@') + 1
744        proxyhost = proxyhost[i:]
745        user, passwd = self.get_user_passwd(proxyhost, realm, i)
746        if not (user or passwd): return None
747        proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
748        self.proxies['https'] = 'https://' + proxyhost + proxyselector
749        if data is None:
750            return self.open(newurl)
751        else:
752            return self.open(newurl, data)
753
754    def retry_http_basic_auth(self, url, realm, data=None):
755        host, selector = splithost(url)
756        i = host.find('@') + 1
757        host = host[i:]
758        user, passwd = self.get_user_passwd(host, realm, i)
759        if not (user or passwd): return None
760        host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
761        newurl = 'http://' + host + selector
762        if data is None:
763            return self.open(newurl)
764        else:
765            return self.open(newurl, data)
766
767    def retry_https_basic_auth(self, url, realm, data=None):
768        host, selector = splithost(url)
769        i = host.find('@') + 1
770        host = host[i:]
771        user, passwd = self.get_user_passwd(host, realm, i)
772        if not (user or passwd): return None
773        host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
774        newurl = 'https://' + host + selector
775        if data is None:
776            return self.open(newurl)
777        else:
778            return self.open(newurl, data)
779
780    def get_user_passwd(self, host, realm, clear_cache=0):
781        key = realm + '@' + host.lower()
782        if key in self.auth_cache:
783            if clear_cache:
784                del self.auth_cache[key]
785            else:
786                return self.auth_cache[key]
787        user, passwd = self.prompt_user_passwd(host, realm)
788        if user or passwd: self.auth_cache[key] = (user, passwd)
789        return user, passwd
790
791    def prompt_user_passwd(self, host, realm):
792        """Override this in a GUI environment!"""
793        import getpass
794        try:
795            user = raw_input("Enter username for %s at %s: " % (realm,
796                                                                host))
797            passwd = getpass.getpass("Enter password for %s in %s at %s: " %
798                (user, realm, host))
799            return user, passwd
800        except KeyboardInterrupt:
801            print
802            return None, None
803
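# A minimal sketch of overriding prompt_user_passwd() so credentials come
# from code instead of an interactive prompt (class name, username and
# password below are placeholders):
#
#   class AutoAuthOpener(FancyURLopener):
#       def prompt_user_passwd(self, host, realm):
#           return 'someuser', 'somepassword'
#
#   >>> AutoAuthOpener().open('http://www.example.com/protected/')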
804
805# Utility functions
806
807_localhost = None
808def localhost():
809    """Return the IP address of the magic hostname 'localhost'."""
810    global _localhost
811    if _localhost is None:
812        _localhost = socket.gethostbyname('localhost')
813    return _localhost
814
815_thishost = None
816def thishost():
817    """Return the IP address of the current host."""
818    global _thishost
819    if _thishost is None:
820        _thishost = socket.gethostbyname(socket.gethostname())
821    return _thishost
822
823_ftperrors = None
824def ftperrors():
825    """Return the set of errors raised by the FTP class."""
826    global _ftperrors
827    if _ftperrors is None:
828        import ftplib
829        _ftperrors = ftplib.all_errors
830    return _ftperrors
831
832_noheaders = None
833def noheaders():
834    """Return an empty mimetools.Message object."""
835    global _noheaders
836    if _noheaders is None:
837        import mimetools
838        try:
839            from cStringIO import StringIO
840        except ImportError:
841            from StringIO import StringIO
842        _noheaders = mimetools.Message(StringIO(), 0)
843        _noheaders.fp.close()   # Recycle file descriptor
844    return _noheaders
845
846
847# Utility classes
848
849class ftpwrapper:
850    """Class used by open_ftp() for cache of open FTP connections."""
851
852    def __init__(self, user, passwd, host, port, dirs,
853                 timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
854        self.user = user
855        self.passwd = passwd
856        self.host = host
857        self.port = port
858        self.dirs = dirs
859        self.timeout = timeout
860        self.init()
861
862    def init(self):
863        import ftplib
864        self.busy = 0
865        self.ftp = ftplib.FTP()
866        self.ftp.connect(self.host, self.port, self.timeout)
867        self.ftp.login(self.user, self.passwd)
868        for dir in self.dirs:
869            self.ftp.cwd(dir)
870
871    def retrfile(self, file, type):
872        import ftplib
873        self.endtransfer()
874        if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
875        else: cmd = 'TYPE ' + type; isdir = 0
876        try:
877            self.ftp.voidcmd(cmd)
878        except ftplib.all_errors:
879            self.init()
880            self.ftp.voidcmd(cmd)
881        conn = None
882        if file and not isdir:
883            # Try to retrieve as a file
884            try:
885                cmd = 'RETR ' + file
886                conn = self.ftp.ntransfercmd(cmd)
887            except ftplib.error_perm, reason:
888                if str(reason)[:3] != '550':
889                    raise IOError, ('ftp error', reason), sys.exc_info()[2]
890        if not conn:
891            # Set transfer mode to ASCII!
892            self.ftp.voidcmd('TYPE A')
893            # Try a directory listing. Verify that directory exists.
894            if file:
895                pwd = self.ftp.pwd()
896                try:
897                    try:
898                        self.ftp.cwd(file)
899                    except ftplib.error_perm, reason:
900                        raise IOError, ('ftp error', reason), sys.exc_info()[2]
901                finally:
902                    self.ftp.cwd(pwd)
903                cmd = 'LIST ' + file
904            else:
905                cmd = 'LIST'
906            conn = self.ftp.ntransfercmd(cmd)
907        self.busy = 1
908        # Pass back both a suitably decorated object and a retrieval length
909        return (addclosehook(conn[0].makefile('rb'),
910                             self.endtransfer), conn[1])
911    def endtransfer(self):
912        if not self.busy:
913            return
914        self.busy = 0
915        try:
916            self.ftp.voidresp()
917        except ftperrors():
918            pass
919
920    def close(self):
921        self.endtransfer()
922        try:
923            self.ftp.close()
924        except ftperrors():
925            pass
926
927class addbase:
928    """Base class for addinfo and addclosehook."""
929
930    def __init__(self, fp):
931        self.fp = fp
932        self.read = self.fp.read
933        self.readline = self.fp.readline
934        if hasattr(self.fp, "readlines"): self.readlines = self.fp.readlines
935        if hasattr(self.fp, "fileno"):
936            self.fileno = self.fp.fileno
937        else:
938            self.fileno = lambda: None
939        if hasattr(self.fp, "__iter__"):
940            self.__iter__ = self.fp.__iter__
941            if hasattr(self.fp, "next"):
942                self.next = self.fp.next
943
944    def __repr__(self):
945        return '<%s at %r whose fp = %r>' % (self.__class__.__name__,
946                                             id(self), self.fp)
947
948    def close(self):
949        self.read = None
950        self.readline = None
951        self.readlines = None
952        self.fileno = None
953        if self.fp: self.fp.close()
954        self.fp = None
955
956class addclosehook(addbase):
957    """Class to add a close hook to an open file."""
958
959    def __init__(self, fp, closehook, *hookargs):
960        addbase.__init__(self, fp)
961        self.closehook = closehook
962        self.hookargs = hookargs
963
964    def close(self):
965        addbase.close(self)
966        if self.closehook:
967            self.closehook(*self.hookargs)
968            self.closehook = None
969            self.hookargs = None
970
971class addinfo(addbase):
972    """class to add an info() method to an open file."""
973
974    def __init__(self, fp, headers):
975        addbase.__init__(self, fp)
976        self.headers = headers
977
978    def info(self):
979        return self.headers
980
981class addinfourl(addbase):
982    """class to add info() and geturl() methods to an open file."""
983
984    def __init__(self, fp, headers, url, code=None):
985        addbase.__init__(self, fp)
986        self.headers = headers
987        self.url = url
988        self.code = code
989
990    def info(self):
991        return self.headers
992
993    def getcode(self):
994        return self.code
995
996    def geturl(self):
997        return self.url
998
999
1000# Utilities to parse URLs (most of these return None for missing parts):
1001# unwrap('<URL:type://host/path>') --> 'type://host/path'
1002# splittype('type:opaquestring') --> 'type', 'opaquestring'
1003# splithost('//host[:port]/path') --> 'host[:port]', '/path'
1004# splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
1005# splitpasswd('user:passwd') -> 'user', 'passwd'
1006# splitport('host:port') --> 'host', 'port'
1007# splitquery('/path?query') --> '/path', 'query'
1008# splittag('/path#tag') --> '/path', 'tag'
1009# splitattr('/path;attr1=value1;attr2=value2;...') ->
1010#   '/path', ['attr1=value1', 'attr2=value2', ...]
1011# splitvalue('attr=value') --> 'attr', 'value'
1012# unquote('abc%20def') -> 'abc def'
1013# quote('abc def') -> 'abc%20def'
1014
1015try:
1016    unicode
1017except NameError:
1018    def _is_unicode(x):
1019        return 0
1020else:
1021    def _is_unicode(x):
1022        return isinstance(x, unicode)
1023
1024def toBytes(url):
1025    """toBytes(u"URL") --> 'URL'."""
1026    # Most URL schemes require ASCII. If that changes, the conversion
1027    # can be relaxed
1028    if _is_unicode(url):
1029        try:
1030            url = url.encode("ASCII")
1031        except UnicodeError:
1032            raise UnicodeError("URL " + repr(url) +
1033                               " contains non-ASCII characters")
1034    return url
1035
1036def unwrap(url):
1037    """unwrap('<URL:type://host/path>') --> 'type://host/path'."""
1038    url = url.strip()
1039    if url[:1] == '<' and url[-1:] == '>':
1040        url = url[1:-1].strip()
1041    if url[:4] == 'URL:': url = url[4:].strip()
1042    return url
1043
1044_typeprog = None
1045def splittype(url):
1046    """splittype('type:opaquestring') --> 'type', 'opaquestring'."""
1047    global _typeprog
1048    if _typeprog is None:
1049        import re
1050        _typeprog = re.compile('^([^/:]+):')
1051
1052    match = _typeprog.match(url)
1053    if match:
1054        scheme = match.group(1)
1055        return scheme.lower(), url[len(scheme) + 1:]
1056    return None, url
1057
1058_hostprog = None
1059def splithost(url):
1060    """splithost('//host[:port]/path') --> 'host[:port]', '/path'."""
1061    global _hostprog
1062    if _hostprog is None:
1063        import re
1064        _hostprog = re.compile('^//([^/?]*)(.*)$')
1065
1066    match = _hostprog.match(url)
1067    if match:
1068        host_port = match.group(1)
1069        path = match.group(2)
1070        if path and not path.startswith('/'):
1071            path = '/' + path
1072        return host_port, path
1073    return None, url
1074
1075_userprog = None
1076def splituser(host):
1077    """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'."""
1078    global _userprog
1079    if _userprog is None:
1080        import re
1081        _userprog = re.compile('^(.*)@(.*)$')
1082
1083    match = _userprog.match(host)
1084    if match: return match.group(1, 2)
1085    return None, host
1086
1087_passwdprog = None
1088def splitpasswd(user):
1089    """splitpasswd('user:passwd') -> 'user', 'passwd'."""
1090    global _passwdprog
1091    if _passwdprog is None:
1092        import re
1093        _passwdprog = re.compile('^([^:]*):(.*)$',re.S)
1094
1095    match = _passwdprog.match(user)
1096    if match: return match.group(1, 2)
1097    return user, None
1098
1099# splittag('/path#tag') --> '/path', 'tag'
1100_portprog = None
1101def splitport(host):
1102    """splitport('host:port') --> 'host', 'port'."""
1103    global _portprog
1104    if _portprog is None:
1105        import re
1106        _portprog = re.compile('^(.*):([0-9]+)$')
1107
1108    match = _portprog.match(host)
1109    if match: return match.group(1, 2)
1110    return host, None
1111
1112_nportprog = None
1113def splitnport(host, defport=-1):
1114    """Split host and port, returning numeric port.
1115    Return the given default port if no ':' is found; defaults to -1.
1116    Return the numerical port if a valid number is found after ':'.
1117    Return None if ':' is present but not followed by a valid number."""
1118    global _nportprog
1119    if _nportprog is None:
1120        import re
1121        _nportprog = re.compile('^(.*):(.*)$')
1122
1123    match = _nportprog.match(host)
1124    if match:
1125        host, port = match.group(1, 2)
1126        try:
1127            if not port: raise ValueError, "no digits"
1128            nport = int(port)
1129        except ValueError:
1130            nport = None
1131        return host, nport
1132    return host, defport
1133
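# Illustrative results (host names are placeholders):
#
#   >>> splitnport('www.example.com:8080')
#   ('www.example.com', 8080)
#   >>> splitnport('www.example.com')
#   ('www.example.com', -1)
#   >>> splitnport('www.example.com:nonsense')
#   ('www.example.com', None)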
1134_queryprog = None
1135def splitquery(url):
1136    """splitquery('/path?query') --> '/path', 'query'."""
1137    global _queryprog
1138    if _queryprog is None:
1139        import re
1140        _queryprog = re.compile('^(.*)\?([^?]*)$')
1141
1142    match = _queryprog.match(url)
1143    if match: return match.group(1, 2)
1144    return url, None
1145
1146_tagprog = None
1147def splittag(url):
1148    """splittag('/path#tag') --> '/path', 'tag'."""
1149    global _tagprog
1150    if _tagprog is None:
1151        import re
1152        _tagprog = re.compile('^(.*)#([^#]*)$')
1153
1154    match = _tagprog.match(url)
1155    if match: return match.group(1, 2)
1156    return url, None
1157
1158def splitattr(url):
1159    """splitattr('/path;attr1=value1;attr2=value2;...') ->
1160        '/path', ['attr1=value1', 'attr2=value2', ...]."""
1161    words = url.split(';')
1162    return words[0], words[1:]
1163
1164_valueprog = None
1165def splitvalue(attr):
1166    """splitvalue('attr=value') --> 'attr', 'value'."""
1167    global _valueprog
1168    if _valueprog is None:
1169        import re
1170        _valueprog = re.compile('^([^=]*)=(.*)$')
1171
1172    match = _valueprog.match(attr)
1173    if match: return match.group(1, 2)
1174    return attr, None
1175
1176# urlparse contains a duplicate of this method to avoid a circular import.  If
1177# you update this method, also update the copy in urlparse.  This code
1178# duplication does not exist in Python3.
1179
1180_hexdig = '0123456789ABCDEFabcdef'
1181_hextochr = dict((a + b, chr(int(a + b, 16)))
1182                 for a in _hexdig for b in _hexdig)
1183
1184def unquote(s):
1185    """unquote('abc%20def') -> 'abc def'."""
1186    res = s.split('%')
1187    # fastpath
1188    if len(res) == 1:
1189        return s
1190    s = res[0]
1191    for item in res[1:]:
1192        try:
1193            s += _hextochr[item[:2]] + item[2:]
1194        except KeyError:
1195            s += '%' + item
1196        except UnicodeDecodeError:
1197            s += unichr(int(item[:2], 16)) + item[2:]
1198    return s
1199
1200def unquote_plus(s):
1201    """unquote('%7e/abc+def') -> '~/abc def'"""
1202    s = s.replace('+', ' ')
1203    return unquote(s)
1204
1205always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
1206               'abcdefghijklmnopqrstuvwxyz'
1207               '0123456789' '_.-')
1208_safe_map = {}
1209for i, c in zip(xrange(256), str(bytearray(xrange(256)))):
1210    _safe_map[c] = c if (i < 128 and c in always_safe) else '%{:02X}'.format(i)
1211_safe_quoters = {}
1212
1213def quote(s, safe='/'):
1214    """quote('abc def') -> 'abc%20def'
1215
1216    Each part of a URL, e.g. the path info, the query, etc., has a
1217    different set of reserved characters that must be quoted.
1218
1219    RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
1220    the following reserved characters.
1221
1222    reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
1223                  "$" | ","
1224
1225    Each of these characters is reserved in some component of a URL,
1226    but not necessarily in all of them.
1227
1228    By default, the quote function is intended for quoting the path
1229    section of a URL.  Thus, it will not encode '/'.  This character
1230    is reserved, but in typical usage the quote function is being
1231    called on a path where the existing slash characters are used as
1232    reserved characters.
1233    """
1234    # fastpath
1235    if not s:
1236        if s is None:
1237            raise TypeError('None object cannot be quoted')
1238        return s
1239    cachekey = (safe, always_safe)
1240    try:
1241        (quoter, safe) = _safe_quoters[cachekey]
1242    except KeyError:
1243        safe_map = _safe_map.copy()
1244        safe_map.update([(c, c) for c in safe])
1245        quoter = safe_map.__getitem__
1246        safe = always_safe + safe
1247        _safe_quoters[cachekey] = (quoter, safe)
1248    if not s.rstrip(safe):
1249        return s
1250    return ''.join(map(quoter, s))
1251
1252def quote_plus(s, safe=''):
1253    """Quote the query fragment of a URL; replacing ' ' with '+'"""
1254    if ' ' in s:
1255        s = quote(s, safe + ' ')
1256        return s.replace(' ', '+')
1257    return quote(s, safe)
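# Illustrative behaviour of quote(), quote_plus() and unquote_plus():
#
#   >>> quote('abc def/ghi')              # '/' is safe by default
#   'abc%20def/ghi'
#   >>> quote('abc def/ghi', safe='')
#   'abc%20def%2Fghi'
#   >>> quote_plus('abc def/ghi')
#   'abc+def%2Fghi'
#   >>> unquote_plus('abc+def%2Fghi')
#   'abc def/ghi'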
1258
1259def urlencode(query, doseq=0):
1260    """Encode a sequence of two-element tuples or dictionary into a URL query string.
1261
1262    If any values in the query arg are sequences and doseq is true, each
1263    sequence element is converted to a separate parameter.
1264
1265    If the query arg is a sequence of two-element tuples, the order of the
1266    parameters in the output will match the order of parameters in the
1267    input.
1268    """
1269
1270    if hasattr(query,"items"):
1271        # mapping objects
1272        query = query.items()
1273    else:
1274        # it's a bother at times that strings and string-like objects are
1275        # sequences...
1276        try:
1277            # non-sequence items should not work with len()
1278            # non-empty strings will fail this
1279            if len(query) and not isinstance(query[0], tuple):
1280                raise TypeError
1281            # zero-length sequences of all types will get here and succeed,
1282            # but that's a minor nit - since the original implementation
1283            # allowed empty dicts that type of behavior probably should be
1284            # preserved for consistency
1285        except TypeError:
1286            ty,va,tb = sys.exc_info()
1287            raise TypeError, "not a valid non-string sequence or mapping object", tb
1288
1289    l = []
1290    if not doseq:
1291        # preserve old behavior
1292        for k, v in query:
1293            k = quote_plus(str(k))
1294            v = quote_plus(str(v))
1295            l.append(k + '=' + v)
1296    else:
1297        for k, v in query:
1298            k = quote_plus(str(k))
1299            if isinstance(v, str):
1300                v = quote_plus(v)
1301                l.append(k + '=' + v)
1302            elif _is_unicode(v):
1303                # is there a reasonable way to convert to ASCII?
1304                # encode generates a string, but "replace" or "ignore"
1305                # lose information and "strict" can raise UnicodeError
1306                v = quote_plus(v.encode("ASCII","replace"))
1307                l.append(k + '=' + v)
1308            else:
1309                try:
1310                    # is this a sufficient test for sequence-ness?
1311                    len(v)
1312                except TypeError:
1313                    # not a sequence
1314                    v = quote_plus(str(v))
1315                    l.append(k + '=' + v)
1316                else:
1317                    # loop over the sequence
1318                    for elt in v:
1319                        l.append(k + '=' + quote_plus(str(elt)))
1320    return '&'.join(l)
1321
1322# Proxy handling
1323def getproxies_environment():
1324    """Return a dictionary of scheme -> proxy server URL mappings.
1325
1326    Scan the environment for variables named <scheme>_proxy;
1327    this seems to be the standard convention.  If you need a
1328    different way, you can pass a proxies dictionary to the
1329    [Fancy]URLopener constructor.
1330
1331    """
1332    proxies = {}
1333    for name, value in os.environ.items():
1334        name = name.lower()
1335        if value and name[-6:] == '_proxy':
1336            proxies[name[:-6]] = value
1337    return proxies
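# For example, with a (hypothetical) http_proxy variable in the environment:
#
#   >>> os.environ['http_proxy'] = 'http://proxy.example.com:3128/'
#   >>> getproxies_environment()
#   {'http': 'http://proxy.example.com:3128/'}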
1338
1339def proxy_bypass_environment(host):
1340    """Test if proxies should not be used for a particular host.
1341
1342    Checks the environment for a variable named no_proxy, which should
1343    be a list of DNS suffixes separated by commas, or '*' for all hosts.
1344    """
1345    no_proxy = os.environ.get('no_proxy', '') or os.environ.get('NO_PROXY', '')
1346    # '*' is special case for always bypass
1347    if no_proxy == '*':
1348        return 1
1349    # strip port off host
1350    hostonly, port = splitport(host)
1351    # check if the host ends with any of the DNS suffixes
1352    for name in no_proxy.split(','):
1353        if name and (hostonly.endswith(name) or host.endswith(name)):
1354            return 1
1355    # otherwise, don't bypass
1356    return 0
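# For example, with a (hypothetical) no_proxy variable in the environment:
#
#   >>> os.environ['no_proxy'] = 'localhost,.example.com'
#   >>> proxy_bypass_environment('www.example.com')
#   1
#   >>> proxy_bypass_environment('www.python.org:8080')
#   0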
1357
1358
1359if sys.platform == 'darwin':
1360    from _scproxy import _get_proxy_settings, _get_proxies
1361
1362    def proxy_bypass_macosx_sysconf(host):
1363        """
1364        Return True iff this host shouldn't be accessed using a proxy
1365
1366        This function uses the MacOSX framework SystemConfiguration
1367        to fetch the proxy information.
1368        """
1369        import re
1370        import socket
1371        from fnmatch import fnmatch
1372
1373        hostonly, port = splitport(host)
1374
1375        def ip2num(ipAddr):
1376            parts = ipAddr.split('.')
1377            parts = map(int, parts)
1378            if len(parts) != 4:
1379                parts = (parts + [0, 0, 0, 0])[:4]
1380            return (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3]
1381
1382        proxy_settings = _get_proxy_settings()
1383
1384        # Check for simple host names:
1385        if '.' not in host:
1386            if proxy_settings['exclude_simple']:
1387                return True
1388
1389        hostIP = None
1390
1391        for value in proxy_settings.get('exceptions', ()):
1392            # Items in the list are strings like these: *.local, 169.254/16
1393            if not value: continue
1394
1395            m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value)
1396            if m is not None:
1397                if hostIP is None:
1398                    try:
1399                        hostIP = socket.gethostbyname(hostonly)
1400                        hostIP = ip2num(hostIP)
1401                    except socket.error:
1402                        continue
1403
1404                base = ip2num(m.group(1))
1405                mask = m.group(2)
1406                if mask is None:
1407                    mask = 8 * (m.group(1).count('.') + 1)
1408
1409                else:
1410                    mask = int(mask[1:])
1411                mask = 32 - mask
1412
1413                if (hostIP >> mask) == (base >> mask):
1414                    return True
1415
1416            elif fnmatch(host, value):
1417                return True
1418
1419        return False
1420
1421    def getproxies_macosx_sysconf():
1422        """Return a dictionary of scheme -> proxy server URL mappings.
1423
1424        This function uses the MacOSX framework SystemConfiguration
1425        to fetch the proxy information.
1426        """
1427        return _get_proxies()
1428
1429    def proxy_bypass(host):
        if getproxies_environment():
            return proxy_bypass_environment(host)
        else:
            return proxy_bypass_macosx_sysconf(host)

    def getproxies():
        return getproxies_environment() or getproxies_macosx_sysconf()

elif os.name == 'nt':
    def getproxies_registry():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Win32 uses the registry to store proxies.

        """
        proxies = {}
        try:
            import _winreg
        except ImportError:
            # Std module, so should be around - but you never know!
            return proxies
        try:
            internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            proxyEnable = _winreg.QueryValueEx(internetSettings,
                                               'ProxyEnable')[0]
            if proxyEnable:
                # The value is returned as Unicode; convert it to a byte
                # string to avoid problems further down the line.
                proxyServer = str(_winreg.QueryValueEx(internetSettings,
                                                       'ProxyServer')[0])
                if '=' in proxyServer:
                    # Per-protocol settings
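                    # A per-protocol ProxyServer value typically looks
                    # something like 'http=proxy:80;https=proxy:443'
                    # (illustrative example only).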
                    for p in proxyServer.split(';'):
                        protocol, address = p.split('=', 1)
                        # See if address has a type:// prefix
                        import re
                        if not re.match('^([^/:]+)://', address):
                            address = '%s://%s' % (protocol, address)
                        proxies[protocol] = address
                else:
                    # Use one setting for all protocols
                    if proxyServer[:5] == 'http:':
                        proxies['http'] = proxyServer
                    else:
                        proxies['http'] = 'http://%s' % proxyServer
                        proxies['https'] = 'https://%s' % proxyServer
                        proxies['ftp'] = 'ftp://%s' % proxyServer
            internetSettings.Close()
        except (WindowsError, ValueError, TypeError):
            # Either the registry key was not found, or a value was in an
            # unexpected format; 'proxies' is already an empty dict, so
            # there is nothing more to do.
            pass
        return proxies

    def getproxies():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Returns settings gathered from the environment, if specified,
        or the registry.

        """
        return getproxies_environment() or getproxies_registry()

    def proxy_bypass_registry(host):
        try:
            import _winreg
            import re
        except ImportError:
            # Std modules, so should be around - but you never know!
            return 0
        try:
            internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            proxyEnable = _winreg.QueryValueEx(internetSettings,
                                               'ProxyEnable')[0]
            proxyOverride = str(_winreg.QueryValueEx(internetSettings,
                                                     'ProxyOverride')[0])
            # (The str() above converts the Unicode registry value to a
            # byte string to avoid problems further down the line.)
        except WindowsError:
            return 0
        if not proxyEnable or not proxyOverride:
            return 0
        # try to make a host list from name and IP address.
        rawHost, port = splitport(host)
        host = [rawHost]
        try:
            addr = socket.gethostbyname(rawHost)
            if addr != rawHost:
                host.append(addr)
        except socket.error:
            pass
        try:
            fqdn = socket.getfqdn(rawHost)
            if fqdn != rawHost:
                host.append(fqdn)
        except socket.error:
            pass
        # Build the list of override patterns from the registry entry:
        # entries are separated by ';'; the special '<local>' entry means
        # "bypass the proxy for simple host names" (names without a dot).
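        # A ProxyOverride value looks something like
        # 'localhost;127.*;10.*;*.example.com;<local>' (illustrative only).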
        proxyOverride = proxyOverride.split(';')
        # now check if we match one of the registry values.
        for test in proxyOverride:
            if test == '<local>':
                if '.' not in rawHost:
                    return 1
            test = test.replace(".", r"\.")     # mask dots
            test = test.replace("*", r".*")     # change glob sequence
            test = test.replace("?", r".")      # change glob char
            for val in host:
                # print "%s <--> %s" %( test, val )
                if re.match(test, val, re.I):
                    return 1
        return 0

    def proxy_bypass(host):
        """Return 1 if the host should be accessed directly (bypassing
        the proxy), 0 otherwise.

        Checks the no_proxy environment variable if proxy-related
        environment variables are set, and the Windows registry otherwise.

        """
        if getproxies_environment():
            return proxy_bypass_environment(host)
        else:
            return proxy_bypass_registry(host)

else:
    # By default use environment variables
    getproxies = getproxies_environment
    proxy_bypass = proxy_bypass_environment

# Test and time quote() and unquote()
def test1():
    s = ''
    for i in range(256): s = s + chr(i)
    s = s*4
    t0 = time.time()
    qs = quote(s)
    uqs = unquote(qs)
    t1 = time.time()
    if uqs != s:
        print 'Wrong!'
    print repr(s)
    print repr(qs)
    print repr(uqs)
    print round(t1 - t0, 3), 'sec'


def reporthook(blocknum, blocksize, totalsize):
    # Report during remote transfers
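    # (This matches the callback signature urlretrieve() expects: the
    # hook is called with the block count so far, the block size, and
    # the total size, or -1 when the total size is unknown.)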
    print "Block number: %d, Block size: %d, Total size: %d" % (
        blocknum, blocksize, totalsize)

# Test program
def test(args=[]):
    if not args:
        args = [
            '/etc/passwd',
            'file:/etc/passwd',
            'file://localhost/etc/passwd',
            'ftp://ftp.gnu.org/pub/README',
            'http://www.python.org/index.html',
            ]
        if hasattr(URLopener, "open_https"):
            args.append('https://synergy.as.cmu.edu/~geek/')
    try:
        for url in args:
            print '-'*10, url, '-'*10
            fn, h = urlretrieve(url, None, reporthook)
            print fn
            if h:
                print '======'
                for k in h.keys(): print k + ':', h[k]
                print '======'
            with open(fn, 'rb') as fp:
                data = fp.read()
            if '\r' in data:
                table = string.maketrans("", "")
                data = data.translate(table, "\r")
            print data
            fn, h = None, None
        print '-'*40
    finally:
        urlcleanup()

def main():
    import getopt, sys
    try:
        opts, args = getopt.getopt(sys.argv[1:], "th")
    except getopt.error, msg:
        print msg
        print "Use -h for help"
        return
    t = 0
    for o, a in opts:
        if o == '-t':
            t = t + 1
        if o == '-h':
            print "Usage: python urllib.py [-t] [url ...]"
            print "-t runs self-test;",
            print "otherwise, contents of urls are printed"
            return
    if t:
        if t > 1:
            test1()
        test(args)
    else:
        if not args:
            print "Use -h for help"
        for url in args:
            print urlopen(url).read(),

# Run test program when run as a script
if __name__ == '__main__':
    main()