1"""Open an arbitrary URL. 2 3See the following document for more info on URLs: 4"Names and Addresses, URIs, URLs, URNs, URCs", at 5http://www.w3.org/pub/WWW/Addressing/Overview.html 6 7See also the HTTP spec (from which the error codes are derived): 8"HTTP - Hypertext Transfer Protocol", at 9http://www.w3.org/pub/WWW/Protocols/ 10 11Related standards and specs: 12- RFC1808: the "relative URL" spec. (authoritative status) 13- RFC1738 - the "URL standard". (authoritative status) 14- RFC1630 - the "URI spec". (informational status) 15 16The object returned by URLopener().open(file) will differ per 17protocol. All you know is that is has methods read(), readline(), 18readlines(), fileno(), close() and info(). The read*(), fileno() 19and close() methods work like those of open files. 20The info() method returns a mimetools.Message object which can be 21used to query various info about the object, if available. 22(mimetools.Message objects are queried with the getheader() method.) 23""" 24 25import string 26import socket 27import os 28import time 29import sys 30import base64 31import re 32 33from urlparse import urljoin as basejoin 34 35__all__ = ["urlopen", "URLopener", "FancyURLopener", "urlretrieve", 36 "urlcleanup", "quote", "quote_plus", "unquote", "unquote_plus", 37 "urlencode", "url2pathname", "pathname2url", "splittag", 38 "localhost", "thishost", "ftperrors", "basejoin", "unwrap", 39 "splittype", "splithost", "splituser", "splitpasswd", "splitport", 40 "splitnport", "splitquery", "splitattr", "splitvalue", 41 "getproxies"] 42 43__version__ = '1.17' # XXX This version is not always updated :-( 44 45MAXFTPCACHE = 10 # Trim the ftp cache beyond this size 46 47# Helper for non-unix systems 48if os.name == 'nt': 49 from nturl2path import url2pathname, pathname2url 50elif os.name == 'riscos': 51 from rourl2path import url2pathname, pathname2url 52else: 53 def url2pathname(pathname): 54 """OS-specific conversion from a relative URL of the 'file' scheme 55 to a file 
system path; not recommended for general use.""" 56 return unquote(pathname) 57 58 def pathname2url(pathname): 59 """OS-specific conversion from a file system path to a relative URL 60 of the 'file' scheme; not recommended for general use.""" 61 return quote(pathname) 62 63# This really consists of two pieces: 64# (1) a class which handles opening of all sorts of URLs 65# (plus assorted utilities etc.) 66# (2) a set of functions for parsing URLs 67# XXX Should these be separated out into different modules? 68 69 70# Shortcut for basic usage 71_urlopener = None 72def urlopen(url, data=None, proxies=None, context=None): 73 """Create a file-like object for the specified URL to read from.""" 74 from warnings import warnpy3k 75 warnpy3k("urllib.urlopen() has been removed in Python 3.0 in " 76 "favor of urllib2.urlopen()", stacklevel=2) 77 78 global _urlopener 79 if proxies is not None or context is not None: 80 opener = FancyURLopener(proxies=proxies, context=context) 81 elif not _urlopener: 82 opener = FancyURLopener() 83 _urlopener = opener 84 else: 85 opener = _urlopener 86 if data is None: 87 return opener.open(url) 88 else: 89 return opener.open(url, data) 90def urlretrieve(url, filename=None, reporthook=None, data=None, context=None): 91 global _urlopener 92 if context is not None: 93 opener = FancyURLopener(context=context) 94 elif not _urlopener: 95 _urlopener = opener = FancyURLopener() 96 else: 97 opener = _urlopener 98 return opener.retrieve(url, filename, reporthook, data) 99def urlcleanup(): 100 if _urlopener: 101 _urlopener.cleanup() 102 _safe_quoters.clear() 103 ftpcache.clear() 104 105# check for SSL 106try: 107 import ssl 108except: 109 _have_ssl = False 110else: 111 _have_ssl = True 112 113# exception raised when downloaded size does not match content-length 114class ContentTooShortError(IOError): 115 def __init__(self, message, content): 116 IOError.__init__(self, message) 117 self.content = content 118 119ftpcache = {} 120class URLopener: 121 
    """Class to open URLs.
    This is a class rather than just a subroutine because we may need
    more than one set of global protocol-specific options.
    Note -- this is a base class for those who don't want the
    automatic handling of errors type 302 (relocated) and 401
    (authorization needed)."""

    # Class-level default so cleanup() is safe even if __init__ never ran.
    __tempfiles = None

    version = "Python-urllib/%s" % __version__

    # Constructor
    def __init__(self, proxies=None, context=None, **x509):
        if proxies is None:
            proxies = getproxies()
        # Python 2 dicts have has_key(); this rejects e.g. a proxy list.
        assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
        self.proxies = proxies
        self.key_file = x509.get('key_file')
        self.cert_file = x509.get('cert_file')
        self.context = context
        self.addheaders = [('User-Agent', self.version), ('Accept', '*/*')]
        self.__tempfiles = []
        self.__unlink = os.unlink # See cleanup()
        self.tempcache = None
        # Undocumented feature: if you assign {} to tempcache,
        # it is used to cache files retrieved with
        # self.retrieve(). This is not enabled by default
        # since it does not work for changing documents (and I
        # haven't got the logic to check expiration headers
        # yet).
        self.ftpcache = ftpcache
        # Undocumented feature: you can use a different
        # ftp cache by assigning to the .ftpcache member;
        # in case you want logically independent URL openers
        # XXX This is not threadsafe. Bah.

    def __del__(self):
        self.close()

    def close(self):
        self.cleanup()

    def cleanup(self):
        # This code sometimes runs when the rest of this module
        # has already been deleted, so it can't use any globals
        # or import anything.
        if self.__tempfiles:
            for file in self.__tempfiles:
                try:
                    # os.unlink was captured in __init__ for exactly
                    # this interpreter-shutdown situation.
                    self.__unlink(file)
                except OSError:
                    pass
            del self.__tempfiles[:]
        if self.tempcache:
            self.tempcache.clear()

    def addheader(self, *args):
        """Add a header to be used by the HTTP interface only
        e.g. u.addheader('Accept', 'sound/basic')"""
        self.addheaders.append(args)

    # External interface
    def open(self, fullurl, data=None):
        """Use URLopener().open(file) instead of open(file, 'r')."""
        fullurl = unwrap(toBytes(fullurl))
        # percent encode url, fixing lame server errors for e.g, like space
        # within url paths.
        fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]|")
        if self.tempcache and fullurl in self.tempcache:
            filename, headers = self.tempcache[fullurl]
            fp = open(filename, 'rb')
            return addinfourl(fp, headers, fullurl)
        urltype, url = splittype(fullurl)
        if not urltype:
            # No scheme at all: treat as a local file path.
            urltype = 'file'
        if urltype in self.proxies:
            proxy = self.proxies[urltype]
            urltype, proxyhost = splittype(proxy)
            host, selector = splithost(proxyhost)
            url = (host, fullurl) # Signal special case to open_*()
        else:
            proxy = None
        # Dispatch to open_<scheme>(), e.g. open_http / open_ftp.
        name = 'open_' + urltype
        self.type = urltype
        name = name.replace('-', '_')
        if not hasattr(self, name):
            if proxy:
                return self.open_unknown_proxy(proxy, fullurl, data)
            else:
                return self.open_unknown(fullurl, data)
        try:
            if data is None:
                return getattr(self, name)(url)
            else:
                return getattr(self, name)(url, data)
        except socket.error, msg:
            # Re-raise as IOError, preserving the original traceback.
            raise IOError, ('socket error', msg), sys.exc_info()[2]

    def open_unknown(self, fullurl, data=None):
        """Overridable interface to open unknown URL type."""
        type, url = splittype(fullurl)
        raise IOError, ('url error', 'unknown url type', type)

    def open_unknown_proxy(self, proxy, fullurl, data=None):
        """Overridable interface to open unknown URL type."""
        type, url = splittype(fullurl)
        raise IOError, ('url error', 'invalid proxy for %s' % type, proxy)

    # External interface
    def retrieve(self, url, filename=None, reporthook=None, data=None):
        """retrieve(url) returns (filename, headers) for a local object
        or (tempfilename, headers) for a remote object."""
        url = unwrap(toBytes(url))
        if self.tempcache and url in self.tempcache:
            return self.tempcache[url]
        type, url1 = splittype(url)
        if filename is None and (not type or type == 'file'):
            # Local file and no target name requested: no copy is needed,
            # just hand back the existing path.
            try:
                fp = self.open_local_file(url1)
                hdrs = fp.info()
                fp.close()
                return url2pathname(splithost(url1)[1]), hdrs
            except IOError:
                pass
        fp = self.open(url, data)
        try:
            headers = fp.info()
            if filename:
                tfp = open(filename, 'wb')
            else:
                # No filename given: download into a temp file whose suffix
                # is derived from the URL's path component.
                import tempfile
                garbage, path = splittype(url)
                garbage, path = splithost(path or "")
                path, garbage = splitquery(path or "")
                path, garbage = splitattr(path or "")
                suffix = os.path.splitext(path)[1]
                (fd, filename) = tempfile.mkstemp(suffix)
                self.__tempfiles.append(filename)
                tfp = os.fdopen(fd, 'wb')
            try:
                result = filename, headers
                if self.tempcache is not None:
                    self.tempcache[url] = result
                # Copy in 8 KiB blocks, invoking reporthook per block.
                bs = 1024*8
                size = -1
                read = 0
                blocknum = 0
                if "content-length" in headers:
                    size = int(headers["Content-Length"])
                if reporthook:
                    reporthook(blocknum, bs, size)
                while 1:
                    block = fp.read(bs)
                    if block == "":
                        break
                    read += len(block)
                    tfp.write(block)
                    blocknum += 1
                    if reporthook:
                        reporthook(blocknum, bs, size)
            finally:
                tfp.close()
        finally:
            fp.close()

        # raise exception if actual size does not match content-length header
        if size >= 0 and read < size:
            raise ContentTooShortError("retrieval incomplete: got only %i out "
                                       "of %i bytes" % (read, size), result)

        return result

    # Each method named open_<type> knows how to open that type of URL

    def open_http(self, url, data=None):
        """Use HTTP protocol."""
        import httplib
        user_passwd = None
        proxy_passwd= None
        if isinstance(url, str):
            # Direct (non-proxy) case: url is '//host[:port]/path'.
            host, selector = splithost(url)
            if host:
                user_passwd, host = splituser(host)
                host = unquote(host)
            realhost = host
        else:
            # Proxy case (see open()): url is the tuple (proxyhost, fullurl).
            host, selector = url
            # check whether the proxy contains authorization information
            proxy_passwd, host = splituser(host)
            # now we proceed with the url we want to obtain
            urltype, rest = splittype(selector)
            url = rest
            user_passwd = None
            if urltype.lower() != 'http':
                realhost = None
            else:
                realhost, rest = splithost(rest)
                if realhost:
                    user_passwd, realhost = splituser(realhost)
                if user_passwd:
                    selector = "%s://%s%s" % (urltype, realhost, rest)
                if proxy_bypass(realhost):
                    # Target host is exempt from proxying: talk to it directly.
                    host = realhost

            #print "proxy via http:", host, selector
        if not host: raise IOError, ('http error', 'no host given')

        if proxy_passwd:
            proxy_passwd = unquote(proxy_passwd)
            proxy_auth = base64.b64encode(proxy_passwd).strip()
        else:
            proxy_auth = None

        if user_passwd:
            user_passwd = unquote(user_passwd)
            auth = base64.b64encode(user_passwd).strip()
        else:
            auth = None
        h = httplib.HTTP(host)
        if data is not None:
            # POST; the caller is expected to have url-encoded `data`.
            h.putrequest('POST', selector)
            h.putheader('Content-Type', 'application/x-www-form-urlencoded')
            h.putheader('Content-Length', '%d' % len(data))
        else:
            h.putrequest('GET', selector)
        if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
        if auth: h.putheader('Authorization', 'Basic %s' % auth)
        if realhost: h.putheader('Host', realhost)
        for args in self.addheaders: h.putheader(*args)
        h.endheaders(data)
        errcode, errmsg, headers = h.getreply()
        fp = h.getfile()
        if errcode == -1:
            if fp: fp.close()
            # something went wrong with the HTTP status line
            raise IOError, ('http protocol error', 0,
                            'got a bad status line', None)
        # According to RFC 2616, "2xx" code indicates that the client's
        # request was successfully received, understood, and accepted.
        if (200 <= errcode < 300):
            return addinfourl(fp, headers, "http:" + url, errcode)
        else:
            if data is None:
                return self.http_error(url, fp, errcode, errmsg, headers)
            else:
                return self.http_error(url, fp, errcode, errmsg, headers, data)

    def http_error(self, url, fp, errcode, errmsg, headers, data=None):
        """Handle http errors.
        Derived class can override this, or provide specific handlers
        named http_error_DDD where DDD is the 3-digit error code."""
        # First check if there's a specific handler for this error
        name = 'http_error_%d' % errcode
        if hasattr(self, name):
            method = getattr(self, name)
            if data is None:
                result = method(url, fp, errcode, errmsg, headers)
            else:
                result = method(url, fp, errcode, errmsg, headers, data)
            # A falsy result means the specific handler declined to handle it.
            if result: return result
        return self.http_error_default(url, fp, errcode, errmsg, headers)

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handler: close the connection and raise IOError."""
        fp.close()
        raise IOError, ('http error', errcode, errmsg, headers)

    if _have_ssl:
        # open_https mirrors open_http but uses httplib.HTTPS and the
        # opener's key/cert/context; it only exists when ssl imported.
        def open_https(self, url, data=None):
            """Use HTTPS protocol."""

            import httplib
            user_passwd = None
            proxy_passwd = None
            if isinstance(url, str):
                host, selector = splithost(url)
                if host:
                    user_passwd, host = splituser(host)
                    host = unquote(host)
                realhost = host
            else:
                host, selector = url
                # here, we determine, whether the proxy contains authorization information
                proxy_passwd, host = splituser(host)
                urltype, rest = splittype(selector)
                url = rest
                user_passwd = None
                if urltype.lower() != 'https':
                    realhost = None
                else:
                    realhost, rest = splithost(rest)
                    if realhost:
                        user_passwd, realhost = splituser(realhost)
                    if user_passwd:
                        selector = "%s://%s%s" % (urltype, realhost, rest)
                #print "proxy via https:", host, selector
            if not host: raise IOError, ('https error', 'no host given')
            if proxy_passwd:
                proxy_passwd = unquote(proxy_passwd)
                proxy_auth = base64.b64encode(proxy_passwd).strip()
            else:
                proxy_auth = None
            if user_passwd:
                user_passwd = unquote(user_passwd)
                auth = base64.b64encode(user_passwd).strip()
            else:
                auth = None
            h = httplib.HTTPS(host, 0,
                              key_file=self.key_file,
                              cert_file=self.cert_file,
                              context=self.context)
            if data is not None:
                h.putrequest('POST', selector)
                h.putheader('Content-Type',
                            'application/x-www-form-urlencoded')
                h.putheader('Content-Length', '%d' % len(data))
            else:
                h.putrequest('GET', selector)
            if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
            if auth: h.putheader('Authorization', 'Basic %s' % auth)
            if realhost: h.putheader('Host', realhost)
            for args in self.addheaders: h.putheader(*args)
            h.endheaders(data)
            errcode, errmsg, headers = h.getreply()
            fp = h.getfile()
            if errcode == -1:
                if fp: fp.close()
                # something went wrong with the HTTP status line
                raise IOError, ('http protocol error', 0,
                                'got a bad status line', None)
            # According to RFC 2616, "2xx" code indicates that the client's
            # request was successfully received, understood, and accepted.
            if (200 <= errcode < 300):
                return addinfourl(fp, headers, "https:" + url, errcode)
            else:
                if data is None:
                    return self.http_error(url, fp, errcode, errmsg, headers)
                else:
                    return self.http_error(url, fp, errcode, errmsg, headers,
                                           data)

    def open_file(self, url):
        """Use local file or FTP depending on form of URL."""
        if not isinstance(url, str):
            raise IOError, ('file error', 'proxy support for file protocol currently not implemented')
        # 'file://host/...' with a non-local, non-empty host falls back to FTP.
        if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
            return self.open_ftp(url)
        else:
            return self.open_local_file(url)

    def open_local_file(self, url):
        """Use local file."""
        import mimetypes, mimetools, email.utils
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO
        host, file = splithost(url)
        localname = url2pathname(file)
        try:
            stats = os.stat(localname)
        except OSError, e:
            # Present missing/unreadable files as IOError like other schemes.
            raise IOError(e.errno, e.strerror, e.filename)
        size = stats.st_size
        modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
        mtype = mimetypes.guess_type(url)[0]
        # Synthesize HTTP-style headers from the stat() results.
        headers = mimetools.Message(StringIO(
            'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
            (mtype or 'text/plain', size, modified)))
        if not host:
            urlfile = file
            if file[:1] == '/':
                urlfile = 'file://' + file
            elif file[:2] == './':
                raise ValueError("local file url may start with / or file:. Unknown url of type: %s" % url)
            return addinfourl(open(localname, 'rb'),
                              headers, urlfile)
        host, port = splitport(host)
        # Only serve the file if the named host resolves to this machine.
        if not port \
           and socket.gethostbyname(host) in (localhost(), thishost()):
            urlfile = file
            if file[:1] == '/':
                urlfile = 'file://' + file
            return addinfourl(open(localname, 'rb'),
                              headers, urlfile)
        raise IOError, ('local file error', 'not on local host')

    def open_ftp(self, url):
        """Use FTP protocol."""
        if not isinstance(url, str):
            raise IOError, ('ftp error', 'proxy support for ftp protocol currently not implemented')
        import mimetypes, mimetools
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO
        host, path = splithost(url)
        if not host: raise IOError, ('ftp error', 'no host given')
        host, port = splitport(host)
        user, host = splituser(host)
        if user: user, passwd = splitpasswd(user)
        else: passwd = None
        host = unquote(host)
        user = user or ''
        passwd = passwd or ''
        host = socket.gethostbyname(host)
        if not port:
            import ftplib
            port = ftplib.FTP_PORT
        else:
            port = int(port)
        path, attrs = splitattr(path)
        path = unquote(path)
        dirs = path.split('/')
        dirs, file = dirs[:-1], dirs[-1]
        if dirs and not dirs[0]: dirs = dirs[1:]
        if dirs and not dirs[0]: dirs[0] = '/'
        # Connections are cached per (user, host, port, directory).
        key = user, host, port, '/'.join(dirs)
        # XXX thread unsafe!
        if len(self.ftpcache) > MAXFTPCACHE:
            # Prune the cache, rather arbitrarily
            for k in self.ftpcache.keys():
                if k != key:
                    v = self.ftpcache[k]
                    del self.ftpcache[k]
                    v.close()
        try:
            if not key in self.ftpcache:
                self.ftpcache[key] = \
                    ftpwrapper(user, passwd, host, port, dirs)
            # Default transfer type: directory listing if no file named,
            # otherwise binary ('I'mage); a ';type=x' URL attribute overrides.
            if not file: type = 'D'
            else: type = 'I'
            for attr in attrs:
                attr, value = splitvalue(attr)
                if attr.lower() == 'type' and \
                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
                    type = value.upper()
            (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
            mtype = mimetypes.guess_type("ftp:" + url)[0]
            headers = ""
            if mtype:
                headers += "Content-Type: %s\n" % mtype
            if retrlen is not None and retrlen >= 0:
                headers += "Content-Length: %d\n" % retrlen
            headers = mimetools.Message(StringIO(headers))
            return addinfourl(fp, headers, "ftp:" + url)
        except ftperrors(), msg:
            raise IOError, ('ftp error', msg), sys.exc_info()[2]

    def open_data(self, url, data=None):
        """Use "data" URL."""
        if not isinstance(url, str):
            raise IOError, ('data error', 'proxy support for data protocol currently not implemented')
        # ignore POSTed data
        #
        # syntax of data URLs:
        # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
        # mediatype := [ type "/" subtype ] *( ";" parameter )
        # data      := *urlchar
        # parameter := attribute "=" value
        import mimetools
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO
        try:
            [type, data] = url.split(',', 1)
        except ValueError:
            raise IOError, ('data error', 'bad data URL')
        if not type:
            type = 'text/plain;charset=US-ASCII'
        # A trailing ';base64' (no '=') marks the encoding; any other
        # ';attr=value' parameter stays part of the media type.
        semi = type.rfind(';')
        if semi >= 0 and '=' not in type[semi:]:
            encoding = type[semi+1:]
            type = type[:semi]
        else:
            encoding = ''
        # Build a fake HTTP-style response so addinfourl can wrap it.
        msg = []
        msg.append('Date: %s'%time.strftime('%a, %d %b %Y %H:%M:%S GMT',
                                            time.gmtime(time.time())))
        msg.append('Content-type: %s' % type)
        if encoding == 'base64':
            data = base64.decodestring(data)
        else:
            data = unquote(data)
        msg.append('Content-Length: %d' % len(data))
        msg.append('')
        msg.append(data)
        msg = '\n'.join(msg)
        f = StringIO(msg)
        headers = mimetools.Message(f, 0)
        #f.fileno = None     # needed for addinfourl
        return addinfourl(f, headers, url)


class FancyURLopener(URLopener):
    """Derived class with handlers for errors we can handle (perhaps)."""

    def __init__(self, *args, **kwargs):
        URLopener.__init__(self, *args, **kwargs)
        # Maps 'realm@host' -> (user, passwd); see get_user_passwd().
        self.auth_cache = {}
        self.tries = 0
        self.maxtries = 10

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handling -- don't raise an exception."""
        return addinfourl(fp, headers, "http:" + url, errcode)

    def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 302 -- relocated (temporarily)."""
        self.tries += 1
        try:
            # Guard against redirect loops by capping consecutive redirects.
            if self.maxtries and self.tries >= self.maxtries:
                if hasattr(self, "http_error_500"):
                    meth = self.http_error_500
                else:
                    meth = self.http_error_default
                return meth(url, fp, 500,
                            "Internal Server Error: Redirect Recursion",
                            headers)
            result = self.redirect_internal(url, fp, errcode, errmsg,
                                            headers, data)
            return result
        finally:
            self.tries = 0

    def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
        # Prefer the standard Location header; fall back to URI.
        if 'location' in headers:
            newurl = headers['location']
        elif 'uri' in headers:
            newurl = headers['uri']
        else:
            return
        fp.close()
        # In case the server sent a relative URL, join with original:
        newurl = basejoin(self.type + ":" + url, newurl)

        # For security reasons we do not allow redirects to protocols
        # other than HTTP, HTTPS or FTP.
        newurl_lower = newurl.lower()
        if not (newurl_lower.startswith('http://') or
                newurl_lower.startswith('https://') or
                newurl_lower.startswith('ftp://')):
            raise IOError('redirect error', errcode,
                          errmsg + " - Redirection to url '%s' is not allowed" %
                          newurl,
                          headers)

        return self.open(newurl)

    def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 301 -- also relocated (permanently)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 303 -- also relocated (essentially identical to 302)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 307 -- relocated, but turn POST into error."""
        if data is None:
            return self.http_error_302(url, fp, errcode, errmsg, headers, data)
        else:
            return self.http_error_default(url, fp, errcode, errmsg, headers)

    def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 401 -- authentication required.
        This function supports Basic authentication only."""
        if not 'www-authenticate' in headers:
            # http_error_default (base class) raises IOError, so this
            # call does not return.
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        stuff = headers['www-authenticate']
        import re
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
        if not match:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        # Dispatch to retry_<scheme>_basic_auth for the current URL type.
        name = 'retry_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self,name)(url, realm)
        else:
            return getattr(self,name)(url, realm, data)

    def http_error_407(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 407 -- proxy authentication required.
        This function supports Basic authentication only."""
        if not 'proxy-authenticate' in headers:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        stuff = headers['proxy-authenticate']
        import re
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
        if not match:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        name = 'retry_proxy_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self,name)(url, realm)
        else:
            return getattr(self,name)(url, realm, data)

    def retry_proxy_http_basic_auth(self, url, realm, data=None):
        # Rewrite the http proxy setting to embed user:passwd, then retry.
        host, selector = splithost(url)
        newurl = 'http://' + host + selector
        proxy = self.proxies['http']
        urltype, proxyhost = splittype(proxy)
        proxyhost, proxyselector = splithost(proxyhost)
        # i > 0 iff the proxy already carried credentials; that also makes
        # get_user_passwd drop the (evidently wrong) cached entry.
        i = proxyhost.find('@') + 1
        proxyhost = proxyhost[i:]
        user, passwd = self.get_user_passwd(proxyhost, realm, i)
        if not (user or passwd): return None
        proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
        self.proxies['http'] = 'http://' + proxyhost + proxyselector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_proxy_https_basic_auth(self, url, realm, data=None):
        host, selector = splithost(url)
        newurl = 'https://' + host + selector
        proxy = self.proxies['https']
        urltype, proxyhost = splittype(proxy)
        proxyhost, proxyselector = splithost(proxyhost)
        i = proxyhost.find('@') + 1
        proxyhost = proxyhost[i:]
        user, passwd = self.get_user_passwd(proxyhost, realm, i)
        if not (user or passwd): return None
        proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
        self.proxies['https'] = 'https://' + proxyhost + proxyselector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_http_basic_auth(self, url, realm, data=None):
        # Re-open the URL with user:passwd spliced into the authority.
        host, selector = splithost(url)
        i = host.find('@') + 1
        host = host[i:]
        user, passwd = self.get_user_passwd(host, realm, i)
        if not (user or passwd): return None
        host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
        newurl = 'http://' + host + selector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_https_basic_auth(self, url, realm, data=None):
        host, selector = splithost(url)
        i = host.find('@') + 1
        host = host[i:]
        user, passwd = self.get_user_passwd(host, realm, i)
        if not (user or passwd): return None
        host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
        newurl = 'https://' + host + selector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def get_user_passwd(self, host, realm, clear_cache=0):
        key = realm + '@' + host.lower()
        if key in self.auth_cache:
            if clear_cache:
                # Cached credentials were rejected; forget them and re-prompt.
                del self.auth_cache[key]
            else:
                return self.auth_cache[key]
        user, passwd = self.prompt_user_passwd(host, realm)
        if user or passwd: self.auth_cache[key] = (user, passwd)
        return user, passwd

    def prompt_user_passwd(self, host, realm):
        """Override this in a GUI environment!"""
        import getpass
        try:
            user = raw_input("Enter username for %s at %s: " % (realm,
                                                                host))
            passwd = getpass.getpass("Enter password for %s in %s at %s: " %
                (user, realm, host))
            return user, passwd
        except KeyboardInterrupt:
            print
            return None, None


# Utility functions

# Lazily-resolved, cached host lookups below: each helper computes its
# value once and memoizes it in a module-level variable.

_localhost = None
def localhost():
    """Return the IP address of the magic hostname 'localhost'."""
    global _localhost
    if _localhost is None:
        _localhost = socket.gethostbyname('localhost')
    return _localhost

_thishost = None
def thishost():
    """Return the IP address of the current host."""
    global _thishost
    if _thishost is None:
        try:
            _thishost = socket.gethostbyname(socket.gethostname())
        except socket.gaierror:
            # Unresolvable hostname: fall back to the loopback address.
            _thishost = socket.gethostbyname('localhost')
    return _thishost

_ftperrors = None
def ftperrors():
    """Return the set of errors raised by the FTP class."""
    global _ftperrors
    if _ftperrors is None:
        import ftplib
        _ftperrors = ftplib.all_errors
    return _ftperrors

_noheaders = None
def noheaders():
    """Return an empty mimetools.Message object."""
    global _noheaders
    if _noheaders is None:
        import mimetools
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO
        _noheaders = mimetools.Message(StringIO(), 0)
        _noheaders.fp.close()   # Recycle file descriptor
    return _noheaders


# Utility classes

class ftpwrapper:
    """Class used by open_ftp() for cache of open FTP connections."""

    def __init__(self, user, passwd, host, port, dirs,
                 timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
                 persistent=True):
        self.user = user
        self.passwd = passwd
        self.host = host
        self.port = port
        self.dirs = dirs
        self.timeout = timeout
        # refcount/keepalive govern when the connection really closes;
        # see file_close()/close() below.
        self.refcount = 0
        self.keepalive = persistent
        try:
            self.init()
        except:
            # Ensure the half-opened connection is torn down, then
            # re-raise the original error (bare except is deliberate here).
            self.close()
            raise

    def init(self):
        # (Re)connect and change into the cached directory.
        import ftplib
        self.busy = 0
        self.ftp = ftplib.FTP()
        self.ftp.connect(self.host, self.port, self.timeout)
        self.ftp.login(self.user, self.passwd)
        _target = '/'.join(self.dirs)
        self.ftp.cwd(_target)

    def retrfile(self, file, type):
        import ftplib
        self.endtransfer()
        # 'D' means directory listing (ASCII); anything else is passed
        # through as the FTP TYPE ('A' or 'I').
        if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
        else: cmd = 'TYPE ' + type; isdir = 0
        try:
            self.ftp.voidcmd(cmd)
        except ftplib.all_errors:
            # Connection probably timed out/dropped: reconnect and retry once.
            self.init()
            self.ftp.voidcmd(cmd)
        conn = None
        if file and not isdir:
            # Try to retrieve as a file
            try:
                cmd = 'RETR ' + file
                conn, retrlen = self.ftp.ntransfercmd(cmd)
            except ftplib.error_perm, reason:
                # 550 = "not a plain file"; fall through to a listing below.
                if str(reason)[:3] != '550':
                    raise IOError, ('ftp error', reason), sys.exc_info()[2]
        if not conn:
            # Set transfer mode to ASCII!
            self.ftp.voidcmd('TYPE A')
            # Try a directory listing. Verify that directory exists.
            if file:
                pwd = self.ftp.pwd()
                try:
                    try:
                        # cwd into the target purely to verify it exists.
                        self.ftp.cwd(file)
                    except ftplib.error_perm, reason:
                        raise IOError, ('ftp error', reason), sys.exc_info()[2]
                finally:
                    self.ftp.cwd(pwd)
                cmd = 'LIST ' + file
            else:
                cmd = 'LIST'
            conn, retrlen = self.ftp.ntransfercmd(cmd)
        self.busy = 1
        ftpobj = addclosehook(conn.makefile('rb'), self.file_close)
        self.refcount += 1
        # The makefile()'d object keeps the data stream alive; closing the
        # raw conn here releases the extra reference to the socket.
        conn.close()
        # Pass back both a suitably decorated object and a retrieval length
        return (ftpobj, retrlen)

    def endtransfer(self):
        self.busy = 0

    def close(self):
        # Drop keepalive so the connection dies once outstanding files close.
        self.keepalive = False
        if self.refcount <= 0:
            self.real_close()

    def file_close(self):
        # Called (via addclosehook) when a file handed out by retrfile()
        # is closed.
        self.endtransfer()
        self.refcount -= 1
        if self.refcount <= 0 and not self.keepalive:
            self.real_close()

    def real_close(self):
        self.endtransfer()
        try:
            self.ftp.close()
        except ftperrors():
            pass

class addbase:
    """Base class for addinfo and addclosehook.

    Delegates the file-object protocol (read/readline/readlines/fileno,
    plus iteration when available) to the wrapped fp."""

    def __init__(self, fp):
        self.fp = fp
        self.read = self.fp.read
        self.readline = self.fp.readline
        if hasattr(self.fp, "readlines"): self.readlines = self.fp.readlines
        if hasattr(self.fp, "fileno"):
            self.fileno = self.fp.fileno
        else:
            self.fileno = lambda: None
        if hasattr(self.fp, "__iter__"):
            self.__iter__ = self.fp.__iter__
            if hasattr(self.fp, "next"):
                self.next = self.fp.next

    def __repr__(self):
        return '<%s at %r whose fp = %r>' % (self.__class__.__name__,
                                             id(self), self.fp)

    def close(self):
        # Break the bound-method references so the wrapped file can be
        # garbage collected, then close it.
        self.read = None
        self.readline = None
        self.readlines = None
        self.fileno = None
        if self.fp: self.fp.close()
        self.fp = None

class addclosehook(addbase):
    """Class to add a close hook to an open file."""

    def __init__(self, fp, closehook, *hookargs):
        addbase.__init__(self, fp)
        self.closehook = closehook
        self.hookargs = hookargs

    def close(self):
        try:
            # Fire the hook exactly once (fields are cleared first).
            closehook = self.closehook
            hookargs = self.hookargs
            if closehook:
                self.closehook = None
                self.hookargs = None
                closehook(*hookargs)
        finally:
            addbase.close(self)


class addinfo(addbase):
    """class to add an info() method to an open file."""

    def __init__(self, fp, headers):
        addbase.__init__(self, fp)
        self.headers = headers

    def info(self):
        return self.headers

class addinfourl(addbase):
    """class to add info() and geturl() methods to an open file."""

    def __init__(self, fp, headers, url, code=None):
        addbase.__init__(self, fp)
        self.headers = headers
        self.url = url
        self.code = code

    def info(self):
        return self.headers

    def getcode(self):
        return self.code

    def geturl(self):
        return self.url


# Utilities to parse URLs (most of these return None for missing parts):
# unwrap('<URL:type://host/path>') --> 'type://host/path'
# splittype('type:opaquestring') --> 'type', 'opaquestring'
# splithost('//host[:port]/path') --> 'host[:port]', '/path'
# splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
# splitpasswd('user:passwd') -> 'user', 'passwd'
# splitport('host:port') --> 'host', 'port'
# splitquery('/path?query') --> '/path', 'query'
# splittag('/path#tag') --> '/path', 'tag'
# splitattr('/path;attr1=value1;attr2=value2;...') ->
#   '/path', ['attr1=value1', 'attr2=value2', ...]
# splitvalue('attr=value') --> 'attr', 'value'
# unquote('abc%20def') -> 'abc def'
# quote('abc def') -> 'abc%20def')

try:
    unicode
except NameError:
    # No unicode type (shouldn't happen on Python 2, but stay defensive).
    def _is_unicode(x):
        return 0
else:
    def _is_unicode(x):
        return isinstance(x, unicode)

def toBytes(url):
    """toBytes(u"URL") --> 'URL'."""
    # Most URL schemes require ASCII.
If that changes, the conversion 1060 # can be relaxed 1061 if _is_unicode(url): 1062 try: 1063 url = url.encode("ASCII") 1064 except UnicodeError: 1065 raise UnicodeError("URL " + repr(url) + 1066 " contains non-ASCII characters") 1067 return url 1068 1069def unwrap(url): 1070 """unwrap('<URL:type://host/path>') --> 'type://host/path'.""" 1071 url = url.strip() 1072 if url[:1] == '<' and url[-1:] == '>': 1073 url = url[1:-1].strip() 1074 if url[:4] == 'URL:': url = url[4:].strip() 1075 return url 1076 1077_typeprog = None 1078def splittype(url): 1079 """splittype('type:opaquestring') --> 'type', 'opaquestring'.""" 1080 global _typeprog 1081 if _typeprog is None: 1082 import re 1083 _typeprog = re.compile('^([^/:]+):') 1084 1085 match = _typeprog.match(url) 1086 if match: 1087 scheme = match.group(1) 1088 return scheme.lower(), url[len(scheme) + 1:] 1089 return None, url 1090 1091_hostprog = None 1092def splithost(url): 1093 """splithost('//host[:port]/path') --> 'host[:port]', '/path'.""" 1094 global _hostprog 1095 if _hostprog is None: 1096 _hostprog = re.compile('//([^/#?]*)(.*)', re.DOTALL) 1097 1098 match = _hostprog.match(url) 1099 if match: 1100 host_port = match.group(1) 1101 path = match.group(2) 1102 if path and not path.startswith('/'): 1103 path = '/' + path 1104 return host_port, path 1105 return None, url 1106 1107_userprog = None 1108def splituser(host): 1109 """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'.""" 1110 global _userprog 1111 if _userprog is None: 1112 import re 1113 _userprog = re.compile('^(.*)@(.*)$') 1114 1115 match = _userprog.match(host) 1116 if match: return match.group(1, 2) 1117 return None, host 1118 1119_passwdprog = None 1120def splitpasswd(user): 1121 """splitpasswd('user:passwd') -> 'user', 'passwd'.""" 1122 global _passwdprog 1123 if _passwdprog is None: 1124 import re 1125 _passwdprog = re.compile('^([^:]*):(.*)$',re.S) 1126 1127 match = _passwdprog.match(user) 1128 if match: return 
match.group(1, 2) 1129 return user, None 1130 1131# splittag('/path#tag') --> '/path', 'tag' 1132_portprog = None 1133def splitport(host): 1134 """splitport('host:port') --> 'host', 'port'.""" 1135 global _portprog 1136 if _portprog is None: 1137 import re 1138 _portprog = re.compile('^(.*):([0-9]*)$') 1139 1140 match = _portprog.match(host) 1141 if match: 1142 host, port = match.groups() 1143 if port: 1144 return host, port 1145 return host, None 1146 1147_nportprog = None 1148def splitnport(host, defport=-1): 1149 """Split host and port, returning numeric port. 1150 Return given default port if no ':' found; defaults to -1. 1151 Return numerical port if a valid number are found after ':'. 1152 Return None if ':' but not a valid number.""" 1153 global _nportprog 1154 if _nportprog is None: 1155 import re 1156 _nportprog = re.compile('^(.*):(.*)$') 1157 1158 match = _nportprog.match(host) 1159 if match: 1160 host, port = match.group(1, 2) 1161 if port: 1162 try: 1163 nport = int(port) 1164 except ValueError: 1165 nport = None 1166 return host, nport 1167 return host, defport 1168 1169_queryprog = None 1170def splitquery(url): 1171 """splitquery('/path?query') --> '/path', 'query'.""" 1172 global _queryprog 1173 if _queryprog is None: 1174 import re 1175 _queryprog = re.compile('^(.*)\?([^?]*)$') 1176 1177 match = _queryprog.match(url) 1178 if match: return match.group(1, 2) 1179 return url, None 1180 1181_tagprog = None 1182def splittag(url): 1183 """splittag('/path#tag') --> '/path', 'tag'.""" 1184 global _tagprog 1185 if _tagprog is None: 1186 import re 1187 _tagprog = re.compile('^(.*)#([^#]*)$') 1188 1189 match = _tagprog.match(url) 1190 if match: return match.group(1, 2) 1191 return url, None 1192 1193def splitattr(url): 1194 """splitattr('/path;attr1=value1;attr2=value2;...') -> 1195 '/path', ['attr1=value1', 'attr2=value2', ...].""" 1196 words = url.split(';') 1197 return words[0], words[1:] 1198 1199_valueprog = None 1200def splitvalue(attr): 1201 
"""splitvalue('attr=value') --> 'attr', 'value'.""" 1202 global _valueprog 1203 if _valueprog is None: 1204 import re 1205 _valueprog = re.compile('^([^=]*)=(.*)$') 1206 1207 match = _valueprog.match(attr) 1208 if match: return match.group(1, 2) 1209 return attr, None 1210 1211# urlparse contains a duplicate of this method to avoid a circular import. If 1212# you update this method, also update the copy in urlparse. This code 1213# duplication does not exist in Python3. 1214 1215_hexdig = '0123456789ABCDEFabcdef' 1216_hextochr = dict((a + b, chr(int(a + b, 16))) 1217 for a in _hexdig for b in _hexdig) 1218_asciire = re.compile('([\x00-\x7f]+)') 1219 1220def unquote(s): 1221 """unquote('abc%20def') -> 'abc def'.""" 1222 if _is_unicode(s): 1223 if '%' not in s: 1224 return s 1225 bits = _asciire.split(s) 1226 res = [bits[0]] 1227 append = res.append 1228 for i in range(1, len(bits), 2): 1229 append(unquote(str(bits[i])).decode('latin1')) 1230 append(bits[i + 1]) 1231 return ''.join(res) 1232 1233 bits = s.split('%') 1234 # fastpath 1235 if len(bits) == 1: 1236 return s 1237 res = [bits[0]] 1238 append = res.append 1239 for item in bits[1:]: 1240 try: 1241 append(_hextochr[item[:2]]) 1242 append(item[2:]) 1243 except KeyError: 1244 append('%') 1245 append(item) 1246 return ''.join(res) 1247 1248def unquote_plus(s): 1249 """unquote('%7e/abc+def') -> '~/abc def'""" 1250 s = s.replace('+', ' ') 1251 return unquote(s) 1252 1253always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ' 1254 'abcdefghijklmnopqrstuvwxyz' 1255 '0123456789' '_.-') 1256_safe_map = {} 1257for i, c in zip(xrange(256), str(bytearray(xrange(256)))): 1258 _safe_map[c] = c if (i < 128 and c in always_safe) else '%{:02X}'.format(i) 1259_safe_quoters = {} 1260 1261def quote(s, safe='/'): 1262 """quote('abc def') -> 'abc%20def' 1263 1264 Each part of a URL, e.g. the path info, the query, etc., has a 1265 different set of reserved characters that must be quoted. 
1266 1267 RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists 1268 the following reserved characters. 1269 1270 reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | 1271 "$" | "," 1272 1273 Each of these characters is reserved in some component of a URL, 1274 but not necessarily in all of them. 1275 1276 By default, the quote function is intended for quoting the path 1277 section of a URL. Thus, it will not encode '/'. This character 1278 is reserved, but in typical usage the quote function is being 1279 called on a path where the existing slash characters are used as 1280 reserved characters. 1281 """ 1282 # fastpath 1283 if not s: 1284 if s is None: 1285 raise TypeError('None object cannot be quoted') 1286 return s 1287 cachekey = (safe, always_safe) 1288 try: 1289 (quoter, safe) = _safe_quoters[cachekey] 1290 except KeyError: 1291 safe_map = _safe_map.copy() 1292 safe_map.update([(c, c) for c in safe]) 1293 quoter = safe_map.__getitem__ 1294 safe = always_safe + safe 1295 _safe_quoters[cachekey] = (quoter, safe) 1296 if not s.rstrip(safe): 1297 return s 1298 return ''.join(map(quoter, s)) 1299 1300def quote_plus(s, safe=''): 1301 """Quote the query fragment of a URL; replacing ' ' with '+'""" 1302 if ' ' in s: 1303 s = quote(s, safe + ' ') 1304 return s.replace(' ', '+') 1305 return quote(s, safe) 1306 1307def urlencode(query, doseq=0): 1308 """Encode a sequence of two-element tuples or dictionary into a URL query string. 1309 1310 If any values in the query arg are sequences and doseq is true, each 1311 sequence element is converted to a separate parameter. 1312 1313 If the query arg is a sequence of two-element tuples, the order of the 1314 parameters in the output will match the order of parameters in the 1315 input. 1316 """ 1317 1318 if hasattr(query,"items"): 1319 # mapping objects 1320 query = query.items() 1321 else: 1322 # it's a bother at times that strings and string-like objects are 1323 # sequences... 
1324 try: 1325 # non-sequence items should not work with len() 1326 # non-empty strings will fail this 1327 if len(query) and not isinstance(query[0], tuple): 1328 raise TypeError 1329 # zero-length sequences of all types will get here and succeed, 1330 # but that's a minor nit - since the original implementation 1331 # allowed empty dicts that type of behavior probably should be 1332 # preserved for consistency 1333 except TypeError: 1334 ty,va,tb = sys.exc_info() 1335 raise TypeError, "not a valid non-string sequence or mapping object", tb 1336 1337 l = [] 1338 if not doseq: 1339 # preserve old behavior 1340 for k, v in query: 1341 k = quote_plus(str(k)) 1342 v = quote_plus(str(v)) 1343 l.append(k + '=' + v) 1344 else: 1345 for k, v in query: 1346 k = quote_plus(str(k)) 1347 if isinstance(v, str): 1348 v = quote_plus(v) 1349 l.append(k + '=' + v) 1350 elif _is_unicode(v): 1351 # is there a reasonable way to convert to ASCII? 1352 # encode generates a string, but "replace" or "ignore" 1353 # lose information and "strict" can raise UnicodeError 1354 v = quote_plus(v.encode("ASCII","replace")) 1355 l.append(k + '=' + v) 1356 else: 1357 try: 1358 # is this a sufficient test for sequence-ness? 1359 len(v) 1360 except TypeError: 1361 # not a sequence 1362 v = quote_plus(str(v)) 1363 l.append(k + '=' + v) 1364 else: 1365 # loop over the sequence 1366 for elt in v: 1367 l.append(k + '=' + quote_plus(str(elt))) 1368 return '&'.join(l) 1369 1370# Proxy handling 1371def getproxies_environment(): 1372 """Return a dictionary of scheme -> proxy server URL mappings. 1373 1374 Scan the environment for variables named <scheme>_proxy; 1375 this seems to be the standard convention. In order to prefer lowercase 1376 variables, we process the environment in two passes, first matches any 1377 and second matches only lower case proxies. 1378 1379 If you need a different way, you can pass a proxies dictionary to the 1380 [Fancy]URLopener constructor. 
1381 """ 1382 # Get all variables 1383 proxies = {} 1384 for name, value in os.environ.items(): 1385 name = name.lower() 1386 if value and name[-6:] == '_proxy': 1387 proxies[name[:-6]] = value 1388 1389 # CVE-2016-1000110 - If we are running as CGI script, forget HTTP_PROXY 1390 # (non-all-lowercase) as it may be set from the web server by a "Proxy:" 1391 # header from the client 1392 # If "proxy" is lowercase, it will still be used thanks to the next block 1393 if 'REQUEST_METHOD' in os.environ: 1394 proxies.pop('http', None) 1395 1396 # Get lowercase variables 1397 for name, value in os.environ.items(): 1398 if name[-6:] == '_proxy': 1399 name = name.lower() 1400 if value: 1401 proxies[name[:-6]] = value 1402 else: 1403 proxies.pop(name[:-6], None) 1404 1405 return proxies 1406 1407def proxy_bypass_environment(host, proxies=None): 1408 """Test if proxies should not be used for a particular host. 1409 1410 Checks the proxies dict for the value of no_proxy, which should be a 1411 list of comma separated DNS suffixes, or '*' for all hosts. 
1412 """ 1413 if proxies is None: 1414 proxies = getproxies_environment() 1415 # don't bypass, if no_proxy isn't specified 1416 try: 1417 no_proxy = proxies['no'] 1418 except KeyError: 1419 return 0 1420 # '*' is special case for always bypass 1421 if no_proxy == '*': 1422 return 1 1423 # strip port off host 1424 hostonly, port = splitport(host) 1425 # check if the host ends with any of the DNS suffixes 1426 no_proxy_list = [proxy.strip() for proxy in no_proxy.split(',')] 1427 for name in no_proxy_list: 1428 if name: 1429 name = name.lstrip('.') # ignore leading dots 1430 name = re.escape(name) 1431 pattern = r'(.+\.)?%s$' % name 1432 if (re.match(pattern, hostonly, re.I) 1433 or re.match(pattern, host, re.I)): 1434 return 1 1435 # otherwise, don't bypass 1436 return 0 1437 1438 1439if sys.platform == 'darwin': 1440 from _scproxy import _get_proxy_settings, _get_proxies 1441 1442 def proxy_bypass_macosx_sysconf(host): 1443 """ 1444 Return True iff this host shouldn't be accessed using a proxy 1445 1446 This function uses the MacOSX framework SystemConfiguration 1447 to fetch the proxy information. 1448 """ 1449 import re 1450 import socket 1451 from fnmatch import fnmatch 1452 1453 hostonly, port = splitport(host) 1454 1455 def ip2num(ipAddr): 1456 parts = ipAddr.split('.') 1457 parts = map(int, parts) 1458 if len(parts) != 4: 1459 parts = (parts + [0, 0, 0, 0])[:4] 1460 return (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3] 1461 1462 proxy_settings = _get_proxy_settings() 1463 1464 # Check for simple host names: 1465 if '.' 
not in host: 1466 if proxy_settings['exclude_simple']: 1467 return True 1468 1469 hostIP = None 1470 1471 for value in proxy_settings.get('exceptions', ()): 1472 # Items in the list are strings like these: *.local, 169.254/16 1473 if not value: continue 1474 1475 m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value) 1476 if m is not None: 1477 if hostIP is None: 1478 try: 1479 hostIP = socket.gethostbyname(hostonly) 1480 hostIP = ip2num(hostIP) 1481 except socket.error: 1482 continue 1483 1484 base = ip2num(m.group(1)) 1485 mask = m.group(2) 1486 if mask is None: 1487 mask = 8 * (m.group(1).count('.') + 1) 1488 1489 else: 1490 mask = int(mask[1:]) 1491 mask = 32 - mask 1492 1493 if (hostIP >> mask) == (base >> mask): 1494 return True 1495 1496 elif fnmatch(host, value): 1497 return True 1498 1499 return False 1500 1501 def getproxies_macosx_sysconf(): 1502 """Return a dictionary of scheme -> proxy server URL mappings. 1503 1504 This function uses the MacOSX framework SystemConfiguration 1505 to fetch the proxy information. 1506 """ 1507 return _get_proxies() 1508 1509 def proxy_bypass(host): 1510 """Return True, if a host should be bypassed. 1511 1512 Checks proxy settings gathered from the environment, if specified, or 1513 from the MacOSX framework SystemConfiguration. 1514 """ 1515 proxies = getproxies_environment() 1516 if proxies: 1517 return proxy_bypass_environment(host, proxies) 1518 else: 1519 return proxy_bypass_macosx_sysconf(host) 1520 1521 def getproxies(): 1522 return getproxies_environment() or getproxies_macosx_sysconf() 1523 1524elif os.name == 'nt': 1525 def getproxies_registry(): 1526 """Return a dictionary of scheme -> proxy server URL mappings. 1527 1528 Win32 uses the registry to store proxies. 1529 1530 """ 1531 proxies = {} 1532 try: 1533 import _winreg 1534 except ImportError: 1535 # Std module, so should be around - but you never know! 
1536 return proxies 1537 try: 1538 internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER, 1539 r'Software\Microsoft\Windows\CurrentVersion\Internet Settings') 1540 proxyEnable = _winreg.QueryValueEx(internetSettings, 1541 'ProxyEnable')[0] 1542 if proxyEnable: 1543 # Returned as Unicode but problems if not converted to ASCII 1544 proxyServer = str(_winreg.QueryValueEx(internetSettings, 1545 'ProxyServer')[0]) 1546 if '=' in proxyServer: 1547 # Per-protocol settings 1548 for p in proxyServer.split(';'): 1549 protocol, address = p.split('=', 1) 1550 # See if address has a type:// prefix 1551 import re 1552 if not re.match('^([^/:]+)://', address): 1553 address = '%s://%s' % (protocol, address) 1554 proxies[protocol] = address 1555 else: 1556 # Use one setting for all protocols 1557 if proxyServer[:5] == 'http:': 1558 proxies['http'] = proxyServer 1559 else: 1560 proxies['http'] = 'http://%s' % proxyServer 1561 proxies['https'] = 'https://%s' % proxyServer 1562 proxies['ftp'] = 'ftp://%s' % proxyServer 1563 internetSettings.Close() 1564 except (WindowsError, ValueError, TypeError): 1565 # Either registry key not found etc, or the value in an 1566 # unexpected format. 1567 # proxies already set up to be empty so nothing to do 1568 pass 1569 return proxies 1570 1571 def getproxies(): 1572 """Return a dictionary of scheme -> proxy server URL mappings. 1573 1574 Returns settings gathered from the environment, if specified, 1575 or the registry. 1576 1577 """ 1578 return getproxies_environment() or getproxies_registry() 1579 1580 def proxy_bypass_registry(host): 1581 try: 1582 import _winreg 1583 import re 1584 except ImportError: 1585 # Std modules, so should be around - but you never know! 
1586 return 0 1587 try: 1588 internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER, 1589 r'Software\Microsoft\Windows\CurrentVersion\Internet Settings') 1590 proxyEnable = _winreg.QueryValueEx(internetSettings, 1591 'ProxyEnable')[0] 1592 proxyOverride = str(_winreg.QueryValueEx(internetSettings, 1593 'ProxyOverride')[0]) 1594 # ^^^^ Returned as Unicode but problems if not converted to ASCII 1595 except WindowsError: 1596 return 0 1597 if not proxyEnable or not proxyOverride: 1598 return 0 1599 # try to make a host list from name and IP address. 1600 rawHost, port = splitport(host) 1601 host = [rawHost] 1602 try: 1603 addr = socket.gethostbyname(rawHost) 1604 if addr != rawHost: 1605 host.append(addr) 1606 except socket.error: 1607 pass 1608 try: 1609 fqdn = socket.getfqdn(rawHost) 1610 if fqdn != rawHost: 1611 host.append(fqdn) 1612 except socket.error: 1613 pass 1614 # make a check value list from the registry entry: replace the 1615 # '<local>' string by the localhost entry and the corresponding 1616 # canonical entry. 1617 proxyOverride = proxyOverride.split(';') 1618 # now check if we match one of the registry values. 1619 for test in proxyOverride: 1620 if test == '<local>': 1621 if '.' not in rawHost: 1622 return 1 1623 test = test.replace(".", r"\.") # mask dots 1624 test = test.replace("*", r".*") # change glob sequence 1625 test = test.replace("?", r".") # change glob char 1626 for val in host: 1627 # print "%s <--> %s" %( test, val ) 1628 if re.match(test, val, re.I): 1629 return 1 1630 return 0 1631 1632 def proxy_bypass(host): 1633 """Return True, if the host should be bypassed. 1634 1635 Checks proxy settings gathered from the environment, if specified, 1636 or the registry. 
1637 """ 1638 proxies = getproxies_environment() 1639 if proxies: 1640 return proxy_bypass_environment(host, proxies) 1641 else: 1642 return proxy_bypass_registry(host) 1643 1644else: 1645 # By default use environment variables 1646 getproxies = getproxies_environment 1647 proxy_bypass = proxy_bypass_environment 1648 1649# Test and time quote() and unquote() 1650def test1(): 1651 s = '' 1652 for i in range(256): s = s + chr(i) 1653 s = s*4 1654 t0 = time.time() 1655 qs = quote(s) 1656 uqs = unquote(qs) 1657 t1 = time.time() 1658 if uqs != s: 1659 print 'Wrong!' 1660 print repr(s) 1661 print repr(qs) 1662 print repr(uqs) 1663 print round(t1 - t0, 3), 'sec' 1664 1665 1666def reporthook(blocknum, blocksize, totalsize): 1667 # Report during remote transfers 1668 print "Block number: %d, Block size: %d, Total size: %d" % ( 1669 blocknum, blocksize, totalsize) 1670