1"""An extensible library for opening URLs using a variety of protocols 2 3The simplest way to use this module is to call the urlopen function, 4which accepts a string containing a URL or a Request object (described 5below). It opens the URL and returns the results as file-like 6object; the returned object has some extra methods described below. 7 8The OpenerDirector manages a collection of Handler objects that do 9all the actual work. Each Handler implements a particular protocol or 10option. The OpenerDirector is a composite object that invokes the 11Handlers needed to open the requested URL. For example, the 12HTTPHandler performs HTTP GET and POST requests and deals with 13non-error returns. The HTTPRedirectHandler automatically deals with 14HTTP 301, 302, 303 and 307 redirect errors, and the HTTPDigestAuthHandler 15deals with digest authentication. 16 17urlopen(url, data=None) -- Basic usage is the same as original 18urllib. pass the url and optionally data to post to an HTTP URL, and 19get a file-like object back. One difference is that you can also pass 20a Request instance instead of URL. Raises a URLError (subclass of 21IOError); for HTTP errors, raises an HTTPError, which can also be 22treated as a valid response. 23 24build_opener -- Function that creates a new OpenerDirector instance. 25Will install the default handlers. Accepts one or more Handlers as 26arguments, either instances or Handler classes that it will 27instantiate. If one of the argument is a subclass of the default 28handler, the argument will be installed instead of the default. 29 30install_opener -- Installs a new opener as the default opener. 31 32objects of interest: 33 34OpenerDirector -- Sets up the User Agent as the Python-urllib client and manages 35the Handler classes, while dealing with requests and responses. 36 37Request -- An object that encapsulates the state of a request. The 38state can be as simple as the URL. It can also include extra HTTP 39headers, e.g. a User-Agent. 40 41BaseHandler -- 42 43exceptions: 44URLError -- A subclass of IOError, individual protocols have their own 45specific subclass. 46 47HTTPError -- Also a valid HTTP response, so you can treat an HTTP error 48as an exceptional event or valid response. 49 50internals: 51BaseHandler and parent 52_call_chain conventions 53 54Example usage: 55 56import urllib2 57 58# set up authentication info 59authinfo = urllib2.HTTPBasicAuthHandler() 60authinfo.add_password(realm='PDQ Application', 61 uri='https://mahler:8092/site-updates.py', 62 user='klem', 63 passwd='geheim$parole') 64 65proxy_support = urllib2.ProxyHandler({"http" : "http://ahad-haam:3128"}) 66 67# build a new opener that adds authentication and caching FTP handlers 68opener = urllib2.build_opener(proxy_support, authinfo, urllib2.CacheFTPHandler) 69 70# install it 71urllib2.install_opener(opener) 72 73f = urllib2.urlopen('http://www.python.org/') 74 75 76""" 77 78# XXX issues: 79# If an authentication error handler that tries to perform 80# authentication for some reason but fails, how should the error be 81# signalled? The client needs to know the HTTP error code. But if 82# the handler knows that the problem was, e.g., that it didn't know 83# that hash algo that requested in the challenge, it would be good to 84# pass that information along to the client, too. 85# ftp errors aren't handled cleanly 86# check digest against correct (i.e. non-apache) implementation 87 88# Possible extensions: 89# complex proxies XXX not sure what exactly was meant by this 90# abstract factory for opener 91 92import base64 93import hashlib 94import httplib 95import mimetools 96import os 97import posixpath 98import random 99import re 100import socket 101import sys 102import time 103import urlparse 104import bisect 105import warnings 106 107try: 108 from cStringIO import StringIO 109except ImportError: 110 from StringIO import StringIO 111 112# check for SSL 113try: 114 import ssl 115except ImportError: 116 _have_ssl = False 117else: 118 _have_ssl = True 119 120from urllib import (unwrap, unquote, splittype, splithost, quote, 121 addinfourl, splitport, splittag, toBytes, 122 splitattr, ftpwrapper, splituser, splitpasswd, splitvalue) 123 124# support for FileHandler, proxies via environment variables 125from urllib import localhost, url2pathname, getproxies, proxy_bypass 126 127# used in User-Agent header sent 128__version__ = sys.version[:3] 129 130_opener = None 131def urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, 132 cafile=None, capath=None, cadefault=False, context=None): 133 global _opener 134 if cafile or capath or cadefault: 135 if context is not None: 136 raise ValueError( 137 "You can't pass both context and any of cafile, capath, and " 138 "cadefault" 139 ) 140 if not _have_ssl: 141 raise ValueError('SSL support not available') 142 context = ssl.create_default_context(purpose=ssl.Purpose.SERVER_AUTH, 143 cafile=cafile, 144 capath=capath) 145 https_handler = HTTPSHandler(context=context) 146 opener = build_opener(https_handler) 147 elif context: 148 https_handler = HTTPSHandler(context=context) 149 opener = build_opener(https_handler) 150 elif _opener is None: 151 _opener = opener = build_opener() 152 else: 153 opener = _opener 154 return opener.open(url, data, timeout) 155 156def install_opener(opener): 157 global _opener 158 _opener = opener 159 160# do these error classes make sense? 161# make sure all of the IOError stuff is overridden. we just want to be 162# subtypes. 163 164class URLError(IOError): 165 # URLError is a sub-type of IOError, but it doesn't share any of 166 # the implementation. need to override __init__ and __str__. 167 # It sets self.args for compatibility with other EnvironmentError 168 # subclasses, but args doesn't have the typical format with errno in 169 # slot 0 and strerror in slot 1. This may be better than nothing. 170 def __init__(self, reason): 171 self.args = reason, 172 self.reason = reason 173 174 def __str__(self): 175 return '<urlopen error %s>' % self.reason 176 177class HTTPError(URLError, addinfourl): 178 """Raised when HTTP error occurs, but also acts like non-error return""" 179 __super_init = addinfourl.__init__ 180 181 def __init__(self, url, code, msg, hdrs, fp): 182 self.code = code 183 self.msg = msg 184 self.hdrs = hdrs 185 self.fp = fp 186 self.filename = url 187 # The addinfourl classes depend on fp being a valid file 188 # object. In some cases, the HTTPError may not have a valid 189 # file object. If this happens, the simplest workaround is to 190 # not initialize the base classes. 191 if fp is not None: 192 self.__super_init(fp, hdrs, url, code) 193 194 def __str__(self): 195 return 'HTTP Error %s: %s' % (self.code, self.msg) 196 197 # since URLError specifies a .reason attribute, HTTPError should also 198 # provide this attribute. See issue13211 fo discussion. 199 @property 200 def reason(self): 201 return self.msg 202 203 def info(self): 204 return self.hdrs 205 206# copied from cookielib.py 207_cut_port_re = re.compile(r":\d+$") 208def request_host(request): 209 """Return request-host, as defined by RFC 2965. 210 211 Variation from RFC: returned value is lowercased, for convenient 212 comparison. 213 214 """ 215 url = request.get_full_url() 216 host = urlparse.urlparse(url)[1] 217 if host == "": 218 host = request.get_header("Host", "") 219 220 # remove port, if present 221 host = _cut_port_re.sub("", host, 1) 222 return host.lower() 223 224class Request: 225 226 def __init__(self, url, data=None, headers={}, 227 origin_req_host=None, unverifiable=False): 228 # unwrap('<URL:type://host/path>') --> 'type://host/path' 229 self.__original = unwrap(url) 230 self.__original, self.__fragment = splittag(self.__original) 231 self.type = None 232 # self.__r_type is what's left after doing the splittype 233 self.host = None 234 self.port = None 235 self._tunnel_host = None 236 self.data = data 237 self.headers = {} 238 for key, value in headers.items(): 239 self.add_header(key, value) 240 self.unredirected_hdrs = {} 241 if origin_req_host is None: 242 origin_req_host = request_host(self) 243 self.origin_req_host = origin_req_host 244 self.unverifiable = unverifiable 245 246 def __getattr__(self, attr): 247 # XXX this is a fallback mechanism to guard against these 248 # methods getting called in a non-standard order. this may be 249 # too complicated and/or unnecessary. 250 # XXX should the __r_XXX attributes be public? 251 if attr in ('_Request__r_type', '_Request__r_host'): 252 getattr(self, 'get_' + attr[12:])() 253 return self.__dict__[attr] 254 raise AttributeError, attr 255 256 def get_method(self): 257 if self.has_data(): 258 return "POST" 259 else: 260 return "GET" 261 262 # XXX these helper methods are lame 263 264 def add_data(self, data): 265 self.data = data 266 267 def has_data(self): 268 return self.data is not None 269 270 def get_data(self): 271 return self.data 272 273 def get_full_url(self): 274 if self.__fragment: 275 return '%s#%s' % (self.__original, self.__fragment) 276 else: 277 return self.__original 278 279 def get_type(self): 280 if self.type is None: 281 self.type, self.__r_type = splittype(self.__original) 282 if self.type is None: 283 raise ValueError, "unknown url type: %s" % self.__original 284 return self.type 285 286 def get_host(self): 287 if self.host is None: 288 self.host, self.__r_host = splithost(self.__r_type) 289 if self.host: 290 self.host = unquote(self.host) 291 return self.host 292 293 def get_selector(self): 294 return self.__r_host 295 296 def set_proxy(self, host, type): 297 if self.type == 'https' and not self._tunnel_host: 298 self._tunnel_host = self.host 299 else: 300 self.type = type 301 self.__r_host = self.__original 302 303 self.host = host 304 305 def has_proxy(self): 306 return self.__r_host == self.__original 307 308 def get_origin_req_host(self): 309 return self.origin_req_host 310 311 def is_unverifiable(self): 312 return self.unverifiable 313 314 def add_header(self, key, val): 315 # useful for something like authentication 316 self.headers[key.capitalize()] = val 317 318 def add_unredirected_header(self, key, val): 319 # will not be added to a redirected request 320 self.unredirected_hdrs[key.capitalize()] = val 321 322 def has_header(self, header_name): 323 return (header_name in self.headers or 324 header_name in self.unredirected_hdrs) 325 326 def get_header(self, header_name, default=None): 327 return self.headers.get( 328 header_name, 329 self.unredirected_hdrs.get(header_name, default)) 330 331 def header_items(self): 332 hdrs = self.unredirected_hdrs.copy() 333 hdrs.update(self.headers) 334 return hdrs.items() 335 336class OpenerDirector: 337 def __init__(self): 338 client_version = "Python-urllib/%s" % __version__ 339 self.addheaders = [('User-agent', client_version)] 340 # self.handlers is retained only for backward compatibility 341 self.handlers = [] 342 # manage the individual handlers 343 self.handle_open = {} 344 self.handle_error = {} 345 self.process_response = {} 346 self.process_request = {} 347 348 def add_handler(self, handler): 349 if not hasattr(handler, "add_parent"): 350 raise TypeError("expected BaseHandler instance, got %r" % 351 type(handler)) 352 353 added = False 354 for meth in dir(handler): 355 if meth in ["redirect_request", "do_open", "proxy_open"]: 356 # oops, coincidental match 357 continue 358 359 i = meth.find("_") 360 protocol = meth[:i] 361 condition = meth[i+1:] 362 363 if condition.startswith("error"): 364 j = condition.find("_") + i + 1 365 kind = meth[j+1:] 366 try: 367 kind = int(kind) 368 except ValueError: 369 pass 370 lookup = self.handle_error.get(protocol, {}) 371 self.handle_error[protocol] = lookup 372 elif condition == "open": 373 kind = protocol 374 lookup = self.handle_open 375 elif condition == "response": 376 kind = protocol 377 lookup = self.process_response 378 elif condition == "request": 379 kind = protocol 380 lookup = self.process_request 381 else: 382 continue 383 384 handlers = lookup.setdefault(kind, []) 385 if handlers: 386 bisect.insort(handlers, handler) 387 else: 388 handlers.append(handler) 389 added = True 390 391 if added: 392 bisect.insort(self.handlers, handler) 393 handler.add_parent(self) 394 395 def close(self): 396 # Only exists for backwards compatibility. 397 pass 398 399 def _call_chain(self, chain, kind, meth_name, *args): 400 # Handlers raise an exception if no one else should try to handle 401 # the request, or return None if they can't but another handler 402 # could. Otherwise, they return the response. 403 handlers = chain.get(kind, ()) 404 for handler in handlers: 405 func = getattr(handler, meth_name) 406 407 result = func(*args) 408 if result is not None: 409 return result 410 411 def open(self, fullurl, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT): 412 # accept a URL or a Request object 413 if isinstance(fullurl, basestring): 414 req = Request(fullurl, data) 415 else: 416 req = fullurl 417 if data is not None: 418 req.add_data(data) 419 420 req.timeout = timeout 421 protocol = req.get_type() 422 423 # pre-process request 424 meth_name = protocol+"_request" 425 for processor in self.process_request.get(protocol, []): 426 meth = getattr(processor, meth_name) 427 req = meth(req) 428 429 response = self._open(req, data) 430 431 # post-process response 432 meth_name = protocol+"_response" 433 for processor in self.process_response.get(protocol, []): 434 meth = getattr(processor, meth_name) 435 response = meth(req, response) 436 437 return response 438 439 def _open(self, req, data=None): 440 result = self._call_chain(self.handle_open, 'default', 441 'default_open', req) 442 if result: 443 return result 444 445 protocol = req.get_type() 446 result = self._call_chain(self.handle_open, protocol, protocol + 447 '_open', req) 448 if result: 449 return result 450 451 return self._call_chain(self.handle_open, 'unknown', 452 'unknown_open', req) 453 454 def error(self, proto, *args): 455 if proto in ('http', 'https'): 456 # XXX http[s] protocols are special-cased 457 dict = self.handle_error['http'] # https is not different than http 458 proto = args[2] # YUCK! 459 meth_name = 'http_error_%s' % proto 460 http_err = 1 461 orig_args = args 462 else: 463 dict = self.handle_error 464 meth_name = proto + '_error' 465 http_err = 0 466 args = (dict, proto, meth_name) + args 467 result = self._call_chain(*args) 468 if result: 469 return result 470 471 if http_err: 472 args = (dict, 'default', 'http_error_default') + orig_args 473 return self._call_chain(*args) 474 475# XXX probably also want an abstract factory that knows when it makes 476# sense to skip a superclass in favor of a subclass and when it might 477# make sense to include both 478 479def build_opener(*handlers): 480 """Create an opener object from a list of handlers. 481 482 The opener will use several default handlers, including support 483 for HTTP, FTP and when applicable, HTTPS. 484 485 If any of the handlers passed as arguments are subclasses of the 486 default handlers, the default handlers will not be used. 487 """ 488 import types 489 def isclass(obj): 490 return isinstance(obj, (types.ClassType, type)) 491 492 opener = OpenerDirector() 493 default_classes = [ProxyHandler, UnknownHandler, HTTPHandler, 494 HTTPDefaultErrorHandler, HTTPRedirectHandler, 495 FTPHandler, FileHandler, HTTPErrorProcessor] 496 if hasattr(httplib, 'HTTPS'): 497 default_classes.append(HTTPSHandler) 498 skip = set() 499 for klass in default_classes: 500 for check in handlers: 501 if isclass(check): 502 if issubclass(check, klass): 503 skip.add(klass) 504 elif isinstance(check, klass): 505 skip.add(klass) 506 for klass in skip: 507 default_classes.remove(klass) 508 509 for klass in default_classes: 510 opener.add_handler(klass()) 511 512 for h in handlers: 513 if isclass(h): 514 h = h() 515 opener.add_handler(h) 516 return opener 517 518class BaseHandler: 519 handler_order = 500 520 521 def add_parent(self, parent): 522 self.parent = parent 523 524 def close(self): 525 # Only exists for backwards compatibility 526 pass 527 528 def __lt__(self, other): 529 if not hasattr(other, "handler_order"): 530 # Try to preserve the old behavior of having custom classes 531 # inserted after default ones (works only for custom user 532 # classes which are not aware of handler_order). 533 return True 534 return self.handler_order < other.handler_order 535 536 537class HTTPErrorProcessor(BaseHandler): 538 """Process HTTP error responses.""" 539 handler_order = 1000 # after all other processing 540 541 def http_response(self, request, response): 542 code, msg, hdrs = response.code, response.msg, response.info() 543 544 # According to RFC 2616, "2xx" code indicates that the client's 545 # request was successfully received, understood, and accepted. 546 if not (200 <= code < 300): 547 response = self.parent.error( 548 'http', request, response, code, msg, hdrs) 549 550 return response 551 552 https_response = http_response 553 554class HTTPDefaultErrorHandler(BaseHandler): 555 def http_error_default(self, req, fp, code, msg, hdrs): 556 raise HTTPError(req.get_full_url(), code, msg, hdrs, fp) 557 558class HTTPRedirectHandler(BaseHandler): 559 # maximum number of redirections to any single URL 560 # this is needed because of the state that cookies introduce 561 max_repeats = 4 562 # maximum total number of redirections (regardless of URL) before 563 # assuming we're in a loop 564 max_redirections = 10 565 566 def redirect_request(self, req, fp, code, msg, headers, newurl): 567 """Return a Request or None in response to a redirect. 568 569 This is called by the http_error_30x methods when a 570 redirection response is received. If a redirection should 571 take place, return a new Request to allow http_error_30x to 572 perform the redirect. Otherwise, raise HTTPError if no-one 573 else should try to handle this url. Return None if you can't 574 but another Handler might. 575 """ 576 m = req.get_method() 577 if (code in (301, 302, 303, 307) and m in ("GET", "HEAD") 578 or code in (301, 302, 303) and m == "POST"): 579 # Strictly (according to RFC 2616), 301 or 302 in response 580 # to a POST MUST NOT cause a redirection without confirmation 581 # from the user (of urllib2, in this case). In practice, 582 # essentially all clients do redirect in this case, so we 583 # do the same. 584 # be conciliant with URIs containing a space 585 newurl = newurl.replace(' ', '%20') 586 newheaders = dict((k,v) for k,v in req.headers.items() 587 if k.lower() not in ("content-length", "content-type") 588 ) 589 return Request(newurl, 590 headers=newheaders, 591 origin_req_host=req.get_origin_req_host(), 592 unverifiable=True) 593 else: 594 raise HTTPError(req.get_full_url(), code, msg, headers, fp) 595 596 # Implementation note: To avoid the server sending us into an 597 # infinite loop, the request object needs to track what URLs we 598 # have already seen. Do this by adding a handler-specific 599 # attribute to the Request object. 600 def http_error_302(self, req, fp, code, msg, headers): 601 # Some servers (incorrectly) return multiple Location headers 602 # (so probably same goes for URI). Use first header. 603 if 'location' in headers: 604 newurl = headers.getheaders('location')[0] 605 elif 'uri' in headers: 606 newurl = headers.getheaders('uri')[0] 607 else: 608 return 609 610 # fix a possible malformed URL 611 urlparts = urlparse.urlparse(newurl) 612 if not urlparts.path and urlparts.netloc: 613 urlparts = list(urlparts) 614 urlparts[2] = "/" 615 newurl = urlparse.urlunparse(urlparts) 616 617 newurl = urlparse.urljoin(req.get_full_url(), newurl) 618 619 # For security reasons we do not allow redirects to protocols 620 # other than HTTP, HTTPS or FTP. 621 newurl_lower = newurl.lower() 622 if not (newurl_lower.startswith('http://') or 623 newurl_lower.startswith('https://') or 624 newurl_lower.startswith('ftp://')): 625 raise HTTPError(newurl, code, 626 msg + " - Redirection to url '%s' is not allowed" % 627 newurl, 628 headers, fp) 629 630 # XXX Probably want to forget about the state of the current 631 # request, although that might interact poorly with other 632 # handlers that also use handler-specific request attributes 633 new = self.redirect_request(req, fp, code, msg, headers, newurl) 634 if new is None: 635 return 636 637 # loop detection 638 # .redirect_dict has a key url if url was previously visited. 639 if hasattr(req, 'redirect_dict'): 640 visited = new.redirect_dict = req.redirect_dict 641 if (visited.get(newurl, 0) >= self.max_repeats or 642 len(visited) >= self.max_redirections): 643 raise HTTPError(req.get_full_url(), code, 644 self.inf_msg + msg, headers, fp) 645 else: 646 visited = new.redirect_dict = req.redirect_dict = {} 647 visited[newurl] = visited.get(newurl, 0) + 1 648 649 # Don't close the fp until we are sure that we won't use it 650 # with HTTPError. 651 fp.read() 652 fp.close() 653 654 return self.parent.open(new, timeout=req.timeout) 655 656 http_error_301 = http_error_303 = http_error_307 = http_error_302 657 658 inf_msg = "The HTTP server returned a redirect error that would " \ 659 "lead to an infinite loop.\n" \ 660 "The last 30x error message was:\n" 661 662 663def _parse_proxy(proxy): 664 """Return (scheme, user, password, host/port) given a URL or an authority. 665 666 If a URL is supplied, it must have an authority (host:port) component. 667 According to RFC 3986, having an authority component means the URL must 668 have two slashes after the scheme: 669 670 >>> _parse_proxy('file:/ftp.example.com/') 671 Traceback (most recent call last): 672 ValueError: proxy URL with no authority: 'file:/ftp.example.com/' 673 674 The first three items of the returned tuple may be None. 675 676 Examples of authority parsing: 677 678 >>> _parse_proxy('proxy.example.com') 679 (None, None, None, 'proxy.example.com') 680 >>> _parse_proxy('proxy.example.com:3128') 681 (None, None, None, 'proxy.example.com:3128') 682 683 The authority component may optionally include userinfo (assumed to be 684 username:password): 685 686 >>> _parse_proxy('joe:password@proxy.example.com') 687 (None, 'joe', 'password', 'proxy.example.com') 688 >>> _parse_proxy('joe:password@proxy.example.com:3128') 689 (None, 'joe', 'password', 'proxy.example.com:3128') 690 691 Same examples, but with URLs instead: 692 693 >>> _parse_proxy('http://proxy.example.com/') 694 ('http', None, None, 'proxy.example.com') 695 >>> _parse_proxy('http://proxy.example.com:3128/') 696 ('http', None, None, 'proxy.example.com:3128') 697 >>> _parse_proxy('http://joe:password@proxy.example.com/') 698 ('http', 'joe', 'password', 'proxy.example.com') 699 >>> _parse_proxy('http://joe:password@proxy.example.com:3128') 700 ('http', 'joe', 'password', 'proxy.example.com:3128') 701 702 Everything after the authority is ignored: 703 704 >>> _parse_proxy('ftp://joe:password@proxy.example.com/rubbish:3128') 705 ('ftp', 'joe', 'password', 'proxy.example.com') 706 707 Test for no trailing '/' case: 708 709 >>> _parse_proxy('http://joe:password@proxy.example.com') 710 ('http', 'joe', 'password', 'proxy.example.com') 711 712 """ 713 scheme, r_scheme = splittype(proxy) 714 if not r_scheme.startswith("/"): 715 # authority 716 scheme = None 717 authority = proxy 718 else: 719 # URL 720 if not r_scheme.startswith("//"): 721 raise ValueError("proxy URL with no authority: %r" % proxy) 722 # We have an authority, so for RFC 3986-compliant URLs (by ss 3. 723 # and 3.3.), path is empty or starts with '/' 724 end = r_scheme.find("/", 2) 725 if end == -1: 726 end = None 727 authority = r_scheme[2:end] 728 userinfo, hostport = splituser(authority) 729 if userinfo is not None: 730 user, password = splitpasswd(userinfo) 731 else: 732 user = password = None 733 return scheme, user, password, hostport 734 735class ProxyHandler(BaseHandler): 736 # Proxies must be in front 737 handler_order = 100 738 739 def __init__(self, proxies=None): 740 if proxies is None: 741 proxies = getproxies() 742 assert hasattr(proxies, 'has_key'), "proxies must be a mapping" 743 self.proxies = proxies 744 for type, url in proxies.items(): 745 setattr(self, '%s_open' % type, 746 lambda r, proxy=url, type=type, meth=self.proxy_open: \ 747 meth(r, proxy, type)) 748 749 def proxy_open(self, req, proxy, type): 750 orig_type = req.get_type() 751 proxy_type, user, password, hostport = _parse_proxy(proxy) 752 753 if proxy_type is None: 754 proxy_type = orig_type 755 756 if req.host and proxy_bypass(req.host): 757 return None 758 759 if user and password: 760 user_pass = '%s:%s' % (unquote(user), unquote(password)) 761 creds = base64.b64encode(user_pass).strip() 762 req.add_header('Proxy-authorization', 'Basic ' + creds) 763 hostport = unquote(hostport) 764 req.set_proxy(hostport, proxy_type) 765 766 if orig_type == proxy_type or orig_type == 'https': 767 # let other handlers take care of it 768 return None 769 else: 770 # need to start over, because the other handlers don't 771 # grok the proxy's URL type 772 # e.g. if we have a constructor arg proxies like so: 773 # {'http': 'ftp://proxy.example.com'}, we may end up turning 774 # a request for http://acme.example.com/a into one for 775 # ftp://proxy.example.com/a 776 return self.parent.open(req, timeout=req.timeout) 777 778class HTTPPasswordMgr: 779 780 def __init__(self): 781 self.passwd = {} 782 783 def add_password(self, realm, uri, user, passwd): 784 # uri could be a single URI or a sequence 785 if isinstance(uri, basestring): 786 uri = [uri] 787 if not realm in self.passwd: 788 self.passwd[realm] = {} 789 for default_port in True, False: 790 reduced_uri = tuple( 791 [self.reduce_uri(u, default_port) for u in uri]) 792 self.passwd[realm][reduced_uri] = (user, passwd) 793 794 def find_user_password(self, realm, authuri): 795 domains = self.passwd.get(realm, {}) 796 for default_port in True, False: 797 reduced_authuri = self.reduce_uri(authuri, default_port) 798 for uris, authinfo in domains.iteritems(): 799 for uri in uris: 800 if self.is_suburi(uri, reduced_authuri): 801 return authinfo 802 return None, None 803 804 def reduce_uri(self, uri, default_port=True): 805 """Accept authority or URI and extract only the authority and path.""" 806 # note HTTP URLs do not have a userinfo component 807 parts = urlparse.urlsplit(uri) 808 if parts[1]: 809 # URI 810 scheme = parts[0] 811 authority = parts[1] 812 path = parts[2] or '/' 813 else: 814 # host or host:port 815 scheme = None 816 authority = uri 817 path = '/' 818 host, port = splitport(authority) 819 if default_port and port is None and scheme is not None: 820 dport = {"http": 80, 821 "https": 443, 822 }.get(scheme) 823 if dport is not None: 824 authority = "%s:%d" % (host, dport) 825 return authority, path 826 827 def is_suburi(self, base, test): 828 """Check if test is below base in a URI tree 829 830 Both args must be URIs in reduced form. 831 """ 832 if base == test: 833 return True 834 if base[0] != test[0]: 835 return False 836 common = posixpath.commonprefix((base[1], test[1])) 837 if len(common) == len(base[1]): 838 return True 839 return False 840 841 842class HTTPPasswordMgrWithDefaultRealm(HTTPPasswordMgr): 843 844 def find_user_password(self, realm, authuri): 845 user, password = HTTPPasswordMgr.find_user_password(self, realm, 846 authuri) 847 if user is not None: 848 return user, password 849 return HTTPPasswordMgr.find_user_password(self, None, authuri) 850 851 852class AbstractBasicAuthHandler: 853 854 # XXX this allows for multiple auth-schemes, but will stupidly pick 855 # the last one with a realm specified. 856 857 # allow for double- and single-quoted realm values 858 # (single quotes are a violation of the RFC, but appear in the wild) 859 rx = re.compile('(?:.*,)*[ \t]*([^ \t]+)[ \t]+' 860 'realm=(["\']?)([^"\']*)\\2', re.I) 861 862 # XXX could pre-emptively send auth info already accepted (RFC 2617, 863 # end of section 2, and section 1.2 immediately after "credentials" 864 # production). 865 866 def __init__(self, password_mgr=None): 867 if password_mgr is None: 868 password_mgr = HTTPPasswordMgr() 869 self.passwd = password_mgr 870 self.add_password = self.passwd.add_password 871 872 873 def http_error_auth_reqed(self, authreq, host, req, headers): 874 # host may be an authority (without userinfo) or a URL with an 875 # authority 876 # XXX could be multiple headers 877 authreq = headers.get(authreq, None) 878 879 if authreq: 880 mo = AbstractBasicAuthHandler.rx.search(authreq) 881 if mo: 882 scheme, quote, realm = mo.groups() 883 if quote not in ['"', "'"]: 884 warnings.warn("Basic Auth Realm was unquoted", 885 UserWarning, 2) 886 if scheme.lower() == 'basic': 887 return self.retry_http_basic_auth(host, req, realm) 888 889 def retry_http_basic_auth(self, host, req, realm): 890 user, pw = self.passwd.find_user_password(realm, host) 891 if pw is not None: 892 raw = "%s:%s" % (user, pw) 893 auth = 'Basic %s' % base64.b64encode(raw).strip() 894 if req.get_header(self.auth_header, None) == auth: 895 return None 896 req.add_unredirected_header(self.auth_header, auth) 897 return self.parent.open(req, timeout=req.timeout) 898 else: 899 return None 900 901 902class HTTPBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler): 903 904 auth_header = 'Authorization' 905 906 def http_error_401(self, req, fp, code, msg, headers): 907 url = req.get_full_url() 908 response = self.http_error_auth_reqed('www-authenticate', 909 url, req, headers) 910 return response 911 912 913class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler): 914 915 auth_header = 'Proxy-authorization' 916 917 def http_error_407(self, req, fp, code, msg, headers): 918 # http_error_auth_reqed requires that there is no userinfo component in 919 # authority. Assume there isn't one, since urllib2 does not (and 920 # should not, RFC 3986 s. 3.2.1) support requests for URLs containing 921 # userinfo. 922 authority = req.get_host() 923 response = self.http_error_auth_reqed('proxy-authenticate', 924 authority, req, headers) 925 return response 926 927 928def randombytes(n): 929 """Return n random bytes.""" 930 # Use /dev/urandom if it is available. Fall back to random module 931 # if not. It might be worthwhile to extend this function to use 932 # other platform-specific mechanisms for getting random bytes. 933 if os.path.exists("/dev/urandom"): 934 f = open("/dev/urandom") 935 s = f.read(n) 936 f.close() 937 return s 938 else: 939 L = [chr(random.randrange(0, 256)) for i in range(n)] 940 return "".join(L) 941 942class AbstractDigestAuthHandler: 943 # Digest authentication is specified in RFC 2617. 944 945 # XXX The client does not inspect the Authentication-Info header 946 # in a successful response. 947 948 # XXX It should be possible to test this implementation against 949 # a mock server that just generates a static set of challenges. 950 951 # XXX qop="auth-int" supports is shaky 952 953 def __init__(self, passwd=None): 954 if passwd is None: 955 passwd = HTTPPasswordMgr() 956 self.passwd = passwd 957 self.add_password = self.passwd.add_password 958 self.retried = 0 959 self.nonce_count = 0 960 self.last_nonce = None 961 962 def reset_retry_count(self): 963 self.retried = 0 964 965 def http_error_auth_reqed(self, auth_header, host, req, headers): 966 authreq = headers.get(auth_header, None) 967 if self.retried > 5: 968 # Don't fail endlessly - if we failed once, we'll probably 969 # fail a second time. Hm. Unless the Password Manager is 970 # prompting for the information. Crap. This isn't great 971 # but it's better than the current 'repeat until recursion 972 # depth exceeded' approach <wink> 973 raise HTTPError(req.get_full_url(), 401, "digest auth failed", 974 headers, None) 975 else: 976 self.retried += 1 977 if authreq: 978 scheme = authreq.split()[0] 979 if scheme.lower() == 'digest': 980 return self.retry_http_digest_auth(req, authreq) 981 982 def retry_http_digest_auth(self, req, auth): 983 token, challenge = auth.split(' ', 1) 984 chal = parse_keqv_list(parse_http_list(challenge)) 985 auth = self.get_authorization(req, chal) 986 if auth: 987 auth_val = 'Digest %s' % auth 988 if req.headers.get(self.auth_header, None) == auth_val: 989 return None 990 req.add_unredirected_header(self.auth_header, auth_val) 991 resp = self.parent.open(req, timeout=req.timeout) 992 return resp 993 994 def get_cnonce(self, nonce): 995 # The cnonce-value is an opaque 996 # quoted string value provided by the client and used by both client 997 # and server to avoid chosen plaintext attacks, to provide mutual 998 # authentication, and to provide some message integrity protection. 999 # This isn't a fabulous effort, but it's probably Good Enough. 1000 dig = hashlib.sha1("%s:%s:%s:%s" % (self.nonce_count, nonce, time.ctime(), 1001 randombytes(8))).hexdigest() 1002 return dig[:16] 1003 1004 def get_authorization(self, req, chal): 1005 try: 1006 realm = chal['realm'] 1007 nonce = chal['nonce'] 1008 qop = chal.get('qop') 1009 algorithm = chal.get('algorithm', 'MD5') 1010 # mod_digest doesn't send an opaque, even though it isn't 1011 # supposed to be optional 1012 opaque = chal.get('opaque', None) 1013 except KeyError: 1014 return None 1015 1016 H, KD = self.get_algorithm_impls(algorithm) 1017 if H is None: 1018 return None 1019 1020 user, pw = self.passwd.find_user_password(realm, req.get_full_url()) 1021 if user is None: 1022 return None 1023 1024 # XXX not implemented yet 1025 if req.has_data(): 1026 entdig = self.get_entity_digest(req.get_data(), chal) 1027 else: 1028 entdig = None 1029 1030 A1 = "%s:%s:%s" % (user, realm, pw) 1031 A2 = "%s:%s" % (req.get_method(), 1032 # XXX selector: what about proxies and full urls 1033 req.get_selector()) 1034 if qop == 'auth': 1035 if nonce == self.last_nonce: 1036 self.nonce_count += 1 1037 else: 1038 self.nonce_count = 1 1039 self.last_nonce = nonce 1040 1041 ncvalue = '%08x' % self.nonce_count 1042 cnonce = self.get_cnonce(nonce) 1043 noncebit = "%s:%s:%s:%s:%s" % (nonce, ncvalue, cnonce, qop, H(A2)) 1044 respdig = KD(H(A1), noncebit) 1045 elif qop is None: 1046 respdig = KD(H(A1), "%s:%s" % (nonce, H(A2))) 1047 else: 1048 # XXX handle auth-int. 1049 raise URLError("qop '%s' is not supported." % qop) 1050 1051 # XXX should the partial digests be encoded too? 1052 1053 base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \ 1054 'response="%s"' % (user, realm, nonce, req.get_selector(), 1055 respdig) 1056 if opaque: 1057 base += ', opaque="%s"' % opaque 1058 if entdig: 1059 base += ', digest="%s"' % entdig 1060 base += ', algorithm="%s"' % algorithm 1061 if qop: 1062 base += ', qop=auth, nc=%s, cnonce="%s"' % (ncvalue, cnonce) 1063 return base 1064 1065 def get_algorithm_impls(self, algorithm): 1066 # algorithm should be case-insensitive according to RFC2617 1067 algorithm = algorithm.upper() 1068 # lambdas assume digest modules are imported at the top level 1069 if algorithm == 'MD5': 1070 H = lambda x: hashlib.md5(x).hexdigest() 1071 elif algorithm == 'SHA': 1072 H = lambda x: hashlib.sha1(x).hexdigest() 1073 # XXX MD5-sess 1074 else: 1075 raise ValueError("Unsupported digest authentication " 1076 "algorithm %r" % algorithm.lower()) 1077 KD = lambda s, d: H("%s:%s" % (s, d)) 1078 return H, KD 1079 1080 def get_entity_digest(self, data, chal): 1081 # XXX not implemented yet 1082 return None 1083 1084 1085class HTTPDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler): 1086 """An authentication protocol defined by RFC 2069 1087 1088 Digest authentication improves on basic authentication because it 1089 does not transmit passwords in the clear. 1090 """ 1091 1092 auth_header = 'Authorization' 1093 handler_order = 490 # before Basic auth 1094 1095 def http_error_401(self, req, fp, code, msg, headers): 1096 host = urlparse.urlparse(req.get_full_url())[1] 1097 retry = self.http_error_auth_reqed('www-authenticate', 1098 host, req, headers) 1099 self.reset_retry_count() 1100 return retry 1101 1102 1103class ProxyDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler): 1104 1105 auth_header = 'Proxy-Authorization' 1106 handler_order = 490 # before Basic auth 1107 1108 def http_error_407(self, req, fp, code, msg, headers): 1109 host = req.get_host() 1110 retry = self.http_error_auth_reqed('proxy-authenticate', 1111 host, req, headers) 1112 self.reset_retry_count() 1113 return retry 1114 1115class AbstractHTTPHandler(BaseHandler): 1116 1117 def __init__(self, debuglevel=0): 1118 self._debuglevel = debuglevel 1119 1120 def set_http_debuglevel(self, level): 1121 self._debuglevel = level 1122 1123 def do_request_(self, request): 1124 host = request.get_host() 1125 if not host: 1126 raise URLError('no host given') 1127 1128 if request.has_data(): # POST 1129 data = request.get_data() 1130 if not request.has_header('Content-type'): 1131 request.add_unredirected_header( 1132 'Content-type', 1133 'application/x-www-form-urlencoded') 1134 if not request.has_header('Content-length'): 1135 request.add_unredirected_header( 1136 'Content-length', '%d' % len(data)) 1137 1138 sel_host = host 1139 if request.has_proxy(): 1140 scheme, sel = splittype(request.get_selector()) 1141 sel_host, sel_path = splithost(sel) 1142 1143 if not request.has_header('Host'): 1144 request.add_unredirected_header('Host', sel_host) 1145 for name, value in self.parent.addheaders: 1146 name = name.capitalize() 1147 if not request.has_header(name): 1148 request.add_unredirected_header(name, value) 1149 1150 return request 1151 1152 def do_open(self, http_class, req, **http_conn_args): 1153 """Return an addinfourl object for the request, using http_class. 1154 1155 http_class must implement the HTTPConnection API from httplib. 1156 The addinfourl return value is a file-like object. It also 1157 has methods and attributes including: 1158 - info(): return a mimetools.Message object for the headers 1159 - geturl(): return the original request URL 1160 - code: HTTP status code 1161 """ 1162 host = req.get_host() 1163 if not host: 1164 raise URLError('no host given') 1165 1166 # will parse host:port 1167 h = http_class(host, timeout=req.timeout, **http_conn_args) 1168 h.set_debuglevel(self._debuglevel) 1169 1170 headers = dict(req.unredirected_hdrs) 1171 headers.update(dict((k, v) for k, v in req.headers.items() 1172 if k not in headers)) 1173 1174 # We want to make an HTTP/1.1 request, but the addinfourl 1175 # class isn't prepared to deal with a persistent connection. 1176 # It will try to read all remaining data from the socket, 1177 # which will block while the server waits for the next request. 1178 # So make sure the connection gets closed after the (only) 1179 # request. 1180 headers["Connection"] = "close" 1181 headers = dict( 1182 (name.title(), val) for name, val in headers.items()) 1183 1184 if req._tunnel_host: 1185 tunnel_headers = {} 1186 proxy_auth_hdr = "Proxy-Authorization" 1187 if proxy_auth_hdr in headers: 1188 tunnel_headers[proxy_auth_hdr] = headers[proxy_auth_hdr] 1189 # Proxy-Authorization should not be sent to origin 1190 # server. 1191 del headers[proxy_auth_hdr] 1192 h.set_tunnel(req._tunnel_host, headers=tunnel_headers) 1193 1194 try: 1195 h.request(req.get_method(), req.get_selector(), req.data, headers) 1196 except socket.error, err: # XXX what error? 1197 h.close() 1198 raise URLError(err) 1199 else: 1200 try: 1201 r = h.getresponse(buffering=True) 1202 except TypeError: # buffering kw not supported 1203 r = h.getresponse() 1204 1205 # Pick apart the HTTPResponse object to get the addinfourl 1206 # object initialized properly. 1207 1208 # Wrap the HTTPResponse object in socket's file object adapter 1209 # for Windows. That adapter calls recv(), so delegate recv() 1210 # to read(). This weird wrapping allows the returned object to 1211 # have readline() and readlines() methods. 1212 1213 # XXX It might be better to extract the read buffering code 1214 # out of socket._fileobject() and into a base class. 1215 1216 r.recv = r.read 1217 fp = socket._fileobject(r, close=True) 1218 1219 resp = addinfourl(fp, r.msg, req.get_full_url()) 1220 resp.code = r.status 1221 resp.msg = r.reason 1222 return resp 1223 1224 1225class HTTPHandler(AbstractHTTPHandler): 1226 1227 def http_open(self, req): 1228 return self.do_open(httplib.HTTPConnection, req) 1229 1230 http_request = AbstractHTTPHandler.do_request_ 1231 1232if hasattr(httplib, 'HTTPS'): 1233 class HTTPSHandler(AbstractHTTPHandler): 1234 1235 def __init__(self, debuglevel=0, context=None): 1236 AbstractHTTPHandler.__init__(self, debuglevel) 1237 self._context = context 1238 1239 def https_open(self, req): 1240 return self.do_open(httplib.HTTPSConnection, req, 1241 context=self._context) 1242 1243 https_request = AbstractHTTPHandler.do_request_ 1244 1245class HTTPCookieProcessor(BaseHandler): 1246 def __init__(self, cookiejar=None): 1247 import cookielib 1248 if cookiejar is None: 1249 cookiejar = cookielib.CookieJar() 1250 self.cookiejar = cookiejar 1251 1252 def http_request(self, request): 1253 self.cookiejar.add_cookie_header(request) 1254 return request 1255 1256 def http_response(self, request, response): 1257 self.cookiejar.extract_cookies(response, request) 1258 return response 1259 1260 https_request = http_request 1261 https_response = http_response 1262 1263class UnknownHandler(BaseHandler): 1264 def unknown_open(self, req): 1265 type = req.get_type() 1266 raise URLError('unknown url type: %s' % type) 1267 1268def parse_keqv_list(l): 1269 """Parse list of key=value strings where keys are not duplicated.""" 1270 parsed = {} 1271 for elt in l: 1272 k, v = elt.split('=', 1) 1273 if v[0] == '"' and v[-1] == '"': 1274 v = v[1:-1] 1275 parsed[k] = v 1276 return parsed 1277 1278def parse_http_list(s): 1279 """Parse lists as described by RFC 2068 Section 2. 1280 1281 In particular, parse comma-separated lists where the elements of 1282 the list may include quoted-strings. A quoted-string could 1283 contain a comma. A non-quoted string could have quotes in the 1284 middle. Neither commas nor quotes count if they are escaped. 1285 Only double-quotes count, not single-quotes. 1286 """ 1287 res = [] 1288 part = '' 1289 1290 escape = quote = False 1291 for cur in s: 1292 if escape: 1293 part += cur 1294 escape = False 1295 continue 1296 if quote: 1297 if cur == '\\': 1298 escape = True 1299 continue 1300 elif cur == '"': 1301 quote = False 1302 part += cur 1303 continue 1304 1305 if cur == ',': 1306 res.append(part) 1307 part = '' 1308 continue 1309 1310 if cur == '"': 1311 quote = True 1312 1313 part += cur 1314 1315 # append last part 1316 if part: 1317 res.append(part) 1318 1319 return [part.strip() for part in res] 1320 1321def _safe_gethostbyname(host): 1322 try: 1323 return socket.gethostbyname(host) 1324 except socket.gaierror: 1325 return None 1326 1327class FileHandler(BaseHandler): 1328 # Use local file or FTP depending on form of URL 1329 def file_open(self, req): 1330 url = req.get_selector() 1331 if url[:2] == '//' and url[2:3] != '/' and (req.host and 1332 req.host != 'localhost'): 1333 req.type = 'ftp' 1334 return self.parent.open(req) 1335 else: 1336 return self.open_local_file(req) 1337 1338 # names for the localhost 1339 names = None 1340 def get_names(self): 1341 if FileHandler.names is None: 1342 try: 1343 FileHandler.names = tuple( 1344 socket.gethostbyname_ex('localhost')[2] + 1345 socket.gethostbyname_ex(socket.gethostname())[2]) 1346 except socket.gaierror: 1347 FileHandler.names = (socket.gethostbyname('localhost'),) 1348 return FileHandler.names 1349 1350 # not entirely sure what the rules are here 1351 def open_local_file(self, req): 1352 import email.utils 1353 import mimetypes 1354 host = req.get_host() 1355 filename = req.get_selector() 1356 localfile = url2pathname(filename) 1357 try: 1358 stats = os.stat(localfile) 1359 size = stats.st_size 1360 modified = email.utils.formatdate(stats.st_mtime, usegmt=True) 1361 mtype = mimetypes.guess_type(filename)[0] 1362 headers = mimetools.Message(StringIO( 1363 'Content-type: %s\nContent-length: %d\nLast-modified: %s\n' % 1364 (mtype or 'text/plain', size, modified))) 1365 if host: 1366 host, port = splitport(host) 1367 if not host or \ 1368 (not port and _safe_gethostbyname(host) in self.get_names()): 1369 if host: 1370 origurl = 'file://' + host + filename 1371 else: 1372 origurl = 'file://' + filename 1373 return addinfourl(open(localfile, 'rb'), headers, origurl) 1374 except OSError, msg: 1375 # urllib2 users shouldn't expect OSErrors coming from urlopen() 1376 raise URLError(msg) 1377 raise URLError('file not on local host') 1378 1379class FTPHandler(BaseHandler): 1380 def ftp_open(self, req): 1381 import ftplib 1382 import mimetypes 1383 host = req.get_host() 1384 if not host: 1385 raise URLError('ftp error: no host given') 1386 host, port = splitport(host) 1387 if port is None: 1388 port = ftplib.FTP_PORT 1389 else: 1390 port = int(port) 1391 1392 # username/password handling 1393 user, host = splituser(host) 1394 if user: 1395 user, passwd = splitpasswd(user) 1396 else: 1397 passwd = None 1398 host = unquote(host) 1399 user = user or '' 1400 passwd = passwd or '' 1401 1402 try: 1403 host = socket.gethostbyname(host) 1404 except socket.error, msg: 1405 raise URLError(msg) 1406 path, attrs = splitattr(req.get_selector()) 1407 dirs = path.split('/') 1408 dirs = map(unquote, dirs) 1409 dirs, file = dirs[:-1], dirs[-1] 1410 if dirs and not dirs[0]: 1411 dirs = dirs[1:] 1412 try: 1413 fw = self.connect_ftp(user, passwd, host, port, dirs, req.timeout) 1414 type = file and 'I' or 'D' 1415 for attr in attrs: 1416 attr, value = splitvalue(attr) 1417 if attr.lower() == 'type' and \ 1418 value in ('a', 'A', 'i', 'I', 'd', 'D'): 1419 type = value.upper() 1420 fp, retrlen = fw.retrfile(file, type) 1421 headers = "" 1422 mtype = mimetypes.guess_type(req.get_full_url())[0] 1423 if mtype: 1424 headers += "Content-type: %s\n" % mtype 1425 if retrlen is not None and retrlen >= 0: 1426 headers += "Content-length: %d\n" % retrlen 1427 sf = StringIO(headers) 1428 headers = mimetools.Message(sf) 1429 return addinfourl(fp, headers, req.get_full_url()) 1430 except ftplib.all_errors, msg: 1431 raise URLError, ('ftp error: %s' % msg), sys.exc_info()[2] 1432 1433 def connect_ftp(self, user, passwd, host, port, dirs, timeout): 1434 fw = ftpwrapper(user, passwd, host, port, dirs, timeout, 1435 persistent=False) 1436## fw.ftp.set_debuglevel(1) 1437 return fw 1438 1439class CacheFTPHandler(FTPHandler): 1440 # XXX would be nice to have pluggable cache strategies 1441 # XXX this stuff is definitely not thread safe 1442 def __init__(self): 1443 self.cache = {} 1444 self.timeout = {} 1445 self.soonest = 0 1446 self.delay = 60 1447 self.max_conns = 16 1448 1449 def setTimeout(self, t): 1450 self.delay = t 1451 1452 def setMaxConns(self, m): 1453 self.max_conns = m 1454 1455 def connect_ftp(self, user, passwd, host, port, dirs, timeout): 1456 key = user, host, port, '/'.join(dirs), timeout 1457 if key in self.cache: 1458 self.timeout[key] = time.time() + self.delay 1459 else: 1460 self.cache[key] = ftpwrapper(user, passwd, host, port, dirs, timeout) 1461 self.timeout[key] = time.time() + self.delay 1462 self.check_cache() 1463 return self.cache[key] 1464 1465 def check_cache(self): 1466 # first check for old ones 1467 t = time.time() 1468 if self.soonest <= t: 1469 for k, v in self.timeout.items(): 1470 if v < t: 1471 self.cache[k].close() 1472 del self.cache[k] 1473 del self.timeout[k] 1474 self.soonest = min(self.timeout.values()) 1475 1476 # then check the size 1477 if len(self.cache) == self.max_conns: 1478 for k, v in self.timeout.items(): 1479 if v == self.soonest: 1480 del self.cache[k] 1481 del self.timeout[k] 1482 break 1483 self.soonest = min(self.timeout.values()) 1484 1485 def clear_cache(self): 1486 for conn in self.cache.values(): 1487 conn.close() 1488 self.cache.clear() 1489 self.timeout.clear() 1490