1"""Small, fast HTTP client library for Python. 2 3Features persistent connections, cache, and Google App Engine Standard 4Environment support. 5""" 6 7from __future__ import print_function 8 9__author__ = "Joe Gregorio (joe@bitworking.org)" 10__copyright__ = "Copyright 2006, Joe Gregorio" 11__contributors__ = [ 12 "Thomas Broyer (t.broyer@ltgt.net)", 13 "James Antill", 14 "Xavier Verges Farrero", 15 "Jonathan Feinberg", 16 "Blair Zajac", 17 "Sam Ruby", 18 "Louis Nyffenegger", 19 "Alex Yu", 20] 21__license__ = "MIT" 22__version__ = "0.18.1" 23 24import base64 25import calendar 26import copy 27import email 28import email.FeedParser 29import email.Message 30import email.Utils 31import errno 32import gzip 33import httplib 34import os 35import random 36import re 37import StringIO 38import sys 39import time 40import urllib 41import urlparse 42import zlib 43 44try: 45 from hashlib import sha1 as _sha, md5 as _md5 46except ImportError: 47 # prior to Python 2.5, these were separate modules 48 import sha 49 import md5 50 51 _sha = sha.new 52 _md5 = md5.new 53import hmac 54from gettext import gettext as _ 55import socket 56 57try: 58 from httplib2 import socks 59except ImportError: 60 try: 61 import socks 62 except (ImportError, AttributeError): 63 socks = None 64 65# Build the appropriate socket wrapper for ssl 66ssl = None 67ssl_SSLError = None 68ssl_CertificateError = None 69try: 70 import ssl # python 2.6 71except ImportError: 72 pass 73if ssl is not None: 74 ssl_SSLError = getattr(ssl, "SSLError", None) 75 ssl_CertificateError = getattr(ssl, "CertificateError", None) 76 77 78def _ssl_wrap_socket( 79 sock, key_file, cert_file, disable_validation, ca_certs, ssl_version, hostname, key_password 80): 81 if disable_validation: 82 cert_reqs = ssl.CERT_NONE 83 else: 84 cert_reqs = ssl.CERT_REQUIRED 85 if ssl_version is None: 86 ssl_version = ssl.PROTOCOL_SSLv23 87 88 if hasattr(ssl, "SSLContext"): # Python 2.7.9 89 context = ssl.SSLContext(ssl_version) 90 context.verify_mode = cert_reqs 91 context.check_hostname = cert_reqs != ssl.CERT_NONE 92 if cert_file: 93 if key_password: 94 context.load_cert_chain(cert_file, key_file, key_password) 95 else: 96 context.load_cert_chain(cert_file, key_file) 97 if ca_certs: 98 context.load_verify_locations(ca_certs) 99 return context.wrap_socket(sock, server_hostname=hostname) 100 else: 101 if key_password: 102 raise NotSupportedOnThisPlatform("Certificate with password is not supported.") 103 return ssl.wrap_socket( 104 sock, 105 keyfile=key_file, 106 certfile=cert_file, 107 cert_reqs=cert_reqs, 108 ca_certs=ca_certs, 109 ssl_version=ssl_version, 110 ) 111 112 113def _ssl_wrap_socket_unsupported( 114 sock, key_file, cert_file, disable_validation, ca_certs, ssl_version, hostname, key_password 115): 116 if not disable_validation: 117 raise CertificateValidationUnsupported( 118 "SSL certificate validation is not supported without " 119 "the ssl module installed. To avoid this error, install " 120 "the ssl module, or explicity disable validation." 121 ) 122 if key_password: 123 raise NotSupportedOnThisPlatform("Certificate with password is not supported.") 124 ssl_sock = socket.ssl(sock, key_file, cert_file) 125 return httplib.FakeSocket(sock, ssl_sock) 126 127 128if ssl is None: 129 _ssl_wrap_socket = _ssl_wrap_socket_unsupported 130 131if sys.version_info >= (2, 3): 132 from .iri2uri import iri2uri 133else: 134 135 def iri2uri(uri): 136 return uri 137 138 139def has_timeout(timeout): # python 2.6 140 if hasattr(socket, "_GLOBAL_DEFAULT_TIMEOUT"): 141 return timeout is not None and timeout is not socket._GLOBAL_DEFAULT_TIMEOUT 142 return timeout is not None 143 144 145__all__ = [ 146 "Http", 147 "Response", 148 "ProxyInfo", 149 "HttpLib2Error", 150 "RedirectMissingLocation", 151 "RedirectLimit", 152 "FailedToDecompressContent", 153 "UnimplementedDigestAuthOptionError", 154 "UnimplementedHmacDigestAuthOptionError", 155 "debuglevel", 156 "ProxiesUnavailableError", 157] 158 159# The httplib debug level, set to a non-zero value to get debug output 160debuglevel = 0 161 162# A request will be tried 'RETRIES' times if it fails at the socket/connection level. 163RETRIES = 2 164 165# Python 2.3 support 166if sys.version_info < (2, 4): 167 168 def sorted(seq): 169 seq.sort() 170 return seq 171 172 173# Python 2.3 support 174def HTTPResponse__getheaders(self): 175 """Return list of (header, value) tuples.""" 176 if self.msg is None: 177 raise httplib.ResponseNotReady() 178 return self.msg.items() 179 180 181if not hasattr(httplib.HTTPResponse, "getheaders"): 182 httplib.HTTPResponse.getheaders = HTTPResponse__getheaders 183 184 185# All exceptions raised here derive from HttpLib2Error 186class HttpLib2Error(Exception): 187 pass 188 189 190# Some exceptions can be caught and optionally 191# be turned back into responses. 192class HttpLib2ErrorWithResponse(HttpLib2Error): 193 def __init__(self, desc, response, content): 194 self.response = response 195 self.content = content 196 HttpLib2Error.__init__(self, desc) 197 198 199class RedirectMissingLocation(HttpLib2ErrorWithResponse): 200 pass 201 202 203class RedirectLimit(HttpLib2ErrorWithResponse): 204 pass 205 206 207class FailedToDecompressContent(HttpLib2ErrorWithResponse): 208 pass 209 210 211class UnimplementedDigestAuthOptionError(HttpLib2ErrorWithResponse): 212 pass 213 214 215class UnimplementedHmacDigestAuthOptionError(HttpLib2ErrorWithResponse): 216 pass 217 218 219class MalformedHeader(HttpLib2Error): 220 pass 221 222 223class RelativeURIError(HttpLib2Error): 224 pass 225 226 227class ServerNotFoundError(HttpLib2Error): 228 pass 229 230 231class ProxiesUnavailableError(HttpLib2Error): 232 pass 233 234 235class CertificateValidationUnsupported(HttpLib2Error): 236 pass 237 238 239class SSLHandshakeError(HttpLib2Error): 240 pass 241 242 243class NotSupportedOnThisPlatform(HttpLib2Error): 244 pass 245 246 247class CertificateHostnameMismatch(SSLHandshakeError): 248 def __init__(self, desc, host, cert): 249 HttpLib2Error.__init__(self, desc) 250 self.host = host 251 self.cert = cert 252 253 254class NotRunningAppEngineEnvironment(HttpLib2Error): 255 pass 256 257 258# Open Items: 259# ----------- 260# Proxy support 261 262# Are we removing the cached content too soon on PUT (only delete on 200 Maybe?) 263 264# Pluggable cache storage (supports storing the cache in 265# flat files by default. We need a plug-in architecture 266# that can support Berkeley DB and Squid) 267 268# == Known Issues == 269# Does not handle a resource that uses conneg and Last-Modified but no ETag as a cache validator. 270# Does not handle Cache-Control: max-stale 271# Does not use Age: headers when calculating cache freshness. 272 273# The number of redirections to follow before giving up. 274# Note that only GET redirects are automatically followed. 275# Will also honor 301 requests by saving that info and never 276# requesting that URI again. 277DEFAULT_MAX_REDIRECTS = 5 278 279from httplib2 import certs 280CA_CERTS = certs.where() 281 282# Which headers are hop-by-hop headers by default 283HOP_BY_HOP = [ 284 "connection", 285 "keep-alive", 286 "proxy-authenticate", 287 "proxy-authorization", 288 "te", 289 "trailers", 290 "transfer-encoding", 291 "upgrade", 292] 293 294# https://tools.ietf.org/html/rfc7231#section-8.1.3 295SAFE_METHODS = ("GET", "HEAD") # TODO add "OPTIONS", "TRACE" 296 297# To change, assign to `Http().redirect_codes` 298REDIRECT_CODES = frozenset((300, 301, 302, 303, 307, 308)) 299 300 301def _get_end2end_headers(response): 302 hopbyhop = list(HOP_BY_HOP) 303 hopbyhop.extend([x.strip() for x in response.get("connection", "").split(",")]) 304 return [header for header in response.keys() if header not in hopbyhop] 305 306 307URI = re.compile(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?") 308 309 310def parse_uri(uri): 311 """Parses a URI using the regex given in Appendix B of RFC 3986. 312 313 (scheme, authority, path, query, fragment) = parse_uri(uri) 314 """ 315 groups = URI.match(uri).groups() 316 return (groups[1], groups[3], groups[4], groups[6], groups[8]) 317 318 319def urlnorm(uri): 320 (scheme, authority, path, query, fragment) = parse_uri(uri) 321 if not scheme or not authority: 322 raise RelativeURIError("Only absolute URIs are allowed. uri = %s" % uri) 323 authority = authority.lower() 324 scheme = scheme.lower() 325 if not path: 326 path = "/" 327 # Could do syntax based normalization of the URI before 328 # computing the digest. See Section 6.2.2 of Std 66. 329 request_uri = query and "?".join([path, query]) or path 330 scheme = scheme.lower() 331 defrag_uri = scheme + "://" + authority + request_uri 332 return scheme, authority, request_uri, defrag_uri 333 334 335# Cache filename construction (original borrowed from Venus http://intertwingly.net/code/venus/) 336re_url_scheme = re.compile(r"^\w+://") 337re_unsafe = re.compile(r"[^\w\-_.()=!]+") 338 339 340def safename(filename): 341 """Return a filename suitable for the cache. 342 Strips dangerous and common characters to create a filename we 343 can use to store the cache in. 344 """ 345 if isinstance(filename, str): 346 filename_bytes = filename 347 filename = filename.decode("utf-8") 348 else: 349 filename_bytes = filename.encode("utf-8") 350 filemd5 = _md5(filename_bytes).hexdigest() 351 filename = re_url_scheme.sub("", filename) 352 filename = re_unsafe.sub("", filename) 353 354 # limit length of filename (vital for Windows) 355 # https://github.com/httplib2/httplib2/pull/74 356 # C:\Users\ <username> \AppData\Local\Temp\ <safe_filename> , <md5> 357 # 9 chars + max 104 chars + 20 chars + x + 1 + 32 = max 259 chars 358 # Thus max safe filename x = 93 chars. Let it be 90 to make a round sum: 359 filename = filename[:90] 360 361 return ",".join((filename, filemd5)) 362 363 364NORMALIZE_SPACE = re.compile(r"(?:\r\n)?[ \t]+") 365 366 367def _normalize_headers(headers): 368 return dict( 369 [ 370 (key.lower(), NORMALIZE_SPACE.sub(value, " ").strip()) 371 for (key, value) in headers.iteritems() 372 ] 373 ) 374 375 376def _parse_cache_control(headers): 377 retval = {} 378 if "cache-control" in headers: 379 parts = headers["cache-control"].split(",") 380 parts_with_args = [ 381 tuple([x.strip().lower() for x in part.split("=", 1)]) 382 for part in parts 383 if -1 != part.find("=") 384 ] 385 parts_wo_args = [ 386 (name.strip().lower(), 1) for name in parts if -1 == name.find("=") 387 ] 388 retval = dict(parts_with_args + parts_wo_args) 389 return retval 390 391 392# Whether to use a strict mode to parse WWW-Authenticate headers 393# Might lead to bad results in case of ill-formed header value, 394# so disabled by default, falling back to relaxed parsing. 395# Set to true to turn on, usefull for testing servers. 396USE_WWW_AUTH_STRICT_PARSING = 0 397 398# In regex below: 399# [^\0-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+ matches a "token" as defined by HTTP 400# "(?:[^\0-\x08\x0A-\x1f\x7f-\xff\\\"]|\\[\0-\x7f])*?" matches a "quoted-string" as defined by HTTP, when LWS have already been replaced by a single space 401# Actually, as an auth-param value can be either a token or a quoted-string, they are combined in a single pattern which matches both: 402# \"?((?<=\")(?:[^\0-\x1f\x7f-\xff\\\"]|\\[\0-\x7f])*?(?=\")|(?<!\")[^\0-\x08\x0A-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+(?!\"))\"? 403WWW_AUTH_STRICT = re.compile( 404 r"^(?:\s*(?:,\s*)?([^\0-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+)\s*=\s*\"?((?<=\")(?:[^\0-\x08\x0A-\x1f\x7f-\xff\\\"]|\\[\0-\x7f])*?(?=\")|(?<!\")[^\0-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+(?!\"))\"?)(.*)$" 405) 406WWW_AUTH_RELAXED = re.compile( 407 r"^(?:\s*(?:,\s*)?([^ \t\r\n=]+)\s*=\s*\"?((?<=\")(?:[^\\\"]|\\.)*?(?=\")|(?<!\")[^ \t\r\n,]+(?!\"))\"?)(.*)$" 408) 409UNQUOTE_PAIRS = re.compile(r"\\(.)") 410 411 412def _parse_www_authenticate(headers, headername="www-authenticate"): 413 """Returns a dictionary of dictionaries, one dict 414 per auth_scheme.""" 415 retval = {} 416 if headername in headers: 417 try: 418 419 authenticate = headers[headername].strip() 420 www_auth = ( 421 USE_WWW_AUTH_STRICT_PARSING and WWW_AUTH_STRICT or WWW_AUTH_RELAXED 422 ) 423 while authenticate: 424 # Break off the scheme at the beginning of the line 425 if headername == "authentication-info": 426 (auth_scheme, the_rest) = ("digest", authenticate) 427 else: 428 (auth_scheme, the_rest) = authenticate.split(" ", 1) 429 # Now loop over all the key value pairs that come after the scheme, 430 # being careful not to roll into the next scheme 431 match = www_auth.search(the_rest) 432 auth_params = {} 433 while match: 434 if match and len(match.groups()) == 3: 435 (key, value, the_rest) = match.groups() 436 auth_params[key.lower()] = UNQUOTE_PAIRS.sub( 437 r"\1", value 438 ) # '\\'.join([x.replace('\\', '') for x in value.split('\\\\')]) 439 match = www_auth.search(the_rest) 440 retval[auth_scheme.lower()] = auth_params 441 authenticate = the_rest.strip() 442 443 except ValueError: 444 raise MalformedHeader("WWW-Authenticate") 445 return retval 446 447 448# TODO: add current time as _entry_disposition argument to avoid sleep in tests 449def _entry_disposition(response_headers, request_headers): 450 """Determine freshness from the Date, Expires and Cache-Control headers. 451 452 We don't handle the following: 453 454 1. Cache-Control: max-stale 455 2. Age: headers are not used in the calculations. 456 457 Not that this algorithm is simpler than you might think 458 because we are operating as a private (non-shared) cache. 459 This lets us ignore 's-maxage'. We can also ignore 460 'proxy-invalidate' since we aren't a proxy. 461 We will never return a stale document as 462 fresh as a design decision, and thus the non-implementation 463 of 'max-stale'. This also lets us safely ignore 'must-revalidate' 464 since we operate as if every server has sent 'must-revalidate'. 465 Since we are private we get to ignore both 'public' and 466 'private' parameters. We also ignore 'no-transform' since 467 we don't do any transformations. 468 The 'no-store' parameter is handled at a higher level. 469 So the only Cache-Control parameters we look at are: 470 471 no-cache 472 only-if-cached 473 max-age 474 min-fresh 475 """ 476 477 retval = "STALE" 478 cc = _parse_cache_control(request_headers) 479 cc_response = _parse_cache_control(response_headers) 480 481 if ( 482 "pragma" in request_headers 483 and request_headers["pragma"].lower().find("no-cache") != -1 484 ): 485 retval = "TRANSPARENT" 486 if "cache-control" not in request_headers: 487 request_headers["cache-control"] = "no-cache" 488 elif "no-cache" in cc: 489 retval = "TRANSPARENT" 490 elif "no-cache" in cc_response: 491 retval = "STALE" 492 elif "only-if-cached" in cc: 493 retval = "FRESH" 494 elif "date" in response_headers: 495 date = calendar.timegm(email.Utils.parsedate_tz(response_headers["date"])) 496 now = time.time() 497 current_age = max(0, now - date) 498 if "max-age" in cc_response: 499 try: 500 freshness_lifetime = int(cc_response["max-age"]) 501 except ValueError: 502 freshness_lifetime = 0 503 elif "expires" in response_headers: 504 expires = email.Utils.parsedate_tz(response_headers["expires"]) 505 if None == expires: 506 freshness_lifetime = 0 507 else: 508 freshness_lifetime = max(0, calendar.timegm(expires) - date) 509 else: 510 freshness_lifetime = 0 511 if "max-age" in cc: 512 try: 513 freshness_lifetime = int(cc["max-age"]) 514 except ValueError: 515 freshness_lifetime = 0 516 if "min-fresh" in cc: 517 try: 518 min_fresh = int(cc["min-fresh"]) 519 except ValueError: 520 min_fresh = 0 521 current_age += min_fresh 522 if freshness_lifetime > current_age: 523 retval = "FRESH" 524 return retval 525 526 527def _decompressContent(response, new_content): 528 content = new_content 529 try: 530 encoding = response.get("content-encoding", None) 531 if encoding in ["gzip", "deflate"]: 532 if encoding == "gzip": 533 content = gzip.GzipFile(fileobj=StringIO.StringIO(new_content)).read() 534 if encoding == "deflate": 535 content = zlib.decompress(content, -zlib.MAX_WBITS) 536 response["content-length"] = str(len(content)) 537 # Record the historical presence of the encoding in a way the won't interfere. 538 response["-content-encoding"] = response["content-encoding"] 539 del response["content-encoding"] 540 except (IOError, zlib.error): 541 content = "" 542 raise FailedToDecompressContent( 543 _("Content purported to be compressed with %s but failed to decompress.") 544 % response.get("content-encoding"), 545 response, 546 content, 547 ) 548 return content 549 550 551def _updateCache(request_headers, response_headers, content, cache, cachekey): 552 if cachekey: 553 cc = _parse_cache_control(request_headers) 554 cc_response = _parse_cache_control(response_headers) 555 if "no-store" in cc or "no-store" in cc_response: 556 cache.delete(cachekey) 557 else: 558 info = email.Message.Message() 559 for key, value in response_headers.iteritems(): 560 if key not in ["status", "content-encoding", "transfer-encoding"]: 561 info[key] = value 562 563 # Add annotations to the cache to indicate what headers 564 # are variant for this request. 565 vary = response_headers.get("vary", None) 566 if vary: 567 vary_headers = vary.lower().replace(" ", "").split(",") 568 for header in vary_headers: 569 key = "-varied-%s" % header 570 try: 571 info[key] = request_headers[header] 572 except KeyError: 573 pass 574 575 status = response_headers.status 576 if status == 304: 577 status = 200 578 579 status_header = "status: %d\r\n" % status 580 581 header_str = info.as_string() 582 583 header_str = re.sub("\r(?!\n)|(?<!\r)\n", "\r\n", header_str) 584 text = "".join([status_header, header_str, content]) 585 586 cache.set(cachekey, text) 587 588 589def _cnonce(): 590 dig = _md5( 591 "%s:%s" 592 % (time.ctime(), ["0123456789"[random.randrange(0, 9)] for i in range(20)]) 593 ).hexdigest() 594 return dig[:16] 595 596 597def _wsse_username_token(cnonce, iso_now, password): 598 return base64.b64encode( 599 _sha("%s%s%s" % (cnonce, iso_now, password)).digest() 600 ).strip() 601 602 603# For credentials we need two things, first 604# a pool of credential to try (not necesarily tied to BAsic, Digest, etc.) 605# Then we also need a list of URIs that have already demanded authentication 606# That list is tricky since sub-URIs can take the same auth, or the 607# auth scheme may change as you descend the tree. 608# So we also need each Auth instance to be able to tell us 609# how close to the 'top' it is. 610 611 612class Authentication(object): 613 def __init__( 614 self, credentials, host, request_uri, headers, response, content, http 615 ): 616 (scheme, authority, path, query, fragment) = parse_uri(request_uri) 617 self.path = path 618 self.host = host 619 self.credentials = credentials 620 self.http = http 621 622 def depth(self, request_uri): 623 (scheme, authority, path, query, fragment) = parse_uri(request_uri) 624 return request_uri[len(self.path) :].count("/") 625 626 def inscope(self, host, request_uri): 627 # XXX Should we normalize the request_uri? 628 (scheme, authority, path, query, fragment) = parse_uri(request_uri) 629 return (host == self.host) and path.startswith(self.path) 630 631 def request(self, method, request_uri, headers, content): 632 """Modify the request headers to add the appropriate 633 Authorization header. Over-ride this in sub-classes.""" 634 pass 635 636 def response(self, response, content): 637 """Gives us a chance to update with new nonces 638 or such returned from the last authorized response. 639 Over-rise this in sub-classes if necessary. 640 641 Return TRUE is the request is to be retried, for 642 example Digest may return stale=true. 643 """ 644 return False 645 646 647class BasicAuthentication(Authentication): 648 def __init__( 649 self, credentials, host, request_uri, headers, response, content, http 650 ): 651 Authentication.__init__( 652 self, credentials, host, request_uri, headers, response, content, http 653 ) 654 655 def request(self, method, request_uri, headers, content): 656 """Modify the request headers to add the appropriate 657 Authorization header.""" 658 headers["authorization"] = ( 659 "Basic " + base64.b64encode("%s:%s" % self.credentials).strip() 660 ) 661 662 663class DigestAuthentication(Authentication): 664 """Only do qop='auth' and MD5, since that 665 is all Apache currently implements""" 666 667 def __init__( 668 self, credentials, host, request_uri, headers, response, content, http 669 ): 670 Authentication.__init__( 671 self, credentials, host, request_uri, headers, response, content, http 672 ) 673 challenge = _parse_www_authenticate(response, "www-authenticate") 674 self.challenge = challenge["digest"] 675 qop = self.challenge.get("qop", "auth") 676 self.challenge["qop"] = ( 677 ("auth" in [x.strip() for x in qop.split()]) and "auth" or None 678 ) 679 if self.challenge["qop"] is None: 680 raise UnimplementedDigestAuthOptionError( 681 _("Unsupported value for qop: %s." % qop) 682 ) 683 self.challenge["algorithm"] = self.challenge.get("algorithm", "MD5").upper() 684 if self.challenge["algorithm"] != "MD5": 685 raise UnimplementedDigestAuthOptionError( 686 _("Unsupported value for algorithm: %s." % self.challenge["algorithm"]) 687 ) 688 self.A1 = "".join( 689 [ 690 self.credentials[0], 691 ":", 692 self.challenge["realm"], 693 ":", 694 self.credentials[1], 695 ] 696 ) 697 self.challenge["nc"] = 1 698 699 def request(self, method, request_uri, headers, content, cnonce=None): 700 """Modify the request headers""" 701 H = lambda x: _md5(x).hexdigest() 702 KD = lambda s, d: H("%s:%s" % (s, d)) 703 A2 = "".join([method, ":", request_uri]) 704 self.challenge["cnonce"] = cnonce or _cnonce() 705 request_digest = '"%s"' % KD( 706 H(self.A1), 707 "%s:%s:%s:%s:%s" 708 % ( 709 self.challenge["nonce"], 710 "%08x" % self.challenge["nc"], 711 self.challenge["cnonce"], 712 self.challenge["qop"], 713 H(A2), 714 ), 715 ) 716 headers["authorization"] = ( 717 'Digest username="%s", realm="%s", nonce="%s", ' 718 'uri="%s", algorithm=%s, response=%s, qop=%s, ' 719 'nc=%08x, cnonce="%s"' 720 ) % ( 721 self.credentials[0], 722 self.challenge["realm"], 723 self.challenge["nonce"], 724 request_uri, 725 self.challenge["algorithm"], 726 request_digest, 727 self.challenge["qop"], 728 self.challenge["nc"], 729 self.challenge["cnonce"], 730 ) 731 if self.challenge.get("opaque"): 732 headers["authorization"] += ', opaque="%s"' % self.challenge["opaque"] 733 self.challenge["nc"] += 1 734 735 def response(self, response, content): 736 if "authentication-info" not in response: 737 challenge = _parse_www_authenticate(response, "www-authenticate").get( 738 "digest", {} 739 ) 740 if "true" == challenge.get("stale"): 741 self.challenge["nonce"] = challenge["nonce"] 742 self.challenge["nc"] = 1 743 return True 744 else: 745 updated_challenge = _parse_www_authenticate( 746 response, "authentication-info" 747 ).get("digest", {}) 748 749 if "nextnonce" in updated_challenge: 750 self.challenge["nonce"] = updated_challenge["nextnonce"] 751 self.challenge["nc"] = 1 752 return False 753 754 755class HmacDigestAuthentication(Authentication): 756 """Adapted from Robert Sayre's code and DigestAuthentication above.""" 757 758 __author__ = "Thomas Broyer (t.broyer@ltgt.net)" 759 760 def __init__( 761 self, credentials, host, request_uri, headers, response, content, http 762 ): 763 Authentication.__init__( 764 self, credentials, host, request_uri, headers, response, content, http 765 ) 766 challenge = _parse_www_authenticate(response, "www-authenticate") 767 self.challenge = challenge["hmacdigest"] 768 # TODO: self.challenge['domain'] 769 self.challenge["reason"] = self.challenge.get("reason", "unauthorized") 770 if self.challenge["reason"] not in ["unauthorized", "integrity"]: 771 self.challenge["reason"] = "unauthorized" 772 self.challenge["salt"] = self.challenge.get("salt", "") 773 if not self.challenge.get("snonce"): 774 raise UnimplementedHmacDigestAuthOptionError( 775 _("The challenge doesn't contain a server nonce, or this one is empty.") 776 ) 777 self.challenge["algorithm"] = self.challenge.get("algorithm", "HMAC-SHA-1") 778 if self.challenge["algorithm"] not in ["HMAC-SHA-1", "HMAC-MD5"]: 779 raise UnimplementedHmacDigestAuthOptionError( 780 _("Unsupported value for algorithm: %s." % self.challenge["algorithm"]) 781 ) 782 self.challenge["pw-algorithm"] = self.challenge.get("pw-algorithm", "SHA-1") 783 if self.challenge["pw-algorithm"] not in ["SHA-1", "MD5"]: 784 raise UnimplementedHmacDigestAuthOptionError( 785 _( 786 "Unsupported value for pw-algorithm: %s." 787 % self.challenge["pw-algorithm"] 788 ) 789 ) 790 if self.challenge["algorithm"] == "HMAC-MD5": 791 self.hashmod = _md5 792 else: 793 self.hashmod = _sha 794 if self.challenge["pw-algorithm"] == "MD5": 795 self.pwhashmod = _md5 796 else: 797 self.pwhashmod = _sha 798 self.key = "".join( 799 [ 800 self.credentials[0], 801 ":", 802 self.pwhashmod.new( 803 "".join([self.credentials[1], self.challenge["salt"]]) 804 ) 805 .hexdigest() 806 .lower(), 807 ":", 808 self.challenge["realm"], 809 ] 810 ) 811 self.key = self.pwhashmod.new(self.key).hexdigest().lower() 812 813 def request(self, method, request_uri, headers, content): 814 """Modify the request headers""" 815 keys = _get_end2end_headers(headers) 816 keylist = "".join(["%s " % k for k in keys]) 817 headers_val = "".join([headers[k] for k in keys]) 818 created = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()) 819 cnonce = _cnonce() 820 request_digest = "%s:%s:%s:%s:%s" % ( 821 method, 822 request_uri, 823 cnonce, 824 self.challenge["snonce"], 825 headers_val, 826 ) 827 request_digest = ( 828 hmac.new(self.key, request_digest, self.hashmod).hexdigest().lower() 829 ) 830 headers["authorization"] = ( 831 'HMACDigest username="%s", realm="%s", snonce="%s",' 832 ' cnonce="%s", uri="%s", created="%s", ' 833 'response="%s", headers="%s"' 834 ) % ( 835 self.credentials[0], 836 self.challenge["realm"], 837 self.challenge["snonce"], 838 cnonce, 839 request_uri, 840 created, 841 request_digest, 842 keylist, 843 ) 844 845 def response(self, response, content): 846 challenge = _parse_www_authenticate(response, "www-authenticate").get( 847 "hmacdigest", {} 848 ) 849 if challenge.get("reason") in ["integrity", "stale"]: 850 return True 851 return False 852 853 854class WsseAuthentication(Authentication): 855 """This is thinly tested and should not be relied upon. 856 At this time there isn't any third party server to test against. 857 Blogger and TypePad implemented this algorithm at one point 858 but Blogger has since switched to Basic over HTTPS and 859 TypePad has implemented it wrong, by never issuing a 401 860 challenge but instead requiring your client to telepathically know that 861 their endpoint is expecting WSSE profile="UsernameToken".""" 862 863 def __init__( 864 self, credentials, host, request_uri, headers, response, content, http 865 ): 866 Authentication.__init__( 867 self, credentials, host, request_uri, headers, response, content, http 868 ) 869 870 def request(self, method, request_uri, headers, content): 871 """Modify the request headers to add the appropriate 872 Authorization header.""" 873 headers["authorization"] = 'WSSE profile="UsernameToken"' 874 iso_now = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()) 875 cnonce = _cnonce() 876 password_digest = _wsse_username_token(cnonce, iso_now, self.credentials[1]) 877 headers["X-WSSE"] = ( 878 'UsernameToken Username="%s", PasswordDigest="%s", ' 879 'Nonce="%s", Created="%s"' 880 ) % (self.credentials[0], password_digest, cnonce, iso_now) 881 882 883class GoogleLoginAuthentication(Authentication): 884 def __init__( 885 self, credentials, host, request_uri, headers, response, content, http 886 ): 887 from urllib import urlencode 888 889 Authentication.__init__( 890 self, credentials, host, request_uri, headers, response, content, http 891 ) 892 challenge = _parse_www_authenticate(response, "www-authenticate") 893 service = challenge["googlelogin"].get("service", "xapi") 894 # Bloggger actually returns the service in the challenge 895 # For the rest we guess based on the URI 896 if service == "xapi" and request_uri.find("calendar") > 0: 897 service = "cl" 898 # No point in guessing Base or Spreadsheet 899 # elif request_uri.find("spreadsheets") > 0: 900 # service = "wise" 901 902 auth = dict( 903 Email=credentials[0], 904 Passwd=credentials[1], 905 service=service, 906 source=headers["user-agent"], 907 ) 908 resp, content = self.http.request( 909 "https://www.google.com/accounts/ClientLogin", 910 method="POST", 911 body=urlencode(auth), 912 headers={"Content-Type": "application/x-www-form-urlencoded"}, 913 ) 914 lines = content.split("\n") 915 d = dict([tuple(line.split("=", 1)) for line in lines if line]) 916 if resp.status == 403: 917 self.Auth = "" 918 else: 919 self.Auth = d["Auth"] 920 921 def request(self, method, request_uri, headers, content): 922 """Modify the request headers to add the appropriate 923 Authorization header.""" 924 headers["authorization"] = "GoogleLogin Auth=" + self.Auth 925 926 927AUTH_SCHEME_CLASSES = { 928 "basic": BasicAuthentication, 929 "wsse": WsseAuthentication, 930 "digest": DigestAuthentication, 931 "hmacdigest": HmacDigestAuthentication, 932 "googlelogin": GoogleLoginAuthentication, 933} 934 935AUTH_SCHEME_ORDER = ["hmacdigest", "googlelogin", "digest", "wsse", "basic"] 936 937 938class FileCache(object): 939 """Uses a local directory as a store for cached files. 940 Not really safe to use if multiple threads or processes are going to 941 be running on the same cache. 942 """ 943 944 def __init__( 945 self, cache, safe=safename 946 ): # use safe=lambda x: md5.new(x).hexdigest() for the old behavior 947 self.cache = cache 948 self.safe = safe 949 if not os.path.exists(cache): 950 os.makedirs(self.cache) 951 952 def get(self, key): 953 retval = None 954 cacheFullPath = os.path.join(self.cache, self.safe(key)) 955 try: 956 f = file(cacheFullPath, "rb") 957 retval = f.read() 958 f.close() 959 except IOError: 960 pass 961 return retval 962 963 def set(self, key, value): 964 cacheFullPath = os.path.join(self.cache, self.safe(key)) 965 f = file(cacheFullPath, "wb") 966 f.write(value) 967 f.close() 968 969 def delete(self, key): 970 cacheFullPath = os.path.join(self.cache, self.safe(key)) 971 if os.path.exists(cacheFullPath): 972 os.remove(cacheFullPath) 973 974 975class Credentials(object): 976 def __init__(self): 977 self.credentials = [] 978 979 def add(self, name, password, domain=""): 980 self.credentials.append((domain.lower(), name, password)) 981 982 def clear(self): 983 self.credentials = [] 984 985 def iter(self, domain): 986 for (cdomain, name, password) in self.credentials: 987 if cdomain == "" or domain == cdomain: 988 yield (name, password) 989 990 991class KeyCerts(Credentials): 992 """Identical to Credentials except that 993 name/password are mapped to key/cert.""" 994 def add(self, key, cert, domain, password): 995 self.credentials.append((domain.lower(), key, cert, password)) 996 997 def iter(self, domain): 998 for (cdomain, key, cert, password) in self.credentials: 999 if cdomain == "" or domain == cdomain: 1000 yield (key, cert, password) 1001 1002 1003class AllHosts(object): 1004 pass 1005 1006 1007class ProxyInfo(object): 1008 """Collect information required to use a proxy.""" 1009 1010 bypass_hosts = () 1011 1012 def __init__( 1013 self, 1014 proxy_type, 1015 proxy_host, 1016 proxy_port, 1017 proxy_rdns=True, 1018 proxy_user=None, 1019 proxy_pass=None, 1020 proxy_headers=None, 1021 ): 1022 """Args: 1023 1024 proxy_type: The type of proxy server. This must be set to one of 1025 socks.PROXY_TYPE_XXX constants. For example: p = 1026 ProxyInfo(proxy_type=socks.PROXY_TYPE_HTTP, proxy_host='localhost', 1027 proxy_port=8000) 1028 proxy_host: The hostname or IP address of the proxy server. 1029 proxy_port: The port that the proxy server is running on. 1030 proxy_rdns: If True (default), DNS queries will not be performed 1031 locally, and instead, handed to the proxy to resolve. This is useful 1032 if the network does not allow resolution of non-local names. In 1033 httplib2 0.9 and earlier, this defaulted to False. 1034 proxy_user: The username used to authenticate with the proxy server. 1035 proxy_pass: The password used to authenticate with the proxy server. 1036 proxy_headers: Additional or modified headers for the proxy connect 1037 request. 1038 """ 1039 self.proxy_type = proxy_type 1040 self.proxy_host = proxy_host 1041 self.proxy_port = proxy_port 1042 self.proxy_rdns = proxy_rdns 1043 self.proxy_user = proxy_user 1044 self.proxy_pass = proxy_pass 1045 self.proxy_headers = proxy_headers 1046 1047 def astuple(self): 1048 return ( 1049 self.proxy_type, 1050 self.proxy_host, 1051 self.proxy_port, 1052 self.proxy_rdns, 1053 self.proxy_user, 1054 self.proxy_pass, 1055 self.proxy_headers, 1056 ) 1057 1058 def isgood(self): 1059 return (self.proxy_host != None) and (self.proxy_port != None) 1060 1061 def applies_to(self, hostname): 1062 return not self.bypass_host(hostname) 1063 1064 def bypass_host(self, hostname): 1065 """Has this host been excluded from the proxy config""" 1066 if self.bypass_hosts is AllHosts: 1067 return True 1068 1069 hostname = "." + hostname.lstrip(".") 1070 for skip_name in self.bypass_hosts: 1071 # *.suffix 1072 if skip_name.startswith(".") and hostname.endswith(skip_name): 1073 return True 1074 # exact match 1075 if hostname == "." + skip_name: 1076 return True 1077 return False 1078 1079 def __repr__(self): 1080 return ( 1081 "<ProxyInfo type={p.proxy_type} " 1082 "host:port={p.proxy_host}:{p.proxy_port} rdns={p.proxy_rdns}" 1083 + " user={p.proxy_user} headers={p.proxy_headers}>" 1084 ).format(p=self) 1085 1086 1087def proxy_info_from_environment(method="http"): 1088 """Read proxy info from the environment variables. 1089 """ 1090 if method not in ["http", "https"]: 1091 return 1092 1093 env_var = method + "_proxy" 1094 url = os.environ.get(env_var, os.environ.get(env_var.upper())) 1095 if not url: 1096 return 1097 return proxy_info_from_url(url, method, None) 1098 1099 1100def proxy_info_from_url(url, method="http", noproxy=None): 1101 """Construct a ProxyInfo from a URL (such as http_proxy env var) 1102 """ 1103 url = urlparse.urlparse(url) 1104 username = None 1105 password = None 1106 port = None 1107 if "@" in url[1]: 1108 ident, host_port = url[1].split("@", 1) 1109 if ":" in ident: 1110 username, password = ident.split(":", 1) 1111 else: 1112 password = ident 1113 else: 1114 host_port = url[1] 1115 if ":" in host_port: 1116 host, port = host_port.split(":", 1) 1117 else: 1118 host = host_port 1119 1120 if port: 1121 port = int(port) 1122 else: 1123 port = dict(https=443, http=80)[method] 1124 1125 proxy_type = 3 # socks.PROXY_TYPE_HTTP 1126 pi = ProxyInfo( 1127 proxy_type=proxy_type, 1128 proxy_host=host, 1129 proxy_port=port, 1130 proxy_user=username or None, 1131 proxy_pass=password or None, 1132 proxy_headers=None, 1133 ) 1134 1135 bypass_hosts = [] 1136 # If not given an explicit noproxy value, respect values in env vars. 1137 if noproxy is None: 1138 noproxy = os.environ.get("no_proxy", os.environ.get("NO_PROXY", "")) 1139 # Special case: A single '*' character means all hosts should be bypassed. 1140 if noproxy == "*": 1141 bypass_hosts = AllHosts 1142 elif noproxy.strip(): 1143 bypass_hosts = noproxy.split(",") 1144 bypass_hosts = filter(bool, bypass_hosts) # To exclude empty string. 1145 1146 pi.bypass_hosts = bypass_hosts 1147 return pi 1148 1149 1150class HTTPConnectionWithTimeout(httplib.HTTPConnection): 1151 """HTTPConnection subclass that supports timeouts 1152 1153 All timeouts are in seconds. If None is passed for timeout then 1154 Python's default timeout for sockets will be used. See for example 1155 the docs of socket.setdefaulttimeout(): 1156 http://docs.python.org/library/socket.html#socket.setdefaulttimeout 1157 """ 1158 1159 def __init__(self, host, port=None, strict=None, timeout=None, proxy_info=None): 1160 httplib.HTTPConnection.__init__(self, host, port, strict) 1161 self.timeout = timeout 1162 self.proxy_info = proxy_info 1163 1164 def connect(self): 1165 """Connect to the host and port specified in __init__.""" 1166 # Mostly verbatim from httplib.py. 1167 if self.proxy_info and socks is None: 1168 raise ProxiesUnavailableError( 1169 "Proxy support missing but proxy use was requested!" 1170 ) 1171 if self.proxy_info and self.proxy_info.isgood(): 1172 use_proxy = True 1173 proxy_type, proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass, proxy_headers = ( 1174 self.proxy_info.astuple() 1175 ) 1176 1177 host = proxy_host 1178 port = proxy_port 1179 else: 1180 use_proxy = False 1181 1182 host = self.host 1183 port = self.port 1184 1185 socket_err = None 1186 1187 for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM): 1188 af, socktype, proto, canonname, sa = res 1189 try: 1190 if use_proxy: 1191 self.sock = socks.socksocket(af, socktype, proto) 1192 self.sock.setproxy( 1193 proxy_type, 1194 proxy_host, 1195 proxy_port, 1196 proxy_rdns, 1197 proxy_user, 1198 proxy_pass, 1199 proxy_headers, 1200 ) 1201 else: 1202 self.sock = socket.socket(af, socktype, proto) 1203 self.sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1) 1204 # Different from httplib: support timeouts. 1205 if has_timeout(self.timeout): 1206 self.sock.settimeout(self.timeout) 1207 # End of difference from httplib. 1208 if self.debuglevel > 0: 1209 print("connect: (%s, %s) ************" % (self.host, self.port)) 1210 if use_proxy: 1211 print( 1212 "proxy: %s ************" 1213 % str( 1214 ( 1215 proxy_host, 1216 proxy_port, 1217 proxy_rdns, 1218 proxy_user, 1219 proxy_pass, 1220 proxy_headers, 1221 ) 1222 ) 1223 ) 1224 if use_proxy: 1225 self.sock.connect((self.host, self.port) + sa[2:]) 1226 else: 1227 self.sock.connect(sa) 1228 except socket.error as e: 1229 socket_err = e 1230 if self.debuglevel > 0: 1231 print("connect fail: (%s, %s)" % (self.host, self.port)) 1232 if use_proxy: 1233 print( 1234 "proxy: %s" 1235 % str( 1236 ( 1237 proxy_host, 1238 proxy_port, 1239 proxy_rdns, 1240 proxy_user, 1241 proxy_pass, 1242 proxy_headers, 1243 ) 1244 ) 1245 ) 1246 if self.sock: 1247 self.sock.close() 1248 self.sock = None 1249 continue 1250 break 1251 if not self.sock: 1252 raise socket_err or socket.error("getaddrinfo returns an empty list") 1253 1254 1255class HTTPSConnectionWithTimeout(httplib.HTTPSConnection): 1256 """This class allows communication via SSL. 1257 1258 All timeouts are in seconds. If None is passed for timeout then 1259 Python's default timeout for sockets will be used. See for example 1260 the docs of socket.setdefaulttimeout(): 1261 http://docs.python.org/library/socket.html#socket.setdefaulttimeout 1262 """ 1263 1264 def __init__( 1265 self, 1266 host, 1267 port=None, 1268 key_file=None, 1269 cert_file=None, 1270 strict=None, 1271 timeout=None, 1272 proxy_info=None, 1273 ca_certs=None, 1274 disable_ssl_certificate_validation=False, 1275 ssl_version=None, 1276 key_password=None, 1277 ): 1278 if key_password: 1279 httplib.HTTPSConnection.__init__(self, host, port=port, strict=strict) 1280 self._context.load_cert_chain(cert_file, key_file, key_password) 1281 self.key_file = key_file 1282 self.cert_file = cert_file 1283 self.key_password = key_password 1284 else: 1285 httplib.HTTPSConnection.__init__( 1286 self, host, port=port, key_file=key_file, cert_file=cert_file, strict=strict 1287 ) 1288 self.key_password = None 1289 self.timeout = timeout 1290 self.proxy_info = proxy_info 1291 if ca_certs is None: 1292 ca_certs = CA_CERTS 1293 self.ca_certs = ca_certs 1294 self.disable_ssl_certificate_validation = disable_ssl_certificate_validation 1295 self.ssl_version = ssl_version 1296 1297 # The following two methods were adapted from https_wrapper.py, released 1298 # with the Google Appengine SDK at 1299 # http://googleappengine.googlecode.com/svn-history/r136/trunk/python/google/appengine/tools/https_wrapper.py 1300 # under the following license: 1301 # 1302 # Copyright 2007 Google Inc. 1303 # 1304 # Licensed under the Apache License, Version 2.0 (the "License"); 1305 # you may not use this file except in compliance with the License. 1306 # You may obtain a copy of the License at 1307 # 1308 # http://www.apache.org/licenses/LICENSE-2.0 1309 # 1310 # Unless required by applicable law or agreed to in writing, software 1311 # distributed under the License is distributed on an "AS IS" BASIS, 1312 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 1313 # See the License for the specific language governing permissions and 1314 # limitations under the License. 1315 # 1316 1317 def _GetValidHostsForCert(self, cert): 1318 """Returns a list of valid host globs for an SSL certificate. 1319 1320 Args: 1321 cert: A dictionary representing an SSL certificate. 1322 Returns: 1323 list: A list of valid host globs. 1324 """ 1325 if "subjectAltName" in cert: 1326 return [x[1] for x in cert["subjectAltName"] if x[0].lower() == "dns"] 1327 else: 1328 return [x[0][1] for x in cert["subject"] if x[0][0].lower() == "commonname"] 1329 1330 def _ValidateCertificateHostname(self, cert, hostname): 1331 """Validates that a given hostname is valid for an SSL certificate. 1332 1333 Args: 1334 cert: A dictionary representing an SSL certificate. 1335 hostname: The hostname to test. 1336 Returns: 1337 bool: Whether or not the hostname is valid for this certificate. 1338 """ 1339 hosts = self._GetValidHostsForCert(cert) 1340 for host in hosts: 1341 host_re = host.replace(".", "\.").replace("*", "[^.]*") 1342 if re.search("^%s$" % (host_re,), hostname, re.I): 1343 return True 1344 return False 1345 1346 def connect(self): 1347 "Connect to a host on a given (SSL) port." 1348 1349 if self.proxy_info and self.proxy_info.isgood(): 1350 use_proxy = True 1351 proxy_type, proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass, proxy_headers = ( 1352 self.proxy_info.astuple() 1353 ) 1354 1355 host = proxy_host 1356 port = proxy_port 1357 else: 1358 use_proxy = False 1359 1360 host = self.host 1361 port = self.port 1362 1363 socket_err = None 1364 1365 address_info = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM) 1366 for family, socktype, proto, canonname, sockaddr in address_info: 1367 try: 1368 if use_proxy: 1369 sock = socks.socksocket(family, socktype, proto) 1370 1371 sock.setproxy( 1372 proxy_type, 1373 proxy_host, 1374 proxy_port, 1375 proxy_rdns, 1376 proxy_user, 1377 proxy_pass, 1378 proxy_headers, 1379 ) 1380 else: 1381 sock = socket.socket(family, socktype, proto) 1382 sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1) 1383 1384 if has_timeout(self.timeout): 1385 sock.settimeout(self.timeout) 1386 1387 if use_proxy: 1388 sock.connect((self.host, self.port) + sockaddr[:2]) 1389 else: 1390 sock.connect(sockaddr) 1391 self.sock = _ssl_wrap_socket( 1392 sock, 1393 self.key_file, 1394 self.cert_file, 1395 self.disable_ssl_certificate_validation, 1396 self.ca_certs, 1397 self.ssl_version, 1398 self.host, 1399 self.key_password, 1400 ) 1401 if self.debuglevel > 0: 1402 print("connect: (%s, %s)" % (self.host, self.port)) 1403 if use_proxy: 1404 print( 1405 "proxy: %s" 1406 % str( 1407 ( 1408 proxy_host, 1409 proxy_port, 1410 proxy_rdns, 1411 proxy_user, 1412 proxy_pass, 1413 proxy_headers, 1414 ) 1415 ) 1416 ) 1417 if not self.disable_ssl_certificate_validation: 1418 cert = self.sock.getpeercert() 1419 hostname = self.host.split(":", 0)[0] 1420 if not self._ValidateCertificateHostname(cert, hostname): 1421 raise CertificateHostnameMismatch( 1422 "Server presented certificate that does not match " 1423 "host %s: %s" % (hostname, cert), 1424 hostname, 1425 cert, 1426 ) 1427 except ( 1428 ssl_SSLError, 1429 ssl_CertificateError, 1430 CertificateHostnameMismatch, 1431 ) as e: 1432 if sock: 1433 sock.close() 1434 if self.sock: 1435 self.sock.close() 1436 self.sock = None 1437 # Unfortunately the ssl module doesn't seem to provide any way 1438 # to get at more detailed error information, in particular 1439 # whether the error is due to certificate validation or 1440 # something else (such as SSL protocol mismatch). 1441 if getattr(e, "errno", None) == ssl.SSL_ERROR_SSL: 1442 raise SSLHandshakeError(e) 1443 else: 1444 raise 1445 except (socket.timeout, socket.gaierror): 1446 raise 1447 except socket.error as e: 1448 socket_err = e 1449 if self.debuglevel > 0: 1450 print("connect fail: (%s, %s)" % (self.host, self.port)) 1451 if use_proxy: 1452 print( 1453 "proxy: %s" 1454 % str( 1455 ( 1456 proxy_host, 1457 proxy_port, 1458 proxy_rdns, 1459 proxy_user, 1460 proxy_pass, 1461 proxy_headers, 1462 ) 1463 ) 1464 ) 1465 if self.sock: 1466 self.sock.close() 1467 self.sock = None 1468 continue 1469 break 1470 if not self.sock: 1471 raise socket_err or socket.error("getaddrinfo returns an empty list") 1472 1473 1474SCHEME_TO_CONNECTION = { 1475 "http": HTTPConnectionWithTimeout, 1476 "https": HTTPSConnectionWithTimeout, 1477} 1478 1479 1480def _new_fixed_fetch(validate_certificate): 1481 1482 def fixed_fetch( 1483 url, 1484 payload=None, 1485 method="GET", 1486 headers={}, 1487 allow_truncated=False, 1488 follow_redirects=True, 1489 deadline=None, 1490 ): 1491 return fetch( 1492 url, 1493 payload=payload, 1494 method=method, 1495 headers=headers, 1496 allow_truncated=allow_truncated, 1497 follow_redirects=follow_redirects, 1498 deadline=deadline, 1499 validate_certificate=validate_certificate, 1500 ) 1501 1502 return fixed_fetch 1503 1504 1505class AppEngineHttpConnection(httplib.HTTPConnection): 1506 """Use httplib on App Engine, but compensate for its weirdness. 1507 1508 The parameters key_file, cert_file, proxy_info, ca_certs, 1509 disable_ssl_certificate_validation, and ssl_version are all dropped on 1510 the ground. 1511 """ 1512 1513 def __init__( 1514 self, 1515 host, 1516 port=None, 1517 key_file=None, 1518 cert_file=None, 1519 strict=None, 1520 timeout=None, 1521 proxy_info=None, 1522 ca_certs=None, 1523 disable_ssl_certificate_validation=False, 1524 ssl_version=None, 1525 ): 1526 httplib.HTTPConnection.__init__( 1527 self, host, port=port, strict=strict, timeout=timeout 1528 ) 1529 1530 1531class AppEngineHttpsConnection(httplib.HTTPSConnection): 1532 """Same as AppEngineHttpConnection, but for HTTPS URIs. 1533 1534 The parameters proxy_info, ca_certs, disable_ssl_certificate_validation, 1535 and ssl_version are all dropped on the ground. 1536 """ 1537 1538 def __init__( 1539 self, 1540 host, 1541 port=None, 1542 key_file=None, 1543 cert_file=None, 1544 strict=None, 1545 timeout=None, 1546 proxy_info=None, 1547 ca_certs=None, 1548 disable_ssl_certificate_validation=False, 1549 ssl_version=None, 1550 key_password=None, 1551 ): 1552 if key_password: 1553 raise NotSupportedOnThisPlatform("Certificate with password is not supported.") 1554 httplib.HTTPSConnection.__init__( 1555 self, 1556 host, 1557 port=port, 1558 key_file=key_file, 1559 cert_file=cert_file, 1560 strict=strict, 1561 timeout=timeout, 1562 ) 1563 self._fetch = _new_fixed_fetch(not disable_ssl_certificate_validation) 1564 1565 1566# Use a different connection object for Google App Engine Standard Environment. 1567def is_gae_instance(): 1568 server_software = os.environ.get('SERVER_SOFTWARE', '') 1569 if (server_software.startswith('Google App Engine/') or 1570 server_software.startswith('Development/') or 1571 server_software.startswith('testutil/')): 1572 return True 1573 return False 1574 1575 1576try: 1577 if not is_gae_instance(): 1578 raise NotRunningAppEngineEnvironment() 1579 1580 from google.appengine.api import apiproxy_stub_map 1581 if apiproxy_stub_map.apiproxy.GetStub("urlfetch") is None: 1582 raise ImportError 1583 1584 from google.appengine.api.urlfetch import fetch 1585 1586 # Update the connection classes to use the Googel App Engine specific ones. 1587 SCHEME_TO_CONNECTION = { 1588 "http": AppEngineHttpConnection, 1589 "https": AppEngineHttpsConnection, 1590 } 1591except (ImportError, NotRunningAppEngineEnvironment): 1592 pass 1593 1594 1595class Http(object): 1596 """An HTTP client that handles: 1597 1598 - all methods 1599 - caching 1600 - ETags 1601 - compression, 1602 - HTTPS 1603 - Basic 1604 - Digest 1605 - WSSE 1606 1607 and more. 1608 """ 1609 1610 def __init__( 1611 self, 1612 cache=None, 1613 timeout=None, 1614 proxy_info=proxy_info_from_environment, 1615 ca_certs=None, 1616 disable_ssl_certificate_validation=False, 1617 ssl_version=None, 1618 ): 1619 """If 'cache' is a string then it is used as a directory name for 1620 a disk cache. Otherwise it must be an object that supports the 1621 same interface as FileCache. 1622 1623 All timeouts are in seconds. If None is passed for timeout 1624 then Python's default timeout for sockets will be used. See 1625 for example the docs of socket.setdefaulttimeout(): 1626 http://docs.python.org/library/socket.html#socket.setdefaulttimeout 1627 1628 `proxy_info` may be: 1629 - a callable that takes the http scheme ('http' or 'https') and 1630 returns a ProxyInfo instance per request. By default, uses 1631 proxy_nfo_from_environment. 1632 - a ProxyInfo instance (static proxy config). 1633 - None (proxy disabled). 1634 1635 ca_certs is the path of a file containing root CA certificates for SSL 1636 server certificate validation. By default, a CA cert file bundled with 1637 httplib2 is used. 1638 1639 If disable_ssl_certificate_validation is true, SSL cert validation will 1640 not be performed. 1641 1642 By default, ssl.PROTOCOL_SSLv23 will be used for the ssl version. 1643 """ 1644 self.proxy_info = proxy_info 1645 self.ca_certs = ca_certs 1646 self.disable_ssl_certificate_validation = disable_ssl_certificate_validation 1647 self.ssl_version = ssl_version 1648 1649 # Map domain name to an httplib connection 1650 self.connections = {} 1651 # The location of the cache, for now a directory 1652 # where cached responses are held. 1653 if cache and isinstance(cache, basestring): 1654 self.cache = FileCache(cache) 1655 else: 1656 self.cache = cache 1657 1658 # Name/password 1659 self.credentials = Credentials() 1660 1661 # Key/cert 1662 self.certificates = KeyCerts() 1663 1664 # authorization objects 1665 self.authorizations = [] 1666 1667 # If set to False then no redirects are followed, even safe ones. 1668 self.follow_redirects = True 1669 1670 self.redirect_codes = REDIRECT_CODES 1671 1672 # Which HTTP methods do we apply optimistic concurrency to, i.e. 1673 # which methods get an "if-match:" etag header added to them. 1674 self.optimistic_concurrency_methods = ["PUT", "PATCH"] 1675 1676 self.safe_methods = list(SAFE_METHODS) 1677 1678 # If 'follow_redirects' is True, and this is set to True then 1679 # all redirecs are followed, including unsafe ones. 1680 self.follow_all_redirects = False 1681 1682 self.ignore_etag = False 1683 1684 self.force_exception_to_status_code = False 1685 1686 self.timeout = timeout 1687 1688 # Keep Authorization: headers on a redirect. 1689 self.forward_authorization_headers = False 1690 1691 def close(self): 1692 """Close persistent connections, clear sensitive data. 1693 Not thread-safe, requires external synchronization against concurrent requests. 1694 """ 1695 existing, self.connections = self.connections, {} 1696 for _, c in existing.iteritems(): 1697 c.close() 1698 self.certificates.clear() 1699 self.clear_credentials() 1700 1701 def __getstate__(self): 1702 state_dict = copy.copy(self.__dict__) 1703 # In case request is augmented by some foreign object such as 1704 # credentials which handle auth 1705 if "request" in state_dict: 1706 del state_dict["request"] 1707 if "connections" in state_dict: 1708 del state_dict["connections"] 1709 return state_dict 1710 1711 def __setstate__(self, state): 1712 self.__dict__.update(state) 1713 self.connections = {} 1714 1715 def _auth_from_challenge(self, host, request_uri, headers, response, content): 1716 """A generator that creates Authorization objects 1717 that can be applied to requests. 1718 """ 1719 challenges = _parse_www_authenticate(response, "www-authenticate") 1720 for cred in self.credentials.iter(host): 1721 for scheme in AUTH_SCHEME_ORDER: 1722 if scheme in challenges: 1723 yield AUTH_SCHEME_CLASSES[scheme]( 1724 cred, host, request_uri, headers, response, content, self 1725 ) 1726 1727 def add_credentials(self, name, password, domain=""): 1728 """Add a name and password that will be used 1729 any time a request requires authentication.""" 1730 self.credentials.add(name, password, domain) 1731 1732 def add_certificate(self, key, cert, domain, password=None): 1733 """Add a key and cert that will be used 1734 any time a request requires authentication.""" 1735 self.certificates.add(key, cert, domain, password) 1736 1737 def clear_credentials(self): 1738 """Remove all the names and passwords 1739 that are used for authentication""" 1740 self.credentials.clear() 1741 self.authorizations = [] 1742 1743 def _conn_request(self, conn, request_uri, method, body, headers): 1744 i = 0 1745 seen_bad_status_line = False 1746 while i < RETRIES: 1747 i += 1 1748 try: 1749 if hasattr(conn, "sock") and conn.sock is None: 1750 conn.connect() 1751 conn.request(method, request_uri, body, headers) 1752 except socket.timeout: 1753 raise 1754 except socket.gaierror: 1755 conn.close() 1756 raise ServerNotFoundError("Unable to find the server at %s" % conn.host) 1757 except ssl_SSLError: 1758 conn.close() 1759 raise 1760 except socket.error as e: 1761 err = 0 1762 if hasattr(e, "args"): 1763 err = getattr(e, "args")[0] 1764 else: 1765 err = e.errno 1766 if err == errno.ECONNREFUSED: # Connection refused 1767 raise 1768 if err in (errno.ENETUNREACH, errno.EADDRNOTAVAIL) and i < RETRIES: 1769 continue # retry on potentially transient socket errors 1770 except httplib.HTTPException: 1771 # Just because the server closed the connection doesn't apparently mean 1772 # that the server didn't send a response. 1773 if hasattr(conn, "sock") and conn.sock is None: 1774 if i < RETRIES - 1: 1775 conn.close() 1776 conn.connect() 1777 continue 1778 else: 1779 conn.close() 1780 raise 1781 if i < RETRIES - 1: 1782 conn.close() 1783 conn.connect() 1784 continue 1785 try: 1786 response = conn.getresponse() 1787 except httplib.BadStatusLine: 1788 # If we get a BadStatusLine on the first try then that means 1789 # the connection just went stale, so retry regardless of the 1790 # number of RETRIES set. 1791 if not seen_bad_status_line and i == 1: 1792 i = 0 1793 seen_bad_status_line = True 1794 conn.close() 1795 conn.connect() 1796 continue 1797 else: 1798 conn.close() 1799 raise 1800 except (socket.error, httplib.HTTPException): 1801 if i < RETRIES - 1: 1802 conn.close() 1803 conn.connect() 1804 continue 1805 else: 1806 conn.close() 1807 raise 1808 else: 1809 content = "" 1810 if method == "HEAD": 1811 conn.close() 1812 else: 1813 content = response.read() 1814 response = Response(response) 1815 if method != "HEAD": 1816 content = _decompressContent(response, content) 1817 break 1818 return (response, content) 1819 1820 def _request( 1821 self, 1822 conn, 1823 host, 1824 absolute_uri, 1825 request_uri, 1826 method, 1827 body, 1828 headers, 1829 redirections, 1830 cachekey, 1831 ): 1832 """Do the actual request using the connection object 1833 and also follow one level of redirects if necessary""" 1834 1835 auths = [ 1836 (auth.depth(request_uri), auth) 1837 for auth in self.authorizations 1838 if auth.inscope(host, request_uri) 1839 ] 1840 auth = auths and sorted(auths)[0][1] or None 1841 if auth: 1842 auth.request(method, request_uri, headers, body) 1843 1844 (response, content) = self._conn_request( 1845 conn, request_uri, method, body, headers 1846 ) 1847 1848 if auth: 1849 if auth.response(response, body): 1850 auth.request(method, request_uri, headers, body) 1851 (response, content) = self._conn_request( 1852 conn, request_uri, method, body, headers 1853 ) 1854 response._stale_digest = 1 1855 1856 if response.status == 401: 1857 for authorization in self._auth_from_challenge( 1858 host, request_uri, headers, response, content 1859 ): 1860 authorization.request(method, request_uri, headers, body) 1861 (response, content) = self._conn_request( 1862 conn, request_uri, method, body, headers 1863 ) 1864 if response.status != 401: 1865 self.authorizations.append(authorization) 1866 authorization.response(response, body) 1867 break 1868 1869 if ( 1870 self.follow_all_redirects 1871 or method in self.safe_methods 1872 or response.status in (303, 308) 1873 ): 1874 if self.follow_redirects and response.status in self.redirect_codes: 1875 # Pick out the location header and basically start from the beginning 1876 # remembering first to strip the ETag header and decrement our 'depth' 1877 if redirections: 1878 if "location" not in response and response.status != 300: 1879 raise RedirectMissingLocation( 1880 _( 1881 "Redirected but the response is missing a Location: header." 1882 ), 1883 response, 1884 content, 1885 ) 1886 # Fix-up relative redirects (which violate an RFC 2616 MUST) 1887 if "location" in response: 1888 location = response["location"] 1889 (scheme, authority, path, query, fragment) = parse_uri(location) 1890 if authority == None: 1891 response["location"] = urlparse.urljoin( 1892 absolute_uri, location 1893 ) 1894 if response.status == 308 or (response.status == 301 and method in self.safe_methods): 1895 response["-x-permanent-redirect-url"] = response["location"] 1896 if "content-location" not in response: 1897 response["content-location"] = absolute_uri 1898 _updateCache(headers, response, content, self.cache, cachekey) 1899 if "if-none-match" in headers: 1900 del headers["if-none-match"] 1901 if "if-modified-since" in headers: 1902 del headers["if-modified-since"] 1903 if ( 1904 "authorization" in headers 1905 and not self.forward_authorization_headers 1906 ): 1907 del headers["authorization"] 1908 if "location" in response: 1909 location = response["location"] 1910 old_response = copy.deepcopy(response) 1911 if "content-location" not in old_response: 1912 old_response["content-location"] = absolute_uri 1913 redirect_method = method 1914 if response.status in [302, 303]: 1915 redirect_method = "GET" 1916 body = None 1917 (response, content) = self.request( 1918 location, 1919 method=redirect_method, 1920 body=body, 1921 headers=headers, 1922 redirections=redirections - 1, 1923 ) 1924 response.previous = old_response 1925 else: 1926 raise RedirectLimit( 1927 "Redirected more times than rediection_limit allows.", 1928 response, 1929 content, 1930 ) 1931 elif response.status in [200, 203] and method in self.safe_methods: 1932 # Don't cache 206's since we aren't going to handle byte range requests 1933 if "content-location" not in response: 1934 response["content-location"] = absolute_uri 1935 _updateCache(headers, response, content, self.cache, cachekey) 1936 1937 return (response, content) 1938 1939 def _normalize_headers(self, headers): 1940 return _normalize_headers(headers) 1941 1942 # Need to catch and rebrand some exceptions 1943 # Then need to optionally turn all exceptions into status codes 1944 # including all socket.* and httplib.* exceptions. 1945 1946 def request( 1947 self, 1948 uri, 1949 method="GET", 1950 body=None, 1951 headers=None, 1952 redirections=DEFAULT_MAX_REDIRECTS, 1953 connection_type=None, 1954 ): 1955 """ Performs a single HTTP request. 1956 1957 The 'uri' is the URI of the HTTP resource and can begin with either 1958 'http' or 'https'. The value of 'uri' must be an absolute URI. 1959 1960 The 'method' is the HTTP method to perform, such as GET, POST, DELETE, 1961 etc. There is no restriction on the methods allowed. 1962 1963 The 'body' is the entity body to be sent with the request. It is a 1964 string object. 1965 1966 Any extra headers that are to be sent with the request should be 1967 provided in the 'headers' dictionary. 1968 1969 The maximum number of redirect to follow before raising an 1970 exception is 'redirections. The default is 5. 1971 1972 The return value is a tuple of (response, content), the first 1973 being and instance of the 'Response' class, the second being 1974 a string that contains the response entity body. 1975 """ 1976 conn_key = '' 1977 1978 try: 1979 if headers is None: 1980 headers = {} 1981 else: 1982 headers = self._normalize_headers(headers) 1983 1984 if "user-agent" not in headers: 1985 headers["user-agent"] = "Python-httplib2/%s (gzip)" % __version__ 1986 1987 uri = iri2uri(uri) 1988 # Prevent CWE-75 space injection to manipulate request via part of uri. 1989 # Prevent CWE-93 CRLF injection to modify headers via part of uri. 1990 uri = uri.replace(" ", "%20").replace("\r", "%0D").replace("\n", "%0A") 1991 1992 (scheme, authority, request_uri, defrag_uri) = urlnorm(uri) 1993 1994 proxy_info = self._get_proxy_info(scheme, authority) 1995 1996 conn_key = scheme + ":" + authority 1997 conn = self.connections.get(conn_key) 1998 if conn is None: 1999 if not connection_type: 2000 connection_type = SCHEME_TO_CONNECTION[scheme] 2001 certs = list(self.certificates.iter(authority)) 2002 if scheme == "https": 2003 if certs: 2004 conn = self.connections[conn_key] = connection_type( 2005 authority, 2006 key_file=certs[0][0], 2007 cert_file=certs[0][1], 2008 timeout=self.timeout, 2009 proxy_info=proxy_info, 2010 ca_certs=self.ca_certs, 2011 disable_ssl_certificate_validation=self.disable_ssl_certificate_validation, 2012 ssl_version=self.ssl_version, 2013 key_password=certs[0][2], 2014 ) 2015 else: 2016 conn = self.connections[conn_key] = connection_type( 2017 authority, 2018 timeout=self.timeout, 2019 proxy_info=proxy_info, 2020 ca_certs=self.ca_certs, 2021 disable_ssl_certificate_validation=self.disable_ssl_certificate_validation, 2022 ssl_version=self.ssl_version, 2023 ) 2024 else: 2025 conn = self.connections[conn_key] = connection_type( 2026 authority, timeout=self.timeout, proxy_info=proxy_info 2027 ) 2028 conn.set_debuglevel(debuglevel) 2029 2030 if "range" not in headers and "accept-encoding" not in headers: 2031 headers["accept-encoding"] = "gzip, deflate" 2032 2033 info = email.Message.Message() 2034 cachekey = None 2035 cached_value = None 2036 if self.cache: 2037 cachekey = defrag_uri.encode("utf-8") 2038 cached_value = self.cache.get(cachekey) 2039 if cached_value: 2040 # info = email.message_from_string(cached_value) 2041 # 2042 # Need to replace the line above with the kludge below 2043 # to fix the non-existent bug not fixed in this 2044 # bug report: http://mail.python.org/pipermail/python-bugs-list/2005-September/030289.html 2045 try: 2046 info, content = cached_value.split("\r\n\r\n", 1) 2047 feedparser = email.FeedParser.FeedParser() 2048 feedparser.feed(info) 2049 info = feedparser.close() 2050 feedparser._parse = None 2051 except (IndexError, ValueError): 2052 self.cache.delete(cachekey) 2053 cachekey = None 2054 cached_value = None 2055 2056 if ( 2057 method in self.optimistic_concurrency_methods 2058 and self.cache 2059 and "etag" in info 2060 and not self.ignore_etag 2061 and "if-match" not in headers 2062 ): 2063 # http://www.w3.org/1999/04/Editing/ 2064 headers["if-match"] = info["etag"] 2065 2066 # https://tools.ietf.org/html/rfc7234 2067 # A cache MUST invalidate the effective Request URI as well as [...] Location and Content-Location 2068 # when a non-error status code is received in response to an unsafe request method. 2069 if self.cache and cachekey and method not in self.safe_methods: 2070 self.cache.delete(cachekey) 2071 2072 # Check the vary header in the cache to see if this request 2073 # matches what varies in the cache. 2074 if method in self.safe_methods and "vary" in info: 2075 vary = info["vary"] 2076 vary_headers = vary.lower().replace(" ", "").split(",") 2077 for header in vary_headers: 2078 key = "-varied-%s" % header 2079 value = info[key] 2080 if headers.get(header, None) != value: 2081 cached_value = None 2082 break 2083 2084 if ( 2085 self.cache 2086 and cached_value 2087 and (method in self.safe_methods or info["status"] == "308") 2088 and "range" not in headers 2089 ): 2090 redirect_method = method 2091 if info["status"] not in ("307", "308"): 2092 redirect_method = "GET" 2093 if "-x-permanent-redirect-url" in info: 2094 # Should cached permanent redirects be counted in our redirection count? For now, yes. 2095 if redirections <= 0: 2096 raise RedirectLimit( 2097 "Redirected more times than rediection_limit allows.", 2098 {}, 2099 "", 2100 ) 2101 (response, new_content) = self.request( 2102 info["-x-permanent-redirect-url"], 2103 method=redirect_method, 2104 headers=headers, 2105 redirections=redirections - 1, 2106 ) 2107 response.previous = Response(info) 2108 response.previous.fromcache = True 2109 else: 2110 # Determine our course of action: 2111 # Is the cached entry fresh or stale? 2112 # Has the client requested a non-cached response? 2113 # 2114 # There seems to be three possible answers: 2115 # 1. [FRESH] Return the cache entry w/o doing a GET 2116 # 2. [STALE] Do the GET (but add in cache validators if available) 2117 # 3. [TRANSPARENT] Do a GET w/o any cache validators (Cache-Control: no-cache) on the request 2118 entry_disposition = _entry_disposition(info, headers) 2119 2120 if entry_disposition == "FRESH": 2121 if not cached_value: 2122 info["status"] = "504" 2123 content = "" 2124 response = Response(info) 2125 if cached_value: 2126 response.fromcache = True 2127 return (response, content) 2128 2129 if entry_disposition == "STALE": 2130 if ( 2131 "etag" in info 2132 and not self.ignore_etag 2133 and not "if-none-match" in headers 2134 ): 2135 headers["if-none-match"] = info["etag"] 2136 if "last-modified" in info and not "last-modified" in headers: 2137 headers["if-modified-since"] = info["last-modified"] 2138 elif entry_disposition == "TRANSPARENT": 2139 pass 2140 2141 (response, new_content) = self._request( 2142 conn, 2143 authority, 2144 uri, 2145 request_uri, 2146 method, 2147 body, 2148 headers, 2149 redirections, 2150 cachekey, 2151 ) 2152 2153 if response.status == 304 and method == "GET": 2154 # Rewrite the cache entry with the new end-to-end headers 2155 # Take all headers that are in response 2156 # and overwrite their values in info. 2157 # unless they are hop-by-hop, or are listed in the connection header. 2158 2159 for key in _get_end2end_headers(response): 2160 info[key] = response[key] 2161 merged_response = Response(info) 2162 if hasattr(response, "_stale_digest"): 2163 merged_response._stale_digest = response._stale_digest 2164 _updateCache( 2165 headers, merged_response, content, self.cache, cachekey 2166 ) 2167 response = merged_response 2168 response.status = 200 2169 response.fromcache = True 2170 2171 elif response.status == 200: 2172 content = new_content 2173 else: 2174 self.cache.delete(cachekey) 2175 content = new_content 2176 else: 2177 cc = _parse_cache_control(headers) 2178 if "only-if-cached" in cc: 2179 info["status"] = "504" 2180 response = Response(info) 2181 content = "" 2182 else: 2183 (response, content) = self._request( 2184 conn, 2185 authority, 2186 uri, 2187 request_uri, 2188 method, 2189 body, 2190 headers, 2191 redirections, 2192 cachekey, 2193 ) 2194 except Exception as e: 2195 is_timeout = isinstance(e, socket.timeout) 2196 if is_timeout: 2197 conn = self.connections.pop(conn_key, None) 2198 if conn: 2199 conn.close() 2200 2201 if self.force_exception_to_status_code: 2202 if isinstance(e, HttpLib2ErrorWithResponse): 2203 response = e.response 2204 content = e.content 2205 response.status = 500 2206 response.reason = str(e) 2207 elif is_timeout: 2208 content = "Request Timeout" 2209 response = Response( 2210 { 2211 "content-type": "text/plain", 2212 "status": "408", 2213 "content-length": len(content), 2214 } 2215 ) 2216 response.reason = "Request Timeout" 2217 else: 2218 content = str(e) 2219 response = Response( 2220 { 2221 "content-type": "text/plain", 2222 "status": "400", 2223 "content-length": len(content), 2224 } 2225 ) 2226 response.reason = "Bad Request" 2227 else: 2228 raise 2229 2230 return (response, content) 2231 2232 def _get_proxy_info(self, scheme, authority): 2233 """Return a ProxyInfo instance (or None) based on the scheme 2234 and authority. 2235 """ 2236 hostname, port = urllib.splitport(authority) 2237 proxy_info = self.proxy_info 2238 if callable(proxy_info): 2239 proxy_info = proxy_info(scheme) 2240 2241 if hasattr(proxy_info, "applies_to") and not proxy_info.applies_to(hostname): 2242 proxy_info = None 2243 return proxy_info 2244 2245 2246class Response(dict): 2247 """An object more like email.Message than httplib.HTTPResponse.""" 2248 2249 """Is this response from our local cache""" 2250 fromcache = False 2251 """HTTP protocol version used by server. 2252 2253 10 for HTTP/1.0, 11 for HTTP/1.1. 2254 """ 2255 version = 11 2256 2257 "Status code returned by server. " 2258 status = 200 2259 """Reason phrase returned by server.""" 2260 reason = "Ok" 2261 2262 previous = None 2263 2264 def __init__(self, info): 2265 # info is either an email.Message or 2266 # an httplib.HTTPResponse object. 2267 if isinstance(info, httplib.HTTPResponse): 2268 for key, value in info.getheaders(): 2269 self[key.lower()] = value 2270 self.status = info.status 2271 self["status"] = str(self.status) 2272 self.reason = info.reason 2273 self.version = info.version 2274 elif isinstance(info, email.Message.Message): 2275 for key, value in info.items(): 2276 self[key.lower()] = value 2277 self.status = int(self["status"]) 2278 else: 2279 for key, value in info.iteritems(): 2280 self[key.lower()] = value 2281 self.status = int(self.get("status", self.status)) 2282 self.reason = self.get("reason", self.reason) 2283 2284 def __getattr__(self, name): 2285 if name == "dict": 2286 return self 2287 else: 2288 raise AttributeError(name) 2289