1"""Small, fast HTTP client library for Python. 2 3Features persistent connections, cache, and Google App Engine Standard 4Environment support. 5""" 6 7from __future__ import print_function 8 9__author__ = "Joe Gregorio (joe@bitworking.org)" 10__copyright__ = "Copyright 2006, Joe Gregorio" 11__contributors__ = [ 12 "Thomas Broyer (t.broyer@ltgt.net)", 13 "James Antill", 14 "Xavier Verges Farrero", 15 "Jonathan Feinberg", 16 "Blair Zajac", 17 "Sam Ruby", 18 "Louis Nyffenegger", 19 "Alex Yu", 20] 21__license__ = "MIT" 22__version__ = '0.12.1' 23 24import base64 25import calendar 26import copy 27import email 28import email.FeedParser 29import email.Message 30import email.Utils 31import errno 32import gzip 33import httplib 34import os 35import random 36import re 37import StringIO 38import sys 39import time 40import urllib 41import urlparse 42import zlib 43 44try: 45 from hashlib import sha1 as _sha, md5 as _md5 46except ImportError: 47 # prior to Python 2.5, these were separate modules 48 import sha 49 import md5 50 51 _sha = sha.new 52 _md5 = md5.new 53import hmac 54from gettext import gettext as _ 55import socket 56 57try: 58 from httplib2 import socks 59except ImportError: 60 try: 61 import socks 62 except (ImportError, AttributeError): 63 socks = None 64 65# Build the appropriate socket wrapper for ssl 66ssl = None 67ssl_SSLError = None 68ssl_CertificateError = None 69try: 70 import ssl # python 2.6 71except ImportError: 72 pass 73if ssl is not None: 74 ssl_SSLError = getattr(ssl, "SSLError", None) 75 ssl_CertificateError = getattr(ssl, "CertificateError", None) 76 77 78def _ssl_wrap_socket( 79 sock, key_file, cert_file, disable_validation, ca_certs, ssl_version, hostname 80): 81 if disable_validation: 82 cert_reqs = ssl.CERT_NONE 83 else: 84 cert_reqs = ssl.CERT_REQUIRED 85 if ssl_version is None: 86 ssl_version = ssl.PROTOCOL_SSLv23 87 88 if hasattr(ssl, "SSLContext"): # Python 2.7.9 89 context = ssl.SSLContext(ssl_version) 90 context.verify_mode = cert_reqs 91 context.check_hostname = cert_reqs != ssl.CERT_NONE 92 if cert_file: 93 context.load_cert_chain(cert_file, key_file) 94 if ca_certs: 95 context.load_verify_locations(ca_certs) 96 return context.wrap_socket(sock, server_hostname=hostname) 97 else: 98 return ssl.wrap_socket( 99 sock, 100 keyfile=key_file, 101 certfile=cert_file, 102 cert_reqs=cert_reqs, 103 ca_certs=ca_certs, 104 ssl_version=ssl_version, 105 ) 106 107 108def _ssl_wrap_socket_unsupported( 109 sock, key_file, cert_file, disable_validation, ca_certs, ssl_version, hostname 110): 111 if not disable_validation: 112 raise CertificateValidationUnsupported( 113 "SSL certificate validation is not supported without " 114 "the ssl module installed. To avoid this error, install " 115 "the ssl module, or explicity disable validation." 
def _ssl_wrap_socket_unsupported(
    sock, key_file, cert_file, disable_validation, ca_certs, ssl_version, hostname
):
    if not disable_validation:
        raise CertificateValidationUnsupported(
            "SSL certificate validation is not supported without "
            "the ssl module installed. To avoid this error, install "
            "the ssl module, or explicitly disable validation."
        )
    ssl_sock = socket.ssl(sock, key_file, cert_file)
    return httplib.FakeSocket(sock, ssl_sock)


if ssl is None:
    _ssl_wrap_socket = _ssl_wrap_socket_unsupported

if sys.version_info >= (2, 3):
    from iri2uri import iri2uri
else:

    def iri2uri(uri):
        return uri


def has_timeout(timeout):  # python 2.6
    if hasattr(socket, "_GLOBAL_DEFAULT_TIMEOUT"):
        return timeout is not None and timeout is not socket._GLOBAL_DEFAULT_TIMEOUT
    return timeout is not None


__all__ = [
    "Http",
    "Response",
    "ProxyInfo",
    "HttpLib2Error",
    "RedirectMissingLocation",
    "RedirectLimit",
    "FailedToDecompressContent",
    "UnimplementedDigestAuthOptionError",
    "UnimplementedHmacDigestAuthOptionError",
    "debuglevel",
    "ProxiesUnavailableError",
]

# The httplib debug level, set to a non-zero value to get debug output
debuglevel = 0

# A request will be tried 'RETRIES' times if it fails at the socket/connection level.
RETRIES = 2

# Python 2.3 support
if sys.version_info < (2, 4):

    def sorted(seq):
        seq.sort()
        return seq


# Python 2.3 support
def HTTPResponse__getheaders(self):
    """Return list of (header, value) tuples."""
    if self.msg is None:
        raise httplib.ResponseNotReady()
    return self.msg.items()


if not hasattr(httplib.HTTPResponse, "getheaders"):
    httplib.HTTPResponse.getheaders = HTTPResponse__getheaders


# All exceptions raised here derive from HttpLib2Error
class HttpLib2Error(Exception):
    pass


# Some exceptions can be caught and optionally
# be turned back into responses.
class HttpLib2ErrorWithResponse(HttpLib2Error):
    def __init__(self, desc, response, content):
        self.response = response
        self.content = content
        HttpLib2Error.__init__(self, desc)


class RedirectMissingLocation(HttpLib2ErrorWithResponse):
    pass


class RedirectLimit(HttpLib2ErrorWithResponse):
    pass


class FailedToDecompressContent(HttpLib2ErrorWithResponse):
    pass


class UnimplementedDigestAuthOptionError(HttpLib2ErrorWithResponse):
    pass


class UnimplementedHmacDigestAuthOptionError(HttpLib2ErrorWithResponse):
    pass


class MalformedHeader(HttpLib2Error):
    pass


class RelativeURIError(HttpLib2Error):
    pass


class ServerNotFoundError(HttpLib2Error):
    pass


class ProxiesUnavailableError(HttpLib2Error):
    pass


class CertificateValidationUnsupported(HttpLib2Error):
    pass


class SSLHandshakeError(HttpLib2Error):
    pass


class NotSupportedOnThisPlatform(HttpLib2Error):
    pass


class CertificateHostnameMismatch(SSLHandshakeError):
    def __init__(self, desc, host, cert):
        HttpLib2Error.__init__(self, desc)
        self.host = host
        self.cert = cert


class NotRunningAppEngineEnvironment(HttpLib2Error):
    pass
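# Sketch of catching one of the HttpLib2ErrorWithResponse subclasses and
# recovering the partial response; applies when force_exception_to_status_code
# is False, the default (the `h` and `uri` names are assumptions):
#
#   try:
#       resp, content = h.request(uri)
#   except RedirectLimit as e:
#       resp, content = e.response, e.content  # inspect what came back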
# Open Items:
# -----------
# Proxy support

# Are we removing the cached content too soon on PUT (only delete on 200 Maybe?)

# Pluggable cache storage (supports storing the cache in
# flat files by default. We need a plug-in architecture
# that can support Berkeley DB and Squid)

# == Known Issues ==
# Does not handle a resource that uses conneg and Last-Modified but no ETag as a cache validator.
# Does not handle Cache-Control: max-stale
# Does not use Age: headers when calculating cache freshness.

# The number of redirections to follow before giving up.
# Note that only GET redirects are automatically followed.
# Will also honor 301 requests by saving that info and never
# requesting that URI again.
DEFAULT_MAX_REDIRECTS = 5

from httplib2 import certs

CA_CERTS = certs.where()

# Which headers are hop-by-hop headers by default
HOP_BY_HOP = [
    "connection",
    "keep-alive",
    "proxy-authenticate",
    "proxy-authorization",
    "te",
    "trailers",
    "transfer-encoding",
    "upgrade",
]


def _get_end2end_headers(response):
    hopbyhop = list(HOP_BY_HOP)
    hopbyhop.extend([x.strip() for x in response.get("connection", "").split(",")])
    return [header for header in response.keys() if header not in hopbyhop]


URI = re.compile(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?")


def parse_uri(uri):
    """Parses a URI using the regex given in Appendix B of RFC 3986.

    (scheme, authority, path, query, fragment) = parse_uri(uri)
    """
    groups = URI.match(uri).groups()
    return (groups[1], groups[3], groups[4], groups[6], groups[8])


def urlnorm(uri):
    (scheme, authority, path, query, fragment) = parse_uri(uri)
    if not scheme or not authority:
        raise RelativeURIError("Only absolute URIs are allowed. uri = %s" % uri)
    authority = authority.lower()
    scheme = scheme.lower()
    if not path:
        path = "/"
    # Could do syntax based normalization of the URI before
    # computing the digest. See Section 6.2.2 of Std 66.
    request_uri = query and "?".join([path, query]) or path
    defrag_uri = scheme + "://" + authority + request_uri
    return scheme, authority, request_uri, defrag_uri
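# Example of parse_uri/urlnorm output (values follow directly from the
# RFC 3986 regex above):
#
#   parse_uri("http://example.org/path?q=1#frag")
#   -> ("http", "example.org", "/path", "q=1", "frag")
#
#   urlnorm("HTTP://Example.ORG")
#   -> ("http", "example.org", "/", "http://example.org/")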
# Cache filename construction (original borrowed from Venus http://intertwingly.net/code/venus/)
re_url_scheme = re.compile(r"^\w+://")
re_unsafe = re.compile(r"[^\w\-_.()=!]+")


def safename(filename):
    """Return a filename suitable for the cache.
    Strips dangerous and common characters to create a filename we
    can use to store the cache in.
    """
    if isinstance(filename, str):
        filename_bytes = filename
        filename = filename.decode("utf-8")
    else:
        filename_bytes = filename.encode("utf-8")
    filemd5 = _md5(filename_bytes).hexdigest()
    filename = re_url_scheme.sub("", filename)
    filename = re_unsafe.sub("", filename)

    # limit length of filename (vital for Windows)
    # https://github.com/httplib2/httplib2/pull/74
    # C:\Users\ <username> \AppData\Local\Temp\ <safe_filename> , <md5>
    # 9 chars + max 104 chars + 20 chars + x + 1 + 32 = max 259 chars
    # Thus max safe filename x = 93 chars. Use 90 for a round number:
    filename = filename[:90]

    return ",".join((filename, filemd5))


NORMALIZE_SPACE = re.compile(r"(?:\r\n)?[ \t]+")


def _normalize_headers(headers):
    return dict(
        [
            (key.lower(), NORMALIZE_SPACE.sub(" ", value).strip())
            for (key, value) in headers.iteritems()
        ]
    )


def _parse_cache_control(headers):
    retval = {}
    if "cache-control" in headers:
        parts = headers["cache-control"].split(",")
        parts_with_args = [
            tuple([x.strip().lower() for x in part.split("=", 1)])
            for part in parts
            if -1 != part.find("=")
        ]
        parts_wo_args = [
            (name.strip().lower(), 1) for name in parts if -1 == name.find("=")
        ]
        retval = dict(parts_with_args + parts_wo_args)
    return retval


# Whether to use a strict mode to parse WWW-Authenticate headers
# Might lead to bad results in case of ill-formed header value,
# so disabled by default, falling back to relaxed parsing.
# Set to true to turn on, useful when testing servers.
USE_WWW_AUTH_STRICT_PARSING = 0

# In regex below:
# [^\0-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+ matches a "token" as defined by HTTP
# "(?:[^\0-\x08\x0A-\x1f\x7f-\xff\\\"]|\\[\0-\x7f])*?" matches a "quoted-string" as defined by HTTP, when LWS have already been replaced by a single space
# Actually, as an auth-param value can be either a token or a quoted-string, they are combined in a single pattern which matches both:
# \"?((?<=\")(?:[^\0-\x1f\x7f-\xff\\\"]|\\[\0-\x7f])*?(?=\")|(?<!\")[^\0-\x08\x0A-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+(?!\"))\"?
WWW_AUTH_STRICT = re.compile(
    r"^(?:\s*(?:,\s*)?([^\0-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+)\s*=\s*\"?((?<=\")(?:[^\0-\x08\x0A-\x1f\x7f-\xff\\\"]|\\[\0-\x7f])*?(?=\")|(?<!\")[^\0-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+(?!\"))\"?)(.*)$"
)
WWW_AUTH_RELAXED = re.compile(
    r"^(?:\s*(?:,\s*)?([^ \t\r\n=]+)\s*=\s*\"?((?<=\")(?:[^\\\"]|\\.)*?(?=\")|(?<!\")[^ \t\r\n,]+(?!\"))\"?)(.*)$"
)
UNQUOTE_PAIRS = re.compile(r"\\(.)")


def _parse_www_authenticate(headers, headername="www-authenticate"):
    """Returns a dictionary of dictionaries, one dict
    per auth_scheme."""
    retval = {}
    if headername in headers:
        try:
            authenticate = headers[headername].strip()
            www_auth = (
                USE_WWW_AUTH_STRICT_PARSING and WWW_AUTH_STRICT or WWW_AUTH_RELAXED
            )
            while authenticate:
                # Break off the scheme at the beginning of the line
                if headername == "authentication-info":
                    (auth_scheme, the_rest) = ("digest", authenticate)
                else:
                    (auth_scheme, the_rest) = authenticate.split(" ", 1)
                # Now loop over all the key value pairs that come after the scheme,
                # being careful not to roll into the next scheme
                match = www_auth.search(the_rest)
                auth_params = {}
                while match:
                    if match and len(match.groups()) == 3:
                        (key, value, the_rest) = match.groups()
                        auth_params[key.lower()] = UNQUOTE_PAIRS.sub(
                            r"\1", value
                        )  # '\\'.join([x.replace('\\', '') for x in value.split('\\\\')])
                    match = www_auth.search(the_rest)
                retval[auth_scheme.lower()] = auth_params
                authenticate = the_rest.strip()

        except ValueError:
            raise MalformedHeader("WWW-Authenticate")
    return retval
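# Example of the parsed structure (the header value is illustrative):
#
#   _parse_www_authenticate(
#       {"www-authenticate": 'Digest realm="test", qop="auth"'}
#   )
#   -> {"digest": {"realm": "test", "qop": "auth"}}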
# TODO: add current time as _entry_disposition argument to avoid sleep in tests
def _entry_disposition(response_headers, request_headers):
    """Determine freshness from the Date, Expires and Cache-Control headers.

    We don't handle the following:

    1. Cache-Control: max-stale
    2. Age: headers are not used in the calculations.

    Note that this algorithm is simpler than you might think
    because we are operating as a private (non-shared) cache.
    This lets us ignore 's-maxage'. We can also ignore
    'proxy-invalidate' since we aren't a proxy.
    As a design decision we never return a stale document as
    fresh, hence the non-implementation of 'max-stale'.
    This also lets us safely ignore 'must-revalidate',
    since we operate as if every server had sent 'must-revalidate'.
    Since we are private we get to ignore both 'public' and
    'private' parameters. We also ignore 'no-transform' since
    we don't do any transformations.
    The 'no-store' parameter is handled at a higher level.
    So the only Cache-Control parameters we look at are:

        no-cache
        only-if-cached
        max-age
        min-fresh
    """

    retval = "STALE"
    cc = _parse_cache_control(request_headers)
    cc_response = _parse_cache_control(response_headers)

    if (
        "pragma" in request_headers
        and request_headers["pragma"].lower().find("no-cache") != -1
    ):
        retval = "TRANSPARENT"
        if "cache-control" not in request_headers:
            request_headers["cache-control"] = "no-cache"
    elif "no-cache" in cc:
        retval = "TRANSPARENT"
    elif "no-cache" in cc_response:
        retval = "STALE"
    elif "only-if-cached" in cc:
        retval = "FRESH"
    elif "date" in response_headers:
        date = calendar.timegm(email.Utils.parsedate_tz(response_headers["date"]))
        now = time.time()
        current_age = max(0, now - date)
        if "max-age" in cc_response:
            try:
                freshness_lifetime = int(cc_response["max-age"])
            except ValueError:
                freshness_lifetime = 0
        elif "expires" in response_headers:
            expires = email.Utils.parsedate_tz(response_headers["expires"])
            if expires is None:
                freshness_lifetime = 0
            else:
                freshness_lifetime = max(0, calendar.timegm(expires) - date)
        else:
            freshness_lifetime = 0
        if "max-age" in cc:
            try:
                freshness_lifetime = int(cc["max-age"])
            except ValueError:
                freshness_lifetime = 0
        if "min-fresh" in cc:
            try:
                min_fresh = int(cc["min-fresh"])
            except ValueError:
                min_fresh = 0
            current_age += min_fresh
        if freshness_lifetime > current_age:
            retval = "FRESH"
    return retval
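# Illustrative dispositions (the header dicts are hypothetical):
#
#   _entry_disposition({"cache-control": "max-age=60",
#                       "date": "Tue, 15 Nov 1994 08:12:31 GMT"}, {})
#   -> "STALE"        (the response is far older than 60 seconds)
#
#   _entry_disposition({}, {"cache-control": "no-cache"})
#   -> "TRANSPARENT"  (the client asked to bypass the cache)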
525 response["-content-encoding"] = response["content-encoding"] 526 del response["content-encoding"] 527 except (IOError, zlib.error): 528 content = "" 529 raise FailedToDecompressContent( 530 _("Content purported to be compressed with %s but failed to decompress.") 531 % response.get("content-encoding"), 532 response, 533 content, 534 ) 535 return content 536 537 538def _updateCache(request_headers, response_headers, content, cache, cachekey): 539 if cachekey: 540 cc = _parse_cache_control(request_headers) 541 cc_response = _parse_cache_control(response_headers) 542 if "no-store" in cc or "no-store" in cc_response: 543 cache.delete(cachekey) 544 else: 545 info = email.Message.Message() 546 for key, value in response_headers.iteritems(): 547 if key not in ["status", "content-encoding", "transfer-encoding"]: 548 info[key] = value 549 550 # Add annotations to the cache to indicate what headers 551 # are variant for this request. 552 vary = response_headers.get("vary", None) 553 if vary: 554 vary_headers = vary.lower().replace(" ", "").split(",") 555 for header in vary_headers: 556 key = "-varied-%s" % header 557 try: 558 info[key] = request_headers[header] 559 except KeyError: 560 pass 561 562 status = response_headers.status 563 if status == 304: 564 status = 200 565 566 status_header = "status: %d\r\n" % status 567 568 header_str = info.as_string() 569 570 header_str = re.sub("\r(?!\n)|(?<!\r)\n", "\r\n", header_str) 571 text = "".join([status_header, header_str, content]) 572 573 cache.set(cachekey, text) 574 575 576def _cnonce(): 577 dig = _md5( 578 "%s:%s" 579 % (time.ctime(), ["0123456789"[random.randrange(0, 9)] for i in range(20)]) 580 ).hexdigest() 581 return dig[:16] 582 583 584def _wsse_username_token(cnonce, iso_now, password): 585 return base64.b64encode( 586 _sha("%s%s%s" % (cnonce, iso_now, password)).digest() 587 ).strip() 588 589 590# For credentials we need two things, first 591# a pool of credential to try (not necesarily tied to BAsic, Digest, etc.) 592# Then we also need a list of URIs that have already demanded authentication 593# That list is tricky since sub-URIs can take the same auth, or the 594# auth scheme may change as you descend the tree. 595# So we also need each Auth instance to be able to tell us 596# how close to the 'top' it is. 597 598 599class Authentication(object): 600 def __init__( 601 self, credentials, host, request_uri, headers, response, content, http 602 ): 603 (scheme, authority, path, query, fragment) = parse_uri(request_uri) 604 self.path = path 605 self.host = host 606 self.credentials = credentials 607 self.http = http 608 609 def depth(self, request_uri): 610 (scheme, authority, path, query, fragment) = parse_uri(request_uri) 611 return request_uri[len(self.path) :].count("/") 612 613 def inscope(self, host, request_uri): 614 # XXX Should we normalize the request_uri? 615 (scheme, authority, path, query, fragment) = parse_uri(request_uri) 616 return (host == self.host) and path.startswith(self.path) 617 618 def request(self, method, request_uri, headers, content): 619 """Modify the request headers to add the appropriate 620 Authorization header. Over-ride this in sub-classes.""" 621 pass 622 623 def response(self, response, content): 624 """Gives us a chance to update with new nonces 625 or such returned from the last authorized response. 626 Over-rise this in sub-classes if necessary. 627 628 Return TRUE is the request is to be retried, for 629 example Digest may return stale=true. 
630 """ 631 return False 632 633 634class BasicAuthentication(Authentication): 635 def __init__( 636 self, credentials, host, request_uri, headers, response, content, http 637 ): 638 Authentication.__init__( 639 self, credentials, host, request_uri, headers, response, content, http 640 ) 641 642 def request(self, method, request_uri, headers, content): 643 """Modify the request headers to add the appropriate 644 Authorization header.""" 645 headers["authorization"] = ( 646 "Basic " + base64.b64encode("%s:%s" % self.credentials).strip() 647 ) 648 649 650class DigestAuthentication(Authentication): 651 """Only do qop='auth' and MD5, since that 652 is all Apache currently implements""" 653 654 def __init__( 655 self, credentials, host, request_uri, headers, response, content, http 656 ): 657 Authentication.__init__( 658 self, credentials, host, request_uri, headers, response, content, http 659 ) 660 challenge = _parse_www_authenticate(response, "www-authenticate") 661 self.challenge = challenge["digest"] 662 qop = self.challenge.get("qop", "auth") 663 self.challenge["qop"] = ( 664 ("auth" in [x.strip() for x in qop.split()]) and "auth" or None 665 ) 666 if self.challenge["qop"] is None: 667 raise UnimplementedDigestAuthOptionError( 668 _("Unsupported value for qop: %s." % qop) 669 ) 670 self.challenge["algorithm"] = self.challenge.get("algorithm", "MD5").upper() 671 if self.challenge["algorithm"] != "MD5": 672 raise UnimplementedDigestAuthOptionError( 673 _("Unsupported value for algorithm: %s." % self.challenge["algorithm"]) 674 ) 675 self.A1 = "".join( 676 [ 677 self.credentials[0], 678 ":", 679 self.challenge["realm"], 680 ":", 681 self.credentials[1], 682 ] 683 ) 684 self.challenge["nc"] = 1 685 686 def request(self, method, request_uri, headers, content, cnonce=None): 687 """Modify the request headers""" 688 H = lambda x: _md5(x).hexdigest() 689 KD = lambda s, d: H("%s:%s" % (s, d)) 690 A2 = "".join([method, ":", request_uri]) 691 self.challenge["cnonce"] = cnonce or _cnonce() 692 request_digest = '"%s"' % KD( 693 H(self.A1), 694 "%s:%s:%s:%s:%s" 695 % ( 696 self.challenge["nonce"], 697 "%08x" % self.challenge["nc"], 698 self.challenge["cnonce"], 699 self.challenge["qop"], 700 H(A2), 701 ), 702 ) 703 headers["authorization"] = ( 704 'Digest username="%s", realm="%s", nonce="%s", ' 705 'uri="%s", algorithm=%s, response=%s, qop=%s, ' 706 'nc=%08x, cnonce="%s"' 707 ) % ( 708 self.credentials[0], 709 self.challenge["realm"], 710 self.challenge["nonce"], 711 request_uri, 712 self.challenge["algorithm"], 713 request_digest, 714 self.challenge["qop"], 715 self.challenge["nc"], 716 self.challenge["cnonce"], 717 ) 718 if self.challenge.get("opaque"): 719 headers["authorization"] += ', opaque="%s"' % self.challenge["opaque"] 720 self.challenge["nc"] += 1 721 722 def response(self, response, content): 723 if "authentication-info" not in response: 724 challenge = _parse_www_authenticate(response, "www-authenticate").get( 725 "digest", {} 726 ) 727 if "true" == challenge.get("stale"): 728 self.challenge["nonce"] = challenge["nonce"] 729 self.challenge["nc"] = 1 730 return True 731 else: 732 updated_challenge = _parse_www_authenticate( 733 response, "authentication-info" 734 ).get("digest", {}) 735 736 if "nextnonce" in updated_challenge: 737 self.challenge["nonce"] = updated_challenge["nextnonce"] 738 self.challenge["nc"] = 1 739 return False 740 741 742class HmacDigestAuthentication(Authentication): 743 """Adapted from Robert Sayre's code and DigestAuthentication above.""" 744 745 __author__ = 
"Thomas Broyer (t.broyer@ltgt.net)" 746 747 def __init__( 748 self, credentials, host, request_uri, headers, response, content, http 749 ): 750 Authentication.__init__( 751 self, credentials, host, request_uri, headers, response, content, http 752 ) 753 challenge = _parse_www_authenticate(response, "www-authenticate") 754 self.challenge = challenge["hmacdigest"] 755 # TODO: self.challenge['domain'] 756 self.challenge["reason"] = self.challenge.get("reason", "unauthorized") 757 if self.challenge["reason"] not in ["unauthorized", "integrity"]: 758 self.challenge["reason"] = "unauthorized" 759 self.challenge["salt"] = self.challenge.get("salt", "") 760 if not self.challenge.get("snonce"): 761 raise UnimplementedHmacDigestAuthOptionError( 762 _("The challenge doesn't contain a server nonce, or this one is empty.") 763 ) 764 self.challenge["algorithm"] = self.challenge.get("algorithm", "HMAC-SHA-1") 765 if self.challenge["algorithm"] not in ["HMAC-SHA-1", "HMAC-MD5"]: 766 raise UnimplementedHmacDigestAuthOptionError( 767 _("Unsupported value for algorithm: %s." % self.challenge["algorithm"]) 768 ) 769 self.challenge["pw-algorithm"] = self.challenge.get("pw-algorithm", "SHA-1") 770 if self.challenge["pw-algorithm"] not in ["SHA-1", "MD5"]: 771 raise UnimplementedHmacDigestAuthOptionError( 772 _( 773 "Unsupported value for pw-algorithm: %s." 774 % self.challenge["pw-algorithm"] 775 ) 776 ) 777 if self.challenge["algorithm"] == "HMAC-MD5": 778 self.hashmod = _md5 779 else: 780 self.hashmod = _sha 781 if self.challenge["pw-algorithm"] == "MD5": 782 self.pwhashmod = _md5 783 else: 784 self.pwhashmod = _sha 785 self.key = "".join( 786 [ 787 self.credentials[0], 788 ":", 789 self.pwhashmod.new( 790 "".join([self.credentials[1], self.challenge["salt"]]) 791 ) 792 .hexdigest() 793 .lower(), 794 ":", 795 self.challenge["realm"], 796 ] 797 ) 798 self.key = self.pwhashmod.new(self.key).hexdigest().lower() 799 800 def request(self, method, request_uri, headers, content): 801 """Modify the request headers""" 802 keys = _get_end2end_headers(headers) 803 keylist = "".join(["%s " % k for k in keys]) 804 headers_val = "".join([headers[k] for k in keys]) 805 created = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()) 806 cnonce = _cnonce() 807 request_digest = "%s:%s:%s:%s:%s" % ( 808 method, 809 request_uri, 810 cnonce, 811 self.challenge["snonce"], 812 headers_val, 813 ) 814 request_digest = ( 815 hmac.new(self.key, request_digest, self.hashmod).hexdigest().lower() 816 ) 817 headers["authorization"] = ( 818 'HMACDigest username="%s", realm="%s", snonce="%s",' 819 ' cnonce="%s", uri="%s", created="%s", ' 820 'response="%s", headers="%s"' 821 ) % ( 822 self.credentials[0], 823 self.challenge["realm"], 824 self.challenge["snonce"], 825 cnonce, 826 request_uri, 827 created, 828 request_digest, 829 keylist, 830 ) 831 832 def response(self, response, content): 833 challenge = _parse_www_authenticate(response, "www-authenticate").get( 834 "hmacdigest", {} 835 ) 836 if challenge.get("reason") in ["integrity", "stale"]: 837 return True 838 return False 839 840 841class WsseAuthentication(Authentication): 842 """This is thinly tested and should not be relied upon. 843 At this time there isn't any third party server to test against. 
class WsseAuthentication(Authentication):
    """This is thinly tested and should not be relied upon.
    At this time there isn't any third party server to test against.
    Blogger and TypePad implemented this algorithm at one point
    but Blogger has since switched to Basic over HTTPS and
    TypePad has implemented it wrong, by never issuing a 401
    challenge but instead requiring your client to telepathically know that
    their endpoint is expecting WSSE profile="UsernameToken"."""

    def __init__(
        self, credentials, host, request_uri, headers, response, content, http
    ):
        Authentication.__init__(
            self, credentials, host, request_uri, headers, response, content, http
        )

    def request(self, method, request_uri, headers, content):
        """Modify the request headers to add the appropriate
        Authorization header."""
        headers["authorization"] = 'WSSE profile="UsernameToken"'
        iso_now = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
        cnonce = _cnonce()
        password_digest = _wsse_username_token(cnonce, iso_now, self.credentials[1])
        headers["X-WSSE"] = (
            'UsernameToken Username="%s", PasswordDigest="%s", '
            'Nonce="%s", Created="%s"'
        ) % (self.credentials[0], password_digest, cnonce, iso_now)


class GoogleLoginAuthentication(Authentication):
    def __init__(
        self, credentials, host, request_uri, headers, response, content, http
    ):
        from urllib import urlencode

        Authentication.__init__(
            self, credentials, host, request_uri, headers, response, content, http
        )
        challenge = _parse_www_authenticate(response, "www-authenticate")
        service = challenge["googlelogin"].get("service", "xapi")
        # Blogger actually returns the service in the challenge
        # For the rest we guess based on the URI
        if service == "xapi" and request_uri.find("calendar") > 0:
            service = "cl"
        # No point in guessing Base or Spreadsheet
        # elif request_uri.find("spreadsheets") > 0:
        #     service = "wise"

        auth = dict(
            Email=credentials[0],
            Passwd=credentials[1],
            service=service,
            source=headers["user-agent"],
        )
        resp, content = self.http.request(
            "https://www.google.com/accounts/ClientLogin",
            method="POST",
            body=urlencode(auth),
            headers={"Content-Type": "application/x-www-form-urlencoded"},
        )
        lines = content.split("\n")
        d = dict([tuple(line.split("=", 1)) for line in lines if line])
        if resp.status == 403:
            self.Auth = ""
        else:
            self.Auth = d["Auth"]

    def request(self, method, request_uri, headers, content):
        """Modify the request headers to add the appropriate
        Authorization header."""
        headers["authorization"] = "GoogleLogin Auth=" + self.Auth


AUTH_SCHEME_CLASSES = {
    "basic": BasicAuthentication,
    "wsse": WsseAuthentication,
    "digest": DigestAuthentication,
    "hmacdigest": HmacDigestAuthentication,
    "googlelogin": GoogleLoginAuthentication,
}

AUTH_SCHEME_ORDER = ["hmacdigest", "googlelogin", "digest", "wsse", "basic"]
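# Registering the hypothetical TokenAuthentication sketched earlier would look
# like this (illustrative; "token" must match the scheme name the server sends
# in its WWW-Authenticate challenge):
#
#   AUTH_SCHEME_CLASSES["token"] = TokenAuthentication
#   AUTH_SCHEME_ORDER.insert(0, "token")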
929 """ 930 931 def __init__( 932 self, cache, safe=safename 933 ): # use safe=lambda x: md5.new(x).hexdigest() for the old behavior 934 self.cache = cache 935 self.safe = safe 936 if not os.path.exists(cache): 937 os.makedirs(self.cache) 938 939 def get(self, key): 940 retval = None 941 cacheFullPath = os.path.join(self.cache, self.safe(key)) 942 try: 943 f = file(cacheFullPath, "rb") 944 retval = f.read() 945 f.close() 946 except IOError: 947 pass 948 return retval 949 950 def set(self, key, value): 951 cacheFullPath = os.path.join(self.cache, self.safe(key)) 952 f = file(cacheFullPath, "wb") 953 f.write(value) 954 f.close() 955 956 def delete(self, key): 957 cacheFullPath = os.path.join(self.cache, self.safe(key)) 958 if os.path.exists(cacheFullPath): 959 os.remove(cacheFullPath) 960 961 962class Credentials(object): 963 def __init__(self): 964 self.credentials = [] 965 966 def add(self, name, password, domain=""): 967 self.credentials.append((domain.lower(), name, password)) 968 969 def clear(self): 970 self.credentials = [] 971 972 def iter(self, domain): 973 for (cdomain, name, password) in self.credentials: 974 if cdomain == "" or domain == cdomain: 975 yield (name, password) 976 977 978class KeyCerts(Credentials): 979 """Identical to Credentials except that 980 name/password are mapped to key/cert.""" 981 982 pass 983 984 985class AllHosts(object): 986 pass 987 988 989class ProxyInfo(object): 990 """Collect information required to use a proxy.""" 991 992 bypass_hosts = () 993 994 def __init__( 995 self, 996 proxy_type, 997 proxy_host, 998 proxy_port, 999 proxy_rdns=True, 1000 proxy_user=None, 1001 proxy_pass=None, 1002 proxy_headers=None, 1003 ): 1004 """Args: 1005 1006 proxy_type: The type of proxy server. This must be set to one of 1007 socks.PROXY_TYPE_XXX constants. For example: p = 1008 ProxyInfo(proxy_type=socks.PROXY_TYPE_HTTP, proxy_host='localhost', 1009 proxy_port=8000) 1010 proxy_host: The hostname or IP address of the proxy server. 1011 proxy_port: The port that the proxy server is running on. 1012 proxy_rdns: If True (default), DNS queries will not be performed 1013 locally, and instead, handed to the proxy to resolve. This is useful 1014 if the network does not allow resolution of non-local names. In 1015 httplib2 0.9 and earlier, this defaulted to False. 1016 proxy_user: The username used to authenticate with the proxy server. 1017 proxy_pass: The password used to authenticate with the proxy server. 1018 proxy_headers: Additional or modified headers for the proxy connect 1019 request. 1020 """ 1021 self.proxy_type = proxy_type 1022 self.proxy_host = proxy_host 1023 self.proxy_port = proxy_port 1024 self.proxy_rdns = proxy_rdns 1025 self.proxy_user = proxy_user 1026 self.proxy_pass = proxy_pass 1027 self.proxy_headers = proxy_headers 1028 1029 def astuple(self): 1030 return ( 1031 self.proxy_type, 1032 self.proxy_host, 1033 self.proxy_port, 1034 self.proxy_rdns, 1035 self.proxy_user, 1036 self.proxy_pass, 1037 self.proxy_headers, 1038 ) 1039 1040 def isgood(self): 1041 return (self.proxy_host != None) and (self.proxy_port != None) 1042 1043 def applies_to(self, hostname): 1044 return not self.bypass_host(hostname) 1045 1046 def bypass_host(self, hostname): 1047 """Has this host been excluded from the proxy config""" 1048 if self.bypass_hosts is AllHosts: 1049 return True 1050 1051 hostname = "." 
+ hostname.lstrip(".") 1052 for skip_name in self.bypass_hosts: 1053 # *.suffix 1054 if skip_name.startswith(".") and hostname.endswith(skip_name): 1055 return True 1056 # exact match 1057 if hostname == "." + skip_name: 1058 return True 1059 return False 1060 1061 def __repr__(self): 1062 return ( 1063 "<ProxyInfo type={p.proxy_type} " 1064 "host:port={p.proxy_host}:{p.proxy_port} rdns={p.proxy_rdns}" 1065 + " user={p.proxy_user} headers={p.proxy_headers}>" 1066 ).format(p=self) 1067 1068 1069def proxy_info_from_environment(method="http"): 1070 """Read proxy info from the environment variables. 1071 """ 1072 if method not in ["http", "https"]: 1073 return 1074 1075 env_var = method + "_proxy" 1076 url = os.environ.get(env_var, os.environ.get(env_var.upper())) 1077 if not url: 1078 return 1079 return proxy_info_from_url(url, method, None) 1080 1081 1082def proxy_info_from_url(url, method="http", noproxy=None): 1083 """Construct a ProxyInfo from a URL (such as http_proxy env var) 1084 """ 1085 url = urlparse.urlparse(url) 1086 username = None 1087 password = None 1088 port = None 1089 if "@" in url[1]: 1090 ident, host_port = url[1].split("@", 1) 1091 if ":" in ident: 1092 username, password = ident.split(":", 1) 1093 else: 1094 password = ident 1095 else: 1096 host_port = url[1] 1097 if ":" in host_port: 1098 host, port = host_port.split(":", 1) 1099 else: 1100 host = host_port 1101 1102 if port: 1103 port = int(port) 1104 else: 1105 port = dict(https=443, http=80)[method] 1106 1107 proxy_type = 3 # socks.PROXY_TYPE_HTTP 1108 pi = ProxyInfo( 1109 proxy_type=proxy_type, 1110 proxy_host=host, 1111 proxy_port=port, 1112 proxy_user=username or None, 1113 proxy_pass=password or None, 1114 proxy_headers=None, 1115 ) 1116 1117 bypass_hosts = [] 1118 # If not given an explicit noproxy value, respect values in env vars. 1119 if noproxy is None: 1120 noproxy = os.environ.get("no_proxy", os.environ.get("NO_PROXY", "")) 1121 # Special case: A single '*' character means all hosts should be bypassed. 1122 if noproxy == "*": 1123 bypass_hosts = AllHosts 1124 elif noproxy.strip(): 1125 bypass_hosts = noproxy.split(",") 1126 bypass_hosts = filter(bool, bypass_hosts) # To exclude empty string. 1127 1128 pi.bypass_hosts = bypass_hosts 1129 return pi 1130 1131 1132class HTTPConnectionWithTimeout(httplib.HTTPConnection): 1133 """HTTPConnection subclass that supports timeouts 1134 1135 All timeouts are in seconds. If None is passed for timeout then 1136 Python's default timeout for sockets will be used. See for example 1137 the docs of socket.setdefaulttimeout(): 1138 http://docs.python.org/library/socket.html#socket.setdefaulttimeout 1139 """ 1140 1141 def __init__(self, host, port=None, strict=None, timeout=None, proxy_info=None): 1142 httplib.HTTPConnection.__init__(self, host, port, strict) 1143 self.timeout = timeout 1144 self.proxy_info = proxy_info 1145 1146 def connect(self): 1147 """Connect to the host and port specified in __init__.""" 1148 # Mostly verbatim from httplib.py. 1149 if self.proxy_info and socks is None: 1150 raise ProxiesUnavailableError( 1151 "Proxy support missing but proxy use was requested!" 
class HTTPConnectionWithTimeout(httplib.HTTPConnection):
    """HTTPConnection subclass that supports timeouts

    All timeouts are in seconds. If None is passed for timeout then
    Python's default timeout for sockets will be used. See for example
    the docs of socket.setdefaulttimeout():
    http://docs.python.org/library/socket.html#socket.setdefaulttimeout
    """

    def __init__(self, host, port=None, strict=None, timeout=None, proxy_info=None):
        httplib.HTTPConnection.__init__(self, host, port, strict)
        self.timeout = timeout
        self.proxy_info = proxy_info

    def connect(self):
        """Connect to the host and port specified in __init__."""
        # Mostly verbatim from httplib.py.
        if self.proxy_info and socks is None:
            raise ProxiesUnavailableError(
                "Proxy support missing but proxy use was requested!"
            )
        msg = "getaddrinfo returns an empty list"
        if self.proxy_info and self.proxy_info.isgood():
            use_proxy = True
            proxy_type, proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass, proxy_headers = (
                self.proxy_info.astuple()
            )

            host = proxy_host
            port = proxy_port
        else:
            use_proxy = False

            host = self.host
            port = self.port

        for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM):
            af, socktype, proto, canonname, sa = res
            try:
                if use_proxy:
                    self.sock = socks.socksocket(af, socktype, proto)
                    self.sock.setproxy(
                        proxy_type,
                        proxy_host,
                        proxy_port,
                        proxy_rdns,
                        proxy_user,
                        proxy_pass,
                        proxy_headers,
                    )
                else:
                    self.sock = socket.socket(af, socktype, proto)
                    self.sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
                # Different from httplib: support timeouts.
                if has_timeout(self.timeout):
                    self.sock.settimeout(self.timeout)
                # End of difference from httplib.
                if self.debuglevel > 0:
                    print("connect: (%s, %s) ************" % (self.host, self.port))
                    if use_proxy:
                        print(
                            "proxy: %s ************"
                            % str(
                                (
                                    proxy_host,
                                    proxy_port,
                                    proxy_rdns,
                                    proxy_user,
                                    proxy_pass,
                                    proxy_headers,
                                )
                            )
                        )
                if use_proxy:
                    self.sock.connect((self.host, self.port) + sa[2:])
                else:
                    self.sock.connect(sa)
            except socket.error as msg:
                if self.debuglevel > 0:
                    print("connect fail: (%s, %s)" % (self.host, self.port))
                    if use_proxy:
                        print(
                            "proxy: %s"
                            % str(
                                (
                                    proxy_host,
                                    proxy_port,
                                    proxy_rdns,
                                    proxy_user,
                                    proxy_pass,
                                    proxy_headers,
                                )
                            )
                        )
                if self.sock:
                    self.sock.close()
                self.sock = None
                continue
            break
        if not self.sock:
            raise socket.error(msg)
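# Usage sketch (the host and timeout value are illustrative):
#
#   conn = HTTPConnectionWithTimeout("example.org", timeout=5)
#   conn.request("GET", "/")
#   resp = conn.getresponse()  # may raise socket.timeout after 5s of silence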
class HTTPSConnectionWithTimeout(httplib.HTTPSConnection):
    """This class allows communication via SSL.

    All timeouts are in seconds. If None is passed for timeout then
    Python's default timeout for sockets will be used. See for example
    the docs of socket.setdefaulttimeout():
    http://docs.python.org/library/socket.html#socket.setdefaulttimeout
    """

    def __init__(
        self,
        host,
        port=None,
        key_file=None,
        cert_file=None,
        strict=None,
        timeout=None,
        proxy_info=None,
        ca_certs=None,
        disable_ssl_certificate_validation=False,
        ssl_version=None,
    ):
        httplib.HTTPSConnection.__init__(
            self, host, port=port, key_file=key_file, cert_file=cert_file, strict=strict
        )
        self.timeout = timeout
        self.proxy_info = proxy_info
        if ca_certs is None:
            ca_certs = CA_CERTS
        self.ca_certs = ca_certs
        self.disable_ssl_certificate_validation = disable_ssl_certificate_validation
        self.ssl_version = ssl_version

    # The following two methods were adapted from https_wrapper.py, released
    # with the Google Appengine SDK at
    # http://googleappengine.googlecode.com/svn-history/r136/trunk/python/google/appengine/tools/https_wrapper.py
    # under the following license:
    #
    # Copyright 2007 Google Inc.
    #
    # Licensed under the Apache License, Version 2.0 (the "License");
    # you may not use this file except in compliance with the License.
    # You may obtain a copy of the License at
    #
    #     http://www.apache.org/licenses/LICENSE-2.0
    #
    # Unless required by applicable law or agreed to in writing, software
    # distributed under the License is distributed on an "AS IS" BASIS,
    # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    # See the License for the specific language governing permissions and
    # limitations under the License.
    #

    def _GetValidHostsForCert(self, cert):
        """Returns a list of valid host globs for an SSL certificate.

        Args:
            cert: A dictionary representing an SSL certificate.
        Returns:
            list: A list of valid host globs.
        """
        if "subjectAltName" in cert:
            return [x[1] for x in cert["subjectAltName"] if x[0].lower() == "dns"]
        else:
            return [x[0][1] for x in cert["subject"] if x[0][0].lower() == "commonname"]

    def _ValidateCertificateHostname(self, cert, hostname):
        """Validates that a given hostname is valid for an SSL certificate.

        Args:
            cert: A dictionary representing an SSL certificate.
            hostname: The hostname to test.
        Returns:
            bool: Whether or not the hostname is valid for this certificate.
        """
        hosts = self._GetValidHostsForCert(cert)
        for host in hosts:
            host_re = host.replace(".", r"\.").replace("*", "[^.]*")
            if re.search("^%s$" % (host_re,), hostname, re.I):
                return True
        return False

    def connect(self):
        "Connect to a host on a given (SSL) port."

        msg = "getaddrinfo returns an empty list"
        if self.proxy_info and self.proxy_info.isgood():
            use_proxy = True
            proxy_type, proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass, proxy_headers = (
                self.proxy_info.astuple()
            )

            host = proxy_host
            port = proxy_port
        else:
            use_proxy = False

            host = self.host
            port = self.port

        address_info = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
        for family, socktype, proto, canonname, sockaddr in address_info:
            try:
                if use_proxy:
                    sock = socks.socksocket(family, socktype, proto)

                    sock.setproxy(
                        proxy_type,
                        proxy_host,
                        proxy_port,
                        proxy_rdns,
                        proxy_user,
                        proxy_pass,
                        proxy_headers,
                    )
                else:
                    sock = socket.socket(family, socktype, proto)
                    sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)

                if has_timeout(self.timeout):
                    sock.settimeout(self.timeout)

                if use_proxy:
                    sock.connect((self.host, self.port) + sockaddr[2:])
                else:
                    sock.connect(sockaddr)
                self.sock = _ssl_wrap_socket(
                    sock,
                    self.key_file,
                    self.cert_file,
                    self.disable_ssl_certificate_validation,
                    self.ca_certs,
                    self.ssl_version,
                    self.host,
                )
                if self.debuglevel > 0:
                    print("connect: (%s, %s)" % (self.host, self.port))
                    if use_proxy:
                        print(
                            "proxy: %s"
                            % str(
                                (
                                    proxy_host,
                                    proxy_port,
                                    proxy_rdns,
                                    proxy_user,
                                    proxy_pass,
                                    proxy_headers,
                                )
                            )
                        )
                if not self.disable_ssl_certificate_validation:
                    cert = self.sock.getpeercert()
                    hostname = self.host.split(":", 0)[0]
                    if not self._ValidateCertificateHostname(cert, hostname):
                        raise CertificateHostnameMismatch(
                            "Server presented certificate that does not match "
                            "host %s: %s" % (hostname, cert),
                            hostname,
                            cert,
                        )
            except (
                ssl_SSLError,
                ssl_CertificateError,
                CertificateHostnameMismatch,
            ) as e:
                if sock:
                    sock.close()
                if self.sock:
                    self.sock.close()
                self.sock = None
                # Unfortunately the ssl module doesn't seem to provide any way
                # to get at more detailed error information, in particular
                # whether the error is due to certificate validation or
                # something else (such as SSL protocol mismatch).
                if getattr(e, "errno", None) == ssl.SSL_ERROR_SSL:
                    raise SSLHandshakeError(e)
                else:
                    raise
            except (socket.timeout, socket.gaierror):
                raise
            except socket.error as msg:
                if self.debuglevel > 0:
                    print("connect fail: (%s, %s)" % (self.host, self.port))
                    if use_proxy:
                        print(
                            "proxy: %s"
                            % str(
                                (
                                    proxy_host,
                                    proxy_port,
                                    proxy_rdns,
                                    proxy_user,
                                    proxy_pass,
                                    proxy_headers,
                                )
                            )
                        )
                if self.sock:
                    self.sock.close()
                self.sock = None
                continue
            break
        if not self.sock:
            raise socket.error(msg)
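# How the hostname check above behaves (the cert dict shape follows
# ssl.SSLSocket.getpeercert(); the values are illustrative):
#
#   cert = {"subjectAltName": (("DNS", "*.example.org"), ("DNS", "example.org"))}
#   conn._GetValidHostsForCert(cert)  -> ["*.example.org", "example.org"]
#   conn._ValidateCertificateHostname(cert, "www.example.org")  -> True
#   conn._ValidateCertificateHostname(cert, "a.b.example.org")  -> False ("*" stops at dots)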
SCHEME_TO_CONNECTION = {
    "http": HTTPConnectionWithTimeout,
    "https": HTTPSConnectionWithTimeout,
}


def _new_fixed_fetch(validate_certificate):

    def fixed_fetch(
        url,
        payload=None,
        method="GET",
        headers={},
        allow_truncated=False,
        follow_redirects=True,
        deadline=None,
    ):
        return fetch(
            url,
            payload=payload,
            method=method,
            headers=headers,
            allow_truncated=allow_truncated,
            follow_redirects=follow_redirects,
            deadline=deadline,
            validate_certificate=validate_certificate,
        )

    return fixed_fetch


class AppEngineHttpConnection(httplib.HTTPConnection):
    """Use httplib on App Engine, but compensate for its weirdness.

    The parameters key_file, cert_file, proxy_info, ca_certs,
    disable_ssl_certificate_validation, and ssl_version are all dropped on
    the ground.
    """

    def __init__(
        self,
        host,
        port=None,
        key_file=None,
        cert_file=None,
        strict=None,
        timeout=None,
        proxy_info=None,
        ca_certs=None,
        disable_ssl_certificate_validation=False,
        ssl_version=None,
    ):
        httplib.HTTPConnection.__init__(
            self, host, port=port, strict=strict, timeout=timeout
        )


class AppEngineHttpsConnection(httplib.HTTPSConnection):
    """Same as AppEngineHttpConnection, but for HTTPS URIs.

    The parameters proxy_info, ca_certs, disable_ssl_certificate_validation,
    and ssl_version are all dropped on the ground.
    """

    def __init__(
        self,
        host,
        port=None,
        key_file=None,
        cert_file=None,
        strict=None,
        timeout=None,
        proxy_info=None,
        ca_certs=None,
        disable_ssl_certificate_validation=False,
        ssl_version=None,
    ):
        httplib.HTTPSConnection.__init__(
            self,
            host,
            port=port,
            key_file=key_file,
            cert_file=cert_file,
            strict=strict,
            timeout=timeout,
        )
        self._fetch = _new_fixed_fetch(not disable_ssl_certificate_validation)


# Use a different connection object for Google App Engine Standard Environment.
def is_gae_instance():
    server_software = os.environ.get("SERVER_SOFTWARE", "")
    if (server_software.startswith("Google App Engine/") or
            server_software.startswith("Development/") or
            server_software.startswith("testutil/")):
        return True
    return False


try:
    if not is_gae_instance():
        raise NotRunningAppEngineEnvironment()

    from google.appengine.api import apiproxy_stub_map
    if apiproxy_stub_map.apiproxy.GetStub("urlfetch") is None:
        raise ImportError

    from google.appengine.api.urlfetch import fetch

    # Update the connection classes to use the Google App Engine specific ones.
    SCHEME_TO_CONNECTION = {
        "http": AppEngineHttpConnection,
        "https": AppEngineHttpsConnection,
    }
except (ImportError, NotRunningAppEngineEnvironment):
    pass
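# Illustrative check of the environment detection above (the value is an
# example of what App Engine sets in SERVER_SOFTWARE):
#
#   os.environ["SERVER_SOFTWARE"] = "Google App Engine/1.9.25"
#   is_gae_instance()  -> True, so the urlfetch-backed connection classes are used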
class Http(object):
    """An HTTP client that handles:

    - all methods
    - caching
    - ETags
    - compression
    - HTTPS
    - Basic
    - Digest
    - WSSE

    and more.
    """

    def __init__(
        self,
        cache=None,
        timeout=None,
        proxy_info=proxy_info_from_environment,
        ca_certs=None,
        disable_ssl_certificate_validation=False,
        ssl_version=None,
    ):
        """If 'cache' is a string then it is used as a directory name for
        a disk cache. Otherwise it must be an object that supports the
        same interface as FileCache.

        All timeouts are in seconds. If None is passed for timeout
        then Python's default timeout for sockets will be used. See
        for example the docs of socket.setdefaulttimeout():
        http://docs.python.org/library/socket.html#socket.setdefaulttimeout

        `proxy_info` may be:
        - a callable that takes the http scheme ('http' or 'https') and
          returns a ProxyInfo instance per request. By default, uses
          proxy_info_from_environment.
        - a ProxyInfo instance (static proxy config).
        - None (proxy disabled).

        ca_certs is the path of a file containing root CA certificates for SSL
        server certificate validation. By default, a CA cert file bundled with
        httplib2 is used.

        If disable_ssl_certificate_validation is true, SSL cert validation will
        not be performed.

        By default, ssl.PROTOCOL_SSLv23 will be used for the ssl version.
        """
        self.proxy_info = proxy_info
        self.ca_certs = ca_certs
        self.disable_ssl_certificate_validation = disable_ssl_certificate_validation
        self.ssl_version = ssl_version

        # Map domain name to an httplib connection
        self.connections = {}
        # The location of the cache, for now a directory
        # where cached responses are held.
        if cache and isinstance(cache, basestring):
            self.cache = FileCache(cache)
        else:
            self.cache = cache

        # Name/password
        self.credentials = Credentials()

        # Key/cert
        self.certificates = KeyCerts()

        # authorization objects
        self.authorizations = []

        # If set to False then no redirects are followed, even safe ones.
        self.follow_redirects = True

        # Which HTTP methods do we apply optimistic concurrency to, i.e.
        # which methods get an "if-match:" etag header added to them.
        self.optimistic_concurrency_methods = ["PUT", "PATCH"]

        # If 'follow_redirects' is True, and this is set to True then
        # all redirects are followed, including unsafe ones.
        self.follow_all_redirects = False

        self.ignore_etag = False

        self.force_exception_to_status_code = False

        self.timeout = timeout

        # Keep Authorization: headers on a redirect.
        self.forward_authorization_headers = False

    def __getstate__(self):
        state_dict = copy.copy(self.__dict__)
        # In case request is augmented by some foreign object such as
        # credentials which handle auth
        if "request" in state_dict:
            del state_dict["request"]
        if "connections" in state_dict:
            del state_dict["connections"]
        return state_dict

    def __setstate__(self, state):
        self.__dict__.update(state)
        self.connections = {}

    def _auth_from_challenge(self, host, request_uri, headers, response, content):
        """A generator that creates Authorization objects
        that can be applied to requests.
        """
        challenges = _parse_www_authenticate(response, "www-authenticate")
        for cred in self.credentials.iter(host):
            for scheme in AUTH_SCHEME_ORDER:
                if scheme in challenges:
                    yield AUTH_SCHEME_CLASSES[scheme](
                        cred, host, request_uri, headers, response, content, self
                    )

    def add_credentials(self, name, password, domain=""):
        """Add a name and password that will be used
        any time a request requires authentication."""
        self.credentials.add(name, password, domain)

    def add_certificate(self, key, cert, domain):
        """Add a key and cert that will be used
        any time a request requires authentication."""
        self.certificates.add(key, cert, domain)

    def clear_credentials(self):
        """Remove all the names and passwords
        that are used for authentication"""
        self.credentials.clear()
        self.authorizations = []
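    # Typical usage of the client (a sketch; the URL, credentials and cache
    # directory are illustrative):
    #
    #   h = Http(".cache", timeout=10)
    #   h.add_credentials("joe", "secret")
    #   resp, content = h.request("http://example.org/protected", "GET")
    #   resp.status     -> e.g. 200
    #   resp.fromcache  -> True on a repeated, still-fresh GET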
    def _conn_request(self, conn, request_uri, method, body, headers):
        i = 0
        seen_bad_status_line = False
        while i < RETRIES:
            i += 1
            try:
                if hasattr(conn, "sock") and conn.sock is None:
                    conn.connect()
                conn.request(method, request_uri, body, headers)
            except socket.timeout:
                raise
            except socket.gaierror:
                conn.close()
                raise ServerNotFoundError("Unable to find the server at %s" % conn.host)
            except ssl_SSLError:
                conn.close()
                raise
            except socket.error as e:
                err = 0
                if hasattr(e, "args"):
                    err = getattr(e, "args")[0]
                else:
                    err = e.errno
                if err == errno.ECONNREFUSED:  # Connection refused
                    raise
                if err in (errno.ENETUNREACH, errno.EADDRNOTAVAIL) and i < RETRIES:
                    continue  # retry on potentially transient socket errors
            except httplib.HTTPException:
                # Just because the server closed the connection doesn't apparently mean
                # that the server didn't send a response.
                if hasattr(conn, "sock") and conn.sock is None:
                    if i < RETRIES - 1:
                        conn.close()
                        conn.connect()
                        continue
                    else:
                        conn.close()
                        raise
                if i < RETRIES - 1:
                    conn.close()
                    conn.connect()
                    continue
            try:
                response = conn.getresponse()
            except httplib.BadStatusLine:
                # If we get a BadStatusLine on the first try then that means
                # the connection just went stale, so retry regardless of the
                # number of RETRIES set.
                if not seen_bad_status_line and i == 1:
                    i = 0
                    seen_bad_status_line = True
                    conn.close()
                    conn.connect()
                    continue
                else:
                    conn.close()
                    raise
            except (socket.error, httplib.HTTPException):
                if i < RETRIES - 1:
                    conn.close()
                    conn.connect()
                    continue
                else:
                    conn.close()
                    raise
            else:
                content = ""
                if method == "HEAD":
                    conn.close()
                else:
                    content = response.read()
                response = Response(response)
                if method != "HEAD":
                    content = _decompressContent(response, content)
                break
        return (response, content)
    def _request(
        self,
        conn,
        host,
        absolute_uri,
        request_uri,
        method,
        body,
        headers,
        redirections,
        cachekey,
    ):
        """Do the actual request using the connection object
        and also follow one level of redirects if necessary"""

        auths = [
            (auth.depth(request_uri), auth)
            for auth in self.authorizations
            if auth.inscope(host, request_uri)
        ]
        auth = auths and sorted(auths)[0][1] or None
        if auth:
            auth.request(method, request_uri, headers, body)

        (response, content) = self._conn_request(
            conn, request_uri, method, body, headers
        )

        if auth:
            if auth.response(response, body):
                auth.request(method, request_uri, headers, body)
                (response, content) = self._conn_request(
                    conn, request_uri, method, body, headers
                )
                response._stale_digest = 1

        if response.status == 401:
            for authorization in self._auth_from_challenge(
                host, request_uri, headers, response, content
            ):
                authorization.request(method, request_uri, headers, body)
                (response, content) = self._conn_request(
                    conn, request_uri, method, body, headers
                )
                if response.status != 401:
                    self.authorizations.append(authorization)
                    authorization.response(response, body)
                    break

        if (
            self.follow_all_redirects
            or (method in ["GET", "HEAD"])
            or response.status == 303
        ):
            if self.follow_redirects and response.status in [300, 301, 302, 303, 307]:
                # Pick out the location header and basically start from the beginning
                # remembering first to strip the ETag header and decrement our 'depth'
                if redirections:
                    if "location" not in response and response.status != 300:
                        raise RedirectMissingLocation(
                            _(
                                "Redirected but the response is missing a Location: header."
                            ),
                            response,
                            content,
                        )
                    # Fix-up relative redirects (which violate an RFC 2616 MUST)
                    if "location" in response:
                        location = response["location"]
                        (scheme, authority, path, query, fragment) = parse_uri(location)
                        if authority is None:
                            response["location"] = urlparse.urljoin(
                                absolute_uri, location
                            )
                    if response.status == 301 and method in ["GET", "HEAD"]:
                        response["-x-permanent-redirect-url"] = response["location"]
                        if "content-location" not in response:
                            response["content-location"] = absolute_uri
                        _updateCache(headers, response, content, self.cache, cachekey)
                    if "if-none-match" in headers:
                        del headers["if-none-match"]
                    if "if-modified-since" in headers:
                        del headers["if-modified-since"]
                    if (
                        "authorization" in headers
                        and not self.forward_authorization_headers
                    ):
                        del headers["authorization"]
                    if "location" in response:
                        location = response["location"]
                        old_response = copy.deepcopy(response)
                        if "content-location" not in old_response:
                            old_response["content-location"] = absolute_uri
                        redirect_method = method
                        if response.status in [302, 303]:
                            redirect_method = "GET"
                            body = None
                        (response, content) = self.request(
                            location,
                            method=redirect_method,
                            body=body,
                            headers=headers,
                            redirections=redirections - 1,
                        )
                        response.previous = old_response
                else:
                    raise RedirectLimit(
                        "Redirected more times than redirection_limit allows.",
                        response,
                        content,
                    )
            elif response.status in [200, 203] and method in ["GET", "HEAD"]:
                # Don't cache 206's since we aren't going to handle byte range requests
                if "content-location" not in response:
                    response["content-location"] = absolute_uri
                _updateCache(headers, response, content, self.cache, cachekey)

        return (response, content)

    def _normalize_headers(self, headers):
        return _normalize_headers(headers)

    # Need to catch and rebrand some exceptions
    # Then need to optionally turn all exceptions into status codes
    # including all socket.* and httplib.* exceptions.
1926 """ 1927 conn_key = '' 1928 1929 try: 1930 if headers is None: 1931 headers = {} 1932 else: 1933 headers = self._normalize_headers(headers) 1934 1935 if "user-agent" not in headers: 1936 headers["user-agent"] = "Python-httplib2/%s (gzip)" % __version__ 1937 1938 uri = iri2uri(uri) 1939 1940 (scheme, authority, request_uri, defrag_uri) = urlnorm(uri) 1941 1942 proxy_info = self._get_proxy_info(scheme, authority) 1943 1944 conn_key = scheme + ":" + authority 1945 conn = self.connections.get(conn_key) 1946 if conn is None: 1947 if not connection_type: 1948 connection_type = SCHEME_TO_CONNECTION[scheme] 1949 certs = list(self.certificates.iter(authority)) 1950 if scheme == "https": 1951 if certs: 1952 conn = self.connections[conn_key] = connection_type( 1953 authority, 1954 key_file=certs[0][0], 1955 cert_file=certs[0][1], 1956 timeout=self.timeout, 1957 proxy_info=proxy_info, 1958 ca_certs=self.ca_certs, 1959 disable_ssl_certificate_validation=self.disable_ssl_certificate_validation, 1960 ssl_version=self.ssl_version, 1961 ) 1962 else: 1963 conn = self.connections[conn_key] = connection_type( 1964 authority, 1965 timeout=self.timeout, 1966 proxy_info=proxy_info, 1967 ca_certs=self.ca_certs, 1968 disable_ssl_certificate_validation=self.disable_ssl_certificate_validation, 1969 ssl_version=self.ssl_version, 1970 ) 1971 else: 1972 conn = self.connections[conn_key] = connection_type( 1973 authority, timeout=self.timeout, proxy_info=proxy_info 1974 ) 1975 conn.set_debuglevel(debuglevel) 1976 1977 if "range" not in headers and "accept-encoding" not in headers: 1978 headers["accept-encoding"] = "gzip, deflate" 1979 1980 info = email.Message.Message() 1981 cached_value = None 1982 if self.cache: 1983 cachekey = defrag_uri.encode("utf-8") 1984 cached_value = self.cache.get(cachekey) 1985 if cached_value: 1986 # info = email.message_from_string(cached_value) 1987 # 1988 # Need to replace the line above with the kludge below 1989 # to fix the non-existent bug not fixed in this 1990 # bug report: http://mail.python.org/pipermail/python-bugs-list/2005-September/030289.html 1991 try: 1992 info, content = cached_value.split("\r\n\r\n", 1) 1993 feedparser = email.FeedParser.FeedParser() 1994 feedparser.feed(info) 1995 info = feedparser.close() 1996 feedparser._parse = None 1997 except (IndexError, ValueError): 1998 self.cache.delete(cachekey) 1999 cachekey = None 2000 cached_value = None 2001 else: 2002 cachekey = None 2003 2004 if ( 2005 method in self.optimistic_concurrency_methods 2006 and self.cache 2007 and "etag" in info 2008 and not self.ignore_etag 2009 and "if-match" not in headers 2010 ): 2011 # http://www.w3.org/1999/04/Editing/ 2012 headers["if-match"] = info["etag"] 2013 2014 if method not in ["GET", "HEAD"] and self.cache and cachekey: 2015 # RFC 2616 Section 13.10 2016 self.cache.delete(cachekey) 2017 2018 # Check the vary header in the cache to see if this request 2019 # matches what varies in the cache. 2020 if method in ["GET", "HEAD"] and "vary" in info: 2021 vary = info["vary"] 2022 vary_headers = vary.lower().replace(" ", "").split(",") 2023 for header in vary_headers: 2024 key = "-varied-%s" % header 2025 value = info[key] 2026 if headers.get(header, None) != value: 2027 cached_value = None 2028 break 2029 2030 if ( 2031 cached_value 2032 and method in ["GET", "HEAD"] 2033 and self.cache 2034 and "range" not in headers 2035 ): 2036 if "-x-permanent-redirect-url" in info: 2037 # Should cached permanent redirects be counted in our redirection count? For now, yes. 
                    if redirections <= 0:
                        raise RedirectLimit(
                            "Redirected more times than redirection_limit allows.",
                            {},
                            "",
                        )
                    (response, new_content) = self.request(
                        info["-x-permanent-redirect-url"],
                        method="GET",
                        headers=headers,
                        redirections=redirections - 1,
                    )
                    response.previous = Response(info)
                    response.previous.fromcache = True
                else:
                    # Determine our course of action:
                    # Is the cached entry fresh or stale?
                    # Has the client requested a non-cached response?
                    #
                    # There seem to be three possible answers:
                    # 1. [FRESH] Return the cache entry w/o doing a GET
                    # 2. [STALE] Do the GET (but add in cache validators if available)
                    # 3. [TRANSPARENT] Do a GET w/o any cache validators (Cache-Control: no-cache) on the request
                    entry_disposition = _entry_disposition(info, headers)

                    if entry_disposition == "FRESH":
                        if not cached_value:
                            info["status"] = "504"
                            content = ""
                        response = Response(info)
                        if cached_value:
                            response.fromcache = True
                        return (response, content)

                    if entry_disposition == "STALE":
                        if (
                            "etag" in info
                            and not self.ignore_etag
                            and "if-none-match" not in headers
                        ):
                            headers["if-none-match"] = info["etag"]
                        if "last-modified" in info and "last-modified" not in headers:
                            headers["if-modified-since"] = info["last-modified"]
                    elif entry_disposition == "TRANSPARENT":
                        pass

                    (response, new_content) = self._request(
                        conn,
                        authority,
                        uri,
                        request_uri,
                        method,
                        body,
                        headers,
                        redirections,
                        cachekey,
                    )

                if response.status == 304 and method == "GET":
                    # Rewrite the cache entry with the new end-to-end headers:
                    # take every header present in the response and overwrite
                    # its value in info, unless it is hop-by-hop or listed in
                    # the Connection header.
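                    # (Illustrative: a fresh 'date' or 'cache-control' from
                    # the 304 replaces the stored value, while a hop-by-hop
                    # header such as 'connection' is excluded by
                    # _get_end2end_headers.)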
                    for key in _get_end2end_headers(response):
                        info[key] = response[key]
                    merged_response = Response(info)
                    if hasattr(response, "_stale_digest"):
                        merged_response._stale_digest = response._stale_digest
                    _updateCache(
                        headers, merged_response, content, self.cache, cachekey
                    )
                    response = merged_response
                    response.status = 200
                    response.fromcache = True

                elif response.status == 200:
                    content = new_content
                else:
                    self.cache.delete(cachekey)
                    content = new_content
            else:
                cc = _parse_cache_control(headers)
                if "only-if-cached" in cc:
                    info["status"] = "504"
                    response = Response(info)
                    content = ""
                else:
                    (response, content) = self._request(
                        conn,
                        authority,
                        uri,
                        request_uri,
                        method,
                        body,
                        headers,
                        redirections,
                        cachekey,
                    )
        except Exception as e:
            is_timeout = isinstance(e, socket.timeout)
            if is_timeout:
                conn = self.connections.pop(conn_key, None)
                if conn:
                    conn.close()

            if self.force_exception_to_status_code:
                if isinstance(e, HttpLib2ErrorWithResponse):
                    response = e.response
                    content = e.content
                    response.status = 500
                    response.reason = str(e)
                elif is_timeout:
                    content = "Request Timeout"
                    response = Response(
                        {
                            "content-type": "text/plain",
                            "status": "408",
                            "content-length": len(content),
                        }
                    )
                    response.reason = "Request Timeout"
                else:
                    content = str(e)
                    response = Response(
                        {
                            "content-type": "text/plain",
                            "status": "400",
                            "content-length": len(content),
                        }
                    )
                    response.reason = "Bad Request"
            else:
                raise

        return (response, content)

    def _get_proxy_info(self, scheme, authority):
        """Return a ProxyInfo instance (or None) based on the scheme
        and authority.
        """
        hostname, port = urllib.splitport(authority)
        proxy_info = self.proxy_info
        if callable(proxy_info):
            proxy_info = proxy_info(scheme)

        if hasattr(proxy_info, "applies_to") and not proxy_info.applies_to(hostname):
            proxy_info = None
        return proxy_info


class Response(dict):
    """An object more like email.Message than httplib.HTTPResponse."""

    """Is this response from our local cache"""
    fromcache = False

    """HTTP protocol version used by server.

    10 for HTTP/1.0, 11 for HTTP/1.1.
    """
    version = 11

    """Status code returned by server."""
    status = 200

    """Reason phrase returned by server."""
    reason = "Ok"

    previous = None

    def __init__(self, info):
        # info is either an email.Message or
        # an httplib.HTTPResponse object.
        if isinstance(info, httplib.HTTPResponse):
            for key, value in info.getheaders():
                self[key.lower()] = value
            self.status = info.status
            self["status"] = str(self.status)
            self.reason = info.reason
            self.version = info.version
        elif isinstance(info, email.Message.Message):
            for key, value in info.items():
                self[key.lower()] = value
            self.status = int(self["status"])
        else:
            for key, value in info.iteritems():
                self[key.lower()] = value
            self.status = int(self.get("status", self.status))
            self.reason = self.get("reason", self.reason)

    def __getattr__(self, name):
        if name == "dict":
            return self
        else:
            raise AttributeError(name)
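

# A minimal sketch of how a Response is typically consumed; the URI below is
# a placeholder and the values shown are illustrative:
#
#     h = Http()
#     (resp, content) = h.request("http://example.org/")
#     resp.status           # int attribute, e.g. 200
#     resp["status"]        # the same status as a string, via dict access
#     resp["content-type"]  # header names are stored as lower-cased dict keys
#     resp.fromcache        # True when the entity came from the local cache
#     resp.previous         # the prior Response in a redirect chain, or None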