r"""HTTP/1.1 client library

This module implements the client side of HTTP/1.0 and HTTP/1.1.
HTTPConnection sends requests over a socket and HTTPResponse parses the
replies.

HTTPConnection goes through a number of "states", which define when a client
may legally make another request or fetch the response for a particular
request. This diagram details these state transitions:

    (null)
      |
      | HTTPConnection()
      v
    Idle
      |
      | putrequest()
      v
    Request-started
      |
      | ( putheader() )*  endheaders()
      v
    Request-sent
      |
      | response = getresponse()
      v
    Unread-response   [Response-headers-read]
      |\____________________
      |                     |
      | response.read()     | putrequest()
      v                     v
    Idle                  Req-started-unread-response
                     ______/|
                   /        |
   response.read() |        | ( putheader() )*  endheaders()
                   v        v
       Request-started    Req-sent-unread-response
                            |
                            | response.read()
                            v
                          Request-sent

This diagram presents the following rules:
  -- a second request may not be started until {response-headers-read}
  -- a response [object] cannot be retrieved until {request-sent}
  -- there is no differentiation between an unread response body and a
     partially read response body

Note: this enforcement is applied by the HTTPConnection class. The
      HTTPResponse class does not enforce this state machine, which
      implies sophisticated clients may accelerate the request/response
      pipeline. Caution should be taken, though: accelerating the states
      beyond the above pattern may imply knowledge of the server's
      connection-close behavior for certain requests. For example, it
      is impossible to tell whether the server will close the connection
      UNTIL the response headers have been read; this means that further
      requests cannot be placed into the pipeline until it is known that
      the server will NOT be closing the connection.

Logical State                  __state            __response
-------------                  -------            ----------
Idle                           _CS_IDLE           None
Request-started                _CS_REQ_STARTED    None
Request-sent                   _CS_REQ_SENT       None
Unread-response                _CS_IDLE           <response_class>
Req-started-unread-response    _CS_REQ_STARTED    <response_class>
Req-sent-unread-response       _CS_REQ_SENT       <response_class>
"""

from array import array
import os
import re
import socket
from sys import py3kwarning
from urlparse import urlsplit
import warnings
with warnings.catch_warnings():
    # mimetools is deprecated; silence the -3 warning only for this import.
    if py3kwarning:
        warnings.filterwarnings("ignore", ".*mimetools has been removed",
                                DeprecationWarning)
    import mimetools

try:
    from cStringIO import StringIO
except ImportError:
    from StringIO import StringIO

__all__ = ["HTTP", "HTTPResponse", "HTTPConnection",
           "HTTPException", "NotConnected", "UnknownProtocol",
           "UnknownTransferEncoding", "UnimplementedFileMode",
           "IncompleteRead", "InvalidURL", "ImproperConnectionState",
           "CannotSendRequest", "CannotSendHeader", "ResponseNotReady",
           "BadStatusLine", "error", "responses"]

HTTP_PORT = 80
HTTPS_PORT = 443

# Placeholder for HTTPResponse attributes that have not been parsed yet.
_UNKNOWN = 'UNKNOWN'

# connection states
_CS_IDLE = 'Idle'
_CS_REQ_STARTED = 'Request-started'
_CS_REQ_SENT = 'Request-sent'

# status codes
# informational
CONTINUE = 100
SWITCHING_PROTOCOLS = 101
PROCESSING = 102

# successful
OK = 200
CREATED = 201
ACCEPTED = 202
NON_AUTHORITATIVE_INFORMATION = 203
NO_CONTENT = 204
RESET_CONTENT = 205
PARTIAL_CONTENT = 206
MULTI_STATUS = 207
IM_USED = 226

# redirection
MULTIPLE_CHOICES = 300
MOVED_PERMANENTLY = 301
FOUND = 302
SEE_OTHER = 303
NOT_MODIFIED = 304
USE_PROXY = 305
TEMPORARY_REDIRECT = 307

# client error
BAD_REQUEST = 400
UNAUTHORIZED = 401
PAYMENT_REQUIRED = 402
FORBIDDEN = 403
NOT_FOUND = 404
METHOD_NOT_ALLOWED = 405
NOT_ACCEPTABLE = 406
PROXY_AUTHENTICATION_REQUIRED = 407
REQUEST_TIMEOUT = 408
CONFLICT = 409
GONE = 410
LENGTH_REQUIRED = 411
PRECONDITION_FAILED = 412
REQUEST_ENTITY_TOO_LARGE = 413
REQUEST_URI_TOO_LONG = 414
UNSUPPORTED_MEDIA_TYPE = 415
REQUESTED_RANGE_NOT_SATISFIABLE = 416
EXPECTATION_FAILED = 417
UNPROCESSABLE_ENTITY = 422
LOCKED = 423
FAILED_DEPENDENCY = 424
UPGRADE_REQUIRED = 426

# server error
INTERNAL_SERVER_ERROR = 500
NOT_IMPLEMENTED = 501
BAD_GATEWAY = 502
SERVICE_UNAVAILABLE = 503
GATEWAY_TIMEOUT = 504
HTTP_VERSION_NOT_SUPPORTED = 505
INSUFFICIENT_STORAGE = 507
NOT_EXTENDED = 510

# Mapping status codes to official W3C names.
# Every status-code constant defined by this module has an entry, including
# the WebDAV / extension codes (102, 207, 226, 422-426, 507, 510).
responses = {
    100: 'Continue',
    101: 'Switching Protocols',
    102: 'Processing',

    200: 'OK',
    201: 'Created',
    202: 'Accepted',
    203: 'Non-Authoritative Information',
    204: 'No Content',
    205: 'Reset Content',
    206: 'Partial Content',
    207: 'Multi-Status',
    226: 'IM Used',

    300: 'Multiple Choices',
    301: 'Moved Permanently',
    302: 'Found',
    303: 'See Other',
    304: 'Not Modified',
    305: 'Use Proxy',
    306: '(Unused)',
    307: 'Temporary Redirect',

    400: 'Bad Request',
    401: 'Unauthorized',
    402: 'Payment Required',
    403: 'Forbidden',
    404: 'Not Found',
    405: 'Method Not Allowed',
    406: 'Not Acceptable',
    407: 'Proxy Authentication Required',
    408: 'Request Timeout',
    409: 'Conflict',
    410: 'Gone',
    411: 'Length Required',
    412: 'Precondition Failed',
    413: 'Request Entity Too Large',
    414: 'Request-URI Too Long',
    415: 'Unsupported Media Type',
    416: 'Requested Range Not Satisfiable',
    417: 'Expectation Failed',
    422: 'Unprocessable Entity',
    423: 'Locked',
    424: 'Failed Dependency',
    426: 'Upgrade Required',

    500: 'Internal Server Error',
    501: 'Not Implemented',
    502: 'Bad Gateway',
    503: 'Service Unavailable',
    504: 'Gateway Timeout',
    505: 'HTTP Version Not Supported',
    507: 'Insufficient Storage',
    510: 'Not Extended',
}

# maximal amount of data to read at one time in _safe_read
MAXAMOUNT = 1048576

# maximal line length when calling readline().
217_MAXLINE = 65536 218 219# maximum amount of headers accepted 220_MAXHEADERS = 100 221 222# Header name/value ABNF (http://tools.ietf.org/html/rfc7230#section-3.2) 223# 224# VCHAR = %x21-7E 225# obs-text = %x80-FF 226# header-field = field-name ":" OWS field-value OWS 227# field-name = token 228# field-value = *( field-content / obs-fold ) 229# field-content = field-vchar [ 1*( SP / HTAB ) field-vchar ] 230# field-vchar = VCHAR / obs-text 231# 232# obs-fold = CRLF 1*( SP / HTAB ) 233# ; obsolete line folding 234# ; see Section 3.2.4 235 236# token = 1*tchar 237# 238# tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" 239# / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~" 240# / DIGIT / ALPHA 241# ; any VCHAR, except delimiters 242# 243# VCHAR defined in http://tools.ietf.org/html/rfc5234#appendix-B.1 244 245# the patterns for both name and value are more lenient than RFC 246# definitions to allow for backwards compatibility 247_is_legal_header_name = re.compile(r'\A[^:\s][^:\r\n]*\Z').match 248_is_illegal_header_value = re.compile(r'\n(?![ \t])|\r(?![ \t\n])').search 249 250# We always set the Content-Length header for these methods because some 251# servers will otherwise respond with a 411 252_METHODS_EXPECTING_BODY = {'PATCH', 'POST', 'PUT'} 253 254 255class HTTPMessage(mimetools.Message): 256 257 def addheader(self, key, value): 258 """Add header for field key handling repeats.""" 259 prev = self.dict.get(key) 260 if prev is None: 261 self.dict[key] = value 262 else: 263 combined = ", ".join((prev, value)) 264 self.dict[key] = combined 265 266 def addcontinue(self, key, more): 267 """Add more field data from a continuation line.""" 268 prev = self.dict[key] 269 self.dict[key] = prev + "\n " + more 270 271 def readheaders(self): 272 """Read header lines. 273 274 Read header lines up to the entirely blank line that terminates them. 275 The (normally blank) line that ends the headers is skipped, but not 276 included in the returned list. 
If an invalid line is found in the 277 header section, it is skipped, and further lines are processed. 278 279 The variable self.status is set to the empty string if all went well, 280 otherwise it is an error message. The variable self.headers is a 281 completely uninterpreted list of lines contained in the header (so 282 printing them will reproduce the header exactly as it appears in the 283 file). 284 285 If multiple header fields with the same name occur, they are combined 286 according to the rules in RFC 2616 sec 4.2: 287 288 Appending each subsequent field-value to the first, each separated 289 by a comma. The order in which header fields with the same field-name 290 are received is significant to the interpretation of the combined 291 field value. 292 """ 293 # XXX The implementation overrides the readheaders() method of 294 # rfc822.Message. The base class design isn't amenable to 295 # customized behavior here so the method here is a copy of the 296 # base class code with a few small changes. 297 298 self.dict = {} 299 self.unixfrom = '' 300 self.headers = hlist = [] 301 self.status = '' 302 headerseen = "" 303 firstline = 1 304 tell = None 305 if not hasattr(self.fp, 'unread') and self.seekable: 306 tell = self.fp.tell 307 while True: 308 if len(hlist) > _MAXHEADERS: 309 raise HTTPException("got more than %d headers" % _MAXHEADERS) 310 if tell: 311 try: 312 tell() 313 except IOError: 314 tell = None 315 self.seekable = 0 316 line = self.fp.readline(_MAXLINE + 1) 317 if len(line) > _MAXLINE: 318 raise LineTooLong("header line") 319 if not line: 320 self.status = 'EOF in headers' 321 break 322 # Skip unix From name time lines 323 if firstline and line.startswith('From '): 324 self.unixfrom = self.unixfrom + line 325 continue 326 firstline = 0 327 if headerseen and line[0] in ' \t': 328 # XXX Not sure if continuation lines are handled properly 329 # for http and/or for repeating headers 330 # It's a continuation line. 
331 hlist.append(line) 332 self.addcontinue(headerseen, line.strip()) 333 continue 334 elif self.iscomment(line): 335 # It's a comment. Ignore it. 336 continue 337 elif self.islast(line): 338 # Note! No pushback here! The delimiter line gets eaten. 339 break 340 headerseen = self.isheader(line) 341 if headerseen: 342 # It's a legal header line, save it. 343 hlist.append(line) 344 self.addheader(headerseen, line[len(headerseen)+1:].strip()) 345 elif headerseen is not None: 346 # An empty header name. These aren't allowed in HTTP, but it's 347 # probably a benign mistake. Don't add the header, just keep 348 # going. 349 pass 350 else: 351 # It's not a header line; skip it and try the next line. 352 self.status = 'Non-header line where header expected' 353 354class HTTPResponse: 355 356 # strict: If true, raise BadStatusLine if the status line can't be 357 # parsed as a valid HTTP/1.0 or 1.1 status line. By default it is 358 # false because it prevents clients from talking to HTTP/0.9 359 # servers. Note that a response with a sufficiently corrupted 360 # status line will look like an HTTP/0.9 response. 361 362 # See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details. 363 364 def __init__(self, sock, debuglevel=0, strict=0, method=None, buffering=False): 365 if buffering: 366 # The caller won't be using any sock.recv() calls, so buffering 367 # is fine and recommended for performance. 368 self.fp = sock.makefile('rb') 369 else: 370 # The buffer size is specified as zero, because the headers of 371 # the response are read with readline(). If the reads were 372 # buffered the readline() calls could consume some of the 373 # response, which make be read via a recv() on the underlying 374 # socket. 
375 self.fp = sock.makefile('rb', 0) 376 self.debuglevel = debuglevel 377 self.strict = strict 378 self._method = method 379 380 self.msg = None 381 382 # from the Status-Line of the response 383 self.version = _UNKNOWN # HTTP-Version 384 self.status = _UNKNOWN # Status-Code 385 self.reason = _UNKNOWN # Reason-Phrase 386 387 self.chunked = _UNKNOWN # is "chunked" being used? 388 self.chunk_left = _UNKNOWN # bytes left to read in current chunk 389 self.length = _UNKNOWN # number of bytes left in response 390 self.will_close = _UNKNOWN # conn will close at end of response 391 392 def _read_status(self): 393 # Initialize with Simple-Response defaults 394 line = self.fp.readline(_MAXLINE + 1) 395 if len(line) > _MAXLINE: 396 raise LineTooLong("header line") 397 if self.debuglevel > 0: 398 print "reply:", repr(line) 399 if not line: 400 # Presumably, the server closed the connection before 401 # sending a valid response. 402 raise BadStatusLine(line) 403 try: 404 [version, status, reason] = line.split(None, 2) 405 except ValueError: 406 try: 407 [version, status] = line.split(None, 1) 408 reason = "" 409 except ValueError: 410 # empty version will cause next test to fail and status 411 # will be treated as 0.9 response. 
412 version = "" 413 if not version.startswith('HTTP/'): 414 if self.strict: 415 self.close() 416 raise BadStatusLine(line) 417 else: 418 # assume it's a Simple-Response from an 0.9 server 419 self.fp = LineAndFileWrapper(line, self.fp) 420 return "HTTP/0.9", 200, "" 421 422 # The status code is a three-digit number 423 try: 424 status = int(status) 425 if status < 100 or status > 999: 426 raise BadStatusLine(line) 427 except ValueError: 428 raise BadStatusLine(line) 429 return version, status, reason 430 431 def begin(self): 432 if self.msg is not None: 433 # we've already started reading the response 434 return 435 436 # read until we get a non-100 response 437 while True: 438 version, status, reason = self._read_status() 439 if status != CONTINUE: 440 break 441 # skip the header from the 100 response 442 while True: 443 skip = self.fp.readline(_MAXLINE + 1) 444 if len(skip) > _MAXLINE: 445 raise LineTooLong("header line") 446 skip = skip.strip() 447 if not skip: 448 break 449 if self.debuglevel > 0: 450 print "header:", skip 451 452 self.status = status 453 self.reason = reason.strip() 454 if version == 'HTTP/1.0': 455 self.version = 10 456 elif version.startswith('HTTP/1.'): 457 self.version = 11 # use HTTP/1.1 code for HTTP/1.x where x>=1 458 elif version == 'HTTP/0.9': 459 self.version = 9 460 else: 461 raise UnknownProtocol(version) 462 463 if self.version == 9: 464 self.length = None 465 self.chunked = 0 466 self.will_close = 1 467 self.msg = HTTPMessage(StringIO()) 468 return 469 470 self.msg = HTTPMessage(self.fp, 0) 471 if self.debuglevel > 0: 472 for hdr in self.msg.headers: 473 print "header:", hdr, 474 475 # don't let the msg keep an fp 476 self.msg.fp = None 477 478 # are we using the chunked-style of transfer encoding? 
479 tr_enc = self.msg.getheader('transfer-encoding') 480 if tr_enc and tr_enc.lower() == "chunked": 481 self.chunked = 1 482 self.chunk_left = None 483 else: 484 self.chunked = 0 485 486 # will the connection close at the end of the response? 487 self.will_close = self._check_close() 488 489 # do we have a Content-Length? 490 # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked" 491 length = self.msg.getheader('content-length') 492 if length and not self.chunked: 493 try: 494 self.length = int(length) 495 except ValueError: 496 self.length = None 497 else: 498 if self.length < 0: # ignore nonsensical negative lengths 499 self.length = None 500 else: 501 self.length = None 502 503 # does the body have a fixed length? (of zero) 504 if (status == NO_CONTENT or status == NOT_MODIFIED or 505 100 <= status < 200 or # 1xx codes 506 self._method == 'HEAD'): 507 self.length = 0 508 509 # if the connection remains open, and we aren't using chunked, and 510 # a content-length was not provided, then assume that the connection 511 # WILL close. 512 if not self.will_close and \ 513 not self.chunked and \ 514 self.length is None: 515 self.will_close = 1 516 517 def _check_close(self): 518 conn = self.msg.getheader('connection') 519 if self.version == 11: 520 # An HTTP/1.1 proxy is assumed to stay open unless 521 # explicitly closed. 522 conn = self.msg.getheader('connection') 523 if conn and "close" in conn.lower(): 524 return True 525 return False 526 527 # Some HTTP/1.0 implementations have support for persistent 528 # connections, using rules different than HTTP/1.1. 529 530 # For older HTTP, Keep-Alive indicates persistent connection. 531 if self.msg.getheader('keep-alive'): 532 return False 533 534 # At least Akamai returns a "Connection: Keep-Alive" header, 535 # which was supposed to be sent by the client. 536 if conn and "keep-alive" in conn.lower(): 537 return False 538 539 # Proxy-Connection is a netscape hack. 
540 pconn = self.msg.getheader('proxy-connection') 541 if pconn and "keep-alive" in pconn.lower(): 542 return False 543 544 # otherwise, assume it will close 545 return True 546 547 def close(self): 548 fp = self.fp 549 if fp: 550 self.fp = None 551 fp.close() 552 553 def isclosed(self): 554 # NOTE: it is possible that we will not ever call self.close(). This 555 # case occurs when will_close is TRUE, length is None, and we 556 # read up to the last byte, but NOT past it. 557 # 558 # IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be 559 # called, meaning self.isclosed() is meaningful. 560 return self.fp is None 561 562 # XXX It would be nice to have readline and __iter__ for this, too. 563 564 def read(self, amt=None): 565 if self.fp is None: 566 return '' 567 568 if self._method == 'HEAD': 569 self.close() 570 return '' 571 572 if self.chunked: 573 return self._read_chunked(amt) 574 575 if amt is None: 576 # unbounded read 577 if self.length is None: 578 s = self.fp.read() 579 else: 580 try: 581 s = self._safe_read(self.length) 582 except IncompleteRead: 583 self.close() 584 raise 585 self.length = 0 586 self.close() # we read everything 587 return s 588 589 if self.length is not None: 590 if amt > self.length: 591 # clip the read to the "end of response" 592 amt = self.length 593 594 # we do not use _safe_read() here because this may be a .will_close 595 # connection, and the user is reading more bytes than will be provided 596 # (for example, reading in 1k chunks) 597 s = self.fp.read(amt) 598 if not s and amt: 599 # Ideally, we would raise IncompleteRead if the content-length 600 # wasn't satisfied, but it might break compatibility. 
601 self.close() 602 if self.length is not None: 603 self.length -= len(s) 604 if not self.length: 605 self.close() 606 607 return s 608 609 def _read_chunked(self, amt): 610 assert self.chunked != _UNKNOWN 611 chunk_left = self.chunk_left 612 value = [] 613 while True: 614 if chunk_left is None: 615 line = self.fp.readline(_MAXLINE + 1) 616 if len(line) > _MAXLINE: 617 raise LineTooLong("chunk size") 618 i = line.find(';') 619 if i >= 0: 620 line = line[:i] # strip chunk-extensions 621 try: 622 chunk_left = int(line, 16) 623 except ValueError: 624 # close the connection as protocol synchronisation is 625 # probably lost 626 self.close() 627 raise IncompleteRead(''.join(value)) 628 if chunk_left == 0: 629 break 630 if amt is None: 631 value.append(self._safe_read(chunk_left)) 632 elif amt < chunk_left: 633 value.append(self._safe_read(amt)) 634 self.chunk_left = chunk_left - amt 635 return ''.join(value) 636 elif amt == chunk_left: 637 value.append(self._safe_read(amt)) 638 self._safe_read(2) # toss the CRLF at the end of the chunk 639 self.chunk_left = None 640 return ''.join(value) 641 else: 642 value.append(self._safe_read(chunk_left)) 643 amt -= chunk_left 644 645 # we read the whole chunk, get another 646 self._safe_read(2) # toss the CRLF at the end of the chunk 647 chunk_left = None 648 649 # read and discard trailer up to the CRLF terminator 650 ### note: we shouldn't have any trailers! 651 while True: 652 line = self.fp.readline(_MAXLINE + 1) 653 if len(line) > _MAXLINE: 654 raise LineTooLong("trailer line") 655 if not line: 656 # a vanishingly small number of sites EOF without 657 # sending the trailer 658 break 659 if line == '\r\n': 660 break 661 662 # we read everything; close the "file" 663 self.close() 664 665 return ''.join(value) 666 667 def _safe_read(self, amt): 668 """Read the number of bytes requested, compensating for partial reads. 
669 670 Normally, we have a blocking socket, but a read() can be interrupted 671 by a signal (resulting in a partial read). 672 673 Note that we cannot distinguish between EOF and an interrupt when zero 674 bytes have been read. IncompleteRead() will be raised in this 675 situation. 676 677 This function should be used when <amt> bytes "should" be present for 678 reading. If the bytes are truly not available (due to EOF), then the 679 IncompleteRead exception can be used to detect the problem. 680 """ 681 # NOTE(gps): As of svn r74426 socket._fileobject.read(x) will never 682 # return less than x bytes unless EOF is encountered. It now handles 683 # signal interruptions (socket.error EINTR) internally. This code 684 # never caught that exception anyways. It seems largely pointless. 685 # self.fp.read(amt) will work fine. 686 s = [] 687 while amt > 0: 688 chunk = self.fp.read(min(amt, MAXAMOUNT)) 689 if not chunk: 690 raise IncompleteRead(''.join(s), amt) 691 s.append(chunk) 692 amt -= len(chunk) 693 return ''.join(s) 694 695 def fileno(self): 696 return self.fp.fileno() 697 698 def getheader(self, name, default=None): 699 if self.msg is None: 700 raise ResponseNotReady() 701 return self.msg.getheader(name, default) 702 703 def getheaders(self): 704 """Return list of (header, value) tuples.""" 705 if self.msg is None: 706 raise ResponseNotReady() 707 return self.msg.items() 708 709 710class HTTPConnection: 711 712 _http_vsn = 11 713 _http_vsn_str = 'HTTP/1.1' 714 715 response_class = HTTPResponse 716 default_port = HTTP_PORT 717 auto_open = 1 718 debuglevel = 0 719 strict = 0 720 721 def __init__(self, host, port=None, strict=None, 722 timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None): 723 self.timeout = timeout 724 self.source_address = source_address 725 self.sock = None 726 self._buffer = [] 727 self.__response = None 728 self.__state = _CS_IDLE 729 self._method = None 730 self._tunnel_host = None 731 self._tunnel_port = None 732 self._tunnel_headers = 
{} 733 if strict is not None: 734 self.strict = strict 735 736 (self.host, self.port) = self._get_hostport(host, port) 737 738 # This is stored as an instance variable to allow unittests 739 # to replace with a suitable mock 740 self._create_connection = socket.create_connection 741 742 def set_tunnel(self, host, port=None, headers=None): 743 """ Set up host and port for HTTP CONNECT tunnelling. 744 745 In a connection that uses HTTP Connect tunneling, the host passed to the 746 constructor is used as proxy server that relays all communication to the 747 endpoint passed to set_tunnel. This is done by sending a HTTP CONNECT 748 request to the proxy server when the connection is established. 749 750 This method must be called before the HTTP connection has been 751 established. 752 753 The headers argument should be a mapping of extra HTTP headers 754 to send with the CONNECT request. 755 """ 756 # Verify if this is required. 757 if self.sock: 758 raise RuntimeError("Can't setup tunnel for established connection.") 759 760 self._tunnel_host, self._tunnel_port = self._get_hostport(host, port) 761 if headers: 762 self._tunnel_headers = headers 763 else: 764 self._tunnel_headers.clear() 765 766 def _get_hostport(self, host, port): 767 if port is None: 768 i = host.rfind(':') 769 j = host.rfind(']') # ipv6 addresses have [...] 
770 if i > j: 771 try: 772 port = int(host[i+1:]) 773 except ValueError: 774 if host[i+1:] == "": # http://foo.com:/ == http://foo.com/ 775 port = self.default_port 776 else: 777 raise InvalidURL("nonnumeric port: '%s'" % host[i+1:]) 778 host = host[:i] 779 else: 780 port = self.default_port 781 if host and host[0] == '[' and host[-1] == ']': 782 host = host[1:-1] 783 return (host, port) 784 785 def set_debuglevel(self, level): 786 self.debuglevel = level 787 788 def _tunnel(self): 789 self.send("CONNECT %s:%d HTTP/1.0\r\n" % (self._tunnel_host, 790 self._tunnel_port)) 791 for header, value in self._tunnel_headers.iteritems(): 792 self.send("%s: %s\r\n" % (header, value)) 793 self.send("\r\n") 794 response = self.response_class(self.sock, strict = self.strict, 795 method = self._method) 796 (version, code, message) = response._read_status() 797 798 if version == "HTTP/0.9": 799 # HTTP/0.9 doesn't support the CONNECT verb, so if httplib has 800 # concluded HTTP/0.9 is being used something has gone wrong. 801 self.close() 802 raise socket.error("Invalid response from tunnel request") 803 if code != 200: 804 self.close() 805 raise socket.error("Tunnel connection failed: %d %s" % (code, 806 message.strip())) 807 while True: 808 line = response.fp.readline(_MAXLINE + 1) 809 if len(line) > _MAXLINE: 810 raise LineTooLong("header line") 811 if not line: 812 # for sites which EOF without sending trailer 813 break 814 if line == '\r\n': 815 break 816 817 818 def connect(self): 819 """Connect to the host and port specified in __init__.""" 820 self.sock = self._create_connection((self.host,self.port), 821 self.timeout, self.source_address) 822 823 if self._tunnel_host: 824 self._tunnel() 825 826 def close(self): 827 """Close the connection to the HTTP server.""" 828 self.__state = _CS_IDLE 829 try: 830 sock = self.sock 831 if sock: 832 self.sock = None 833 sock.close() # close it manually... 
there may be other refs 834 finally: 835 response = self.__response 836 if response: 837 self.__response = None 838 response.close() 839 840 def send(self, data): 841 """Send `data' to the server.""" 842 if self.sock is None: 843 if self.auto_open: 844 self.connect() 845 else: 846 raise NotConnected() 847 848 if self.debuglevel > 0: 849 print "send:", repr(data) 850 blocksize = 8192 851 if hasattr(data,'read') and not isinstance(data, array): 852 if self.debuglevel > 0: print "sendIng a read()able" 853 datablock = data.read(blocksize) 854 while datablock: 855 self.sock.sendall(datablock) 856 datablock = data.read(blocksize) 857 else: 858 self.sock.sendall(data) 859 860 def _output(self, s): 861 """Add a line of output to the current request buffer. 862 863 Assumes that the line does *not* end with \\r\\n. 864 """ 865 self._buffer.append(s) 866 867 def _send_output(self, message_body=None): 868 """Send the currently buffered request and clear the buffer. 869 870 Appends an extra \\r\\n to the buffer. 871 A message_body may be specified, to be appended to the request. 872 """ 873 self._buffer.extend(("", "")) 874 msg = "\r\n".join(self._buffer) 875 del self._buffer[:] 876 # If msg and message_body are sent in a single send() call, 877 # it will avoid performance problems caused by the interaction 878 # between delayed ack and the Nagle algorithm. 879 if isinstance(message_body, str): 880 msg += message_body 881 message_body = None 882 self.send(msg) 883 if message_body is not None: 884 #message_body was not a string (i.e. it is a file) and 885 #we must run the risk of Nagle 886 self.send(message_body) 887 888 def putrequest(self, method, url, skip_host=0, skip_accept_encoding=0): 889 """Send a request to the server. 890 891 `method' specifies an HTTP request method, e.g. 'GET'. 892 `url' specifies the object being requested, e.g. '/index.html'. 
893 `skip_host' if True does not add automatically a 'Host:' header 894 `skip_accept_encoding' if True does not add automatically an 895 'Accept-Encoding:' header 896 """ 897 898 # if a prior response has been completed, then forget about it. 899 if self.__response and self.__response.isclosed(): 900 self.__response = None 901 902 903 # in certain cases, we cannot issue another request on this connection. 904 # this occurs when: 905 # 1) we are in the process of sending a request. (_CS_REQ_STARTED) 906 # 2) a response to a previous request has signalled that it is going 907 # to close the connection upon completion. 908 # 3) the headers for the previous response have not been read, thus 909 # we cannot determine whether point (2) is true. (_CS_REQ_SENT) 910 # 911 # if there is no prior response, then we can request at will. 912 # 913 # if point (2) is true, then we will have passed the socket to the 914 # response (effectively meaning, "there is no prior response"), and 915 # will open a new one when a new request is made. 916 # 917 # Note: if a prior response exists, then we *can* start a new request. 918 # We are not allowed to begin fetching the response to this new 919 # request, however, until that prior response is complete. 920 # 921 if self.__state == _CS_IDLE: 922 self.__state = _CS_REQ_STARTED 923 else: 924 raise CannotSendRequest() 925 926 # Save the method we use, we need it later in the response phase 927 self._method = method 928 if not url: 929 url = '/' 930 hdr = '%s %s %s' % (method, url, self._http_vsn_str) 931 932 self._output(hdr) 933 934 if self._http_vsn == 11: 935 # Issue some standard headers for better HTTP/1.1 compliance 936 937 if not skip_host: 938 # this header is issued *only* for HTTP/1.1 939 # connections. more specifically, this means it is 940 # only issued when the client uses the new 941 # HTTPConnection() class. backwards-compat clients 942 # will be using HTTP/1.0 and those clients may be 943 # issuing this header themselves. 
we should NOT issue 944 # it twice; some web servers (such as Apache) barf 945 # when they see two Host: headers 946 947 # If we need a non-standard port,include it in the 948 # header. If the request is going through a proxy, 949 # but the host of the actual URL, not the host of the 950 # proxy. 951 952 netloc = '' 953 if url.startswith('http'): 954 nil, netloc, nil, nil, nil = urlsplit(url) 955 956 if netloc: 957 try: 958 netloc_enc = netloc.encode("ascii") 959 except UnicodeEncodeError: 960 netloc_enc = netloc.encode("idna") 961 self.putheader('Host', netloc_enc) 962 else: 963 if self._tunnel_host: 964 host = self._tunnel_host 965 port = self._tunnel_port 966 else: 967 host = self.host 968 port = self.port 969 970 try: 971 host_enc = host.encode("ascii") 972 except UnicodeEncodeError: 973 host_enc = host.encode("idna") 974 # Wrap the IPv6 Host Header with [] (RFC 2732) 975 if host_enc.find(':') >= 0: 976 host_enc = "[" + host_enc + "]" 977 if port == self.default_port: 978 self.putheader('Host', host_enc) 979 else: 980 self.putheader('Host', "%s:%s" % (host_enc, port)) 981 982 # note: we are assuming that clients will not attempt to set these 983 # headers since *this* library must deal with the 984 # consequences. this also means that when the supporting 985 # libraries are updated to recognize other forms, then this 986 # code should be changed (removed or updated). 987 988 # we only want a Content-Encoding of "identity" since we don't 989 # support encodings such as x-gzip or x-deflate. 990 if not skip_accept_encoding: 991 self.putheader('Accept-Encoding', 'identity') 992 993 # we can accept "chunked" Transfer-Encodings, but no others 994 # NOTE: no TE header implies *only* "chunked" 995 #self.putheader('TE', 'chunked') 996 997 # if TE is supplied in the header, then it must appear in a 998 # Connection header. 
999 #self.putheader('Connection', 'TE') 1000 1001 else: 1002 # For HTTP/1.0, the server will assume "not chunked" 1003 pass 1004 1005 def putheader(self, header, *values): 1006 """Send a request header line to the server. 1007 1008 For example: h.putheader('Accept', 'text/html') 1009 """ 1010 if self.__state != _CS_REQ_STARTED: 1011 raise CannotSendHeader() 1012 1013 header = '%s' % header 1014 if not _is_legal_header_name(header): 1015 raise ValueError('Invalid header name %r' % (header,)) 1016 1017 values = [str(v) for v in values] 1018 for one_value in values: 1019 if _is_illegal_header_value(one_value): 1020 raise ValueError('Invalid header value %r' % (one_value,)) 1021 1022 hdr = '%s: %s' % (header, '\r\n\t'.join(values)) 1023 self._output(hdr) 1024 1025 def endheaders(self, message_body=None): 1026 """Indicate that the last header line has been sent to the server. 1027 1028 This method sends the request to the server. The optional 1029 message_body argument can be used to pass a message body 1030 associated with the request. The message body will be sent in 1031 the same packet as the message headers if it is string, otherwise it is 1032 sent as a separate packet. 1033 """ 1034 if self.__state == _CS_REQ_STARTED: 1035 self.__state = _CS_REQ_SENT 1036 else: 1037 raise CannotSendHeader() 1038 self._send_output(message_body) 1039 1040 def request(self, method, url, body=None, headers={}): 1041 """Send a complete request to the server.""" 1042 self._send_request(method, url, body, headers) 1043 1044 def _set_content_length(self, body, method): 1045 # Set the content-length based on the body. If the body is "empty", we 1046 # set Content-Length: 0 for methods that expect a body (RFC 7230, 1047 # Section 3.3.2). If the body is set for other methods, we set the 1048 # header provided we can figure out what the length is. 
1049 thelen = None 1050 if body is None and method.upper() in _METHODS_EXPECTING_BODY: 1051 thelen = '0' 1052 elif body is not None: 1053 try: 1054 thelen = str(len(body)) 1055 except (TypeError, AttributeError): 1056 # If this is a file-like object, try to 1057 # fstat its file descriptor 1058 try: 1059 thelen = str(os.fstat(body.fileno()).st_size) 1060 except (AttributeError, OSError): 1061 # Don't send a length if this failed 1062 if self.debuglevel > 0: print "Cannot stat!!" 1063 1064 if thelen is not None: 1065 self.putheader('Content-Length', thelen) 1066 1067 def _send_request(self, method, url, body, headers): 1068 # Honor explicitly requested Host: and Accept-Encoding: headers. 1069 header_names = dict.fromkeys([k.lower() for k in headers]) 1070 skips = {} 1071 if 'host' in header_names: 1072 skips['skip_host'] = 1 1073 if 'accept-encoding' in header_names: 1074 skips['skip_accept_encoding'] = 1 1075 1076 self.putrequest(method, url, **skips) 1077 1078 if 'content-length' not in header_names: 1079 self._set_content_length(body, method) 1080 for hdr, value in headers.iteritems(): 1081 self.putheader(hdr, value) 1082 self.endheaders(body) 1083 1084 def getresponse(self, buffering=False): 1085 "Get the response from the server." 1086 1087 # if a prior response has been completed, then forget about it. 
1088 if self.__response and self.__response.isclosed(): 1089 self.__response = None 1090 1091 # 1092 # if a prior response exists, then it must be completed (otherwise, we 1093 # cannot read this response's header to determine the connection-close 1094 # behavior) 1095 # 1096 # note: if a prior response existed, but was connection-close, then the 1097 # socket and response were made independent of this HTTPConnection 1098 # object since a new request requires that we open a whole new 1099 # connection 1100 # 1101 # this means the prior response had one of two states: 1102 # 1) will_close: this connection was reset and the prior socket and 1103 # response operate independently 1104 # 2) persistent: the response was retained and we await its 1105 # isclosed() status to become true. 1106 # 1107 if self.__state != _CS_REQ_SENT or self.__response: 1108 raise ResponseNotReady() 1109 1110 args = (self.sock,) 1111 kwds = {"strict":self.strict, "method":self._method} 1112 if self.debuglevel > 0: 1113 args += (self.debuglevel,) 1114 if buffering: 1115 #only add this keyword if non-default, for compatibility with 1116 #other response_classes. 1117 kwds["buffering"] = True; 1118 response = self.response_class(*args, **kwds) 1119 1120 try: 1121 response.begin() 1122 assert response.will_close != _UNKNOWN 1123 self.__state = _CS_IDLE 1124 1125 if response.will_close: 1126 # this effectively passes the connection to the response 1127 self.close() 1128 else: 1129 # remember this, so we can tell when it is complete 1130 self.__response = response 1131 1132 return response 1133 except: 1134 response.close() 1135 raise 1136 1137 1138class HTTP: 1139 "Compatibility class with httplib.py from 1.5." 1140 1141 _http_vsn = 10 1142 _http_vsn_str = 'HTTP/1.0' 1143 1144 debuglevel = 0 1145 1146 _connection_class = HTTPConnection 1147 1148 def __init__(self, host='', port=None, strict=None): 1149 "Provide a default host, since the superclass requires one." 
1150 1151 # some joker passed 0 explicitly, meaning default port 1152 if port == 0: 1153 port = None 1154 1155 # Note that we may pass an empty string as the host; this will raise 1156 # an error when we attempt to connect. Presumably, the client code 1157 # will call connect before then, with a proper host. 1158 self._setup(self._connection_class(host, port, strict)) 1159 1160 def _setup(self, conn): 1161 self._conn = conn 1162 1163 # set up delegation to flesh out interface 1164 self.send = conn.send 1165 self.putrequest = conn.putrequest 1166 self.putheader = conn.putheader 1167 self.endheaders = conn.endheaders 1168 self.set_debuglevel = conn.set_debuglevel 1169 1170 conn._http_vsn = self._http_vsn 1171 conn._http_vsn_str = self._http_vsn_str 1172 1173 self.file = None 1174 1175 def connect(self, host=None, port=None): 1176 "Accept arguments to set the host/port, since the superclass doesn't." 1177 1178 if host is not None: 1179 (self._conn.host, self._conn.port) = self._conn._get_hostport(host, port) 1180 self._conn.connect() 1181 1182 def getfile(self): 1183 "Provide a getfile, since the superclass' does not use this concept." 1184 return self.file 1185 1186 def getreply(self, buffering=False): 1187 """Compat definition since superclass does not define it. 1188 1189 Returns a tuple consisting of: 1190 - server status code (e.g. '200' if all goes well) 1191 - server "reason" corresponding to status code 1192 - any RFC822 headers in the response from the server 1193 """ 1194 try: 1195 if not buffering: 1196 response = self._conn.getresponse() 1197 else: 1198 #only add this keyword if non-default for compatibility 1199 #with other connection classes 1200 response = self._conn.getresponse(buffering) 1201 except BadStatusLine, e: 1202 ### hmm. if getresponse() ever closes the socket on a bad request, 1203 ### then we are going to have problems with self.sock 1204 1205 ### should we keep this behavior? do people use it? 
1206 # keep the socket open (as a file), and return it 1207 self.file = self._conn.sock.makefile('rb', 0) 1208 1209 # close our socket -- we want to restart after any protocol error 1210 self.close() 1211 1212 self.headers = None 1213 return -1, e.line, None 1214 1215 self.headers = response.msg 1216 self.file = response.fp 1217 return response.status, response.reason, response.msg 1218 1219 def close(self): 1220 self._conn.close() 1221 1222 # note that self.file == response.fp, which gets closed by the 1223 # superclass. just clear the object ref here. 1224 ### hmm. messy. if status==-1, then self.file is owned by us. 1225 ### well... we aren't explicitly closing, but losing this ref will 1226 ### do it 1227 self.file = None 1228 1229try: 1230 import ssl 1231except ImportError: 1232 pass 1233else: 1234 class HTTPSConnection(HTTPConnection): 1235 "This class allows communication via SSL." 1236 1237 default_port = HTTPS_PORT 1238 1239 def __init__(self, host, port=None, key_file=None, cert_file=None, 1240 strict=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, 1241 source_address=None, context=None): 1242 HTTPConnection.__init__(self, host, port, strict, timeout, 1243 source_address) 1244 self.key_file = key_file 1245 self.cert_file = cert_file 1246 if context is None: 1247 context = ssl._create_default_https_context() 1248 if key_file or cert_file: 1249 context.load_cert_chain(cert_file, key_file) 1250 self._context = context 1251 1252 def connect(self): 1253 "Connect to a host on a given (SSL) port." 
1254 1255 HTTPConnection.connect(self) 1256 1257 if self._tunnel_host: 1258 server_hostname = self._tunnel_host 1259 else: 1260 server_hostname = self.host 1261 1262 self.sock = self._context.wrap_socket(self.sock, 1263 server_hostname=server_hostname) 1264 1265 __all__.append("HTTPSConnection") 1266 1267 class HTTPS(HTTP): 1268 """Compatibility with 1.5 httplib interface 1269 1270 Python 1.5.2 did not have an HTTPS class, but it defined an 1271 interface for sending http requests that is also useful for 1272 https. 1273 """ 1274 1275 _connection_class = HTTPSConnection 1276 1277 def __init__(self, host='', port=None, key_file=None, cert_file=None, 1278 strict=None, context=None): 1279 # provide a default host, pass the X509 cert info 1280 1281 # urf. compensate for bad input. 1282 if port == 0: 1283 port = None 1284 self._setup(self._connection_class(host, port, key_file, 1285 cert_file, strict, 1286 context=context)) 1287 1288 # we never actually use these for anything, but we keep them 1289 # here for compatibility with post-1.5.2 CVS. 1290 self.key_file = key_file 1291 self.cert_file = cert_file 1292 1293 1294 def FakeSocket (sock, sslobj): 1295 warnings.warn("FakeSocket is deprecated, and won't be in 3.x. " + 1296 "Use the result of ssl.wrap_socket() directly instead.", 1297 DeprecationWarning, stacklevel=2) 1298 return sslobj 1299 1300 1301class HTTPException(Exception): 1302 # Subclasses that define an __init__ must call Exception.__init__ 1303 # or define self.args. Otherwise, str() will fail. 
1304 pass 1305 1306class NotConnected(HTTPException): 1307 pass 1308 1309class InvalidURL(HTTPException): 1310 pass 1311 1312class UnknownProtocol(HTTPException): 1313 def __init__(self, version): 1314 self.args = version, 1315 self.version = version 1316 1317class UnknownTransferEncoding(HTTPException): 1318 pass 1319 1320class UnimplementedFileMode(HTTPException): 1321 pass 1322 1323class IncompleteRead(HTTPException): 1324 def __init__(self, partial, expected=None): 1325 self.args = partial, 1326 self.partial = partial 1327 self.expected = expected 1328 def __repr__(self): 1329 if self.expected is not None: 1330 e = ', %i more expected' % self.expected 1331 else: 1332 e = '' 1333 return 'IncompleteRead(%i bytes read%s)' % (len(self.partial), e) 1334 def __str__(self): 1335 return repr(self) 1336 1337class ImproperConnectionState(HTTPException): 1338 pass 1339 1340class CannotSendRequest(ImproperConnectionState): 1341 pass 1342 1343class CannotSendHeader(ImproperConnectionState): 1344 pass 1345 1346class ResponseNotReady(ImproperConnectionState): 1347 pass 1348 1349class BadStatusLine(HTTPException): 1350 def __init__(self, line): 1351 if not line: 1352 line = repr(line) 1353 self.args = line, 1354 self.line = line 1355 1356class LineTooLong(HTTPException): 1357 def __init__(self, line_type): 1358 HTTPException.__init__(self, "got more than %d bytes when reading %s" 1359 % (_MAXLINE, line_type)) 1360 1361# for backwards compatibility 1362error = HTTPException 1363 1364class LineAndFileWrapper: 1365 """A limited file-like object for HTTP/0.9 responses.""" 1366 1367 # The status-line parsing code calls readline(), which normally 1368 # get the HTTP status line. For a 0.9 response, however, this is 1369 # actually the first line of the body! Clients need to get a 1370 # readable file object that contains that line. 
1371 1372 def __init__(self, line, file): 1373 self._line = line 1374 self._file = file 1375 self._line_consumed = 0 1376 self._line_offset = 0 1377 self._line_left = len(line) 1378 1379 def __getattr__(self, attr): 1380 return getattr(self._file, attr) 1381 1382 def _done(self): 1383 # called when the last byte is read from the line. After the 1384 # call, all read methods are delegated to the underlying file 1385 # object. 1386 self._line_consumed = 1 1387 self.read = self._file.read 1388 self.readline = self._file.readline 1389 self.readlines = self._file.readlines 1390 1391 def read(self, amt=None): 1392 if self._line_consumed: 1393 return self._file.read(amt) 1394 assert self._line_left 1395 if amt is None or amt > self._line_left: 1396 s = self._line[self._line_offset:] 1397 self._done() 1398 if amt is None: 1399 return s + self._file.read() 1400 else: 1401 return s + self._file.read(amt - len(s)) 1402 else: 1403 assert amt <= self._line_left 1404 i = self._line_offset 1405 j = i + amt 1406 s = self._line[i:j] 1407 self._line_offset = j 1408 self._line_left -= amt 1409 if self._line_left == 0: 1410 self._done() 1411 return s 1412 1413 def readline(self): 1414 if self._line_consumed: 1415 return self._file.readline() 1416 assert self._line_left 1417 s = self._line[self._line_offset:] 1418 self._done() 1419 return s 1420 1421 def readlines(self, size=None): 1422 if self._line_consumed: 1423 return self._file.readlines(size) 1424 assert self._line_left 1425 L = [self._line[self._line_offset:]] 1426 self._done() 1427 if size is None: 1428 return L + self._file.readlines() 1429 else: 1430 return L + self._file.readlines(size) 1431