r"""HTTP/1.1 client library

<intro stuff goes here>
<other stuff, too>

HTTPConnection goes through a number of "states", which define when a client
may legally make another request or fetch the response for a particular
request. This diagram details these state transitions:

    (null)
      |
      | HTTPConnection()
      v
    Idle
      |
      | putrequest()
      v
    Request-started
      |
      | ( putheader() )*  endheaders()
      v
    Request-sent
      |\_____________________________
      |                              | getresponse() raises
      | response = getresponse()     | ConnectionError
      v                              v
    Unread-response                Idle
    [Response-headers-read]
      |\____________________
      |                     |
      | response.read()     | putrequest()
      v                     v
    Idle                  Req-started-unread-response
                     ______/|
                   /        |
   response.read() |        | ( putheader() )*  endheaders()
                   v        v
       Request-started    Req-sent-unread-response
                            |
                            | response.read()
                            v
                          Request-sent

This diagram presents the following rules:
  -- a second request may not be started until {response-headers-read}
  -- a response [object] cannot be retrieved until {request-sent}
  -- there is no differentiation between an unread response body and a
     partially read response body

Note: this enforcement is applied by the HTTPConnection class. The
      HTTPResponse class does not enforce this state machine, which
      implies sophisticated clients may accelerate the request/response
      pipeline. Caution should be taken, though: accelerating the states
      beyond the above pattern may imply knowledge of the server's
      connection-close behavior for certain requests. For example, it
      is impossible to tell whether the server will close the connection
      UNTIL the response headers have been read; this means that further
      requests cannot be placed into the pipeline until it is known that
      the server will NOT be closing the connection.

Logical State                  __state            __response
-------------                  -------            ----------
Idle                           _CS_IDLE           None
Request-started                _CS_REQ_STARTED    None
Request-sent                   _CS_REQ_SENT       None
Unread-response                _CS_IDLE           <response_class>
Req-started-unread-response    _CS_REQ_STARTED    <response_class>
Req-sent-unread-response       _CS_REQ_SENT       <response_class>
"""

import email.parser
import email.message
import http
import io
import re
import socket
import collections.abc
from urllib.parse import urlsplit

# HTTPMessage, parse_headers(), and the HTTP status code constants are
# intentionally omitted for simplicity
__all__ = ["HTTPResponse", "HTTPConnection",
           "HTTPException", "NotConnected", "UnknownProtocol",
           "UnknownTransferEncoding", "UnimplementedFileMode",
           "IncompleteRead", "InvalidURL", "ImproperConnectionState",
           "CannotSendRequest", "CannotSendHeader", "ResponseNotReady",
           "BadStatusLine", "LineTooLong", "RemoteDisconnected", "error",
           "responses"]

HTTP_PORT = 80
HTTPS_PORT = 443

_UNKNOWN = 'UNKNOWN'

# connection states
_CS_IDLE = 'Idle'
_CS_REQ_STARTED = 'Request-started'
_CS_REQ_SENT = 'Request-sent'


# hack to maintain backwards compatibility: publish every HTTPStatus member
# (CONTINUE, OK, NO_CONTENT, ...) as a module-level name
globals().update(http.HTTPStatus.__members__)

# another hack to maintain backwards compatibility
# Mapping status codes to official W3C names
responses = {v: v.phrase for v in http.HTTPStatus.__members__.values()}

# maximal amount of data to read at one time in _safe_read
MAXAMOUNT = 1048576

# maximal line length when calling readline().
_MAXLINE = 65536
_MAXHEADERS = 100

# Header name/value ABNF (http://tools.ietf.org/html/rfc7230#section-3.2)
#
# VCHAR          = %x21-7E
# obs-text       = %x80-FF
# header-field   = field-name ":" OWS field-value OWS
# field-name     = token
# field-value    = *( field-content / obs-fold )
# field-content  = field-vchar [ 1*( SP / HTAB ) field-vchar ]
# field-vchar    = VCHAR / obs-text
#
# obs-fold       = CRLF 1*( SP / HTAB )
#                ; obsolete line folding
#                ; see Section 3.2.4

# token          = 1*tchar
#
# tchar          = "!" / "#" / "$" / "%" / "&" / "'" / "*"
#                / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~"
#                / DIGIT / ALPHA
#                ; any VCHAR, except delimiters
#
# VCHAR defined in http://tools.ietf.org/html/rfc5234#appendix-B.1

# the patterns for both name and value are more lenient than RFC
# definitions to allow for backwards compatibility
_is_legal_header_name = re.compile(rb'[^:\s][^:\r\n]*').fullmatch
_is_illegal_header_value = re.compile(rb'\n(?![ \t])|\r(?![ \t\n])').search

# We always set the Content-Length header for these methods because some
# servers will otherwise respond with a 411
_METHODS_EXPECTING_BODY = {'PATCH', 'POST', 'PUT'}


def _encode(data, name='data'):
    """Call data.encode("latin-1") but show a better error message."""
    try:
        return data.encode("latin-1")
    except UnicodeEncodeError as err:
        raise UnicodeEncodeError(
            err.encoding,
            err.object,
            err.start,
            err.end,
            "%s (%.20r) is not valid Latin-1. Use %s.encode('utf-8') "
            "if you want to send it encoded in UTF-8." %
            (name.title(), data[err.start:err.end], name)) from None


class HTTPMessage(email.message.Message):
    """email.message.Message subclass used to hold parsed HTTP headers."""

    # XXX The only usage of this method is in
    # http.server.CGIHTTPRequestHandler.  Maybe move the code there so
    # that it doesn't need to be part of the public API.  The API has
    # never been defined so this could cause backwards compatibility
    # issues.

    def getallmatchingheaders(self, name):
        """Find all header lines matching a given header name.

        Look through the list of headers and find all lines matching a given
        header name (and their continuation lines).  A list of the lines is
        returned, without interpretation.  If the header does not occur, an
        empty list is returned.  If the header occurs multiple times, all
        occurrences are returned.  Case is not important in the header name.

        """
        # NOTE(review): the loop walks self.keys() and compares each entry's
        # prefix against name + ':'.  Presumably keys() yields bare field
        # names (no trailing colon), in which case nothing ever matches and
        # the result is always [] -- confirm before relying on this method.
        name = name.lower() + ':'
        n = len(name)
        lst = []
        hit = 0
        for line in self.keys():
            if line[:n].lower() == name:
                hit = 1
            elif not line[:1].isspace():
                hit = 0
            if hit:
                lst.append(line)
        return lst

def parse_headers(fp, _class=HTTPMessage):
    """Parses only RFC2822 headers from a file pointer.

    email Parser wants to see strings rather than bytes.
    But a TextIOWrapper around self.rfile would buffer too many bytes
    from the stream, bytes which we later need to read as bytes.
    So we read the correct bytes here, as bytes, for email Parser
    to parse.

    Raises LineTooLong if a header line exceeds _MAXLINE bytes and
    HTTPException if more than _MAXHEADERS lines are read.

    """
    headers = []
    while True:
        # ask for one byte more than allowed so an over-long line is
        # detectable without reading it completely
        line = fp.readline(_MAXLINE + 1)
        if len(line) > _MAXLINE:
            raise LineTooLong("header line")
        headers.append(line)
        if len(headers) > _MAXHEADERS:
            raise HTTPException("got more than %d headers" % _MAXHEADERS)
        # a blank line ends the header block; b'' means EOF
        if line in (b'\r\n', b'\n', b''):
            break
    hstring = b''.join(headers).decode('iso-8859-1')
    return email.parser.Parser(_class=_class).parsestr(hstring)


class HTTPResponse(io.BufferedIOBase):
    """File-like reader for one HTTP response read from a socket."""

    # See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details.

    # The bytes from the socket object are iso-8859-1 strings.
    # See RFC 2616 sec 2.2 which notes an exception for MIME-encoded
    # text following RFC 2047.  The basic status line parsing only
    # accepts iso-8859-1.

    def __init__(self, sock, debuglevel=0, method=None, url=None):
        # If the response includes a content-length header, we need to
        # make sure that the client doesn't read more than the
        # specified number of bytes.  If it does, it will block until
        # the server times out and closes the connection.  This will
        # happen if a self.fp.read() is done (without a size) whether
        # self.fp is buffered or not.  So, no self.fp.read() by
        # clients unless they know what they are doing.
        #
        # Note: the ``url`` argument is accepted but not stored here.
        self.fp = sock.makefile("rb")
        self.debuglevel = debuglevel
        self._method = method

        # The HTTPResponse object is returned via urllib.  The clients
        # of http and urllib expect different attributes for the
        # headers.  headers is used here and supports urllib.  msg is
        # provided as a backwards compatibility layer for http
        # clients.

        self.headers = self.msg = None

        # from the Status-Line of the response
        self.version = _UNKNOWN # HTTP-Version
        self.status = _UNKNOWN  # Status-Code
        self.reason = _UNKNOWN  # Reason-Phrase

        self.chunked = _UNKNOWN         # is "chunked" being used?
        self.chunk_left = _UNKNOWN      # bytes left to read in current chunk
        self.length = _UNKNOWN          # number of bytes left in response
        self.will_close = _UNKNOWN      # conn will close at end of response

    def _read_status(self):
        """Read and parse the status line; return (version, status, reason).

        Raises LineTooLong, RemoteDisconnected (EOF before any data) or
        BadStatusLine (malformed line or status outside 100..999).
        """
        line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
        if len(line) > _MAXLINE:
            raise LineTooLong("status line")
        if self.debuglevel > 0:
            print("reply:", repr(line))
        if not line:
            # Presumably, the server closed the connection before
            # sending a valid response.
            raise RemoteDisconnected("Remote end closed connection without"
                                     " response")
        try:
            version, status, reason = line.split(None, 2)
        except ValueError:
            try:
                version, status = line.split(None, 1)
                reason = ""
            except ValueError:
                # empty version will cause next test to fail.
                version = ""
        if not version.startswith("HTTP/"):
            self._close_conn()
            raise BadStatusLine(line)

        # The status code is a three-digit number
        try:
            status = int(status)
            if status < 100 or status > 999:
                raise BadStatusLine(line)
        except ValueError:
            raise BadStatusLine(line)
        return version, status, reason

    def begin(self):
        """Read the status line and headers and set up body-reading state.

        Does nothing if the headers have already been read.
        """
        if self.headers is not None:
            # we've already started reading the response
            return

        # read until we get a non-100 response
        while True:
            version, status, reason = self._read_status()
            if status != CONTINUE:
                break
            # skip the header from the 100 response
            while True:
                skip = self.fp.readline(_MAXLINE + 1)
                if len(skip) > _MAXLINE:
                    raise LineTooLong("header line")
                skip = skip.strip()
                if not skip:
                    break
                if self.debuglevel > 0:
                    print("header:", skip)

        self.code = self.status = status
        self.reason = reason.strip()
        if version in ("HTTP/1.0", "HTTP/0.9"):
            # Some servers might still return "0.9", treat it as 1.0 anyway
            self.version = 10
        elif version.startswith("HTTP/1."):
            self.version = 11   # use HTTP/1.1 code for HTTP/1.x where x>=1
        else:
            raise UnknownProtocol(version)

        self.headers = self.msg = parse_headers(self.fp)

        if self.debuglevel > 0:
            for hdr in self.headers:
                print("header:", hdr + ":", self.headers.get(hdr))

        # are we using the chunked-style of transfer encoding?
        tr_enc = self.headers.get("transfer-encoding")
        if tr_enc and tr_enc.lower() == "chunked":
            self.chunked = True
            self.chunk_left = None
        else:
            self.chunked = False

        # will the connection close at the end of the response?
        self.will_close = self._check_close()

        # do we have a Content-Length?
        # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
        self.length = None
        length = self.headers.get("content-length")
        if length and not self.chunked:
            try:
                self.length = int(length)
            except ValueError:
                self.length = None
            else:
                if self.length < 0:  # ignore nonsensical negative lengths
                    self.length = None
        else:
            self.length = None

        # does the body have a fixed length? (of zero)
        if (status == NO_CONTENT or status == NOT_MODIFIED or
            100 <= status < 200 or      # 1xx codes
            self._method == "HEAD"):
            self.length = 0

        # if the connection remains open, and we aren't using chunked, and
        # a content-length was not provided, then assume that the connection
        # WILL close.
        if (not self.will_close and
            not self.chunked and
            self.length is None):
            self.will_close = True

    def _check_close(self):
        """Return True if the headers say the connection will be closed."""
        conn = self.headers.get("connection")
        if self.version == 11:
            # An HTTP/1.1 proxy is assumed to stay open unless
            # explicitly closed.
            if conn and "close" in conn.lower():
                return True
            return False

        # Some HTTP/1.0 implementations have support for persistent
        # connections, using rules different than HTTP/1.1.

        # For older HTTP, Keep-Alive indicates persistent connection.
        if self.headers.get("keep-alive"):
            return False

        # At least Akamai returns a "Connection: Keep-Alive" header,
        # which was supposed to be sent by the client.
        if conn and "keep-alive" in conn.lower():
            return False

        # Proxy-Connection is a netscape hack.
        pconn = self.headers.get("proxy-connection")
        if pconn and "keep-alive" in pconn.lower():
            return False

        # otherwise, assume it will close
        return True

    def _close_conn(self):
        """Close the underlying file object and forget it."""
        # clear self.fp first so isclosed() is true even if close() raises
        fp = self.fp
        self.fp = None
        fp.close()

    def close(self):
        try:
            super().close() # set "closed" flag
        finally:
            if self.fp:
                self._close_conn()

    # These implementations are for the benefit of io.BufferedReader.

    # XXX This class should probably be revised to act more like
    # the "raw stream" that BufferedReader expects.

    def flush(self):
        super().flush()
        if self.fp:
            self.fp.flush()

    def readable(self):
        """Always returns True"""
        return True

    # End of "raw stream" methods

    def isclosed(self):
        """True if the connection is closed."""
        # NOTE: it is possible that we will not ever call self.close(). This
        #       case occurs when will_close is TRUE, length is None, and we
        #       read up to the last byte, but NOT past it.
        #
        # IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be
        #          called, meaning self.isclosed() is meaningful.
        return self.fp is None

    def read(self, amt=None):
        """Read and return up to *amt* body bytes, or the whole body if None."""
        if self.fp is None:
            return b""

        if self._method == "HEAD":
            self._close_conn()
            return b""

        if amt is not None:
            # Amount is given, implement using readinto
            b = bytearray(amt)
            n = self.readinto(b)
            return memoryview(b)[:n].tobytes()
        else:
            # Amount is not given (unbounded read) so we must check self.length
            # and self.chunked

            if self.chunked:
                return self._readall_chunked()

            if self.length is None:
                s = self.fp.read()
            else:
                try:
                    s = self._safe_read(self.length)
                except IncompleteRead:
                    self._close_conn()
                    raise
                self.length = 0
            self._close_conn()        # we read everything
            return s

    def readinto(self, b):
        """Read up to len(b) bytes into bytearray b and return the number
        of bytes read.
        """

        if self.fp is None:
            return 0

        if self._method == "HEAD":
            self._close_conn()
            return 0

        if self.chunked:
            return self._readinto_chunked(b)

        if self.length is not None:
            if len(b) > self.length:
                # clip the read to the "end of response"
                b = memoryview(b)[0:self.length]

        # we do not use _safe_read() here because this may be a .will_close
        # connection, and the user is reading more bytes than will be provided
        # (for example, reading in 1k chunks)
        n = self.fp.readinto(b)
        if not n and b:
            # Ideally, we would raise IncompleteRead if the content-length
            # wasn't satisfied, but it might break compatibility.
            self._close_conn()
        elif self.length is not None:
            self.length -= n
            if not self.length:
                self._close_conn()
        return n

    def _read_next_chunk_size(self):
        # Read the next chunk size from the file
        line = self.fp.readline(_MAXLINE + 1)
        if len(line) > _MAXLINE:
            raise LineTooLong("chunk size")
        i = line.find(b";")
        if i >= 0:
            line = line[:i] # strip chunk-extensions
        try:
            return int(line, 16)
        except ValueError:
            # close the connection as protocol synchronisation is
            # probably lost
            self._close_conn()
            raise

    def _read_and_discard_trailer(self):
        # read and discard trailer up to the CRLF terminator
        ### note: we shouldn't have any trailers!
        while True:
            line = self.fp.readline(_MAXLINE + 1)
            if len(line) > _MAXLINE:
                raise LineTooLong("trailer line")
            if not line:
                # a vanishingly small number of sites EOF without
                # sending the trailer
                break
            if line in (b'\r\n', b'\n', b''):
                break

    def _get_chunk_left(self):
        # return self.chunk_left, reading a new chunk if necessary.
        # chunk_left == 0: at the end of the current chunk, need to close it
        # chunk_left == None: No current chunk, should read next.
        # This function returns non-zero or None if the last chunk has
        # been read.
        chunk_left = self.chunk_left
        if not chunk_left: # Can be 0 or None
            if chunk_left is not None:
                # We are at the end of chunk, discard chunk end
                self._safe_read(2)  # toss the CRLF at the end of the chunk
            try:
                chunk_left = self._read_next_chunk_size()
            except ValueError:
                raise IncompleteRead(b'')
            if chunk_left == 0:
                # last chunk: 1*("0") [ chunk-extension ] CRLF
                self._read_and_discard_trailer()
                # we read everything; close the "file"
                self._close_conn()
                chunk_left = None
            self.chunk_left = chunk_left
        return chunk_left

    def _readall_chunked(self):
        """Read every remaining chunk and return the joined payload."""
        assert self.chunked != _UNKNOWN
        value = []
        try:
            while True:
                chunk_left = self._get_chunk_left()
                if chunk_left is None:
                    break
                value.append(self._safe_read(chunk_left))
                self.chunk_left = 0
            return b''.join(value)
        except IncompleteRead:
            # re-raise carrying whatever was read before the failure
            raise IncompleteRead(b''.join(value))

    def _readinto_chunked(self, b):
        """Fill buffer *b* from the chunked body; return the byte count."""
        assert self.chunked != _UNKNOWN
        total_bytes = 0
        mvb = memoryview(b)
        try:
            while True:
                chunk_left = self._get_chunk_left()
                if chunk_left is None:
                    return total_bytes

                if len(mvb) <= chunk_left:
                    # the rest of the buffer fits inside the current chunk
                    n = self._safe_readinto(mvb)
                    self.chunk_left = chunk_left - n
                    return total_bytes + n

                # consume the whole current chunk, then loop for the next one
                temp_mvb = mvb[:chunk_left]
                n = self._safe_readinto(temp_mvb)
                mvb = mvb[n:]
                total_bytes += n
                self.chunk_left = 0

        except IncompleteRead:
            raise IncompleteRead(bytes(b[0:total_bytes]))

    def _safe_read(self, amt):
        """Read the number of bytes requested, compensating for partial reads.

        Normally, we have a blocking socket, but a read() can be interrupted
        by a signal (resulting in a partial read).

        Note that we cannot distinguish between EOF and an interrupt when zero
        bytes have been read. IncompleteRead() will be raised in this
        situation.

        This function should be used when <amt> bytes "should" be present for
        reading. If the bytes are truly not available (due to EOF), then the
        IncompleteRead exception can be used to detect the problem.
        """
        s = []
        while amt > 0:
            chunk = self.fp.read(min(amt, MAXAMOUNT))
            if not chunk:
                raise IncompleteRead(b''.join(s), amt)
            s.append(chunk)
            amt -= len(chunk)
        return b"".join(s)

    def _safe_readinto(self, b):
        """Same as _safe_read, but for reading into a buffer."""
        total_bytes = 0
        mvb = memoryview(b)
        while total_bytes < len(b):
            if MAXAMOUNT < len(mvb):
                temp_mvb = mvb[0:MAXAMOUNT]
                n = self.fp.readinto(temp_mvb)
            else:
                n = self.fp.readinto(mvb)
            if not n:
                raise IncompleteRead(bytes(mvb[0:total_bytes]), len(b))
            mvb = mvb[n:]
            total_bytes += n
        return total_bytes

    def read1(self, n=-1):
        """Read with at most one underlying system call.  If at least one
        byte is buffered, return that instead.
        """
        if self.fp is None or self._method == "HEAD":
            return b""
        if self.chunked:
            return self._read1_chunked(n)
        if self.length is not None and (n < 0 or n > self.length):
            n = self.length
        result = self.fp.read1(n)
        if not result and n:
            self._close_conn()
        elif self.length is not None:
            self.length -= len(result)
        return result

    def peek(self, n=-1):
        # Having this enables IOBase.readline() to read more than one
        # byte at a time
        if self.fp is None or self._method == "HEAD":
            return b""
        if self.chunked:
            return self._peek_chunked(n)
        return self.fp.peek(n)

    def readline(self, limit=-1):
        if self.fp is None or self._method == "HEAD":
            return b""
        if self.chunked:
            # Fallback to IOBase readline which uses peek() and read()
            return super().readline(limit)
        if self.length is not None and (limit < 0 or limit > self.length):
            limit = self.length
        result = self.fp.readline(limit)
        if not result and limit:
            self._close_conn()
        elif self.length is not None:
            self.length -= len(result)
        return result

    def _read1_chunked(self, n):
        # Strictly speaking, _get_chunk_left() may cause more than one read,
        # but that is ok, since that is to satisfy the chunked protocol.
        chunk_left = self._get_chunk_left()
        if chunk_left is None or n == 0:
            return b''
        if not (0 <= n <= chunk_left):
            n = chunk_left # if n is negative or larger than chunk_left
        read = self.fp.read1(n)
        self.chunk_left -= len(read)
        if not read:
            raise IncompleteRead(b"")
        return read

    def _peek_chunked(self, n):
        # Strictly speaking, _get_chunk_left() may cause more than one read,
        # but that is ok, since that is to satisfy the chunked protocol.
        try:
            chunk_left = self._get_chunk_left()
        except IncompleteRead:
            return b'' # peek doesn't worry about protocol
        if chunk_left is None:
            return b'' # eof
        # peek is allowed to return more than requested.  Just request the
        # entire chunk, and truncate what we get.
        return self.fp.peek(chunk_left)[:chunk_left]

    def fileno(self):
        return self.fp.fileno()

    def getheader(self, name, default=None):
        '''Returns the value of the header matching *name*.

        If there are multiple matching headers, the values are
        combined into a single string separated by commas and spaces.

        If no matching header is found, returns *default* or None if
        the *default* is not specified.

        If the headers are unknown, raises http.client.ResponseNotReady.

        '''
        if self.headers is None:
            raise ResponseNotReady()
        headers = self.headers.get_all(name) or default
        if isinstance(headers, str) or not hasattr(headers, '__iter__'):
            return headers
        else:
            return ', '.join(headers)

    def getheaders(self):
        """Return list of (header, value) tuples."""
        if self.headers is None:
            raise ResponseNotReady()
        return list(self.headers.items())

    # We override IOBase.__iter__ so that it doesn't check for closed-ness

    def __iter__(self):
        return self

    # For compatibility with old-style urllib responses.

    def info(self):
        '''Returns an instance of the class mimetools.Message containing
        meta-information associated with the URL.

        When the method is HTTP, these headers are those returned by
        the server at the head of the retrieved HTML page (including
        Content-Length and Content-Type).

        When the method is FTP, a Content-Length header will be
        present if (as is now usual) the server passed back a file
        length in response to the FTP retrieval request. A
        Content-Type header will be present if the MIME type can be
        guessed.

        When the method is local-file, returned headers will include
        a Date representing the file's last-modified time, a
        Content-Length giving file size, and a Content-Type
        containing a guess at the file's type. See also the
        description of the mimetools module.

        '''
        return self.headers

    def geturl(self):
        '''Return the real URL of the page.

        In some cases, the HTTP server redirects a client to another
        URL. The urlopen() function handles this transparently, but in
        some cases the caller needs to know which URL the client was
        redirected to. The geturl() method can be used to get at this
        redirected URL.

        '''
        return self.url

    def getcode(self):
        '''Return the HTTP status code that was sent with the response,
        or None if the URL is not an HTTP URL.

        '''
        return self.status

class HTTPConnection:
    """Client side of one HTTP connection; see the module docstring for
    the request/response state machine it enforces."""

    _http_vsn = 11
    _http_vsn_str = 'HTTP/1.1'

    response_class = HTTPResponse
    default_port = HTTP_PORT
    auto_open = 1
    debuglevel = 0

    @staticmethod
    def _is_textIO(stream):
        """Test whether a file-like object is a text or a binary stream.
        """
        return isinstance(stream, io.TextIOBase)

    @staticmethod
    def _get_content_length(body, method):
        """Get the content-length based on the body.

        If the body is None, we set Content-Length: 0 for methods that expect
        a body (RFC 7230, Section 3.3.2). We also set the Content-Length for
        any method if the body is a str or bytes-like object and not a file.
        """
        if body is None:
            # do an explicit check for not None here to distinguish
            # between unset and set but empty
            if method.upper() in _METHODS_EXPECTING_BODY:
                return 0
            else:
                return None

        if hasattr(body, 'read'):
            # file-like object.
            return None

        try:
            # does it implement the buffer protocol (bytes, bytearray, array)?
            mv = memoryview(body)
            return mv.nbytes
        except TypeError:
            pass

        if isinstance(body, str):
            return len(body)

        return None

    def __init__(self, host, port=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
                 source_address=None, blocksize=8192):
        self.timeout = timeout
        self.source_address = source_address
        self.blocksize = blocksize
        self.sock = None
        self._buffer = []
        self.__response = None
        self.__state = _CS_IDLE
        self._method = None
        self._tunnel_host = None
        self._tunnel_port = None
        self._tunnel_headers = {}

        (self.host, self.port) = self._get_hostport(host, port)

        # This is stored as an instance variable to allow unit
        # tests to replace it with a suitable mockup
        self._create_connection = socket.create_connection

    def set_tunnel(self, host, port=None, headers=None):
        """Set up host and port for HTTP CONNECT tunnelling.

        In a connection that uses HTTP CONNECT tunneling, the host passed to the
        constructor is used as a proxy server that relays all communication to
        the endpoint passed to `set_tunnel`. This is done by sending an HTTP
        CONNECT request to the proxy server when the connection is established.

        This method must be called before the HTTP connection has been
        established.

        The headers argument should be a mapping of extra HTTP headers to send
        with the CONNECT request.
        """

        if self.sock:
            raise RuntimeError("Can't set up tunnel for established connection")

        self._tunnel_host, self._tunnel_port = self._get_hostport(host, port)
        if headers:
            self._tunnel_headers = headers
        else:
            self._tunnel_headers.clear()

    def _get_hostport(self, host, port):
        """Split an optional ":port" suffix off *host*; return (host, port).

        Raises InvalidURL when the suffix is non-empty and not an integer.
        """
        if port is None:
            i = host.rfind(':')
            j = host.rfind(']')         # ipv6 addresses have [...]
            if i > j:
                try:
                    port = int(host[i+1:])
                except ValueError:
                    if host[i+1:] == "": # http://foo.com:/ == http://foo.com/
                        port = self.default_port
                    else:
                        raise InvalidURL("nonnumeric port: '%s'" % host[i+1:])
                host = host[:i]
            else:
                port = self.default_port
            if host and host[0] == '[' and host[-1] == ']':
                host = host[1:-1]

        return (host, port)

    def set_debuglevel(self, level):
        self.debuglevel = level

    def _tunnel(self):
        """Send a CONNECT request for the tunnel target and consume the reply.

        Raises OSError (after closing the connection) if the proxy answers
        with anything other than 200.
        """
        connect_str = "CONNECT %s:%d HTTP/1.0\r\n" % (self._tunnel_host,
            self._tunnel_port)
        connect_bytes = connect_str.encode("ascii")
        self.send(connect_bytes)
        for header, value in self._tunnel_headers.items():
            header_str = "%s: %s\r\n" % (header, value)
            header_bytes = header_str.encode("latin-1")
            self.send(header_bytes)
        self.send(b'\r\n')

        response = self.response_class(self.sock, method=self._method)
        try:
            (version, code, message) = response._read_status()

            if code != http.HTTPStatus.OK:
                self.close()
                raise OSError("Tunnel connection failed: %d %s" % (code,
                                                                   message.strip()))
            while True:
                line = response.fp.readline(_MAXLINE + 1)
                if len(line) > _MAXLINE:
                    raise LineTooLong("header line")
                if not line:
                    # for sites which EOF without sending a trailer
                    break
                if line in (b'\r\n', b'\n', b''):
                    break

                if self.debuglevel > 0:
                    print('header:', line.decode())
        finally:
            # the response object exists only to parse the proxy's reply;
            # release its file object instead of leaving that to the
            # garbage collector
            response.close()

    def connect(self):
        """Connect to the host and port specified in __init__."""
        self.sock = self._create_connection(
            (self.host,self.port), self.timeout, self.source_address)
        self.sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)

        if self._tunnel_host:
            self._tunnel()

    def close(self):
        """Close the connection to the HTTP server."""
        self.__state = _CS_IDLE
        try:
            sock = self.sock
            if sock:
                self.sock = None
                sock.close()   # close it manually... there may be other refs
        finally:
            response = self.__response
            if response:
                self.__response = None
                response.close()

    def send(self, data):
        """Send `data' to the server.
        ``data`` can be a string object, a bytes object, an array object, a
        file-like object that supports a .read() method, or an iterable object.
        """

        if self.sock is None:
            if self.auto_open:
                self.connect()
            else:
                raise NotConnected()

        if self.debuglevel > 0:
            print("send:", repr(data))
        if hasattr(data, "read"):
            # _read_readable() yields blocksize-sized blocks, encoded as
            # iso-8859-1 when the file is a text stream
            for datablock in self._read_readable(data):
                self.sock.sendall(datablock)
            return
        try:
            self.sock.sendall(data)
        except TypeError:
            if isinstance(data, collections.abc.Iterable):
                for d in data:
                    self.sock.sendall(d)
            else:
                raise TypeError("data should be a bytes-like object "
                                "or an iterable, got %r" % type(data))

    def _output(self, s):
        """Add a line of output to the current request buffer.

        Assumes that the line does *not* end with \\r\\n.
        """
        self._buffer.append(s)

    def _read_readable(self, readable):
        """Yield blocks of up to self.blocksize read from *readable*.

        Blocks from text streams are encoded as iso-8859-1.
        """
        if self.debuglevel > 0:
            print("sendIng a read()able")
        encode = self._is_textIO(readable)
        if encode and self.debuglevel > 0:
            print("encoding file using iso-8859-1")
        while True:
            datablock = readable.read(self.blocksize)
            if not datablock:
                break
            if encode:
                datablock = datablock.encode("iso-8859-1")
            yield datablock

    def _send_output(self, message_body=None, encode_chunked=False):
        """Send the currently buffered request and clear the buffer.

        Appends an extra \\r\\n to the buffer.
        A message_body may be specified, to be appended to the request.
        """
        # two empty items make the join end with the blank line that
        # terminates the header block
        self._buffer.extend((b"", b""))
        msg = b"\r\n".join(self._buffer)
        del self._buffer[:]
        self.send(msg)

        if message_body is not None:

            # create a consistent interface to message_body
            if hasattr(message_body, 'read'):
                # Let file-like take precedence over byte-like.  This
                # is needed to allow the current position of mmap'ed
                # files to be taken into account.
                chunks = self._read_readable(message_body)
            else:
                try:
                    # this is solely to check to see if message_body
                    # implements the buffer API.  it /would/ be easier
                    # to capture if PyObject_CheckBuffer was exposed
                    # to Python.
                    memoryview(message_body)
                except TypeError:
                    try:
                        chunks = iter(message_body)
                    except TypeError:
                        raise TypeError("message_body should be a bytes-like "
                                        "object or an iterable, got %r"
                                        % type(message_body))
                else:
                    # the object implements the buffer interface and
                    # can be passed directly into socket methods
                    chunks = (message_body,)

            for chunk in chunks:
                if not chunk:
                    if self.debuglevel > 0:
                        print('Zero length chunk ignored')
                    continue

                if encode_chunked and self._http_vsn == 11:
                    # chunked encoding
                    chunk = f'{len(chunk):X}\r\n'.encode('ascii') + chunk \
                        + b'\r\n'
                self.send(chunk)

            if encode_chunked and self._http_vsn == 11:
                # end chunked transfer
                self.send(b'0\r\n\r\n')

    def putrequest(self, method, url, skip_host=False,
                   skip_accept_encoding=False):
        """Send a request to the server.

        `method' specifies an HTTP request method, e.g. 'GET'.
        `url' specifies the object being requested, e.g. '/index.html'.
        `skip_host' if True does not add automatically a 'Host:' header
        `skip_accept_encoding' if True does not add automatically an
           'Accept-Encoding:' header
        """

        # if a prior response has been completed, then forget about it.
        if self.__response and self.__response.isclosed():
            self.__response = None


        # in certain cases, we cannot issue another request on this connection.
        # this occurs when:
        #   1) we are in the process of sending a request.   (_CS_REQ_STARTED)
        #   2) a response to a previous request has signalled that it is going
        #      to close the connection upon completion.
        #   3) the headers for the previous response have not been read, thus
        #      we cannot determine whether point (2) is true.   (_CS_REQ_SENT)
        #
        # if there is no prior response, then we can request at will.
        #
        # if point (2) is true, then we will have passed the socket to the
        # response (effectively meaning, "there is no prior response"), and
        # will open a new one when a new request is made.
        #
        # Note: if a prior response exists, then we *can* start a new request.
        #       We are not allowed to begin fetching the response to this new
        #       request, however, until that prior response is complete.
        #
        if self.__state == _CS_IDLE:
            self.__state = _CS_REQ_STARTED
        else:
            raise CannotSendRequest(self.__state)

        # Save the method we use, we need it later in the response phase
        self._method = method
        if not url:
            url = '/'
        request = '%s %s %s' % (method, url, self._http_vsn_str)

        # Non-ASCII characters should have been eliminated earlier
        self._output(request.encode('ascii'))

        if self._http_vsn == 11:
            # Issue some standard headers for better HTTP/1.1 compliance

            if not skip_host:
                # this header is issued *only* for HTTP/1.1
                # connections. more specifically, this means it is
                # only issued when the client uses the new
                # HTTPConnection() class. backwards-compat clients
                # will be using HTTP/1.0 and those clients may be
                # issuing this header themselves. we should NOT issue
                # it twice; some web servers (such as Apache) barf
                # when they see two Host: headers

                # If we need a non-standard port,include it in the
                # header.  If the request is going through a proxy,
                # but the host of the actual URL, not the host of the
                # proxy.

                netloc = ''
                if url.startswith('http'):
                    nil, netloc, nil, nil, nil = urlsplit(url)

                if netloc:
                    try:
                        netloc_enc = netloc.encode("ascii")
                    except UnicodeEncodeError:
                        netloc_enc = netloc.encode("idna")
                    self.putheader('Host', netloc_enc)
                else:
                    if self._tunnel_host:
                        host = self._tunnel_host
                        port = self._tunnel_port
                    else:
                        host = self.host
                        port = self.port

                    try:
                        host_enc = host.encode("ascii")
                    except UnicodeEncodeError:
                        host_enc = host.encode("idna")

                    # As per RFC 273, IPv6 address should be wrapped with []
                    # when used as Host header

                    if host.find(':') >= 0:
                        host_enc = b'[' + host_enc + b']'

                    if port == self.default_port:
                        self.putheader('Host', host_enc)
                    else:
                        host_enc = host_enc.decode("ascii")
                        self.putheader('Host', "%s:%s" % (host_enc, port))

            # note: we are assuming that clients will not attempt to set these
            #       headers since *this* library must deal with the
            #       consequences. this also means that when the supporting
            #       libraries are updated to recognize other forms, then this
            #       code should be changed (removed or updated).

            # we only want a Content-Encoding of "identity" since we don't
            # support encodings such as x-gzip or x-deflate.
1170 if not skip_accept_encoding: 1171 self.putheader('Accept-Encoding', 'identity') 1172 1173 # we can accept "chunked" Transfer-Encodings, but no others 1174 # NOTE: no TE header implies *only* "chunked" 1175 #self.putheader('TE', 'chunked') 1176 1177 # if TE is supplied in the header, then it must appear in a 1178 # Connection header. 1179 #self.putheader('Connection', 'TE') 1180 1181 else: 1182 # For HTTP/1.0, the server will assume "not chunked" 1183 pass 1184 1185 def putheader(self, header, *values): 1186 """Send a request header line to the server. 1187 1188 For example: h.putheader('Accept', 'text/html') 1189 """ 1190 if self.__state != _CS_REQ_STARTED: 1191 raise CannotSendHeader() 1192 1193 if hasattr(header, 'encode'): 1194 header = header.encode('ascii') 1195 1196 if not _is_legal_header_name(header): 1197 raise ValueError('Invalid header name %r' % (header,)) 1198 1199 values = list(values) 1200 for i, one_value in enumerate(values): 1201 if hasattr(one_value, 'encode'): 1202 values[i] = one_value.encode('latin-1') 1203 elif isinstance(one_value, int): 1204 values[i] = str(one_value).encode('ascii') 1205 1206 if _is_illegal_header_value(values[i]): 1207 raise ValueError('Invalid header value %r' % (values[i],)) 1208 1209 value = b'\r\n\t'.join(values) 1210 header = header + b': ' + value 1211 self._output(header) 1212 1213 def endheaders(self, message_body=None, *, encode_chunked=False): 1214 """Indicate that the last header line has been sent to the server. 1215 1216 This method sends the request to the server. The optional message_body 1217 argument can be used to pass a message body associated with the 1218 request. 
1219 """ 1220 if self.__state == _CS_REQ_STARTED: 1221 self.__state = _CS_REQ_SENT 1222 else: 1223 raise CannotSendHeader() 1224 self._send_output(message_body, encode_chunked=encode_chunked) 1225 1226 def request(self, method, url, body=None, headers={}, *, 1227 encode_chunked=False): 1228 """Send a complete request to the server.""" 1229 self._send_request(method, url, body, headers, encode_chunked) 1230 1231 def _send_request(self, method, url, body, headers, encode_chunked): 1232 # Honor explicitly requested Host: and Accept-Encoding: headers. 1233 header_names = frozenset(k.lower() for k in headers) 1234 skips = {} 1235 if 'host' in header_names: 1236 skips['skip_host'] = 1 1237 if 'accept-encoding' in header_names: 1238 skips['skip_accept_encoding'] = 1 1239 1240 self.putrequest(method, url, **skips) 1241 1242 # chunked encoding will happen if HTTP/1.1 is used and either 1243 # the caller passes encode_chunked=True or the following 1244 # conditions hold: 1245 # 1. content-length has not been explicitly set 1246 # 2. the body is a file or iterable, but not a str or bytes-like 1247 # 3. 
Transfer-Encoding has NOT been explicitly set by the caller 1248 1249 if 'content-length' not in header_names: 1250 # only chunk body if not explicitly set for backwards 1251 # compatibility, assuming the client code is already handling the 1252 # chunking 1253 if 'transfer-encoding' not in header_names: 1254 # if content-length cannot be automatically determined, fall 1255 # back to chunked encoding 1256 encode_chunked = False 1257 content_length = self._get_content_length(body, method) 1258 if content_length is None: 1259 if body is not None: 1260 if self.debuglevel > 0: 1261 print('Unable to determine size of %r' % body) 1262 encode_chunked = True 1263 self.putheader('Transfer-Encoding', 'chunked') 1264 else: 1265 self.putheader('Content-Length', str(content_length)) 1266 else: 1267 encode_chunked = False 1268 1269 for hdr, value in headers.items(): 1270 self.putheader(hdr, value) 1271 if isinstance(body, str): 1272 # RFC 2616 Section 3.7.1 says that text default has a 1273 # default charset of iso-8859-1. 1274 body = _encode(body, 'body') 1275 self.endheaders(body, encode_chunked=encode_chunked) 1276 1277 def getresponse(self): 1278 """Get the response from the server. 1279 1280 If the HTTPConnection is in the correct state, returns an 1281 instance of HTTPResponse or of whatever object is returned by 1282 the response_class variable. 1283 1284 If a request has not been sent or if a previous response has 1285 not be handled, ResponseNotReady is raised. If the HTTP 1286 response indicates that the connection should be closed, then 1287 it will be closed before the response is returned. When the 1288 connection is closed, the underlying socket is closed. 1289 """ 1290 1291 # if a prior response has been completed, then forget about it. 
1292 if self.__response and self.__response.isclosed(): 1293 self.__response = None 1294 1295 # if a prior response exists, then it must be completed (otherwise, we 1296 # cannot read this response's header to determine the connection-close 1297 # behavior) 1298 # 1299 # note: if a prior response existed, but was connection-close, then the 1300 # socket and response were made independent of this HTTPConnection 1301 # object since a new request requires that we open a whole new 1302 # connection 1303 # 1304 # this means the prior response had one of two states: 1305 # 1) will_close: this connection was reset and the prior socket and 1306 # response operate independently 1307 # 2) persistent: the response was retained and we await its 1308 # isclosed() status to become true. 1309 # 1310 if self.__state != _CS_REQ_SENT or self.__response: 1311 raise ResponseNotReady(self.__state) 1312 1313 if self.debuglevel > 0: 1314 response = self.response_class(self.sock, self.debuglevel, 1315 method=self._method) 1316 else: 1317 response = self.response_class(self.sock, method=self._method) 1318 1319 try: 1320 try: 1321 response.begin() 1322 except ConnectionError: 1323 self.close() 1324 raise 1325 assert response.will_close != _UNKNOWN 1326 self.__state = _CS_IDLE 1327 1328 if response.will_close: 1329 # this effectively passes the connection to the response 1330 self.close() 1331 else: 1332 # remember this, so we can tell when it is complete 1333 self.__response = response 1334 1335 return response 1336 except: 1337 response.close() 1338 raise 1339 1340try: 1341 import ssl 1342except ImportError: 1343 pass 1344else: 1345 class HTTPSConnection(HTTPConnection): 1346 "This class allows communication via SSL." 1347 1348 default_port = HTTPS_PORT 1349 1350 # XXX Should key_file and cert_file be deprecated in favour of context? 
1351 1352 def __init__(self, host, port=None, key_file=None, cert_file=None, 1353 timeout=socket._GLOBAL_DEFAULT_TIMEOUT, 1354 source_address=None, *, context=None, 1355 check_hostname=None, blocksize=8192): 1356 super(HTTPSConnection, self).__init__(host, port, timeout, 1357 source_address, 1358 blocksize=blocksize) 1359 if (key_file is not None or cert_file is not None or 1360 check_hostname is not None): 1361 import warnings 1362 warnings.warn("key_file, cert_file and check_hostname are " 1363 "deprecated, use a custom context instead.", 1364 DeprecationWarning, 2) 1365 self.key_file = key_file 1366 self.cert_file = cert_file 1367 if context is None: 1368 context = ssl._create_default_https_context() 1369 will_verify = context.verify_mode != ssl.CERT_NONE 1370 if check_hostname is None: 1371 check_hostname = context.check_hostname 1372 if check_hostname and not will_verify: 1373 raise ValueError("check_hostname needs a SSL context with " 1374 "either CERT_OPTIONAL or CERT_REQUIRED") 1375 if key_file or cert_file: 1376 context.load_cert_chain(cert_file, key_file) 1377 self._context = context 1378 if check_hostname is not None: 1379 self._context.check_hostname = check_hostname 1380 1381 def connect(self): 1382 "Connect to a host on a given (SSL) port." 1383 1384 super().connect() 1385 1386 if self._tunnel_host: 1387 server_hostname = self._tunnel_host 1388 else: 1389 server_hostname = self.host 1390 1391 self.sock = self._context.wrap_socket(self.sock, 1392 server_hostname=server_hostname) 1393 1394 __all__.append("HTTPSConnection") 1395 1396class HTTPException(Exception): 1397 # Subclasses that define an __init__ must call Exception.__init__ 1398 # or define self.args. Otherwise, str() will fail. 
1399 pass 1400 1401class NotConnected(HTTPException): 1402 pass 1403 1404class InvalidURL(HTTPException): 1405 pass 1406 1407class UnknownProtocol(HTTPException): 1408 def __init__(self, version): 1409 self.args = version, 1410 self.version = version 1411 1412class UnknownTransferEncoding(HTTPException): 1413 pass 1414 1415class UnimplementedFileMode(HTTPException): 1416 pass 1417 1418class IncompleteRead(HTTPException): 1419 def __init__(self, partial, expected=None): 1420 self.args = partial, 1421 self.partial = partial 1422 self.expected = expected 1423 def __repr__(self): 1424 if self.expected is not None: 1425 e = ', %i more expected' % self.expected 1426 else: 1427 e = '' 1428 return '%s(%i bytes read%s)' % (self.__class__.__name__, 1429 len(self.partial), e) 1430 def __str__(self): 1431 return repr(self) 1432 1433class ImproperConnectionState(HTTPException): 1434 pass 1435 1436class CannotSendRequest(ImproperConnectionState): 1437 pass 1438 1439class CannotSendHeader(ImproperConnectionState): 1440 pass 1441 1442class ResponseNotReady(ImproperConnectionState): 1443 pass 1444 1445class BadStatusLine(HTTPException): 1446 def __init__(self, line): 1447 if not line: 1448 line = repr(line) 1449 self.args = line, 1450 self.line = line 1451 1452class LineTooLong(HTTPException): 1453 def __init__(self, line_type): 1454 HTTPException.__init__(self, "got more than %d bytes when reading %s" 1455 % (_MAXLINE, line_type)) 1456 1457class RemoteDisconnected(ConnectionResetError, BadStatusLine): 1458 def __init__(self, *pos, **kw): 1459 BadStatusLine.__init__(self, "") 1460 ConnectionResetError.__init__(self, *pos, **kw) 1461 1462# for backwards compatibility 1463error = HTTPException 1464