• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1r"""HTTP/1.1 client library
2
3<intro stuff goes here>
4<other stuff, too>
5
6HTTPConnection goes through a number of "states", which define when a client
7may legally make another request or fetch the response for a particular
8request. This diagram details these state transitions:
9
10    (null)
11      |
12      | HTTPConnection()
13      v
14    Idle
15      |
16      | putrequest()
17      v
18    Request-started
19      |
20      | ( putheader() )*  endheaders()
21      v
22    Request-sent
23      |\_____________________________
24      |                              | getresponse() raises
25      | response = getresponse()     | ConnectionError
26      v                              v
27    Unread-response                Idle
28    [Response-headers-read]
29      |\____________________
30      |                     |
31      | response.read()     | putrequest()
32      v                     v
33    Idle                  Req-started-unread-response
34                     ______/|
35                   /        |
36   response.read() |        | ( putheader() )*  endheaders()
37                   v        v
38       Request-started    Req-sent-unread-response
39                            |
40                            | response.read()
41                            v
42                          Request-sent
43
44This diagram presents the following rules:
45  -- a second request may not be started until {response-headers-read}
46  -- a response [object] cannot be retrieved until {request-sent}
47  -- there is no differentiation between an unread response body and a
48     partially read response body
49
50Note: this enforcement is applied by the HTTPConnection class. The
51      HTTPResponse class does not enforce this state machine, which
52      implies sophisticated clients may accelerate the request/response
53      pipeline. Caution should be taken, though: accelerating the states
54      beyond the above pattern may imply knowledge of the server's
55      connection-close behavior for certain requests. For example, it
56      is impossible to tell whether the server will close the connection
57      UNTIL the response headers have been read; this means that further
58      requests cannot be placed into the pipeline until it is known that
59      the server will NOT be closing the connection.
60
61Logical State                  __state            __response
62-------------                  -------            ----------
63Idle                           _CS_IDLE           None
64Request-started                _CS_REQ_STARTED    None
65Request-sent                   _CS_REQ_SENT       None
66Unread-response                _CS_IDLE           <response_class>
67Req-started-unread-response    _CS_REQ_STARTED    <response_class>
68Req-sent-unread-response       _CS_REQ_SENT       <response_class>
69"""
70
71import email.parser
72import email.message
73import http
74import io
75import re
76import socket
77import collections.abc
78from urllib.parse import urlsplit
79
# HTTPMessage, parse_headers(), and the HTTP status code constants are
# intentionally omitted for simplicity
__all__ = ["HTTPResponse", "HTTPConnection",
           "HTTPException", "NotConnected", "UnknownProtocol",
           "UnknownTransferEncoding", "UnimplementedFileMode",
           "IncompleteRead", "InvalidURL", "ImproperConnectionState",
           "CannotSendRequest", "CannotSendHeader", "ResponseNotReady",
           "BadStatusLine", "LineTooLong", "RemoteDisconnected", "error",
           "responses"]

# default ports; HTTPConnection.default_port is set to HTTP_PORT
HTTP_PORT = 80
HTTPS_PORT = 443

# placeholder stored in HTTPResponse attributes (version, status, reason,
# chunked, chunk_left, length, will_close) until the response is parsed
_UNKNOWN = 'UNKNOWN'

# connection states (the values of HTTPConnection.__state; see the table
# at the end of the module docstring)
_CS_IDLE = 'Idle'
_CS_REQ_STARTED = 'Request-started'
_CS_REQ_SENT = 'Request-sent'


# hack to maintain backwards compatibility: inject every HTTPStatus member
# (e.g. CONTINUE, NO_CONTENT, NOT_MODIFIED) into this module's namespace
globals().update(http.HTTPStatus.__members__)

# another hack to maintain backwards compatibility
# Mapping status codes to official W3C names
responses = {v: v.phrase for v in http.HTTPStatus.__members__.values()}

# maximal amount of data to read at one time in _safe_read
MAXAMOUNT = 1048576

# maximal line length when calling readline().
_MAXLINE = 65536
# maximal number of header lines accepted by parse_headers()
_MAXHEADERS = 100
114
115# Header name/value ABNF (http://tools.ietf.org/html/rfc7230#section-3.2)
116#
117# VCHAR          = %x21-7E
118# obs-text       = %x80-FF
119# header-field   = field-name ":" OWS field-value OWS
120# field-name     = token
121# field-value    = *( field-content / obs-fold )
122# field-content  = field-vchar [ 1*( SP / HTAB ) field-vchar ]
123# field-vchar    = VCHAR / obs-text
124#
125# obs-fold       = CRLF 1*( SP / HTAB )
126#                ; obsolete line folding
127#                ; see Section 3.2.4
128
129# token          = 1*tchar
130#
131# tchar          = "!" / "#" / "$" / "%" / "&" / "'" / "*"
132#                / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~"
133#                / DIGIT / ALPHA
134#                ; any VCHAR, except delimiters
135#
136# VCHAR defined in http://tools.ietf.org/html/rfc5234#appendix-B.1
137
# the patterns for both name and value are more lenient than RFC
# definitions to allow for backwards compatibility
# A legal name is non-empty, does not start with ':' or whitespace, and
# contains no ':' / CR / LF anywhere (checked with fullmatch on bytes).
_is_legal_header_name = re.compile(rb'[^:\s][^:\r\n]*').fullmatch
# A value is illegal when it contains a LF not followed by SP/HTAB, or a CR
# not followed by SP/HTAB/LF (checked with search on bytes).
_is_illegal_header_value = re.compile(rb'\n(?![ \t])|\r(?![ \t\n])').search

# We always set the Content-Length header for these methods because some
# servers will otherwise respond with a 411
_METHODS_EXPECTING_BODY = {'PATCH', 'POST', 'PUT'}
146
147
148def _encode(data, name='data'):
149    """Call data.encode("latin-1") but show a better error message."""
150    try:
151        return data.encode("latin-1")
152    except UnicodeEncodeError as err:
153        raise UnicodeEncodeError(
154            err.encoding,
155            err.object,
156            err.start,
157            err.end,
158            "%s (%.20r) is not valid Latin-1. Use %s.encode('utf-8') "
159            "if you want to send it encoded in UTF-8." %
160            (name.title(), data[err.start:err.end], name)) from None
161
162
class HTTPMessage(email.message.Message):
    # XXX The only usage of this method is in
    # http.server.CGIHTTPRequestHandler.  Maybe move the code there so
    # that it doesn't need to be part of the public API.  The API has
    # never been defined so this could cause backwards compatibility
    # issues.

    def getallmatchingheaders(self, name):
        """Find all header lines matching a given header name.

        Returns a list of "Name: value" strings, one per occurrence of the
        header, in the order they appear in the message.  The header name is
        reproduced as stored; the comparison is case-insensitive.  If the
        header does not occur, an empty list is returned.

        """
        # The previous implementation iterated over self.keys() and compared
        # each key against name + ':'.  Keys never carry the trailing colon,
        # so nothing could ever match and the result was always [].
        name = name.lower()
        return ['%s: %s' % (key, value)
                for key, value in self.items()
                if key.lower() == name]
192
def parse_headers(fp, _class=HTTPMessage):
    """Read an RFC 2822 style header block from *fp* and parse it.

    The email parser works on str, but wrapping the binary stream in a
    TextIOWrapper would read ahead and swallow bytes that must later be
    consumed as bytes.  The header lines are therefore pulled from *fp* one
    at a time as bytes, stopping at the blank line (or EOF), and only then
    decoded as iso-8859-1 and handed to the parser.

    Raises LineTooLong for an over-long line and HTTPException when more
    than _MAXHEADERS lines are received.
    """
    terminators = (b'\r\n', b'\n', b'')
    raw_lines = []
    while True:
        current = fp.readline(_MAXLINE + 1)
        if len(current) > _MAXLINE:
            raise LineTooLong("header line")
        raw_lines.append(current)
        if len(raw_lines) > _MAXHEADERS:
            raise HTTPException("got more than %d headers" % _MAXHEADERS)
        if current in terminators:
            break
    header_text = b''.join(raw_lines).decode('iso-8859-1')
    parser = email.parser.Parser(_class=_class)
    return parser.parsestr(header_text)
215
216
217class HTTPResponse(io.BufferedIOBase):
218
219    # See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details.
220
221    # The bytes from the socket object are iso-8859-1 strings.
222    # See RFC 2616 sec 2.2 which notes an exception for MIME-encoded
223    # text following RFC 2047.  The basic status line parsing only
224    # accepts iso-8859-1.
225
226    def __init__(self, sock, debuglevel=0, method=None, url=None):
227        # If the response includes a content-length header, we need to
228        # make sure that the client doesn't read more than the
229        # specified number of bytes.  If it does, it will block until
230        # the server times out and closes the connection.  This will
231        # happen if a self.fp.read() is done (without a size) whether
232        # self.fp is buffered or not.  So, no self.fp.read() by
233        # clients unless they know what they are doing.
234        self.fp = sock.makefile("rb")
235        self.debuglevel = debuglevel
236        self._method = method
237
238        # The HTTPResponse object is returned via urllib.  The clients
239        # of http and urllib expect different attributes for the
240        # headers.  headers is used here and supports urllib.  msg is
241        # provided as a backwards compatibility layer for http
242        # clients.
243
244        self.headers = self.msg = None
245
246        # from the Status-Line of the response
247        self.version = _UNKNOWN # HTTP-Version
248        self.status = _UNKNOWN  # Status-Code
249        self.reason = _UNKNOWN  # Reason-Phrase
250
251        self.chunked = _UNKNOWN         # is "chunked" being used?
252        self.chunk_left = _UNKNOWN      # bytes left to read in current chunk
253        self.length = _UNKNOWN          # number of bytes left in response
254        self.will_close = _UNKNOWN      # conn will close at end of response
255
256    def _read_status(self):
257        line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
258        if len(line) > _MAXLINE:
259            raise LineTooLong("status line")
260        if self.debuglevel > 0:
261            print("reply:", repr(line))
262        if not line:
263            # Presumably, the server closed the connection before
264            # sending a valid response.
265            raise RemoteDisconnected("Remote end closed connection without"
266                                     " response")
267        try:
268            version, status, reason = line.split(None, 2)
269        except ValueError:
270            try:
271                version, status = line.split(None, 1)
272                reason = ""
273            except ValueError:
274                # empty version will cause next test to fail.
275                version = ""
276        if not version.startswith("HTTP/"):
277            self._close_conn()
278            raise BadStatusLine(line)
279
280        # The status code is a three-digit number
281        try:
282            status = int(status)
283            if status < 100 or status > 999:
284                raise BadStatusLine(line)
285        except ValueError:
286            raise BadStatusLine(line)
287        return version, status, reason
288
289    def begin(self):
290        if self.headers is not None:
291            # we've already started reading the response
292            return
293
294        # read until we get a non-100 response
295        while True:
296            version, status, reason = self._read_status()
297            if status != CONTINUE:
298                break
299            # skip the header from the 100 response
300            while True:
301                skip = self.fp.readline(_MAXLINE + 1)
302                if len(skip) > _MAXLINE:
303                    raise LineTooLong("header line")
304                skip = skip.strip()
305                if not skip:
306                    break
307                if self.debuglevel > 0:
308                    print("header:", skip)
309
310        self.code = self.status = status
311        self.reason = reason.strip()
312        if version in ("HTTP/1.0", "HTTP/0.9"):
313            # Some servers might still return "0.9", treat it as 1.0 anyway
314            self.version = 10
315        elif version.startswith("HTTP/1."):
316            self.version = 11   # use HTTP/1.1 code for HTTP/1.x where x>=1
317        else:
318            raise UnknownProtocol(version)
319
320        self.headers = self.msg = parse_headers(self.fp)
321
322        if self.debuglevel > 0:
323            for hdr in self.headers:
324                print("header:", hdr + ":", self.headers.get(hdr))
325
326        # are we using the chunked-style of transfer encoding?
327        tr_enc = self.headers.get("transfer-encoding")
328        if tr_enc and tr_enc.lower() == "chunked":
329            self.chunked = True
330            self.chunk_left = None
331        else:
332            self.chunked = False
333
334        # will the connection close at the end of the response?
335        self.will_close = self._check_close()
336
337        # do we have a Content-Length?
338        # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
339        self.length = None
340        length = self.headers.get("content-length")
341
342         # are we using the chunked-style of transfer encoding?
343        tr_enc = self.headers.get("transfer-encoding")
344        if length and not self.chunked:
345            try:
346                self.length = int(length)
347            except ValueError:
348                self.length = None
349            else:
350                if self.length < 0:  # ignore nonsensical negative lengths
351                    self.length = None
352        else:
353            self.length = None
354
355        # does the body have a fixed length? (of zero)
356        if (status == NO_CONTENT or status == NOT_MODIFIED or
357            100 <= status < 200 or      # 1xx codes
358            self._method == "HEAD"):
359            self.length = 0
360
361        # if the connection remains open, and we aren't using chunked, and
362        # a content-length was not provided, then assume that the connection
363        # WILL close.
364        if (not self.will_close and
365            not self.chunked and
366            self.length is None):
367            self.will_close = True
368
369    def _check_close(self):
370        conn = self.headers.get("connection")
371        if self.version == 11:
372            # An HTTP/1.1 proxy is assumed to stay open unless
373            # explicitly closed.
374            if conn and "close" in conn.lower():
375                return True
376            return False
377
378        # Some HTTP/1.0 implementations have support for persistent
379        # connections, using rules different than HTTP/1.1.
380
381        # For older HTTP, Keep-Alive indicates persistent connection.
382        if self.headers.get("keep-alive"):
383            return False
384
385        # At least Akamai returns a "Connection: Keep-Alive" header,
386        # which was supposed to be sent by the client.
387        if conn and "keep-alive" in conn.lower():
388            return False
389
390        # Proxy-Connection is a netscape hack.
391        pconn = self.headers.get("proxy-connection")
392        if pconn and "keep-alive" in pconn.lower():
393            return False
394
395        # otherwise, assume it will close
396        return True
397
398    def _close_conn(self):
399        fp = self.fp
400        self.fp = None
401        fp.close()
402
403    def close(self):
404        try:
405            super().close() # set "closed" flag
406        finally:
407            if self.fp:
408                self._close_conn()
409
410    # These implementations are for the benefit of io.BufferedReader.
411
412    # XXX This class should probably be revised to act more like
413    # the "raw stream" that BufferedReader expects.
414
415    def flush(self):
416        super().flush()
417        if self.fp:
418            self.fp.flush()
419
420    def readable(self):
421        """Always returns True"""
422        return True
423
424    # End of "raw stream" methods
425
426    def isclosed(self):
427        """True if the connection is closed."""
428        # NOTE: it is possible that we will not ever call self.close(). This
429        #       case occurs when will_close is TRUE, length is None, and we
430        #       read up to the last byte, but NOT past it.
431        #
432        # IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be
433        #          called, meaning self.isclosed() is meaningful.
434        return self.fp is None
435
436    def read(self, amt=None):
437        if self.fp is None:
438            return b""
439
440        if self._method == "HEAD":
441            self._close_conn()
442            return b""
443
444        if amt is not None:
445            # Amount is given, implement using readinto
446            b = bytearray(amt)
447            n = self.readinto(b)
448            return memoryview(b)[:n].tobytes()
449        else:
450            # Amount is not given (unbounded read) so we must check self.length
451            # and self.chunked
452
453            if self.chunked:
454                return self._readall_chunked()
455
456            if self.length is None:
457                s = self.fp.read()
458            else:
459                try:
460                    s = self._safe_read(self.length)
461                except IncompleteRead:
462                    self._close_conn()
463                    raise
464                self.length = 0
465            self._close_conn()        # we read everything
466            return s
467
468    def readinto(self, b):
469        """Read up to len(b) bytes into bytearray b and return the number
470        of bytes read.
471        """
472
473        if self.fp is None:
474            return 0
475
476        if self._method == "HEAD":
477            self._close_conn()
478            return 0
479
480        if self.chunked:
481            return self._readinto_chunked(b)
482
483        if self.length is not None:
484            if len(b) > self.length:
485                # clip the read to the "end of response"
486                b = memoryview(b)[0:self.length]
487
488        # we do not use _safe_read() here because this may be a .will_close
489        # connection, and the user is reading more bytes than will be provided
490        # (for example, reading in 1k chunks)
491        n = self.fp.readinto(b)
492        if not n and b:
493            # Ideally, we would raise IncompleteRead if the content-length
494            # wasn't satisfied, but it might break compatibility.
495            self._close_conn()
496        elif self.length is not None:
497            self.length -= n
498            if not self.length:
499                self._close_conn()
500        return n
501
502    def _read_next_chunk_size(self):
503        # Read the next chunk size from the file
504        line = self.fp.readline(_MAXLINE + 1)
505        if len(line) > _MAXLINE:
506            raise LineTooLong("chunk size")
507        i = line.find(b";")
508        if i >= 0:
509            line = line[:i] # strip chunk-extensions
510        try:
511            return int(line, 16)
512        except ValueError:
513            # close the connection as protocol synchronisation is
514            # probably lost
515            self._close_conn()
516            raise
517
518    def _read_and_discard_trailer(self):
519        # read and discard trailer up to the CRLF terminator
520        ### note: we shouldn't have any trailers!
521        while True:
522            line = self.fp.readline(_MAXLINE + 1)
523            if len(line) > _MAXLINE:
524                raise LineTooLong("trailer line")
525            if not line:
526                # a vanishingly small number of sites EOF without
527                # sending the trailer
528                break
529            if line in (b'\r\n', b'\n', b''):
530                break
531
532    def _get_chunk_left(self):
533        # return self.chunk_left, reading a new chunk if necessary.
534        # chunk_left == 0: at the end of the current chunk, need to close it
535        # chunk_left == None: No current chunk, should read next.
536        # This function returns non-zero or None if the last chunk has
537        # been read.
538        chunk_left = self.chunk_left
539        if not chunk_left: # Can be 0 or None
540            if chunk_left is not None:
541                # We are at the end of chunk, discard chunk end
542                self._safe_read(2)  # toss the CRLF at the end of the chunk
543            try:
544                chunk_left = self._read_next_chunk_size()
545            except ValueError:
546                raise IncompleteRead(b'')
547            if chunk_left == 0:
548                # last chunk: 1*("0") [ chunk-extension ] CRLF
549                self._read_and_discard_trailer()
550                # we read everything; close the "file"
551                self._close_conn()
552                chunk_left = None
553            self.chunk_left = chunk_left
554        return chunk_left
555
556    def _readall_chunked(self):
557        assert self.chunked != _UNKNOWN
558        value = []
559        try:
560            while True:
561                chunk_left = self._get_chunk_left()
562                if chunk_left is None:
563                    break
564                value.append(self._safe_read(chunk_left))
565                self.chunk_left = 0
566            return b''.join(value)
567        except IncompleteRead:
568            raise IncompleteRead(b''.join(value))
569
570    def _readinto_chunked(self, b):
571        assert self.chunked != _UNKNOWN
572        total_bytes = 0
573        mvb = memoryview(b)
574        try:
575            while True:
576                chunk_left = self._get_chunk_left()
577                if chunk_left is None:
578                    return total_bytes
579
580                if len(mvb) <= chunk_left:
581                    n = self._safe_readinto(mvb)
582                    self.chunk_left = chunk_left - n
583                    return total_bytes + n
584
585                temp_mvb = mvb[:chunk_left]
586                n = self._safe_readinto(temp_mvb)
587                mvb = mvb[n:]
588                total_bytes += n
589                self.chunk_left = 0
590
591        except IncompleteRead:
592            raise IncompleteRead(bytes(b[0:total_bytes]))
593
594    def _safe_read(self, amt):
595        """Read the number of bytes requested, compensating for partial reads.
596
597        Normally, we have a blocking socket, but a read() can be interrupted
598        by a signal (resulting in a partial read).
599
600        Note that we cannot distinguish between EOF and an interrupt when zero
601        bytes have been read. IncompleteRead() will be raised in this
602        situation.
603
604        This function should be used when <amt> bytes "should" be present for
605        reading. If the bytes are truly not available (due to EOF), then the
606        IncompleteRead exception can be used to detect the problem.
607        """
608        s = []
609        while amt > 0:
610            chunk = self.fp.read(min(amt, MAXAMOUNT))
611            if not chunk:
612                raise IncompleteRead(b''.join(s), amt)
613            s.append(chunk)
614            amt -= len(chunk)
615        return b"".join(s)
616
617    def _safe_readinto(self, b):
618        """Same as _safe_read, but for reading into a buffer."""
619        total_bytes = 0
620        mvb = memoryview(b)
621        while total_bytes < len(b):
622            if MAXAMOUNT < len(mvb):
623                temp_mvb = mvb[0:MAXAMOUNT]
624                n = self.fp.readinto(temp_mvb)
625            else:
626                n = self.fp.readinto(mvb)
627            if not n:
628                raise IncompleteRead(bytes(mvb[0:total_bytes]), len(b))
629            mvb = mvb[n:]
630            total_bytes += n
631        return total_bytes
632
633    def read1(self, n=-1):
634        """Read with at most one underlying system call.  If at least one
635        byte is buffered, return that instead.
636        """
637        if self.fp is None or self._method == "HEAD":
638            return b""
639        if self.chunked:
640            return self._read1_chunked(n)
641        if self.length is not None and (n < 0 or n > self.length):
642            n = self.length
643        result = self.fp.read1(n)
644        if not result and n:
645            self._close_conn()
646        elif self.length is not None:
647            self.length -= len(result)
648        return result
649
650    def peek(self, n=-1):
651        # Having this enables IOBase.readline() to read more than one
652        # byte at a time
653        if self.fp is None or self._method == "HEAD":
654            return b""
655        if self.chunked:
656            return self._peek_chunked(n)
657        return self.fp.peek(n)
658
659    def readline(self, limit=-1):
660        if self.fp is None or self._method == "HEAD":
661            return b""
662        if self.chunked:
663            # Fallback to IOBase readline which uses peek() and read()
664            return super().readline(limit)
665        if self.length is not None and (limit < 0 or limit > self.length):
666            limit = self.length
667        result = self.fp.readline(limit)
668        if not result and limit:
669            self._close_conn()
670        elif self.length is not None:
671            self.length -= len(result)
672        return result
673
674    def _read1_chunked(self, n):
675        # Strictly speaking, _get_chunk_left() may cause more than one read,
676        # but that is ok, since that is to satisfy the chunked protocol.
677        chunk_left = self._get_chunk_left()
678        if chunk_left is None or n == 0:
679            return b''
680        if not (0 <= n <= chunk_left):
681            n = chunk_left # if n is negative or larger than chunk_left
682        read = self.fp.read1(n)
683        self.chunk_left -= len(read)
684        if not read:
685            raise IncompleteRead(b"")
686        return read
687
688    def _peek_chunked(self, n):
689        # Strictly speaking, _get_chunk_left() may cause more than one read,
690        # but that is ok, since that is to satisfy the chunked protocol.
691        try:
692            chunk_left = self._get_chunk_left()
693        except IncompleteRead:
694            return b'' # peek doesn't worry about protocol
695        if chunk_left is None:
696            return b'' # eof
697        # peek is allowed to return more than requested.  Just request the
698        # entire chunk, and truncate what we get.
699        return self.fp.peek(chunk_left)[:chunk_left]
700
701    def fileno(self):
702        return self.fp.fileno()
703
704    def getheader(self, name, default=None):
705        '''Returns the value of the header matching *name*.
706
707        If there are multiple matching headers, the values are
708        combined into a single string separated by commas and spaces.
709
710        If no matching header is found, returns *default* or None if
711        the *default* is not specified.
712
713        If the headers are unknown, raises http.client.ResponseNotReady.
714
715        '''
716        if self.headers is None:
717            raise ResponseNotReady()
718        headers = self.headers.get_all(name) or default
719        if isinstance(headers, str) or not hasattr(headers, '__iter__'):
720            return headers
721        else:
722            return ', '.join(headers)
723
724    def getheaders(self):
725        """Return list of (header, value) tuples."""
726        if self.headers is None:
727            raise ResponseNotReady()
728        return list(self.headers.items())
729
730    # We override IOBase.__iter__ so that it doesn't check for closed-ness
731
732    def __iter__(self):
733        return self
734
735    # For compatibility with old-style urllib responses.
736
737    def info(self):
738        '''Returns an instance of the class mimetools.Message containing
739        meta-information associated with the URL.
740
741        When the method is HTTP, these headers are those returned by
742        the server at the head of the retrieved HTML page (including
743        Content-Length and Content-Type).
744
745        When the method is FTP, a Content-Length header will be
746        present if (as is now usual) the server passed back a file
747        length in response to the FTP retrieval request. A
748        Content-Type header will be present if the MIME type can be
749        guessed.
750
751        When the method is local-file, returned headers will include
752        a Date representing the file's last-modified time, a
753        Content-Length giving file size, and a Content-Type
754        containing a guess at the file's type. See also the
755        description of the mimetools module.
756
757        '''
758        return self.headers
759
760    def geturl(self):
761        '''Return the real URL of the page.
762
763        In some cases, the HTTP server redirects a client to another
764        URL. The urlopen() function handles this transparently, but in
765        some cases the caller needs to know which URL the client was
766        redirected to. The geturl() method can be used to get at this
767        redirected URL.
768
769        '''
770        return self.url
771
772    def getcode(self):
773        '''Return the HTTP status code that was sent with the response,
774        or None if the URL is not an HTTP URL.
775
776        '''
777        return self.status
778
class HTTPConnection:
    """One client-side HTTP/1.1 connection to a single host.

    A request is built with putrequest(), putheader() and endheaders()
    (or all at once with request()) and its response is fetched with
    getresponse().  The instance keeps a small state machine (idle ->
    request started -> request sent) and raises CannotSendRequest,
    CannotSendHeader or ResponseNotReady when the methods are called
    out of order.  The socket is opened lazily by send() as long as
    auto_open is true.
    """

    _http_vsn = 11
    _http_vsn_str = 'HTTP/1.1'

    response_class = HTTPResponse
    default_port = HTTP_PORT
    auto_open = 1
    debuglevel = 0

    # Characters that must never be written into the request line: they
    # would let a caller-supplied method or URL terminate the line early
    # and smuggle extra headers or requests onto the connection.
    _url_forbidden_chars = frozenset(map(chr, range(0x21))) | {'\x7f'}
    _method_forbidden_chars = frozenset(map(chr, range(0x20)))

    @staticmethod
    def _is_textIO(stream):
        """Test whether a file-like object is a text or a binary stream.
        """
        return isinstance(stream, io.TextIOBase)

    @staticmethod
    def _find_forbidden_char(text, forbidden):
        """Return the first character of text that is in forbidden, or None."""
        return next((ch for ch in text if ch in forbidden), None)

    @staticmethod
    def _get_content_length(body, method):
        """Get the content-length based on the body.

        If the body is None, we set Content-Length: 0 for methods that expect
        a body (RFC 7230, Section 3.3.2). We also set the Content-Length for
        any method if the body is a str or bytes-like object and not a file.

        Returns an int, or None when no length can be determined.
        """
        if body is None:
            # do an explicit check for not None here to distinguish
            # between unset and set but empty
            if method.upper() in _METHODS_EXPECTING_BODY:
                return 0
            else:
                return None

        if hasattr(body, 'read'):
            # file-like object.
            return None

        try:
            # does it implement the buffer protocol (bytes, bytearray, array)?
            mv = memoryview(body)
            return mv.nbytes
        except TypeError:
            pass

        if isinstance(body, str):
            # str bodies are encoded one byte per character before sending
            return len(body)

        return None

    def __init__(self, host, port=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
                 source_address=None, blocksize=8192):
        """Record the connection parameters; no socket is opened here.

        host may carry the port as 'host:port' when port is None; see
        _get_hostport().  timeout and source_address are handed to the
        connection factory by connect().  blocksize is the read size used
        when a file-like request body is sent.
        """
        self.timeout = timeout
        self.source_address = source_address
        self.blocksize = blocksize
        self.sock = None
        self._buffer = []
        self.__response = None
        self.__state = _CS_IDLE
        self._method = None
        self._tunnel_host = None
        self._tunnel_port = None
        self._tunnel_headers = {}

        (self.host, self.port) = self._get_hostport(host, port)

        # This is stored as an instance variable to allow unit
        # tests to replace it with a suitable mockup
        self._create_connection = socket.create_connection

    def set_tunnel(self, host, port=None, headers=None):
        """Set up host and port for HTTP CONNECT tunnelling.

        In a connection that uses HTTP CONNECT tunneling, the host passed to the
        constructor is used as a proxy server that relays all communication to
        the endpoint passed to `set_tunnel`. This is done by sending an HTTP
        CONNECT request to the proxy server when the connection is established.

        This method must be called before the HTTP connection has been
        established; otherwise RuntimeError is raised.

        The headers argument should be a mapping of extra HTTP headers to send
        with the CONNECT request.
        """

        if self.sock:
            raise RuntimeError("Can't set up tunnel for established connection")

        self._tunnel_host, self._tunnel_port = self._get_hostport(host, port)
        # Keep a private copy: storing the caller's mapping by reference
        # would let a later set_tunnel() call without headers empty the
        # caller's own dict.
        self._tunnel_headers = dict(headers) if headers else {}

    def _get_hostport(self, host, port):
        """Split host into a (host, port) tuple.

        When port is None it is parsed from a trailing ':port' on host,
        falling back to default_port; a non-numeric port raises InvalidURL.
        The square brackets around an IPv6 literal are removed whether or
        not the port was given explicitly.
        """
        if port is None:
            i = host.rfind(':')
            j = host.rfind(']')         # ipv6 addresses have [...]
            if i > j:
                try:
                    port = int(host[i+1:])
                except ValueError:
                    if host[i+1:] == "": # http://foo.com:/ == http://foo.com/
                        port = self.default_port
                    else:
                        raise InvalidURL("nonnumeric port: '%s'" % host[i+1:])
                host = host[:i]
            else:
                port = self.default_port
        # Strip the brackets even for an explicit port, so that
        # ('[::1]', 8080) and '[::1]:8080' yield the same host.
        if host and host[0] == '[' and host[-1] == ']':
            host = host[1:-1]

        return (host, port)

    def set_debuglevel(self, level):
        """Set the debug level; any value above 0 enables debug printing."""
        self.debuglevel = level

    def _tunnel(self):
        """Send the CONNECT request to the proxy and consume its reply.

        Raises OSError (after closing the connection) when the proxy
        answers with anything other than 200, and LineTooLong when a reply
        header line exceeds _MAXLINE bytes.
        """
        tunnel_host = self._tunnel_host
        if ':' in tunnel_host and not tunnel_host.startswith('['):
            # an IPv6 literal has to be bracketed in front of ":port"
            tunnel_host = '[%s]' % tunnel_host
        connect_str = "CONNECT %s:%d HTTP/1.0\r\n" % (tunnel_host,
                                                      self._tunnel_port)
        self.send(connect_str.encode("ascii"))
        for header, value in self._tunnel_headers.items():
            header_str = "%s: %s\r\n" % (header, value)
            self.send(header_str.encode("latin-1"))
        self.send(b'\r\n')

        response = self.response_class(self.sock, method=self._method)
        try:
            (version, code, message) = response._read_status()

            if code != http.HTTPStatus.OK:
                self.close()
                raise OSError("Tunnel connection failed: %d %s" % (code,
                                                                   message.strip()))
            while True:
                line = response.fp.readline(_MAXLINE + 1)
                if len(line) > _MAXLINE:
                    raise LineTooLong("header line")
                if not line:
                    # for sites which EOF without sending a trailer
                    break
                if line in (b'\r\n', b'\n', b''):
                    break

                if self.debuglevel > 0:
                    print('header:', line.decode())
        finally:
            # The response object was only needed to parse the proxy's
            # reply; close it so it does not linger on every exit path.
            response.close()

    def connect(self):
        """Connect to the host and port specified in __init__."""
        self.sock = self._create_connection(
            (self.host, self.port), self.timeout, self.source_address)
        self.sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)

        if self._tunnel_host:
            self._tunnel()

    def close(self):
        """Close the connection to the HTTP server."""
        self.__state = _CS_IDLE
        try:
            sock = self.sock
            if sock:
                self.sock = None
                sock.close()   # close it manually... there may be other refs
        finally:
            # the pending response is released even if closing the socket
            # raised
            response = self.__response
            if response:
                self.__response = None
                response.close()

    def send(self, data):
        """Send `data' to the server.
        ``data`` can be a string object, a bytes object, an array object, a
        file-like object that supports a .read() method, or an iterable object.

        Opens the connection first when there is none and auto_open is
        true; raises NotConnected otherwise.
        """

        if self.sock is None:
            if self.auto_open:
                self.connect()
            else:
                raise NotConnected()

        if self.debuglevel > 0:
            print("send:", repr(data))
        if hasattr(data, "read"):
            # _read_readable() does the block-wise reading (and the
            # iso-8859-1 encoding of text streams) for us.
            for datablock in self._read_readable(data):
                self.sock.sendall(datablock)
            return
        try:
            self.sock.sendall(data)
        except TypeError:
            if isinstance(data, collections.abc.Iterable):
                for d in data:
                    self.sock.sendall(d)
            else:
                raise TypeError("data should be a bytes-like object "
                                "or an iterable, got %r" % type(data))

    def _output(self, s):
        """Add a line of output to the current request buffer.

        Assumes that the line does *not* end with \\r\\n.
        """
        self._buffer.append(s)

    def _read_readable(self, readable):
        """Yield the contents of a file-like object in blocksize pieces.

        Blocks read from a text stream are encoded as iso-8859-1.
        """
        if self.debuglevel > 0:
            print("sendIng a read()able")
        encode = self._is_textIO(readable)
        if encode and self.debuglevel > 0:
            print("encoding file using iso-8859-1")
        while True:
            datablock = readable.read(self.blocksize)
            if not datablock:
                break
            if encode:
                datablock = datablock.encode("iso-8859-1")
            yield datablock

    def _send_output(self, message_body=None, encode_chunked=False):
        """Send the currently buffered request and clear the buffer.

        Appends an extra \\r\\n to the buffer.
        A message_body may be specified, to be appended to the request.
        When encode_chunked is true and HTTP/1.1 is in use, every non-empty
        piece of the body is framed as a chunk and a terminating zero-length
        chunk is sent.
        """
        self._buffer.extend((b"", b""))
        msg = b"\r\n".join(self._buffer)
        del self._buffer[:]
        self.send(msg)

        if message_body is not None:

            # create a consistent interface to message_body
            if hasattr(message_body, 'read'):
                # Let file-like take precedence over byte-like.  This
                # is needed to allow the current position of mmap'ed
                # files to be taken into account.
                chunks = self._read_readable(message_body)
            else:
                try:
                    # this is solely to check to see if message_body
                    # implements the buffer API.  it /would/ be easier
                    # to capture if PyObject_CheckBuffer was exposed
                    # to Python.
                    memoryview(message_body)
                except TypeError:
                    try:
                        chunks = iter(message_body)
                    except TypeError:
                        raise TypeError("message_body should be a bytes-like "
                                        "object or an iterable, got %r"
                                        % type(message_body))
                else:
                    # the object implements the buffer interface and
                    # can be passed directly into socket methods
                    chunks = (message_body,)

            for chunk in chunks:
                if not chunk:
                    # an empty chunk would be read as the end-of-body marker
                    if self.debuglevel > 0:
                        print('Zero length chunk ignored')
                    continue

                if encode_chunked and self._http_vsn == 11:
                    # chunked encoding
                    chunk = f'{len(chunk):X}\r\n'.encode('ascii') + chunk \
                        + b'\r\n'
                self.send(chunk)

            if encode_chunked and self._http_vsn == 11:
                # end chunked transfer
                self.send(b'0\r\n\r\n')

    def putrequest(self, method, url, skip_host=False,
                   skip_accept_encoding=False):
        """Send a request to the server.

        `method' specifies an HTTP request method, e.g. 'GET'.
        `url' specifies the object being requested, e.g. '/index.html'.
        `skip_host' if True does not add automatically a 'Host:' header
        `skip_accept_encoding' if True does not add automatically an
           'Accept-Encoding:' header

        Raises CannotSendRequest when the connection is not idle,
        ValueError when the method contains control characters and
        InvalidURL when the URL contains control characters or spaces.
        """

        # if a prior response has been completed, then forget about it.
        if self.__response and self.__response.isclosed():
            self.__response = None

        # in certain cases, we cannot issue another request on this connection.
        # this occurs when:
        #   1) we are in the process of sending a request.   (_CS_REQ_STARTED)
        #   2) a response to a previous request has signalled that it is going
        #      to close the connection upon completion.
        #   3) the headers for the previous response have not been read, thus
        #      we cannot determine whether point (2) is true.   (_CS_REQ_SENT)
        #
        # if there is no prior response, then we can request at will.
        #
        # if point (2) is true, then we will have passed the socket to the
        # response (effectively meaning, "there is no prior response"), and
        # will open a new one when a new request is made.
        #
        # Note: if a prior response exists, then we *can* start a new request.
        #       We are not allowed to begin fetching the response to this new
        #       request, however, until that prior response is complete.
        #
        if self.__state != _CS_IDLE:
            raise CannotSendRequest(self.__state)

        if not url:
            url = '/'

        # Refuse anything that could break out of the request line (header
        # or request injection).  This is checked *before* the state
        # changes so a rejected call leaves the connection idle and usable.
        bad = self._find_forbidden_char(str(method),
                                        self._method_forbidden_chars)
        if bad is not None:
            raise ValueError(
                f"method can't contain control characters. {method!r} "
                f"(found at least {bad!r})")
        bad = self._find_forbidden_char(str(url), self._url_forbidden_chars)
        if bad is not None:
            raise InvalidURL(f"URL can't contain control characters. {url!r} "
                             f"(found at least {bad!r})")

        self.__state = _CS_REQ_STARTED

        # Save the method we use, we need it later in the response phase
        self._method = method
        request = '%s %s %s' % (method, url, self._http_vsn_str)

        # Non-ASCII characters should have been eliminated earlier
        self._output(request.encode('ascii'))

        if self._http_vsn == 11:
            # Issue some standard headers for better HTTP/1.1 compliance

            if not skip_host:
                # this header is issued *only* for HTTP/1.1
                # connections. more specifically, this means it is
                # only issued when the client uses the new
                # HTTPConnection() class. backwards-compat clients
                # will be using HTTP/1.0 and those clients may be
                # issuing this header themselves. we should NOT issue
                # it twice; some web servers (such as Apache) barf
                # when they see two Host: headers

                # If we need a non-standard port, include it in the
                # header.  If the request is going through a proxy,
                # use the host of the actual URL, not the host of the
                # proxy.

                netloc = ''
                if url.startswith('http'):
                    netloc = urlsplit(url).netloc

                if netloc:
                    try:
                        netloc_enc = netloc.encode("ascii")
                    except UnicodeEncodeError:
                        netloc_enc = netloc.encode("idna")
                    self.putheader('Host', netloc_enc)
                else:
                    if self._tunnel_host:
                        host = self._tunnel_host
                        port = self._tunnel_port
                    else:
                        host = self.host
                        port = self.port

                    try:
                        host_enc = host.encode("ascii")
                    except UnicodeEncodeError:
                        host_enc = host.encode("idna")

                    # As per RFC 2732, IPv6 address should be wrapped with []
                    # when used as Host header

                    if host.find(':') >= 0:
                        host_enc = b'[' + host_enc + b']'

                    if port == self.default_port:
                        self.putheader('Host', host_enc)
                    else:
                        host_enc = host_enc.decode("ascii")
                        self.putheader('Host', "%s:%s" % (host_enc, port))

            # note: we are assuming that clients will not attempt to set these
            #       headers since *this* library must deal with the
            #       consequences. this also means that when the supporting
            #       libraries are updated to recognize other forms, then this
            #       code should be changed (removed or updated).

            # we only want a Content-Encoding of "identity" since we don't
            # support encodings such as x-gzip or x-deflate.
            if not skip_accept_encoding:
                self.putheader('Accept-Encoding', 'identity')

            # we can accept "chunked" Transfer-Encodings, but no others
            # NOTE: no TE header implies *only* "chunked"
            #self.putheader('TE', 'chunked')

            # if TE is supplied in the header, then it must appear in a
            # Connection header.
            #self.putheader('Connection', 'TE')

        else:
            # For HTTP/1.0, the server will assume "not chunked"
            pass

    def putheader(self, header, *values):
        """Send a request header line to the server.

        For example: h.putheader('Accept', 'text/html')

        Several values are joined into one folded header line.  Raises
        CannotSendHeader outside the request-started state and ValueError
        for an illegal header name or value.
        """
        if self.__state != _CS_REQ_STARTED:
            raise CannotSendHeader()

        if hasattr(header, 'encode'):
            header = header.encode('ascii')

        if not _is_legal_header_name(header):
            raise ValueError('Invalid header name %r' % (header,))

        values = list(values)
        for i, one_value in enumerate(values):
            if hasattr(one_value, 'encode'):
                values[i] = one_value.encode('latin-1')
            elif isinstance(one_value, int):
                values[i] = str(one_value).encode('ascii')

            if _is_illegal_header_value(values[i]):
                raise ValueError('Invalid header value %r' % (values[i],))

        value = b'\r\n\t'.join(values)
        header = header + b': ' + value
        self._output(header)

    def endheaders(self, message_body=None, *, encode_chunked=False):
        """Indicate that the last header line has been sent to the server.

        This method sends the request to the server.  The optional message_body
        argument can be used to pass a message body associated with the
        request.

        Raises CannotSendHeader when no request has been started.
        """
        if self.__state == _CS_REQ_STARTED:
            self.__state = _CS_REQ_SENT
        else:
            raise CannotSendHeader()
        self._send_output(message_body, encode_chunked=encode_chunked)

    # NOTE: the shared default ``headers={}`` is only ever read, never
    # mutated, so it is safe to keep the signature as it is.
    def request(self, method, url, body=None, headers={}, *,
                encode_chunked=False):
        """Send a complete request to the server."""
        self._send_request(method, url, body, headers, encode_chunked)

    def _send_request(self, method, url, body, headers, encode_chunked):
        """Emit request line, headers and body for request()."""
        # Honor explicitly requested Host: and Accept-Encoding: headers.
        header_names = frozenset(k.lower() for k in headers)
        skips = {}
        if 'host' in header_names:
            skips['skip_host'] = 1
        if 'accept-encoding' in header_names:
            skips['skip_accept_encoding'] = 1

        self.putrequest(method, url, **skips)

        # chunked encoding will happen if HTTP/1.1 is used and either
        # the caller passes encode_chunked=True or the following
        # conditions hold:
        # 1. content-length has not been explicitly set
        # 2. the body is a file or iterable, but not a str or bytes-like
        # 3. Transfer-Encoding has NOT been explicitly set by the caller

        if 'content-length' not in header_names:
            # only chunk body if not explicitly set for backwards
            # compatibility, assuming the client code is already handling the
            # chunking
            if 'transfer-encoding' not in header_names:
                # if content-length cannot be automatically determined, fall
                # back to chunked encoding
                encode_chunked = False
                content_length = self._get_content_length(body, method)
                if content_length is None:
                    if body is not None:
                        if self.debuglevel > 0:
                            print('Unable to determine size of %r' % body)
                        encode_chunked = True
                        self.putheader('Transfer-Encoding', 'chunked')
                else:
                    self.putheader('Content-Length', str(content_length))
        else:
            encode_chunked = False

        for hdr, value in headers.items():
            self.putheader(hdr, value)
        if isinstance(body, str):
            # RFC 2616 Section 3.7.1 says that text default has a
            # default charset of iso-8859-1.
            body = _encode(body, 'body')
        self.endheaders(body, encode_chunked=encode_chunked)

    def getresponse(self):
        """Get the response from the server.

        If the HTTPConnection is in the correct state, returns an
        instance of HTTPResponse or of whatever object is returned by
        the response_class variable.

        If a request has not been sent or if a previous response has
        not been handled, ResponseNotReady is raised.  If the HTTP
        response indicates that the connection should be closed, then
        it will be closed before the response is returned.  When the
        connection is closed, the underlying socket is closed.
        """

        # if a prior response has been completed, then forget about it.
        if self.__response and self.__response.isclosed():
            self.__response = None

        # if a prior response exists, then it must be completed (otherwise, we
        # cannot read this response's header to determine the connection-close
        # behavior)
        #
        # note: if a prior response existed, but was connection-close, then the
        # socket and response were made independent of this HTTPConnection
        # object since a new request requires that we open a whole new
        # connection
        #
        # this means the prior response had one of two states:
        #   1) will_close: this connection was reset and the prior socket and
        #                  response operate independently
        #   2) persistent: the response was retained and we await its
        #                  isclosed() status to become true.
        #
        if self.__state != _CS_REQ_SENT or self.__response:
            raise ResponseNotReady(self.__state)

        if self.debuglevel > 0:
            response = self.response_class(self.sock, self.debuglevel,
                                           method=self._method)
        else:
            response = self.response_class(self.sock, method=self._method)

        try:
            try:
                response.begin()
            except ConnectionError:
                self.close()
                raise
            assert response.will_close != _UNKNOWN
            self.__state = _CS_IDLE

            if response.will_close:
                # this effectively passes the connection to the response
                self.close()
            else:
                # remember this, so we can tell when it is complete
                self.__response = response

            return response
        except:
            # deliberately catches everything: the half-built response is
            # released and the original exception is re-raised unchanged
            response.close()
            raise
1339
try:
    import ssl
except ImportError:
    # Without the ssl module HTTPSConnection is simply not defined.
    pass
else:
    class HTTPSConnection(HTTPConnection):
        """HTTPConnection variant whose socket is wrapped by an SSL context."""

        default_port = HTTPS_PORT

        # XXX Should key_file and cert_file be deprecated in favour of context?

        def __init__(self, host, port=None, key_file=None, cert_file=None,
                     timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
                     source_address=None, *, context=None,
                     check_hostname=None, blocksize=8192):
            """Set up the connection parameters and the SSL context.

            key_file, cert_file and check_hostname are deprecated (a
            DeprecationWarning is issued when any of them is given); pass a
            configured context instead.  When context is None a default
            HTTPS context is created.  ValueError is raised when hostname
            checking is requested on a context that does not verify
            certificates.
            """
            super().__init__(host, port, timeout, source_address,
                             blocksize=blocksize)
            deprecated_args = (key_file, cert_file, check_hostname)
            if any(arg is not None for arg in deprecated_args):
                import warnings
                warnings.warn("key_file, cert_file and check_hostname are "
                              "deprecated, use a custom context instead.",
                              DeprecationWarning, 2)
            self.key_file = key_file
            self.cert_file = cert_file
            if context is None:
                context = ssl._create_default_https_context()
            verifies_peer = context.verify_mode != ssl.CERT_NONE
            if check_hostname is None:
                # fall back to whatever the context is configured to do
                check_hostname = context.check_hostname
            if check_hostname and not verifies_peer:
                raise ValueError("check_hostname needs a SSL context with "
                                 "either CERT_OPTIONAL or CERT_REQUIRED")
            if key_file or cert_file:
                context.load_cert_chain(cert_file, key_file)
            self._context = context
            if check_hostname is not None:
                self._context.check_hostname = check_hostname

        def connect(self):
            """Connect to a host on a given (SSL) port."""
            super().connect()

            # When tunnelling, the TLS peer is the tunnel endpoint rather
            # than the proxy this connection was opened to.
            server_hostname = self._tunnel_host or self.host

            self.sock = self._context.wrap_socket(
                self.sock, server_hostname=server_hostname)

    __all__.append("HTTPSConnection")
1395
class HTTPException(Exception):
    """Common base class of the exceptions defined in this module.

    Subclasses that define an __init__ must call Exception.__init__
    or define self.args.  Otherwise, str() will fail.
    """
1400
class NotConnected(HTTPException):
    """Raised by HTTPConnection.send() when there is no socket and
    auto_open is disabled."""
1403
class InvalidURL(HTTPException):
    """Raised for a malformed host specification, e.g. a non-numeric port."""
1406
class UnknownProtocol(HTTPException):
    """Error that carries a protocol version string in ``version``."""

    def __init__(self, version):
        # args is set explicitly so that str() works (Exception.__init__
        # is not called here)
        self.version = version
        self.args = (version,)
1411
class UnknownTransferEncoding(HTTPException):
    """HTTPException subclass for an unrecognised transfer encoding.

    NOTE(review): nothing in the visible part of the module raises it.
    """
1414
class UnimplementedFileMode(HTTPException):
    """HTTPException subclass for an unsupported file mode.

    NOTE(review): nothing in the visible part of the module raises it.
    """
1417
class IncompleteRead(HTTPException):
    """Error holding the data read so far and, optionally, how much more
    was expected.

    ``partial`` is the data that was read; ``expected`` is the number of
    further bytes that were expected, or None when unknown.
    """

    def __init__(self, partial, expected=None):
        self.args = (partial,)
        self.partial = partial
        self.expected = expected

    def __repr__(self):
        remainder = ''
        if self.expected is not None:
            remainder = ', %i more expected' % self.expected
        return '%s(%i bytes read%s)' % (
            self.__class__.__name__, len(self.partial), remainder)

    def __str__(self):
        # same text as repr(), so the byte counts show up in tracebacks
        return repr(self)
1432
class ImproperConnectionState(HTTPException):
    """Base class for errors about using a connection in the wrong state."""
1435
class CannotSendRequest(ImproperConnectionState):
    """Raised by HTTPConnection.putrequest() when the connection is not
    idle."""
1438
class CannotSendHeader(ImproperConnectionState):
    """Raised by HTTPConnection.putheader() and endheaders() when no
    request has been started."""
1441
class ResponseNotReady(ImproperConnectionState):
    """Raised by HTTPConnection.getresponse() when no request has been sent
    or a previous response is still pending."""
1444
class BadStatusLine(HTTPException):
    """Error that carries an offending status line in ``line``.

    An empty line is stored as its repr() so that the message is not blank.
    """

    def __init__(self, line):
        shown = line if line else repr(line)
        self.args = (shown,)
        self.line = shown
1451
class LineTooLong(HTTPException):
    """Raised when a line being read is longer than _MAXLINE bytes.

    line_type names what was being read (e.g. "header line") and becomes
    part of the message.
    """

    def __init__(self, line_type):
        message = "got more than %d bytes when reading %s" % (_MAXLINE,
                                                              line_type)
        HTTPException.__init__(self, message)
1456
1457class RemoteDisconnected(ConnectionResetError, BadStatusLine):
1458    def __init__(self, *pos, **kw):
1459        BadStatusLine.__init__(self, "")
1460        ConnectionResetError.__init__(self, *pos, **kw)
1461
# for backwards compatibility: ``error`` is kept as an alias of HTTPException
error = HTTPException
1464